xref: /aosp_15_r20/external/intel-media-driver/media_driver/agnostic/common/cm/cm_hal.cpp (revision ba62d9d3abf0e404f2022b4cd7a85e107f48596f)
1 /*
2 * Copyright (c) 2017-2021, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file      cm_hal.cpp
24 //! \brief     HAL Layer for CM Component
25 //!
26 #include "mos_os.h"
27 #include "cm_hal.h"
28 #include "media_interfaces_cmhal.h"
29 #include "media_interfaces_mhw.h"
30 #include "cm_common.h"
31 #include "cm_hal_vebox.h"
32 #include "cm_mem.h"
33 #include "renderhal_platform_interface.h"
34 #include "cm_execution_adv.h"
35 #include "cm_extension_creator.h"
36 #include "mos_interface.h"
37 
// Number of `base`-sized units needed to hold (index * elemperIndex) elements,
// i.e. the quotient rounded up.  The conditional term is parenthesized on its
// own: without that, `+` binds tighter than `?:` and the whole macro collapses
// to 0 or 1.  Every argument is parenthesized so expressions can be passed in.
// Arguments are evaluated more than once and must be free of side effects.
#define INDEX_ALIGN(index, elemperIndex, base) \
    ((((index) * (elemperIndex)) / (base)) + (((((index) * (elemperIndex)) % (base)) != 0) ? 1 : 0))
39 
//----------------------------------
//| CM scoreboard XY: a pair of signed 32-bit values
//----------------------------------
typedef struct CM_HAL_SCOREBOARD_XY
{
    int32_t x;  // x component
    int32_t y;  // y component
} CM_HAL_SCOREBOARD_XY, *PCM_HAL_SCOREBOARD_XY;
49 
//---------------------------------------
//| CM scoreboard XY with mask: the x/y pair plus two 8-bit mask fields
//---------------------------------------
typedef struct CM_HAL_SCOREBOARD_XY_MASK
{
    int32_t x;          // x component
    int32_t y;          // y component
    uint8_t mask;       // 8-bit mask value
    uint8_t resetMask;  // 8-bit reset-mask value
} CM_HAL_SCOREBOARD_XY_MASK, *PCM_HAL_SCOREBOARD_XY_MASK;
61 
//------------------------------------------------------------------------------
//| Slice / sub-slice pair a CM kernel is assigned to (for EnqueueWithHints)
//------------------------------------------------------------------------------
typedef struct CM_HAL_KERNEL_SLICE_SUBSLICE
{
    uint32_t slice;     // slice value
    uint32_t subSlice;  // sub-slice value
} CM_HAL_KERNEL_SLICE_SUBSLICE, *PCM_HAL_KERNEL_SLICE_SUBSLICE;
71 
72 //------------------------------------------------------------------------------
73 //| CM kernel information for EnqueueWithHints to assign subslice
74 //------------------------------------------------------------------------------
75 struct CM_HAL_KERNEL_SUBSLICE_INFO
76 {
77     uint32_t numSubSlices;
78     uint32_t counter;
79     PCM_HAL_KERNEL_SLICE_SUBSLICE  destination;
80 };
81 typedef CM_HAL_KERNEL_SUBSLICE_INFO *PCM_HAL_KERNEL_SUBSLICE_INFO;
82 
83 // forward declaration
84 int32_t HalCm_InsertCloneKernel(
85     PCM_HAL_STATE              state,
86     PCM_HAL_KERNEL_PARAM       kernelParam,
87     PRENDERHAL_KRN_ALLOCATION  &kernelAllocation);
88 
89 #if MDF_COMMAND_BUFFER_DUMP
90 extern int32_t HalCm_InitDumpCommandBuffer(PCM_HAL_STATE state);
91 
92 extern int32_t HalCm_DumpCommadBuffer(PCM_HAL_STATE state, PMOS_COMMAND_BUFFER cmdBuffer,
93                int offsetSurfaceState, size_t sizeOfSurfaceState);
94 #endif
95 
96 #if MDF_CURBE_DATA_DUMP
97 extern int32_t HalCm_InitDumpCurbeData(PCM_HAL_STATE state);
98 
99 extern int32_t HalCm_DumpCurbeData(PCM_HAL_STATE state);
100 #endif
101 
102 #if MDF_SURFACE_CONTENT_DUMP
103 extern int32_t HalCm_InitSurfaceDump(PCM_HAL_STATE state);
104 
105 #endif
106 
107 #if MDF_SURFACE_STATE_DUMP
108 extern int32_t HalCm_InitDumpSurfaceState(PCM_HAL_STATE state);
109 extern int32_t HalCm_DumpSurfaceState(PCM_HAL_STATE state, int offsetSurfaceState, size_t sizeOfSurfaceState);
110 #endif
111 
112 #if MDF_INTERFACE_DESCRIPTOR_DATA_DUMP
113 extern int32_t HalCm_InitDumpInterfaceDescriporData(PCM_HAL_STATE state);
114 extern int32_t HalCm_DumpInterfaceDescriptorData(PCM_HAL_STATE state);
115 #endif
116 
117 extern uint64_t HalCm_GetTsFrequency(PMOS_INTERFACE pOsInterface);
118 
119 //===============<Private Functions>============================================
120 //*-----------------------------------------------------------------------------
121 //| Purpose:    Align to the next power of 2
122 //| Returns:    Aligned data
123 //| Reference:  http://graphics.stanford.edu/~seander/bithacks.html#DetermineIfPowerOf2
124 //*-----------------------------------------------------------------------------
HalCm_GetPow2Aligned(uint32_t d)125 __inline uint32_t HalCm_GetPow2Aligned(uint32_t d)
126 {
127     CM_ASSERT(d > 0);
128 
129     // subtract the number first
130     --d;
131 
132     d |= d >> 1;
133     d |= d >> 2;
134     d |= d >> 4;
135     d |= d >> 8;
136     d |= d >> 16;
137 
138     return ++d;
139 }
140 
141 //*-----------------------------------------------------------------------------
142 //| Purpose:    Checks if Task has any thread arguments
143 //| Returns:    True if task has any thread arguments, false otherwise
144 //*-----------------------------------------------------------------------------
HalCm_GetTaskHasThreadArg(PCM_HAL_KERNEL_PARAM * kernels,uint32_t numKernels)145 bool HalCm_GetTaskHasThreadArg(PCM_HAL_KERNEL_PARAM *kernels, uint32_t numKernels)
146 {
147     PCM_HAL_KERNEL_PARAM            kernelParam;
148     PCM_HAL_KERNEL_ARG_PARAM        argParam;
149     bool                            threadArgExists = false;
150 
151     for( uint32_t krn = 0; krn < numKernels; krn++)
152     {
153         kernelParam    = kernels[krn];
154         for(uint32_t argIndex = 0; argIndex < kernelParam->numArgs; argIndex++)
155         {
156             argParam = &kernelParam->argParams[argIndex];
157             if( argParam->perThread )
158             {
159                 threadArgExists = true;
160                 break;
161             }
162         }
163 
164         if( threadArgExists )
165             break;
166     }
167 
168     return threadArgExists;
169 }
170 
171 //*-----------------------------------------------------------------------------
172 //| Purpose:    Allocate Timestamp Resource
173 //| Returns:    Result of the operation
174 //*-----------------------------------------------------------------------------
HalCm_AllocateTsResource(PCM_HAL_STATE state)175 MOS_STATUS HalCm_AllocateTsResource(
176     PCM_HAL_STATE state)                                                       // [in] Pointer to CM HAL State
177 {
178     MOS_STATUS              eStatus = MOS_STATUS_SUCCESS;
179     uint32_t                size;
180     PMOS_INTERFACE          osInterface;
181     MOS_ALLOC_GFXRES_PARAMS allocParams;
182     MOS_LOCK_PARAMS         lockFlags;
183 
184     osInterface    = state->osInterface;
185     CM_CHK_NULL_GOTOFINISH_MOSERROR(osInterface);
186 
187     size = state->cmHalInterface->GetTimeStampResourceSize() * state->cmDeviceParam.maxTasks;
188     // allocate render engine Ts Resource
189     MOS_ZeroMemory(&allocParams, sizeof(MOS_ALLOC_GFXRES_PARAMS));
190     allocParams.Type    = MOS_GFXRES_BUFFER;
191     allocParams.dwBytes = size;
192     allocParams.Format  = Format_Buffer;  //used in RenderHal_OsAllocateResource_Linux
193     allocParams.TileType= MOS_TILE_LINEAR;
194     allocParams.pBufName = "TsResource";
195 
196     CM_CHK_HRESULT_GOTOFINISH_MOSERROR(
197         osInterface->pfnAllocateResource(osInterface,
198                                          &allocParams,
199                                          &state->renderTimeStampResource.osResource));
200 
201     // RegisterResource will be called in AddResourceToHWCmd. It is not allowed to be called by hal explicitly
202     if (!osInterface->apoMosEnabled)
203     {
204         CM_CHK_MOSSTATUS_GOTOFINISH(
205             osInterface->pfnRegisterResource(osInterface,
206                 &state->renderTimeStampResource.osResource,
207                 true,
208                 true));
209     }
210 
211     osInterface->pfnSkipResourceSync(&state->renderTimeStampResource.osResource);
212 
213     // Lock the Resource
214     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
215 
216     lockFlags.ReadOnly = 1;
217     lockFlags.ForceCached = true;
218 
219     state->renderTimeStampResource.data = (uint8_t*)osInterface->pfnLockResource(
220                                         osInterface,
221                                         &state->renderTimeStampResource.osResource,
222                                         &lockFlags);
223 
224     CM_CHK_NULL_GOTOFINISH_MOSERROR(state->renderTimeStampResource.data);
225 
226     state->renderTimeStampResource.locked  = true;
227 
228     //allocated for vebox TS resource
229 
230     MOS_ZeroMemory(&allocParams, sizeof(MOS_ALLOC_GFXRES_PARAMS));
231     allocParams.Type = MOS_GFXRES_BUFFER;
232     allocParams.dwBytes = size;
233     allocParams.Format = Format_Buffer;  //used in RenderHal_OsAllocateResource_Linux
234     allocParams.TileType = MOS_TILE_LINEAR;
235     allocParams.pBufName = "TsResource";
236 
237     CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnAllocateResource(
238             osInterface,
239             &allocParams,
240             &state->veboxTimeStampResource.osResource));
241 
242     // Lock the Resource
243     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
244 
245     lockFlags.ReadOnly = 1;
246     lockFlags.ForceCached = true;
247 
248     state->veboxTimeStampResource.data = (uint8_t*)osInterface->pfnLockResource(
249             osInterface,
250             &state->veboxTimeStampResource.osResource,
251             &lockFlags);
252 
253     CM_CHK_NULL_GOTOFINISH_MOSERROR(state->veboxTimeStampResource.data);
254 
255     state->veboxTimeStampResource.locked  = true;
256 
257 finish:
258     return eStatus;
259 }
260 
261 //! \brief    Allocate tracker resource
262 //! \param    [in] state
263 //!           Pointer to CM_HAL_STATE structure
264 //! \return   MOS_STATUS
HalCm_AllocateTrackerResource(PCM_HAL_STATE state)265 MOS_STATUS HalCm_AllocateTrackerResource(
266     PCM_HAL_STATE           state)
267 {
268     MOS_STATUS                          eStatus = MOS_STATUS_SUCCESS;
269     MOS_ALLOC_GFXRES_PARAMS             allocParamsLinearBuffer;
270     MOS_LOCK_PARAMS                     lockFlags;
271     PMOS_INTERFACE                      osInterface;
272     PRENDERHAL_INTERFACE                renderHal;
273 
274     osInterface = state->osInterface;
275     renderHal   = state->renderHal;
276 
277     CM_CHK_NULL_GOTOFINISH_MOSERROR(osInterface);
278     // Tracker producer for RENDER engine
279     renderHal->trackerProducer.Initialize(osInterface);
280 
281     // Tracker resource for VeBox engine
282     osInterface->pfnResetResource(&renderHal->veBoxTrackerRes.osResource);
283 
284     MOS_ZeroMemory(&allocParamsLinearBuffer, sizeof(MOS_ALLOC_GFXRES_PARAMS));
285     allocParamsLinearBuffer.Type = MOS_GFXRES_BUFFER;
286     allocParamsLinearBuffer.TileType = MOS_TILE_LINEAR;
287     allocParamsLinearBuffer.Format = Format_Buffer;
288     allocParamsLinearBuffer.dwBytes = MHW_CACHELINE_SIZE;
289     allocParamsLinearBuffer.pBufName = "VeboxTrackerRes";
290 
291     CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnAllocateResource(
292         osInterface,
293         &allocParamsLinearBuffer,
294         &renderHal->veBoxTrackerRes.osResource));
295 
296     // Lock the Resource
297     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
298 
299     lockFlags.ReadOnly = 1;
300     lockFlags.ForceCached = true;
301 
302     renderHal->veBoxTrackerRes.data = (uint32_t*)osInterface->pfnLockResource(
303         osInterface,
304         &renderHal->veBoxTrackerRes.osResource,
305         &lockFlags);
306 
307     CM_CHK_NULL_GOTOFINISH_MOSERROR(renderHal->veBoxTrackerRes.data);
308 
309     *(renderHal->veBoxTrackerRes.data) = MemoryBlock::m_invalidTrackerId;
310 
311     renderHal->veBoxTrackerRes.currentTrackerId = 1;
312 
313     renderHal->veBoxTrackerRes.locked = true;
314 
315 finish:
316     return eStatus;
317 }
318 
319 //! \brief    Initialize dynamic state heap
320 //! \param    [in] state
321 //!           Pointer to CM_HAL_STATE structure
322 //! \param    [in] heapParam
323 //!           Pointer to CM_HAL_HEAP_PARAM structure
324 //! \return   MOS_STATUS
HalCm_InitializeDynamicStateHeaps(PCM_HAL_STATE state,CM_HAL_HEAP_PARAM * heapParam)325 MOS_STATUS HalCm_InitializeDynamicStateHeaps(
326     PCM_HAL_STATE           state,
327     CM_HAL_HEAP_PARAM       *heapParam)
328 {
329     MOS_STATUS   eStatus        = MOS_STATUS_SUCCESS;
330     HeapManager* dgsHeap   = state->renderHal->dgsheapManager;
331 
332     CM_CHK_NULL_GOTOFINISH_MOSERROR(heapParam);
333 
334     dgsHeap = MOS_New(HeapManager);
335     CM_CHK_NULL_GOTOFINISH_MOSERROR(dgsHeap);
336     CM_CHK_MOSSTATUS_GOTOFINISH(dgsHeap->RegisterOsInterface(state->osInterface));
337 
338     dgsHeap->SetDefaultBehavior(heapParam->behaviorGSH);
339     CM_CHK_MOSSTATUS_GOTOFINISH(dgsHeap->SetInitialHeapSize(heapParam->initialSizeGSH));
340     CM_CHK_MOSSTATUS_GOTOFINISH(dgsHeap->SetExtendHeapSize(heapParam->extendSizeGSH));
341     CM_CHK_MOSSTATUS_GOTOFINISH(dgsHeap->RegisterTrackerProducer(heapParam->trackerProducer));
342     // lock the heap in the beginning, so cpu doesn't need to wait gpu finishing occupying it to lock it again
343     CM_CHK_MOSSTATUS_GOTOFINISH(dgsHeap->LockHeapsOnAllocate());
344 
345     state->renderHal->dgsheapManager = dgsHeap;
346 
347 finish:
348     return eStatus;
349 }
350 
351 
352 //*-----------------------------------------------------------------------------
353 //| Purpose:    Free Timestamp Resource
354 //| Returns:    Result of the operation
355 //*-----------------------------------------------------------------------------
HalCm_FreeTsResource(PCM_HAL_STATE state)356 __inline void HalCm_FreeTsResource(
357     PCM_HAL_STATE state)                                                       // [in] Pointer to CM HAL State
358 {
359     PMOS_INTERFACE        osInterface;
360     MOS_STATUS            hr;
361     MOS_GFXRES_FREE_FLAGS resFreeFlags = {0};
362 
363     resFreeFlags.AssumeNotInUse = 1;
364     osInterface = state->osInterface;
365 
366     if (!Mos_ResourceIsNull(&state->renderTimeStampResource.osResource))
367     {
368         if (state->renderTimeStampResource.locked)
369         {
370             hr = (MOS_STATUS)osInterface->pfnUnlockResource(
371                     osInterface,
372                     &state->renderTimeStampResource.osResource);
373 
374             CM_ASSERT(hr == MOS_STATUS_SUCCESS);
375         }
376 
377         osInterface->pfnFreeResourceWithFlag(
378             osInterface,
379             &state->renderTimeStampResource.osResource,
380             resFreeFlags.Value);
381     }
382 
383     //free vebox TS resource
384 
385     if (!Mos_ResourceIsNull(&state->veboxTimeStampResource.osResource))
386     {
387         if (state->veboxTimeStampResource.locked)
388         {
389             hr = (MOS_STATUS)osInterface->pfnUnlockResource(
390                 osInterface,
391                 &state->veboxTimeStampResource.osResource);
392 
393             CM_ASSERT(hr == MOS_STATUS_SUCCESS);
394         }
395 
396         osInterface->pfnFreeResourceWithFlag(
397             osInterface,
398             &state->veboxTimeStampResource.osResource,
399             resFreeFlags.Value);
400     }
401 }
402 
403 //! \brief    Free tracker resource
404 //! \param    PCM_HAL_STATE state
405 //!           [in] Pointer to CM_HAL_STATE structure
406 //! \return   void
HalCm_FreeTrackerResources(PCM_HAL_STATE state)407 __inline void HalCm_FreeTrackerResources(
408     PCM_HAL_STATE state)                                                       // [in] Pointer to CM HAL State
409 {
410     PMOS_INTERFACE        osInterface;
411     MOS_STATUS            hr;
412     MOS_GFXRES_FREE_FLAGS resFreeFlags = {0};
413 
414     resFreeFlags.AssumeNotInUse = 1;
415     osInterface = state->osInterface;
416 
417     if (!Mos_ResourceIsNull(&state->renderHal->veBoxTrackerRes.osResource))
418     {
419         if (state->renderHal->veBoxTrackerRes.locked)
420         {
421             hr = (MOS_STATUS)osInterface->pfnUnlockResource(
422                 osInterface,
423                 &state->renderHal->veBoxTrackerRes.osResource);
424 
425             CM_ASSERT(hr == MOS_STATUS_SUCCESS);
426         }
427 
428         osInterface->pfnFreeResourceWithFlag(
429             osInterface,
430             &state->renderHal->veBoxTrackerRes.osResource,
431             resFreeFlags.Value);
432     }
433 }
434 
435 //*-----------------------------------------------------------------------------
436 //| Purpose:    Allocate CSR Resource
437 //| Returns:    Result of the operation
438 //*-----------------------------------------------------------------------------
HalCm_AllocateCSRResource(PCM_HAL_STATE state)439 MOS_STATUS HalCm_AllocateCSRResource(
440     PCM_HAL_STATE state)       // [in] Pointer to CM HAL State
441 {
442     PMOS_INTERFACE          osInterface = state->osInterface;
443     MOS_STATUS              eStatus = MOS_STATUS_SUCCESS;
444     uint32_t                size;
445     MOS_ALLOC_GFXRES_PARAMS allocParams;
446 
447     //Enable Mid-thread
448     state->renderHal->pfnEnableGpgpuMiddleThreadPreemption(state->renderHal);
449 
450     size = CM_CSR_SURFACE_SIZE;
451 
452     MOS_ZeroMemory(&allocParams, sizeof(MOS_ALLOC_GFXRES_PARAMS));
453     allocParams.Type = MOS_GFXRES_BUFFER;
454     allocParams.dwBytes = size;
455     allocParams.Format = Format_RAW;  //used in VpHal_OsAllocateResource_Linux
456     allocParams.TileType = MOS_TILE_LINEAR;
457     allocParams.pBufName = "CSRResource";
458 
459     CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnAllocateResource(
460         osInterface,
461         &allocParams,
462         &state->csrResource));
463 
464     osInterface->pfnSkipResourceSync(&state->csrResource);
465 
466 finish:
467     return eStatus;
468 }
469 
470 //*-----------------------------------------------------------------------------
471 //| Purpose:    Allocate Sip Resource
472 //| Returns:    Result of the operation
473 //*-----------------------------------------------------------------------------
HalCm_AllocateSipResource(PCM_HAL_STATE state)474 MOS_STATUS HalCm_AllocateSipResource(
475     PCM_HAL_STATE state)       // [in] Pointer to CM HAL State
476 {
477     PMOS_INTERFACE          osInterface = state->osInterface;
478     MOS_STATUS              eStatus = MOS_STATUS_SUCCESS;
479     uint32_t                size;
480     MOS_ALLOC_GFXRES_PARAMS allocParams;
481     MOS_LOCK_PARAMS         lockFlags;
482 
483     size = CM_DEBUG_SURFACE_SIZE;
484 
485     MOS_ZeroMemory(&allocParams, sizeof(MOS_ALLOC_GFXRES_PARAMS));
486     allocParams.Type = MOS_GFXRES_BUFFER;
487     allocParams.dwBytes = size;
488     allocParams.Format = Format_Buffer;  //used in RenderHal_OsAllocateResource_Linux
489     allocParams.TileType = MOS_TILE_LINEAR;
490     allocParams.pBufName = "SipResource";
491 
492     CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnAllocateResource(
493         osInterface,
494         &allocParams,
495         &state->sipResource.osResource));
496 
497     // Lock the Resource
498     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
499 
500     lockFlags.ReadOnly = 1;
501     lockFlags.ForceCached = true;
502 
503     state->sipResource.data = (uint8_t*)osInterface->pfnLockResource(
504         osInterface,
505         &state->sipResource.osResource,
506         &lockFlags);
507     CM_CHK_NULL_GOTOFINISH_MOSERROR(state->sipResource.data);
508 
509     state->sipResource.locked = true;
510 
511 finish:
512     return eStatus;
513 }
514 
515 //*-----------------------------------------------------------------------------
516 //| Purpose:    Free CSR Resource
517 //| Returns:    Result of the operation
518 //*-----------------------------------------------------------------------------
HalCm_FreeCsrResource(PCM_HAL_STATE state)519 __inline void HalCm_FreeCsrResource(
520     PCM_HAL_STATE state)   // [in] Pointer to CM HAL State
521 {
522     PMOS_INTERFACE        osInterface  = state->osInterface;
523     MOS_GFXRES_FREE_FLAGS resFreeFlags = {0};
524 
525     resFreeFlags.AssumeNotInUse = 1;
526 
527     if (!Mos_ResourceIsNull(&state->csrResource))
528     {
529         osInterface->pfnFreeResourceWithFlag(
530             osInterface,
531             &state->csrResource,
532             resFreeFlags.Value);
533     }
534 }
535 
536 //*-----------------------------------------------------------------------------
537 //| Purpose:    Free Sip Resource
538 //| Returns:    Result of the operation
539 //*-----------------------------------------------------------------------------
HalCm_FreeSipResource(PCM_HAL_STATE state)540 __inline void HalCm_FreeSipResource(
541     PCM_HAL_STATE state)   // [in] Pointer to CM HAL State
542 {
543     PMOS_INTERFACE        osInterface = state->osInterface;
544     MOS_STATUS            hr = MOS_STATUS_SUCCESS;
545     MOS_GFXRES_FREE_FLAGS resFreeFlags = {0};
546 
547     resFreeFlags.AssumeNotInUse = 1;
548 
549     if (!Mos_ResourceIsNull(&state->sipResource.osResource))
550     {
551         if (state->sipResource.locked)
552         {
553             hr = (MOS_STATUS)osInterface->pfnUnlockResource(
554                     osInterface,
555                     &state->sipResource.osResource);
556 
557             CM_ASSERT(hr == MOS_STATUS_SUCCESS);
558         }
559 
560         osInterface->pfnFreeResourceWithFlag(
561             osInterface,
562             &state->sipResource.osResource,
563             resFreeFlags.Value);
564     }
565 }
566 
567 //*-----------------------------------------------------------------------------
568 //| Purpose: Sets Arg data in the buffer
569 //| Returns: Result of the operation
570 //*-----------------------------------------------------------------------------
HalCm_SetArgData(PCM_HAL_KERNEL_ARG_PARAM argParam,uint32_t threadIndex,uint8_t * buffer)571 __inline void HalCm_SetArgData(
572     PCM_HAL_KERNEL_ARG_PARAM    argParam,
573     uint32_t                    threadIndex,
574     uint8_t                     *buffer)
575 {
576     uint8_t *dst;
577     uint8_t *src;
578 
579     dst = buffer + argParam->payloadOffset;
580     src = argParam->firstValue + (threadIndex * argParam->unitSize);
581 
582     MOS_SecureMemcpy(dst, argParam->unitSize, src, argParam->unitSize);
583 }
584 
585 //*-----------------------------------------------------------------------------
586 //| Purpose:    Get the Buffer Entry
587 //| Returns:    Result of the operation.
588 //*-----------------------------------------------------------------------------
HalCm_GetResourceUPEntry(PCM_HAL_STATE state,uint32_t handle,PCM_HAL_SURFACE2D_UP_ENTRY * entryOut)589 __inline MOS_STATUS HalCm_GetResourceUPEntry(
590     PCM_HAL_STATE           state,                                             // [in]  Pointer to CM State
591     uint32_t                handle,                                           // [in]  Handle
592     PCM_HAL_SURFACE2D_UP_ENTRY    *entryOut)                                         // [out] Buffer Entry
593 {
594     MOS_STATUS                    eStatus;
595     PCM_HAL_SURFACE2D_UP_ENTRY    entry;
596 
597     eStatus = MOS_STATUS_SUCCESS;
598 
599     if (handle >= state->cmDeviceParam.max2DSurfaceUPTableSize)
600     {
601         eStatus = MOS_STATUS_INVALID_HANDLE;
602         CM_ASSERTMESSAGE("Invalid handle '%d'", handle);
603         goto finish;
604     }
605 
606     entry = &state->surf2DUPTable[handle];
607     if (entry->width == 0)
608     {
609         eStatus = MOS_STATUS_INVALID_HANDLE;
610         CM_ASSERTMESSAGE("handle '%d' is not set", handle);
611         goto finish;
612     }
613 
614     *entryOut = entry;
615 
616 finish:
617     return eStatus;
618 }
619 
620 //*-----------------------------------------------------------------------------
621 //| Purpose:    Get the Buffer Entry
622 //| Returns:    Result of the operation.
623 //*-----------------------------------------------------------------------------
HalCm_GetBufferEntry(PCM_HAL_STATE state,uint32_t handle,PCM_HAL_BUFFER_ENTRY * entryOut)624 __inline MOS_STATUS HalCm_GetBufferEntry(
625     PCM_HAL_STATE           state,                                             // [in]  Pointer to CM State
626     uint32_t                handle,                                           // [in]  Handle
627     PCM_HAL_BUFFER_ENTRY    *entryOut)                                         // [out] Buffer Entry
628 {
629     MOS_STATUS              eStatus;
630     PCM_HAL_BUFFER_ENTRY    entry;
631 
632     eStatus = MOS_STATUS_SUCCESS;
633 
634     if (handle >= state->cmDeviceParam.maxBufferTableSize)
635     {
636         eStatus = MOS_STATUS_INVALID_HANDLE;
637         CM_ASSERTMESSAGE("Invalid handle '%d'", handle);
638         goto finish;
639     }
640 
641     entry = &state->bufferTable[handle];
642     if (entry->size == 0)
643     {
644         eStatus = MOS_STATUS_INVALID_HANDLE;
645         CM_ASSERTMESSAGE("handle '%d' is not set", handle);
646         goto finish;
647     }
648 
649     *entryOut = entry;
650 
651 finish:
652     return eStatus;
653 }
654 
655 //*-----------------------------------------------------------------------------
656 //| Purpose:    Get the Surface2D Entry
657 //| Returns:    Result of the operation.
658 //*-----------------------------------------------------------------------------
HalCm_GetSurface2DEntry(PCM_HAL_STATE state,uint32_t handle,PCM_HAL_SURFACE2D_ENTRY * entryOut)659 __inline MOS_STATUS HalCm_GetSurface2DEntry(
660     PCM_HAL_STATE           state,                                             // [in]  Pointer to CM State
661     uint32_t                handle,                                           // [in]  Handle
662     PCM_HAL_SURFACE2D_ENTRY    *entryOut)                                         // [out] Buffer Entry
663 {
664     MOS_STATUS                 eStatus;
665     PCM_HAL_SURFACE2D_ENTRY    entry;
666 
667     eStatus = MOS_STATUS_SUCCESS;
668 
669     if (handle >= state->cmDeviceParam.max2DSurfaceTableSize)
670     {
671         eStatus = MOS_STATUS_INVALID_HANDLE;
672         CM_ASSERTMESSAGE("Invalid handle '%d'", handle);
673         goto finish;
674     }
675 
676     entry = &state->umdSurf2DTable[handle];
677     if ((entry->width == 0)||(entry->height == 0))
678     {
679         eStatus = MOS_STATUS_INVALID_HANDLE;
680         CM_ASSERTMESSAGE("handle '%d' is not set", handle);
681         goto finish;
682     }
683 
684     *entryOut = entry;
685 
686 finish:
687     return eStatus;
688 }
689 
690 //*-----------------------------------------------------------------------------
691 //| Purpose:    Get the 3D Entry
692 //| Returns:    Result of the operation.
693 //*-----------------------------------------------------------------------------
HalCm_Get3DResourceEntry(PCM_HAL_STATE state,uint32_t handle,PCM_HAL_3DRESOURCE_ENTRY * entryOut)694 __inline MOS_STATUS HalCm_Get3DResourceEntry(
695     PCM_HAL_STATE               state,                                         // [in]  Pointer to CM State
696     uint32_t                    handle,                                       // [in]  Handle
697     PCM_HAL_3DRESOURCE_ENTRY    *entryOut)                                     // [out] Buffer Entry
698 {
699     MOS_STATUS                  eStatus;
700     PCM_HAL_3DRESOURCE_ENTRY    entry;
701 
702     eStatus = MOS_STATUS_SUCCESS;
703 
704     if (handle >= state->cmDeviceParam.max3DSurfaceTableSize)
705     {
706         eStatus = MOS_STATUS_INVALID_HANDLE;
707         CM_ASSERTMESSAGE("Invalid handle '%d'", handle);
708         goto finish;
709     }
710 
711     entry = &state->surf3DTable[handle];
712     if (Mos_ResourceIsNull(&entry->osResource))
713     {
714         eStatus = MOS_STATUS_INVALID_HANDLE;
715         CM_ASSERTMESSAGE("3D handle '%d' is not set", handle);
716         goto finish;
717     }
718 
719     *entryOut = entry;
720 
721 finish:
722     return eStatus;
723 }
724 
725 //*-----------------------------------------------------------------------------
726 //| Purpose:    Allocates and sets up Task Param memory structure
727 //| Return:     true if enabled
728 //| Note:       A single layer of memory is allocated to avoid fragmentation
729 //*-----------------------------------------------------------------------------
HalCm_AllocateTables(PCM_HAL_STATE state)730 MOS_STATUS HalCm_AllocateTables(
731     PCM_HAL_STATE   state)         // [in] Pointer to HAL CM state
732 {
733     MOS_STATUS              eStatus = MOS_STATUS_SUCCESS;
734     PCM_HAL_DEVICE_PARAM    deviceParam;
735     uint8_t                 *pb;
736 
737     deviceParam  = &state->cmDeviceParam;
738 
739     uint32_t lookUpTableSize = deviceParam->max2DSurfaceTableSize    *
740                               sizeof(CMLOOKUP_ENTRY);
741     uint32_t i2DSURFTableSize = deviceParam->max2DSurfaceTableSize    *
742                             sizeof(CM_HAL_SURFACE2D_ENTRY);
743     uint32_t bufferTableSize = deviceParam->maxBufferTableSize       *
744                               sizeof(CM_HAL_BUFFER_ENTRY);
745     uint32_t i2DSURFUPTableSize = deviceParam->max2DSurfaceUPTableSize  *
746                               sizeof(CM_HAL_SURFACE2D_UP_ENTRY);
747     uint32_t i3DSurfTableSize = deviceParam->max3DSurfaceTableSize    *
748                               sizeof(CM_HAL_3DRESOURCE_ENTRY);
749     uint32_t samplerTableSize = deviceParam->maxSamplerTableSize      *
750                               sizeof(MHW_SAMPLER_STATE_PARAM);
751     uint32_t sampler8x8TableSize = deviceParam->maxSampler8x8TableSize      *
752                               sizeof(CM_HAL_SAMPLER_8X8_ENTRY);
753     uint32_t taskStatusTableSize = deviceParam->maxTasks                 * sizeof(char);
754     uint32_t bt2DIndexTableSize = deviceParam->max2DSurfaceTableSize    * sizeof(CM_HAL_MULTI_USE_BTI_ENTRY);
755     uint32_t bt2DUPIndexTableSize = deviceParam->max2DSurfaceUPTableSize  * sizeof(CM_HAL_MULTI_USE_BTI_ENTRY);
756     uint32_t bt3DIndexTableSize = deviceParam->max3DSurfaceTableSize    * sizeof(CM_HAL_MULTI_USE_BTI_ENTRY);
757     uint32_t btbufferIndexTableSize = deviceParam->maxBufferTableSize       * sizeof(CM_HAL_MULTI_USE_BTI_ENTRY);
758     uint32_t samplerIndexTableSize = deviceParam->maxSamplerTableSize      * sizeof(char);
759     uint32_t sampler8x8IndexTableSize = deviceParam->maxSampler8x8TableSize      * sizeof(char);
760 
761     uint32_t size           = lookUpTableSize          +
762                               i2DSURFTableSize         +
763                               bufferTableSize          +
764                               i2DSURFUPTableSize       +
765                               i3DSurfTableSize         +
766                               samplerTableSize         +
767                               sampler8x8TableSize      +
768                               taskStatusTableSize      +
769                               bt2DIndexTableSize       +
770                               bt2DUPIndexTableSize     +
771                               bt3DIndexTableSize       +
772                               btbufferIndexTableSize   +
773                               samplerIndexTableSize    +
774                               sampler8x8IndexTableSize;
775 
776     state->tableMemories = MOS_AllocAndZeroMemory(size);
777     CM_CHK_NULL_GOTOFINISH_MOSERROR(state->tableMemories);
778     pb                          = (uint8_t*)state->tableMemories;
779 
780     state->surf2DTable        = (PCMLOOKUP_ENTRY)pb;
781     pb                          += lookUpTableSize;
782 
783     state->umdSurf2DTable     = (PCM_HAL_SURFACE2D_ENTRY)pb;
784     pb                          += i2DSURFTableSize;
785 
786     state->bufferTable        = (PCM_HAL_BUFFER_ENTRY)pb;
787     pb                          += bufferTableSize;
788 
789     state->surf2DUPTable      = (PCM_HAL_SURFACE2D_UP_ENTRY)pb;
790     pb                          += i2DSURFUPTableSize;
791 
792     state->surf3DTable        = (PCM_HAL_3DRESOURCE_ENTRY)pb;
793     pb                          += i3DSurfTableSize;
794 
795     state->samplerTable       = (PMHW_SAMPLER_STATE_PARAM)pb;
796     pb                          += samplerTableSize;
797 
798     state->sampler8x8Table    = (PCM_HAL_SAMPLER_8X8_ENTRY)pb;
799     pb                          += sampler8x8TableSize;
800 
801     state->taskStatusTable    = (char *)pb;
802     pb                          += taskStatusTableSize;
803 
804     state->bti2DIndexTable     = (PCM_HAL_MULTI_USE_BTI_ENTRY)pb;
805     pb                          += bt2DIndexTableSize;
806 
807     state->bti2DUPIndexTable   = (PCM_HAL_MULTI_USE_BTI_ENTRY)pb;
808     pb                          += bt2DUPIndexTableSize;
809 
810     state->bti3DIndexTable     = (PCM_HAL_MULTI_USE_BTI_ENTRY)pb;
811     pb                          += bt3DIndexTableSize;
812 
813     state->btiBufferIndexTable = (PCM_HAL_MULTI_USE_BTI_ENTRY)pb;
814     pb                          += btbufferIndexTableSize;
815 
816     state->samplerIndexTable  = (char *)pb;
817     pb                          += samplerIndexTableSize;
818 
819     state->sampler8x8IndexTable  = (char *)pb;
820     pb                          += sampler8x8IndexTableSize;
821 
822 finish:
823     return MOS_STATUS_SUCCESS;
824 }
825 
826 //*-----------------------------------------------------------------------------
827 //| Purpose:    Adds a tag to distinguish between same kernel ID
828 //|             Used for batch buffer re-use when splitting large task into
829 //|             smaller pieces for EnqueueWithHints
830 //|             Using bits [48:42] from kernel ID for extra tag
831 //| Returns:    Result of the operation
832 //*-----------------------------------------------------------------------------
HalCm_AddKernelIDTag(PCM_HAL_KERNEL_PARAM * pKernels,uint32_t numKernels,uint32_t numTasks,uint32_t numCurrentTask)833 MOS_STATUS HalCm_AddKernelIDTag(
834     PCM_HAL_KERNEL_PARAM     *pKernels,
835     uint32_t                 numKernels,
836     uint32_t                 numTasks,
837     uint32_t                 numCurrentTask)
838 {
839     uint32_t i;
840     uint64_t tmpNumTasks;
841     uint64_t tmpNumCurrentTask;
842     uint64_t tmpNumTasksMask;
843     uint64_t tmpNumCurrentTaskMask;
844 
845     tmpNumTasks = numTasks;
846     tmpNumCurrentTask = numCurrentTask;
847     tmpNumTasksMask = tmpNumTasks << 45;
848     tmpNumCurrentTaskMask = tmpNumCurrentTask << 42;
849 
850     for( i = 0; i < numKernels; ++i )
851     {
852         pKernels[i]->kernelId |= tmpNumTasksMask;
853         pKernels[i]->kernelId |= tmpNumCurrentTaskMask;
854     }
855 
856     return MOS_STATUS_SUCCESS;
857 }
858 
859 //*-----------------------------------------------------------------------------
860 //| Purpose:    Gets the Batch Buffer for rendering. If needed, de-allocate /
861 //|             allocate the memory for BB
862 //| Returns:    Result of the operation
863 //*-----------------------------------------------------------------------------
HalCm_GetBatchBuffer(PCM_HAL_STATE state,uint32_t numKernels,PCM_HAL_KERNEL_PARAM * kernels,PMHW_BATCH_BUFFER * batchBufferOut)864 MOS_STATUS HalCm_GetBatchBuffer(
865     PCM_HAL_STATE           state,                                             // [in]  Pointer to CM State
866     uint32_t                numKernels,                                        // [in]  Number of Kernels
867     PCM_HAL_KERNEL_PARAM    *kernels,                                          // [in]  Array for kernel data
868     PMHW_BATCH_BUFFER       *batchBufferOut)                                   // [out] Batch Buffer Out
869 {
870     MOS_STATUS              eStatus;
871     PMHW_BATCH_BUFFER batchBuffer = nullptr;
872     PRENDERHAL_INTERFACE    renderHal;
873     int32_t                 size;
874     uint32_t                i;
875     uint32_t                j;
876     uint32_t                k;
877     int32_t                 freeIdx;
878     uint64_t                kernelIds[CM_MAX_KERNELS_PER_TASK];
879     uint64_t                kernelParamsIds[CM_MAX_KERNELS_PER_TASK];
880     CM_HAL_BB_DIRTY_STATUS  bbDirtyStatus;
881     PCM_HAL_BB_ARGS       bbcmArgs;
882 
883     eStatus        = MOS_STATUS_SUCCESS;
884     renderHal      = state->renderHal;
885     freeIdx        = CM_INVALID_INDEX;
886     bbDirtyStatus   = CM_HAL_BB_CLEAN;
887 
888     // Align the Batch Buffer size to power of 2
889     size = HalCm_GetPow2Aligned(state->taskParam->batchBufferSize);
890 
891     MOS_ZeroMemory(&kernelIds, CM_MAX_KERNELS_PER_TASK * sizeof(uint64_t));
892     MOS_ZeroMemory(&kernelParamsIds, CM_MAX_KERNELS_PER_TASK * sizeof(uint64_t));
893 
894     //Sanity check for batch buffer
895     if (size > CM_MAX_BB_SIZE)
896     {
897         eStatus = MOS_STATUS_EXCEED_MAX_BB_SIZE;
898         CM_ASSERTMESSAGE("Batch Buffer Size exeeceds Max '%d'", size);
899         goto finish;
900     }
901 
902     for( i = 0; i < numKernels; ++i )
903     {
904         // remove upper 16 bits used for kernel binary re-use in GSH
905         kernelParamsIds[i] = ((kernels[i])->kernelId << 16 ) >> 16;
906     }
907 
908 #if CM_BATCH_BUFFER_REUSE_ENABLE
909 
910     bbDirtyStatus = CM_HAL_BB_CLEAN;
911     for (k = 0; k < numKernels; ++k)
912     {
913         if (kernels[k]->kernelThreadSpaceParam.bbDirtyStatus == CM_HAL_BB_DIRTY)
914         {
915             bbDirtyStatus = CM_HAL_BB_DIRTY;
916             break;
917         }
918     }
919 
920     for (i = 0; i < (uint32_t)state->numBatchBuffers; i++)
921     {
922         batchBuffer = &state->batchBuffers[i];
923         CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer);
924         CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer->pPrivateData);
925 
926         //if (!Mos_ResourceIsNull(&batchBuffer->OsResource) && (!batchBuffer->bBusy))
927         if (!Mos_ResourceIsNull(&batchBuffer->OsResource))
928         {
929             MOS_FillMemory(kernelIds, sizeof(uint64_t)*CM_MAX_KERNELS_PER_TASK, 0);
930             for (j = 0; j < numKernels; j ++)
931             {
932                 kernelIds[j] = kernelParamsIds[j];
933             }
934 
935             bbcmArgs = (PCM_HAL_BB_ARGS)batchBuffer->pPrivateData;
936             if (RtlEqualMemory(kernelIds, bbcmArgs->kernelIds, sizeof(uint64_t)*CM_MAX_KERNELS_PER_TASK))
937             {
938                 if( batchBuffer->bBusy && bbDirtyStatus == CM_HAL_BB_DIRTY )
939                 {
940                     bbcmArgs->latest = false;
941                 }
942                 else if( bbcmArgs->latest == true )
943                 {
944                     break;
945                 }
946             }
947         }
948     }
949     if (i < (uint32_t)state->numBatchBuffers)
950     {
951         CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer);
952         CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer->pPrivateData);
953         bbcmArgs = (PCM_HAL_BB_ARGS)batchBuffer->pPrivateData;
954 
955         bbcmArgs->refCount ++;
956         batchBuffer->iCurrent   = 0;
957         batchBuffer->dwSyncTag  = 0;
958         batchBuffer->iRemaining = batchBuffer->iSize;
959         *batchBufferOut   = batchBuffer;
960         eStatus      = MOS_STATUS_SUCCESS;
961         goto finish;
962     }
963 #endif
964 
965     for (i = 0; i < (uint32_t)state->numBatchBuffers; i++)
966     {
967         batchBuffer = &state->batchBuffers[i];
968         CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer);
969         // No holes in the array of batch buffers
970         if (Mos_ResourceIsNull(&batchBuffer->OsResource))
971         {
972             freeIdx = i;
973             break;
974         }
975     }
976     if (freeIdx == CM_INVALID_INDEX)
977     {
978         for (i = 0; i < (uint32_t)state->numBatchBuffers; i++)
979         {
980             batchBuffer = &state->batchBuffers[i];
981             CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer);
982             CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer->pPrivateData);
983             bbcmArgs = (PCM_HAL_BB_ARGS)batchBuffer->pPrivateData;
984             if (!batchBuffer->bBusy)
985             {
986                 if (batchBuffer->iSize >= size)
987                 {
988                     batchBuffer->iCurrent   = 0;
989                     batchBuffer->iRemaining = batchBuffer->iSize;
990                     batchBuffer->dwSyncTag  = 0;
991 
992                     bbcmArgs->refCount = 1;
993                     for (i = 0; i <numKernels; i ++)
994                     {
995                         bbcmArgs->kernelIds[i] = kernelParamsIds[i];
996                     }
997 
998                     bbcmArgs->latest = true;
999 
1000                     *batchBufferOut   = batchBuffer;
1001                     eStatus = MOS_STATUS_SUCCESS;
1002                     goto finish;
1003                 }
1004 
1005                 if (freeIdx == CM_INVALID_INDEX)
1006                 {
1007                     freeIdx = i;
1008                 }
1009             }
1010         }
1011     }
1012     if (freeIdx == CM_INVALID_INDEX)
1013     {
1014         eStatus = MOS_STATUS_INVALID_PARAMETER;
1015         CM_ASSERTMESSAGE("No batch buffer available");
1016         goto finish;
1017     }
1018 
1019     batchBuffer = &state->batchBuffers[freeIdx];
1020     CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer);
1021     CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer->pPrivateData);
1022     bbcmArgs = (PCM_HAL_BB_ARGS)batchBuffer->pPrivateData;
1023     bbcmArgs->refCount = 1;
1024     for (i = 0; i <numKernels; i ++)
1025     {
1026         bbcmArgs->kernelIds[i] =  kernelParamsIds[i];
1027     }
1028 
1029     bbcmArgs->latest = true;
1030 
1031     if (!Mos_ResourceIsNull(&batchBuffer->OsResource))
1032     {
1033         // Deallocate Batch Buffer
1034         CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnFreeBB(renderHal, batchBuffer));
1035     }
1036 
1037     // Allocate Batch Buffer
1038     CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnAllocateBB(renderHal, batchBuffer, size));
1039     *batchBufferOut = batchBuffer;
1040 
1041 finish:
1042     return eStatus;
1043 }
1044 
1045 //*-----------------------------------------------------------------------------
1046 //| Purpose:    Parse the Kernel and populate the Task Param structure
1047 //| Return:     Result of the operation
1048 //*-----------------------------------------------------------------------------
HalCm_ParseTask(PCM_HAL_STATE state,PCM_HAL_EXEC_TASK_PARAM execParam)1049 MOS_STATUS HalCm_ParseTask(
1050     PCM_HAL_STATE               state,                                         // [in] Pointer to HAL CM state
1051     PCM_HAL_EXEC_TASK_PARAM     execParam)                                     // [in] Pointer to Exec Task Param
1052 {
1053     MOS_STATUS                  eStatus;
1054     PCM_HAL_TASK_PARAM          taskParam;
1055     PCM_HAL_KERNEL_PARAM        kernelParam;
1056     uint32_t                    hdrSize;
1057     uint32_t                    totalThreads;
1058     uint32_t                    krn;
1059     uint32_t                    curbeOffset;
1060     PMHW_VFE_SCOREBOARD         scoreboardParams;
1061     uint32_t                    hasThreadArg;
1062     bool                        nonstallingScoreboardEnable;
1063     CM_HAL_DEPENDENCY           vfeDependencyInfo;
1064     PCM_HAL_KERNEL_THREADSPACE_PARAM kernelTSParam;
1065     uint32_t                    i, j, k;
1066     uint8_t                     reuseBBUpdateMask;
1067     bool                        bitIsSet;
1068     PCM_HAL_MASK_AND_RESET      dependencyMask;
1069     uint32_t                    uSurfaceNumber;
1070     uint32_t                    uSurfaceIndex;
1071     bool                        threadArgExists;
1072 
1073     eStatus                           = MOS_STATUS_SUCCESS;
1074     curbeOffset                      = 0;
1075     totalThreads                      = 0;
1076     taskParam                         = state->taskParam;
1077     taskParam->batchBufferSize        = 0;
1078     hasThreadArg                       = 0;
1079     nonstallingScoreboardEnable       = true;
1080     reuseBBUpdateMask                  = 0;
1081     bitIsSet                           = false;
1082     threadArgExists                    = false;
1083     hdrSize = state->renderHal->pHwSizes->dwSizeMediaObjectHeaderCmd;
1084     taskParam->dependencyPattern      = execParam->dependencyPattern;
1085     taskParam->threadSpaceWidth       = execParam->threadSpaceWidth;
1086     taskParam->threadSpaceHeight      = execParam->threadSpaceHeight;
1087     taskParam->walkingPattern         = execParam->walkingPattern;
1088     taskParam->walkingParamsValid     = execParam->walkingParamsValid;
1089     taskParam->dependencyVectorsValid = execParam->dependencyVectorsValid;
1090     if( taskParam->walkingParamsValid )
1091     {
1092         taskParam->walkingParams = execParam->walkingParams;
1093     }
1094     if( taskParam->dependencyVectorsValid )
1095     {
1096         taskParam->dependencyVectors = execParam->dependencyVectors;
1097     }
1098     taskParam->kernelDebugEnabled  = (uint32_t)execParam->kernelDebugEnabled;
1099     //GT-PIN
1100     taskParam->surfEntryInfoArrays  = execParam->surfEntryInfoArrays;
1101 
1102     taskParam->surfacePerBT = 0;
1103 
1104     taskParam->colorCountMinusOne  = execParam->colorCountMinusOne;
1105     taskParam->mediaWalkerGroupSelect = execParam->mediaWalkerGroupSelect;
1106 
1107     if (execParam->threadCoordinates)
1108     {
1109         taskParam->threadCoordinates = execParam->threadCoordinates;
1110     }
1111 
1112     taskParam->dependencyMasks = execParam->dependencyMasks;
1113     taskParam->syncBitmap = execParam->syncBitmap;
1114     taskParam->conditionalEndBitmap = execParam->conditionalEndBitmap;
1115     MOS_SecureMemcpy(taskParam->conditionalEndInfo, sizeof(taskParam->conditionalEndInfo), execParam->conditionalEndInfo, sizeof(execParam->conditionalEndInfo));
1116 
1117     taskParam->numKernels = execParam->numKernels;
1118     taskParam->taskConfig   = execParam->taskConfig;
1119     state->walkerParams.CmWalkerEnable = true;
1120     state->renderHal->IsMDFLoad = (taskParam->taskConfig.turboBoostFlag == CM_TURBO_BOOST_ENABLE);
1121 
1122     for (krn = 0; krn < execParam->numKernels; krn++)
1123     {
1124         if ((execParam->kernels[krn] == nullptr) ||
1125             (execParam->kernelSizes[krn] == 0))
1126         {
1127             eStatus = MOS_STATUS_INVALID_PARAMETER;
1128             CM_ASSERTMESSAGE("Invalid Kernel data");
1129             goto finish;
1130         }
1131 
1132         kernelParam    = (PCM_HAL_KERNEL_PARAM)execParam->kernels[krn];
1133         PCM_INDIRECT_SURFACE_INFO       indirectSurfaceInfo = kernelParam->indirectDataParam.surfaceInfo;
1134         uSurfaceNumber = 0;
1135         if (kernelParam->indirectDataParam.surfaceCount)
1136         {
1137             uSurfaceIndex = 0;
1138             for (i = 0; i < kernelParam->indirectDataParam.surfaceCount; i++)
1139             {
1140                 uSurfaceIndex = (indirectSurfaceInfo + i)->bindingTableIndex > uSurfaceIndex ? ((indirectSurfaceInfo + i)->bindingTableIndex + (indirectSurfaceInfo + i)->numBTIPerSurf - 1) : uSurfaceIndex;
1141                 uSurfaceNumber = uSurfaceNumber + (indirectSurfaceInfo + i)->numBTIPerSurf;
1142             }
1143             taskParam->surfacePerBT = taskParam->surfacePerBT > uSurfaceIndex ? taskParam->surfacePerBT : uSurfaceIndex;
1144         }
1145 
1146         uSurfaceNumber += kernelParam->numSurfaces;
1147         taskParam->surfacePerBT = taskParam->surfacePerBT < uSurfaceNumber ?
1148                                             uSurfaceNumber : taskParam->surfacePerBT;
1149 
1150         // 26Z must be media object because by default it uses thread dependency mask
1151         // if there is no thread payload and dependency is not WAVEFRONT26Z, check if walker can be used
1152         if ( kernelParam->payloadSize == 0)
1153         {
1154             //per-kernel thread space is avaiable, and check it at first
1155             if((kernelParam->kernelThreadSpaceParam.threadSpaceWidth != 0) &&
1156                 (kernelParam->kernelThreadSpaceParam.patternType != CM_WAVEFRONT26Z) &&
1157                 (kernelParam->kernelThreadSpaceParam.patternType != CM_WAVEFRONT26ZI) &&
1158                 (kernelParam->kernelThreadSpaceParam.threadCoordinates == nullptr))
1159             {
1160                 kernelParam->walkerParams.cmWalkerEnable = true;
1161                 kernelParam->walkerParams.groupIdLoopSelect = execParam->mediaWalkerGroupSelect;
1162             }
1163             else if (kernelParam->kernelThreadSpaceParam.threadSpaceWidth == 0)
1164             {
1165                 //Check per-task thread space setting
1166                 if (state->taskParam->threadCoordinates)
1167                 {
1168                      if (state->taskParam->threadCoordinates[krn] == nullptr)
1169                      {
1170                         kernelParam->walkerParams.cmWalkerEnable = true;
1171                         kernelParam->walkerParams.groupIdLoopSelect = execParam->mediaWalkerGroupSelect;
1172                      }
1173                 }
1174                 else
1175                 {
1176                     kernelParam->walkerParams.cmWalkerEnable = true;
1177                     kernelParam->walkerParams.groupIdLoopSelect = execParam->mediaWalkerGroupSelect;
1178                 }
1179             }
1180         }
1181 
1182         //Media walker mode will be disabled if any kernel need use media object, we don't support mixed working modes
1183         state->walkerParams.CmWalkerEnable &= kernelParam->walkerParams.cmWalkerEnable;
1184 
1185         if (!state->walkerParams.CmWalkerEnable)
1186         {
1187             taskParam->batchBufferSize +=
1188                 kernelParam->numThreads * (hdrSize +  MOS_MAX(kernelParam->payloadSize, 4));
1189         }
1190 
1191         totalThreads += kernelParam->numThreads;
1192 
1193     }
1194 
1195     taskParam->batchBufferSize += CM_EXTRA_BB_SPACE;
1196 
1197     if (state->cmHalInterface->IsScoreboardParamNeeded())
1198     {
1199         scoreboardParams = &state->scoreboardParams;
1200         scoreboardParams->ScoreboardMask = 0;
1201         scoreboardParams->ScoreboardType = nonstallingScoreboardEnable;
1202 
1203         // set VFE scoreboarding information from union of kernel dependency vectors
1204         MOS_ZeroMemory(&vfeDependencyInfo, sizeof(CM_HAL_DEPENDENCY));
1205         for (krn = 0; krn < execParam->numKernels; krn++)
1206         {
1207             kernelParam = execParam->kernels[krn];
1208             kernelTSParam = &kernelParam->kernelThreadSpaceParam;
1209 
1210             // calculate union dependency vector of all kernels with dependency
1211             if (kernelTSParam->dependencyInfo.count || kernelTSParam->dependencyVectorsValid)
1212             {
1213                 if (vfeDependencyInfo.count == 0)
1214                 {
1215                     if (kernelTSParam->dependencyInfo.count)
1216                     {
1217                         MOS_SecureMemcpy(&vfeDependencyInfo, sizeof(CM_HAL_DEPENDENCY), &kernelTSParam->dependencyInfo, sizeof(CM_HAL_DEPENDENCY));
1218                     }
1219                     else if (kernelTSParam->dependencyVectorsValid)
1220                     {
1221                         MOS_SecureMemcpy(&vfeDependencyInfo, sizeof(CM_HAL_DEPENDENCY), &kernelTSParam->dependencyVectors, sizeof(CM_HAL_DEPENDENCY));
1222                     }
1223                     kernelTSParam->globalDependencyMask = (1 << vfeDependencyInfo.count) - 1;
1224                 }
1225                 else
1226                 {
1227                     uint32_t count = 0;
1228                     CM_HAL_DEPENDENCY dependencyInfo;
1229                     if (kernelTSParam->dependencyVectorsValid)
1230                     {
1231                         count = kernelTSParam->dependencyVectors.count;
1232                         MOS_SecureMemcpy(&dependencyInfo.deltaX, sizeof(int32_t) * count, &kernelTSParam->dependencyVectors.deltaX, sizeof(int32_t) * count);
1233                         MOS_SecureMemcpy(&dependencyInfo.deltaY, sizeof(int32_t) * count, &kernelTSParam->dependencyVectors.deltaY, sizeof(int32_t) * count);
1234                     }
1235                     else
1236                     {
1237                         count = kernelTSParam->dependencyInfo.count;
1238                         MOS_SecureMemcpy(&dependencyInfo.deltaX, sizeof(int32_t) * count, &kernelTSParam->dependencyInfo.deltaX, sizeof(int32_t) * count);
1239                         MOS_SecureMemcpy(&dependencyInfo.deltaY, sizeof(int32_t) * count, &kernelTSParam->dependencyInfo.deltaY, sizeof(int32_t) * count);
1240                     }
1241 
1242                     for (j = 0; j < count; ++j)
1243                     {
1244                         for (k = 0; k < vfeDependencyInfo.count; ++k)
1245                         {
1246                             if ((dependencyInfo.deltaX[j] == vfeDependencyInfo.deltaX[k]) &&
1247                                 (dependencyInfo.deltaY[j] == vfeDependencyInfo.deltaY[k]))
1248                             {
1249                                 CM_HAL_SETBIT(kernelTSParam->globalDependencyMask, k);
1250                                 break;
1251                             }
1252                         }
1253                         if (k == vfeDependencyInfo.count)
1254                         {
1255                             vfeDependencyInfo.deltaX[vfeDependencyInfo.count] = dependencyInfo.deltaX[j];
1256                             vfeDependencyInfo.deltaY[vfeDependencyInfo.count] = dependencyInfo.deltaY[j];
1257                             CM_HAL_SETBIT(kernelTSParam->globalDependencyMask, vfeDependencyInfo.count);
1258                             vfeDependencyInfo.count++;
1259                         }
1260                     }
1261                 }
1262             }
1263 
1264             reuseBBUpdateMask |= kernelTSParam->reuseBBUpdateMask;
1265         }
1266 
1267         if (vfeDependencyInfo.count > CM_HAL_MAX_DEPENDENCY_COUNT)
1268         {
1269             eStatus = MOS_STATUS_INVALID_PARAMETER;
1270             CM_ASSERTMESSAGE("Union of kernel dependencies exceeds max dependency count (8)");
1271             goto finish;
1272         }
1273 
1274         scoreboardParams->ScoreboardMask = (uint8_t)vfeDependencyInfo.count;
1275         for (i = 0; i < scoreboardParams->ScoreboardMask; ++i)
1276         {
1277             scoreboardParams->ScoreboardDelta[i].x = vfeDependencyInfo.deltaX[i];
1278             scoreboardParams->ScoreboardDelta[i].y = vfeDependencyInfo.deltaY[i];
1279         }
1280 
1281         //If no dependency defined in kernel data, then check per-task thread space setting
1282         if (scoreboardParams->ScoreboardMask == 0)
1283         {
1284             if (taskParam->dependencyVectorsValid)
1285             {
1286                 scoreboardParams->ScoreboardMask = (uint8_t)taskParam->dependencyVectors.count;
1287                 for (uint32_t i = 0; i < scoreboardParams->ScoreboardMask; ++i)
1288                 {
1289                     scoreboardParams->ScoreboardDelta[i].x = taskParam->dependencyVectors.deltaX[i];
1290                     scoreboardParams->ScoreboardDelta[i].y = taskParam->dependencyVectors.deltaY[i];
1291                 }
1292             }
1293             else
1294             {
1295                 switch (taskParam->dependencyPattern)
1296                 {
1297                 case CM_NONE_DEPENDENCY:
1298                     break;
1299 
1300                 case CM_VERTICAL_WAVE:
1301                     scoreboardParams->ScoreboardMask = 1;
1302                     scoreboardParams->ScoreboardDelta[0].x = 0xF; // -1 in uint8_t:4
1303                     scoreboardParams->ScoreboardDelta[0].y = 0;
1304                     break;
1305 
1306                 case CM_HORIZONTAL_WAVE:
1307                     scoreboardParams->ScoreboardMask = 1;
1308                     scoreboardParams->ScoreboardDelta[0].x = 0;
1309                     scoreboardParams->ScoreboardDelta[0].y = 0xF; // -1 in uint8_t:4
1310                     break;
1311 
1312                 case CM_WAVEFRONT:
1313                     scoreboardParams->ScoreboardMask = 3;
1314                     scoreboardParams->ScoreboardDelta[0].x = 0xF; // -1 in uint8_t:4
1315                     scoreboardParams->ScoreboardDelta[0].y = 0;
1316                     scoreboardParams->ScoreboardDelta[1].x = 0xF; // -1 in uint8_t:4
1317                     scoreboardParams->ScoreboardDelta[1].y = 0xF; // -1 in uint8_t:4
1318                     scoreboardParams->ScoreboardDelta[2].x = 0;
1319                     scoreboardParams->ScoreboardDelta[2].y = 0xF; // -1 in uint8_t:4
1320                     break;
1321 
1322                 case CM_WAVEFRONT26:
1323                     scoreboardParams->ScoreboardMask = 4;
1324                     scoreboardParams->ScoreboardDelta[0].x = 0xF; // -1 in uint8_t:4
1325                     scoreboardParams->ScoreboardDelta[0].y = 0;
1326                     scoreboardParams->ScoreboardDelta[1].x = 0xF; // -1 in uint8_t:4
1327                     scoreboardParams->ScoreboardDelta[1].y = 0xF; // -1 in uint8_t:4
1328                     scoreboardParams->ScoreboardDelta[2].x = 0;
1329                     scoreboardParams->ScoreboardDelta[2].y = 0xF; // -1 in uint8_t:4
1330                     scoreboardParams->ScoreboardDelta[3].x = 1;
1331                     scoreboardParams->ScoreboardDelta[3].y = 0xF; // -1 in uint8_t:4
1332                     break;
1333 
1334                 case CM_WAVEFRONT26Z:
1335                 case CM_WAVEFRONT26ZIG:
1336                     scoreboardParams->ScoreboardMask = 5;
1337                     scoreboardParams->ScoreboardDelta[0].x = 0xF; // -1 in uint8_t:4
1338                     scoreboardParams->ScoreboardDelta[0].y = 1;
1339                     scoreboardParams->ScoreboardDelta[1].x = 0xF; // -1 in uint8_t:4
1340                     scoreboardParams->ScoreboardDelta[1].y = 0;
1341                     scoreboardParams->ScoreboardDelta[2].x = 0xF; // -1 in uint8_t:4
1342                     scoreboardParams->ScoreboardDelta[2].y = 0xF; // -1 in uint8_t:4
1343                     scoreboardParams->ScoreboardDelta[3].x = 0;
1344                     scoreboardParams->ScoreboardDelta[3].y = 0xF; // -1 in uint8_t:4
1345                     scoreboardParams->ScoreboardDelta[4].x = 1;
1346                     scoreboardParams->ScoreboardDelta[4].y = 0xF; // -1 in uint8_t:4
1347                     break;
1348 
1349                 case CM_WAVEFRONT26ZI:
1350                     scoreboardParams->ScoreboardMask = 7;
1351                     scoreboardParams->ScoreboardDelta[0].x = 0xF; // -1 in uint8_t:4
1352                     scoreboardParams->ScoreboardDelta[0].y = 1;
1353                     scoreboardParams->ScoreboardDelta[1].x = 0xE;  // -2
1354                     scoreboardParams->ScoreboardDelta[1].y = 0;
1355                     scoreboardParams->ScoreboardDelta[2].x = 0xF; // -1 in uint8_t:4
1356                     scoreboardParams->ScoreboardDelta[2].y = 0;
1357                     scoreboardParams->ScoreboardDelta[3].x = 0xF; // -1 in uint8_t:4
1358                     scoreboardParams->ScoreboardDelta[3].y = 0xF; // -1 in uint8_t:4
1359                     scoreboardParams->ScoreboardDelta[4].x = 0;
1360                     scoreboardParams->ScoreboardDelta[4].y = 0xF; // -1 in uint8_t:4
1361                     scoreboardParams->ScoreboardDelta[5].x = 1;
1362                     scoreboardParams->ScoreboardDelta[5].y = 0xF; // -1 in uint8_t:4
1363                     scoreboardParams->ScoreboardDelta[6].x = 1;
1364                     scoreboardParams->ScoreboardDelta[6].y = 0;
1365                     break;
1366 
1367                 case CM_WAVEFRONT26X:
1368                     scoreboardParams->ScoreboardMask = 7;
1369                     scoreboardParams->ScoreboardDelta[0].x = 0xF;
1370                     scoreboardParams->ScoreboardDelta[0].y = 3;
1371                     scoreboardParams->ScoreboardDelta[1].x = 0xF;
1372                     scoreboardParams->ScoreboardDelta[1].y = 1;
1373                     scoreboardParams->ScoreboardDelta[2].x = 0xF;
1374                     scoreboardParams->ScoreboardDelta[2].y = 0xF;
1375                     scoreboardParams->ScoreboardDelta[3].x = 0;
1376                     scoreboardParams->ScoreboardDelta[3].y = 0xF;
1377                     scoreboardParams->ScoreboardDelta[4].x = 0;
1378                     scoreboardParams->ScoreboardDelta[4].y = 0xE;
1379                     scoreboardParams->ScoreboardDelta[5].x = 0;
1380                     scoreboardParams->ScoreboardDelta[5].y = 0xD;
1381                     scoreboardParams->ScoreboardDelta[6].x = 1;
1382                     scoreboardParams->ScoreboardDelta[6].y = 0xD;
1383                     break;
1384 
1385                 default:
1386                     taskParam->dependencyPattern = CM_NONE_DEPENDENCY;
1387                     break;
1388 
1389                 }
1390             }
1391         }
1392     }
1393     //Set size of surface binding table size
1394     CM_SURFACE_BTI_INFO surfBTIInfo;
1395     state->cmHalInterface->GetHwSurfaceBTIInfo(&surfBTIInfo);
1396 
1397     taskParam->surfacePerBT += surfBTIInfo.normalSurfaceStart ;
1398 
1399     // add one if kernel debugger is enabled
1400     if (execParam->kernelDebugEnabled)
1401     {
1402         taskParam->surfacePerBT += CM_RESERVED_SURFACE_NUMBER_FOR_KERNEL_DEBUG;
1403     }
1404 
1405     //If global surface is used and current surface bt size less than the max index of reserved surfaces
1406     //use set it as max bti size
1407     if ((execParam->globalSurfaceUsed) && (taskParam->surfacePerBT < surfBTIInfo.reservedSurfaceEnd))
1408     {
1409         taskParam->surfacePerBT = CM_MAX_STATIC_SURFACE_STATES_PER_BT;
1410     }
1411 
1412     //Make sure surfacePerBT do not exceed CM_MAX_STATIC_SURFACE_STATES_PER_BT
1413     taskParam->surfacePerBT = MOS_MIN(CM_MAX_STATIC_SURFACE_STATES_PER_BT, taskParam->surfacePerBT);
1414 
1415     if( taskParam->dependencyMasks )
1416     {
1417         for (krn = 0; krn < execParam->numKernels; krn++)
1418         {
1419             kernelParam    = execParam->kernels[krn];
1420             dependencyMask = taskParam->dependencyMasks[krn];
1421             if( dependencyMask )
1422             {
1423                 for( i = 0; i < kernelParam->numThreads; ++i )
1424                 {
1425                     reuseBBUpdateMask |= dependencyMask[i].resetMask;
1426                 }
1427             }
1428         }
1429     }
1430 
1431     CM_HAL_CHECKBIT_IS_SET(bitIsSet, reuseBBUpdateMask, CM_NO_BATCH_BUFFER_REUSE_BIT_POS);
1432     if( bitIsSet || reuseBBUpdateMask == 0 )
1433     {
1434         taskParam->reuseBBUpdateMask = 0;
1435     }
1436     else
1437     {
1438         taskParam->reuseBBUpdateMask = 1;
1439     }
1440 
1441     threadArgExists = HalCm_GetTaskHasThreadArg(execParam->kernels, execParam->numKernels);
1442 
1443     // For media object with thread arg, only support up to CM_MAX_USER_THREADS (512*512) threads
1444     // otherwise can support up to 262144 media object commands in batch buffer
1445     if (!state->walkerParams.CmWalkerEnable) {
1446         if (!threadArgExists)
1447         {
1448             if(totalThreads > CM_MAX_USER_THREADS_NO_THREADARG)
1449             {
1450                 eStatus = MOS_STATUS_INVALID_PARAMETER;
1451                 CM_ASSERTMESSAGE(
1452                     "Total task threads '%d' exceeds max allowed threads '%d'",
1453                     totalThreads,
1454                     CM_MAX_USER_THREADS_NO_THREADARG);
1455                 goto finish;
1456             }
1457         }
1458         else
1459         {
1460             if (totalThreads > CM_MAX_USER_THREADS)
1461             {
1462                 eStatus = MOS_STATUS_INVALID_PARAMETER;
1463                 CM_ASSERTMESSAGE(
1464                     "Total task threads '%d' exceeds max allowed threads '%d'",
1465                     totalThreads,
1466                     CM_MAX_USER_THREADS);
1467                 goto finish;
1468             }
1469         }
1470     }
1471 
1472     taskParam->queueOption = execParam->queueOption;
1473 
1474 finish:
1475     return eStatus;
1476 }
1477 
1478 //*-----------------------------------------------------------------------------
1479 //| Purpose:    Parse the Kernel and populate the Task Param structure
1480 //| Return:     Result of the operation
1481 //*-----------------------------------------------------------------------------
HalCm_ParseGroupTask(PCM_HAL_STATE state,PCM_HAL_EXEC_GROUP_TASK_PARAM execGroupParam)1482 MOS_STATUS HalCm_ParseGroupTask(
1483     PCM_HAL_STATE                       state,           // [in] Pointer to HAL CM state
1484     PCM_HAL_EXEC_GROUP_TASK_PARAM       execGroupParam)  // [in] Pointer to Exec Task Param
1485 {
1486     PCM_HAL_TASK_PARAM      taskParam      = state->taskParam;
1487     MOS_STATUS              eStatus        = MOS_STATUS_SUCCESS;
1488     PCM_HAL_KERNEL_PARAM    kernelParam    = nullptr;
1489     uint32_t                uSurfaceIndex;
1490 
1491     taskParam->surfEntryInfoArrays  = execGroupParam->surEntryInfoArrays;  //GT-PIN
1492     taskParam->batchBufferSize = 0;
1493     taskParam->kernelDebugEnabled  = (uint32_t)execGroupParam->kernelDebugEnabled;
1494 
1495     taskParam->numKernels = execGroupParam->numKernels;
1496     taskParam->syncBitmap = execGroupParam->syncBitmap;
1497     taskParam->conditionalEndBitmap = execGroupParam->conditionalEndBitmap;
1498     MOS_SecureMemcpy(taskParam->conditionalEndInfo, sizeof(taskParam->conditionalEndInfo),
1499                      execGroupParam->conditionalEndInfo, sizeof(execGroupParam->conditionalEndInfo));
1500 
1501     taskParam->taskConfig = execGroupParam->taskConfig;
1502 
1503     MOS_SecureMemcpy(taskParam->krnExecCfg, sizeof(taskParam->krnExecCfg),
1504                      execGroupParam->krnExecCfg, sizeof(execGroupParam->krnExecCfg));
1505 
1506     for (uint32_t krn = 0; krn < execGroupParam->numKernels; krn ++)
1507     {
1508         kernelParam = execGroupParam->kernels[krn];
1509         PCM_INDIRECT_SURFACE_INFO       indirectSurfaceInfo = kernelParam->indirectDataParam.surfaceInfo;
1510         uint32_t uSurfaceNumber = 0;
1511         if (kernelParam->indirectDataParam.surfaceCount)
1512         {
1513             uSurfaceIndex = 0;
1514             for (uint32_t i = 0; i < kernelParam->indirectDataParam.surfaceCount; i++)
1515             {
1516                 uSurfaceIndex = (indirectSurfaceInfo + i)->bindingTableIndex > uSurfaceIndex ? (indirectSurfaceInfo + i)->bindingTableIndex : uSurfaceIndex;
1517                 uSurfaceNumber++;
1518             }
1519             taskParam->surfacePerBT = taskParam->surfacePerBT > uSurfaceIndex ? taskParam->surfacePerBT : uSurfaceIndex;
1520         }
1521 
1522         uSurfaceNumber += kernelParam->numSurfaces;
1523 
1524         taskParam->surfacePerBT = taskParam->surfacePerBT < uSurfaceNumber ?
1525                                             uSurfaceNumber : taskParam->surfacePerBT;
1526     }
1527 
1528     CM_SURFACE_BTI_INFO surfBTIInfo;
1529     state->cmHalInterface->GetHwSurfaceBTIInfo(&surfBTIInfo);
1530 
1531     taskParam->surfacePerBT += surfBTIInfo.normalSurfaceStart ;
1532 
1533     // add one if kernel debugger is enabled
1534     if (execGroupParam->kernelDebugEnabled)
1535     {
1536         taskParam->surfacePerBT += CM_RESERVED_SURFACE_NUMBER_FOR_KERNEL_DEBUG;
1537     }
1538 
1539     //If global surface is used and current surface bt size less than the max index of reserved surfaces
1540     //use set it as max bti size
1541     if ((execGroupParam->globalSurfaceUsed) &&
1542         (taskParam->surfacePerBT < surfBTIInfo.reservedSurfaceEnd))
1543     {
1544         taskParam->surfacePerBT = CM_MAX_STATIC_SURFACE_STATES_PER_BT;
1545     }
1546 
1547     //Make sure surfacePerBT do not exceed CM_MAX_STATIC_SURFACE_STATES_PER_BT
1548     taskParam->surfacePerBT = MOS_MIN(CM_MAX_STATIC_SURFACE_STATES_PER_BT, taskParam->surfacePerBT);
1549 
1550     taskParam->queueOption = execGroupParam->queueOption;
1551     taskParam->mosVeHintParams = execGroupParam->mosVeHintParams;
1552 
1553     return eStatus;
1554 }
1555 
1556 //*-----------------------------------------------------------------------------
1557 //| Purpose:    Parse the Kernel and populate the Hints Task Param structure
1558 //| Return:     Result of the operation
1559 //*-----------------------------------------------------------------------------
HalCm_ParseHintsTask(PCM_HAL_STATE state,PCM_HAL_EXEC_HINTS_TASK_PARAM execHintsParam)1560 MOS_STATUS HalCm_ParseHintsTask(
1561     PCM_HAL_STATE                     state,                                         // [in] Pointer to HAL CM state
1562     PCM_HAL_EXEC_HINTS_TASK_PARAM     execHintsParam)
1563 {
1564     MOS_STATUS                        eStatus;
1565     PCM_HAL_TASK_PARAM                taskParam;
1566     PCM_HAL_KERNEL_PARAM              kernelParam;
1567     uint32_t                          hdrSize;
1568     uint32_t                          totalThreads;
1569     uint32_t                          krn;
1570     uint32_t                          curbeOffset;
1571     PMHW_VFE_SCOREBOARD               scoreboardParams;
1572     uint32_t                          hasThreadArg;
1573     bool                              nonstallingScoreboardEnable;
1574     bool                              bitIsSet;
1575     uint8_t                           reuseBBUpdateMask;
1576     bool                              threadArgExists;
1577 
1578     eStatus                          = MOS_STATUS_SUCCESS;
1579     krn                              = 0;
1580     taskParam                        = state->taskParam;
1581     nonstallingScoreboardEnable      = true;
1582     bitIsSet                          = false;
1583     curbeOffset                     = 0;
1584     hasThreadArg                      = 0;
1585     totalThreads                     = 0;
1586     reuseBBUpdateMask                 = 0;
1587     threadArgExists                   = false;
1588 
1589     hdrSize = state->renderHal->pHwSizes->dwSizeMediaObjectHeaderCmd;
1590     scoreboardParams = &state->scoreboardParams;
1591 
1592     for( krn = 0; krn < execHintsParam->numKernels; ++krn )
1593     {
1594         if ((execHintsParam->kernels[krn] == nullptr) ||
1595             (execHintsParam->kernelSizes[krn] == 0))
1596         {
1597             eStatus = MOS_STATUS_INVALID_PARAMETER;
1598             CM_ASSERTMESSAGE("Invalid Kernel data");
1599             goto finish;
1600         }
1601 
1602         // Parse the kernel Param
1603         kernelParam =  execHintsParam->kernels[krn];
1604 
1605         // if any kernel disables non-stalling, the non-stalling will be disabled
1606         nonstallingScoreboardEnable &= (kernelParam->cmFlags & CM_KERNEL_FLAGS_NONSTALLING_SCOREBOARD) ? true : false;
1607 
1608         if (!state->walkerParams.CmWalkerEnable)
1609         {
1610             taskParam->batchBufferSize +=
1611                 kernelParam->numThreads * (hdrSize +  MOS_MAX(kernelParam->payloadSize, 4));
1612         }
1613 
1614         totalThreads += kernelParam->numThreads;
1615 
1616         reuseBBUpdateMask |= kernelParam->kernelThreadSpaceParam.reuseBBUpdateMask;
1617     }
1618 
1619     CM_HAL_CHECKBIT_IS_SET(bitIsSet, reuseBBUpdateMask, CM_NO_BATCH_BUFFER_REUSE_BIT_POS);
1620     if( bitIsSet || reuseBBUpdateMask == 0 )
1621     {
1622         taskParam->reuseBBUpdateMask = 0;
1623     }
1624     else
1625     {
1626         taskParam->reuseBBUpdateMask = 1;
1627     }
1628 
1629     taskParam->batchBufferSize += CM_EXTRA_BB_SPACE;
1630 
1631     scoreboardParams->ScoreboardType = nonstallingScoreboardEnable;
1632 
1633     threadArgExists = HalCm_GetTaskHasThreadArg(execHintsParam->kernels, execHintsParam->numKernels);
1634 
1635     if (!state->walkerParams.CmWalkerEnable) {
1636         if (!threadArgExists)
1637         {
1638             if(totalThreads > CM_MAX_USER_THREADS_NO_THREADARG)
1639             {
1640                 eStatus = MOS_STATUS_INVALID_PARAMETER;
1641                 CM_ASSERTMESSAGE(
1642                     "Total task threads '%d' exceeds max allowed threads '%d'",
1643                     totalThreads,
1644                     CM_MAX_USER_THREADS_NO_THREADARG);
1645                 goto finish;
1646             }
1647         }
1648         else
1649         {
1650             if (totalThreads > CM_MAX_USER_THREADS)
1651             {
1652                 eStatus = MOS_STATUS_INVALID_PARAMETER;
1653                 CM_ASSERTMESSAGE(
1654                     "Total task threads '%d' exceeds max allowed threads '%d'",
1655                     totalThreads,
1656                     CM_MAX_USER_THREADS);
1657                 goto finish;
1658             }
1659         }
1660     }
1661 
1662     taskParam->queueOption = execHintsParam->queueOption;
1663 
1664 finish:
1665     return eStatus;
1666 }
1667 
1668 /*
1669 ** check to see if kernel entry is flaged as free or it is null
1670 ** used for combining
1671 */
bIsFree(PRENDERHAL_KRN_ALLOCATION kAlloc)1672 bool bIsFree( PRENDERHAL_KRN_ALLOCATION kAlloc )
1673 {
1674     if (kAlloc== nullptr)
1675     {
1676         return false;
1677     }
1678     else
1679     {
1680         if (kAlloc->dwFlags != RENDERHAL_KERNEL_ALLOCATION_FREE)
1681         {
1682             return false;
1683         }
1684     }
1685 
1686     return true;
1687 }
1688 
1689 /*
1690 ** local used supporting function
1691 ** setup correct values according to input and copy kernelBinary as needed
1692 */
CmLoadKernel(PCM_HAL_STATE state,PRENDERHAL_STATE_HEAP stateHeap,PRENDERHAL_KRN_ALLOCATION kernelAllocation,uint32_t sync,uint32_t count,PRENDERHAL_KERNEL_PARAM parameters,PCM_HAL_KERNEL_PARAM kernelParam,MHW_KERNEL_PARAM * mhwKernelParam,bool isCloneEntry)1693 void CmLoadKernel(PCM_HAL_STATE             state,
1694                   PRENDERHAL_STATE_HEAP     stateHeap,
1695                   PRENDERHAL_KRN_ALLOCATION kernelAllocation,
1696                   uint32_t sync,
1697                   uint32_t count,
1698                   PRENDERHAL_KERNEL_PARAM   parameters,
1699                   PCM_HAL_KERNEL_PARAM      kernelParam,
1700                   MHW_KERNEL_PARAM         *mhwKernelParam,
1701                   bool                      isCloneEntry)
1702 {
1703     UNUSED(state);
1704     if (mhwKernelParam)
1705     {
1706         kernelAllocation->iKID        = -1;
1707         kernelAllocation->iKUID       = mhwKernelParam->iKUID;
1708         kernelAllocation->iKCID       = mhwKernelParam->iKCID;
1709         kernelAllocation->dwSync      = sync;
1710         kernelAllocation->dwCount     = count & 0xFFFFFFFF; // 28 bits
1711         kernelAllocation->dwFlags     = RENDERHAL_KERNEL_ALLOCATION_USED;
1712         kernelAllocation->Params      = *parameters;
1713         kernelAllocation->pMhwKernelParam = mhwKernelParam;
1714 
1715         if (!isCloneEntry)
1716         {
1717             // Copy kernel data
1718             // Copy MovInstruction First
1719             MOS_SecureMemcpy(stateHeap->pIshBuffer + kernelAllocation->dwOffset,
1720                 kernelParam->movInsDataSize,
1721                 kernelParam->movInsData,
1722                 kernelParam->movInsDataSize);
1723 
1724             // Copy Cm Kernel Binary
1725             MOS_SecureMemcpy(stateHeap->pIshBuffer + kernelAllocation->dwOffset + kernelParam->movInsDataSize,
1726                 kernelParam->kernelBinarySize - kernelParam->movInsDataSize,
1727                 kernelParam->kernelBinary,
1728                 kernelParam->kernelBinarySize - kernelParam->movInsDataSize);
1729 
1730             // Padding bytes dummy instructions after kernel binary to resolve page fault issue
1731             MOS_ZeroMemory(stateHeap->pIshBuffer + kernelAllocation->dwOffset + kernelParam->kernelBinarySize, CM_KERNEL_BINARY_PADDING_SIZE);
1732         }
1733     }
1734     else
1735     {
1736         kernelAllocation->iKID        = -1;
1737         kernelAllocation->iKUID       = -1;
1738         kernelAllocation->iKCID       = -1;
1739         kernelAllocation->dwSync      = 0;
1740         FrameTrackerTokenFlat_Clear(&kernelAllocation->trackerToken);
1741         kernelAllocation->dwCount     = 0;
1742         kernelAllocation->dwFlags     = RENDERHAL_KERNEL_ALLOCATION_FREE;
1743         kernelAllocation->pMhwKernelParam = nullptr;
1744         kernelAllocation->cloneKernelParams.cloneKernelID       = -1;
1745         kernelAllocation->cloneKernelParams.isClone             = false;
1746         kernelAllocation->cloneKernelParams.isHeadKernel        = false;
1747         kernelAllocation->cloneKernelParams.kernelBinaryAllocID = -1;
1748         kernelAllocation->cloneKernelParams.referenceCount      = 0;
1749     }
1750 }
1751 
1752 /*
1753 ** local used supporting function
1754 ** Try to find free entry which is big enough to load kernel binary
1755 ** If we cannot find one, then return fail, so we will delete more entries
1756 */
CmSearchFreeSlotSize(PCM_HAL_STATE state,MHW_KERNEL_PARAM * mhwKernelParam,bool isCloneEntry)1757 int32_t CmSearchFreeSlotSize(PCM_HAL_STATE state, MHW_KERNEL_PARAM *mhwKernelParam, bool isCloneEntry)
1758 {
1759     PRENDERHAL_STATE_HEAP     stateHeap;
1760     PRENDERHAL_KRN_ALLOCATION kernelAllocation;
1761     int32_t                 kernelAllocationID;
1762     int32_t                 returnVal = -1;
1763     int32_t                 neededSize;
1764 
1765     stateHeap          = state->renderHal->pStateHeap;
1766     kernelAllocation   = stateHeap->pKernelAllocation;
1767 
1768     if (isCloneEntry)
1769     {
1770         neededSize = CM_64BYTE;
1771     }
1772     else
1773     {
1774         neededSize = mhwKernelParam->iSize;
1775     }
1776 
1777     for (kernelAllocationID = 0;
1778          kernelAllocationID < state->kernelNumInGsh;
1779          kernelAllocationID++, kernelAllocation++)
1780     {
1781         if(kernelAllocation->dwFlags == RENDERHAL_KERNEL_ALLOCATION_FREE)
1782         {
1783             if(state->totalKernelSize[kernelAllocationID] >= neededSize)
1784             {
1785                 // found free slot which is big enough
1786                 return kernelAllocationID;
1787             }
1788         }
1789     }
1790 
1791     // not found
1792     return returnVal;
1793 }
1794 
1795 //*-----------------------------------------------------------------------------
1796 //| Purpose:    Updates the clone entries' head kernel binary allocation IDs
1797 //|             Function is called after kernel allocations are shifted due to combining neighboring free entries
1798 //| Return:     Result of the operation
1799 //*-----------------------------------------------------------------------------
HalCm_UpdateCloneKernel(PCM_HAL_STATE state,uint32_t shiftPoint,CM_SHIFT_DIRECTION shiftDirection,uint32_t shiftFactor)1800 void HalCm_UpdateCloneKernel(PCM_HAL_STATE state,
1801     uint32_t shiftPoint,
1802     CM_SHIFT_DIRECTION shiftDirection,
1803     uint32_t shiftFactor)
1804 {
1805     PRENDERHAL_STATE_HEAP       stateHeap;
1806     PRENDERHAL_KRN_ALLOCATION   kernelAllocation;
1807     int32_t                     allocationID;
1808 
1809     stateHeap = state->renderHal->pStateHeap;
1810     kernelAllocation = stateHeap->pKernelAllocation;
1811 
1812     for (allocationID = 0; allocationID < state->kernelNumInGsh; allocationID++, kernelAllocation++)
1813     {
1814         kernelAllocation = &(stateHeap->pKernelAllocation[allocationID]);
1815         if (kernelAllocation->cloneKernelParams.isClone && ((kernelAllocation->cloneKernelParams.kernelBinaryAllocID) > (int32_t)shiftPoint))
1816         {
1817             if (shiftDirection == CM_SHIFT_LEFT)
1818             {
1819                 kernelAllocation->cloneKernelParams.kernelBinaryAllocID = kernelAllocation->cloneKernelParams.kernelBinaryAllocID + shiftFactor;
1820             }
1821             else
1822             {
1823                 kernelAllocation->cloneKernelParams.kernelBinaryAllocID = kernelAllocation->cloneKernelParams.kernelBinaryAllocID - shiftFactor;
1824             }
1825         }
1826     }
1827 }
1828 
1829 /*
1830 ** local used supporting function
1831 ** We found free slot and load kernel to this slot. There are 3 cases (see code)
1832 */
CmAddCurrentKernelToFreeSlot(PCM_HAL_STATE state,int32_t slot,PRENDERHAL_KERNEL_PARAM parameters,PCM_HAL_KERNEL_PARAM kernelParam,MHW_KERNEL_PARAM * mhwKernelParam,CM_CLONE_TYPE cloneType,int32_t headKernelAllocationID)1833 int32_t CmAddCurrentKernelToFreeSlot(PCM_HAL_STATE state,
1834                                   int32_t slot,
1835                                   PRENDERHAL_KERNEL_PARAM parameters,
1836                                   PCM_HAL_KERNEL_PARAM    kernelParam,
1837                                   MHW_KERNEL_PARAM       *mhwKernelParam,
1838                                   CM_CLONE_TYPE           cloneType,
1839                                   int32_t                 headKernelAllocationID)
1840 {
1841     PRENDERHAL_STATE_HEAP       stateHeap;
1842     PRENDERHAL_KRN_ALLOCATION   kernelAllocation, pKernelAllocationN;
1843 
1844     int32_t hr = CM_SUCCESS;
1845     int32_t i;
1846     int32_t totalSize, tmpSize, dwOffset, neededSize;
1847     bool    adjust, isCloneEntry, isHeadKernel, isCloneAsHead, adjustHeadKernelID;
1848     uint32_t tag;
1849 
1850     stateHeap          = state->renderHal->pStateHeap;
1851     kernelAllocation   = stateHeap->pKernelAllocation;
1852     adjustHeadKernelID = false;
1853 
1854     switch (cloneType)
1855     {
1856         case CM_CLONE_ENTRY:
1857         {
1858             neededSize    = CM_64BYTE;
1859             isCloneEntry  = true;
1860             isHeadKernel  = false;
1861             isCloneAsHead = false;
1862         }
1863         break;
1864         case CM_HEAD_KERNEL:
1865         {
1866             neededSize    = mhwKernelParam->iSize;
1867             isHeadKernel  = true;
1868             isCloneEntry  = false;
1869             isCloneAsHead = false;
1870         }
1871         break;
1872         case CM_CLONE_AS_HEAD_KERNEL:
1873         {
1874             neededSize    = mhwKernelParam->iSize;
1875             isHeadKernel  = true;
1876             isCloneEntry  = false;
1877             isCloneAsHead = true;
1878         }
1879         break;
1880         case CM_NO_CLONE:
1881         {
1882             neededSize    = mhwKernelParam->iSize;
1883             isCloneEntry  = false;
1884             isHeadKernel  = false;
1885             isCloneAsHead = false;
1886         }
1887         break;
1888         default:
1889         {
1890             hr = CM_FAILURE;
1891             goto finish;
1892         }
1893     }
1894 
1895     // to check if we have perfect size match
1896     if(stateHeap->pKernelAllocation[slot].iSize == neededSize)
1897     {
1898         adjust = false;
1899     }
1900     else
1901     {
1902         adjust = true;
1903     }
1904 
1905     if ((state->kernelNumInGsh < state->cmDeviceParam.maxGshKernelEntries) && adjust)
1906     {
1907         // we have extra entry to add
1908         // add new entry and pump index down below
1909         int32_t lastKernel = state->kernelNumInGsh - 1;
1910         for(i = lastKernel; i>slot; i--)
1911         {
1912             kernelAllocation = &stateHeap->pKernelAllocation[i];
1913             pKernelAllocationN = &stateHeap->pKernelAllocation[i+1];
1914             *pKernelAllocationN = *kernelAllocation;
1915             state->totalKernelSize[i+1] = state->totalKernelSize[i];
1916         }
1917 
1918         if (lastKernel > slot)
1919         {
1920             // update the headKernelAllocationID if it was shifted
1921             if (headKernelAllocationID > slot)
1922             {
1923                 headKernelAllocationID++;
1924                 adjustHeadKernelID = true;
1925             }
1926         }
1927 
1928         totalSize = state->totalKernelSize[slot];
1929         tmpSize = neededSize;
1930 
1931         dwOffset = stateHeap->pKernelAllocation[slot].dwOffset;
1932 
1933         // now add new one
1934         kernelAllocation = &stateHeap->pKernelAllocation[slot];
1935         if(state->cbbEnabled)
1936         {
1937             tag = state->osInterface->pfnGetGpuStatusTag(state->osInterface,
1938                 state->osInterface->CurrentGpuContextOrdinal);
1939         }
1940         else
1941         {
1942             tag = stateHeap->dwNextTag;
1943         }
1944 
1945         CmLoadKernel(state, stateHeap, kernelAllocation, tag, stateHeap->dwAccessCounter, parameters, kernelParam, mhwKernelParam, isCloneEntry);
1946         stateHeap->dwAccessCounter++;
1947 
1948         kernelAllocation->iSize = tmpSize;
1949         state->totalKernelSize[slot] = MOS_ALIGN_CEIL(tmpSize, 64);
1950 
1951         // insert a new slot which is free with rest
1952         tmpSize = MOS_ALIGN_CEIL(tmpSize, 64);  // HW required 64 byte align
1953         kernelAllocation = &stateHeap->pKernelAllocation[slot+1];
1954         CmLoadKernel(state, stateHeap, kernelAllocation, 0, 0, parameters, kernelParam, nullptr, isCloneEntry);
1955         kernelAllocation->dwOffset = dwOffset+tmpSize;
1956         kernelAllocation->iSize = 0;
1957         state->totalKernelSize[slot+1] = totalSize - tmpSize;
1958 
1959         // added one more entry
1960         state->kernelNumInGsh++;
1961 
1962         kernelAllocation = &stateHeap->pKernelAllocation[slot];
1963         if (isCloneEntry)
1964         {
1965             if (!stateHeap->pKernelAllocation[headKernelAllocationID].cloneKernelParams.isHeadKernel)
1966             {
1967                 // ERROR thought kernel with allocation ID, headKernelAllocationID, was a head kernel, but it's not
1968                 hr = CM_FAILURE;
1969                 goto finish;
1970             }
1971 
1972             kernelAllocation->cloneKernelParams.dwOffsetForAllocID  = dwOffset;
1973             kernelAllocation->dwOffset                              = stateHeap->pKernelAllocation[headKernelAllocationID].dwOffset;
1974             kernelAllocation->cloneKernelParams.isClone             = true;
1975             kernelAllocation->cloneKernelParams.kernelBinaryAllocID = headKernelAllocationID;
1976             kernelAllocation->cloneKernelParams.cloneKernelID       = stateHeap->pKernelAllocation[headKernelAllocationID].iKUID;
1977 
1978             stateHeap->pKernelAllocation[headKernelAllocationID].cloneKernelParams.referenceCount = stateHeap->pKernelAllocation[headKernelAllocationID].cloneKernelParams.referenceCount + 1;
1979 
1980             // update head kernel's count after updating the clone entry's count so that clone will be selected for deletion first
1981             stateHeap->pKernelAllocation[headKernelAllocationID].dwCount = stateHeap->dwAccessCounter++;
1982 
1983         }
1984         else
1985         {
1986             kernelAllocation->dwOffset = dwOffset;
1987 
1988             if (isHeadKernel)
1989             {
1990                 kernelAllocation->cloneKernelParams.isHeadKernel = true;
1991                 if (isCloneAsHead)
1992                 {
1993                     kernelAllocation->cloneKernelParams.cloneKernelID = kernelParam->clonedKernelParam.kernelID;
1994                 }
1995             }
1996         }
1997 
1998         if (lastKernel > slot)
1999         {
2000             HalCm_UpdateCloneKernel(state, slot, CM_SHIFT_LEFT, 1);
2001             if (isCloneEntry && adjustHeadKernelID)
2002             {
2003                 // if clone entry and already adjusted head kernel ID, then adjusted again in HalCm_UpdateCloneKernel, need to do only once
2004                 kernelAllocation->cloneKernelParams.kernelBinaryAllocID = kernelAllocation->cloneKernelParams.kernelBinaryAllocID - 1;
2005             }
2006         }
2007     }
2008     else if (state->kernelNumInGsh < state->cmDeviceParam.maxGshKernelEntries)
2009     {
2010         // no need to create a new entry since we have the same size
2011         kernelAllocation = &stateHeap->pKernelAllocation[slot];
2012 
2013         if(state->cbbEnabled)
2014         {
2015             tag = state->osInterface->pfnGetGpuStatusTag(state->osInterface,
2016                 state->osInterface->CurrentGpuContextOrdinal);
2017         }
2018         else
2019         {
2020             tag = stateHeap->dwNextTag;
2021         }
2022 
2023         CmLoadKernel(state, stateHeap, kernelAllocation, tag, stateHeap->dwAccessCounter, parameters, kernelParam, mhwKernelParam, isCloneEntry);
2024         stateHeap->dwAccessCounter++;
2025         // no change for kernelAllocation->dwOffset
2026         kernelAllocation->iSize = neededSize;
2027         state->totalKernelSize[slot] = MOS_ALIGN_CEIL(mhwKernelParam->iSize, 64);
2028         if (isCloneEntry)
2029         {
2030             if (!stateHeap->pKernelAllocation[headKernelAllocationID].cloneKernelParams.isHeadKernel)
2031             {
2032                 // ERROR thought kernel with allocation ID, headKernelAllocationID, was a head kernel, but it's not
2033                 hr = CM_FAILURE;
2034                 goto finish;
2035             }
2036 
2037             kernelAllocation->cloneKernelParams.dwOffsetForAllocID  = kernelAllocation->dwOffset;
2038             kernelAllocation->dwOffset                              = stateHeap->pKernelAllocation[headKernelAllocationID].dwOffset;
2039             kernelAllocation->cloneKernelParams.isClone             = true;
2040             kernelAllocation->cloneKernelParams.kernelBinaryAllocID = headKernelAllocationID;
2041             kernelAllocation->cloneKernelParams.cloneKernelID       = stateHeap->pKernelAllocation[headKernelAllocationID].iKUID;
2042 
2043             stateHeap->pKernelAllocation[headKernelAllocationID].cloneKernelParams.referenceCount = stateHeap->pKernelAllocation[headKernelAllocationID].cloneKernelParams.referenceCount + 1;
2044 
2045             // update head kernel's count after updating the clone entry's count so that clone will be selected for deletion first
2046             stateHeap->pKernelAllocation[headKernelAllocationID].dwCount = stateHeap->dwAccessCounter++;
2047         }
2048         else if (isHeadKernel)
2049         {
2050             kernelAllocation->cloneKernelParams.isHeadKernel = true;
2051             if (isCloneAsHead)
2052             {
2053                 kernelAllocation->cloneKernelParams.cloneKernelID = kernelParam->clonedKernelParam.kernelID;
2054             }
2055         }
2056     }
2057     else
2058     {
2059         // all slots are used, but we have one free which is big enough
2060         // we may have fragmentation, but code is the same as above case
2061         kernelAllocation = &stateHeap->pKernelAllocation[slot];
2062 
2063         if(state->cbbEnabled)
2064         {
2065             tag = state->osInterface->pfnGetGpuStatusTag(state->osInterface, state->osInterface->CurrentGpuContextOrdinal);
2066         }
2067         else
2068         {
2069             tag = stateHeap->dwNextTag;
2070         }
2071 
2072         CmLoadKernel(state, stateHeap, kernelAllocation, tag, stateHeap->dwAccessCounter, parameters, kernelParam, mhwKernelParam, isCloneEntry);
2073         stateHeap->dwAccessCounter++;
2074         // kernelAllocation->iTotalSize is not changed, but we have smaller actual size
2075         // no change for kernelAllocation->dwOffset
2076         kernelAllocation->iSize = neededSize;
2077 
2078         if (isCloneEntry)
2079         {
2080             if (!stateHeap->pKernelAllocation[headKernelAllocationID].cloneKernelParams.isHeadKernel)
2081             {
2082                 // ERROR thought kernel with allocation ID, headKernelAllocationID, was a head kernel, but it's not
2083                 hr = CM_FAILURE;
2084                 goto finish;
2085             }
2086 
2087             kernelAllocation->cloneKernelParams.dwOffsetForAllocID  = kernelAllocation->dwOffset;
2088             kernelAllocation->dwOffset                              = stateHeap->pKernelAllocation[headKernelAllocationID].dwOffset;
2089             kernelAllocation->cloneKernelParams.isClone             = true;
2090             kernelAllocation->cloneKernelParams.kernelBinaryAllocID = headKernelAllocationID;
2091             kernelAllocation->cloneKernelParams.cloneKernelID       = stateHeap->pKernelAllocation[headKernelAllocationID].iKUID;
2092 
2093             stateHeap->pKernelAllocation[headKernelAllocationID].cloneKernelParams.referenceCount = stateHeap->pKernelAllocation[headKernelAllocationID].cloneKernelParams.referenceCount + 1;
2094 
2095             // update head kernel's count after updating the clone entry's count so that clone will be selected for deletion first
2096             stateHeap->pKernelAllocation[headKernelAllocationID].dwCount = stateHeap->dwAccessCounter++;
2097         }
2098         else if (isHeadKernel)
2099         {
2100             kernelAllocation->cloneKernelParams.isHeadKernel = true;
2101             if (isCloneAsHead)
2102             {
2103                 kernelAllocation->cloneKernelParams.cloneKernelID = kernelParam->clonedKernelParam.kernelID;
2104             }
2105         }
2106     }
2107 
2108 finish:
2109     return hr;
2110 }
2111 
2112 /*----------------------------------------------------------------------------
2113 | Name      : HalCm_UnLoadKernel ( Replace RenderHal_UnloadKernel)
2114 \---------------------------------------------------------------------------*/
HalCm_UnloadKernel(PCM_HAL_STATE state,PRENDERHAL_KRN_ALLOCATION kernelAllocation)2115 int32_t HalCm_UnloadKernel(
2116     PCM_HAL_STATE              state,
2117     PRENDERHAL_KRN_ALLOCATION  kernelAllocation)
2118 {
2119     PRENDERHAL_INTERFACE       renderHal = state->renderHal;
2120     PRENDERHAL_STATE_HEAP      stateHeap;
2121     int32_t                    hr;
2122 
2123     //---------------------------------------
2124     CM_CHK_NULL_GOTOFINISH_CMERROR(renderHal);
2125     CM_CHK_NULL_GOTOFINISH_CMERROR(renderHal->pStateHeap);
2126     CM_CHK_NULL_GOTOFINISH_CMERROR(kernelAllocation);
2127     //---------------------------------------
2128 
2129     hr      = CM_FAILURE;
2130     stateHeap = renderHal->pStateHeap;
2131 
2132     if (kernelAllocation->dwFlags == RENDERHAL_KERNEL_ALLOCATION_FREE)
2133     {
2134         goto finish;
2135     }
2136 
2137     CM_CHK_CMSTATUS_GOTOFINISH(state->pfnSyncKernel(state, kernelAllocation->dwSync));
2138 
2139     // Unload kernel
2140     if (kernelAllocation->pMhwKernelParam)
2141     {
2142         kernelAllocation->pMhwKernelParam->bLoaded = 0;
2143     }
2144 
2145     if (kernelAllocation->cloneKernelParams.isClone)
2146     {
2147         if (stateHeap->pKernelAllocation[kernelAllocation->cloneKernelParams.kernelBinaryAllocID].cloneKernelParams.isHeadKernel)
2148         {
2149             if ((stateHeap->pKernelAllocation[kernelAllocation->cloneKernelParams.kernelBinaryAllocID].cloneKernelParams.referenceCount) <= 0)
2150             {
2151                 // ERROR
2152                 hr = CM_FAILURE;
2153                 goto finish;
2154             }
2155         }
2156         else
2157         {
2158             // ERROR
2159             hr = CM_FAILURE;
2160             goto finish;
2161         }
2162 
2163         stateHeap->pKernelAllocation[kernelAllocation->cloneKernelParams.kernelBinaryAllocID].cloneKernelParams.referenceCount =
2164             stateHeap->pKernelAllocation[kernelAllocation->cloneKernelParams.kernelBinaryAllocID].cloneKernelParams.referenceCount - 1;
2165 
2166         // restore the dwOffset for this allocationID
2167         kernelAllocation->dwOffset = kernelAllocation->cloneKernelParams.dwOffsetForAllocID;
2168     }
2169     else if (kernelAllocation->cloneKernelParams.isHeadKernel && kernelAllocation->cloneKernelParams.referenceCount != 0)
2170     {
2171         // ERROR, cloned kernel entries should have been selected for deletion before head kernel entry
2172         hr = CM_FAILURE;
2173         goto finish;
2174     }
2175 
2176     // Release kernel entry (Offset/size may be used for reallocation)
2177     kernelAllocation->iKID     = -1;
2178     kernelAllocation->iKUID    = -1;
2179     kernelAllocation->iKCID    = -1;
2180     kernelAllocation->dwSync   = 0;
2181     FrameTrackerTokenFlat_Clear(&kernelAllocation->trackerToken);
2182     kernelAllocation->dwFlags          = RENDERHAL_KERNEL_ALLOCATION_FREE;
2183     kernelAllocation->dwCount  = 0;
2184     kernelAllocation->pMhwKernelParam  = nullptr;
2185     kernelAllocation->cloneKernelParams.cloneKernelID       = -1;
2186     kernelAllocation->cloneKernelParams.isClone             = false;
2187     kernelAllocation->cloneKernelParams.isHeadKernel        = false;
2188     kernelAllocation->cloneKernelParams.kernelBinaryAllocID = -1;
2189     kernelAllocation->cloneKernelParams.referenceCount      = 0;
2190 
2191     hr = CM_SUCCESS;
2192 
2193 finish:
2194     return hr;
2195 }
2196 
2197 /*----------------------------------------------------------------------------
2198 | Name      : HalCmw_TouchKernel ( Replace RenderHal_TouchKernel)
2199 \---------------------------------------------------------------------------*/
HalCm_TouchKernel(PCM_HAL_STATE state,int32_t kernelAllocationID)2200 int32_t HalCm_TouchKernel(
2201     PCM_HAL_STATE       state,
2202     int32_t             kernelAllocationID)
2203 {
2204     int32_t                     hr = CM_SUCCESS;
2205     PRENDERHAL_STATE_HEAP       stateHeap;
2206     PRENDERHAL_KRN_ALLOCATION   kernelAllocation;
2207     PRENDERHAL_KRN_ALLOCATION   headKernelAllocation;
2208     uint32_t                    tag;
2209 
2210     PRENDERHAL_INTERFACE renderHal = state->renderHal;
2211     PMOS_INTERFACE osInterface     = state->osInterface;
2212 
2213     stateHeap = (renderHal) ? renderHal->pStateHeap : nullptr;
2214     if (stateHeap == nullptr ||
2215         stateHeap->pKernelAllocation == nullptr ||
2216         kernelAllocationID < 0 ||
2217         kernelAllocationID >= renderHal->StateHeapSettings.iKernelCount)
2218     {
2219         hr = CM_FAILURE;
2220         goto finish;
2221     }
2222 
2223     // Update usage
2224     kernelAllocation = &(stateHeap->pKernelAllocation[kernelAllocationID]);
2225     if (kernelAllocation->dwFlags != RENDERHAL_KERNEL_ALLOCATION_FREE &&
2226         kernelAllocation->dwFlags != RENDERHAL_KERNEL_ALLOCATION_LOCKED)
2227     {
2228         kernelAllocation->dwCount = stateHeap->dwAccessCounter++;
2229     }
2230 
2231     // Set sync tag, for deallocation control
2232     if(state->cbbEnabled)
2233     {
2234         tag = osInterface->pfnGetGpuStatusTag(osInterface, osInterface->CurrentGpuContextOrdinal);
2235     }
2236     else
2237     {
2238         tag = stateHeap->dwNextTag;
2239     }
2240 
2241     kernelAllocation->dwSync = tag;
2242 
2243     // if this kernel allocation is a cloned kernel, update the orig kernel sync tag and access counter
2244     if (kernelAllocation->cloneKernelParams.isClone)
2245     {
2246         headKernelAllocation = &(stateHeap->pKernelAllocation[kernelAllocation->cloneKernelParams.kernelBinaryAllocID]);
2247 
2248         if (headKernelAllocation->cloneKernelParams.referenceCount <= 0)
2249         {
2250             // ERROR
2251             hr = CM_FAILURE;
2252             goto finish;
2253         }
2254 
2255         headKernelAllocation->dwSync = tag;
2256         headKernelAllocation->dwCount = stateHeap->dwAccessCounter++;
2257 
2258     }
2259 
2260 finish:
2261     return hr;
2262 }
2263 
2264 /*
2265 **  Supporting function
2266 **  Delete oldest entry from table to free more space
2267 **  According to different cases, we will combine space with previous or next slot to get max space
2268 */
CmDeleteOldestKernel(PCM_HAL_STATE state,MHW_KERNEL_PARAM * mhwKernelParam)2269 int32_t CmDeleteOldestKernel(PCM_HAL_STATE state, MHW_KERNEL_PARAM *mhwKernelParam)
2270 {
2271     PRENDERHAL_KRN_ALLOCATION  kernelAllocation;
2272     PRENDERHAL_INTERFACE       renderHal = state->renderHal;;
2273     PRENDERHAL_STATE_HEAP      stateHeap = renderHal->pStateHeap;
2274     UNUSED(state);
2275     UNUSED(mhwKernelParam);
2276 
2277     uint32_t oldest = 0;
2278     uint32_t lastUsed;
2279     int32_t kernelAllocationID, searchIndex = -1, index = -1;
2280     int32_t alignedSize, shiftOffset;
2281     int32_t hr = CM_SUCCESS;
2282 
2283     kernelAllocation   = stateHeap->pKernelAllocation;
2284 
2285     // Search and deallocate oldest kernel (most likely this is optimal scheduling algorithm)
2286     kernelAllocation = stateHeap->pKernelAllocation;
2287     for (kernelAllocationID = 0;
2288         kernelAllocationID < state->kernelNumInGsh;
2289         kernelAllocationID++, kernelAllocation++)
2290     {
2291         // Skip unused entries
2292         // Skip kernels flagged as locked (cannot be automatically deallocated)
2293         if (kernelAllocation->dwFlags == RENDERHAL_KERNEL_ALLOCATION_FREE ||
2294             kernelAllocation->dwFlags == RENDERHAL_KERNEL_ALLOCATION_LOCKED)
2295         {
2296             continue;
2297         }
2298 
2299         // Find kernel not used for the greater amount of time (measured in number of operations)
2300         // Must not unload recently allocated kernels
2301         lastUsed = (uint32_t)(stateHeap->dwAccessCounter - kernelAllocation->dwCount);
2302         if (lastUsed > oldest)
2303         {
2304             searchIndex = kernelAllocationID;
2305             oldest     = lastUsed;
2306         }
2307     }
2308 
2309     // Did not found any entry for deallocation, we get into a strange case!
2310     if (searchIndex < 0)
2311     {
2312         CM_ASSERTMESSAGE("Failed to delete any slot from GSH. It is impossible.");
2313         return CM_FAILURE;
2314     }
2315 
2316     if (stateHeap->pKernelAllocation[searchIndex].cloneKernelParams.isHeadKernel &&
2317         (stateHeap->pKernelAllocation[searchIndex].cloneKernelParams.referenceCount != 0))
2318     {
2319         // ERROR, chose a head kernel for deletion but it still has clones pointing to it
2320         return CM_FAILURE;
2321     }
2322 
2323     // Free kernel entry and states associated with the kernel (if any)
2324     kernelAllocation = &stateHeap->pKernelAllocation[searchIndex];
2325     if (HalCm_UnloadKernel(state, kernelAllocation) != CM_SUCCESS)
2326     {
2327         CM_ASSERTMESSAGE("Failed to load kernel - no space available in GSH.");
2328         return CM_FAILURE;
2329     }
2330 
2331     // Let's check if we can merge searchIndex-1, searchIndex, searchIndex+1
2332     index = searchIndex;
2333     PRENDERHAL_KRN_ALLOCATION kAlloc0, kAlloc1, kAlloc2;
2334     kAlloc0 = (index == 0)? nullptr : &stateHeap->pKernelAllocation[index-1];
2335     kAlloc1 = &stateHeap->pKernelAllocation[index];  // free one
2336     kAlloc2 = (index == state->cmDeviceParam.maxGshKernelEntries - 1) ? nullptr : &stateHeap->pKernelAllocation[index + 1];
2337 
2338     if (bIsFree(kAlloc0) && bIsFree(kAlloc2))
2339     {
2340         // merge 3 into 1 slot and bump index after
2341         stateHeap->pKernelAllocation[index-1].dwFlags = RENDERHAL_KERNEL_ALLOCATION_FREE;
2342         state->totalKernelSize[index-1] += state->totalKernelSize[index] + state->totalKernelSize[index+1];
2343         stateHeap->pKernelAllocation[index-1].iSize = 0;
2344         // no change for stateHeap->pKernelAllocation[index-1].dwOffset
2345 
2346         // copy the rest
2347         for (int32_t i = index + 2; i<state->kernelNumInGsh; i++)
2348         {
2349             stateHeap->pKernelAllocation[i-2] = stateHeap->pKernelAllocation[i];
2350             state->totalKernelSize[i-2] = state->totalKernelSize[i];
2351         }
2352 
2353         state->kernelNumInGsh -= 2;
2354 
2355         if ( index == 0 )
2356             HalCm_UpdateCloneKernel(state, 0, CM_SHIFT_RIGHT, 2);
2357         else
2358             HalCm_UpdateCloneKernel(state, index - 1, CM_SHIFT_RIGHT, 2);
2359     }
2360     else if (bIsFree(kAlloc0))
2361     {
2362         // merge before and current into 1 slot
2363         stateHeap->pKernelAllocation[index-1].dwFlags = RENDERHAL_KERNEL_ALLOCATION_FREE;
2364         state->totalKernelSize[index-1] += state->totalKernelSize[index];
2365         stateHeap->pKernelAllocation[index-1].iSize = 0;
2366         // no change for stateHeap->pKernelAllocation[index-1].dwOffset
2367 
2368         for (int32_t i = index + 1; i<state->kernelNumInGsh; i++)
2369         {
2370             stateHeap->pKernelAllocation[i-1] = stateHeap->pKernelAllocation[i];
2371             state->totalKernelSize[i-1] = state->totalKernelSize[i];
2372         }
2373 
2374         state->kernelNumInGsh -= 1;
2375 
2376         if ( index == 0 )
2377             HalCm_UpdateCloneKernel(state, 0, CM_SHIFT_RIGHT, 1);
2378         else
2379             HalCm_UpdateCloneKernel(state, index - 1, CM_SHIFT_RIGHT, 1);
2380 
2381     }
2382     else if (bIsFree(kAlloc2))
2383     {
2384         // kAlloc0 is not free, but it can be nullptr
2385         // merge after and current into 1 slot
2386         stateHeap->pKernelAllocation[index].dwFlags = RENDERHAL_KERNEL_ALLOCATION_FREE;
2387         state->totalKernelSize[index] += state->totalKernelSize[index+1];
2388         stateHeap->pKernelAllocation[index].iSize = 0;
2389         if (kAlloc0)
2390         {
2391             // get free space starting point
2392             alignedSize = MOS_ALIGN_CEIL(kAlloc0->iSize, 64);
2393             shiftOffset = state->totalKernelSize[index-1] - alignedSize;
2394 
2395             state->totalKernelSize[index-1] -= shiftOffset;
2396             // no change for stateHeap->pKernelAllocation[index-1].iSize -= 0;
2397             state->totalKernelSize[index] += shiftOffset;
2398             stateHeap->pKernelAllocation[index].dwOffset -= shiftOffset;
2399         }
2400 
2401         for (int32_t i = index + 1; i<state->kernelNumInGsh; i++)
2402         {
2403             stateHeap->pKernelAllocation[i] = stateHeap->pKernelAllocation[i+1];
2404             state->totalKernelSize[i] = state->totalKernelSize[i+1];
2405         }
2406 
2407         state->kernelNumInGsh -= 1;
2408 
2409         if ( index == 0 )
2410             HalCm_UpdateCloneKernel(state, 0, CM_SHIFT_RIGHT, 1);
2411         else
2412             HalCm_UpdateCloneKernel(state, index - 1, CM_SHIFT_RIGHT, 1);
2413     }
2414     else
2415     {
2416         // no merge
2417         stateHeap->pKernelAllocation[index].dwFlags = RENDERHAL_KERNEL_ALLOCATION_FREE;
2418         // no change for stateHeap->pKernelAllocation[index].iTotalSize;
2419         stateHeap->pKernelAllocation[index].iSize = 0;
2420         if(kAlloc0)
2421         {
2422             // get free space starting point
2423             alignedSize = MOS_ALIGN_CEIL(kAlloc0->iSize, 64);
2424             shiftOffset = state->totalKernelSize[index-1] - alignedSize;
2425             state->totalKernelSize[index-1] -= shiftOffset;
2426             // no change for stateHeap->pKernelAllocation[index-1].iSize -= 0;
2427             state->totalKernelSize[index] += shiftOffset;
2428             stateHeap->pKernelAllocation[index].dwOffset -= shiftOffset;
2429         }
2430         // no change for stateHeap->iNumKernels;
2431     }
2432 
2433     return hr;
2434 }
2435 
2436 /*----------------------------------------------------------------------------
2437 | Name      : HalCm_LoadKernel ( Replace RenderHal_LoadKernel)
2438 \---------------------------------------------------------------------------*/
HalCm_LoadKernel(PCM_HAL_STATE state,PCM_HAL_KERNEL_PARAM kernelParam,int32_t samplerCount,PRENDERHAL_KRN_ALLOCATION & kernelAllocation)2439 int32_t HalCm_LoadKernel(
2440     PCM_HAL_STATE             state,
2441     PCM_HAL_KERNEL_PARAM      kernelParam,
2442     int32_t                   samplerCount,
2443     PRENDERHAL_KRN_ALLOCATION &kernelAllocation)
2444 {
2445     PRENDERHAL_STATE_HEAP     stateHeap;
2446     PRENDERHAL_INTERFACE      renderHal;
2447     int32_t                 hr;
2448     PRENDERHAL_KERNEL_PARAM   parameters;
2449     PMHW_KERNEL_PARAM         mhwKernelParam;
2450 
2451     int32_t kernelAllocationID;    // Kernel allocation ID in GSH
2452     int32_t kernelCacheID;         // Kernel cache ID
2453     int32_t kernelUniqueID;        // Kernel unique ID
2454     void    *kernelPtr;
2455     int32_t kernelSize;
2456     int32_t searchIndex;
2457     int32_t freeSlot;
2458     bool    isClonedKernel;
2459     bool    hasClones;
2460 
2461     hr                  = CM_SUCCESS;
2462     renderHal          = state->renderHal;
2463     stateHeap          = (renderHal) ? renderHal->pStateHeap : nullptr;
2464     kernelAllocationID = RENDERHAL_KERNEL_LOAD_FAIL;
2465     mhwKernelParam     = &(state->kernelParamsMhw);
2466     parameters         = &(state->kernelParamsRenderHal.Params);
2467 
2468     // Validate parameters
2469     if (stateHeap == nullptr ||
2470         stateHeap->bIshLocked == false ||
2471         stateHeap->pKernelAllocation == nullptr ||
2472         kernelParam->kernelBinarySize == 0 ||
2473         state->kernelNumInGsh > state->cmDeviceParam.maxGshKernelEntries)
2474     {
2475         CM_ASSERTMESSAGE("Failed to load kernel - invalid parameters.");
2476         return CM_FAILURE;
2477     }
2478 
2479     isClonedKernel = kernelParam->clonedKernelParam.isClonedKernel;
2480     hasClones      = kernelParam->clonedKernelParam.hasClones;
2481 
2482     parameters->Sampler_Count = samplerCount;
2483     mhwKernelParam->iKUID     = static_cast<int>( (kernelParam->kernelId >> 32) );
2484     mhwKernelParam->iKCID     = -1;
2485     mhwKernelParam->pBinary   = kernelParam->kernelBinary;
2486     mhwKernelParam->iSize     = kernelParam->kernelBinarySize + CM_KERNEL_BINARY_PADDING_SIZE;
2487 
2488     // Kernel parameters
2489     kernelPtr      = mhwKernelParam->pBinary;
2490     kernelSize     = mhwKernelParam->iSize;
2491     kernelUniqueID = mhwKernelParam->iKUID;
2492     kernelCacheID  = mhwKernelParam->iKCID;
2493 
2494     // Check if kernel is already loaded; Search free allocation index
2495     searchIndex = -1;
2496     kernelAllocation = stateHeap->pKernelAllocation;
2497     for (kernelAllocationID = 0;
2498          kernelAllocationID <  state->kernelNumInGsh;
2499          kernelAllocationID++, kernelAllocation++)
2500     {
2501         if (kernelAllocation->iKUID == kernelUniqueID &&
2502             kernelAllocation->iKCID == kernelCacheID)
2503         {
2504             // found match and Update kernel usage
2505             hr = HalCm_TouchKernel(state, kernelAllocationID);
2506             if (hr == CM_FAILURE)
2507             {
2508                 goto finish;
2509             }
2510             // Increment reference counter
2511             mhwKernelParam->bLoaded = 1;
2512             // Record kernel allocation
2513             kernelAllocation = &stateHeap->pKernelAllocation[kernelAllocationID];
2514 
2515             goto finish;
2516         }
2517     }
2518 
2519     if (isClonedKernel || hasClones)
2520     {
2521         hr = HalCm_InsertCloneKernel(state, kernelParam, kernelAllocation);
2522         goto finish;
2523     }
2524 
2525     // here is the algorithm
2526     // 1) search for free slot which is big enough to load current kerenel
2527     // 2) if found slot, then add current kerenel
2528     // 3) if we cannot find slot, we need to delete some entry (delete oldest first), after delete oldest entry
2529     //    we will loop over to step 1 until we get enough space.
2530     // The algorithm won't fail except we load 1 kernel which is larger than 2MB
2531     do
2532     {
2533         freeSlot = CmSearchFreeSlotSize(state, mhwKernelParam, false);
2534         if (freeSlot >= 0)
2535         {
2536             // found free slot which is big enough to hold kernel
2537             hr = CmAddCurrentKernelToFreeSlot(state, freeSlot, parameters, kernelParam, mhwKernelParam, CM_NO_CLONE, -1);
2538             // update GSH states stateHeap->numKernels inside add function
2539             break;
2540         }
2541         else
2542         {
2543             if (CmDeleteOldestKernel(state, mhwKernelParam) != CM_SUCCESS)
2544             {
2545                 return CM_FAILURE;
2546             }
2547         }
2548     } while(1);
2549 
2550     mhwKernelParam->bLoaded = 1;  // Increment reference counter
2551     kernelAllocation = &stateHeap->pKernelAllocation[freeSlot];  // Record kernel allocation
2552 
2553 finish:
2554 
2555     return hr;
2556 }
2557 
2558 //*-----------------------------------------------------------------------------
2559 //| Purpose:    Loads cloned kernel entries and kernels with clones into free slot
2560 //| Return:     Result of the operation
2561 //*-----------------------------------------------------------------------------
HalCm_InsertCloneKernel(PCM_HAL_STATE state,PCM_HAL_KERNEL_PARAM kernelParam,PRENDERHAL_KRN_ALLOCATION & kernelAllocation)2562 int32_t HalCm_InsertCloneKernel(
2563     PCM_HAL_STATE              state,
2564     PCM_HAL_KERNEL_PARAM       kernelParam,
2565     PRENDERHAL_KRN_ALLOCATION  &kernelAllocation)
2566 {
2567     int32_t                   hr              = CM_SUCCESS;
2568     int32_t                   kernelAllocationID;    // Kernel allocation ID in GSH
2569     uint32_t                  tag;
2570     PMOS_INTERFACE            osInterface    = state->osInterface;
2571     PMHW_KERNEL_PARAM         mhwKernelParam = &(state->kernelParamsMhw);
2572     int32_t                   freeSlot       = -1;
2573     PRENDERHAL_STATE_HEAP     stateHeap = state->renderHal->pStateHeap;
2574 
2575     kernelAllocation = state->renderHal->pStateHeap->pKernelAllocation;
2576 
2577     for (kernelAllocationID = 0; kernelAllocationID < state->kernelNumInGsh;
2578         kernelAllocationID++, kernelAllocation++)
2579     {
2580         if (kernelAllocation->cloneKernelParams.isHeadKernel)
2581         {
2582             if ((kernelAllocation->iKUID                           == kernelParam->clonedKernelParam.kernelID) ||       // original kernel that cloned from is already loaded as head
2583                 (kernelAllocation->cloneKernelParams.cloneKernelID == kernelParam->clonedKernelParam.kernelID) ||       // another clone from same original kernel is serving as the head
2584                 (kernelAllocation->cloneKernelParams.cloneKernelID == static_cast<int>(kernelParam->kernelId >> 32))) // clone is serving as the head and this is the original kernel
2585             {
2586                 // found match, insert 64B dummy entry and set piKAID
2587                 do
2588                 {
2589                     // Before getting a free slot, update head kernel sync tag and count so head will not be selected for deletion
2590                     // then update head kernel count after inserting clone
2591                     // so that clone will be selected first for deletion (this is done in CmAddCurrentKernelToFreeSlot)
2592 
2593                     // update head kernel sync tag
2594                     if(state->cbbEnabled)
2595                     {
2596                         tag = osInterface->pfnGetGpuStatusTag(osInterface, osInterface->CurrentGpuContextOrdinal);
2597                     }
2598                     else
2599                     {
2600                         tag = state->renderHal->pStateHeap->dwNextTag;
2601                     }
2602                     kernelAllocation->dwSync = tag;
2603 
2604                     // update the head kernel count so it will not be selected for deletion
2605                     kernelAllocation->dwCount = state->renderHal->pStateHeap->dwAccessCounter++;
2606 
2607                     freeSlot = CmSearchFreeSlotSize(state, mhwKernelParam, true);
2608                     if (freeSlot >= 0)
2609                     {
2610                         // found free slot
2611                         hr = CmAddCurrentKernelToFreeSlot(state, freeSlot, &(state->kernelParamsRenderHal.Params),
2612                             kernelParam, &(state->kernelParamsMhw), CM_CLONE_ENTRY, kernelAllocationID);
2613 
2614                         goto finish;
2615 
2616                     }
2617                     else
2618                     {
2619                         if (CmDeleteOldestKernel(state, mhwKernelParam) != CM_SUCCESS)
2620                         {
2621                             hr = CM_FAILURE;
2622                             goto finish;
2623                         }
2624                     }
2625                 } while (1);
2626             }
2627         }
2628     }
2629 
2630     // didn't find a match, insert this kernel as the head kernel
2631     do
2632     {
2633         freeSlot = CmSearchFreeSlotSize(state, mhwKernelParam, false);
2634         if (freeSlot >= 0)
2635         {
2636             if (kernelParam->clonedKernelParam.isClonedKernel)
2637             {
2638                 hr = CmAddCurrentKernelToFreeSlot(state, freeSlot, &(state->kernelParamsRenderHal.Params),
2639                     kernelParam, &(state->kernelParamsMhw), CM_CLONE_AS_HEAD_KERNEL, -1);
2640             }
2641             else
2642             {
2643                 hr = CmAddCurrentKernelToFreeSlot(state, freeSlot, &(state->kernelParamsRenderHal.Params),
2644                     kernelParam, &(state->kernelParamsMhw), CM_HEAD_KERNEL, -1);
2645             }
2646             break;
2647         }
2648         else
2649         {
2650             if (CmDeleteOldestKernel(state, mhwKernelParam) != CM_SUCCESS)
2651             {
2652                 hr = CM_FAILURE;
2653                 goto finish;
2654             }
2655         }
2656     } while (1);
2657 
2658 finish:
2659 
2660     if (hr == CM_SUCCESS)
2661     {
2662         mhwKernelParam->bLoaded = 1;
2663         kernelAllocation = &stateHeap->pKernelAllocation[freeSlot];
2664     }
2665 
2666     return hr;
2667 }
2668 
2669 //!
2670 //! \brief    Get offset to sampler state
2671 //! \details  Get offset to sampler state in General State Heap,
2672 //!           (Cm customized version of the RenderHal function which calculates
2673 //!           the sampler offset by MDF owned parameters).
2674 //! \param    PCM_HAL_STATE state
2675 //!           [in] Pointer to CM_HAL_STATE structure
2676 //! \param    PRENDERHAL_INTERFACE renderHal
2677 //!           [in] Pointer to RenderHal Interface
2678 //! \param    int mediaID
2679 //!           [in] Media ID associated with sampler
2680 //! \param    int samplerOffset
2681 //!           [in] sampler offset from the base of current kernel's sampler heap
2682 //! \param    int samplerBTI
2683 //!           [in] sampler BTI
2684 //! \param    unsigned long *pdwSamplerOffset
2685 //!           [out] optional; offset of sampler state from GSH base
2686 //! \return   MOS_STATUS
2687 //!
HalCm_GetSamplerOffset(PCM_HAL_STATE state,PRENDERHAL_INTERFACE renderHal,int mediaID,unsigned int samplerOffset,unsigned int samplerBTI,PMHW_SAMPLER_STATE_PARAM samplerParam,uint32_t * pdwSamplerOffset)2688 MOS_STATUS HalCm_GetSamplerOffset(
2689     PCM_HAL_STATE            state,
2690     PRENDERHAL_INTERFACE     renderHal,
2691     int                      mediaID,
2692     unsigned int             samplerOffset,
2693     unsigned int             samplerBTI,
2694     PMHW_SAMPLER_STATE_PARAM samplerParam,
2695     uint32_t                *pdwSamplerOffset)
2696 {
2697     PRENDERHAL_MEDIA_STATE_LEGACY pCurMediaStateLegacy = (PRENDERHAL_MEDIA_STATE_LEGACY)renderHal->pStateHeap->pCurMediaState;
2698     unsigned int tmpSamplerOffset = pCurMediaStateLegacy->pDynamicState->Sampler3D.dwOffset +
2699                                   state->taskParam->samplerOffsetsByKernel[mediaID] +
2700                                   samplerOffset;
2701 
2702     if (pdwSamplerOffset != nullptr)
2703     {
2704         *pdwSamplerOffset = tmpSamplerOffset;
2705     }
2706 
2707     if (samplerParam->SamplerType == MHW_SAMPLER_TYPE_3D)
2708     {
2709         samplerParam->Unorm.IndirectStateOffset = MOS_ALIGN_CEIL( pCurMediaStateLegacy->pDynamicState->Sampler3D.dwOffset +
2710                                                                   state->taskParam->samplerIndirectOffsetsByKernel[mediaID] +
2711                                                                   samplerBTI * renderHal->pHwSizes->dwSizeSamplerIndirectState,
2712                                                                   1 << MHW_SAMPLER_INDIRECT_SHIFT);
2713     }
2714     return MOS_STATUS_SUCCESS;
2715 }
2716 
2717 //!
2718 //! \brief      Setup Interface Descriptor
2719 //! \details    Set interface descriptor, (overriding RenderHal function),
2720 //!             (Cm customized version of the RenderHal function which set
2721 //!             dwSamplerOffset and dwSamplerCount by MDF owned parameters).
2722 //! \param      PCM_HAL_STATE                           state
2723 //!             [in]    Pointer to CM_HAL_STATE structure
2724 //! \param      PRENDERHAL_INTERFACE                    renderHal
2725 //!             [in]    Pointer to HW interface
2726 //! \param      PRENDERHAL_MEDIA_STATE                  mediaState
2727 //!             [in]    Pointer to media state
2728 //! \param      PRENDERHAL_KRN_ALLOCATION               kernelAllocation
2729 //!             [in]    Pointer to kernel allocation
2730 //! \param      PRENDERHAL_INTERFACE_DESCRIPTOR_PARAMS  interfaceDescriptorParams
2731 //!             [in]    Pointer to interface descriptor parameters
2732 //! \param      PMHW_GPGPU_WALKER_PARAMS          pGpGpuWalkerParams
2733 //!             [in]    Pointer to gpgpu walker parameters
2734 //! \return     MOS_STATUS
2735 //!
HalCm_SetupInterfaceDescriptor(PCM_HAL_STATE state,PRENDERHAL_INTERFACE renderHal,PRENDERHAL_MEDIA_STATE mediaState,PRENDERHAL_KRN_ALLOCATION kernelAllocation,PRENDERHAL_INTERFACE_DESCRIPTOR_PARAMS interfaceDescriptorParams)2736 MOS_STATUS HalCm_SetupInterfaceDescriptor(
2737     PCM_HAL_STATE                          state,
2738     PRENDERHAL_INTERFACE                   renderHal,
2739     PRENDERHAL_MEDIA_STATE                 mediaState,
2740     PRENDERHAL_KRN_ALLOCATION              kernelAllocation,
2741     PRENDERHAL_INTERFACE_DESCRIPTOR_PARAMS interfaceDescriptorParams)
2742 {
2743     MOS_STATUS               eStatus = MOS_STATUS_SUCCESS;
2744     MHW_ID_ENTRY_PARAMS      params;
2745     PRENDERHAL_STATE_HEAP    stateHeap;
2746     PRENDERHAL_DYNAMIC_STATE dynamicState;
2747     unsigned long            mediaStateOffset;
2748 
2749     PRENDERHAL_MEDIA_STATE_LEGACY mediaStateLegacy = (PRENDERHAL_MEDIA_STATE_LEGACY)mediaState;
2750     //-----------------------------------------
2751     MHW_RENDERHAL_CHK_NULL(renderHal);
2752     MHW_RENDERHAL_CHK_NULL(renderHal->pMhwStateHeap);
2753     MHW_RENDERHAL_CHK_NULL(mediaStateLegacy);
2754     MHW_RENDERHAL_CHK_NULL(mediaStateLegacy->pDynamicState);
2755     MHW_RENDERHAL_CHK_NULL(interfaceDescriptorParams);
2756     //-----------------------------------------
2757 
2758     // Get states, params
2759     stateHeap = renderHal->pStateHeap;
2760     dynamicState = mediaStateLegacy->pDynamicState;
2761     mediaStateOffset = dynamicState->memoryBlock.GetOffset();
2762 
2763     params.dwMediaIdOffset = mediaStateOffset + dynamicState->MediaID.dwOffset;
2764     params.iMediaId = interfaceDescriptorParams->iMediaID;
2765     params.dwKernelOffset = kernelAllocation->dwOffset;
2766     params.dwSamplerOffset = mediaStateOffset + dynamicState->Sampler3D.dwOffset + state->taskParam->samplerOffsetsByKernel[params.iMediaId];
2767     params.dwSamplerCount = ( state->taskParam->samplerCountsByKernel[params.iMediaId] + 3 ) / 4;
2768     params.dwSamplerCount = (params.dwSamplerCount > 4) ? 4 : params.dwSamplerCount;
2769     params.dwBindingTableOffset = interfaceDescriptorParams->iBindingTableID * stateHeap->iBindingTableSize;
2770     params.iCurbeOffset = interfaceDescriptorParams->iCurbeOffset;
2771     params.iCurbeLength = interfaceDescriptorParams->iCurbeLength;
2772 
2773     params.bBarrierEnable = interfaceDescriptorParams->blBarrierEnable;
2774     params.bGlobalBarrierEnable = interfaceDescriptorParams->blGlobalBarrierEnable;    //It's only applied for BDW+
2775     params.dwNumberofThreadsInGPGPUGroup = interfaceDescriptorParams->iNumberThreadsInGroup;
2776     params.dwSharedLocalMemorySize = renderHal->pfnEncodeSLMSize(renderHal, interfaceDescriptorParams->iSLMSize);
2777     params.iCrsThdConDataRdLn = interfaceDescriptorParams->iCrsThrdConstDataLn;
2778     params.memoryBlock = &dynamicState->memoryBlock;
2779 
2780     MHW_RENDERHAL_CHK_STATUS(renderHal->pMhwStateHeap->AddInterfaceDescriptorData(&params));
2781     dynamicState->MediaID.iCurrent++;
2782 
2783 finish:
2784     return eStatus;
2785 }
2786 
2787 /*----------------------------------------------------------------------------
2788 | Name      : HalCm_AllocateMediaID  replace old RenderHal_AllocateMediaID
2789 |             No need to touch the kernel here since that is handled at loadKernel time
2790 |
2791 | Purpose   : Allocates and sets up an Interface Descriptor for Media Pipeline
2792 |
2793 | Arguments : [in] renderHal          - Pointer to RenderHal interface structure
2794 |             [in] kernelParam        - Pointer to Kernel parameters
2795 |             [in] pKernelAllocationID - Pointer to Kernel allocation
2796 |             [in] bindingTableID     - Binding table ID
2797 |             [in] curbeOffset        - Curbe offset (from CURBE base)
2798 |
2799 | Returns   : Media Interface descriptor ID
2800 |             -1 if invalid parameters
2801 |                   no Interface Descriptor entry available in GSH
2802 |
2803 | Comments  : Kernel        must be preloaded
2804 |             Curbe         must be allocated using pfnAllocateCurbe
2805 |             Binding Table must be allocated using pfnAllocateBindingTable
2806 \---------------------------------------------------------------------------*/
2807 //!
2808 //! \brief
2809 //! \details
2810 //! \param    PRENDERHAL_INTERFACE       renderHal
2811 //| \param    PCM_HAL_KERNEL_PARAM       kernelParam
2812 //| \param    PRENDERHAL_KRN_ALLOCATION  kernelAllocation
2813 //| \param    int32_t                    bindingTableID
2814 //| \param    int32_t                    curbeOffset
2815 //! \return   int32_t
2816 //!
HalCm_AllocateMediaID(PCM_HAL_STATE state,PCM_HAL_KERNEL_PARAM kernelParam,PRENDERHAL_KRN_ALLOCATION kernelAllocation,int32_t bindingTableID,int32_t curbeOffset)2817 int32_t HalCm_AllocateMediaID(
2818     PCM_HAL_STATE               state,
2819     PCM_HAL_KERNEL_PARAM        kernelParam,
2820     PRENDERHAL_KRN_ALLOCATION   kernelAllocation,
2821     int32_t                    bindingTableID,
2822     int32_t                    curbeOffset)
2823 {
2824     PRENDERHAL_INTERFACE            renderHal = state->renderHal;
2825     PRENDERHAL_MEDIA_STATE_LEGACY   curMediaState;
2826     int32_t                         curbeSize, iCurbeCurrent;
2827     int32_t                         interfaceDescriptor;
2828     RENDERHAL_INTERFACE_DESCRIPTOR_PARAMS interfaceDescriptorParams;
2829 
2830     interfaceDescriptor = -1;
2831 
2832     // Obtain pointer and validate current media state
2833     curMediaState = (PRENDERHAL_MEDIA_STATE_LEGACY)renderHal->pStateHeap->pCurMediaState;
2834 
2835     if (state->dshEnabled)
2836     {
2837         if (curMediaState == nullptr || (state->dshEnabled && (curMediaState->pDynamicState == nullptr)))
2838         {
2839             CM_ASSERTMESSAGE("Invalid Media State.");
2840             goto finish;
2841         }
2842     }
2843     else
2844     {
2845         if (curMediaState == nullptr)
2846         {
2847             CM_ASSERTMESSAGE("Invalid Media State.");
2848             goto finish;
2849         }
2850     }
2851 
2852     // Validate kernel allocation (kernel must be pre-loaded into GSH)
2853     if (!kernelAllocation ||
2854         kernelAllocation->dwFlags == RENDERHAL_KERNEL_ALLOCATION_FREE ||
2855         kernelAllocation->iSize == 0)
2856     {
2857         CM_ASSERTMESSAGE("Error: Invalid Kernel Allocation.");
2858         goto finish;
2859     }
2860 
2861     // Check Curbe allocation (CURBE_Lenght is in 256-bit count -> convert to bytes)
2862     curbeSize = kernelParam->curbeSizePerThread;
2863 
2864     if (state->dshEnabled)
2865     {
2866         iCurbeCurrent = curMediaState->pDynamicState->Curbe.iCurrent;
2867     }
2868     else
2869     {
2870         iCurbeCurrent = curMediaState->iCurbeOffset;
2871     }
2872 
2873     if (curbeSize <= 0)
2874     {
2875         // Curbe is not used by the kernel
2876         curbeSize = curbeOffset = 0;
2877     }
2878     // Validate Curbe Offset (curbe must be pre-allocated)
2879     else if ( curbeOffset < 0 ||                                       // Not allocated
2880              (curbeOffset & 0x1F) != 0 ||                              // Invalid alignment
2881              (curbeOffset + curbeSize) > iCurbeCurrent)               // Invalid size
2882     {
2883         CM_ASSERTMESSAGE("Error: Invalid Curbe Allocation.");
2884         goto finish;
2885     }
2886 
2887     // Try to reuse interface descriptor (for 2nd level buffer optimizations)
2888     // Check if ID already in use by another kernel - must use a different ID
2889     interfaceDescriptor = renderHal->pfnGetMediaID(renderHal, curMediaState, kernelAllocation);
2890     if (interfaceDescriptor < 0)
2891     {
2892         CM_ASSERTMESSAGE("Error: No Interface Descriptor available.");
2893         goto finish;
2894     }
2895 
2896     interfaceDescriptorParams.iMediaID            = interfaceDescriptor;
2897     interfaceDescriptorParams.iBindingTableID     = bindingTableID;
2898 
2899     //CURBE size and offset setting
2900     //Media w/o group: only per-thread CURBE is used, CrossThread CURBE is not used.
2901     //Media w/ group: should follow GPGPU walker setting, there is per-thread CURBE and cross-thread CURBE. But per-thread CURBE should be ZERO, and all should be cross-thread CURBE
2902     //GPGPU: both per-thread CURBE and cross-thread CURBE need be set.
2903     interfaceDescriptorParams.iCurbeOffset = curbeOffset;
2904     if ((!kernelParam->gpgpuWalkerParams.gpgpuEnabled) && (kernelParam->kernelThreadSpaceParam.groupSelect == CM_MW_GROUP_NONE) && (state->taskParam->mediaWalkerGroupSelect == CM_MW_GROUP_NONE))
2905     {   //Media pipe without group
2906         interfaceDescriptorParams.iCurbeLength          = kernelParam->curbeSizePerThread;
2907         interfaceDescriptorParams.iCrsThrdConstDataLn   = kernelParam->crossThreadConstDataLen;    //should always be 0 in this case
2908         interfaceDescriptorParams.iNumberThreadsInGroup = (kernelParam->numberThreadsInGroup > 0) ? kernelParam->numberThreadsInGroup : 1;  // This field should not be set to 0 even if the barrier is disabled, since an accurate value is needed for proper pre-emption.
2909         interfaceDescriptorParams.blGlobalBarrierEnable = false;
2910         interfaceDescriptorParams.blBarrierEnable       = false;
2911         interfaceDescriptorParams.iSLMSize              = 0;
2912     }
2913     else if ((!kernelParam->gpgpuWalkerParams.gpgpuEnabled) && ((kernelParam->kernelThreadSpaceParam.groupSelect != CM_MW_GROUP_NONE) || (state->taskParam->mediaWalkerGroupSelect != CM_MW_GROUP_NONE)))
2914     {   //Media w/ group
2915         interfaceDescriptorParams.iCurbeLength          = 0;                                    //No using per-thread CURBE
2916         interfaceDescriptorParams.iCrsThrdConstDataLn   = kernelParam->curbeSizePerThread;    //treat all CURBE as cross-thread CURBE
2917         interfaceDescriptorParams.iNumberThreadsInGroup = (kernelParam->numberThreadsInGroup > 0) ? kernelParam->numberThreadsInGroup : 1;  // This field should not be set to 0 even if the barrier is disabled, since an accurate value is needed for proper pre-emption.
2918         interfaceDescriptorParams.blBarrierEnable       = (kernelParam->barrierMode != CM_NO_BARRIER) ? true : false;
2919         interfaceDescriptorParams.blGlobalBarrierEnable = (kernelParam->barrierMode == CM_GLOBAL_BARRIER) ? true : false;
2920         interfaceDescriptorParams.iSLMSize              = kernelParam->slmSize;
2921     }
2922     else
2923     {   //GPGPU pipe
2924         interfaceDescriptorParams.iCurbeLength          = kernelParam->curbeSizePerThread;
2925         interfaceDescriptorParams.iCrsThrdConstDataLn   = kernelParam->crossThreadConstDataLen;
2926         interfaceDescriptorParams.iNumberThreadsInGroup = (kernelParam->numberThreadsInGroup > 0) ? kernelParam->numberThreadsInGroup : 1;
2927         interfaceDescriptorParams.blBarrierEnable       = (kernelParam->barrierMode != CM_NO_BARRIER) ? true : false;
2928         interfaceDescriptorParams.blGlobalBarrierEnable = (kernelParam->barrierMode == CM_GLOBAL_BARRIER) ? true : false;
2929         interfaceDescriptorParams.iSLMSize              = kernelParam->slmSize;
2930     }
2931     if (state->useNewSamplerHeap == true)
2932     {
2933         HalCm_SetupInterfaceDescriptor(state, renderHal, curMediaState, kernelAllocation, &interfaceDescriptorParams);
2934     }
2935     else
2936     {
2937         // Setup Media ID entry - this call could be HW dependent
2938         renderHal->pfnSetupInterfaceDescriptor(
2939             renderHal,
2940             curMediaState,
2941             kernelAllocation,
2942             &interfaceDescriptorParams);
2943     }
2944 
2945 finish:
2946     return interfaceDescriptor;
2947 }
2948 
isRenderTarget(PCM_HAL_STATE state,uint32_t index)2949 bool isRenderTarget(PCM_HAL_STATE state, uint32_t index)
2950 {
2951     bool readSync = false;
2952 
2953     readSync = state->umdSurf2DTable[index].readSyncs[state->osInterface->CurrentGpuContextOrdinal];
2954 
2955     if (readSync)
2956         return false;
2957     else
2958         return true;
2959 }
2960 
HalCm_DSH_LoadKernelArray(PCM_HAL_STATE state,PCM_HAL_KERNEL_PARAM * kernelArray,int32_t kernelCount,PRENDERHAL_KRN_ALLOCATION * krnAllocation)2961 int32_t HalCm_DSH_LoadKernelArray(
2962     PCM_HAL_STATE               state,
2963     PCM_HAL_KERNEL_PARAM       *kernelArray,
2964     int32_t                     kernelCount,
2965     PRENDERHAL_KRN_ALLOCATION  *krnAllocation)
2966 {
2967     PRENDERHAL_INTERFACE         renderHal;
2968     PCM_HAL_KERNEL_PARAM         kernel;
2969     PMHW_STATE_HEAP_MEMORY_BLOCK memoryBlock;                             // Kernel memory block
2970     int32_t                      totalSize;                               // Total size
2971     uint32_t                     blockSize[CM_MAX_KERNELS_PER_TASK];      // Size of kernels to load
2972     int32_t                      blockCount;                              // Number of kernels to load
2973     MOS_STATUS                   eStatus = MOS_STATUS_SUCCESS;
2974     int32_t                      hr = CM_FAILURE;
2975 
2976     renderHal = state->renderHal;
2977     state->criticalSectionDSH->Acquire();
2978     do
2979     {
2980         blockCount = 0;
2981         totalSize = 0;
2982 
2983         // Obtain list of kernels already loaded, discard kernels loaded in older heaps.
2984         // Calculate total size of kernels to be loaded, and get size of largest kernel.
2985         for (int i = 0; i < kernelCount; i++)
2986         {
2987             // Find out if kernel is already allocated and loaded in ISH
2988             kernel = kernelArray[i];
2989             krnAllocation[i] = (PRENDERHAL_KRN_ALLOCATION)renderHal->pfnSearchDynamicKernel(renderHal, static_cast<int>((kernel->kernelId >> 32)), -1);
2990 
2991             // Kernel is allocated - check if kernel is in current ISH
2992             if (krnAllocation[i])
2993             {
2994                 // Check if kernel is loaded
2995                 memoryBlock = krnAllocation[i]->pMemoryBlock;
2996 
2997                 if (memoryBlock)
2998                 {
2999                     // Kernel needs to be reloaded in current heap
3000                     if (memoryBlock->pStateHeap != renderHal->pMhwStateHeap->GetISHPointer() || state->forceKernelReload) //pInstructionStateHeaps
3001                     {
3002                         renderHal->pMhwStateHeap->FreeDynamicBlockDyn(MHW_ISH_TYPE, memoryBlock);
3003                         krnAllocation[i]->pMemoryBlock = nullptr;
3004                     }
3005                     else
3006                     {
3007                         // Increment kernel usage count, used in kernel caching architecture
3008                         state->dshKernelCacheHit++;
3009                         krnAllocation[i]->dwCount++;
3010 
3011                         // Lock kernel to avoid removal while loading other kernels
3012                         krnAllocation[i]->dwFlags = RENDERHAL_KERNEL_ALLOCATION_LOCKED;
3013                     }
3014                 }
3015                 else if (krnAllocation[i]->dwFlags == RENDERHAL_KERNEL_ALLOCATION_REMOVED)
3016                 {
3017                     // This is a kernel that was unloaded and now needs to be reloaded
3018                     // Track how many times this "cache miss" happens to determine if the
3019                     // ISH is under pressure and needs to be expanded
3020                     state->dshKernelCacheMiss++;
3021                 }
3022             }
3023             else
3024             {
3025                 // Assign kernel allocation for this kernel
3026                 krnAllocation[i] = renderHal->pfnAllocateDynamicKernel(renderHal, static_cast<int>((kernel->kernelId >> 32)), -1);
3027                 CM_CHK_NULL_GOTOFINISH_MOSERROR(krnAllocation[i]);
3028             }
3029 
3030             // Kernel is not loaded -> add to list of kernels to be loaded
3031             if (krnAllocation[i]->pMemoryBlock == nullptr &&
3032                 krnAllocation[i]->dwFlags != RENDERHAL_KERNEL_ALLOCATION_LOADING)
3033             {
3034                 // Increment amount of data that needs to be loaded in ISH (kernel already registered but unloaded)
3035                 blockSize[blockCount++] = kernel->kernelBinarySize + CM_KERNEL_BINARY_PADDING_SIZE;
3036                 totalSize += kernel->kernelBinarySize + CM_KERNEL_BINARY_PADDING_SIZE;
3037 
3038                 // Flag this kernel as loading - one single kernel instance is needed, not multiple!
3039                 // If the same kernel is used multiple times, avoid multiple reservations/loads
3040                 krnAllocation[i]->dwFlags = RENDERHAL_KERNEL_ALLOCATION_LOADING;
3041             }
3042         }
3043 
3044         // Use Hit/Miss ratio to ignore eventual cache misses
3045         // This code prevents ISH reallocation in case of eventual cache misses
3046         while (state->dshKernelCacheHit >= HAL_CM_KERNEL_CACHE_HIT_TO_MISS_RATIO)
3047         {
3048             if (state->dshKernelCacheMiss > 0) state->dshKernelCacheMiss--;
3049             state->dshKernelCacheHit -= HAL_CM_KERNEL_CACHE_HIT_TO_MISS_RATIO;
3050         }
3051 
3052         // Grow the kernel heap if too many kernels are being reloaded or there isn't enough room to load all kernels
3053         if (state->dshKernelCacheMiss > HAL_CM_KERNEL_CACHE_MISS_THRESHOLD ||
3054             renderHal->pfnRefreshDynamicKernels(renderHal, totalSize, blockSize, blockCount) != MOS_STATUS_SUCCESS)
3055         {
3056             renderHal->pfnExpandKernelStateHeap(renderHal, (uint32_t)totalSize);
3057             state->dshKernelCacheHit = 0;
3058             state->dshKernelCacheMiss = 0;
3059             continue;
3060         }
3061 
3062         // blockSize/blockCount define a list of blocks that must be loaded in current ISH for the
3063         // kernels not yet present. Pre-existing kernels are marked as bStatic to avoid being unloaded here
3064         if (blockCount > 0)
3065         {
3066             // Allocate array of kernels
3067             MHW_STATE_HEAP_DYNAMIC_ALLOC_PARAMS params;
3068             params.piSizes = (int32_t*)blockSize;
3069             params.iCount = blockCount;
3070             params.dwAlignment = RENDERHAL_KERNEL_BLOCK_ALIGN;
3071             params.bHeapAffinity = true;                                     // heap affinity - load all kernels in the same heap
3072             params.pHeapAffinity = renderHal->pMhwStateHeap->GetISHPointer();    // Select the active instruction heap
3073             params.dwScratchSpace = 0;
3074             params.bZeroAssignedMem = true;
3075             params.bStatic = true;
3076             params.bGrow = false;
3077 
3078             // Try to allocate array of blocks; if it fails, we may need to clear some space or grow the heap!
3079             memoryBlock = renderHal->pMhwStateHeap->AllocateDynamicBlockDyn(MHW_ISH_TYPE, &params);
3080             if (!memoryBlock)
3081             {
3082                 // Reset flags
3083                 for (int i = 0; i < kernelCount; i++)
3084                 {
3085                     if (krnAllocation[i] && krnAllocation[i]->dwFlags == RENDERHAL_KERNEL_ALLOCATION_LOADING)
3086                     {
3087                         krnAllocation[i]->dwFlags = RENDERHAL_KERNEL_ALLOCATION_STALE;
3088                     }
3089                 }
3090 
3091                 if (renderHal->pfnRefreshDynamicKernels(renderHal, totalSize, blockSize, blockCount) != MOS_STATUS_SUCCESS)
3092                 {
3093                     renderHal->pfnExpandKernelStateHeap(renderHal, (uint32_t)totalSize);
3094                 }
3095                 continue;
3096             }
3097 
3098             // All blocks are allocated in ISH
3099             // Setup kernel allocations, load kernel binaries
3100             for (int32_t i = 0; i < kernelCount; i++)
3101             {
3102                 // Load kernels in ISH
3103                 if (!krnAllocation[i]->pMemoryBlock)
3104                 {
3105                     PCM_HAL_KERNEL_PARAM      kernelParam = kernelArray[i];
3106                     PRENDERHAL_KRN_ALLOCATION allocation = krnAllocation[i];
3107                     if (memoryBlock)
3108                     {
3109                         allocation->iKID = -1;
3110                         allocation->iKUID = static_cast<int>((kernelArray[i]->kernelId >> 32));
3111                         allocation->iKCID = -1;
3112                         FrameTrackerTokenFlat_SetProducer(&allocation->trackerToken, &renderHal->trackerProducer);
3113                         FrameTrackerTokenFlat_Merge(&allocation->trackerToken,
3114                                                     renderHal->currentTrackerIndex,
3115                                                     renderHal->trackerProducer.GetNextTracker(renderHal->currentTrackerIndex));
3116                         allocation->dwOffset = memoryBlock->dwDataOffset;
3117                         allocation->iSize = kernelArray[i]->kernelBinarySize + CM_KERNEL_BINARY_PADDING_SIZE;
3118                         allocation->dwCount = 0;
3119                         allocation->dwFlags = RENDERHAL_KERNEL_ALLOCATION_USED;
3120                         allocation->Params = state->kernelParamsRenderHal.Params;
3121                         allocation->pMhwKernelParam = &state->kernelParamsMhw;
3122                         allocation->pMemoryBlock = memoryBlock;
3123 
3124                         // Copy kernel data
3125                         // Copy MovInstruction First
3126                         if (allocation->pMemoryBlock &&
3127                             allocation->pMemoryBlock->dwDataSize >= kernelParam->kernelBinarySize)
3128                         {
3129                             MOS_SecureMemcpy(allocation->pMemoryBlock->pDataPtr,
3130                                 kernelParam->movInsDataSize,
3131                                 kernelParam->movInsData,
3132                                 kernelParam->movInsDataSize);
3133 
3134                             // Copy Cm Kernel Binary
3135                             MOS_SecureMemcpy(allocation->pMemoryBlock->pDataPtr + kernelParam->movInsDataSize,
3136                                 kernelParam->kernelBinarySize - kernelParam->movInsDataSize,
3137                                 kernelParam->kernelBinary,
3138                                 kernelParam->kernelBinarySize - kernelParam->movInsDataSize);
3139 
3140                             // Padding bytes dummy instructions after kernel binary to resolve page fault issue
3141                             MOS_ZeroMemory(allocation->pMemoryBlock->pDataPtr + kernelParam->kernelBinarySize, CM_KERNEL_BINARY_PADDING_SIZE);
3142                         }
3143 
3144                         // Get next memory block returned as part of the array
3145                         memoryBlock = memoryBlock->pNext;
3146                     }
3147                 }
3148             }
3149         }
3150 
3151         // Kernel load was successfull, or nothing else to load -
3152         // Quit the kernel load loop
3153         hr = CM_SUCCESS;
3154         eStatus = MOS_STATUS_SUCCESS;
3155         break;
3156 
3157     } while (1);
3158 
3159 finish:
3160     if (eStatus == MOS_STATUS_SUCCESS)
3161     {
3162         for (int32_t i = 0; i < kernelCount; i++)
3163         {
3164             renderHal->pfnTouchDynamicKernel(renderHal, krnAllocation[i]);
3165         }
3166     }
3167     state->criticalSectionDSH->Release();
3168     return hr;
3169 }
3170 
HalCm_DSH_GetDynamicStateConfiguration(PCM_HAL_STATE state,PRENDERHAL_DYNAMIC_MEDIA_STATE_PARAMS params,uint32_t numKernels,PCM_HAL_KERNEL_PARAM * kernels,uint32_t * piCurbeOffsets)3171 MOS_STATUS HalCm_DSH_GetDynamicStateConfiguration(
3172     PCM_HAL_STATE                         state,
3173     PRENDERHAL_DYNAMIC_MEDIA_STATE_PARAMS params,
3174     uint32_t                              numKernels,
3175     PCM_HAL_KERNEL_PARAM                 *kernels,
3176     uint32_t                              *piCurbeOffsets)
3177 {
3178     PCM_HAL_KERNEL_PARAM      cmKernel;
3179 
3180     PRENDERHAL_INTERFACE renderHal = state->renderHal;
3181     PRENDERHAL_KRN_ALLOCATION krnAllocation;
3182 
3183     MOS_ZeroMemory(params, sizeof(RENDERHAL_DYNAMIC_MEDIA_STATE_PARAMS));
3184 
3185     params->iMaxMediaIDs = numKernels;
3186 
3187     for (uint32_t i = 0; i < numKernels; i++)
3188     {
3189         cmKernel = kernels[i];
3190 
3191         // get max curbe size
3192         int32_t curbeSize = MOS_ALIGN_CEIL(cmKernel->totalCurbeSize, state->renderHal->dwCurbeBlockAlign);
3193         int32_t curbeOffset = piCurbeOffsets[i] + curbeSize;
3194         params->iMaxCurbeOffset = MOS_MAX(params->iMaxCurbeOffset, curbeOffset);
3195         params->iMaxCurbeSize += curbeSize;
3196 
3197         // get max spill size
3198         params->iMaxSpillSize = MOS_MAX(params->iMaxSpillSize, (int32_t)cmKernel->spillSize);
3199 
3200         // check if kernel already used - increase Max Media ID to allow BB reuse logic
3201         krnAllocation = renderHal->pfnSearchDynamicKernel(renderHal, static_cast<int>((cmKernel->kernelId >> 32)), -1);
3202         if (krnAllocation)
3203         {
3204             params->iMaxMediaIDs = MOS_MAX(params->iMaxMediaIDs, krnAllocation->iKID + 1);
3205         }
3206     }
3207 
3208     if (state->useNewSamplerHeap == true)
3209     {
3210         // Update offset to the base of first kernel and update count
3211         // for 3D sampler, update indirect state information
3212         unsigned int heapOffset = 0;
3213         unsigned int sampler3DCount = 0;
3214         MHW_SAMPLER_STATE_PARAM samplerParamMhw = {};
3215         SamplerParam samplerParam = {};
3216         samplerParamMhw.SamplerType = MHW_SAMPLER_TYPE_3D;
3217         state->cmHalInterface->GetSamplerParamInfoForSamplerType(&samplerParamMhw, samplerParam);
3218         for (unsigned int i = 0; i < numKernels; i++)
3219         {
3220             cmKernel = kernels[i];
3221             std::list<SamplerParam> *sampler_heap = cmKernel->samplerHeap;
3222             std::list<SamplerParam>::iterator iter;
3223 
3224             heapOffset = MOS_ALIGN_CEIL(heapOffset, MHW_SAMPLER_STATE_ALIGN);
3225             state->taskParam->samplerOffsetsByKernel[i] = heapOffset;
3226             state->taskParam->samplerCountsByKernel[i] = sampler_heap->size();
3227 
3228             if (sampler_heap->size() > 0)
3229             {
3230                 heapOffset = heapOffset + sampler_heap->back().heapOffset + sampler_heap->back().size;
3231 
3232                 // 3D sampler needs indirect sampler heap, so calculates the required size
3233                 // and offset for indirect sampler heap.
3234                 unsigned int max3DCount = 0;
3235                 for (iter = sampler_heap->begin(); iter != sampler_heap->end(); ++iter)
3236                 {
3237                     if (iter->elementType == samplerParam.elementType)
3238                     {
3239                         if (iter->userDefinedBti == true)
3240                         {
3241                             max3DCount = iter->bti + 1;
3242                         }
3243                         else
3244                         {
3245                             max3DCount += 1;
3246                         }
3247                     }
3248                 }
3249                 heapOffset = MOS_ALIGN_CEIL(heapOffset, MHW_SAMPLER_STATE_ALIGN);
3250                 state->taskParam->samplerIndirectOffsetsByKernel[i] = heapOffset;
3251                 heapOffset += max3DCount * state->renderHal->pHwSizes->dwSizeSamplerIndirectState;
3252                 sampler3DCount += max3DCount;
3253             }
3254         }
3255 
3256         // Temporary solution for DSH sampler heap assginment:
3257         // Adjust sampler space for DSH, because the DSH use sampler count to
3258         // allocate the space. However the mechanism is not correct. The sampler
3259         // heap size is actually calculated by the maximum offset of the largest
3260         // sampler type.
3261         // So the offset of largest element plus the size of all of the largest
3262         // element samplers should be equal to the maximum size. However we cannot
3263         // do this because of the DSH's mechanism.
3264         // To resolve this, we first let DSH allocate enough 3D samplers
3265         // (because 3D samplers has indirect state), then just convert the rest of
3266         // the heap to AVS. Here we only care about the size, not the correct
3267         // number because we are going to calculate the offset by ourself.
3268         // Since DSH allocation has some alignments inside, the actually size of the
3269         // heap should be slightly larger, which should be OK.
3270 
3271         samplerParamMhw.SamplerType = MHW_SAMPLER_TYPE_AVS;
3272         state->cmHalInterface->GetSamplerParamInfoForSamplerType(&samplerParamMhw, samplerParam);
3273         params->iMaxSamplerIndex3D = (sampler3DCount + numKernels - 1) / numKernels;
3274         params->iMaxSamplerIndexAVS = ((heapOffset - sampler3DCount * (state->renderHal->pHwSizes->dwSizeSamplerState + state->renderHal->pHwSizes->dwSizeSamplerIndirectState)) + samplerParam.btiMultiplier * numKernels - 1) / (samplerParam.btiMultiplier * numKernels);
3275     }
3276     else
3277     {
3278         // Get total sampler count
3279 
3280         // Initialize pointers to samplers and reset sampler index table
3281         MOS_FillMemory(state->samplerIndexTable, state->cmDeviceParam.maxSamplerTableSize, CM_INVALID_INDEX);
3282 
3283         params->iMaxSamplerIndex3D = CM_MAX_3D_SAMPLER_SIZE;
3284         params->iMaxSamplerIndexAVS = CM_MAX_AVS_SAMPLER_SIZE;
3285         params->iMaxSamplerIndexConv = 0;
3286         params->iMaxSamplerIndexMisc = 0;
3287         params->iMax8x8Tables = CM_MAX_AVS_SAMPLER_SIZE;
3288     }
3289     return MOS_STATUS_SUCCESS;
3290 }
3291 
HalCm_DSH_UnregisterKernel(PCM_HAL_STATE state,uint64_t kernelId)3292 MOS_STATUS HalCm_DSH_UnregisterKernel(
3293     PCM_HAL_STATE               state,
3294     uint64_t                    kernelId)
3295 {
3296     PRENDERHAL_INTERFACE renderHal = state->renderHal;
3297     PRENDERHAL_KRN_ALLOCATION krnAllocation = renderHal->pfnSearchDynamicKernel(renderHal, static_cast<int>((kernelId >> 32)), -1);
3298     if (krnAllocation)
3299     {
3300         state->criticalSectionDSH->Acquire();
3301         renderHal->pfnUnregisterKernel(renderHal, krnAllocation);
3302         state->criticalSectionDSH->Release();
3303     }
3304     return MOS_STATUS_SUCCESS;
3305 }
3306 
3307 //*-----------------------------------------------------------------------------
3308 //| Purpose: Setup Sampler State
3309 //| Returns: Result of the operation
3310 //*-----------------------------------------------------------------------------
HalCm_SetupSamplerState(PCM_HAL_STATE state,PCM_HAL_KERNEL_PARAM kernelParam,PCM_HAL_KERNEL_ARG_PARAM argParam,PCM_HAL_INDEX_PARAM indexParam,int32_t mediaID,uint32_t threadIndex,uint8_t * buffer)3311 MOS_STATUS HalCm_SetupSamplerState(
3312     PCM_HAL_STATE                   state,
3313     PCM_HAL_KERNEL_PARAM            kernelParam,
3314     PCM_HAL_KERNEL_ARG_PARAM        argParam,
3315     PCM_HAL_INDEX_PARAM             indexParam,
3316     int32_t                         mediaID,
3317     uint32_t                        threadIndex,
3318     uint8_t                         *buffer)
3319 {
3320     MOS_STATUS                  eStatus;
3321     PRENDERHAL_INTERFACE        renderHal;
3322     PMHW_SAMPLER_STATE_PARAM    samplerParam;
3323     uint8_t                     *src;
3324     uint8_t                     *dst;
3325     uint32_t                    index;
3326     uint32_t                    samplerIndex = 0;
3327     void                        *sampler = nullptr;
3328     uint32_t                    samplerOffset = 0;
3329 
3330     eStatus = MOS_STATUS_SUCCESS;
3331 
3332     CM_CHK_NULL_GOTOFINISH_MOSERROR(state);
3333 
3334     renderHal    = state->renderHal;
3335 
3336     if (indexParam->samplerIndexCount >= (uint32_t)renderHal->StateHeapSettings.iSamplers)
3337     {
3338         eStatus = MOS_STATUS_INVALID_PARAMETER;
3339         CM_ASSERTMESSAGE(
3340             "Exceeded Max samplers '%d'",
3341             indexParam->samplerIndexCount);
3342         goto finish;
3343     }
3344 
3345     // Get the Index to sampler array from the kernel data
3346     //----------------------------------
3347     CM_ASSERT(argParam->unitSize == sizeof(index));
3348     //----------------------------------
3349 
3350     src    = argParam->firstValue + (threadIndex * argParam->unitSize);
3351     index  = *((uint32_t*)src);
3352 
3353     // check to see if the data present for the sampler in the array
3354     if (index >= state->cmDeviceParam.maxSamplerTableSize ||
3355         !state->samplerTable[index].bInUse)
3356     {
3357         eStatus = MOS_STATUS_INVALID_PARAMETER;
3358         CM_ASSERTMESSAGE(
3359             "Invalid Sampler array index '%d'", index);
3360         goto finish;
3361     }
3362     // Setup samplers
3363     samplerParam = &state->samplerTable[index];
3364 
3365     if (state->useNewSamplerHeap == true)
3366     {
3367         std::list<SamplerParam>::iterator iter;
3368         for (iter = kernelParam->samplerHeap->begin(); iter != kernelParam->samplerHeap->end(); ++iter)
3369         {
3370             if ((iter->samplerTableIndex == index)&&(iter->regularBti == true))
3371             {
3372                 break;
3373             }
3374         }
3375         if (iter != kernelParam->samplerHeap->end())
3376         {
3377             samplerIndex = iter->bti;
3378         }
3379         else
3380         {
3381             // There must be incorrect internal logic
3382             CM_ASSERTMESSAGE( "BTI calculation error in cm_hal\n");
3383             return MOS_STATUS_UNKNOWN;
3384         }
3385         HalCm_GetSamplerOffset(state, renderHal, mediaID, iter->heapOffset, iter->bti, samplerParam, &samplerOffset);
3386     }
3387     else
3388     {
3389         // Check to see if sampler is already assigned
3390         samplerIndex = state->samplerIndexTable[index];
3391         if ((int)samplerIndex == CM_INVALID_INDEX)
3392         {
3393 
3394             switch (state->samplerTable[index].ElementType)
3395             {
3396 
3397                 case MHW_Sampler2Elements:
3398                 {
3399                     unsigned int index = 0;
3400                     index = state->samplerStatistics.samplerIndexBase[MHW_Sampler2Elements];
3401                     while (state->samplerIndexTable[index] != CM_INVALID_INDEX)
3402                     {
3403                         index++;
3404                     }
3405                     samplerIndex = index;
3406                     state->samplerStatistics.samplerIndexBase[MHW_Sampler2Elements] = (index + 1);
3407                     break;
3408                 }
3409                 case MHW_Sampler4Elements:
3410                 {
3411                     unsigned int index = 0;
3412                     index = state->samplerStatistics.samplerIndexBase[MHW_Sampler4Elements];
3413                     while (state->samplerIndexTable[index] != CM_INVALID_INDEX)
3414                     {
3415                         index++;
3416                     }
3417                     samplerIndex = index;
3418                     state->samplerStatistics.samplerIndexBase[MHW_Sampler4Elements] = (index + 1);
3419                     break;
3420                 }
3421                 case MHW_Sampler8Elements:
3422                 {
3423                     unsigned int index = 0;
3424                     index = state->samplerStatistics.samplerIndexBase[MHW_Sampler8Elements];
3425                     while (state->samplerIndexTable[index] != CM_INVALID_INDEX)
3426                     {
3427                         index++;
3428                     }
3429                     samplerIndex = index;
3430                     state->samplerStatistics.samplerIndexBase[MHW_Sampler8Elements] = (index + 1);
3431                     break;
3432                 }
3433                 case MHW_Sampler64Elements:
3434                 {
3435                     unsigned int index = 0;
3436                     index = state->samplerStatistics.samplerIndexBase[MHW_Sampler64Elements];
3437                     while (state->samplerIndexTable[index] != CM_INVALID_INDEX)
3438                     {
3439                         index += index + 2;
3440                     }
3441                     samplerIndex = index;
3442                     state->samplerStatistics.samplerIndexBase[MHW_Sampler64Elements] = (index + 2);
3443 
3444                     break;
3445                 }
3446                 case MHW_Sampler128Elements:
3447                 {
3448                     unsigned int index = 0;
3449                     index = state->samplerStatistics.samplerIndexBase[MHW_Sampler128Elements];
3450                     while (state->samplerIndexTable[index] != CM_INVALID_INDEX)
3451                     {
3452                         index++;
3453                     }
3454                     samplerIndex = index;
3455                     state->samplerStatistics.samplerIndexBase[MHW_Sampler128Elements] = (index + 1);
3456 
3457                     break;
3458                 }
3459                 default:
3460                     CM_ASSERTMESSAGE("Invalid sampler type '%d'.", state->samplerTable[index].SamplerType);
3461                     break;
3462             }
3463         }
3464 
3465         CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnGetSamplerOffsetAndPtr(
3466             renderHal,
3467             mediaID,
3468             samplerIndex,
3469             samplerParam,
3470             &samplerOffset,
3471             &sampler));
3472     }
3473     CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pMhwStateHeap->AddSamplerStateData(
3474         samplerOffset,
3475         &(((PRENDERHAL_MEDIA_STATE_LEGACY)renderHal->pStateHeap->pCurMediaState)->pDynamicState->memoryBlock),
3476         samplerParam));
3477 
3478     state->samplerIndexTable[index] = (unsigned char)samplerIndex;
3479 
3480     // Update the Batch Buffer
3481     if (buffer)
3482     {
3483         dst = buffer + argParam->payloadOffset;
3484         *((uint32_t*)dst) = samplerIndex;
3485     }
3486 
3487 finish:
3488     return eStatus;
3489 }
3490 
3491 //*-----------------------------------------------------------------------------
3492 //| Purpose: Setup Sampler State
3493 //| Returns: Result of the operation
3494 //*-----------------------------------------------------------------------------
HalCm_SetupSamplerStateWithBTIndex(PCM_HAL_STATE state,PCM_HAL_KERNEL_PARAM kernelParam,PCM_HAL_SAMPLER_BTI_ENTRY samplerBTIEntry,uint32_t samplerCount,int32_t mediaID)3495 MOS_STATUS HalCm_SetupSamplerStateWithBTIndex(
3496     PCM_HAL_STATE                   state,
3497     PCM_HAL_KERNEL_PARAM            kernelParam,
3498     PCM_HAL_SAMPLER_BTI_ENTRY       samplerBTIEntry,
3499     uint32_t                        samplerCount,
3500     int32_t                         mediaID )
3501 {
3502     MOS_STATUS                      eStatus = MOS_STATUS_SUCCESS;
3503     PRENDERHAL_INTERFACE            renderHal;
3504     PMHW_SAMPLER_STATE_PARAM        samplerParam;
3505     uint32_t                        index;
3506     uint32_t                        samplerIndex;
3507     void                            *sampler = nullptr;
3508     uint32_t                        samplerOffset = 0;
3509 
3510     renderHal = state->renderHal;
3511 
3512     if (state->useNewSamplerHeap != true)
3513     {
3514         if (samplerCount >= (uint32_t)renderHal->StateHeapSettings.iSamplers)
3515         {
3516             eStatus = MOS_STATUS_INVALID_PARAMETER;
3517             CM_ASSERTMESSAGE(
3518                 "Exceeded Max samplers '%d'",
3519                 samplerCount);
3520             goto finish;
3521         }
3522     }
3523 
3524     index = samplerBTIEntry[ samplerCount ].samplerIndex;
3525 
3526     // check to see if the data present for the sampler in the array
3527     if ( index >= state->cmDeviceParam.maxSamplerTableSize ||
3528          !state->samplerTable[ index ].bInUse )
3529     {
3530         eStatus = MOS_STATUS_INVALID_PARAMETER;
3531         CM_ASSERTMESSAGE(
3532             "Invalid Sampler array index '%d'", index );
3533         goto finish;
3534     }
3535 
3536     samplerIndex = samplerBTIEntry[ samplerCount ].samplerBTI;
3537     // Setup samplers
3538     samplerParam = &state->samplerTable[ index ];
3539 
3540     if (state->useNewSamplerHeap == true)
3541     {
3542         std::list<SamplerParam>::iterator iter;
3543         for (iter = kernelParam->samplerHeap->begin(); iter != kernelParam->samplerHeap->end(); ++iter)
3544         {
3545             if ((iter->samplerTableIndex == index) && (iter->bti == samplerIndex) && (iter->userDefinedBti == true))
3546             {
3547                 break;
3548             }
3549         }
3550         if (iter == kernelParam->samplerHeap->end())
3551         {
3552             // There must be incorrect internal logic
3553             CM_ASSERTMESSAGE("BTI calculation error in cm_hal\n");
3554             return MOS_STATUS_UNKNOWN;
3555         }
3556         HalCm_GetSamplerOffset(state, renderHal, mediaID, iter->heapOffset, iter->bti, samplerParam, &samplerOffset);
3557     }
3558     else
3559     {
3560         CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnGetSamplerOffsetAndPtr(renderHal, mediaID, samplerIndex, samplerParam, &samplerOffset, &sampler));
3561     }
3562 
3563     CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pMhwStateHeap->AddSamplerStateData(
3564         samplerOffset,
3565         &(((PRENDERHAL_MEDIA_STATE_LEGACY)renderHal->pStateHeap->pCurMediaState)->pDynamicState->memoryBlock),
3566         samplerParam));
3567 
3568 finish:
3569     return eStatus;
3570 }
3571 
3572 //*-----------------------------------------------------------------------------
3573 //| Purpose: Setup Buffer surface State
3574 //| Returns: Result of the operation
3575 //*-----------------------------------------------------------------------------
HalCm_SetupBufferSurfaceState(PCM_HAL_STATE state,PCM_HAL_KERNEL_ARG_PARAM argParam,PCM_HAL_INDEX_PARAM indexParam,int32_t bindingTable,int16_t globalSurface,uint32_t threadIndex,uint8_t * buffer)3576 MOS_STATUS HalCm_SetupBufferSurfaceState(
3577     PCM_HAL_STATE               state,
3578     PCM_HAL_KERNEL_ARG_PARAM    argParam,
3579     PCM_HAL_INDEX_PARAM         indexParam,
3580     int32_t                     bindingTable,
3581     int16_t                     globalSurface,
3582     uint32_t                    threadIndex,
3583     uint8_t                     *buffer)
3584 {
3585     MOS_STATUS                  eStatus;
3586     RENDERHAL_SURFACE               surface;
3587     PMOS_SURFACE                    mosSurface;
3588     RENDERHAL_SURFACE_STATE_PARAMS  surfaceParam;
3589     PRENDERHAL_INTERFACE            renderHal;
3590     PRENDERHAL_SURFACE_STATE_ENTRY  surfaceEntry;
3591     uint8_t                     *src;
3592     uint8_t                     *dst;
3593     uint32_t                    index;
3594     uint32_t                    btIndex;
3595     uint16_t                    memObjCtl;
3596     uint32_t                    offsetSrc;
3597     PRENDERHAL_STATE_HEAP       stateHeap;
3598     CM_SURFACE_BTI_INFO         surfBTIInfo;
3599 
3600     eStatus              = MOS_STATUS_UNKNOWN;
3601     renderHal      = state->renderHal;
3602     //GT-PIN
3603     PCM_HAL_TASK_PARAM     taskParam = state->taskParam;
3604 
3605     // Get the Index to Buffer array from the kernel data
3606     CM_ASSERT(argParam->unitSize == sizeof(index));
3607 
3608     //Init surfBTIInfo
3609     state->cmHalInterface->GetHwSurfaceBTIInfo(&surfBTIInfo);
3610 
3611     src      = argParam->firstValue + (threadIndex * argParam->unitSize);
3612     index    = *((uint32_t*)src) & CM_SURFACE_MASK;
3613     if (index == CM_NULL_SURFACE)
3614     {
3615         if (buffer)
3616         {
3617             dst = buffer + argParam->payloadOffset;
3618             *((uint32_t*)dst) = CM_NULL_SURFACE_BINDING_INDEX;
3619         }
3620 
3621         eStatus = MOS_STATUS_SUCCESS;
3622         goto finish;
3623     }
3624 
3625     memObjCtl = state->bufferTable[index].memObjCtl;
3626     if (!memObjCtl)
3627     {
3628         memObjCtl = CM_DEFAULT_CACHE_TYPE;
3629     }
3630 
3631     // check to see if index is valid
3632     if (index >= state->cmDeviceParam.maxBufferTableSize ||
3633         (state->bufferTable[index].size == 0))
3634     {
3635         eStatus = MOS_STATUS_INVALID_PARAMETER;
3636         CM_ASSERTMESSAGE(
3637             "Invalid Buffer surface array index '%d'", index);
3638         goto finish;
3639     }
3640 
3641     // Check to see if buffer is already assigned
3642     btIndex = state->btiBufferIndexTable[index].BTI.regularSurfIndex;
3643     if (btIndex == ( unsigned char )CM_INVALID_INDEX || argParam->aliasCreated == true)
3644     {
3645         if (globalSurface < 0)
3646         {
3647             btIndex = HalCm_GetFreeBindingIndex(state, indexParam, 1);
3648         }
3649         else
3650         {
3651             btIndex = globalSurface + surfBTIInfo.reservedSurfaceStart; //CM_BINDING_START_INDEX_OF_GLOBAL_SURFACE(state);
3652             if ( btIndex >=  (surfBTIInfo.reservedSurfaceStart + CM_MAX_GLOBAL_SURFACE_NUMBER) ) {
3653                 eStatus = MOS_STATUS_INVALID_PARAMETER;
3654                 CM_ASSERTMESSAGE("Exceeded Max Global Surfaces '%d'", btIndex);
3655                 goto finish;
3656             }
3657         }
3658         // Get Details of Buffer surface and fill the surface
3659         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceAndRegister(state, &surface, CM_ARGUMENT_SURFACEBUFFER, index, 0));
3660 
3661         MOS_ZeroMemory(&surfaceParam, sizeof(surfaceParam));
3662 
3663         // override the buffer offset and size if alias is used
3664         mosSurface = &(surface.OsSurface);
3665         if (state->bufferTable[index].surfaceStateEntry[argParam->aliasIndex / state->surfaceArraySize].surfaceStateSize)
3666         {
3667             mosSurface->dwWidth = state->bufferTable[index].surfaceStateEntry[argParam->aliasIndex / state->surfaceArraySize].surfaceStateSize;
3668             mosSurface->dwOffset = state->bufferTable[index].surfaceStateEntry[argParam->aliasIndex / state->surfaceArraySize].surfaceStateOffset;
3669             surface.rcSrc.right = mosSurface->dwWidth;
3670             surface.rcDst.right = mosSurface->dwWidth;
3671         }
3672         // override the mocs value if it is set
3673         if (state->bufferTable[index].surfaceStateEntry[argParam->aliasIndex / state->surfaceArraySize].surfaceStateMOCS)
3674         {
3675             memObjCtl = state->bufferTable[index].surfaceStateEntry[argParam->aliasIndex / state->surfaceArraySize].surfaceStateMOCS;
3676         }
3677 
3678         //Cache configurations
3679         state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam);
3680 
3681         // Set the isOutput by default
3682         surfaceParam.isOutput = true;
3683 
3684         // Setup Buffer surface
3685         CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSetupBufferSurfaceState(
3686                 renderHal,
3687                 &surface,
3688                 &surfaceParam,
3689                 &surfaceEntry));
3690 
3691         // Bind the surface State
3692         CM_ASSERT(((int32_t)btIndex) < renderHal->StateHeapSettings.iSurfacesPerBT + surfBTIInfo.normalSurfaceStart);
3693         CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnBindSurfaceState(
3694                renderHal,
3695                bindingTable,
3696                btIndex,
3697                surfaceEntry));
3698 
3699         if ((taskParam->surfEntryInfoArrays.kernelNum != 0) &&
3700             (taskParam->surfEntryInfoArrays.surfEntryInfosArray != nullptr))
3701         {
3702             //GT-Pin
3703            uint32_t dummy = 0;
3704            CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceDetails(
3705                    state,
3706                    indexParam,
3707                    btIndex,
3708                    surface.OsSurface,
3709                    globalSurface,
3710                    nullptr,
3711                    dummy,
3712                    surfaceParam,
3713                    CM_ARGUMENT_SURFACEBUFFER));
3714         }
3715 
3716         // Update index to table
3717         state->btiBufferIndexTable[ index ].BTI.regularSurfIndex = btIndex;
3718         state->btiBufferIndexTable[ index ].nPlaneNumber = 1;
3719 
3720         stateHeap = renderHal->pStateHeap;
3721         offsetSrc = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +   // Points to the Base of Current SSH Buffer Instance
3722                             ( stateHeap->iBindingTableOffset ) +                       // Moves the pointer to Base of Array of Binding Tables
3723                             ( bindingTable * stateHeap->iBindingTableSize ) +         // Moves the pointer to a Particular Binding Table
3724                             ( btIndex * sizeof( uint32_t ) );                             // Move the pointer to correct entry
3725 
3726         state->btiBufferIndexTable[ index ].BTITableEntry.regularBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
3727     }
3728     else
3729     {
3730         stateHeap = renderHal->pStateHeap;
3731 
3732         // Get Offset to Current Binding Table
3733         uint32_t offsetCurrentBTStart = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +     // Points to the Base of Current SSH Buffer Instance
3734                                        ( stateHeap->iBindingTableOffset ) +                             // Moves the pointer to Base of Array of Binding Tables
3735                                        ( bindingTable * stateHeap->iBindingTableSize );                // Moves the pointer to a Particular Binding Table
3736 
3737         uint32_t *currentBTStart = ( uint32_t *)( stateHeap->pSshBuffer + offsetCurrentBTStart );
3738 
3739         int nEntryIndex = (int) ((uint32_t*)( state->btiBufferIndexTable[ index ].BTITableEntry.regularBtiEntryPosition ) - currentBTStart);
3740 
3741         if ( ( nEntryIndex < 0 ) || ( nEntryIndex >= renderHal->StateHeapSettings.iSurfacesPerBT ) )
3742         {
3743             uint32_t surfaceEntries = state->btiBufferIndexTable[ index ].nPlaneNumber;
3744             if ( globalSurface < 0 )
3745             {
3746                 btIndex = HalCm_GetFreeBindingIndex( state, indexParam, surfaceEntries );
3747             }
3748             else
3749             {
3750                 btIndex = globalSurface + surfBTIInfo.reservedSurfaceStart;
3751                 if ( btIndex >= (surfBTIInfo.reservedSurfaceStart + CM_MAX_GLOBAL_SURFACE_NUMBER ) )
3752                 {
3753                     eStatus = MOS_STATUS_INVALID_PARAMETER;
3754                     CM_ASSERTMESSAGE( "Exceeded Max Global Surfaces '%d'", btIndex );
3755                     goto finish;
3756                 }
3757             }
3758 
3759             // Bind the surface State
3760             CM_ASSERT( ( ( int32_t )btIndex ) < renderHal->StateHeapSettings.iSurfacesPerBT + surfBTIInfo.normalSurfaceStart);
3761 
3762             // Get Offset to Current Binding Table
3763             uint32_t offsetDst = offsetCurrentBTStart + ( btIndex * sizeof( uint32_t ) );                             // Move the pointer to correct entry
3764 
3765             uint32_t *bindingTableEntry = ( uint32_t *)( stateHeap->pSshBuffer + offsetDst );
3766             MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * surfaceEntries, state->btiBufferIndexTable[ index ].BTITableEntry.regularBtiEntryPosition, sizeof( uint32_t ) * surfaceEntries );
3767 
3768             // Update index to table
3769             state->btiBufferIndexTable[ index ].BTI.regularSurfIndex = btIndex;
3770             state->btiBufferIndexTable[ index ].BTITableEntry.regularBtiEntryPosition = bindingTableEntry;
3771         }
3772     }
3773 
3774     // Update the Batch Buffer
3775     if (buffer)
3776     {
3777         dst = buffer + argParam->payloadOffset;
3778         *((uint32_t*)dst) = btIndex;
3779     }
3780     eStatus = MOS_STATUS_SUCCESS;
3781 
3782 finish:
3783     return eStatus;
3784 }
3785 
3786 //*-----------------------------------------------------------------------------
3787 //| Purpose: Setup 3D surface State
3788 //| Returns: Result of the operation
3789 //*-----------------------------------------------------------------------------
HalCm_Setup3DSurfaceState(PCM_HAL_STATE state,PCM_HAL_KERNEL_ARG_PARAM argParam,PCM_HAL_INDEX_PARAM indexParam,int32_t bindingTable,uint32_t threadIndex,uint8_t * buffer)3790 MOS_STATUS HalCm_Setup3DSurfaceState(
3791     PCM_HAL_STATE               state,
3792     PCM_HAL_KERNEL_ARG_PARAM    argParam,
3793     PCM_HAL_INDEX_PARAM         indexParam,
3794     int32_t                     bindingTable,
3795     uint32_t                    threadIndex,
3796     uint8_t                     *buffer)
3797 {
3798     MOS_STATUS                  eStatus;
3799     PRENDERHAL_INTERFACE            renderHal;
3800     RENDERHAL_SURFACE               surface;
3801     RENDERHAL_SURFACE_STATE_PARAMS  surfaceParam;
3802     PRENDERHAL_SURFACE_STATE_ENTRY  surfaceEntries[MHW_MAX_SURFACE_PLANES];
3803     RENDERHAL_GET_SURFACE_INFO      info;
3804     uint8_t                     *src;
3805     uint8_t                     *dst;
3806     int32_t                     nSurfaceEntries;
3807     uint32_t                    index;
3808     uint32_t                    btIndex;
3809     uint16_t                    memObjCtl;
3810     uint32_t                    i;
3811     uint32_t                    offsetSrc;
3812     PRENDERHAL_STATE_HEAP       stateHeap;
3813     CM_SURFACE_BTI_INFO         surfBTIInfo;
3814 
3815     eStatus              = MOS_STATUS_UNKNOWN;
3816     renderHal  = state->renderHal;
3817     //GT-PIN
3818     PCM_HAL_TASK_PARAM     taskParam = state->taskParam;
3819 
3820     state->cmHalInterface->GetHwSurfaceBTIInfo(&surfBTIInfo);
3821 
3822     // Get the Index to 3dsurface array from the kernel data
3823     CM_ASSERT(argParam->unitSize == sizeof(index));
3824     src      = argParam->firstValue + (threadIndex * argParam->unitSize);
3825     index    = *((uint32_t*)src) & CM_SURFACE_MASK;
3826     if (index == CM_NULL_SURFACE)
3827     {
3828         if (buffer)
3829         {
3830             dst = buffer + argParam->payloadOffset;
3831             *((uint32_t*)dst) = CM_NULL_SURFACE_BINDING_INDEX;
3832         }
3833 
3834         eStatus = MOS_STATUS_SUCCESS;
3835         goto finish;
3836     }
3837 
3838     memObjCtl = state->surf3DTable[index].memObjCtl;
3839     if (!memObjCtl)
3840     {
3841         memObjCtl = CM_DEFAULT_CACHE_TYPE;
3842     }
3843 
3844     // check to see if the data present for the 3d surface in the array
3845     if ((index >= state->cmDeviceParam.max3DSurfaceTableSize)            ||
3846         Mos_ResourceIsNull(&state->surf3DTable[index].osResource))
3847     {
3848         eStatus = MOS_STATUS_INVALID_PARAMETER;
3849         CM_ASSERTMESSAGE(
3850             "Invalid 2D surface array index '%d'", index);
3851         goto finish;
3852     }
3853 
3854     // Check to see if surface is already assigned
3855     btIndex = state->bti3DIndexTable[index].BTI.regularSurfIndex;
3856     if ( btIndex == ( unsigned char )CM_INVALID_INDEX )
3857     {
3858         uint32_t tempPlaneIndex = 0;
3859         nSurfaceEntries = 0;
3860 
3861         // Get Details of 3D surface and fill the surface
3862         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceAndRegister(state, &surface, CM_ARGUMENT_SURFACE3D, index, 0));
3863 
3864         // Setup 3D surface
3865         MOS_ZeroMemory(&surfaceParam, sizeof(surfaceParam));
3866         surfaceParam.Type       = renderHal->SurfaceTypeDefault;
3867         surfaceParam.Boundary   = RENDERHAL_SS_BOUNDARY_ORIGINAL;
3868         surfaceParam.isOutput = true;
3869 
3870         //Cache configurations
3871         state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam);
3872 
3873         CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSetupSurfaceState(
3874                     renderHal,
3875                     &surface,
3876                     &surfaceParam,
3877                     &nSurfaceEntries,
3878                     surfaceEntries,
3879                     nullptr));
3880 
3881         MOS_ZeroMemory(&info, sizeof(RENDERHAL_GET_SURFACE_INFO));
3882 
3883         CM_CHK_MOSSTATUS_GOTOFINISH(RenderHal_GetSurfaceInfo(
3884             state->osInterface,
3885             &info,
3886             &surface.OsSurface));
3887 
3888         btIndex = HalCm_GetFreeBindingIndex(state, indexParam, nSurfaceEntries);
3889         for (i = 0; i < (uint32_t)nSurfaceEntries; i++)
3890         {
3891             *(surfaceEntries[i]->pSurface) = surface.OsSurface;
3892 
3893             // Bind the surface State
3894             CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnBindSurfaceState(
3895                         renderHal,
3896                         bindingTable,
3897                         btIndex + i,
3898                         surfaceEntries[i]));
3899 
3900             if ((taskParam->surfEntryInfoArrays.kernelNum != 0) &&
3901                 (taskParam->surfEntryInfoArrays.surfEntryInfosArray != nullptr))
3902             {
3903                 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceDetails(
3904                         state,
3905                         indexParam,
3906                         btIndex + i,
3907                         surface.OsSurface,
3908                         0,
3909                         surfaceEntries[i],
3910                         tempPlaneIndex,
3911                         surfaceParam,
3912                         CM_ARGUMENT_SURFACE3D));
3913             }
3914         }
3915         // Update index to table
3916         state->bti3DIndexTable[ index ].BTI.regularSurfIndex = btIndex;
3917         state->bti3DIndexTable[ index ].nPlaneNumber = nSurfaceEntries;
3918 
3919         stateHeap = renderHal->pStateHeap;
3920         offsetSrc = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +  // Points to the Base of Current SSH Buffer Instance
3921                             ( stateHeap->iBindingTableOffset ) +                       // Moves the pointer to Base of Array of Binding Tables
3922                             ( bindingTable * stateHeap->iBindingTableSize ) +         // Moves the pointer to a Particular Binding Table
3923                             ( btIndex * sizeof( uint32_t ) );                             // Move the pointer to correct entry
3924 
3925         state->bti3DIndexTable[ index ].BTITableEntry.regularBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
3926     }
3927     else
3928     {
3929         stateHeap = renderHal->pStateHeap;
3930 
3931         // Get Offset to Current Binding Table
3932         uint32_t offsetCurrentBTStart = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +     // Points to the Base of Current SSH Buffer Instance
3933                                        ( stateHeap->iBindingTableOffset ) +                             // Moves the pointer to Base of Array of Binding Tables
3934                                        ( bindingTable * stateHeap->iBindingTableSize );                // Moves the pointer to a Particular Binding Table
3935 
3936         uint32_t *currentBTStart = ( uint32_t *)( stateHeap->pSshBuffer + offsetCurrentBTStart );
3937 
3938         int nEntryIndex = (int)((uint32_t*)( state->bti3DIndexTable[ index ].BTITableEntry.regularBtiEntryPosition ) - currentBTStart);
3939 
3940         if ( ( nEntryIndex < 0 ) || ( nEntryIndex >= renderHal->StateHeapSettings.iSurfacesPerBT ) )
3941         {
3942             nSurfaceEntries = state->bti3DIndexTable[ index ].nPlaneNumber;
3943             btIndex = HalCm_GetFreeBindingIndex( state, indexParam, nSurfaceEntries );
3944 
3945             // Bind the surface State
3946             CM_ASSERT( ( ( int32_t )btIndex ) < renderHal->StateHeapSettings.iSurfacesPerBT + surfBTIInfo.normalSurfaceStart);
3947 
3948             // Get Offset to Current Binding Table
3949             uint32_t offsetDst = offsetCurrentBTStart + ( btIndex * sizeof( uint32_t ) );                             // Move the pointer to correct entry
3950 
3951             uint32_t *bindingTableEntry = ( uint32_t *)( stateHeap->pSshBuffer + offsetDst );
3952             MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * nSurfaceEntries, state->bti3DIndexTable[ index ].BTITableEntry.regularBtiEntryPosition, sizeof( uint32_t ) * nSurfaceEntries );
3953 
3954             // Update index to table
3955             state->bti3DIndexTable[ index ].BTI.regularSurfIndex = btIndex;
3956             state->bti3DIndexTable[ index ].BTITableEntry.regularBtiEntryPosition = bindingTableEntry;
3957         }
3958     }
3959 
3960     // Update the Batch Buffer
3961     if (buffer)
3962     {
3963         dst = buffer + argParam->payloadOffset;
3964         *((uint32_t*)dst) = btIndex;
3965     }
3966 
3967     eStatus = MOS_STATUS_SUCCESS;
3968 
3969 finish:
3970     return eStatus;
3971 }
3972 
3973 /*----------------------------------------------------------------------------
3974 | Purpose   : Set's surface state interlaced settings
3975 | Returns   : dword value
3976 \---------------------------------------------------------------------------*/
HalCm_HwSetSurfaceProperty(PCM_HAL_STATE state,CM_FRAME_TYPE frameType,PRENDERHAL_SURFACE_STATE_PARAMS params)3977 MOS_STATUS HalCm_HwSetSurfaceProperty(
3978     PCM_HAL_STATE                   state,
3979     CM_FRAME_TYPE                   frameType,
3980     PRENDERHAL_SURFACE_STATE_PARAMS params)
3981 {
3982     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3983 
3984     switch (frameType)
3985     {
3986     case CM_FRAME:
3987         params->bVertStride = 0;
3988         params->bVertStrideOffs = 0;
3989         break;
3990     case CM_TOP_FIELD:
3991         params->bVertStride = 1;
3992         params->bVertStrideOffs = 0;
3993         break;
3994     case CM_BOTTOM_FIELD:
3995         params->bVertStride = 1;
3996         params->bVertStrideOffs = 1;
3997         break;
3998     default:
3999         eStatus = MOS_STATUS_UNKNOWN;
4000     }
4001 
4002     return eStatus;
4003 }
4004 
4005 // A special treatment of NV12 format. Offset of the UV plane in an NV12 surface is adjusted, so
4006 // this plane can be accessed as a separate R8G8 surface in kernels.
UpdateSurfaceAliasPlaneOffset(CM_HAL_SURFACE2D_SURFACE_STATE_PARAM * surfaceStateParam,MOS_SURFACE * mosSurface)4007 static bool UpdateSurfaceAliasPlaneOffset(
4008     CM_HAL_SURFACE2D_SURFACE_STATE_PARAM *surfaceStateParam,
4009     MOS_SURFACE *mosSurface)
4010 {
4011     if (Format_R8G8UN != surfaceStateParam->format
4012         || Format_NV12 != mosSurface->Format)
4013     {
4014         mosSurface->Format
4015                 = static_cast<MOS_FORMAT>(surfaceStateParam->format);
4016         return false;  // No need to update offset.
4017     }
4018     mosSurface->dwOffset = mosSurface->UPlaneOffset.iSurfaceOffset;
4019     mosSurface->Format = Format_R8G8UN;
4020     return false;
4021 }
4022 
4023 //*-----------------------------------------------------------------------------
4024 //| Purpose: Setup 2D surface State
4025 //| Returns: Result of the operation
4026 //*-----------------------------------------------------------------------------
HalCm_Setup2DSurfaceStateBasic(PCM_HAL_STATE state,PCM_HAL_KERNEL_ARG_PARAM argParam,PCM_HAL_INDEX_PARAM indexParam,int32_t bindingTable,uint32_t threadIndex,bool pixelPitch,uint8_t * buffer,bool multipleBinding)4027 MOS_STATUS HalCm_Setup2DSurfaceStateBasic(
4028     PCM_HAL_STATE                      state,
4029     PCM_HAL_KERNEL_ARG_PARAM           argParam,
4030     PCM_HAL_INDEX_PARAM                indexParam,
4031     int32_t                            bindingTable,
4032     uint32_t                           threadIndex,
4033     bool                               pixelPitch,
4034     uint8_t                            *buffer,
4035     bool                               multipleBinding )
4036 {
4037     MOS_STATUS                  eStatus;
4038     RENDERHAL_SURFACE               renderHalSurface;
4039     PMOS_SURFACE                    surface;
4040     RENDERHAL_SURFACE_STATE_PARAMS  surfaceParam;
4041     PRENDERHAL_INTERFACE            renderHal;
4042     PRENDERHAL_SURFACE_STATE_ENTRY  surfaceEntries[ MHW_MAX_SURFACE_PLANES ];
4043     uint8_t                     *src;
4044     uint8_t                     *dst;
4045     int32_t                     nSurfaceEntries = 0;
4046     uint32_t                    index;
4047     uint32_t                    btIndex;
4048     uint16_t                    memObjCtl;
4049     uint32_t                    i;
4050     uint32_t                    tempPlaneIndex = 0;
4051     uint32_t                    offsetSrc;
4052     PRENDERHAL_STATE_HEAP       stateHeap;
4053     PCM_HAL_SURFACE2D_SURFACE_STATE_PARAM surfStateParam = nullptr;
4054     UNUSED(multipleBinding);
4055 
4056     eStatus = MOS_STATUS_UNKNOWN;
4057     renderHal = state->renderHal;
4058     MOS_ZeroMemory(&renderHalSurface, sizeof(renderHalSurface));
4059     surface   = &renderHalSurface.OsSurface;
4060     nSurfaceEntries = 0;
4061 
4062     //GT-PIN
4063     PCM_HAL_TASK_PARAM     taskParam = state->taskParam;
4064 
4065     // Get the Index to 2dsurface array from the kernel data
4066     CM_ASSERT( argParam->unitSize == sizeof( index ) );
4067     src = argParam->firstValue + ( threadIndex * argParam->unitSize );
4068     index = *( ( uint32_t *)src ) & CM_SURFACE_MASK;
4069     if ( index == CM_NULL_SURFACE )
4070     {
4071         if ( buffer )
4072         {
4073             dst = buffer + argParam->payloadOffset;
4074             *( ( uint32_t *)dst ) = CM_NULL_SURFACE_BINDING_INDEX;
4075         }
4076 
4077         eStatus = MOS_STATUS_SUCCESS;
4078         goto finish;
4079     }
4080 
4081     memObjCtl = state->umdSurf2DTable[index].memObjCtl;
4082     if ( !memObjCtl )
4083     {
4084         memObjCtl = CM_DEFAULT_CACHE_TYPE;
4085     }
4086 
4087     // check to see if the data present for the 2d surface in the array
4088     if ( index >= state->cmDeviceParam.max2DSurfaceTableSize ||
4089          Mos_ResourceIsNull( &state->umdSurf2DTable[ index ].osResource ) )
4090     {
4091         eStatus = MOS_STATUS_INVALID_PARAMETER;
4092         CM_ASSERTMESSAGE(
4093             "Invalid 2D surface array index '%d'", index );
4094         goto finish;
4095     }
4096 
4097     // Check to see if surface is already assigned
4098     unsigned char nBTIRegularSurf, nBTISamplerSurf;
4099     nBTIRegularSurf = state->bti2DIndexTable[ index ].BTI.regularSurfIndex;
4100     nBTISamplerSurf = state->bti2DIndexTable[ index ].BTI.samplerSurfIndex;
4101 
4102     if (((!pixelPitch && (nBTIRegularSurf != (unsigned char)CM_INVALID_INDEX)) || (pixelPitch && (nBTISamplerSurf != (unsigned char)CM_INVALID_INDEX))) && argParam->aliasCreated == false )
4103     {
4104         if ( pixelPitch )
4105         {
4106             btIndex = nBTISamplerSurf;
4107         }
4108         else
4109         {
4110             btIndex = nBTIRegularSurf;
4111         }
4112 
4113         stateHeap = renderHal->pStateHeap;
4114 
4115         // Get Offset to Current Binding Table
4116         uint32_t offsetCurrentBTStart = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +     // Points to the Base of Current SSH Buffer Instance
4117                                        ( stateHeap->iBindingTableOffset ) +                             // Moves the pointer to Base of Array of Binding Tables
4118                                        ( bindingTable * stateHeap->iBindingTableSize );                // Moves the pointer to a Particular Binding Table
4119 
4120         uint32_t *currentBTStart = ( uint32_t *)( stateHeap->pSshBuffer + offsetCurrentBTStart );
4121 
4122         int nEntryIndex = 0;
4123 
4124         if ( pixelPitch )
4125         {
4126             nEntryIndex = (int)((uint32_t*)( state->bti2DIndexTable[ index ].BTITableEntry.samplerBtiEntryPosition ) - currentBTStart);
4127         }
4128         else
4129         {
4130             nEntryIndex = (int)((uint32_t*)( state->bti2DIndexTable[ index ].BTITableEntry.regularBtiEntryPosition ) - currentBTStart);
4131         }
4132 
4133         if ( ( nEntryIndex < 0 ) || ( nEntryIndex >= renderHal->StateHeapSettings.iSurfacesPerBT ) )
4134         {
4135             nSurfaceEntries = state->bti2DIndexTable[ index ].nPlaneNumber;
4136 
4137             btIndex = HalCm_GetFreeBindingIndex( state, indexParam, nSurfaceEntries );
4138 
4139             // Get Offset to Current Binding Table
4140             uint32_t offsetDst = offsetCurrentBTStart + ( btIndex * sizeof( uint32_t ) );            // Move the pointer to correct entry
4141 
4142             uint32_t *bindingTableEntry = ( uint32_t *)( stateHeap->pSshBuffer + offsetDst );
4143 
4144             if ( pixelPitch )
4145             {
4146                 MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * nSurfaceEntries, state->bti2DIndexTable[ index ].BTITableEntry.samplerBtiEntryPosition, sizeof( uint32_t ) * nSurfaceEntries );
4147             }
4148             else
4149             {
4150                 MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * nSurfaceEntries, state->bti2DIndexTable[ index ].BTITableEntry.regularBtiEntryPosition, sizeof( uint32_t ) * nSurfaceEntries );
4151             }
4152 
4153             // update index to table
4154             if ( pixelPitch )
4155             {
4156                 state->bti2DIndexTable[ index ].BTI.samplerSurfIndex = btIndex;
4157                 state->bti2DIndexTable[ index ].BTITableEntry.samplerBtiEntryPosition = bindingTableEntry;
4158             }
4159             else
4160             {
4161                 state->bti2DIndexTable[ index ].BTI.regularSurfIndex = btIndex;
4162                 state->bti2DIndexTable[ index ].BTITableEntry.regularBtiEntryPosition = bindingTableEntry;
4163             }
4164         }
4165 
4166         // Update the Batch Buffer
4167         if ( buffer )
4168         {
4169             dst = buffer + argParam->payloadOffset;
4170             *( ( uint32_t *)dst ) = btIndex;
4171         }
4172 
4173         eStatus = MOS_STATUS_SUCCESS;
4174         goto finish;
4175     }
4176 
4177     CM_CHK_MOSSTATUS_GOTOFINISH( HalCm_GetSurfaceAndRegister( state, &renderHalSurface, CM_ARGUMENT_SURFACE2D, index, pixelPitch ) );
4178 
4179     // Setup 2D surface
4180     MOS_ZeroMemory(&surfaceParam, sizeof(surfaceParam));
4181     surfaceParam.Type       = renderHal->SurfaceTypeDefault;
4182     surfaceParam.Boundary   = RENDERHAL_SS_BOUNDARY_ORIGINAL;
4183     surfaceParam.bVertStride = 0;
4184     surfaceParam.bVertStrideOffs = 0;
4185     if (!pixelPitch) {
4186         surfaceParam.bWidthInDword_UV = true;
4187         surfaceParam.bWidthInDword_Y = true;
4188     }
4189 
4190     surfaceParam.isOutput = isRenderTarget(state, index);
4191     surfStateParam = &(state->umdSurf2DTable[index].surfaceStateParam[argParam->aliasIndex / state->surfaceArraySize]);
4192     if (surfStateParam->width)
4193     {
4194         surface->dwWidth = surfStateParam->width;
4195     }
4196     if (surfStateParam->height)
4197     {
4198         surface->dwHeight = surfStateParam->height;
4199     }
4200     if (surfStateParam->depth)
4201     {
4202         surface->dwDepth = surfStateParam->depth;
4203     }
4204     if (surfStateParam->pitch)
4205     {
4206         surface->dwPitch= surfStateParam->pitch;
4207     }
4208     if (surfStateParam->format)
4209     {
4210         UpdateSurfaceAliasPlaneOffset(surfStateParam, surface);
4211     }
4212     if (surfStateParam->surfaceXOffset)
4213     {
4214         surface->YPlaneOffset.iXOffset = surfStateParam->surfaceXOffset;
4215         if (surface->Format == Format_NV12)
4216         {
4217             surface->UPlaneOffset.iXOffset += surfStateParam->surfaceXOffset;
4218         }
4219     }
4220     if (surfStateParam->surfaceYOffset)
4221     {
4222         surface->YPlaneOffset.iYOffset = surfStateParam->surfaceYOffset;
4223         if (surface->Format == Format_NV12)
4224         {
4225             surface->UPlaneOffset.iYOffset += surfStateParam->surfaceYOffset/2;
4226         }
4227     }
4228     if (surfStateParam->memoryObjectControl)
4229     {
4230         memObjCtl = surfStateParam->memoryObjectControl;
4231     }
4232 
4233     if(pixelPitch)
4234         renderHalSurface.Rotation = state->umdSurf2DTable[index].rotationFlag;
4235 
4236     //Cache configurations
4237     state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam);
4238 
4239     // interlace setting
4240     HalCm_HwSetSurfaceProperty(state,
4241         state->umdSurf2DTable[index].frameType,
4242         &surfaceParam);
4243 
4244     CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSetupSurfaceState(
4245                   renderHal,
4246                   &renderHalSurface,
4247                   &surfaceParam,
4248                   &nSurfaceEntries,
4249                   surfaceEntries,
4250                   nullptr));
4251 
4252     nSurfaceEntries = MOS_MIN( nSurfaceEntries, MHW_MAX_SURFACE_PLANES );
4253 
4254     btIndex = HalCm_GetFreeBindingIndex(state, indexParam, nSurfaceEntries);
4255     for (i = 0; i < (uint32_t)nSurfaceEntries; i++)
4256     {
4257         // Bind the surface State
4258         CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnBindSurfaceState(
4259                         renderHal,
4260                         bindingTable,
4261                         btIndex + i,
4262                         surfaceEntries[i]));
4263         if ((taskParam->surfEntryInfoArrays.kernelNum !=0) &&
4264             (taskParam->surfEntryInfoArrays.surfEntryInfosArray != nullptr))
4265         {
4266             //GT-Pin
4267             CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceDetails(
4268                     state,
4269                     indexParam,
4270                     btIndex + i,
4271                     *surface,
4272                     0,
4273                     surfaceEntries[i],
4274                     tempPlaneIndex,
4275                     surfaceParam,
4276                     CM_ARGUMENT_SURFACE2D));
4277         }
4278         surfaceEntries[i]->pSurface->dwWidth = state->umdSurf2DTable[index].width;
4279         surfaceEntries[i]->pSurface->dwHeight = state->umdSurf2DTable[index].height;
4280     }
4281 
4282     // only update the reuse table for non-aliased surface
4283     if ( argParam->aliasCreated == false )
4284     {
4285         state->bti2DIndexTable[ index ].nPlaneNumber = nSurfaceEntries;
4286         // Get Offset to Current Binding Table
4287         stateHeap = renderHal->pStateHeap;
4288         offsetSrc = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +     // Points to the Base of Current SSH Buffer Instance
4289             ( stateHeap->iBindingTableOffset ) +                       // Moves the pointer to Base of Array of Binding Tables
4290             ( bindingTable * stateHeap->iBindingTableSize ) +         // Moves the pointer to a Particular Binding Table
4291             ( btIndex * sizeof( uint32_t ) );                             // Move the pointer to correct entry
4292 
4293         if ( pixelPitch )
4294         {
4295             state->bti2DIndexTable[ index ].BTI.samplerSurfIndex = btIndex;
4296             state->bti2DIndexTable[ index ].BTITableEntry.samplerBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
4297         }
4298         else
4299         {
4300             state->bti2DIndexTable[ index ].BTI.regularSurfIndex = btIndex;
4301             state->bti2DIndexTable[ index ].BTITableEntry.regularBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
4302         }
4303     }
4304 
4305     // Update the Batch Buffer
4306     if (buffer)
4307     {
4308         dst = buffer + argParam->payloadOffset;
4309         *((uint32_t*)dst) = btIndex;
4310     }
4311 
4312     // reset surface height and width
4313     surface->dwWidth = state->umdSurf2DTable[index].width;
4314     surface->dwHeight = state->umdSurf2DTable[index].height;
4315 
4316     eStatus = MOS_STATUS_SUCCESS;
4317 
4318 finish:
4319     return eStatus;
4320 }
4321 
4322 
HalCm_Setup2DSurfaceState(PCM_HAL_STATE state,PCM_HAL_KERNEL_ARG_PARAM argParam,PCM_HAL_INDEX_PARAM indexParam,int32_t bindingTable,uint32_t threadIndex,uint8_t * buffer)4323 MOS_STATUS HalCm_Setup2DSurfaceState(
4324     PCM_HAL_STATE              state,
4325     PCM_HAL_KERNEL_ARG_PARAM   argParam,
4326     PCM_HAL_INDEX_PARAM        indexParam,
4327     int32_t                    bindingTable,
4328     uint32_t                   threadIndex,
4329     uint8_t                    *buffer)
4330 {
4331     MOS_STATUS                 eStatus;
4332 
4333     if (state->cmHalInterface->GetDecompressFlag())
4334     {
4335         state->pfnDecompressSurface(state, argParam, threadIndex);
4336     }
4337 
4338     //Binding surface based at the unit of dword
4339     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceStateBasic(
4340                     state, argParam, indexParam, bindingTable, threadIndex, false, buffer, false));
4341     eStatus = MOS_STATUS_SUCCESS;
4342 
4343 finish:
4344     return eStatus;
4345 }
4346 
HalCm_Setup2DSurfaceSamplerState(PCM_HAL_STATE state,PCM_HAL_KERNEL_ARG_PARAM argParam,PCM_HAL_INDEX_PARAM indexParam,int32_t bindingTable,uint32_t threadIndex,uint8_t * buffer)4347 MOS_STATUS HalCm_Setup2DSurfaceSamplerState(
4348     PCM_HAL_STATE              state,
4349     PCM_HAL_KERNEL_ARG_PARAM   argParam,
4350     PCM_HAL_INDEX_PARAM        indexParam,
4351     int32_t                    bindingTable,
4352     uint32_t                   threadIndex,
4353     uint8_t                    *buffer)
4354 {
4355     MOS_STATUS                 eStatus;
4356 
4357     //Binding surface based at the unit of dword
4358     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceStateBasic(
4359         state, argParam, indexParam, bindingTable, threadIndex, true, buffer, false));
4360     eStatus = MOS_STATUS_SUCCESS;
4361 
4362 finish:
4363     return eStatus;
4364 }
4365 
4366 //*-----------------------------------------------------------------------------
4367 //| Purpose: Setup 2D surface State
4368 //| Returns: Result of the operation
4369 //*-----------------------------------------------------------------------------
HalCm_Setup2DSurfaceUPStateBasic(PCM_HAL_STATE state,PCM_HAL_KERNEL_ARG_PARAM argParam,PCM_HAL_INDEX_PARAM indexParam,int32_t bindingTable,uint32_t threadIndex,uint8_t * buffer,bool pixelPitch)4370 MOS_STATUS HalCm_Setup2DSurfaceUPStateBasic(
4371     PCM_HAL_STATE               state,
4372     PCM_HAL_KERNEL_ARG_PARAM    argParam,
4373     PCM_HAL_INDEX_PARAM         indexParam,
4374     int32_t                     bindingTable,
4375     uint32_t                    threadIndex,
4376     uint8_t                     *buffer,
4377     bool                        pixelPitch)
4378 {
4379     MOS_STATUS                  eStatus;
4380     RENDERHAL_SURFACE               surface;
4381     RENDERHAL_SURFACE_STATE_PARAMS  surfaceParam;
4382     PRENDERHAL_INTERFACE            renderHal;
4383     PRENDERHAL_SURFACE_STATE_ENTRY  surfaceEntries[MHW_MAX_SURFACE_PLANES];
4384     uint8_t                     *src;
4385     uint8_t                     *dst;
4386     int32_t                     nSurfaceEntries;
4387     uint32_t                    index;
4388     uint32_t                    btIndex;
4389     uint16_t                    memObjCtl;
4390     uint32_t                    i;
4391     uint32_t                    offsetSrc;
4392     PRENDERHAL_STATE_HEAP       stateHeap;
4393 
4394     eStatus              = MOS_STATUS_UNKNOWN;
4395     renderHal    = state->renderHal;
4396     //GT-PIN
4397     PCM_HAL_TASK_PARAM     taskParam = state->taskParam;
4398 
4399     // Get the Index to sampler array from the kernel data
4400     CM_ASSERT(argParam->unitSize == sizeof(index));
4401     src      = argParam->firstValue + (threadIndex * argParam->unitSize);
4402     index    = *((uint32_t*)src) & CM_SURFACE_MASK;
4403     if (index == CM_NULL_SURFACE)
4404     {
4405         if (buffer)
4406         {
4407             dst = buffer + argParam->payloadOffset;
4408             *((uint32_t*)dst) = CM_NULL_SURFACE_BINDING_INDEX;
4409         }
4410 
4411         eStatus = MOS_STATUS_SUCCESS;
4412         goto finish;
4413     }
4414 
4415     memObjCtl = state->surf2DUPTable[index].memObjCtl;
4416     if (!memObjCtl)
4417     {
4418         memObjCtl = CM_DEFAULT_CACHE_TYPE;
4419     }
4420 
4421     // check to see if the data present for the sampler in the array
4422     if (index >= state->cmDeviceParam.max2DSurfaceUPTableSize ||
4423         (state->surf2DUPTable[index].width == 0))
4424     {
4425         eStatus = MOS_STATUS_INVALID_PARAMETER;
4426         CM_ASSERTMESSAGE(
4427             "Invalid 2D SurfaceUP array index '%d'", index);
4428         goto finish;
4429     }
4430 
4431     // Check to see if surface is already assigned
4432     if ( pixelPitch )
4433     {
4434         btIndex = state->bti2DUPIndexTable[ index ].BTI.samplerSurfIndex;
4435     }
4436     else
4437     {
4438         btIndex = state->bti2DUPIndexTable[ index ].BTI.regularSurfIndex;
4439     }
4440 
4441     if ( btIndex == ( unsigned char )CM_INVALID_INDEX )
4442     {
4443         uint32_t tempPlaneIndex = 0;
4444 
4445         // Get Details of 2DUP surface and fill the surface
4446         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceAndRegister(state, &surface, CM_ARGUMENT_SURFACE2D_UP, index, pixelPitch));
4447 
4448         // Setup 2D surface
4449         MOS_ZeroMemory(&surfaceParam, sizeof(surfaceParam));
4450         surfaceParam.Type       = renderHal->SurfaceTypeDefault;
4451         surfaceParam.Boundary   = RENDERHAL_SS_BOUNDARY_ORIGINAL;
4452 
4453         if (!pixelPitch) {
4454             surfaceParam.bWidthInDword_UV = true;
4455             surfaceParam.bWidthInDword_Y = true;
4456         }
4457 
4458         surfaceParam.isOutput = true;
4459 
4460         //Cache configurations
4461         state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam);
4462 
4463         // interlace setting
4464         HalCm_HwSetSurfaceProperty(state,
4465             state->umdSurf2DTable[index].frameType,
4466             &surfaceParam);
4467 
4468         CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSetupSurfaceState(
4469                     renderHal,
4470                     &surface,
4471                     &surfaceParam,
4472                     &nSurfaceEntries,
4473                     surfaceEntries,
4474                     nullptr));
4475 
4476         //GT-PIN
4477         btIndex = HalCm_GetFreeBindingIndex(state, indexParam, nSurfaceEntries);
4478         for (i = 0; i < (uint32_t)nSurfaceEntries; i++)
4479         {
4480             // Bind the surface State
4481             CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnBindSurfaceState(
4482                         renderHal,
4483                         bindingTable,
4484                         btIndex + i,
4485                         surfaceEntries[i]));
4486             //GT-Pin
4487             if ((taskParam->surfEntryInfoArrays.kernelNum != 0) &&
4488                 (taskParam->surfEntryInfoArrays.surfEntryInfosArray != nullptr))
4489             {
4490                 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceDetails(
4491                          state,
4492                          indexParam,
4493                          btIndex + i,
4494                          surface.OsSurface,
4495                          0,
4496                          surfaceEntries[i],
4497                          tempPlaneIndex,
4498                          surfaceParam,
4499                          CM_ARGUMENT_SURFACE2D_UP));
4500             }
4501         }
4502         state->bti2DUPIndexTable[ index ].nPlaneNumber = nSurfaceEntries;
4503 
4504         stateHeap = renderHal->pStateHeap;
4505         offsetSrc = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +     // Points to the Base of Current SSH Buffer Instance
4506                             ( stateHeap->iBindingTableOffset ) +                       // Moves the pointer to Base of Array of Binding Tables
4507                             ( bindingTable * stateHeap->iBindingTableSize ) +         // Moves the pointer to a Particular Binding Table
4508                             ( btIndex * sizeof( uint32_t ) );                             // Move the pointer to correct entry
4509 
4510         if ( pixelPitch )
4511         {
4512             state->bti2DUPIndexTable[ index ].BTI.samplerSurfIndex = btIndex;
4513             state->bti2DUPIndexTable[ index ].BTITableEntry.samplerBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
4514         }
4515         else
4516         {
4517             state->bti2DUPIndexTable[ index ].BTI.regularSurfIndex = btIndex;
4518             state->bti2DUPIndexTable[ index ].BTITableEntry.regularBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
4519         }
4520     }
4521     else
4522     {
4523         stateHeap = renderHal->pStateHeap;
4524 
4525         // Get Offset to Current Binding Table
4526         uint32_t offsetCurrentBTStart = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +     // Points to the Base of Current SSH Buffer Instance
4527                                        ( stateHeap->iBindingTableOffset ) +                             // Moves the pointer to Base of Array of Binding Tables
4528                                        ( bindingTable * stateHeap->iBindingTableSize );                // Moves the pointer to a Particular Binding Table
4529 
4530         uint32_t *currentBTStart = ( uint32_t *)( stateHeap->pSshBuffer + offsetCurrentBTStart );
4531 
4532         int nEntryIndex = 0;
4533 
4534         if ( pixelPitch )
4535         {
4536             nEntryIndex = (int) ((uint32_t*)( state->bti2DUPIndexTable[ index ].BTITableEntry.samplerBtiEntryPosition ) - currentBTStart);
4537         }
4538         else
4539         {
4540             nEntryIndex = (int) ((uint32_t*)( state->bti2DUPIndexTable[ index ].BTITableEntry.regularBtiEntryPosition ) - currentBTStart);
4541         }
4542 
4543         if ( ( nEntryIndex < 0 ) || ( nEntryIndex >= renderHal->StateHeapSettings.iSurfacesPerBT ) )
4544         {
4545             uint32_t tmpSurfaceEntries = state->bti2DUPIndexTable[ index ].nPlaneNumber;
4546 
4547             btIndex = HalCm_GetFreeBindingIndex( state, indexParam, tmpSurfaceEntries );
4548 
4549             // Get Offset to Current Binding Table
4550             uint32_t offsetDst = offsetCurrentBTStart + ( btIndex * sizeof( uint32_t ) );                             // Move the pointer to correct entry
4551 
4552             uint32_t *bindingTableEntry = ( uint32_t *)( stateHeap->pSshBuffer + offsetDst );
4553             if ( pixelPitch )
4554             {
4555                 MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * tmpSurfaceEntries, state->bti2DUPIndexTable[ index ].BTITableEntry.samplerBtiEntryPosition, sizeof( uint32_t ) * tmpSurfaceEntries );
4556             }
4557             else
4558             {
4559                 MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * tmpSurfaceEntries, state->bti2DUPIndexTable[ index ].BTITableEntry.regularBtiEntryPosition, sizeof( uint32_t ) * tmpSurfaceEntries );
4560             }
4561 
4562             // update index to table
4563             if ( pixelPitch )
4564             {
4565                 state->bti2DUPIndexTable[ index ].BTI.samplerSurfIndex = btIndex;
4566                 state->bti2DUPIndexTable[ index ].BTITableEntry.samplerBtiEntryPosition = bindingTableEntry;
4567             }
4568             else
4569             {
4570                 state->bti2DUPIndexTable[ index ].BTI.regularSurfIndex = btIndex;
4571                 state->bti2DUPIndexTable[ index ].BTITableEntry.regularBtiEntryPosition = bindingTableEntry;
4572             }
4573         }
4574     }
4575 
4576     // Update the Batch Buffer
4577     if (buffer)
4578     {
4579         dst = buffer + argParam->payloadOffset;
4580         *((uint32_t*)dst) = btIndex;
4581     }
4582 
4583     eStatus = MOS_STATUS_SUCCESS;
4584 
4585 finish:
4586     return eStatus;
4587 }
4588 
HalCm_Setup2DSurfaceUPState(PCM_HAL_STATE state,PCM_HAL_KERNEL_ARG_PARAM argParam,PCM_HAL_INDEX_PARAM indexParam,int32_t bindingTable,uint32_t threadIndex,uint8_t * buffer)4589 MOS_STATUS HalCm_Setup2DSurfaceUPState(
4590     PCM_HAL_STATE               state,
4591     PCM_HAL_KERNEL_ARG_PARAM    argParam,
4592     PCM_HAL_INDEX_PARAM         indexParam,
4593     int32_t                     bindingTable,
4594     uint32_t                    threadIndex,
4595     uint8_t                     *buffer)
4596 {
4597     MOS_STATUS                 eStatus;
4598 
4599     //Binding surface based at the unit of dword
4600     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPStateBasic(
4601                     state, argParam, indexParam, bindingTable, threadIndex, buffer, false));
4602     eStatus = MOS_STATUS_SUCCESS;
4603 
4604 finish:
4605     return eStatus;
4606 }
4607 
HalCm_Setup2DSurfaceUPSamplerState(PCM_HAL_STATE state,PCM_HAL_KERNEL_ARG_PARAM argParam,PCM_HAL_INDEX_PARAM indexParam,int32_t bindingTable,uint32_t threadIndex,uint8_t * buffer)4608 MOS_STATUS HalCm_Setup2DSurfaceUPSamplerState(
4609     PCM_HAL_STATE               state,
4610     PCM_HAL_KERNEL_ARG_PARAM    argParam,
4611     PCM_HAL_INDEX_PARAM         indexParam,
4612     int32_t                     bindingTable,
4613     uint32_t                    threadIndex,
4614     uint8_t                     *buffer)
4615 {
4616     MOS_STATUS                 eStatus;
4617 
4618     //Binding surface based at the unit of pixel
4619     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPStateBasic(
4620                     state, argParam, indexParam, bindingTable, threadIndex, buffer, true));
4621     eStatus = MOS_STATUS_SUCCESS;
4622 
4623 finish:
4624     return eStatus;
4625 }
4626 
HalCm_SetupSpecificVmeSurfaceState(PCM_HAL_STATE state,PCM_HAL_INDEX_PARAM indexParam,int32_t bindingTable,uint32_t surfIndex,uint32_t btIndex,uint16_t memObjCtl,uint32_t surfaceStateWidth,uint32_t surfaceStateHeight)4627 MOS_STATUS HalCm_SetupSpecificVmeSurfaceState(
4628     PCM_HAL_STATE                     state,
4629     PCM_HAL_INDEX_PARAM               indexParam,
4630     int32_t                           bindingTable,
4631     uint32_t                          surfIndex,
4632     uint32_t                          btIndex,
4633     uint16_t                          memObjCtl,
4634     uint32_t                          surfaceStateWidth,
4635     uint32_t                          surfaceStateHeight)
4636 {
4637     MOS_STATUS                      eStatus;
4638     RENDERHAL_SURFACE               surface;
4639     int32_t                         nSurfaceEntries = 0;
4640     RENDERHAL_SURFACE_STATE_PARAMS  surfaceParam;
4641     PRENDERHAL_INTERFACE            renderHal;
4642     PRENDERHAL_SURFACE_STATE_ENTRY  surfaceEntries[MHW_MAX_SURFACE_PLANES];
4643     uint32_t                        tempPlaneIndex = 0;
4644     PMOS_SURFACE                    mosSurface = nullptr;
4645 
4646     eStatus               = MOS_STATUS_UNKNOWN;
4647     renderHal     = state->renderHal;
4648     nSurfaceEntries  = 0;
4649 
4650     PCM_HAL_TASK_PARAM taskParam = state->taskParam;
4651 
4652     // Get Details of VME surface and fill the surface
4653     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceAndRegister(state, &surface, CM_ARGUMENT_VME_STATE, surfIndex, 0));
4654 
4655     // Setup 2D surface
4656     MOS_ZeroMemory(&surfaceParam, sizeof(surfaceParam));
4657     surfaceParam.Type              = renderHal->SurfaceTypeAdvanced;
4658     surfaceParam.isOutput     = true;
4659     surfaceParam.bWidthInDword_Y   = false;
4660     surfaceParam.bWidthInDword_UV  = false;
4661     surfaceParam.Boundary          = RENDERHAL_SS_BOUNDARY_ORIGINAL;
4662     surfaceParam.bVmeUse           = true;
4663 
4664     // Overwrite the width and height if specified
4665     if (surfaceStateWidth && surfaceStateHeight)
4666     {
4667         mosSurface = &surface.OsSurface;
4668         if (surfaceStateWidth > mosSurface->dwWidth || surfaceStateHeight > mosSurface->dwHeight)
4669         {
4670             CM_ASSERTMESSAGE("Error: VME surface state's resolution is larger than the original surface.");
4671             eStatus = MOS_STATUS_INVALID_PARAMETER;
4672             goto finish;
4673         }
4674         mosSurface->dwWidth = surfaceStateWidth;
4675         mosSurface->dwHeight = surfaceStateHeight;
4676     }
4677 
4678     //Cache configurations
4679     state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam);
4680     CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSetupSurfaceState(
4681                         renderHal,
4682                         &surface,
4683                         &surfaceParam,
4684                         &nSurfaceEntries,
4685                         surfaceEntries,
4686                         nullptr));
4687 
4688     CM_ASSERT(nSurfaceEntries == 1);
4689 
4690     {
4691         // Bind the surface State
4692         CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnBindSurfaceState(
4693                             renderHal,
4694                             bindingTable,
4695                             btIndex,
4696                             surfaceEntries[0]));
4697 
4698         if ((taskParam->surfEntryInfoArrays.kernelNum != 0) &&
4699             (taskParam->surfEntryInfoArrays.surfEntryInfosArray != nullptr))
4700         {
4701             CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceDetails(
4702                     state,
4703                     indexParam,
4704                     btIndex,
4705                     surface.OsSurface,
4706                     0,
4707                     surfaceEntries[0],
4708                     tempPlaneIndex,
4709                     surfaceParam,
4710                     CM_ARGUMENT_SURFACE2D));
4711         }
4712     }
4713     state->bti2DIndexTable[ surfIndex ].BTI.vmeSurfIndex = btIndex;
4714 
4715     eStatus = MOS_STATUS_SUCCESS;
4716 
4717 finish:
4718     return eStatus;
4719 
4720 }
4721 
4722 //*-----------------------------------------------------------------------------
4723 //| Purpose: Setup VME surface State
4724 //| Returns: Result of the operation
4725 //*-----------------------------------------------------------------------------
HalCm_SetupVmeSurfaceState(PCM_HAL_STATE state,PCM_HAL_KERNEL_ARG_PARAM argParam,PCM_HAL_INDEX_PARAM indexParam,int32_t bindingTable,uint32_t threadIndex,uint8_t * buffer)4726 MOS_STATUS HalCm_SetupVmeSurfaceState(
4727     PCM_HAL_STATE               state,
4728     PCM_HAL_KERNEL_ARG_PARAM    argParam,
4729     PCM_HAL_INDEX_PARAM         indexParam,
4730     int32_t                     bindingTable,
4731     uint32_t                    threadIndex,
4732     uint8_t                     *buffer)
4733 {
4734     MOS_STATUS                  eStatus;
4735     PRENDERHAL_INTERFACE        renderHal;
4736     PCM_HAL_VME_ARG_VALUE       vmeSrc;
4737     uint8_t                     *dst;
4738     uint32_t                    index[CM_MAX_VME_BINDING_INDEX_1];
4739     uint16_t                    memObjCtl[CM_MAX_VME_BINDING_INDEX_1];
4740     uint32_t                    fwSurfCount = 0;
4741     uint32_t                    bwSurfCount = 0;
4742     bool                        alreadyBind = true;
4743     uint32_t                    surfPairNum;
4744     uint32_t                    idx;
4745     uint32_t                    curBTIndex;
4746     uint32_t                    btIndex;
4747     uint32_t                    surfaceStateWidth = 0;
4748     uint32_t                    surfaceStateHeight = 0;
4749     uint32_t                    *fPtr = nullptr;
4750     uint32_t                    *bPtr = nullptr;
4751     uint32_t                    *refSurfaces = nullptr;
4752 
4753     eStatus              = MOS_STATUS_UNKNOWN;
4754     renderHal    = state->renderHal;
4755     btIndex        = 0;
4756 
4757     MOS_ZeroMemory(memObjCtl, CM_MAX_VME_BINDING_INDEX_1*sizeof(uint16_t));
4758     MOS_ZeroMemory(index, CM_MAX_VME_BINDING_INDEX_1*sizeof(uint32_t));
4759 
4760     CM_ASSERT(argParam->unitSize <= sizeof(uint32_t)*(CM_MAX_VME_BINDING_INDEX_1 + 2));
4761     CM_ASSERT(threadIndex == 0); // VME surface is not allowed in thread arg
4762 
4763     vmeSrc = (PCM_HAL_VME_ARG_VALUE)argParam->firstValue;
4764     fwSurfCount = vmeSrc->fwRefNum;
4765     bwSurfCount = vmeSrc->bwRefNum;
4766     refSurfaces = findRefInVmeArg(vmeSrc);
4767 
4768     index[0] = vmeSrc->curSurface & CM_SURFACE_MASK;
4769     // check to see if index[0] is valid
4770     if (index[0] == CM_NULL_SURFACE)
4771     {
4772         if (buffer)
4773         {
4774             dst = buffer + argParam->payloadOffset;
4775             *((uint32_t*)dst) = CM_NULL_SURFACE_BINDING_INDEX;
4776         }
4777 
4778         eStatus = MOS_STATUS_SUCCESS;
4779         goto finish;
4780     }
4781 
4782     if (index[0] >= state->cmDeviceParam.max2DSurfaceTableSize ||
4783         Mos_ResourceIsNull(&state->umdSurf2DTable[index[0]].osResource))
4784     {
4785         eStatus = MOS_STATUS_INVALID_PARAMETER;
4786         CM_ASSERTMESSAGE(
4787             "Invalid 2D surface array index '%d'", index[0]);
4788         goto finish;
4789     }
4790 
4791     memObjCtl[0] = state->umdSurf2DTable[index[0]].memObjCtl;
4792     if (!memObjCtl[0])
4793     {
4794         memObjCtl[0] = CM_DEFAULT_CACHE_TYPE;
4795     }
4796     for (idx = 0; idx < (vmeSrc->fwRefNum + vmeSrc->bwRefNum); idx++)
4797     {
4798         index[idx + 1] = refSurfaces[idx] & CM_SURFACE_MASK;
4799         memObjCtl[idx + 1] = state->umdSurf2DTable[index[idx + 1]].memObjCtl;
4800         if (!memObjCtl[idx + 1])
4801         {
4802             memObjCtl[idx + 1] = CM_DEFAULT_CACHE_TYPE;
4803         }
4804     }
4805 
4806     surfaceStateWidth = vmeSrc->surfStateParam.surfaceStateWidth;
4807     surfaceStateHeight = vmeSrc->surfStateParam.surfaceStateHeight;
4808 
4809     fPtr = index + 1;
4810     bPtr = index + 1 + fwSurfCount;
4811 
4812     //Max surface pair number
4813     surfPairNum = fwSurfCount > bwSurfCount ? fwSurfCount : bwSurfCount;
4814 
4815     btIndex = curBTIndex = HalCm_GetFreeBindingIndex(state, indexParam, surfPairNum*2 + 1);
4816 
4817     HalCm_SetupSpecificVmeSurfaceState(state, indexParam, bindingTable, index[0], curBTIndex, memObjCtl[0], surfaceStateWidth, surfaceStateHeight);
4818     curBTIndex++;
4819 
4820     //Setup surface states interleavely for backward and forward surfaces pairs.
4821     for (idx = 0; idx < surfPairNum; idx++)
4822     {
4823         if (idx < fwSurfCount)
4824         {
4825             HalCm_SetupSpecificVmeSurfaceState(state, indexParam, bindingTable, fPtr[idx], curBTIndex, memObjCtl[idx + 1], surfaceStateWidth, surfaceStateHeight);
4826         }
4827         curBTIndex++;
4828 
4829         if (idx < bwSurfCount)
4830         {
4831             HalCm_SetupSpecificVmeSurfaceState(state, indexParam, bindingTable, bPtr[idx], curBTIndex, memObjCtl[idx+ 1 + fwSurfCount], surfaceStateWidth, surfaceStateHeight);
4832         }
4833         curBTIndex++;
4834     }
4835 
4836     // Update the Batch Buffer
4837     if (buffer)
4838     {
4839         dst = buffer + argParam->payloadOffset;
4840         *((uint32_t*)dst) = btIndex;
4841     }
4842 
4843     eStatus = MOS_STATUS_SUCCESS;
4844 
4845 finish:
4846     return eStatus;
4847 }
4848 
4849 static bool
UpdateMosSurfaceFromAliasState(CM_HAL_STATE * state,CM_HAL_KERNEL_ARG_PARAM * argParam,uint32_t surface_index,MOS_SURFACE * surface)4850 UpdateMosSurfaceFromAliasState(CM_HAL_STATE *state,
4851                                CM_HAL_KERNEL_ARG_PARAM *argParam,
4852                                uint32_t surface_index,
4853                                MOS_SURFACE *surface)
4854 {
4855     uint32_t surface_state_index = argParam->aliasIndex/state->surfaceArraySize;
4856     const CM_HAL_SURFACE2D_SURFACE_STATE_PARAM &surface_state_param
4857             = state->umdSurf2DTable[surface_index].surfaceStateParam[
4858                 surface_state_index];
4859     if (surface_state_param.width)
4860     {
4861         surface->dwWidth = surface_state_param.width;
4862     }
4863     if (surface_state_param.height)
4864     {
4865         surface->dwHeight = surface_state_param.height;
4866     }
4867     if (surface_state_param.depth)
4868     {
4869         surface->dwDepth = surface_state_param.depth;
4870     }
4871     if (surface_state_param.pitch)
4872     {
4873         surface->dwPitch= surface_state_param.pitch;
4874     }
4875     if (surface_state_param.format)
4876     {
4877         surface->Format
4878                 = static_cast<MOS_FORMAT>(surface_state_param.format);
4879     }
4880     if (surface_state_param.surfaceXOffset)
4881     {
4882         surface->YPlaneOffset.iXOffset = surface_state_param.surfaceXOffset;
4883     }
4884     if (surface_state_param.surfaceYOffset)
4885     {
4886         surface->YPlaneOffset.iYOffset = surface_state_param.surfaceYOffset;
4887     }
4888     if (surface_state_param.surfaceOffset)
4889     {
4890         surface->dwOffset = surface_state_param.surfaceOffset;
4891     }
4892 
4893     return true;
4894 }
4895 
4896 //*-----------------------------------------------------------------------------
4897 //| Purpose: Setup VME surface State
4898 //| Returns: Result of the operation
4899 //*-----------------------------------------------------------------------------
HalCm_SetupSampler8x8SurfaceState(PCM_HAL_STATE state,PCM_HAL_KERNEL_ARG_PARAM argParam,PCM_HAL_INDEX_PARAM indexParam,int32_t bindingTable,uint32_t threadIndex,uint8_t * buffer)4900 MOS_STATUS HalCm_SetupSampler8x8SurfaceState(
4901     PCM_HAL_STATE               state,
4902     PCM_HAL_KERNEL_ARG_PARAM    argParam,
4903     PCM_HAL_INDEX_PARAM         indexParam,
4904     int32_t                     bindingTable,
4905     uint32_t                    threadIndex,
4906     uint8_t                     *buffer)
4907 {
4908     MOS_STATUS                  eStatus;
4909     RENDERHAL_SURFACE               surface;
4910     RENDERHAL_SURFACE_STATE_PARAMS  surfaceParam;
4911     PRENDERHAL_INTERFACE            renderHal;
4912     PRENDERHAL_SURFACE_STATE_ENTRY  surfaceEntries[MHW_MAX_SURFACE_PLANES];
4913     uint8_t                     *src;
4914     uint8_t                     *dst;
4915     int32_t                     nSurfaceEntries;
4916     uint32_t                    index;
4917     uint16_t                    memObjCtl;
4918     int32_t                     i;
4919     uint32_t                    btIndex;
4920     uint32_t                    tempPlaneIndex = 0;
4921     uint32_t                    offsetSrc;
4922     PRENDERHAL_STATE_HEAP       stateHeap;
4923 
4924     eStatus               = MOS_STATUS_UNKNOWN;
4925     renderHal     = state->renderHal;
4926 
4927     PCM_HAL_TASK_PARAM          taskParam    = state->taskParam;
4928 
4929     nSurfaceEntries = 0;
4930 
4931     CM_ASSERT(argParam->unitSize == sizeof(uint32_t));
4932 
4933     src      = argParam->firstValue + (threadIndex * argParam->unitSize);
4934     index     = *((uint32_t*)src) & CM_SURFACE_MASK;
4935     if (index == CM_NULL_SURFACE)
4936     {
4937         if (buffer)
4938         {
4939             dst = buffer + argParam->payloadOffset;
4940             *((uint32_t*)dst) = CM_NULL_SURFACE_BINDING_INDEX;
4941         }
4942 
4943         eStatus = MOS_STATUS_SUCCESS;
4944         goto finish;
4945     }
4946 
4947     memObjCtl = state->umdSurf2DTable[index].memObjCtl;
4948     if (!memObjCtl)
4949     {
4950         memObjCtl = CM_DEFAULT_CACHE_TYPE;
4951     }
4952 
4953     // check to see if index is valid
4954     if (index >= state->cmDeviceParam.max2DSurfaceTableSize ||
4955        Mos_ResourceIsNull(&state->umdSurf2DTable[index].osResource))
4956     {
4957         eStatus = MOS_STATUS_INVALID_PARAMETER;
4958         CM_ASSERTMESSAGE(
4959             "Invalid 2D surface array index '%d'", index);
4960         goto finish;
4961     }
4962 
4963     renderHal->bEnableP010SinglePass = state->cmHalInterface->IsP010SinglePassSupported();
4964 
4965     btIndex = state->bti2DIndexTable[ index ].BTI.sampler8x8SurfIndex;
4966     if (btIndex == ( unsigned char )CM_INVALID_INDEX || argParam->aliasCreated)
4967     {
4968         // Get Details of Sampler8x8 surface and fill the surface
4969         CM_CHK_MOSSTATUS_GOTOFINISH( HalCm_GetSurfaceAndRegister( state, &surface, argParam->kind, index, 0 ) );
4970 
4971         // Setup surface
4972         MOS_ZeroMemory( &surfaceParam, sizeof( surfaceParam ) );
4973         surfaceParam.Type = renderHal->SurfaceTypeAdvanced;
4974         surfaceParam.isOutput = true;
4975         surfaceParam.bWidthInDword_Y = false;
4976         surfaceParam.bWidthInDword_UV = false;
4977         surfaceParam.Boundary = RENDERHAL_SS_BOUNDARY_ORIGINAL;
4978         surfaceParam.bVASurface = ( argParam->kind == CM_ARGUMENT_SURFACE_SAMPLER8X8_VA ) ? 1 : 0;
4979         surfaceParam.AddressControl = argParam->nCustomValue;
4980 
4981         UpdateMosSurfaceFromAliasState(state, argParam, index,
4982                                        &surface.OsSurface);
4983 
4984         //Set memory object control
4985         state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam);
4986 
4987         surface.Rotation = state->umdSurf2DTable[index].rotationFlag;
4988         surface.ChromaSiting = state->umdSurf2DTable[index].chromaSiting;
4989         surface.ScalingMode = RENDERHAL_SCALING_AVS;
4990         nSurfaceEntries = 0;
4991 
4992         // interlace setting
4993         HalCm_HwSetSurfaceProperty(state,
4994             state->umdSurf2DTable[index].frameType,
4995             &surfaceParam);
4996 
4997         CM_CHK_MOSSTATUS_GOTOFINISH( renderHal->pfnSetupSurfaceState(
4998             renderHal,
4999             &surface,
5000             &surfaceParam,
5001             &nSurfaceEntries,
5002             surfaceEntries,
5003             nullptr ) );
5004 
5005         CM_ASSERT( nSurfaceEntries == 1 );
5006 
5007         btIndex = HalCm_GetFreeBindingIndex( state, indexParam, nSurfaceEntries );
5008 
5009         for ( i = 0; i < nSurfaceEntries; i++ )
5010         {
5011             // Bind the surface State
5012             CM_CHK_MOSSTATUS_GOTOFINISH( renderHal->pfnBindSurfaceState(
5013                 renderHal,
5014                 bindingTable,
5015                 btIndex + i,
5016                 surfaceEntries[ i ] ) );
5017 
5018             if ( ( taskParam->surfEntryInfoArrays.kernelNum != 0 ) &&
5019                  ( taskParam->surfEntryInfoArrays.surfEntryInfosArray != nullptr ) )
5020             {
5021                 CM_CHK_MOSSTATUS_GOTOFINISH( HalCm_GetSurfaceDetails(
5022                     state,
5023                     indexParam,
5024                     btIndex + i,
5025                     surface.OsSurface,
5026                     0,
5027                     surfaceEntries[ i ],
5028                     tempPlaneIndex,
5029                     surfaceParam,
5030                     CM_ARGUMENT_SURFACE2D ) );
5031             }
5032         }
5033 
5034         stateHeap = renderHal->pStateHeap;
5035         offsetSrc = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +     // Points to the Base of Current SSH Buffer Instance
5036                       ( stateHeap->iBindingTableOffset ) +                       // Moves the pointer to Base of Array of Binding Tables
5037                       ( bindingTable * stateHeap->iBindingTableSize ) +         // Moves the pointer to a Particular Binding Table
5038                       ( btIndex * sizeof( uint32_t ) );                             // Move the pointer to correct entry
5039 
5040         state->bti2DIndexTable[ index ].nPlaneNumber = nSurfaceEntries;
5041         state->bti2DIndexTable[ index ].BTITableEntry.sampler8x8BtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
5042         state->bti2DIndexTable[ index ].BTI.sampler8x8SurfIndex = btIndex;
5043     }
5044     else
5045     {
5046         stateHeap = renderHal->pStateHeap;
5047 
5048         // Get Offset to Current Binding Table
5049         uint32_t offsetCurrentBTStart = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +     // Points to the Base of Current SSH Buffer Instance
5050                                        ( stateHeap->iBindingTableOffset ) +                             // Moves the pointer to Base of Array of Binding Tables
5051                                        ( bindingTable * stateHeap->iBindingTableSize );                // Moves the pointer to a Particular Binding Table
5052 
5053         uint32_t *currentBTStart = ( uint32_t *)( stateHeap->pSshBuffer + offsetCurrentBTStart );
5054 
5055         int nEntryIndex = 0;
5056 
5057         nEntryIndex = ( int )( ( uint32_t *)( state->bti2DIndexTable[ index ].BTITableEntry.sampler8x8BtiEntryPosition ) - currentBTStart );
5058 
5059         if ( ( nEntryIndex < 0 ) || ( nEntryIndex >= renderHal->StateHeapSettings.iSurfacesPerBT ) )
5060         {
5061             uint32_t tmpSurfaceEntries = state->bti2DIndexTable[ index ].nPlaneNumber;
5062 
5063             btIndex = HalCm_GetFreeBindingIndex( state, indexParam, tmpSurfaceEntries );
5064 
5065             // Get Offset to Current Binding Table
5066             uint32_t offsetDst = offsetCurrentBTStart + ( btIndex * sizeof( uint32_t ) );                             // Move the pointer to correct entry
5067 
5068             uint32_t *bindingTableEntry = ( uint32_t *)( stateHeap->pSshBuffer + offsetDst );
5069             MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * tmpSurfaceEntries, state->bti2DIndexTable[ index ].BTITableEntry.sampler8x8BtiEntryPosition, sizeof( uint32_t ) * tmpSurfaceEntries );
5070 
5071             // update index to table
5072             state->bti2DIndexTable[ index ].BTI.sampler8x8SurfIndex = btIndex;
5073             state->bti2DIndexTable[ index ].BTITableEntry.sampler8x8BtiEntryPosition = bindingTableEntry;
5074         }
5075     }
5076     // Update the Batch Buffer
5077     if ( buffer )
5078     {
5079         dst = buffer + argParam->payloadOffset;
5080         *( ( uint32_t *)dst ) = state->bti2DIndexTable[ index ].BTI.sampler8x8SurfIndex;
5081     }
5082 
5083     eStatus = MOS_STATUS_SUCCESS;
5084 
5085 finish:
5086     renderHal->bEnableP010SinglePass = false;
5087     return eStatus;
5088 }
5089 
5090 //*-----------------------------------------------------------------------------
5091 //| Purpose: Setup State Buffer surface State
5092 //| Returns: Result of the operation
5093 //*-----------------------------------------------------------------------------
HalCm_SetupStateBufferSurfaceState(PCM_HAL_STATE state,PCM_HAL_KERNEL_ARG_PARAM argParam,PCM_HAL_INDEX_PARAM indexParam,int32_t bindingTable,uint32_t threadIndex,uint8_t * buffer)5094 MOS_STATUS HalCm_SetupStateBufferSurfaceState(
5095     PCM_HAL_STATE               state,
5096     PCM_HAL_KERNEL_ARG_PARAM    argParam,
5097     PCM_HAL_INDEX_PARAM         indexParam,
5098     int32_t                     bindingTable,
5099     uint32_t                    threadIndex,
5100     uint8_t                     *buffer )
5101 {
5102     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5103     PRENDERHAL_INTERFACE            renderHal;
5104     RENDERHAL_SURFACE_STATE_PARAMS  surfaceParam;
5105     RENDERHAL_SURFACE               renderhalSurface;
5106     PRENDERHAL_SURFACE_STATE_ENTRY  surfaceEntry;
5107     uint32_t                        btIndex;
5108     CM_SURFACE_BTI_INFO             surfBTIInfo;
5109     uint16_t                        memObjCtl;
5110 
5111     state->cmHalInterface->GetHwSurfaceBTIInfo( &surfBTIInfo );
5112     uint32_t surfIndex = reinterpret_cast< uint32_t *>( argParam->firstValue )[ 0 ];
5113 
5114     surfIndex = surfIndex & CM_SURFACE_MASK;
5115     memObjCtl = state->bufferTable[ surfIndex ].memObjCtl;
5116 
5117     btIndex = HalCm_GetFreeBindingIndex( state, indexParam, 1 );
5118 
5119     renderHal = state->renderHal;
5120     MOS_ZeroMemory( &renderhalSurface, sizeof( renderhalSurface ) );
5121 
5122     // Get Details of Sampler8x8 surface and fill the surface
5123     CM_CHK_MOSSTATUS_GOTOFINISH( HalCm_GetSurfaceAndRegister( state, &renderhalSurface, argParam->kind, surfIndex, 0 ) );
5124 
5125     MOS_ZeroMemory( &surfaceParam, sizeof( surfaceParam ) );
5126 
5127     // Set the isOutput by default
5128     surfaceParam.isOutput = true;
5129 
5130     //Cache configurations default
5131     state->cmHalInterface->HwSetSurfaceMemoryObjectControl( memObjCtl, &surfaceParam );
5132 
5133     // Setup Buffer surface
5134     CM_CHK_MOSSTATUS_GOTOFINISH( renderHal->pfnSetupBufferSurfaceState(
5135         renderHal,
5136         &renderhalSurface,
5137         &surfaceParam,
5138         &surfaceEntry ) );
5139 
5140     // Bind the surface State
5141     CM_ASSERT( ( ( int32_t )btIndex ) < renderHal->StateHeapSettings.iSurfacesPerBT + surfBTIInfo.normalSurfaceStart );
5142     CM_CHK_MOSSTATUS_GOTOFINISH( renderHal->pfnBindSurfaceState(
5143         renderHal,
5144         bindingTable,
5145         btIndex,
5146         surfaceEntry ) );
5147 
5148     if ( buffer )
5149     {
5150         *( ( uint32_t *)( buffer + argParam->payloadOffset ) ) = btIndex;
5151     }
5152 
5153 finish:
5154     return eStatus;
5155 }
5156 
5157 //------------------------------------------------------------------------------
5158 //| Purpose: Get usr defined threadcount / threadgroup
5159 //| Returns:    Result of the operation
5160 //------------------------------------------------------------------------------
HalCm_GetMaxThreadCountPerThreadGroup(PCM_HAL_STATE state,uint32_t * threadsPerThreadGroup)5161 MOS_STATUS HalCm_GetMaxThreadCountPerThreadGroup(
5162     PCM_HAL_STATE                   state,                     // [in] Pointer to CM State
5163     uint32_t                        *threadsPerThreadGroup)     // [out] Pointer to threadsPerThreadGroup
5164 {
5165     MOS_STATUS                  eStatus = MOS_STATUS_SUCCESS;
5166 
5167     CM_PLATFORM_INFO      platformInfo;
5168     MOS_ZeroMemory(&platformInfo, sizeof(CM_PLATFORM_INFO));
5169     CM_CHK_MOSSTATUS_GOTOFINISH( state->pfnGetPlatformInfo( state, &platformInfo, false) );
5170 
5171     if (platformInfo.numMaxEUsPerPool)
5172     {
5173         *threadsPerThreadGroup = (platformInfo.numHWThreadsPerEU) * (platformInfo.numMaxEUsPerPool);
5174     }
5175     else
5176     {
5177         *threadsPerThreadGroup = (platformInfo.numHWThreadsPerEU) * (platformInfo.numEUsPerSubSlice);
5178     }
5179 
5180 finish:
5181     return eStatus;
5182 }
5183 
5184 //*-----------------------------------------------------------------------------
5185 //| Purpose:  Decodes hints to get number and size of kernel groups
5186 //| Returns:  Result of the operation
5187 //*-----------------------------------------------------------------------------
HalCm_GetNumKernelsPerGroup(uint8_t hintsBits,uint32_t numKernels,uint32_t * numKernelsPerGroup,uint32_t * numKernelGroups,uint32_t * remapKernelToGroup,uint32_t * remapGroupToKernel)5188 MOS_STATUS HalCm_GetNumKernelsPerGroup(
5189     uint8_t     hintsBits,
5190     uint32_t    numKernels,
5191     uint32_t    *numKernelsPerGroup,
5192     uint32_t    *numKernelGroups,
5193     uint32_t    *remapKernelToGroup,
5194     uint32_t    *remapGroupToKernel
5195     )
5196 {
5197     MOS_STATUS  eStatus   = MOS_STATUS_SUCCESS;
5198     uint32_t currGrp = 0;
5199     uint32_t i       = 0;
5200 
5201     // first group at least has one kernel
5202     numKernelsPerGroup[currGrp]++;
5203     remapGroupToKernel[currGrp] = 0;
5204 
5205     for( i = 0; i < numKernels - 1; ++i )
5206     {
5207         if( (hintsBits & CM_HINTS_LEASTBIT_MASK) == CM_HINTS_LEASTBIT_MASK )
5208         {
5209             currGrp++;
5210             *numKernelGroups = *numKernelGroups + 1;
5211 
5212             remapGroupToKernel[currGrp] = i + 1;
5213         }
5214         numKernelsPerGroup[currGrp]++;
5215         hintsBits >>= 1;
5216         remapKernelToGroup[i+1] = currGrp;
5217     }
5218 
5219     return eStatus;
5220 }
5221 
5222 //*-----------------------------------------------------------------------------
5223 //| Purpose:  Gets information about max parallelism graphs
5224 //|           numThreadsOnSides based on formula to sum 1 to n: (n(n+1))/2
5225 //| Returns:  Result of the operation
5226 //*-----------------------------------------------------------------------------
HalCm_GetParallelGraphInfo(uint32_t maximum,uint32_t numThreads,uint32_t width,uint32_t height,PCM_HAL_PARALLELISM_GRAPH_INFO graphInfo,CM_DEPENDENCY_PATTERN pattern,bool noDependencyCase)5227 MOS_STATUS HalCm_GetParallelGraphInfo(
5228     uint32_t                       maximum,
5229     uint32_t                       numThreads,
5230     uint32_t                       width,
5231     uint32_t                       height,
5232     PCM_HAL_PARALLELISM_GRAPH_INFO graphInfo,
5233     CM_DEPENDENCY_PATTERN          pattern,
5234     bool                           noDependencyCase)
5235 {
5236     MOS_STATUS eStatus             = MOS_STATUS_SUCCESS;
5237     uint32_t numThreadsOnSides = 0;
5238     uint32_t numMaxRepeat      = 0;
5239     uint32_t numSteps          = 0;
5240 
5241     switch( pattern )
5242     {
5243         case CM_NONE_DEPENDENCY:
5244             if (noDependencyCase)
5245             {
5246                 maximum = 1;
5247                 numMaxRepeat = width * height;
5248                 numSteps = width * height;
5249             }
5250             // do nothing will depend on other kernels
5251             break;
5252 
5253         case CM_VERTICAL_WAVE:
5254             numMaxRepeat = width;
5255             numSteps = width;
5256             break;
5257 
5258         case CM_HORIZONTAL_WAVE:
5259             numMaxRepeat = height;
5260             numSteps = height;
5261             break;
5262 
5263         case CM_WAVEFRONT:
5264             numThreadsOnSides = ( maximum - 1 ) * maximum;
5265             numMaxRepeat = (numThreads - numThreadsOnSides ) / maximum;
5266             numSteps = ( maximum - 1) * 2 + numMaxRepeat;
5267             break;
5268 
5269         case CM_WAVEFRONT26:
5270             numThreadsOnSides = ( maximum - 1 ) * maximum * 2;
5271             numMaxRepeat = (numThreads - numThreadsOnSides ) / maximum;
5272             numSteps = ( (maximum - 1) * 2 ) * 2 + numMaxRepeat;
5273             break;
5274 
5275         case CM_WAVEFRONT26Z:
5276             // do nothing already set outside of this function
5277             break;
5278 
5279         default:
5280             eStatus = MOS_STATUS_INVALID_PARAMETER;
5281             CM_ASSERTMESSAGE("Unsupported dependency pattern for EnqueueWithHints");
5282             goto finish;
5283     }
5284 
5285     graphInfo->maxParallelism = maximum;
5286     graphInfo->numMaxRepeat = numMaxRepeat;
5287     graphInfo->numSteps = numSteps;
5288 
5289 finish:
5290     return eStatus;
5291 }
5292 
5293 //*-----------------------------------------------------------------------------
5294 //| Purpose:  Sets dispatch pattern based on max parallelism for media objects
5295 //| Returns:  Result of the operation
5296 //*-----------------------------------------------------------------------------
HalCm_SetDispatchPattern(CM_HAL_PARALLELISM_GRAPH_INFO graphInfo,CM_DEPENDENCY_PATTERN pattern,uint32_t * dispatchFreq)5297 MOS_STATUS HalCm_SetDispatchPattern(
5298     CM_HAL_PARALLELISM_GRAPH_INFO  graphInfo,
5299     CM_DEPENDENCY_PATTERN          pattern,
5300     uint32_t                       *dispatchFreq
5301     )
5302 {
5303     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5304     uint32_t i  = 0;
5305     uint32_t j  = 0;
5306     uint32_t k  = 0;
5307 
5308     switch( pattern )
5309     {
5310     case CM_NONE_DEPENDENCY:
5311         break;
5312     case CM_HORIZONTAL_WAVE:
5313     case CM_VERTICAL_WAVE:
5314         for( i = 0; i < graphInfo.numSteps; ++i )
5315         {
5316             dispatchFreq[i] = graphInfo.maxParallelism;
5317         }
5318         break;
5319     case CM_WAVEFRONT:
5320         for( i = 1; i < graphInfo.maxParallelism; ++i )
5321         {
5322             dispatchFreq[i-1] = i;
5323         }
5324         for( j = 0; j < graphInfo.numMaxRepeat; ++i, ++j )
5325         {
5326             dispatchFreq[i-1] = graphInfo.maxParallelism;
5327         }
5328         for( j = graphInfo.maxParallelism - 1; i <= graphInfo.numSteps; ++i, --j )
5329         {
5330             dispatchFreq[i-1] = j;
5331         }
5332         break;
5333     case CM_WAVEFRONT26:
5334         for( i = 1, j = 0; i < graphInfo.maxParallelism; ++i, j +=2 )
5335         {
5336             dispatchFreq[j] = i;
5337             dispatchFreq[j+1] = i;
5338         }
5339         for( k = 0; k < graphInfo.numMaxRepeat; ++k, ++j)
5340         {
5341             dispatchFreq[j] = graphInfo.maxParallelism;
5342         }
5343         for( i = graphInfo.maxParallelism - 1; j < graphInfo.numSteps; j +=2, --i )
5344         {
5345             dispatchFreq[j] = i;
5346             dispatchFreq[j+1] = i;
5347         }
5348         break;
5349     case CM_WAVEFRONT26Z:
5350         break;
5351     default:
5352         eStatus = MOS_STATUS_INVALID_PARAMETER;
5353         CM_ASSERTMESSAGE("Unsupported dependency pattern for EnqueueWithHints");
5354         goto finish;
5355     }
5356 
5357 finish:
5358     return eStatus;
5359 }
5360 
5361 //*-----------------------------------------------------------------------------
5362 //| Purpose:  Sets dispatch frequency for kernel group based on number of steps
5363 //| Returns:  Result of the operation
5364 //*-----------------------------------------------------------------------------
HalCm_SetKernelGrpFreqDispatch(PCM_HAL_PARALLELISM_GRAPH_INFO graphInfo,PCM_HAL_KERNEL_GROUP_INFO groupInfo,uint32_t numKernelGroups,uint32_t * minSteps)5365 MOS_STATUS HalCm_SetKernelGrpFreqDispatch(
5366     PCM_HAL_PARALLELISM_GRAPH_INFO  graphInfo,
5367     PCM_HAL_KERNEL_GROUP_INFO       groupInfo,
5368     uint32_t                        numKernelGroups,
5369     uint32_t                        *minSteps)
5370 {
5371     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5372     uint32_t i  = 0;
5373     uint32_t j  = 0;
5374     uint32_t tmpSteps = 0;
5375     uint32_t kerIndex = 0;
5376 
5377     for( i = 0; i < numKernelGroups; ++i)
5378     {
5379         for( j = 0; j < groupInfo[i].numKernelsInGroup; ++j )
5380         {
5381             tmpSteps += graphInfo[kerIndex].numSteps;
5382             kerIndex++;
5383         }
5384 
5385         if ( tmpSteps )
5386         {
5387             *minSteps = MOS_MIN(*minSteps, tmpSteps);
5388             groupInfo[i].numStepsInGrp = tmpSteps;
5389         }
5390 
5391         tmpSteps = 0;
5392     }
5393 
5394     for( i = 0; i < numKernelGroups; ++i )
5395     {
5396         groupInfo[i].freqDispatch = (uint32_t)ceil( (groupInfo[i].numStepsInGrp / (double)*minSteps) );
5397     }
5398 
5399     return eStatus;
5400 }
5401 
5402 //*-----------------------------------------------------------------------------
5403 //| Purpose:  Sets dispatch pattern for kernel with no dependency based on
5404 //|           the minimum number of steps calculated from kernels with dependency
5405 //| Returns:  Result of the operation
5406 //*-----------------------------------------------------------------------------
HalCm_SetNoDependKernelDispatchPattern(uint32_t numThreads,uint32_t minSteps,uint32_t * dispatchFreq)5407 MOS_STATUS HalCm_SetNoDependKernelDispatchPattern(
5408     uint32_t                        numThreads,
5409     uint32_t                        minSteps,
5410     uint32_t                        *dispatchFreq)
5411 {
5412     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5413     uint32_t i = 0;
5414     uint32_t numEachStep = 0;
5415     uint32_t total = 0;
5416 
5417     numEachStep = numThreads / minSteps;
5418     for( i = 0; i < minSteps; ++i )
5419     {
5420         dispatchFreq[i] = numEachStep;
5421         total += numEachStep;
5422     }
5423 
5424     while( total != numThreads )
5425     {
5426         // dispatch more at beginning
5427         i = 0;
5428         dispatchFreq[i]++;
5429         total++;
5430         i++;
5431     }
5432 
5433     return eStatus;
5434 }
5435 
HalCm_FinishStatesForKernel(PCM_HAL_STATE state,PRENDERHAL_MEDIA_STATE mediaState,PMHW_BATCH_BUFFER batchBuffer,int32_t taskId,PCM_HAL_KERNEL_PARAM kernelParam,int32_t kernelIndex,PCM_HAL_INDEX_PARAM indexParam,int32_t bindingTable,int32_t mediaID,PRENDERHAL_KRN_ALLOCATION krnAllocation)5436 MOS_STATUS HalCm_FinishStatesForKernel(
5437     PCM_HAL_STATE                   state,                                     // [in] Pointer to CM State
5438     PRENDERHAL_MEDIA_STATE          mediaState,
5439     PMHW_BATCH_BUFFER               batchBuffer,                               // [in] Pointer to Batch Buffer
5440     int32_t                         taskId,                                    // [in] Task ID
5441     PCM_HAL_KERNEL_PARAM            kernelParam,
5442     int32_t                         kernelIndex,
5443     PCM_HAL_INDEX_PARAM             indexParam,
5444     int32_t                         bindingTable,
5445     int32_t                         mediaID,
5446     PRENDERHAL_KRN_ALLOCATION       krnAllocation
5447     )
5448 {
5449     MOS_STATUS                      eStatus = MOS_STATUS_SUCCESS;
5450     PCM_HAL_TASK_PARAM              taskParam = state->taskParam;
5451     PRENDERHAL_INTERFACE            renderHal = state->renderHal;
5452     PCM_HAL_WALKER_PARAMS           mediaWalkerParams = &kernelParam->walkerParams;
5453     PCM_GPGPU_WALKER_PARAMS         perKernelGpGpuWalkerParams = &kernelParam->gpgpuWalkerParams;
5454     PCM_HAL_SCOREBOARD              threadCoordinates = nullptr;
5455     PCM_HAL_MASK_AND_RESET          dependencyMask = nullptr;
5456     bool                            enableThreadSpace = false;
5457     bool                            enableKernelThreadSpace = false;
5458     PCM_HAL_SCOREBOARD              kernelThreadCoordinates = nullptr;
5459     UNUSED(taskId);
5460 
5461     MHW_MEDIA_OBJECT_PARAMS         mediaObjectParam;
5462     PCM_HAL_KERNEL_ARG_PARAM        argParam;
5463     MHW_PIPE_CONTROL_PARAMS         pipeControlParam;
5464     uint32_t                        i;
5465     uint32_t                        hdrSize;
5466     uint32_t                        aIndex;
5467     uint32_t                        tIndex;
5468     uint32_t                        index;
5469 
5470     //GT-PIN
5471     taskParam->curKernelIndex =  kernelIndex;
5472 
5473     CmSafeMemSet(&mediaObjectParam, 0, sizeof(MHW_MEDIA_OBJECT_PARAMS));
5474 
5475     if (perKernelGpGpuWalkerParams->gpgpuEnabled)
5476     {
5477         // GPGPU_WALKER, just update ID here. other fields are already filled.
5478         perKernelGpGpuWalkerParams->interfaceDescriptorOffset = mediaID;// mediaObjectParam.dwInterfaceDescriptorOffset;
5479     }
5480     else if (mediaWalkerParams->cmWalkerEnable)
5481     {
5482         // Media walker, just update ID here. other fields are already filled.
5483         mediaWalkerParams->interfaceDescriptorOffset = mediaID;
5484     }
5485     else
5486     {
5487         // MEDIA_OBJECT
5488         mediaObjectParam.dwInterfaceDescriptorOffset = mediaID;
5489         hdrSize = renderHal->pHwSizes->dwSizeMediaObjectHeaderCmd;
5490 
5491         if (kernelParam->indirectDataParam.indirectDataSize)
5492         {
5493             mediaObjectParam.dwInlineDataSize = 0;
5494         }
5495         else
5496         {
5497             mediaObjectParam.dwInlineDataSize = MOS_MAX(kernelParam->payloadSize, 4);
5498         }
5499 
5500         if (taskParam->threadCoordinates)
5501         {
5502             threadCoordinates = taskParam->threadCoordinates[kernelIndex];
5503             if (threadCoordinates)
5504             {
5505                 enableThreadSpace = true;
5506             }
5507         }
5508         else if (kernelParam->kernelThreadSpaceParam.threadCoordinates)
5509         {
5510             kernelThreadCoordinates = kernelParam->kernelThreadSpaceParam.threadCoordinates;
5511             if (kernelThreadCoordinates)
5512             {
5513                 enableKernelThreadSpace = true;
5514             }
5515         }
5516 
5517         if (taskParam->dependencyMasks)
5518         {
5519             dependencyMask = taskParam->dependencyMasks[kernelIndex];
5520         }
5521 
5522         CM_CHK_NULL_GOTOFINISH_MOSERROR( batchBuffer );
5523 
5524         uint8_t inlineData[CM_MAX_THREAD_PAYLOAD_SIZE];
5525         uint8_t *cmdInline = inlineData;
5526         uint32_t cmdSize = mediaObjectParam.dwInlineDataSize + hdrSize;
5527 
5528         // Setup states for arguments and threads
5529         if (((PCM_HAL_BB_ARGS)batchBuffer->pPrivateData)->refCount > 1)
5530         {
5531             uint8_t *bBuffer = batchBuffer->pData + batchBuffer->iCurrent;
5532             for (aIndex = 0; aIndex < kernelParam->numArgs; aIndex++)
5533             {
5534                 argParam = &kernelParam->argParams[aIndex];
5535 
5536                 if ((kernelParam->cmFlags & CM_KERNEL_FLAGS_CURBE) && !argParam->perThread)
5537                 {
5538                     continue;
5539                 }
5540 
5541                 for (tIndex = 0; tIndex < kernelParam->numThreads; tIndex++)
5542                 {
5543                     index = tIndex * argParam->perThread;
5544 
5545                     //-----------------------------------------------------
5546                     CM_ASSERT(argParam->payloadOffset < kernelParam->payloadSize);
5547                     //-----------------------------------------------------
5548 
5549                     switch(argParam->kind)
5550                     {
5551                     case CM_ARGUMENT_GENERAL:
5552                         break;
5553 
5554                     case CM_ARGUMENT_SAMPLER:
5555                         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSamplerState(
5556                             state, kernelParam, argParam, indexParam,  mediaID, index, nullptr));
5557                         break;
5558 
5559                     case CM_ARGUMENT_SURFACEBUFFER:
5560                         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupBufferSurfaceState(
5561                             state, argParam, indexParam, bindingTable, -1, index, nullptr));
5562                         break;
5563 
5564                     case CM_ARGUMENT_SURFACE2D_UP:
5565                         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPState(
5566                             state, argParam, indexParam, bindingTable, index, nullptr));
5567                         break;
5568 
5569                     case CM_ARGUMENT_SURFACE2DUP_SAMPLER:
5570                         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPSamplerState(
5571                             state, argParam, indexParam, bindingTable, index, nullptr));
5572                         break;
5573 
5574                     case CM_ARGUMENT_SURFACE2D_SAMPLER:
5575                         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceSamplerState(
5576                             state, argParam, indexParam, bindingTable, 0, nullptr));
5577                         break;
5578 
5579                     case CM_ARGUMENT_SURFACE2D:
5580                         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceState(
5581                             state, argParam, indexParam, bindingTable, index, nullptr));
5582                         break;
5583 
5584                     case CM_ARGUMENT_SURFACE3D:
5585                         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup3DSurfaceState(
5586                             state, argParam, indexParam, bindingTable, index, nullptr));
5587                         break;
5588 
5589                     case CM_ARGUMENT_SURFACE_VME:
5590                         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupVmeSurfaceState(
5591                             state, argParam, indexParam, bindingTable, 0, nullptr));
5592                         break;
5593 
5594                     case CM_ARGUMENT_SURFACE_SAMPLER8X8_AVS:
5595                     case CM_ARGUMENT_SURFACE_SAMPLER8X8_VA:
5596                         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSampler8x8SurfaceState(
5597                             state, argParam, indexParam, bindingTable, 0, nullptr));
5598                         break;
5599 
5600                     default:
5601                         eStatus = MOS_STATUS_INVALID_PARAMETER;
5602                         CM_ASSERTMESSAGE(
5603                             "Argument kind '%d' is not supported", argParam->kind);
5604                         goto finish;
5605                     }
5606                 }
5607 
5608                 if( dependencyMask )
5609                 {
5610                     if( dependencyMask[tIndex].resetMask == CM_RESET_DEPENDENCY_MASK )
5611                     {
5612                         MOS_SecureMemcpy(bBuffer + (CM_SCOREBOARD_MASK_POS_IN_MEDIA_OBJECT_CMD*sizeof(uint32_t)),
5613                             sizeof(uint8_t), &dependencyMask[tIndex].mask, sizeof(uint8_t));
5614                     }
5615                 }
5616                 batchBuffer->iCurrent += cmdSize;
5617                 bBuffer += cmdSize;
5618             }
5619         }
5620         else
5621         {
5622             //Insert synchronization if needed (PIPE_CONTROL)
5623             // 1. synchronization is set
5624             // 2. the next kernel has dependency pattern
5625             if((kernelIndex > 0) && ((taskParam->syncBitmap & ((uint64_t)1 << (kernelIndex-1))) || (kernelParam->kernelThreadSpaceParam.patternType != CM_NONE_DEPENDENCY)))
5626             {
5627                 pipeControlParam = g_cRenderHal_InitPipeControlParams;
5628                 pipeControlParam.presDest                = nullptr;
5629                 pipeControlParam.dwFlushMode             = MHW_FLUSH_CUSTOM; // Use custom flags
5630                 pipeControlParam.dwPostSyncOp            = MHW_FLUSH_NOWRITE;
5631                 pipeControlParam.bDisableCSStall         = false;
5632                 pipeControlParam.bTlbInvalidate          = false;
5633                 pipeControlParam.bFlushRenderTargetCache = true;
5634                 pipeControlParam.bInvalidateTextureCache = true;
5635                 CM_CHK_MOSSTATUS_RETURN(renderHal->pMhwMiInterface->AddPipeControl(nullptr, batchBuffer, &pipeControlParam));
5636             }
5637 
5638             uint8_t *bBuffer = batchBuffer->pData + batchBuffer->iCurrent;
5639             for (tIndex = 0; tIndex < kernelParam->numThreads; tIndex++)
5640             {
5641                 if (enableThreadSpace)
5642                 {
5643                     mediaObjectParam.VfeScoreboard.ScoreboardEnable = (state->scoreboardParams.ScoreboardMask==0) ? 0:1;
5644                     mediaObjectParam.VfeScoreboard.Value[0] = threadCoordinates[tIndex].x;
5645                     mediaObjectParam.VfeScoreboard.Value[1] = threadCoordinates[tIndex].y;
5646                     mediaObjectParam.VfeScoreboard.ScoreboardColor = threadCoordinates[tIndex].color;
5647                     mediaObjectParam.dwSliceDestinationSelect = threadCoordinates[tIndex].sliceSelect;
5648                     mediaObjectParam.dwHalfSliceDestinationSelect = threadCoordinates[tIndex].subSliceSelect;
5649                     if( !dependencyMask )
5650                     {
5651                         mediaObjectParam.VfeScoreboard.ScoreboardMask = (1 << state->scoreboardParams.ScoreboardMask)-1;
5652                     }
5653                     else
5654                     {
5655                         mediaObjectParam.VfeScoreboard.ScoreboardMask = dependencyMask[tIndex].mask;
5656                     }
5657                 }
5658                 else if (enableKernelThreadSpace)
5659                 {
5660                     mediaObjectParam.VfeScoreboard.ScoreboardEnable = (state->scoreboardParams.ScoreboardMask == 0) ? 0 : 1;
5661                     mediaObjectParam.VfeScoreboard.Value[0] = kernelThreadCoordinates[tIndex].x;
5662                     mediaObjectParam.VfeScoreboard.Value[1] = kernelThreadCoordinates[tIndex].y;
5663                     mediaObjectParam.VfeScoreboard.ScoreboardColor = kernelThreadCoordinates[tIndex].color;
5664                     mediaObjectParam.dwSliceDestinationSelect = kernelThreadCoordinates[tIndex].sliceSelect;
5665                     mediaObjectParam.dwHalfSliceDestinationSelect = kernelThreadCoordinates[tIndex].subSliceSelect;
5666                     if (!dependencyMask)
5667                     {
5668                         mediaObjectParam.VfeScoreboard.ScoreboardMask = (1 << state->scoreboardParams.ScoreboardMask) - 1;
5669                     }
5670                     else
5671                     {
5672                         mediaObjectParam.VfeScoreboard.ScoreboardMask = dependencyMask[tIndex].mask;
5673                     }
5674                 }
5675                 else
5676                 {
5677                     mediaObjectParam.VfeScoreboard.Value[0] = tIndex % taskParam->threadSpaceWidth;
5678                     mediaObjectParam.VfeScoreboard.Value[1] = tIndex / taskParam->threadSpaceWidth;
5679                 }
5680 
5681                 for (aIndex = 0; aIndex < kernelParam->numArgs; aIndex++)
5682                 {
5683                     argParam = &kernelParam->argParams[aIndex];
5684                     index = tIndex * argParam->perThread;
5685 
5686                     if ((kernelParam->cmFlags & CM_KERNEL_FLAGS_CURBE) && !argParam->perThread)
5687                     {
5688                         continue;
5689                     }
5690 
5691                     //-----------------------------------------------------
5692                     CM_ASSERT(argParam->payloadOffset < kernelParam->payloadSize);
5693                     //-----------------------------------------------------
5694 
5695                     switch(argParam->kind)
5696                     {
5697                     case CM_ARGUMENT_GENERAL:
5698                         MOS_SecureMemcpy(
5699                             cmdInline + argParam->payloadOffset,
5700                             argParam->unitSize,
5701                             argParam->firstValue + index * argParam->unitSize,
5702                             argParam->unitSize);
5703                         break;
5704 
5705                     case CM_ARGUMENT_SAMPLER:
5706                         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSamplerState(
5707                             state, kernelParam, argParam, indexParam,  mediaID, index, cmdInline));
5708                         break;
5709 
5710                     case CM_ARGUMENT_SURFACEBUFFER:
5711                         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupBufferSurfaceState(
5712                             state, argParam, indexParam, bindingTable, -1, index, cmdInline));
5713                         break;
5714 
5715                     case CM_ARGUMENT_SURFACE2D_UP:
5716                         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPState(
5717                             state, argParam, indexParam, bindingTable, index, cmdInline));
5718                         break;
5719 
5720                     case CM_ARGUMENT_SURFACE2DUP_SAMPLER:
5721                         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPSamplerState(
5722                             state, argParam, indexParam, bindingTable, index, cmdInline));
5723                         break;
5724 
5725                     case CM_ARGUMENT_SURFACE2D_SAMPLER:
5726                         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceSamplerState(
5727                             state, argParam, indexParam, bindingTable, index, cmdInline));
5728                         break;
5729 
5730                     case CM_ARGUMENT_SURFACE2D:
5731                         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceState(
5732                             state, argParam, indexParam, bindingTable, index, cmdInline));
5733                         break;
5734 
5735                     case CM_ARGUMENT_SURFACE3D:
5736                         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup3DSurfaceState(
5737                             state, argParam, indexParam, bindingTable, index, cmdInline));
5738                         break;
5739 
5740                     case CM_ARGUMENT_SURFACE_VME:
5741                         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupVmeSurfaceState(
5742                             state, argParam, indexParam, bindingTable, 0, cmdInline));
5743                         break;
5744 
5745                     case CM_ARGUMENT_SURFACE_SAMPLER8X8_AVS:
5746                     case CM_ARGUMENT_SURFACE_SAMPLER8X8_VA:
5747                         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSampler8x8SurfaceState(
5748                             state, argParam, indexParam, bindingTable, 0, cmdInline));
5749                         break;
5750 
5751                     default:
5752                         eStatus = MOS_STATUS_INVALID_PARAMETER;
5753                         CM_ASSERTMESSAGE(
5754                             "Argument kind '%d' is not supported", argParam->kind);
5755                         goto finish;
5756                     }
5757                 }
5758 
5759                 mediaObjectParam.pInlineData = inlineData;
5760                 state->renderHal->pMhwRenderInterface->AddMediaObject(nullptr, batchBuffer, &mediaObjectParam);
5761             }
5762         }
5763     }
5764 
5765     for (i = 0; i < CM_MAX_GLOBAL_SURFACE_NUMBER; i++) {
5766         if ((kernelParam->globalSurface[i] & CM_SURFACE_MASK) != CM_NULL_SURFACE)
5767         {
5768              CM_HAL_KERNEL_ARG_PARAM   tempArgParam;
5769              argParam = &tempArgParam;
5770 
5771              tempArgParam.kind = CM_ARGUMENT_SURFACEBUFFER;
5772              tempArgParam.payloadOffset = 0;
5773              tempArgParam.unitCount = 1;
5774              tempArgParam.unitSize = sizeof(uint32_t);
5775              tempArgParam.perThread = false;
5776              tempArgParam.firstValue = (uint8_t*)&kernelParam->globalSurface[i];
5777              tempArgParam.aliasIndex = 0;
5778              tempArgParam.aliasCreated = false;
5779 
5780              CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupBufferSurfaceState(
5781                        state, argParam, indexParam, bindingTable, (int16_t)i, 0, nullptr));
5782         }
5783     }
5784 
5785     // set number of samplers
5786     krnAllocation->Params.Sampler_Count = indexParam->samplerIndexCount;
5787 
5788     // add SIP surface
5789     if (kernelParam->kernelDebugEnabled)
5790     {
5791         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSipSurfaceState(state, indexParam, bindingTable));
5792     }
5793 
5794 finish:
5795     return eStatus;
5796 }
5797 
5798 //*-----------------------------------------------------------------------------
5799 //| Purpose:  Finishes setting up HW states for the kernel
5800 //|           Used by EnqueueWithHints
5801 //| Returns:  Result of the operation
5802 //*-----------------------------------------------------------------------------
5803 MOS_STATUS HalCm_FinishStatesForKernelMix(
5804     PCM_HAL_STATE                      state,
5805     PMHW_BATCH_BUFFER                  batchBuffer,
5806     int32_t                            taskId,
5807     PCM_HAL_KERNEL_PARAM*              cmExecKernels,
5808     PCM_HAL_INDEX_PARAM                indexParams,
5809     int32_t                            *bindingTableEntries,
5810     int32_t                            *mediaIds,
5811     PRENDERHAL_KRN_ALLOCATION         *krnAllocations,
5812     uint32_t                           numKernels,
5813     uint32_t                           hints,
5814     bool                               lastTask)
5815 {
5816     MOS_STATUS                         eStatus                = MOS_STATUS_SUCCESS;
5817     PRENDERHAL_INTERFACE               renderHal              = state->renderHal;
5818     PMHW_MEDIA_OBJECT_PARAMS           mediaObjectParams      = nullptr;
5819     PCM_HAL_KERNEL_PARAM*              kernelParams           = nullptr;
5820     PCM_HAL_KERNEL_ARG_PARAM*          argParams              = nullptr;
5821     PCM_HAL_BB_ARGS                    bbCmArgs               = nullptr;
5822     PMHW_VFE_SCOREBOARD                scoreboardParams       = nullptr;
5823     PCM_HAL_PARALLELISM_GRAPH_INFO     parallelGraphInfo      = nullptr;
5824     PCM_HAL_KERNEL_ARG_PARAM           argParam               = nullptr;
5825     PCM_HAL_KERNEL_SUBSLICE_INFO       kernelsSliceInfo       = nullptr;
5826     PCM_HAL_KERNEL_THREADSPACE_PARAM   kernelTSParam          = nullptr;
5827     PCM_HAL_KERNEL_GROUP_INFO          groupInfo              = nullptr;
5828     CM_HAL_DEPENDENCY                  vfeDependencyInfo             ;
5829     CM_PLATFORM_INFO                   platformInfo                  ;
5830     CM_GT_SYSTEM_INFO                  systemInfo                    ;
5831     CM_HAL_SCOREBOARD_XY_MASK          threadCoordinates             ;
5832     uint32_t                           **dependRemap            = nullptr;
5833     uint32_t                           **dispatchFreq           = nullptr;
5834     uint8_t                            **cmdInline             = nullptr;
5835     uint32_t                           *cmdSizes              = nullptr;
5836     uint32_t                           *remapKrnToGrp          = nullptr;
5837     uint32_t                           *remapGrpToKrn          = nullptr;
5838     uint32_t                           *numKernelsPerGrp       = nullptr;
5839     uint8_t                            *kernelScoreboardMask   = nullptr;
5840     uint8_t                            hintsBits               = 0;
5841     uint8_t                            tmpThreadScoreboardMask = 0;
5842     uint8_t                            scoreboardMask          = 0;
5843     bool                               singleSubSlice         = false;
5844     bool                               enableThreadSpace      = false;
5845     bool                               kernelFound            = false;
5846     bool                               updateCurrKernel       = false;
5847     bool                               noDependencyCase       = false;
5848     bool                               sufficientSliceInfo    = true;
5849     uint32_t                           adjustedYCoord         = 0;
5850     uint32_t                           numKernelGroups         = CM_HINTS_DEFAULT_NUM_KERNEL_GRP;
5851     uint32_t                           totalNumThreads         = 0;
5852     uint32_t                           hdrSize                = 0;
5853     uint32_t                           i                       = 0;
5854     uint32_t                           j                       = 0;
5855     uint32_t                           k                       = 0;
5856     uint32_t                           tmp                     = 0;
5857     uint32_t                           tmp1                    = 0;
5858     uint32_t                           loopCount               = 0;
5859     uint32_t                           aIndex                  = 0;
5860     uint32_t                           index                   = 0;
5861     uint32_t                           totalReqSubSlices       = 0;
5862     uint32_t                           difference              = 0;
5863     uint32_t                           curKernel               = 0;
5864     uint32_t                           numSet                  = 0;
5865     uint32_t                           numSubSlicesEnabled     = 0;
5866     uint32_t                           sliceIndex              = 0;
5867     uint32_t                           tmpNumSubSlice          = 0;
5868     uint32_t                           tmpNumKernelsPerGrp     = 0;
5869     uint32_t                           maximum                 = 0;
5870     uint32_t                           count                   = 0;
5871     uint32_t                           numDispatched           = 0;
5872     uint32_t                           tmpIndex                = 0;
5873     uint32_t                           numStepsDispatched      = 0;
5874     uint32_t                           minSteps                = UINT_MAX;
5875     uint32_t                           grpId                   = 0;
5876     uint32_t                           allocSize               = 0;
5877     uint32_t                           currentKernel          = 0;
5878     uint32_t                           roundRobinCount        = 0;
5879     uint32_t                           numTasks                = 0;
5880     uint32_t                           extraSWThreads          = 0;
5881     UNUSED(taskId);
5882 
5883     CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer);
5884 
5885     MOS_ZeroMemory(&threadCoordinates, sizeof(CM_HAL_SCOREBOARD_XY_MASK));
5886     MOS_ZeroMemory(&vfeDependencyInfo, sizeof(CM_HAL_DEPENDENCY));
5887     MOS_ZeroMemory(&platformInfo, sizeof(CM_PLATFORM_INFO));
5888     MOS_ZeroMemory(&systemInfo, sizeof(CM_GT_SYSTEM_INFO));
5889 
5890     mediaObjectParams = (PMHW_MEDIA_OBJECT_PARAMS)MOS_AllocAndZeroMemory(sizeof(MHW_MEDIA_OBJECT_PARAMS)*numKernels);
5891     kernelParams = (PCM_HAL_KERNEL_PARAM*)MOS_AllocAndZeroMemory(sizeof(PCM_HAL_KERNEL_PARAM)*numKernels);
5892     argParams    = (PCM_HAL_KERNEL_ARG_PARAM*)MOS_AllocAndZeroMemory(sizeof(PCM_HAL_KERNEL_ARG_PARAM)*numKernels);
5893     cmdInline = (uint8_t**)MOS_AllocAndZeroMemory(sizeof(uint8_t*)*numKernels);
5894     cmdSizes = (uint32_t*)MOS_AllocAndZeroMemory(sizeof(uint32_t)*numKernels);
5895     remapKrnToGrp = (uint32_t*)MOS_AllocAndZeroMemory(sizeof(uint32_t)*numKernels);
5896     remapGrpToKrn = (uint32_t*)MOS_AllocAndZeroMemory(sizeof(uint32_t)*numKernels);
5897     kernelScoreboardMask = (uint8_t*)MOS_AllocAndZeroMemory(sizeof(uint8_t)*numKernels);
5898     dependRemap = (uint32_t**)MOS_AllocAndZeroMemory(sizeof(uint32_t*)*numKernels);
5899     parallelGraphInfo = (PCM_HAL_PARALLELISM_GRAPH_INFO)MOS_AllocAndZeroMemory(sizeof(CM_HAL_PARALLELISM_GRAPH_INFO)*numKernels);
5900     dispatchFreq = (uint32_t**)MOS_AllocAndZeroMemory(sizeof(uint32_t*)*numKernels);
5901     numKernelsPerGrp = (uint32_t*)MOS_AllocAndZeroMemory(sizeof(uint32_t)*numKernels);
5902 
5903     if( !mediaObjectParams || !kernelParams || !argParams ||
5904         !cmdInline || !cmdSizes ||
5905         !remapKrnToGrp || !remapGrpToKrn || !kernelScoreboardMask || !dependRemap ||
5906         !parallelGraphInfo || !dispatchFreq || !numKernelsPerGrp )
5907     {
5908         eStatus = MOS_STATUS_INVALID_PARAMETER;
5909         CM_ASSERTMESSAGE("Memory allocation failed in EnqueueWithHints");
5910         goto finish;
5911     }
5912 
5913     state->euSaturationEnabled = true;
5914 
5915     hintsBits = (hints & CM_HINTS_MASK_KERNEL_GROUPS) >> CM_HINTS_NUM_BITS_WALK_OBJ;
5916     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetNumKernelsPerGroup(hintsBits, numKernels, numKernelsPerGrp,
5917         &numKernelGroups, remapKrnToGrp, remapGrpToKrn));
5918 
5919     kernelsSliceInfo = (PCM_HAL_KERNEL_SUBSLICE_INFO)MOS_AllocAndZeroMemory(sizeof(CM_HAL_KERNEL_SUBSLICE_INFO)*numKernelGroups);
5920     groupInfo = (PCM_HAL_KERNEL_GROUP_INFO)MOS_AllocAndZeroMemory(sizeof(CM_HAL_KERNEL_GROUP_INFO)*numKernelGroups);
5921     if( !kernelsSliceInfo || !groupInfo )
5922     {
5923         eStatus = MOS_STATUS_INVALID_PARAMETER;
5924         CM_ASSERTMESSAGE("Memory allocation failed in EnqueueWithHints");
5925         goto finish;
5926     }
5927 
5928     for( i = 0; i < numKernelGroups; ++i)
5929     {
5930         groupInfo[i].numKernelsInGroup = numKernelsPerGrp[i];
5931     }
5932 
5933     hdrSize = renderHal->pHwSizes->dwSizeMediaObjectHeaderCmd;
5934 
5935     for ( i = 0; i < numKernels; ++i )
5936     {
5937         kernelParams[i] = cmExecKernels[i];
5938 
5939         mediaObjectParams[i].dwInterfaceDescriptorOffset = mediaIds[i];
5940         mediaObjectParams[i].dwInlineDataSize = MOS_MAX(kernelParams[i]->payloadSize, 4);
5941 
5942         cmdInline[i] = (uint8_t*)MOS_AllocAndZeroMemory(sizeof(uint8_t) * 1024);
5943         cmdSizes[i] = mediaObjectParams[i].dwInlineDataSize + hdrSize;
5944 
5945         totalNumThreads += kernelParams[i]->numThreads;
5946     }
5947 
5948     numTasks = ( hints & CM_HINTS_MASK_NUM_TASKS ) >> CM_HINTS_NUM_BITS_TASK_POS;
5949     if( numTasks > 1 )
5950     {
5951         if( lastTask )
5952         {
5953             extraSWThreads = totalNumThreads % numTasks;
5954         }
5955 
5956         totalNumThreads = (totalNumThreads / numTasks) + extraSWThreads;
5957     }
5958 
5959     for( i = 0; i < numKernels; ++i )
5960     {
5961         dependRemap[i] = (uint32_t*)MOS_AllocAndZeroMemory(sizeof(uint32_t) * CM_HAL_MAX_DEPENDENCY_COUNT);
5962         for( k = 0; k < CM_HAL_MAX_DEPENDENCY_COUNT; ++k )
5963         {
5964             // initialize each index to map to itself
5965             dependRemap[i][k] = k;
5966         }
5967     }
5968 
5969     for( i = 0; i < numKernels; ++i )
5970     {
5971         kernelTSParam = &kernelParams[i]->kernelThreadSpaceParam;
5972 
5973         // calculate union dependency vector of all kernels with dependency
5974         if( kernelTSParam->dependencyInfo.count )
5975         {
5976             if( vfeDependencyInfo.count == 0 )
5977             {
5978                 MOS_SecureMemcpy(&vfeDependencyInfo, sizeof(CM_HAL_DEPENDENCY), &kernelTSParam->dependencyInfo, sizeof(CM_HAL_DEPENDENCY));
5979                 kernelScoreboardMask[i] = ( 1 << vfeDependencyInfo.count ) - 1;
5980             }
5981             else
5982             {
5983                 for( j = 0; j < kernelTSParam->dependencyInfo.count; ++j )
5984                 {
5985                     for( k = 0; k < vfeDependencyInfo.count; ++k )
5986                     {
5987                         if( (kernelTSParam->dependencyInfo.deltaX[j] == vfeDependencyInfo.deltaX[k]) &&
5988                             (kernelTSParam->dependencyInfo.deltaY[j] == vfeDependencyInfo.deltaY[k]) )
5989                         {
5990                             CM_HAL_SETBIT(kernelScoreboardMask[i], k);
5991                             dependRemap[i][j] = k;
5992                             break;
5993                         }
5994                     }
5995                     if ( k == vfeDependencyInfo.count )
5996                     {
5997                         vfeDependencyInfo.deltaX[vfeDependencyInfo.count] = kernelTSParam->dependencyInfo.deltaX[j];
5998                         vfeDependencyInfo.deltaY[vfeDependencyInfo.count] = kernelTSParam->dependencyInfo.deltaY[j];
5999                         CM_HAL_SETBIT(kernelScoreboardMask[i], vfeDependencyInfo.count);
6000                         vfeDependencyInfo.count++;
6001                         dependRemap[i][j] = k;
6002                     }
6003                 }
6004             }
6005         }
6006     } // for num kernels
6007 
6008     if( vfeDependencyInfo.count > CM_HAL_MAX_DEPENDENCY_COUNT )
6009     {
6010         eStatus = MOS_STATUS_INVALID_PARAMETER;
6011         CM_ASSERTMESSAGE("Union of kernel dependencies exceeds max dependency count (8)");
6012         goto finish;
6013     }
6014 
6015     // set VFE scoreboarding information from union of kernel dependency vectors
6016     scoreboardParams = &state->scoreboardParams;
6017     scoreboardParams->ScoreboardMask = (uint8_t)vfeDependencyInfo.count;
6018     for( i = 0; i < scoreboardParams->ScoreboardMask; ++i )
6019     {
6020         scoreboardParams->ScoreboardDelta[i].x = vfeDependencyInfo.deltaX[i];
6021         scoreboardParams->ScoreboardDelta[i].y = vfeDependencyInfo.deltaY[i];
6022     }
6023 
6024     if (vfeDependencyInfo.count == 0)
6025     {
6026         noDependencyCase = true;
6027     }
6028 
6029     CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnGetPlatformInfo(state, &platformInfo, true));
6030     singleSubSlice = (platformInfo.numSubSlices == 1) ? true : false;
6031 
6032     CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnGetGTSystemInfo(state, &systemInfo));
6033 
6034     if( !singleSubSlice )
6035     {
6036         for( i = 0; i < numKernelGroups; ++i )
6037         {
6038             tmpNumKernelsPerGrp = numKernelsPerGrp[i];
6039 
6040             for( j = 0; j < tmpNumKernelsPerGrp; ++j )
6041             {
6042                 kernelTSParam = &kernelParams[count]->kernelThreadSpaceParam;
6043 
6044                 switch( kernelTSParam->patternType )
6045                 {
6046                 case CM_NONE_DEPENDENCY:
6047                     maximum = kernelParams[count]->numThreads;
6048                     break;
6049                 case CM_WAVEFRONT:
6050                     maximum = MOS_MIN(kernelTSParam->threadSpaceWidth, kernelTSParam->threadSpaceHeight);
6051                     break;
6052                 case CM_WAVEFRONT26:
6053                     maximum = MOS_MIN( ((kernelTSParam->threadSpaceWidth + 1) >> 1), kernelTSParam->threadSpaceHeight);
6054                     break;
6055                 case CM_VERTICAL_WAVE:
6056                     maximum = kernelTSParam->threadSpaceHeight;
6057                     break;
6058                 case CM_HORIZONTAL_WAVE:
6059                     maximum = kernelTSParam->threadSpaceWidth;
6060                     break;
6061                 case CM_WAVEFRONT26Z:
6062                     maximum = MOS_MIN( ((kernelTSParam->threadSpaceWidth - 1) >> 1), kernelTSParam->threadSpaceHeight);
6063                     break;
6064                 default:
6065                     eStatus = MOS_STATUS_INVALID_PARAMETER;
6066                     CM_ASSERTMESSAGE("Unsupported dependency pattern for EnqueueWithHints");
6067                     goto finish;
6068                 }
6069 
6070                 if( kernelTSParam->patternType != CM_WAVEFRONT26Z )
6071                 {
6072                     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetParallelGraphInfo(maximum, kernelParams[count]->numThreads,
6073                         kernelTSParam->threadSpaceWidth, kernelTSParam->threadSpaceHeight,
6074                         &parallelGraphInfo[count], kernelTSParam->patternType, noDependencyCase));
6075                 }
6076                 else
6077                 {
6078                     parallelGraphInfo[count].numSteps = kernelTSParam->dispatchInfo.numWaves;
6079                 }
6080 
6081                 if( kernelTSParam->patternType != CM_NONE_DEPENDENCY )
6082                 {
6083                     dispatchFreq[count] = (uint32_t*)MOS_AllocAndZeroMemory(sizeof(uint32_t)*parallelGraphInfo[count].numSteps);
6084                     if( !dispatchFreq[count] )
6085                     {
6086                         eStatus = MOS_STATUS_INVALID_PARAMETER;
6087                         CM_ASSERTMESSAGE("Memory allocation failed for EnqueueWithHints");
6088                         goto finish;
6089                     }
6090 
6091                     if( kernelTSParam->patternType != CM_WAVEFRONT26Z )
6092                     {
6093                         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetDispatchPattern(parallelGraphInfo[count], kernelTSParam->patternType, dispatchFreq[count]));
6094                     }
6095                     else
6096                     {
6097                         MOS_SecureMemcpy(dispatchFreq[count], sizeof(uint32_t)*parallelGraphInfo[count].numSteps,
6098                                          kernelTSParam->dispatchInfo.numThreadsInWave, sizeof(uint32_t)*parallelGraphInfo[count].numSteps);
6099                     }
6100                 }
6101 
6102                 if (!noDependencyCase)
6103                 {
6104                     tmpNumSubSlice =
6105                         (maximum / (platformInfo.numEUsPerSubSlice * platformInfo.numHWThreadsPerEU)) + 1;
6106 
6107                     if (tmpNumSubSlice > platformInfo.numSubSlices)
6108                     {
6109                         tmpNumSubSlice = platformInfo.numSubSlices - 1;
6110                     }
6111 
6112                     if (tmpNumSubSlice > kernelsSliceInfo[i].numSubSlices)
6113                     {
6114                         kernelsSliceInfo[i].numSubSlices = tmpNumSubSlice;
6115                     }
6116                 }
6117                 else
6118                 {
6119                     kernelsSliceInfo[i].numSubSlices = platformInfo.numSubSlices;
6120                 }
6121 
6122                 count++;
6123             }
6124         }
6125 
6126         if (!noDependencyCase)
6127         {
6128             for (i = 0; i < numKernelGroups; ++i)
6129             {
6130                 totalReqSubSlices += kernelsSliceInfo[i].numSubSlices;
6131             }
6132 
6133             // adjust if requested less or more subslices than architecture has
6134             if (totalReqSubSlices < platformInfo.numSubSlices)
6135             {
6136                 // want to add subslices starting from K0
6137                 difference = platformInfo.numSubSlices - totalReqSubSlices;
6138                 tmp = tmp1 = 0;
6139                 for (i = 0; i < difference; ++i)
6140                 {
6141                     tmp = tmp1 % numKernelGroups;
6142                     kernelsSliceInfo[tmp].numSubSlices++;
6143                     totalReqSubSlices++;
6144                     tmp1++;
6145                 }
6146             }
6147             else if (totalReqSubSlices > platformInfo.numSubSlices)
6148             {
6149                 // want to subtract subslices starting from last kernel
6150                 difference = totalReqSubSlices - platformInfo.numSubSlices;
6151                 tmp = 0;
6152                 tmp1 = numKernelGroups - 1;
6153                 for (i = numKernelGroups - 1, j = 0; j < difference; --i, ++j)
6154                 {
6155                     tmp = tmp1 % numKernelGroups;
6156                     kernelsSliceInfo[tmp].numSubSlices--;
6157                     totalReqSubSlices--;
6158                     tmp1 += numKernelGroups - 1;
6159                 }
6160             }
6161 
6162             if (totalReqSubSlices != platformInfo.numSubSlices)
6163             {
6164                 eStatus = MOS_STATUS_INVALID_PARAMETER;
6165                 CM_ASSERTMESSAGE("Total requested sub-slices does not match platform's number of sub-slices");
6166                 goto finish;
6167             }
6168         }
6169 
6170         for(i = 0; i < numKernelGroups; ++i)
6171         {
6172             kernelsSliceInfo[i].destination = (PCM_HAL_KERNEL_SLICE_SUBSLICE)MOS_AllocAndZeroMemory(sizeof(CM_HAL_KERNEL_SLICE_SUBSLICE)*kernelsSliceInfo[i].numSubSlices);
6173             if( !kernelsSliceInfo[i].destination )
6174             {
6175                 eStatus = MOS_STATUS_INVALID_PARAMETER;
6176                 CM_ASSERTMESSAGE("Memory allocation failed in EnqueueWithHints");
6177                 goto finish;
6178             }
6179         }
6180 
6181         // set slice, subslice for each kernel group
6182         if (systemInfo.isSliceInfoValid)
6183         {
6184             for (i = 0; i < systemInfo.numMaxSlicesSupported; ++i)
6185             {
6186                 for (j = 0; j < (systemInfo.numMaxSubSlicesSupported / systemInfo.numMaxSlicesSupported); ++j)
6187                 {
6188                     if (systemInfo.sliceInfo[i].SubSliceInfo[j].Enabled && systemInfo.sliceInfo[i].Enabled)
6189                     {
6190                         if (curKernel < numKernelGroups)
6191                         {
6192                             if (kernelsSliceInfo[curKernel].numSubSlices == numSet)
6193                             {
6194                                 curKernel++;
6195                                 numSet = 0;
6196                             }
6197                         }
6198 
6199                         if (curKernel < numKernelGroups)
6200                         {
6201                             kernelsSliceInfo[curKernel].destination[numSet].slice = i;
6202                             kernelsSliceInfo[curKernel].destination[numSet].subSlice = j;
6203 
6204                             numSet++;
6205                         }
6206 
6207                         numSubSlicesEnabled++;
6208                     }
6209                 }
6210             }
6211 
6212 
6213             if (numSubSlicesEnabled != platformInfo.numSubSlices)
6214             {
6215                 // not enough slice information, do not assign sub-slice destination
6216                 sufficientSliceInfo = false;
6217             }
6218         }
6219 
6220         // set freq dispatch ratio for each group
6221         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetKernelGrpFreqDispatch(parallelGraphInfo, groupInfo, numKernelGroups, &minSteps));
6222 
6223         // set dispatch pattern for kernel with no dependency
6224         for( i = 0; i < numKernels; ++i )
6225         {
6226             if( kernelParams[i]->kernelThreadSpaceParam.patternType == CM_NONE_DEPENDENCY )
6227             {
6228                 grpId = remapKrnToGrp[i];
6229                 allocSize = 0;
6230 
6231                 if( groupInfo[grpId].freqDispatch == 0 )
6232                 {
6233                     allocSize = minSteps;
6234                     groupInfo[grpId].freqDispatch = 1;
6235                 }
6236                 else
6237                 {
6238                     allocSize = minSteps * groupInfo[grpId].freqDispatch;
6239                     groupInfo[grpId].freqDispatch = groupInfo[grpId].freqDispatch * 2;
6240                 }
6241 
6242                 dispatchFreq[i] = (uint32_t*)MOS_AllocAndZeroMemory(sizeof(uint32_t)*allocSize);
6243                 if( !dispatchFreq[i] )
6244                 {
6245                     eStatus = MOS_STATUS_INVALID_PARAMETER;
6246                     CM_ASSERTMESSAGE("Memory allocation failed in EnqueueWithHints");
6247                     goto finish;
6248                 }
6249 
6250                 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetNoDependKernelDispatchPattern(kernelParams[i]->numThreads,
6251                     allocSize, dispatchFreq[i]));
6252             }
6253         }
6254     }
6255 
6256     CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer->pPrivateData);
6257     bbCmArgs = (PCM_HAL_BB_ARGS) batchBuffer->pPrivateData;
6258     if( bbCmArgs->refCount > 1 )
6259     {
6260 
6261         uint8_t *bBuffer = batchBuffer->pData + batchBuffer->iCurrent;
6262         updateCurrKernel = false;
6263         for( i = 0; i < totalNumThreads; ++i )
6264         {
6265             if( !singleSubSlice )
6266             {
6267                 if( (dispatchFreq[currentKernel][state->hintIndexes.dispatchIndexes[currentKernel]] == numDispatched) ||
6268                     (state->hintIndexes.kernelIndexes[currentKernel] >= kernelParams[currentKernel]->numThreads) )
6269                 {
6270                     numDispatched = 0;
6271                     numStepsDispatched++;
6272                     state->hintIndexes.dispatchIndexes[currentKernel]++;
6273 
6274                     if( state->hintIndexes.kernelIndexes[currentKernel] >= kernelParams[currentKernel]->numThreads )
6275                     {
6276                         updateCurrKernel = true;
6277                         groupInfo[remapKrnToGrp[currentKernel]].numKernelsFinished++;
6278                         if( groupInfo[remapKrnToGrp[currentKernel]].numKernelsFinished ==
6279                             groupInfo[remapKrnToGrp[currentKernel]].numKernelsInGroup )
6280                         {
6281                             groupInfo[remapKrnToGrp[currentKernel]].groupFinished = 1;
6282                         }
6283                         else
6284                         {
6285                             remapGrpToKrn[tmpIndex]++;
6286                         }
6287                     }
6288 
6289                     if( (groupInfo[remapKrnToGrp[currentKernel]].freqDispatch == numStepsDispatched) ||
6290                         updateCurrKernel )
6291                     {
6292                         numStepsDispatched = 0;
6293                         roundRobinCount++;
6294 
6295                         tmpIndex = roundRobinCount % numKernelGroups;
6296 
6297                         if( groupInfo[tmpIndex].groupFinished )
6298                         {
6299                             loopCount = 0;
6300                             while( (loopCount < numKernelGroups) && (!kernelFound) )
6301                             {
6302                                 roundRobinCount++;
6303                                 tmpIndex = roundRobinCount % numKernelGroups;
6304                                 if( state->hintIndexes.kernelIndexes[remapGrpToKrn[tmpIndex]] < kernelParams[remapGrpToKrn[tmpIndex]]->numThreads )
6305                                 {
6306                                     kernelFound = true;
6307                                 }
6308                                 loopCount++;
6309                             }
6310                             if( !kernelFound )
6311                             {
6312                                 // Error shouldn't be here
6313                                 // if still in for loop totalNumThreads, needs to be a kernel with threads left
6314                                 eStatus = MOS_STATUS_UNKNOWN;
6315                                 CM_ASSERTMESSAGE("Couldn't find kernel with threads left for EnqueueWithHints");
6316                                 goto finish;
6317                             }
6318                         }
6319                         currentKernel = remapGrpToKrn[tmpIndex];
6320                     }
6321                 }
6322             }
6323             else
6324             {
6325                  if( state->hintIndexes.kernelIndexes[currentKernel] >= kernelParams[currentKernel]->numThreads )
6326                  {
6327                      currentKernel++;
6328                  }
6329             }
6330 
6331             if( kernelParams[currentKernel]->kernelThreadSpaceParam.threadCoordinates )
6332             {
6333                 threadCoordinates.y = kernelParams[currentKernel]->kernelThreadSpaceParam.threadCoordinates[state->hintIndexes.kernelIndexes[currentKernel]].y;
6334                 threadCoordinates.mask = kernelParams[currentKernel]->kernelThreadSpaceParam.threadCoordinates[state->hintIndexes.kernelIndexes[currentKernel]].mask;
6335                 enableThreadSpace = true;
6336                 threadCoordinates.resetMask = kernelParams[currentKernel]->kernelThreadSpaceParam.threadCoordinates[state->hintIndexes.kernelIndexes[currentKernel]].resetMask;
6337             }
6338 
6339              if( enableThreadSpace )
6340              {
6341                  if( threadCoordinates.mask != CM_DEFAULT_THREAD_DEPENDENCY_MASK )
6342                  {
6343                      tmpThreadScoreboardMask = kernelScoreboardMask[currentKernel];
6344                      // do the remapping
6345                      for( k = 0; k < kernelParams[currentKernel]->kernelThreadSpaceParam.dependencyInfo.count; ++k )
6346                      {
6347                          if( (threadCoordinates.mask & CM_HINTS_LEASTBIT_MASK) == 0 )
6348                          {
6349                              CM_HAL_UNSETBIT(tmpThreadScoreboardMask, dependRemap[currentKernel][k]);
6350                          }
6351 
6352                          threadCoordinates.mask = threadCoordinates.mask >> 1;
6353                      }
6354                          scoreboardMask = tmpThreadScoreboardMask;
6355                  }
6356                  else
6357                  {
6358                      scoreboardMask = kernelScoreboardMask[currentKernel];
6359                  }
6360              }
6361              else
6362              {
6363                  threadCoordinates.y = state->hintIndexes.kernelIndexes[currentKernel] / kernelParams[currentKernel]->kernelThreadSpaceParam.threadSpaceWidth;
6364                  scoreboardMask = kernelScoreboardMask[currentKernel];
6365              }
6366 
6367              adjustedYCoord = 0;
6368              if( currentKernel > 0 )
6369              {
6370                  // if not first kernel, and has dependency,
6371                  // and along scoreboard border top need to mask out dependencies with y < 0
6372                  if( kernelScoreboardMask[currentKernel] )
6373                  {
6374                      if( threadCoordinates.y == 0 )
6375                      {
6376                          for( k = 0; k < vfeDependencyInfo.count; ++k )
6377                          {
6378                              if( vfeDependencyInfo.deltaY[k] < 0 )
6379                              {
6380                                  CM_HAL_UNSETBIT(scoreboardMask, k);
6381                              }
6382                          }
6383                      }
6384                  }
6385              }
6386 
6387              if( currentKernel < numKernels - 1 )
6388              {
6389                  // if not last kernel, and has dependency,
6390                  // along scoreboard border bottom need to mask out dependencies with y > 0
6391                  if( kernelScoreboardMask[currentKernel] )
6392                  {
6393                      if( threadCoordinates.y == (kernelParams[currentKernel]->kernelThreadSpaceParam.threadSpaceHeight - 1))
6394                      {
6395                          for( k = 0; k < vfeDependencyInfo.count; ++k)
6396                          {
6397                              if( vfeDependencyInfo.deltaY[k] > 0 )
6398                              {
6399                                  CM_HAL_UNSETBIT(scoreboardMask, k);
6400                              }
6401                          }
6402                      }
6403                  }
6404              }
6405 
6406             for( aIndex = 0; aIndex < kernelParams[currentKernel]->numArgs; aIndex++ )
6407             {
6408                 argParams[currentKernel] = &kernelParams[currentKernel]->argParams[aIndex];
6409                 index = state->hintIndexes.kernelIndexes[currentKernel] * argParams[currentKernel]->perThread;
6410 
6411                 if( (kernelParams[currentKernel]->cmFlags & CM_KERNEL_FLAGS_CURBE) && !argParams[currentKernel]->perThread )
6412                 {
6413                     continue;
6414                 }
6415 
6416                 CM_ASSERT(argParams[currentKernel]->payloadOffset < kernelParams[currentKernel]->payloadSize);
6417 
6418                 switch(argParams[currentKernel]->kind)
6419                 {
6420                 case CM_ARGUMENT_GENERAL:
6421                     break;
6422 
6423                 case CM_ARGUMENT_SAMPLER:
6424                     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSamplerState(
6425                         state, kernelParams[currentKernel], argParams[currentKernel], &indexParams[currentKernel],
6426                         mediaIds[currentKernel], index, nullptr));
6427                     break;
6428 
6429                 case CM_ARGUMENT_SURFACEBUFFER:
6430                     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupBufferSurfaceState(
6431                         state, argParams[currentKernel], &indexParams[currentKernel],
6432                         bindingTableEntries[currentKernel], -1, index, nullptr));
6433                     break;
6434 
6435                 case CM_ARGUMENT_SURFACE2D_UP:
6436                     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPState(
6437                         state, argParams[currentKernel], &indexParams[currentKernel],
6438                         bindingTableEntries[currentKernel], index, nullptr));
6439                     break;
6440 
6441                 case CM_ARGUMENT_SURFACE2DUP_SAMPLER:
6442                     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPSamplerState(
6443                         state, argParams[currentKernel], &indexParams[currentKernel],
6444                         bindingTableEntries[currentKernel], index, nullptr));
6445                     break;
6446 
6447                 case CM_ARGUMENT_SURFACE2D_SAMPLER:
6448                     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceSamplerState(
6449                         state, argParams[currentKernel], &indexParams[currentKernel],
6450                         bindingTableEntries[currentKernel], 0, nullptr));
6451                     break;
6452 
6453                 case CM_ARGUMENT_SURFACE2D:
6454                     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceState(
6455                         state, argParams[currentKernel], &indexParams[currentKernel],
6456                         bindingTableEntries[currentKernel], index, nullptr));
6457                     break;
6458 
6459                 case CM_ARGUMENT_SURFACE3D:
6460                     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup3DSurfaceState(
6461                         state, argParams[currentKernel], &indexParams[currentKernel],
6462                         bindingTableEntries[currentKernel], index, nullptr));
6463                     break;
6464 
6465                 case CM_ARGUMENT_SURFACE_VME:
6466                     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupVmeSurfaceState(
6467                         state, argParams[currentKernel], &indexParams[currentKernel],
6468                         bindingTableEntries[currentKernel], 0, nullptr));
6469                     break;
6470 
6471                 case CM_ARGUMENT_SURFACE_SAMPLER8X8_VA:
6472                 case CM_ARGUMENT_SURFACE_SAMPLER8X8_AVS:
6473                     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSampler8x8SurfaceState(
6474                         state, argParams[currentKernel], &indexParams[currentKernel],
6475                         bindingTableEntries[currentKernel], 0, nullptr));
6476                     break;
6477 
6478                 default:
6479                     eStatus = MOS_STATUS_INVALID_PARAMETER;
6480                     CM_ASSERTMESSAGE(
6481                         "Argument kind '%d' is not supported", argParams[currentKernel]->kind);
6482                     goto finish;
6483 
6484                  } // switch argKind
6485              } // for numArgs
6486 
6487             if( threadCoordinates.resetMask == CM_RESET_DEPENDENCY_MASK )
6488             {
6489                 MOS_SecureMemcpy(bBuffer + (CM_SCOREBOARD_MASK_POS_IN_MEDIA_OBJECT_CMD*sizeof(uint32_t)),
6490                     sizeof(uint8_t), &scoreboardMask, sizeof(uint8_t));
6491             }
6492 
6493             batchBuffer->iCurrent += cmdSizes[currentKernel];
6494             bBuffer += cmdSizes[currentKernel];
6495 
6496             state->hintIndexes.kernelIndexes[currentKernel]++;
6497             enableThreadSpace = false;
6498             kernelFound = false;
6499             updateCurrKernel = false;
6500             numDispatched++;
6501         } // for totalNumThreads
6502     } // if uiRefCount > 1
6503     else
6504     {
6505         uint8_t *bBuffer = batchBuffer->pData + batchBuffer->iCurrent;
6506         updateCurrKernel = false;
6507 
6508         for( i = 0; i < totalNumThreads; ++i)
6509         {
6510             if( !singleSubSlice )
6511             {
6512                 if( (dispatchFreq[currentKernel][state->hintIndexes.dispatchIndexes[currentKernel]] == numDispatched) ||
6513                     (state->hintIndexes.kernelIndexes[currentKernel] >= kernelParams[currentKernel]->numThreads) )
6514                 {
6515                     numDispatched = 0;
6516                     numStepsDispatched++;
6517                     state->hintIndexes.dispatchIndexes[currentKernel]++;
6518 
6519                     if( state->hintIndexes.kernelIndexes[currentKernel] >= kernelParams[currentKernel]->numThreads )
6520                     {
6521                         updateCurrKernel = true;
6522                         groupInfo[remapKrnToGrp[currentKernel]].numKernelsFinished++;
6523                         if( groupInfo[remapKrnToGrp[currentKernel]].numKernelsFinished ==
6524                             groupInfo[remapKrnToGrp[currentKernel]].numKernelsInGroup )
6525                         {
6526                             groupInfo[remapKrnToGrp[currentKernel]].groupFinished = 1;
6527                         }
6528                         else
6529                         {
6530                             remapGrpToKrn[tmpIndex]++;
6531                         }
6532                     }
6533 
6534                     if( (groupInfo[remapKrnToGrp[currentKernel]].freqDispatch == numStepsDispatched) ||
6535                         updateCurrKernel )
6536                     {
6537                         numStepsDispatched = 0;
6538                         roundRobinCount++;
6539 
6540                         tmpIndex = roundRobinCount % numKernelGroups;
6541 
6542                         if( groupInfo[tmpIndex].groupFinished )
6543                         {
6544                             loopCount = 0;
6545                             while( (loopCount < numKernelGroups) && (!kernelFound) )
6546                             {
6547                                 roundRobinCount++;
6548                                 tmpIndex = roundRobinCount % numKernelGroups;
6549                                 if( state->hintIndexes.kernelIndexes[remapGrpToKrn[tmpIndex]] < kernelParams[remapGrpToKrn[tmpIndex]]->numThreads )
6550                                 {
6551                                     kernelFound = true;
6552                                 }
6553                                 loopCount++;
6554                             }
6555                             if( !kernelFound )
6556                             {
6557                                 // Error shouldn't be here
6558                                 // if still in for loop totalNumThreads, needs to be a kernel with threads left
6559                                 eStatus = MOS_STATUS_UNKNOWN;
6560                                 CM_ASSERTMESSAGE("Couldn't find kernel with threads left for EnqueueWithHints");
6561                                 goto finish;
6562                             }
6563                         }
6564 
6565                         currentKernel = remapGrpToKrn[tmpIndex];
6566                     }
6567                 }
6568             }
6569             else
6570             {
6571                 if( state->hintIndexes.kernelIndexes[currentKernel] >= kernelParams[currentKernel]->numThreads )
6572                 {
6573                     currentKernel++;
6574                 }
6575             }
6576 
6577             if( kernelParams[currentKernel]->kernelThreadSpaceParam.threadCoordinates )
6578             {
6579                 threadCoordinates.x = kernelParams[currentKernel]->kernelThreadSpaceParam.threadCoordinates[state->hintIndexes.kernelIndexes[currentKernel]].x;
6580                 threadCoordinates.y = kernelParams[currentKernel]->kernelThreadSpaceParam.threadCoordinates[state->hintIndexes.kernelIndexes[currentKernel]].y;
6581                 threadCoordinates.mask = kernelParams[currentKernel]->kernelThreadSpaceParam.threadCoordinates[state->hintIndexes.kernelIndexes[currentKernel]].mask;
6582                 enableThreadSpace = true;
6583             }
6584 
6585             mediaObjectParams[currentKernel].VfeScoreboard.ScoreboardEnable =
6586                     (kernelParams[currentKernel]->kernelThreadSpaceParam.dependencyInfo.count == 0) ? 0:1;
6587 
6588             if( !singleSubSlice && systemInfo.isSliceInfoValid && sufficientSliceInfo )
6589             {
6590                 sliceIndex = kernelsSliceInfo[remapKrnToGrp[currentKernel]].counter % kernelsSliceInfo[remapKrnToGrp[currentKernel]].numSubSlices;
6591                 mediaObjectParams[currentKernel].dwSliceDestinationSelect = kernelsSliceInfo[remapKrnToGrp[currentKernel]].destination[sliceIndex].slice;
6592                 mediaObjectParams[currentKernel].dwHalfSliceDestinationSelect = kernelsSliceInfo[remapKrnToGrp[currentKernel]].destination[sliceIndex].subSlice;
6593                 mediaObjectParams[currentKernel].bForceDestination = true;
6594 
6595                 kernelsSliceInfo[remapKrnToGrp[currentKernel]].counter++;
6596             }
6597 
6598             if( enableThreadSpace )
6599             {
6600                 mediaObjectParams[currentKernel].VfeScoreboard.Value[0] = threadCoordinates.x;
6601                 mediaObjectParams[currentKernel].VfeScoreboard.Value[1] = threadCoordinates.y;
6602                 if( threadCoordinates.mask != CM_DEFAULT_THREAD_DEPENDENCY_MASK )
6603                 {
6604                     tmpThreadScoreboardMask = kernelScoreboardMask[currentKernel];
6605                     // do the remapping
6606                     for( k = 0; k < kernelParams[currentKernel]->kernelThreadSpaceParam.dependencyInfo.count; ++k )
6607                     {
6608                         if( (threadCoordinates.mask & CM_HINTS_LEASTBIT_MASK) == 0 )
6609                         {
6610                             CM_HAL_UNSETBIT(tmpThreadScoreboardMask, dependRemap[currentKernel][k]);
6611                         }
6612 
6613                         threadCoordinates.mask = threadCoordinates.mask >> 1;
6614                     }
6615 
6616                     mediaObjectParams[currentKernel].VfeScoreboard.ScoreboardMask = tmpThreadScoreboardMask;
6617                 }
6618                 else
6619                 {
6620                     mediaObjectParams[currentKernel].VfeScoreboard.ScoreboardMask = kernelScoreboardMask[currentKernel];
6621                 }
6622             }
6623             else
6624             {
6625                 mediaObjectParams[currentKernel].VfeScoreboard.Value[0] = state->hintIndexes.kernelIndexes[currentKernel] %
6626                         kernelParams[currentKernel]->kernelThreadSpaceParam.threadSpaceWidth;
6627                 mediaObjectParams[currentKernel].VfeScoreboard.Value[1] = state->hintIndexes.kernelIndexes[currentKernel] /
6628                         kernelParams[currentKernel]->kernelThreadSpaceParam.threadSpaceWidth;
6629                 mediaObjectParams[currentKernel].VfeScoreboard.ScoreboardMask = kernelScoreboardMask[currentKernel];
6630             }
6631 
6632              adjustedYCoord = 0;
6633              // adjust y coordinate for kernels after the first one
6634              if( currentKernel > 0 )
6635              {
6636                  // if not first kernel, and has dependency,
6637                  // and along scoreboard border need to mask out dependencies with y < 0
6638                  if( kernelScoreboardMask[currentKernel] )
6639                  {
6640                      if (mediaObjectParams[currentKernel].VfeScoreboard.Value[1] == 0)
6641                      {
6642                          for( k = 0; k < vfeDependencyInfo.count; ++k )
6643                          {
6644                              if( vfeDependencyInfo.deltaY[k] < 0 )
6645                              {
6646                                  CM_HAL_UNSETBIT(mediaObjectParams[currentKernel].VfeScoreboard.ScoreboardMask, k);
6647                              }
6648                          }
6649                      }
6650                  }
6651 
6652                  for( j = currentKernel; j > 0; --j )
6653                  {
6654                      adjustedYCoord += kernelParams[j-1]->kernelThreadSpaceParam.threadSpaceHeight;
6655                  }
6656              }
6657 
6658              if( currentKernel < numKernels - 1 )
6659              {
6660                  // if not last kernel, and has dependency,
6661                  // along scoreboard border bottom need to mask out dependencies with y > 0
6662                  if( kernelScoreboardMask[currentKernel] )
6663                  {
6664                      if (mediaObjectParams[currentKernel].VfeScoreboard.Value[1] ==
6665                            (kernelParams[currentKernel]->kernelThreadSpaceParam.threadSpaceHeight - 1))
6666                      {
6667                          for( k = 0; k < vfeDependencyInfo.count; ++k )
6668                          {
6669                              if( vfeDependencyInfo.deltaY[k] > 0 )
6670                              {
6671                                  CM_HAL_UNSETBIT(mediaObjectParams[currentKernel].VfeScoreboard.ScoreboardMask, k);
6672                              }
6673                          }
6674                      }
6675                  }
6676              }
6677 
6678             mediaObjectParams[currentKernel].VfeScoreboard.Value[1] =
6679                 mediaObjectParams[currentKernel].VfeScoreboard.Value[1] + adjustedYCoord;
6680 
6681             for( aIndex = 0; aIndex < kernelParams[currentKernel]->numArgs; aIndex++ )
6682             {
6683                 argParams[currentKernel] = &kernelParams[currentKernel]->argParams[aIndex];
6684                 index = state->hintIndexes.kernelIndexes[currentKernel] * argParams[currentKernel]->perThread;
6685 
6686                 if( (kernelParams[currentKernel]->cmFlags & CM_KERNEL_FLAGS_CURBE) && !argParams[currentKernel]->perThread )
6687                 {
6688                     continue;
6689                 }
6690 
6691                 CM_ASSERT(argParams[currentKernel]->payloadOffset < kernelParams[currentKernel]->payloadSize);
6692 
6693                 switch(argParams[currentKernel]->kind)
6694                 {
6695                 case CM_ARGUMENT_GENERAL:
6696                     MOS_SecureMemcpy(
6697                         cmdInline[currentKernel] + argParams[currentKernel]->payloadOffset,
6698                         argParams[currentKernel]->unitSize,
6699                         argParams[currentKernel]->firstValue + index * argParams[currentKernel]->unitSize,
6700                         argParams[currentKernel]->unitSize);
6701                     break;
6702 
6703                 case CM_ARGUMENT_SAMPLER:
6704                     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSamplerState(
6705                         state, kernelParams[currentKernel], argParams[currentKernel], &indexParams[currentKernel],
6706                         mediaIds[currentKernel], index, cmdInline[currentKernel]));
6707                     break;
6708 
6709                 case CM_ARGUMENT_SURFACEBUFFER:
6710                     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupBufferSurfaceState(
6711                         state, argParams[currentKernel], &indexParams[currentKernel],
6712                         bindingTableEntries[currentKernel], -1, index, cmdInline[currentKernel]));
6713                     break;
6714 
6715                 case CM_ARGUMENT_SURFACE2D_UP:
6716                     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPState(
6717                         state, argParams[currentKernel], &indexParams[currentKernel],
6718                         bindingTableEntries[currentKernel], index, cmdInline[currentKernel]));
6719                     break;
6720 
6721                 case CM_ARGUMENT_SURFACE2DUP_SAMPLER:
6722                     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPSamplerState(
6723                         state, argParams[currentKernel], &indexParams[currentKernel],
6724                         bindingTableEntries[currentKernel], index, cmdInline[currentKernel]));
6725                     break;
6726 
6727                 case CM_ARGUMENT_SURFACE2D_SAMPLER:
6728                     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceSamplerState(
6729                         state, argParams[currentKernel], &indexParams[currentKernel],
6730                         bindingTableEntries[currentKernel], index, cmdInline[currentKernel]));
6731                     break;
6732 
6733                 case CM_ARGUMENT_SURFACE2D:
6734                     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceState(
6735                         state, argParams[currentKernel], &indexParams[currentKernel],
6736                         bindingTableEntries[currentKernel], index, cmdInline[currentKernel]));
6737                     break;
6738 
6739                 case CM_ARGUMENT_SURFACE3D:
6740                     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup3DSurfaceState(
6741                         state, argParams[currentKernel], &indexParams[currentKernel],
6742                         bindingTableEntries[currentKernel], index, cmdInline[currentKernel]));
6743                     break;
6744 
6745                 case CM_ARGUMENT_SURFACE_VME:
6746                     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupVmeSurfaceState(
6747                         state, argParams[currentKernel], &indexParams[currentKernel],
6748                         bindingTableEntries[currentKernel], 0, cmdInline[currentKernel]));
6749                     break;
6750 
6751                 case CM_ARGUMENT_SURFACE_SAMPLER8X8_VA:
6752                 case CM_ARGUMENT_SURFACE_SAMPLER8X8_AVS:
6753                     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSampler8x8SurfaceState(
6754                         state, argParams[currentKernel], &indexParams[currentKernel],
6755                         bindingTableEntries[currentKernel], 0, cmdInline[currentKernel]));
6756                     break;
6757 
6758                 default:
6759                     eStatus = MOS_STATUS_INVALID_PARAMETER;
6760                     CM_ASSERTMESSAGE(
6761                         "Argument kind '%d' is not supported", argParams[currentKernel]->kind);
6762                     goto finish;
6763                 }
6764             }
6765 
6766             mediaObjectParams[currentKernel].pInlineData = cmdInline[currentKernel];
6767             state->renderHal->pMhwRenderInterface->AddMediaObject(nullptr, batchBuffer, &mediaObjectParams[currentKernel]);
6768 
6769             state->hintIndexes.kernelIndexes[currentKernel]++;
6770             enableThreadSpace = false;
6771             kernelFound = false;
6772             updateCurrKernel = false;
6773             numDispatched++;
6774         } // for totalNumThreads
6775     } // else refCount <= 1
6776 
6777     // setup global surfaces
6778     for( j = 0; j < numKernels; ++j )
6779     {
6780         for( i = 0; i < CM_MAX_GLOBAL_SURFACE_NUMBER; ++i )
6781         {
6782             if(( kernelParams[j]->globalSurface[i] & CM_SURFACE_MASK) != CM_NULL_SURFACE)
6783             {
6784                 CM_HAL_KERNEL_ARG_PARAM tmpArgParam;
6785                 argParam = &tmpArgParam;
6786 
6787                 tmpArgParam.kind = CM_ARGUMENT_SURFACEBUFFER;
6788                 tmpArgParam.payloadOffset = 0;
6789                 tmpArgParam.unitCount = 1;
6790                 tmpArgParam.unitSize = sizeof(uint32_t);
6791                 tmpArgParam.perThread = false;
6792                 tmpArgParam.firstValue = (uint8_t*)&kernelParams[j]->globalSurface[i];
6793 
6794                 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupBufferSurfaceState(
6795                     state, argParam, &indexParams[j], bindingTableEntries[j],
6796                     (int16_t)i, 0, nullptr));
6797             }
6798         }
6799 
6800         // set number of samplers
6801         krnAllocations[j]->Params.Sampler_Count = indexParams[j].samplerIndexCount;
6802     }
6803 
6804     // check to make sure we did all threads for all kernels
6805     if (numTasks <= 1 || lastTask )
6806     {
6807         for( i = 0; i < numKernels; ++i )
6808         {
6809             if( state->hintIndexes.kernelIndexes[i] < kernelParams[i]->numThreads )
6810             {
6811                 eStatus = MOS_STATUS_INVALID_PARAMETER;
6812                 CM_ASSERTMESSAGE("Not all threads for all kernels were put into batch buffer");
6813                 goto finish;
6814             }
6815         }
6816     }
6817 
6818     if ( lastTask )
6819     {
6820         MOS_ZeroMemory(&state->hintIndexes.kernelIndexes, sizeof(uint32_t) * CM_MAX_TASKS_EU_SATURATION);
6821         MOS_ZeroMemory(&state->hintIndexes.dispatchIndexes, sizeof(uint32_t) * CM_MAX_TASKS_EU_SATURATION);
6822     }
6823 
6824 finish:
6825     // free memory
6826     if( mediaObjectParams )            MOS_FreeMemory(mediaObjectParams);
6827     if( kernelParams )                 MOS_FreeMemory(kernelParams);
6828     if( argParams )                    MOS_FreeMemory(argParams);
6829     if( cmdSizes )                    MOS_FreeMemory(cmdSizes);
6830     if( remapKrnToGrp )                MOS_FreeMemory(remapKrnToGrp);
6831     if( remapGrpToKrn )                MOS_FreeMemory(remapGrpToKrn);
6832     if( kernelScoreboardMask )         MOS_FreeMemory(kernelScoreboardMask);
6833     if( parallelGraphInfo )            MOS_FreeMemory(parallelGraphInfo);
6834     if( numKernelsPerGrp )             MOS_FreeMemory(numKernelsPerGrp);
6835     if( groupInfo )                    MOS_FreeMemory(groupInfo);
6836 
6837     if( cmdInline )
6838     {
6839         for( i = 0; i < numKernels; ++i )
6840         {
6841             if( cmdInline[i] )
6842                 MOS_FreeMemory(cmdInline[i]);
6843         }
6844        MOS_FreeMemory(cmdInline);
6845     }
6846 
6847     if( kernelsSliceInfo )
6848     {
6849         for( i = 0; i < numKernelGroups; ++i )
6850         {
6851             if( kernelsSliceInfo[i].destination )
6852                 MOS_FreeMemory(kernelsSliceInfo[i].destination);
6853         }
6854         MOS_FreeMemory(kernelsSliceInfo);
6855     }
6856 
6857     if( dependRemap )
6858     {
6859         for( i = 0; i < numKernels; ++i )
6860         {
6861             if( dependRemap[i] )
6862                 MOS_FreeMemory(dependRemap[i]);
6863         }
6864         MOS_FreeMemory(dependRemap);
6865     }
6866 
6867     if( dispatchFreq )
6868     {
6869         for( i = 0; i < numKernels; ++i )
6870         {
6871             if( dispatchFreq[i] )
6872                 MOS_FreeMemory(dispatchFreq[i]);
6873         }
6874         MOS_FreeMemory(dispatchFreq);
6875     }
6876 
6877     return eStatus;
6878 }
6879 
HalCm_ThreadsNumberPerGroup_MW(PCM_HAL_WALKER_PARAMS walkerParams)6880 uint32_t HalCm_ThreadsNumberPerGroup_MW(PCM_HAL_WALKER_PARAMS walkerParams)
6881 {
6882     int localInnerCount = 0, localMidCount = 0, localOuterCount = 0, globalInnerCount = 0, globalOuterCount = 0;
6883     int localInnerCountMax = 0, localMidCountMax = 0, localOuterCountMax = 0, globalInnerCountMax = 0;
6884     int midX = 0, midY = 0, midStep = 0;
6885     int outerX = 0, outerY = 0;
6886     int localInnerX = 0, localInnerY = 0;
6887     int blockSizeX = 0, blockSizeY = 0;
6888     //int x, y;
6889 
6890     int localLoopExecCount = walkerParams->localLoopExecCount;
6891     int globalLoopExecCount = walkerParams->globalLoopExecCount;
6892     int globalresX = walkerParams->globalResolution.x, globalresY = walkerParams->globalResolution.y;
6893     int globalOuterX = walkerParams->globalStart.x, globalOuterY = walkerParams->globalStart.y;
6894     int globalOuterStepX = walkerParams->globalOutlerLoopStride.x, globalOuterStepY = walkerParams->globalOutlerLoopStride.y;
6895     int globalInnerStepX = walkerParams->globalInnerLoopUnit.x, globalInnerStepY = walkerParams->globalInnerLoopUnit.y;
6896     int middleStepX = walkerParams->midLoopUnitX, middleStepY = walkerParams->midLoopUnitY, extraSteps = walkerParams->middleLoopExtraSteps;
6897     int localblockresX = walkerParams->blockResolution.x, localblockresY = walkerParams->blockResolution.y;
6898     int localStartX = walkerParams->localStart.x, localStartY = walkerParams->localStart.y;
6899     int localOuterStepX = walkerParams->localOutLoopStride.x, localOuterStepY = walkerParams->localOutLoopStride.y;
6900     int localInnerStepX = walkerParams->localInnerLoopUnit.x, localInnerStepY = walkerParams->localInnerLoopUnit.y;
6901 
6902     uint32_t threadsNumberPergroup = 0;
6903 
6904     //do global_outer_looper initialization
6905     while (((globalOuterX >= globalresX) && (globalInnerStepX < 0)) ||
6906         (((globalOuterX + localblockresX) < 0) && (globalInnerStepX > 0)) ||
6907         ((globalOuterY >= globalresY) && (globalInnerStepY < 0)) ||
6908         (((globalOuterX + localblockresY) < 0) && (globalInnerStepY > 0)))
6909     {
6910         globalOuterX += globalInnerStepX;
6911         globalOuterY += globalInnerStepY;
6912     }
6913 
6914     //global_ouer_loop_in_bounds()
6915     while ((globalOuterX < globalresX) &&
6916         (globalOuterY < globalresY) &&
6917         (globalOuterX + localblockresX > 0) &&
6918         (globalOuterY + localblockresY > 0) &&
6919         (globalOuterCount <= globalLoopExecCount))
6920     {
6921         int globalInnerX = globalOuterX;
6922         int globalInnerY = globalOuterY;
6923 
6924         if (globalInnerCountMax < globalInnerCount)
6925         {
6926             globalInnerCountMax = globalInnerCount;
6927         }
6928         globalInnerCount = 0;
6929 
6930         //global_inner_loop_in_bounds()
6931         while ((globalInnerX < globalresX) &&
6932             (globalInnerY < globalresY) &&
6933             (globalInnerX + localblockresX > 0) &&
6934             (globalInnerY + localblockresY > 0))
6935         {
6936             int globalInnerXCopy = globalInnerX;
6937             int globalInnerYCopy = globalInnerY;
6938             if (globalInnerX < 0)
6939                 globalInnerXCopy = 0;
6940             if (globalInnerY < 0)
6941                 globalInnerYCopy = 0;
6942 
6943             if (globalInnerX < 0)
6944                 blockSizeX = localblockresX + globalInnerX;
6945             else if ((globalresX - globalInnerX) < localblockresX)
6946                 blockSizeX = globalresX - globalInnerX;
6947             else
6948                 blockSizeX = localblockresX;
6949             if (globalInnerY < 0)
6950                 blockSizeY = localblockresY + globalInnerY;
6951             else if ((globalresY - globalInnerY) < localblockresY)
6952                 blockSizeY = globalresY - globalInnerY;
6953             else
6954                 blockSizeY = localblockresY;
6955 
6956             outerX = localStartX;
6957             outerY = localStartY;
6958 
6959             if (localOuterCountMax < localOuterCount)
6960             {
6961                 localOuterCountMax = localOuterCount;
6962             }
6963             localOuterCount = 0;
6964 
6965             while ((outerX >= blockSizeX && localInnerStepX < 0) ||
6966                 (outerX < 0 && localInnerStepX > 0) ||
6967                 (outerY >= blockSizeY && localInnerStepY < 0) ||
6968                 (outerY < 0 && localInnerStepY > 0))
6969             {
6970                 outerX += localInnerStepX;
6971                 outerY += localInnerStepY;
6972             }
6973 
6974             //local_outer_loop_in_bounds()
6975             while ((outerX < blockSizeX) &&
6976                 (outerY < blockSizeY) &&
6977                 (outerX >= 0) &&
6978                 (outerY >= 0) &&
6979                 (localOuterCount <= localLoopExecCount))
6980             {
6981                 midX = outerX;
6982                 midY = outerY;
6983                 midStep = 0;
6984 
6985                 if (localMidCountMax < localMidCount)
6986                 {
6987                     localMidCountMax = localMidCount;
6988                 }
6989                 localMidCount = 0;
6990 
6991                 //local_middle_steps_remaining()
6992                 while ((midStep <= extraSteps) &&
6993                     (midX < blockSizeX) &&
6994                     (midY < blockSizeY) &&
6995                     (midX >= 0) &&
6996                     (midY >= 0))
6997                 {
6998                     localInnerX = midX;
6999                     localInnerY = midY;
7000 
7001                     if (localInnerCountMax < localInnerCount)
7002                     {
7003                         localInnerCountMax = localInnerCount;
7004                     }
7005                     localInnerCount = 0;
7006 
7007                     //local_inner_loop_shrinking()
7008                     while ((localInnerX < blockSizeX) &&
7009                         (localInnerY < blockSizeY) &&
7010                         (localInnerX >= 0) &&
7011                         (localInnerY >= 0))
7012                     {
7013                         //x = localInnerX + globalInnerXCopy;
7014                         //y = localInnerY + globalInnerYCopy;
7015                         localInnerCount ++;
7016 
7017                         localInnerX += localInnerStepX;
7018                         localInnerY += localInnerStepY;
7019                     }
7020                     localMidCount++;
7021                     midStep++;
7022                     midX += middleStepX;
7023                     midY += middleStepY;
7024                 }
7025                 localOuterCount += 1;
7026                 outerX += localOuterStepX;
7027                 outerY += localOuterStepY;
7028                 while ((outerX >= blockSizeX && localInnerStepX < 0) ||
7029                     (outerX <0 && localInnerStepX > 0) ||
7030                     (outerY >= blockSizeY && localInnerStepY < 0) ||
7031                     (outerY <0 && localInnerStepY > 0))
7032                 {
7033                     outerX += localInnerStepX;
7034                     outerY += localInnerStepY;
7035                 }
7036             }
7037             globalInnerCount++;
7038             globalInnerX += globalInnerStepX;
7039             globalInnerY += globalInnerStepY;
7040         }
7041         globalOuterCount += 1;
7042         globalOuterX += globalOuterStepX;
7043         globalOuterY += globalOuterStepY;
7044         while (((globalOuterX >= globalresX) && (globalInnerStepX < 0)) ||
7045             (((globalOuterX + localblockresX) < 0) && (globalInnerStepX > 0)) ||
7046             ((globalOuterY >= globalresY) && (globalInnerStepY < 0)) ||
7047             (((globalOuterX + localblockresY) < 0) && (globalInnerStepY > 0)))
7048         {
7049             globalOuterX += globalInnerStepX;
7050             globalOuterY += globalInnerStepY;
7051         }
7052     }
7053 
7054     switch (walkerParams->groupIdLoopSelect)
7055     {
7056         case CM_MW_GROUP_COLORLOOP:
7057             threadsNumberPergroup = walkerParams->colorCountMinusOne + 1;
7058             break;
7059         case CM_MW_GROUP_INNERLOCAL:
7060             threadsNumberPergroup = localInnerCount * (walkerParams->colorCountMinusOne + 1);
7061             break;
7062         case CM_MW_GROUP_MIDLOCAL:
7063             threadsNumberPergroup = localMidCount * localInnerCount * (walkerParams->colorCountMinusOne + 1);
7064             break;
7065         case CM_MW_GROUP_OUTERLOCAL:
7066             threadsNumberPergroup = localOuterCount * localMidCount * localInnerCount * (walkerParams->colorCountMinusOne + 1);
7067             break;
7068         case CM_MW_GROUP_INNERGLOBAL:
7069             threadsNumberPergroup = globalInnerCount * localOuterCount * localMidCount * localInnerCount * (walkerParams->colorCountMinusOne + 1);
7070             break;
7071         default:
7072             threadsNumberPergroup = globalOuterCount * globalInnerCount * localOuterCount * localMidCount * localInnerCount * (walkerParams->colorCountMinusOne + 1);
7073             break;
7074     }
7075 
7076     return threadsNumberPergroup;
7077 }
7078 
HalCm_SetupMediaWalkerParams(PCM_HAL_STATE state,PCM_HAL_KERNEL_PARAM kernelParam)7079 MOS_STATUS HalCm_SetupMediaWalkerParams(
7080     PCM_HAL_STATE                 state,
7081     PCM_HAL_KERNEL_PARAM          kernelParam)
7082 {
7083     MOS_STATUS                      eStatus = MOS_STATUS_SUCCESS;
7084     PCM_HAL_TASK_PARAM              taskParam = state->taskParam;
7085     PCM_HAL_WALKER_PARAMS           walkerParams = &kernelParam->walkerParams;
7086 
7087     //Using global walker enable flag
7088     walkerParams->cmWalkerEnable = state->walkerParams.CmWalkerEnable;
7089     if (walkerParams->cmWalkerEnable)
7090     {
7091         // MEDIA_WALKER
7092         CM_HAL_KERNEL_THREADSPACE_PARAM kernelThreadSpace;
7093         if (kernelParam->kernelThreadSpaceParam.threadSpaceWidth)
7094         {
7095             kernelThreadSpace.threadSpaceWidth = kernelParam->kernelThreadSpaceParam.threadSpaceWidth;
7096             kernelThreadSpace.threadSpaceHeight = kernelParam->kernelThreadSpaceParam.threadSpaceHeight;
7097             kernelThreadSpace.patternType = kernelParam->kernelThreadSpaceParam.patternType;
7098             kernelThreadSpace.walkingPattern = kernelParam->kernelThreadSpaceParam.walkingPattern;
7099             kernelThreadSpace.groupSelect = kernelParam->kernelThreadSpaceParam.groupSelect;
7100             kernelThreadSpace.colorCountMinusOne = kernelParam->kernelThreadSpaceParam.colorCountMinusOne;
7101         }
7102         else
7103         {
7104             kernelThreadSpace.threadSpaceWidth = (uint16_t)taskParam->threadSpaceWidth;
7105             kernelThreadSpace.threadSpaceHeight = (uint16_t)taskParam->threadSpaceHeight;
7106             kernelThreadSpace.patternType = taskParam->dependencyPattern;
7107             kernelThreadSpace.walkingPattern = taskParam->walkingPattern;
7108             kernelThreadSpace.groupSelect = taskParam->mediaWalkerGroupSelect;
7109             kernelThreadSpace.colorCountMinusOne = taskParam->colorCountMinusOne;
7110         }
7111 
7112         // check for valid thread space width and height here since different from media object
7113         if (kernelThreadSpace.threadSpaceWidth > state->cmHalInterface->GetMediaWalkerMaxThreadWidth())
7114         {
7115             CM_ASSERTMESSAGE("Error: Exceeds the maximum thread space width.");
7116             eStatus = MOS_STATUS_INVALID_PARAMETER;
7117             goto finish;
7118         }
7119         if (kernelThreadSpace.threadSpaceHeight > state->cmHalInterface->GetMediaWalkerMaxThreadHeight())
7120         {
7121             CM_ASSERTMESSAGE("Error: Exceeds the maximum thread space height.");
7122             eStatus = MOS_STATUS_INVALID_PARAMETER;
7123             goto finish;
7124         }
7125 
7126         //walkerParams->InterfaceDescriptorOffset = mediaID;// mediaObjectParam.dwInterfaceDescriptorOffset;
7127         walkerParams->inlineDataLength = MOS_ALIGN_CEIL(kernelParam->indirectDataParam.indirectDataSize, 4);
7128         walkerParams->inlineData = kernelParam->indirectDataParam.indirectData;
7129 
7130         walkerParams->colorCountMinusOne = kernelThreadSpace.colorCountMinusOne;// taskParam->ColorCountMinusOne;
7131         walkerParams->groupIdLoopSelect = (uint32_t)kernelThreadSpace.groupSelect;
7132 
7133         CM_WALKING_PATTERN walkPattern = kernelThreadSpace.walkingPattern;
7134         switch (kernelThreadSpace.patternType)
7135         {
7136             case CM_NONE_DEPENDENCY:
7137                 break;
7138             case CM_HORIZONTAL_WAVE:
7139                 walkPattern = CM_WALK_HORIZONTAL;
7140                 break;
7141             case CM_VERTICAL_WAVE:
7142                 walkPattern = CM_WALK_VERTICAL;
7143                 break;
7144             case CM_WAVEFRONT:
7145                 walkPattern = CM_WALK_WAVEFRONT;
7146                 break;
7147             case CM_WAVEFRONT26:
7148                 walkPattern = CM_WALK_WAVEFRONT26;
7149                 break;
7150             case CM_WAVEFRONT26X:
7151                 if (kernelThreadSpace.threadSpaceWidth > 1)
7152                 {
7153                     walkPattern = CM_WALK_WAVEFRONT26X;
7154                 }
7155                 else
7156                 {
7157                     walkPattern = CM_WALK_DEFAULT;
7158                 }
7159                 break;
7160             case CM_WAVEFRONT26ZIG:
7161                 if (kernelThreadSpace.threadSpaceWidth > 2)
7162                 {
7163                     walkPattern = CM_WALK_WAVEFRONT26ZIG;
7164                 }
7165                 else
7166                 {
7167                     walkPattern = CM_WALK_DEFAULT;
7168                 }
7169                 break;
7170             default:
7171                 CM_ASSERTMESSAGE("Error: Invalid walking pattern.");
7172                 walkPattern = CM_WALK_DEFAULT;
7173                 break;
7174         }
7175         if (taskParam->walkingParamsValid)
7176         {
7177             CM_CHK_MOSSTATUS_GOTOFINISH(state->cmHalInterface->SetMediaWalkerParams
7178                 (taskParam->walkingParams, walkerParams));
7179 
7180             if (walkPattern == CM_WALK_HORIZONTAL || walkPattern == CM_WALK_DEFAULT)
7181             {
7182                 walkerParams->localEnd.x = walkerParams->blockResolution.x - 1;
7183             }
7184             else if (walkPattern == CM_WALK_VERTICAL)
7185             {
7186                 walkerParams->localEnd.y = walkerParams->blockResolution.y - 1;
7187             }
7188         }
7189         else if (kernelParam->kernelThreadSpaceParam.walkingParamsValid)
7190         {
7191             CM_CHK_MOSSTATUS_GOTOFINISH(state->cmHalInterface->SetMediaWalkerParams(
7192                 kernelParam->kernelThreadSpaceParam.walkingParams, walkerParams));
7193 
7194             if (walkPattern == CM_WALK_HORIZONTAL || walkPattern == CM_WALK_DEFAULT)
7195             {
7196                 walkerParams->localEnd.x = walkerParams->blockResolution.x - 1;
7197             }
7198             else if (walkPattern == CM_WALK_VERTICAL)
7199             {
7200                 walkerParams->localEnd.y = walkerParams->blockResolution.y - 1;
7201             }
7202 
7203         }
7204         else
7205         {
7206             //Local loop parameters
7207             walkerParams->blockResolution.x = kernelThreadSpace.threadSpaceWidth;
7208             walkerParams->blockResolution.y = kernelThreadSpace.threadSpaceHeight;
7209 
7210             walkerParams->localStart.x = 0;
7211             walkerParams->localStart.y = 0;
7212             walkerParams->localEnd.x = 0;
7213             walkerParams->localEnd.y = 0;
7214 
7215             walkerParams->globalLoopExecCount = 1;
7216             walkerParams->midLoopUnitX = 0;
7217             walkerParams->midLoopUnitY = 0;
7218             walkerParams->middleLoopExtraSteps = 0;
7219 
7220             // account for odd Height/Width for 26x and 26Zig
7221             uint16_t adjHeight = ((kernelThreadSpace.threadSpaceHeight + 1) >> 1) << 1;
7222             uint16_t adjWidth = ((kernelThreadSpace.threadSpaceWidth + 1) >> 1) << 1;
7223 
7224             uint32_t maxThreadWidth = state->cmHalInterface->GetMediaWalkerMaxThreadWidth();
7225             switch (walkPattern)
7226             {
7227                 case CM_WALK_DEFAULT:
7228                 case CM_WALK_HORIZONTAL:
7229                     if (kernelThreadSpace.threadSpaceWidth == kernelParam->numThreads &&
7230                         kernelThreadSpace.threadSpaceHeight == 1)
7231                     {
7232                         walkerParams->blockResolution.x = MOS_MIN(kernelParam->numThreads, maxThreadWidth);
7233                         walkerParams->blockResolution.y = 1 + kernelParam->numThreads / maxThreadWidth;
7234                     }
7235                     walkerParams->localLoopExecCount = walkerParams->blockResolution.y - 1;
7236 
7237                     walkerParams->localOutLoopStride.x = 0;
7238                     walkerParams->localOutLoopStride.y = 1;
7239                     walkerParams->localInnerLoopUnit.x = 1;
7240                     walkerParams->localInnerLoopUnit.y = 0;
7241 
7242                     walkerParams->localEnd.x = walkerParams->blockResolution.x - 1;
7243 
7244                     break;
7245 
7246                 case CM_WALK_WAVEFRONT:
7247                     walkerParams->localLoopExecCount = kernelThreadSpace.threadSpaceWidth + (kernelThreadSpace.threadSpaceHeight - 1) * 1 - 1;
7248 
7249                     walkerParams->localOutLoopStride.x = 1;
7250                     walkerParams->localOutLoopStride.y = 0;
7251                     walkerParams->localInnerLoopUnit.x = 0xFFFF;  // -1 in uint32_t:16
7252                     walkerParams->localInnerLoopUnit.y = 1;
7253                     break;
7254 
7255                 case CM_WALK_WAVEFRONT26:
7256                     walkerParams->globalResolution.x = kernelThreadSpace.threadSpaceWidth;
7257                     walkerParams->globalResolution.y = kernelThreadSpace.threadSpaceHeight;
7258                     walkerParams->localOutLoopStride.x = 1;
7259                     walkerParams->localOutLoopStride.y = 0;
7260                     walkerParams->localInnerLoopUnit.x = 0xFFFE;  // -2 in uint32_t:16
7261                     walkerParams->localInnerLoopUnit.y = 1;
7262                     walkerParams->localLoopExecCount = kernelThreadSpace.threadSpaceWidth +
7263                                                        (kernelThreadSpace.threadSpaceHeight - 1) * 2 - 1;
7264 
7265                     //localLoopExecCount has limitation, it should be less than 2^12
7266                     while (walkerParams->localLoopExecCount >= 0xFFF)
7267                     {
7268                         //separate to multiple global levels
7269                         if (walkerParams->blockResolution.x > (walkerParams->blockResolution.y * 2))
7270                         {
7271                             walkerParams->blockResolution.x = (walkerParams->blockResolution.x+1) >> 1;
7272                             walkerParams->globalLoopExecCount = (walkerParams->globalResolution.x +
7273                             walkerParams->blockResolution.x - 1) / walkerParams->blockResolution.x;
7274                         }
7275                         else
7276                         {
7277                             walkerParams->blockResolution.y = (walkerParams->blockResolution.y + 1) >> 1;
7278                         }
7279                         walkerParams->localLoopExecCount = walkerParams->blockResolution.x +
7280                                                            (walkerParams->blockResolution.y - 1) * 2 - 1;
7281                     }
7282                     walkerParams->globalOutlerLoopStride.x = walkerParams->blockResolution.x;
7283                     walkerParams->globalOutlerLoopStride.y = 0;
7284                     walkerParams->globalInnerLoopUnit.x = 0;
7285                     walkerParams->globalInnerLoopUnit.y = walkerParams->blockResolution.y;
7286                     break;
7287 
7288                 case CM_WALK_WAVEFRONT26X:
7289                 case CM_WALK_WAVEFRONT26XALT:
7290                     walkerParams->localLoopExecCount = 0x7ff;
7291                     walkerParams->globalLoopExecCount = 0;
7292 
7293                     walkerParams->localOutLoopStride.x = 1;
7294                     walkerParams->localOutLoopStride.y = 0;
7295                     walkerParams->localInnerLoopUnit.x = 0xFFFE;  // -2 in uint32_t:16
7296                     walkerParams->localInnerLoopUnit.y = 2;
7297 
7298                     walkerParams->middleLoopExtraSteps = 1;
7299                     walkerParams->midLoopUnitX = 0;
7300                     walkerParams->midLoopUnitY = 1;
7301                     break;
7302 
7303                 case CM_WALK_WAVEFRONT26ZIG:
7304                     walkerParams->localLoopExecCount = 1;
7305                     walkerParams->globalLoopExecCount = (adjHeight / 2 - 1) * 2 + (adjWidth / 2) - 1;
7306 
7307                     walkerParams->localOutLoopStride.x = 0;
7308                     walkerParams->localOutLoopStride.y = 1;
7309                     walkerParams->localInnerLoopUnit.x = 1;
7310                     walkerParams->localInnerLoopUnit.y = 0;
7311 
7312                     walkerParams->blockResolution.x = 2;
7313                     walkerParams->blockResolution.y = 2;
7314 
7315                     walkerParams->localEnd.x = walkerParams->blockResolution.x - 1;
7316                     break;
7317 
7318                 case CM_WALK_VERTICAL:
7319                     walkerParams->localLoopExecCount = walkerParams->blockResolution.x - 1;
7320 
7321                     walkerParams->localOutLoopStride.x = 1;
7322                     walkerParams->localOutLoopStride.y = 0;
7323                     walkerParams->localInnerLoopUnit.x = 0;
7324                     walkerParams->localInnerLoopUnit.y = 1;
7325 
7326                     walkerParams->localEnd.y = walkerParams->blockResolution.y - 1;
7327 
7328                     break;
7329 
7330                 case CM_WALK_WAVEFRONT45D:
7331                     walkerParams->localLoopExecCount = 0x7ff;
7332                     walkerParams->globalLoopExecCount = 0x7ff;
7333 
7334                     walkerParams->localStart.x = kernelThreadSpace.threadSpaceWidth;
7335                     walkerParams->localOutLoopStride.x = 1;
7336                     walkerParams->localOutLoopStride.y = 0;
7337                     walkerParams->localInnerLoopUnit.x = 0xFFFF;  // -1 in uint32_t:16
7338                     walkerParams->localInnerLoopUnit.y = 1;
7339                     break;
7340 
7341                 case CM_WALK_WAVEFRONT45XD_2:
7342                     walkerParams->localLoopExecCount = 0x7ff;
7343                     walkerParams->globalLoopExecCount = 0x7ff;
7344 
7345                     // Local
7346                     walkerParams->localStart.x = kernelThreadSpace.threadSpaceWidth;
7347                     walkerParams->localOutLoopStride.x = 1;
7348                     walkerParams->localOutLoopStride.y = 0;
7349                     walkerParams->localInnerLoopUnit.x = 0xFFFF;  // -1 in uint32_t:16
7350                     walkerParams->localInnerLoopUnit.y = 2;
7351 
7352                     // Mid
7353                     walkerParams->middleLoopExtraSteps = 1;
7354                     walkerParams->midLoopUnitX = 0;
7355                     walkerParams->midLoopUnitY = 1;
7356 
7357                     break;
7358 
7359                 case CM_WALK_WAVEFRONT26D:
7360                     walkerParams->localLoopExecCount = 0x7ff;
7361                     walkerParams->globalLoopExecCount = 0x7ff;
7362 
7363                     walkerParams->localStart.x = kernelThreadSpace.threadSpaceWidth;
7364                     walkerParams->localOutLoopStride.x = 1;
7365                     walkerParams->localOutLoopStride.y = 0;
7366                     walkerParams->localInnerLoopUnit.x = 0xFFFE;  // -2 in uint32_t:16
7367                     walkerParams->localInnerLoopUnit.y = 1;
7368                     break;
7369 
7370                 case CM_WALK_WAVEFRONT26XD:
7371                     walkerParams->localLoopExecCount = 0x7ff;
7372                     walkerParams->globalLoopExecCount = 0x7ff;
7373 
7374                     // Local
7375                     walkerParams->localStart.x = kernelThreadSpace.threadSpaceWidth;
7376                     walkerParams->localOutLoopStride.x = 1;
7377                     walkerParams->localOutLoopStride.y = 0;
7378                     walkerParams->localInnerLoopUnit.x = 0xFFFE;  // -2 in uint32_t:16
7379                     walkerParams->localInnerLoopUnit.y = 2;
7380 
7381                     // Mid
7382                     walkerParams->middleLoopExtraSteps = 1;
7383                     walkerParams->midLoopUnitX = 0;
7384                     walkerParams->midLoopUnitY = 1;
7385                     break;
7386 
7387                 default:
7388                     walkerParams->localLoopExecCount = MOS_MIN(kernelParam->numThreads, 0x3FF);
7389 
7390                     walkerParams->localOutLoopStride.x = 0;
7391                     walkerParams->localOutLoopStride.y = 1;
7392                     walkerParams->localInnerLoopUnit.x = 1;
7393                     walkerParams->localInnerLoopUnit.y = 0;
7394                     break;
7395             }
7396 
7397             //Global loop parameters: execution count, resolution and strides
7398             //Since no global loop, global resolution equals block resolution.
7399             walkerParams->globalStart.x = 0;
7400             walkerParams->globalStart.y = 0;
7401             walkerParams->globalOutlerLoopStride.y = 0;
7402 
7403             if (walkPattern == CM_WALK_WAVEFRONT26ZIG)
7404             {
7405                 walkerParams->globalResolution.x = kernelThreadSpace.threadSpaceWidth;
7406                 walkerParams->globalResolution.y = kernelThreadSpace.threadSpaceHeight;
7407                 walkerParams->globalOutlerLoopStride.x = 2;
7408                 walkerParams->globalInnerLoopUnit.x = 0xFFFC;
7409                 walkerParams->globalInnerLoopUnit.y = 2;
7410             }
7411             else if(walkPattern != CM_WALK_WAVEFRONT26)
7412             {
7413                 walkerParams->globalResolution.x = walkerParams->blockResolution.x;
7414                 walkerParams->globalResolution.y = walkerParams->blockResolution.y;
7415                 walkerParams->globalOutlerLoopStride.x = walkerParams->globalResolution.x;
7416                 walkerParams->globalInnerLoopUnit.x = 0;
7417                 walkerParams->globalInnerLoopUnit.y = walkerParams->globalResolution.y;
7418             }
7419         }
7420 
7421         //Need calculate number threads per group for media walker, the minimum value is 1
7422         if (kernelThreadSpace.groupSelect > CM_MW_GROUP_NONE)
7423         {
7424             kernelParam->numberThreadsInGroup = HalCm_ThreadsNumberPerGroup_MW(walkerParams);
7425         }
7426         else
7427         {
7428             kernelParam->numberThreadsInGroup = 1;
7429         }
7430     }
7431 
7432 finish:
7433     return eStatus;
7434 }
7435 
HalCm_AcquireSamplerStatistics(PCM_HAL_STATE state)7436 MOS_STATUS HalCm_AcquireSamplerStatistics(PCM_HAL_STATE state)
7437 {
7438     MOS_STATUS       eStatus = MOS_STATUS_SUCCESS;
7439     uint32_t i = 0;
7440 
7441     unsigned int maxBTIindex[MAX_ELEMENT_TYPE_COUNT] = {0}; //tempoary variable, it will hold the max BTI index in each element type
7442 
7443     /* enumerate through the samplerTable for the one in use, then count and analyze */
7444     for (i = 0; i < state->cmDeviceParam.maxSamplerTableSize; i++) {  //state->CmDeviceParam.iMaxSamplerTableSize;
7445 
7446         if (state->samplerTable[i].bInUse) {
7447             uint32_t samplerIndex = state->samplerIndexTable[i];
7448             if (samplerIndex != CM_INVALID_INDEX) {
7449                 MHW_SAMPLER_ELEMENT_TYPE elementType = state->samplerTable[i].ElementType;
7450                 maxBTIindex[elementType] = (maxBTIindex[elementType] > samplerIndex) ? maxBTIindex[elementType] : samplerIndex;
7451             }
7452             else
7453                 state->samplerStatistics.samplerCount[state->samplerTable[i].ElementType]++;
7454         }
7455 
7456     }
7457 
7458     int tempbase=0;
7459     state->samplerStatistics.samplerIndexBase[MHW_Sampler2Elements]
7460         = (state->samplerStatistics.samplerCount[MHW_Sampler2Elements]) ? 0 : -1;
7461     tempbase
7462         = state->samplerStatistics.samplerIndexBase[MHW_Sampler2Elements];
7463     state->samplerStatistics.samplerIndexBase[MHW_Sampler4Elements]
7464         = (state->samplerStatistics.samplerCount[MHW_Sampler4Elements]) ?
7465         ((tempbase == -1) ? 0 : INDEX_ALIGN(state->samplerStatistics.samplerCount[MHW_Sampler2Elements], 2, 4))
7466         : tempbase;
7467     tempbase
7468         = state->samplerStatistics.samplerIndexBase[MHW_Sampler4Elements];
7469     state->samplerStatistics.samplerIndexBase[MHW_Sampler8Elements]
7470         = (state->samplerStatistics.samplerCount[MHW_Sampler8Elements]) ?
7471         ((tempbase == -1) ? 0 : INDEX_ALIGN(state->samplerStatistics.samplerCount[MHW_Sampler4Elements], 4, 8))
7472         : tempbase;
7473     tempbase
7474         = state->samplerStatistics.samplerIndexBase[MHW_Sampler8Elements];
7475     state->samplerStatistics.samplerIndexBase[MHW_Sampler64Elements]
7476         = (state->samplerStatistics.samplerCount[MHW_Sampler64Elements]) ?
7477         ((tempbase == -1) ? 0 : INDEX_ALIGN(state->samplerStatistics.samplerCount[MHW_Sampler8Elements], 8, 64))
7478         : tempbase;
7479     tempbase
7480         = state->samplerStatistics.samplerIndexBase[MHW_Sampler64Elements];
7481     state->samplerStatistics.samplerIndexBase[MHW_Sampler128Elements]
7482         = (state->samplerStatistics.samplerCount[MHW_Sampler128Elements]) ?
7483         ((tempbase == -1) ? 0 : INDEX_ALIGN(state->samplerStatistics.samplerCount[MHW_Sampler64Elements], 64, 128))
7484         : tempbase;
7485 
7486     /* There are Sampler BTI, next step needs to consider it during calculate the base */
7487     for (int k = MHW_Sampler2Elements; k < MHW_Sampler128Elements; k++) {
7488         if (state->samplerStatistics.samplerIndexBase[k + 1] < maxBTIindex[k])
7489             state->samplerStatistics.samplerIndexBase[k + 1] = maxBTIindex[k];
7490     }
7491     return eStatus;
7492 }
7493 
7494 //*-----------------------------------------------------------------------------
7495 //| Purpose:  Initial setup of HW states for the kernel
7496 //| Returns:  Result of the operation
7497 //*-----------------------------------------------------------------------------
HalCm_SetupStatesForKernelInitial(PCM_HAL_STATE state,PRENDERHAL_MEDIA_STATE mediaState,PMHW_BATCH_BUFFER batchBuffer,int32_t taskId,PCM_HAL_KERNEL_PARAM kernelParam,PCM_HAL_INDEX_PARAM indexParam,uint32_t kernelCurbeOffset,int32_t & bindingTable,int32_t & mediaID,PRENDERHAL_KRN_ALLOCATION & krnAllocation)7498 MOS_STATUS HalCm_SetupStatesForKernelInitial(
7499     PCM_HAL_STATE                 state,
7500     PRENDERHAL_MEDIA_STATE        mediaState,
7501     PMHW_BATCH_BUFFER             batchBuffer,
7502     int32_t                       taskId,
7503     PCM_HAL_KERNEL_PARAM          kernelParam,
7504     PCM_HAL_INDEX_PARAM           indexParam,
7505     uint32_t                      kernelCurbeOffset,
7506     int32_t&                          bindingTable,
7507     int32_t&                          mediaID,
7508     PRENDERHAL_KRN_ALLOCATION    &krnAllocation)
7509 {
7510     MOS_STATUS                      eStatus = MOS_STATUS_SUCCESS;
7511     PRENDERHAL_INTERFACE            renderHal = state->renderHal;
7512     PRENDERHAL_STATE_HEAP           stateHeap = renderHal->pStateHeap;
7513     PCM_INDIRECT_SURFACE_INFO       indirectSurfaceInfo = kernelParam->indirectDataParam.surfaceInfo;
7514     PCM_GPGPU_WALKER_PARAMS         perKernelGpGpuWalkerParames = &kernelParam->gpgpuWalkerParams;
7515     UNUSED(batchBuffer);
7516     UNUSED(taskId);
7517 
7518     MHW_MEDIA_OBJECT_PARAMS         mediaObjectParam;
7519     PCM_HAL_KERNEL_ARG_PARAM        argParam;
7520     uint32_t                        hdrSize;
7521     uint32_t                        index;
7522     uint32_t                        value;
7523     uint32_t                        btIndex;
7524     uint32_t                        surfIndex;
7525     uint32_t                        aIndex;
7526     uint32_t                        idZ;
7527     uint32_t                        idY;
7528     uint32_t                        idX;
7529     uint32_t                        localIdIndex;
7530     CM_SURFACE_BTI_INFO             surfBTIInfo;
7531 
7532     bool                            vmeUsed = false;
7533     CM_PLATFORM_INFO                platformInfo;
7534     PRENDERHAL_MEDIA_STATE_LEGACY   mediaStateLegacy = (PRENDERHAL_MEDIA_STATE_LEGACY)mediaState;
7535 
7536     localIdIndex = kernelParam->localIdIndex;
7537 
7538     state->cmHalInterface->GetHwSurfaceBTIInfo(&surfBTIInfo);
7539 
7540     HalCm_PreSetBindingIndex(indexParam, CM_NULL_SURFACE_BINDING_INDEX, CM_NULL_SURFACE_BINDING_INDEX);
7541 
7542     HalCm_PreSetBindingIndex(indexParam, surfBTIInfo.reservedSurfaceStart,
7543         surfBTIInfo.reservedSurfaceStart + CM_MAX_GLOBAL_SURFACE_NUMBER - 1);
7544 
7545     if (kernelParam->indirectDataParam.surfaceCount)
7546     {
7547         for (index = 0; index < kernelParam->indirectDataParam.surfaceCount; index++)
7548         {
7549             value = (indirectSurfaceInfo + index)->bindingTableIndex;
7550             HalCm_PreSetBindingIndex(indexParam, value, value);
7551         }
7552     }
7553 
7554     // Get the binding table for this kernel
7555     CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnAssignBindingTable(renderHal, &bindingTable));
7556 
7557     if (state->dshEnabled)
7558     {
7559         // Kernels are already pre-loaded in GSH
7560         // krnAllocation is the head of a linked list
7561         if (!krnAllocation)
7562         {
7563             CM_ASSERTMESSAGE("Error: Invalid kernel allocation.");
7564             goto finish;
7565         }
7566     }
7567     else
7568     {
7569         // Load the Kernel to GSH
7570         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_LoadKernel(
7571             state,
7572             kernelParam,
7573             0,
7574             krnAllocation));
7575     }
7576 
7577     // initialize curbe buffer
7578     if (kernelParam->totalCurbeSize > 0)
7579     {
7580         // Update Curbe offset after curbe load command
7581         if (state->dshEnabled)
7582         {
7583             mediaStateLegacy->pDynamicState->Curbe.iCurrent += MOS_ALIGN_CEIL(kernelParam->totalCurbeSize, state->renderHal->dwCurbeBlockAlign);
7584         }
7585         else
7586         {
7587             mediaStateLegacy->iCurbeOffset += MOS_ALIGN_CEIL(kernelParam->totalCurbeSize, state->renderHal->dwCurbeBlockAlign);
7588         }
7589     }
7590 
7591     //Setup  media walker parameters if it is
7592     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupMediaWalkerParams(state, kernelParam));
7593 
7594     // Allocate Interface Descriptor
7595     mediaID = HalCm_AllocateMediaID(
7596         state,
7597         kernelParam,
7598         krnAllocation,
7599         bindingTable,
7600         kernelCurbeOffset);
7601 
7602     if (mediaID < 0)
7603     {
7604         eStatus = MOS_STATUS_INVALID_PARAMETER;
7605         CM_ASSERTMESSAGE("Unable to get Media ID");
7606         goto finish;
7607     }
7608 
7609     // Setup the Media object
7610     hdrSize = renderHal->pHwSizes->dwSizeMediaObjectHeaderCmd;
7611     mediaObjectParam.dwInterfaceDescriptorOffset = mediaID;
7612     if (kernelParam->indirectDataParam.indirectDataSize)
7613     {
7614         mediaObjectParam.dwInlineDataSize = 0;
7615     }
7616     else
7617     {
7618         mediaObjectParam.dwInlineDataSize = MOS_MAX(kernelParam->payloadSize, 4);
7619     }
7620 
7621     // set surface state and binding table
7622     if (kernelParam->indirectDataParam.surfaceCount)
7623     {
7624         for (index = 0; index < kernelParam->indirectDataParam.surfaceCount; index++)
7625         {
7626             btIndex = (indirectSurfaceInfo + index)->bindingTableIndex;
7627             surfIndex = (indirectSurfaceInfo + index)->surfaceIndex;
7628             switch ((indirectSurfaceInfo + index)->kind)
7629             {
7630             case CM_ARGUMENT_SURFACEBUFFER:
7631                 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupBufferSurfaceStateWithBTIndex(
7632                     state, bindingTable, surfIndex, btIndex, 0));
7633                 break;
7634 
7635             case CM_ARGUMENT_SURFACE2D:
7636                 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceStateWithBTIndex(
7637                     state, bindingTable, surfIndex, btIndex, 0));
7638                 break;
7639 
7640             case CM_ARGUMENT_SURFACE2D_UP:
7641                 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPStateWithBTIndex(
7642                     state, bindingTable, surfIndex, btIndex, 0));
7643                 break;
7644 
7645             case CM_ARGUMENT_SURFACE2D_SAMPLER:
7646                 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceStateWithBTIndex(
7647                     state, bindingTable, surfIndex, btIndex, 1));
7648                 break;
7649             case CM_ARGUMENT_SURFACE2DUP_SAMPLER:
7650                 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPStateWithBTIndex(
7651                     state, bindingTable, surfIndex, btIndex, 1));
7652                 break;
7653             case CM_ARGUMENT_SURFACE_SAMPLER8X8_AVS:
7654             case CM_ARGUMENT_SURFACE_SAMPLER8X8_VA:
7655                 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSampler8x8SurfaceStateWithBTIndex(
7656                     state, bindingTable, surfIndex, btIndex, 0, (CM_HAL_KERNEL_ARG_KIND)(indirectSurfaceInfo + index)->kind, 0));
7657                 break;
7658             case CM_ARGUMENT_SURFACE3D:
7659                 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup3DSurfaceStateWithBTIndex(
7660                     state, bindingTable, surfIndex, btIndex));
7661                 break;
7662             default:
7663                 eStatus = MOS_STATUS_INVALID_PARAMETER;
7664                 CM_ASSERTMESSAGE("Indirect Data surface kind is not supported");
7665                 goto finish;
7666             }
7667         }
7668     }
7669 
7670     // set sampler bti
7671     if (kernelParam->samplerBTIParam.samplerCount > 0)
7672     {
7673         for (uint32_t i = 0; i < kernelParam->samplerBTIParam.samplerCount; i++)
7674         {
7675             HalCm_SetupSamplerStateWithBTIndex(state, kernelParam, &kernelParam->samplerBTIParam.samplerInfo[0], i, mediaID);
7676         }
7677     }
7678 
7679     if ( ( kernelParam->curbeSizePerThread > 0 ) && ( kernelParam->stateBufferType == CM_STATE_BUFFER_NONE ) )
7680     {
7681         uint8_t data[CM_MAX_THREAD_PAYLOAD_SIZE + 32];
7682         uint8_t curbe[CM_MAX_CURBE_SIZE_PER_TASK + 32];
7683 
7684         MOS_ZeroMemory(data, sizeof(data));
7685         MOS_ZeroMemory(curbe, sizeof(curbe));
7686         for (aIndex = 0; aIndex < kernelParam->numArgs; aIndex++)
7687         {
7688             argParam = &kernelParam->argParams[aIndex];
7689 
7690             if (argParam->perThread || argParam->isNull)
7691             {
7692                 continue;
7693             }
7694 
7695             switch (argParam->kind)
7696             {
7697             case CM_ARGUMENT_GENERAL:
7698             case CM_ARGUMENT_IMPLICT_GROUPSIZE:
7699             case CM_ARGUMENT_IMPLICT_LOCALSIZE:
7700             case CM_ARGUMENT_IMPLICIT_LOCALID:
7701             case CM_ARGUMENT_GENERAL_DEPVEC:
7702                 HalCm_SetArgData(argParam, 0, data);
7703                 break;
7704 
7705             case CM_ARGUMENT_SAMPLER:
7706                 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSamplerState(
7707                     state, kernelParam, argParam, indexParam, mediaID, 0, data));
7708                 break;
7709 
7710             case CM_ARGUMENT_SURFACEBUFFER:
7711                 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupBufferSurfaceState(
7712                     state, argParam, indexParam, bindingTable, -1, 0, data));
7713                 break;
7714 
7715             case CM_ARGUMENT_SURFACE2D_UP:
7716                 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPState(
7717                     state, argParam, indexParam, bindingTable, 0, data));
7718                 break;
7719 
7720             case CM_ARGUMENT_SURFACE2DUP_SAMPLER:
7721                 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPSamplerState(
7722                     state, argParam, indexParam, bindingTable, 0, data));
7723                 break;
7724 
7725             case CM_ARGUMENT_SURFACE2D_SAMPLER:
7726                 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceSamplerState(
7727                     state, argParam, indexParam, bindingTable, 0, data));
7728                 break;
7729 
7730             case CM_ARGUMENT_SURFACE2D:
7731                 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceState(
7732                     state, argParam, indexParam, bindingTable, 0, data));
7733                 break;
7734 
7735             case CM_ARGUMENT_SURFACE3D:
7736                 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup3DSurfaceState(
7737                     state, argParam, indexParam, bindingTable, 0, data));
7738                 break;
7739 
7740             case CM_ARGUMENT_SURFACE_VME:   // 3 surface indices
7741                 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupVmeSurfaceState(
7742                     state, argParam, indexParam, bindingTable, 0, data));
7743                 vmeUsed = true;
7744                 break;
7745 
7746             case CM_ARGUMENT_SURFACE_SAMPLER8X8_AVS:   // sampler 8x8  surface
7747             case CM_ARGUMENT_SURFACE_SAMPLER8X8_VA:    // sampler 8x8  surface
7748                 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSampler8x8SurfaceState(
7749                     state, argParam, indexParam, bindingTable, 0, data));
7750                 break;
7751 
7752             case CM_ARGUMENT_STATE_BUFFER:
7753                 CM_CHK_MOSSTATUS_GOTOFINISH( HalCm_SetupStateBufferSurfaceState(
7754                     state, argParam, indexParam, bindingTable, 0, data ) );
7755                 break;
7756 
7757             case CM_ARGUMENT_SURFACE:
7758                 // Allow null surface
7759                 break;
7760             case CM_ARGUMENT_SURFACE2D_SCOREBOARD:
7761                 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceState(
7762                     state, argParam, indexParam, bindingTable, 0, data));
7763                 break;
7764 
7765             default:
7766                 eStatus = MOS_STATUS_INVALID_PARAMETER;
7767                 CM_ASSERTMESSAGE("Argument kind '%d' is not supported", argParam->kind);
7768                 goto finish;
7769             }
7770         }
7771 
7772         if (perKernelGpGpuWalkerParames->gpgpuEnabled)
7773         {
7774             uint32_t offset = 0;
7775 
7776             uint32_t localIdXOffset = kernelParam->argParams[localIdIndex].payloadOffset;
7777             uint32_t localIdYOffset = localIdXOffset + 4;
7778             uint32_t localIdZOffset = localIdXOffset + 8;
7779 
7780             //totalCurbeSize aligned when parsing task
7781             int32_t crossThreadSize = kernelParam->crossThreadConstDataLen;
7782 
7783             //Cross thread constant data
7784             MOS_SecureMemcpy(curbe + offset, crossThreadSize, data, crossThreadSize);
7785             offset += crossThreadSize;
7786 
7787             //Per-thread data
7788             for (idZ = 0; idZ < perKernelGpGpuWalkerParames->threadDepth; idZ++)
7789             {
7790                 for (idY = 0; idY < perKernelGpGpuWalkerParames->threadHeight; idY++)
7791                 {
7792                     for (idX = 0; idX < perKernelGpGpuWalkerParames->threadWidth; idX++)
7793                     {
7794                         *((uint32_t *)(data + localIdXOffset)) = idX;
7795                         *((uint32_t *)(data + localIdYOffset)) = idY;
7796                         *((uint32_t *)(data + localIdZOffset)) = idZ;
7797                         MOS_SecureMemcpy(curbe + offset, kernelParam->curbeSizePerThread, data + crossThreadSize, kernelParam->curbeSizePerThread);
7798                         offset += kernelParam->curbeSizePerThread;
7799                     }
7800                 }
7801             }
7802 
7803             // tell pfnLoadCurbeData the current curbe offset
7804             if (state->dshEnabled)
7805             {
7806                 PRENDERHAL_MEDIA_STATE_LEGACY pCurMediaStateLegacy = (PRENDERHAL_MEDIA_STATE_LEGACY)stateHeap->pCurMediaState;
7807                 PRENDERHAL_DYNAMIC_STATE dynamicState = pCurMediaStateLegacy->pDynamicState;
7808                 dynamicState->Curbe.iCurrent -= MOS_ALIGN_CEIL(kernelParam->totalCurbeSize, state->renderHal->dwCurbeBlockAlign);
7809                 kernelParam->curbeOffset = dynamicState->Curbe.iCurrent;
7810             }
7811             else
7812             {
7813                 stateHeap->pCurMediaState->iCurbeOffset -= MOS_ALIGN_CEIL(kernelParam->totalCurbeSize, state->renderHal->dwCurbeBlockAlign);
7814                 kernelParam->curbeOffset = stateHeap->pCurMediaState->iCurbeOffset;
7815             }
7816             // update curbe with data.
7817             renderHal->pfnLoadCurbeData(renderHal,
7818                 stateHeap->pCurMediaState,
7819                 curbe,
7820                 kernelParam->totalCurbeSize);
7821         }
7822         else
7823         {
7824             CM_ASSERT(kernelParam->totalCurbeSize == kernelParam->curbeSizePerThread);
7825 
7826             // tell pfnLoadCurbeData the current curbe offset
7827             if (state->dshEnabled)
7828             {
7829                 PRENDERHAL_MEDIA_STATE_LEGACY pCurMediaStateLegacy = (PRENDERHAL_MEDIA_STATE_LEGACY)stateHeap->pCurMediaState;
7830                 PRENDERHAL_DYNAMIC_STATE dynamicState = pCurMediaStateLegacy->pDynamicState;
7831                 dynamicState->Curbe.iCurrent -= MOS_ALIGN_CEIL(kernelParam->totalCurbeSize, state->renderHal->dwCurbeBlockAlign);
7832                 kernelParam->curbeOffset = dynamicState->Curbe.iCurrent;
7833             }
7834             else
7835             {
7836                 stateHeap->pCurMediaState->iCurbeOffset -= MOS_ALIGN_CEIL(kernelParam->totalCurbeSize, state->renderHal->dwCurbeBlockAlign);
7837                 kernelParam->curbeOffset = stateHeap->pCurMediaState->iCurbeOffset;
7838             }
7839             // update curbe with data.
7840             renderHal->pfnLoadCurbeData(renderHal,
7841                 stateHeap->pCurMediaState,
7842                 data,
7843                 kernelParam->totalCurbeSize);
7844         }
7845 
7846         if (state->cmHalInterface->IsOverridePowerOptionPerGpuContext() == false) // false means override per Batch.
7847         {
7848             if ((vmeUsed == true) && state->cmHalInterface->IsRequestShutdownSubslicesForVmeUsage())
7849             {
7850                 CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnGetPlatformInfo(state, &platformInfo, true));
7851                 CM_POWER_OPTION  cmPower;
7852                 cmPower.nSlice = 1;
7853                 cmPower.nSubSlice = platformInfo.numSubSlices / 2;
7854                 cmPower.nEU = (uint16_t)platformInfo.numEUsPerSubSlice;
7855                 state->pfnSetPowerOption(state, &cmPower);
7856             }
7857         }
7858     }
7859 
7860 #if MDF_CURBE_DATA_DUMP
7861     if (state->dumpCurbeData)
7862     {
7863         HalCm_DumpCurbeData(state);
7864     }
7865 
7866 #endif
7867 
7868 #if MDF_INTERFACE_DESCRIPTOR_DATA_DUMP
7869     if (state->dumpIDData)
7870     {
7871         HalCm_DumpInterfaceDescriptorData(state);
7872     }
7873 #endif
7874 
7875 finish:
7876     return eStatus;
7877 }
7878 
HalCm_SetConditionalEndInfo(PCM_HAL_STATE state,PCM_HAL_CONDITIONAL_BB_END_INFO conditionalEndInfo,PMHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS conditionalBBEndParams,uint32_t index)7879 MOS_STATUS HalCm_SetConditionalEndInfo(
7880     PCM_HAL_STATE state,
7881     PCM_HAL_CONDITIONAL_BB_END_INFO conditionalEndInfo,
7882     PMHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS conditionalBBEndParams,
7883     uint32_t index
7884     )
7885 {
7886     if (index >= CM_MAX_CONDITIONAL_END_CMDS)
7887     {
7888         return MOS_STATUS_INVALID_PARAMETER;
7889     }
7890 
7891     MOS_ZeroMemory(&conditionalBBEndParams[index], sizeof(MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS));
7892 
7893     conditionalBBEndParams[index].presSemaphoreBuffer = &(state->bufferTable[conditionalEndInfo[index].bufferTableIndex].osResource);
7894     conditionalBBEndParams[index].dwValue             = conditionalEndInfo[index].compareValue;
7895     conditionalBBEndParams[index].bDisableCompareMask = conditionalEndInfo[index].disableCompareMask;
7896     conditionalBBEndParams[index].dwOffset            = conditionalEndInfo[index].offset;
7897 
7898     return MOS_STATUS_SUCCESS;
7899 }
7900 
7901 //===============<Interface Functions>==========================================
7902 
7903 //*-----------------------------------------------------------------------------
7904 //| Purpose: Allocate Structures required for HW Rendering
7905 //| Returns: Result of the operation
7906 //*-----------------------------------------------------------------------------
HalCm_Allocate(PCM_HAL_STATE state)7907 MOS_STATUS HalCm_Allocate(
7908     PCM_HAL_STATE state)                                                       // [in] Pointer to CM State
7909 {
7910     MOS_STATUS                     eStatus;
7911     PCM_HAL_DEVICE_PARAM           deviceParam;
7912     PRENDERHAL_INTERFACE           renderHal;
7913     PRENDERHAL_STATE_HEAP_SETTINGS stateHeapSettings;
7914     uint32_t                       i;
7915     MOS_NULL_RENDERING_FLAGS       nullHWAccelerationEnable;
7916     RENDERHAL_SETTINGS_LEGACY      renderHalSettings;
7917     uint32_t                       maxTasks;
7918 
7919     PMHW_BATCH_BUFFER        batchBuffer = nullptr;
7920 
7921     //------------------------------------
7922     CM_ASSERT(state);
7923     //------------------------------------
7924 
7925     eStatus           = MOS_STATUS_UNKNOWN;
7926     deviceParam    = &state->cmDeviceParam;
7927     renderHal         = state->renderHal;
7928     stateHeapSettings = &renderHal->StateHeapSettings;
7929 
7930     stateHeapSettings->iCurbeSize        = CM_MAX_CURBE_SIZE_PER_TASK;
7931     stateHeapSettings->iMediaStateHeaps  = deviceParam->maxTasks + 1;              // + 1 to handle sync issues with current RenderHal impl (we can remove this once we insert sync value in 2nd level BB)
7932     stateHeapSettings->iMediaIDs         = deviceParam->maxKernelsPerTask;         // Number of Media IDs = Number of Kernels/Task
7933 
7934     stateHeapSettings->iKernelCount      = deviceParam->maxGshKernelEntries;
7935     stateHeapSettings->iKernelBlockSize  = deviceParam->maxKernelBinarySize;       // The kernel occupied memory need be this block size aligned 256K for IVB/HSW
7936     stateHeapSettings->iKernelHeapSize   = deviceParam->maxGshKernelEntries * CM_32K;                       // CM_MAX_GSH_KERNEL_ENTRIES * 32*1024;
7937     state->totalKernelSize              = (int32_t*)MOS_AllocAndZeroMemory(sizeof(int32_t) * deviceParam->maxGshKernelEntries);
7938     if(!state->totalKernelSize)
7939     {
7940         CM_ASSERTMESSAGE("Could not allocate enough memory for state->totalKernelSize\n");
7941         eStatus = MOS_STATUS_NO_SPACE;
7942         goto finish;
7943     }
7944 
7945     stateHeapSettings->iPerThreadScratchSize = deviceParam->maxPerThreadScratchSpaceSize;
7946     stateHeapSettings->iSipSize          = CM_MAX_SIP_SIZE;
7947     stateHeapSettings->iBindingTables    = deviceParam->maxKernelsPerTask;         // Number of Binding tables = Number of Kernels/Task
7948     stateHeapSettings->iSurfacesPerBT    = CM_MAX_SURFACE_STATES_PER_BT;             // Allocate Max Binding Table indices per binding table
7949     stateHeapSettings->iSurfaceStates    = CM_MAX_SURFACE_STATES;                    // Allocate Max Surfaces that can be indexed
7950     stateHeapSettings->iSamplersAVS      = deviceParam->maxAvsSamplers;            // Allocate Max AVS samplers
7951 
7952     // Initialize RenderHal Interface
7953     CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnInitialize(renderHal, nullptr));
7954 
7955     // Initialize Vebox Interface
7956     if (state->veboxInterface)
7957     {
7958         CM_CHK_MOSSTATUS_GOTOFINISH(state->veboxInterface->CreateHeap());
7959     }
7960 
7961     // Initialize the table only in Static Mode (DSH doesn't use this table at all)
7962     if (!state->dshEnabled)
7963     {
7964         // Init the data in kernel entries for Dynamic GSH
7965         for (int32_t kernelID = 0; kernelID < stateHeapSettings->iKernelCount; ++kernelID)
7966         {
7967             if (kernelID > 0)
7968             {
7969                 state->totalKernelSize[kernelID] = 0;
7970             }
7971             else
7972             {
7973                 state->totalKernelSize[kernelID] = stateHeapSettings->iKernelHeapSize;
7974             }
7975         }
7976         state->kernelNumInGsh = 1;
7977     }
7978 
7979     // Allocate BB (one for each media-state heap)
7980     state->numBatchBuffers = stateHeapSettings->iMediaStateHeaps;
7981     state->batchBuffers = (PMHW_BATCH_BUFFER)MOS_AllocAndZeroMemory(
7982                                     state->numBatchBuffers *
7983                                     sizeof(MHW_BATCH_BUFFER));
7984 
7985     CM_CHK_NULL_GOTOFINISH_MOSERROR(state->batchBuffers);
7986 
7987     batchBuffer = state->batchBuffers;
7988     for (i = 0; i < (uint32_t)state->numBatchBuffers; i ++, batchBuffer ++)
7989     {
7990         batchBuffer->dwSyncTag    = 0;
7991         batchBuffer->bMatch       = false;
7992         batchBuffer->iPrivateType = RENDERHAL_BB_TYPE_CM;
7993         batchBuffer->iPrivateSize = sizeof(CM_HAL_BB_ARGS);
7994         batchBuffer->pPrivateData = (PCM_HAL_BB_ARGS)MOS_AllocAndZeroMemory(sizeof(CM_HAL_BB_ARGS));
7995         CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer->pPrivateData);
7996         ((PCM_HAL_BB_ARGS)batchBuffer->pPrivateData)->refCount = 1;
7997     }
7998 
7999     // Allocate TimeStamp Buffer
8000     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_AllocateTsResource(state));
8001 
8002     // Allocate tracker resources
8003     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_AllocateTrackerResource(state));
8004 
8005     // Initialize dynamic general state heap
8006     CM_HAL_HEAP_PARAM heapParams;
8007     heapParams.behaviorGSH        = HeapManager::Behavior::destructiveExtend;
8008     heapParams.initialSizeGSH     = 0x0080000;
8009     heapParams.extendSizeGSH      = 0x0080000;
8010     heapParams.trackerProducer    = &state->renderHal->trackerProducer;
8011     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_InitializeDynamicStateHeaps(state, &heapParams));
8012 
8013     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_AllocateTables(state));
8014 
8015     // Allocate Task Param to hold max tasks
8016     state->taskParam = (PCM_HAL_TASK_PARAM)MOS_AllocAndZeroMemory(sizeof(CM_HAL_TASK_PARAM));
8017     CM_CHK_NULL_GOTOFINISH_MOSERROR(state->taskParam);
8018     state->currentTaskEntry = 0;
8019 
8020     // Allocate Task TimeStamp to hold time stamps
8021     state->taskTimeStamp = (PCM_HAL_TASK_TIMESTAMP)MOS_AllocAndZeroMemory(sizeof(CM_HAL_TASK_TIMESTAMP));
8022     CM_CHK_NULL_GOTOFINISH_MOSERROR(state->taskTimeStamp);
8023 
8024     // Setup Registration table entries
8025     state->surfaceRegTable.count      = state->cmDeviceParam.max2DSurfaceTableSize;
8026     state->surfaceRegTable.entries    = state->surf2DTable;
8027 
8028     maxTasks = state->cmDeviceParam.maxTasks;
8029     // Initialize the task status table
8030     MOS_FillMemory(state->taskStatusTable, (size_t)maxTasks, CM_INVALID_INDEX);
8031 
8032     // Init the null render flag
8033     nullHWAccelerationEnable  = state->osInterface->pfnGetNullHWRenderFlags(state->osInterface);
8034     state->nullHwRenderCm          = nullHWAccelerationEnable.Cm || nullHWAccelerationEnable.VPGobal;
8035 
8036     //during initialization stage to allocate sip resource and Get sip binary.
8037     if ((state->midThreadPreemptionDisabled == false)
8038      || (state->kernelDebugEnabled == true))
8039     {
8040         CM_CHK_MOSSTATUS_GOTOFINISH(state->cmHalInterface->AllocateSIPCSRResource());
8041         state->pfnGetSipBinary(state);
8042     }
8043 
8044     //Init flag for conditional batch buffer
8045     state->cbbEnabled = HalCm_IsCbbEnabled(state);
8046 
8047     //Turn Turbo boost on
8048     CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnEnableTurboBoost(state));
8049     CM_CHK_NULL_GOTOFINISH_MOSERROR(state->osInterface);
8050     state->tsFrequency = state->osInterface->pfnGetTsFrequency(state->osInterface);
8051 
8052     if (state->refactor)
8053     {
8054         state->advExecutor = CmExtensionCreator<CmExecutionAdv>::CreateClass();
8055         if (state->advExecutor == nullptr)
8056         {
8057             CM_ASSERTMESSAGE("Could not allocate enough memory for state->advExecutor\n");
8058             eStatus = MOS_STATUS_NO_SPACE;
8059             goto finish;
8060         }
8061         state->advExecutor->Initialize(state);
8062     }
8063     else
8064     {
8065         state->advExecutor = nullptr;
8066     }
8067 
8068     eStatus = MOS_STATUS_SUCCESS;
8069 
8070 finish:
8071     return eStatus;
8072 }
8073 
HalCm_GetKernelPerfTag(PCM_HAL_STATE cmState,PCM_HAL_KERNEL_PARAM * kernelParams,uint32_t numKernels)8074 uint16_t HalCm_GetKernelPerfTag(
8075     PCM_HAL_STATE           cmState,
8076     PCM_HAL_KERNEL_PARAM    *kernelParams,
8077     uint32_t                numKernels)
8078 {
8079     using namespace std;
8080 
8081     CM_ASSERT(cmState);
8082     CM_ASSERT(kernelParams);
8083 
8084     int perfTagKernelNum = numKernels - 1;
8085     if (numKernels > MAX_COMBINE_NUM_IN_PERFTAG)
8086     {
8087         perfTagKernelNum = MAX_COMBINE_NUM_IN_PERFTAG - 1;
8088     }
8089 
8090     // get a combined kernel name
8091     uint32_t len = numKernels * CM_MAX_KERNEL_NAME_SIZE_IN_BYTE;
8092     char *combinedName = MOS_NewArray(char, len);
8093     if (combinedName == nullptr)
8094     { // Not need to abort the process as this is only for pnp profiling
8095         CM_ASSERTMESSAGE("Error: Memory allocation error in getPertTag.");
8096         return 0; // return the default perftag
8097     }
8098     CmSafeMemSet(combinedName, 0, len);
8099 
8100     MOS_SecureStrcat(combinedName, len, kernelParams[0]->kernelName);
8101     for (uint32_t i = 1; i < numKernels; i++)
8102     {
8103         MOS_SecureStrcat(combinedName, len, ";");
8104         MOS_SecureStrcat(combinedName, len, kernelParams[i]->kernelName);
8105     }
8106 
8107     // get perftag index
8108     int perfTagIndex = 0;
8109     map<string, int>::iterator ite = cmState->perfTagIndexMap[perfTagKernelNum]->find(combinedName);
8110     if (ite == cmState->perfTagIndexMap[perfTagKernelNum]->end())
8111     {
8112         if (cmState->currentPerfTagIndex[perfTagKernelNum] <= MAX_CUSTOMIZED_PERFTAG_INDEX)
8113         {
8114             cmState->perfTagIndexMap[perfTagKernelNum]->insert(pair<string, int>(combinedName, cmState->currentPerfTagIndex[perfTagKernelNum]));
8115             perfTagIndex = cmState->currentPerfTagIndex[perfTagKernelNum] ++;
8116         }
8117     }
8118     else
8119     {
8120         perfTagIndex = ite->second;
8121     }
8122 
8123     perfTagIndex = (perfTagIndex &0xFF) | (perfTagKernelNum << 8);
8124     MosSafeDeleteArray(combinedName);
8125     return (uint16_t)perfTagIndex;
8126 }
8127 
8128 //*-----------------------------------------------------------------------------
8129 //| Purpose: Executes the CM Task
8130 //| Returns: Result of the operation
8131 //*-----------------------------------------------------------------------------
HalCm_ExecuteTask(PCM_HAL_STATE state,PCM_HAL_EXEC_TASK_PARAM execParam)8132 MOS_STATUS HalCm_ExecuteTask(
8133     PCM_HAL_STATE           state,                                             // [in] Pointer to CM State
8134     PCM_HAL_EXEC_TASK_PARAM execParam)                                         // [in] Pointer to Task Param
8135 {
8136     MOS_STATUS              eStatus;
8137     PRENDERHAL_INTERFACE    renderHal;
8138     PRENDERHAL_MEDIA_STATE  mediaState;
8139     PMHW_BATCH_BUFFER       batchBuffer;
8140     PCM_HAL_BB_ARGS         bbCmArgs;
8141     PCM_HAL_KERNEL_PARAM    kernelParam;
8142     int32_t                 taskId;
8143     int32_t                 remBindingTables;
8144     int32_t                 bindingTable;
8145     int32_t                 bti;
8146     int32_t                 mediaID;
8147     PRENDERHAL_KRN_ALLOCATION krnAllocations[CM_MAX_KERNELS_PER_TASK];
8148     uint32_t                vfeCurbeSize;
8149     uint32_t                maxInlineDataSize, maxIndirectDataSize;
8150     uint32_t                i;
8151     void                    *cmdBuffer = nullptr;
8152     PCM_HAL_TASK_PARAM      taskParam = state->taskParam;
8153     uint32_t                btsizePower2;
8154     PMOS_INTERFACE          osInterface = nullptr;
8155 
8156     //-----------------------------------
8157     CM_ASSERT(state);
8158     CM_ASSERT(execParam);
8159     //-----------------------------------
8160 
8161     eStatus        = MOS_STATUS_SUCCESS;
8162     renderHal      = state->renderHal;
8163     mediaState     = nullptr;
8164     batchBuffer    = nullptr;
8165 
8166     if (execParam->numKernels > state->cmDeviceParam.maxKernelsPerTask)
8167     {
8168         eStatus = MOS_STATUS_INVALID_PARAMETER;
8169         CM_ASSERTMESSAGE("Number of Kernels per task exceeds maximum");
8170         goto finish;
8171     }
8172 
8173     // Reset states before execute
8174     // (clear allocations, get GSH allocation index + any additional housekeeping)
8175     state->osInterface->pfnResetOsStates(state->osInterface);
8176     CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnReset(renderHal));
8177 
8178     MOS_ZeroMemory(state->taskParam, sizeof(CM_HAL_TASK_PARAM));
8179 
8180     MOS_FillMemory(
8181         state->bti2DIndexTable,
8182         state->cmDeviceParam.max2DSurfaceTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
8183         CM_INVALID_INDEX );
8184 
8185     MOS_FillMemory(
8186         state->bti2DUPIndexTable,
8187         state->cmDeviceParam.max2DSurfaceUPTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
8188         CM_INVALID_INDEX );
8189 
8190     MOS_FillMemory(
8191         state->bti3DIndexTable,
8192         state->cmDeviceParam.max3DSurfaceTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
8193         CM_INVALID_INDEX );
8194 
8195     MOS_FillMemory(
8196         state->btiBufferIndexTable,
8197         state->cmDeviceParam.maxBufferTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
8198         CM_INVALID_INDEX );
8199 
8200     MOS_FillMemory(
8201         state->samplerIndexTable,
8202         state->cmDeviceParam.maxSamplerTableSize,
8203         CM_INVALID_INDEX);
8204 
8205     MOS_FillMemory(
8206         state->sampler8x8IndexTable,
8207         state->cmDeviceParam.maxSampler8x8TableSize,
8208         CM_INVALID_INDEX);
8209 
8210     state->walkerParams.CmWalkerEnable = 0;
8211 
8212     vfeCurbeSize = 0;
8213     maxInlineDataSize = 0;
8214     maxIndirectDataSize = 0;
8215 
8216     // Get the Task Id
8217     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetNewTaskId(state, &taskId));
8218 
8219     // Parse the task
8220     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_ParseTask(state, execParam));
8221 
8222     // Reset the SSH configuration according to the property of the task
8223     renderHal->pStateHeap->iBindingTableSize = MOS_ALIGN_CEIL(taskParam->surfacePerBT *  // Reconfigure the binding table size
8224                                                  renderHal->pRenderHalPltInterface->GetBTStateCmdSize(), renderHal->StateHeapSettings.iBTAlignment);
8225 
8226     taskParam->surfacePerBT = renderHal->pStateHeap->iBindingTableSize/renderHal->pRenderHalPltInterface->GetBTStateCmdSize();
8227 
8228     renderHal->StateHeapSettings.iBindingTables = renderHal->StateHeapSettings.iBindingTables *             // Reconfigure the binding table number
8229                                                          renderHal->StateHeapSettings.iSurfacesPerBT / taskParam->surfacePerBT;
8230 
8231     renderHal->StateHeapSettings.iSurfacesPerBT = taskParam->surfacePerBT;                            // Reconfigure the surface per BT
8232 
8233     if (execParam->numKernels > (uint32_t)renderHal->StateHeapSettings.iBindingTables)
8234     {
8235         eStatus = MOS_STATUS_INVALID_PARAMETER;
8236         CM_ASSERTMESSAGE("Number of Kernels per task exceeds the number can be hold by binding table");
8237         goto finish;
8238     }
8239 
8240     if (execParam->kernelDebugEnabled && Mos_ResourceIsNull(&state->sipResource.osResource))
8241     {
8242        HalCm_AllocateSipResource( state); // create  sip resource if it does not exist
8243     }
8244 
8245     // Assign a MediaState from the MediaStateHeap
8246     // !!!! THIS MUST BE BEFORE Getting the BATCH_BUFFER !!!
8247     // since this method syncs the batch buffer and media state.
8248     if (state->dshEnabled)
8249     {
8250         if ( execParam->userDefinedMediaState != nullptr )
8251         {
8252             // use exsiting media state as current state
8253             mediaState = static_cast< PRENDERHAL_MEDIA_STATE >( execParam->userDefinedMediaState );
8254 
8255             // update current state to dsh
8256             renderHal->pStateHeap->pCurMediaState = mediaState;
8257             // Refresh sync tag for all media states in submitted queue
8258             state->criticalSectionDSH->Acquire();
8259             renderHal->pfnRefreshSync( renderHal );
8260             state->criticalSectionDSH->Release();
8261         }
8262         else
8263         {
8264             // Obtain media state configuration - Curbe, Samplers (3d/AVS/VA), 8x8 sampler table, Media IDs, Kernel Spill area
8265             RENDERHAL_DYNAMIC_MEDIA_STATE_PARAMS params;
8266             state->criticalSectionDSH->Acquire();
8267             HalCm_DSH_GetDynamicStateConfiguration( state, &params, execParam->numKernels, execParam->kernels, execParam->kernelCurbeOffset );
8268 
8269             // Prepare Media States to accommodate all parameters - Curbe, Samplers (3d/AVS/VA), 8x8 sampler table, Media IDs
8270             mediaState = renderHal->pfnAssignDynamicState( renderHal, &params, RENDERHAL_COMPONENT_CM );
8271             state->criticalSectionDSH->Release();
8272         }
8273     }
8274     else
8275     {
8276         mediaState = renderHal->pfnAssignMediaState(renderHal, RENDERHAL_COMPONENT_CM);
8277     }
8278     CM_CHK_NULL_GOTOFINISH_MOSERROR(mediaState);
8279 
8280     // Assign/Reset SSH instance
8281     CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnAssignSshInstance(renderHal));
8282 
8283     // Dynamic Batch Buffer allocation
8284 
8285     if (!state->walkerParams.CmWalkerEnable)
8286     {
8287         // Get the Batch buffer
8288         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetBatchBuffer(state, execParam->numKernels, execParam->kernels, &batchBuffer));
8289         CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer);
8290         bbCmArgs = (PCM_HAL_BB_ARGS)batchBuffer->pPrivateData;
8291 
8292         // Lock the batch buffer
8293         if ( (bbCmArgs->refCount == 1) ||
8294              (state->taskParam->reuseBBUpdateMask == 1) )
8295         {
8296             CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnLockBB(renderHal, batchBuffer));
8297         }
8298     }
8299 
8300     if (state->useNewSamplerHeap == false)
8301     {
8302         HalCm_AcquireSamplerStatistics(state);
8303     }
8304 
8305     // Load all kernels in the same state heap - expand ISH if necessary BEFORE programming media states.
8306     // This is better than having to expand ISH in the middle of loading, when part of MediaIDs are
8307     // already programmed - not a problem in the old implementation where it would simply remove old
8308     // kernels out of the way.
8309     if (state->dshEnabled)
8310     {
8311         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_DSH_LoadKernelArray(state, execParam->kernels, execParam->numKernels, krnAllocations));
8312     }
8313 
8314     for (i = 0; i < execParam->numKernels; i++)
8315     {
8316         CM_HAL_INDEX_PARAM indexParam;
8317         MOS_ZeroMemory(&indexParam, sizeof(CM_HAL_INDEX_PARAM));
8318         kernelParam = execParam->kernels[i];
8319 
8320         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupStatesForKernelInitial(state, mediaState, batchBuffer, taskId, kernelParam, &indexParam,
8321             execParam->kernelCurbeOffset[i], bti, mediaID, krnAllocations[i]));
8322 
8323         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_FinishStatesForKernel(state, mediaState, batchBuffer, taskId, kernelParam, i, &indexParam,
8324             bti, mediaID, krnAllocations[i]));
8325 
8326         vfeCurbeSize += MOS_ALIGN_CEIL(kernelParam->totalCurbeSize, state->renderHal->dwCurbeBlockAlign);
8327         if (kernelParam->payloadSize > maxInlineDataSize)
8328         {
8329             maxInlineDataSize = kernelParam->payloadSize;
8330         }
8331         if (kernelParam->indirectDataParam.indirectDataSize > maxIndirectDataSize)
8332         {
8333             maxIndirectDataSize = kernelParam->indirectDataParam.indirectDataSize;
8334         }
8335 
8336         if (execParam->conditionalEndBitmap & (uint64_t)1 << i)
8337         {
8338             CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetConditionalEndInfo(state, taskParam->conditionalEndInfo, taskParam->conditionalBBEndParams, i));
8339         }
8340     }
8341 
8342     // Store the Max Payload Sizes in the Task params
8343     state->taskParam->vfeCurbeSize = vfeCurbeSize;
8344     if (maxIndirectDataSize)
8345     {
8346         state->taskParam->urbEntrySize = maxIndirectDataSize;
8347     }
8348     else
8349     {
8350         state->taskParam->urbEntrySize = maxInlineDataSize;
8351     }
8352 
8353     // We may have to send additional Binding table commands in command buffer.
8354     // This is needed because the surface offset (from the base on SSH)
8355     // calculation takes into account the max binding tables allocated in the
8356     // SSH.
8357     remBindingTables = renderHal->StateHeapSettings.iBindingTables - execParam->numKernels;
8358 
8359     if (remBindingTables > 0)
8360     {
8361         for (i = 0; i < (uint32_t)remBindingTables; i++)
8362         {
8363             CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnAssignBindingTable(
8364                     renderHal,
8365                     &bindingTable));
8366         }
8367     }
8368 
8369     // until now, we know binding table index for debug surface
8370     // let's get system thread
8371     osInterface = state->osInterface;
8372     osInterface->pfnResetPerfBufferID(osInterface);
8373     if (osInterface->pfnIsPerfTagSet(osInterface) == false)
8374     {
8375         osInterface->pfnIncPerfFrameID(osInterface);
8376         uint16_t perfTag = HalCm_GetKernelPerfTag(state, execParam->kernels, execParam->numKernels);
8377         osInterface->pfnSetPerfTag(osInterface, perfTag);
8378     }
8379 #if (_RELEASE_INTERNAL || _DEBUG)
8380 #if defined(CM_DIRECT_GUC_SUPPORT)
8381     // Update the task ID table
8382     state->taskStatusTable[taskId] = (char)taskId;
8383 
8384     //for GuC direct submission, need to send out dummy command buffer to make sure PDP table got binded
8385     CM_CHK_MOSSTATUS_GOTOFINISH(state->cmHalInterface->SubmitDummyCommands(
8386         batchBuffer, taskId, execParam->kernels, &cmdBuffer));
8387 
8388     /* make sure Dummy submission is done */
8389 
8390     CM_HAL_QUERY_TASK_PARAM queryParam;
8391 
8392     queryParam.taskId = taskId;
8393     queryParam.status = CM_TASK_IN_PROGRESS;
8394 
8395     do {
8396         state->pfnQueryTask(state, &queryParam);
8397     } while (queryParam.status != CM_TASK_FINISHED);
8398 
8399 #endif
8400 #endif
8401 
8402     // Submit HW commands and states
8403     CM_CHK_MOSSTATUS_GOTOFINISH(state->cmHalInterface->SubmitCommands(
8404                     batchBuffer, taskId, execParam->kernels, &cmdBuffer));
8405 
8406     // Set the Task ID
8407     execParam->taskIdOut = taskId;
8408 
8409     // Set OS data
8410     if(cmdBuffer)
8411     {
8412         execParam->osData = cmdBuffer;
8413     }
8414 
8415     // Update the task ID table
8416     state->taskStatusTable[taskId] = (char)taskId;
8417 
8418 finish:
8419 
8420     if (state->dshEnabled)
8421     {
8422         state->criticalSectionDSH->Acquire();
8423         if (mediaState && eStatus != MOS_STATUS_SUCCESS)
8424         {
8425             // Failed, release media state and heap resources
8426             renderHal->pfnReleaseDynamicState(renderHal, mediaState);
8427         }
8428         else
8429         {
8430             renderHal->pfnSubmitDynamicState(renderHal, mediaState);
8431         }
8432         state->criticalSectionDSH->Release();
8433     }
8434 
8435     if (batchBuffer)  // for Media Walker, batchBuffer is empty
8436     {
8437         if (batchBuffer->bLocked)
8438         {
8439             // Only happens in Error cases
8440             CM_CHK_NULL_RETURN_MOSERROR(batchBuffer->pPrivateData);
8441             if (((PCM_HAL_BB_ARGS)batchBuffer->pPrivateData)->refCount == 1)
8442             {
8443                 renderHal->pfnUnlockBB(renderHal, batchBuffer);
8444             }
8445         }
8446     }
8447 
8448     return eStatus;
8449 }
8450 
8451 //*-----------------------------------------------------------------------------
8452 //| Purpose: Executes the CM Group Task
8453 //| Returns: Result of the operation
8454 //*-----------------------------------------------------------------------------
HalCm_ExecuteGroupTask(PCM_HAL_STATE state,PCM_HAL_EXEC_GROUP_TASK_PARAM execGroupParam)8455 MOS_STATUS HalCm_ExecuteGroupTask(
8456     PCM_HAL_STATE                   state,           // [in] Pointer to CM State
8457     PCM_HAL_EXEC_GROUP_TASK_PARAM   execGroupParam)  // [in] Pointer to Task Param
8458 {
8459     MOS_STATUS              eStatus = MOS_STATUS_SUCCESS;
8460     PRENDERHAL_INTERFACE     renderHal = state->renderHal;
8461     CM_HAL_INDEX_PARAM      indexParam;
8462     int32_t                 taskId;
8463     uint32_t                remBindingTables;
8464     int32_t                 bindingTable;
8465     int32_t                 bti;
8466     int32_t                 mediaID;
8467     PRENDERHAL_MEDIA_STATE  mediaState = nullptr;
8468     uint32_t                i;
8469     void                    *cmdBuffer   = nullptr;
8470     PCM_HAL_KERNEL_PARAM    kernelParam = nullptr;
8471     PCM_HAL_TASK_PARAM      taskParam = state->taskParam;
8472     uint32_t                btsizePower2;
8473     uint32_t                vfeCurbeSize = 0;
8474     PRENDERHAL_KRN_ALLOCATION krnAllocations[CM_MAX_KERNELS_PER_TASK];
8475     PMOS_INTERFACE          osInterface = nullptr;
8476 
8477     //-----------------------------------
8478     CM_ASSERT(state);
8479     CM_ASSERT(execGroupParam);
8480     //-----------------------------------
8481 
8482     MOS_ZeroMemory(state->taskParam, sizeof(CM_HAL_TASK_PARAM));
8483     MOS_ZeroMemory(&indexParam, sizeof(CM_HAL_INDEX_PARAM));
8484 
8485     MOS_FillMemory(
8486         state->bti2DIndexTable,
8487         state->cmDeviceParam.max2DSurfaceTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
8488         CM_INVALID_INDEX );
8489 
8490     MOS_FillMemory(
8491         state->bti2DUPIndexTable,
8492         state->cmDeviceParam.max2DSurfaceUPTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
8493         CM_INVALID_INDEX );
8494 
8495     MOS_FillMemory(
8496         state->bti3DIndexTable,
8497         state->cmDeviceParam.max3DSurfaceTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
8498         CM_INVALID_INDEX );
8499 
8500     MOS_FillMemory(
8501         state->btiBufferIndexTable,
8502         state->cmDeviceParam.maxBufferTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
8503         CM_INVALID_INDEX );
8504     MOS_FillMemory(
8505         state->samplerIndexTable,
8506         state->cmDeviceParam.maxSamplerTableSize,
8507         CM_INVALID_INDEX);
8508     MOS_FillMemory(
8509         state->sampler8x8IndexTable,
8510         state->cmDeviceParam.maxSampler8x8TableSize,
8511         CM_INVALID_INDEX);
8512 
8513     // Reset states before execute
8514     // (clear allocations, get GSH allocation index + any additional housekeeping)
8515     state->osInterface->pfnResetOsStates(state->osInterface);
8516     CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnReset(renderHal));
8517 
8518     state->walkerParams.CmWalkerEnable = 0;
8519     state->taskParam->blGpGpuWalkerEnabled = true;
8520 
8521     // Get the Task Id
8522     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetNewTaskId(state, &taskId));
8523 
8524     // Parse the task
8525     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_ParseGroupTask(state, execGroupParam));
8526 
8527     // Reset the SSH configuration according to the property of the task
8528     renderHal->pStateHeap->iBindingTableSize = MOS_ALIGN_CEIL(taskParam->surfacePerBT *               // Reconfigure the binding table size
8529                                                          renderHal->pRenderHalPltInterface->GetBTStateCmdSize(),
8530                                                          renderHal->StateHeapSettings.iBTAlignment);
8531 
8532     taskParam->surfacePerBT = renderHal->pStateHeap->iBindingTableSize / renderHal->pRenderHalPltInterface->GetBTStateCmdSize();
8533 
8534     renderHal->StateHeapSettings.iBindingTables           = renderHal->StateHeapSettings.iBindingTables *          // Reconfigure the binding table number
8535                                                          renderHal->StateHeapSettings.iSurfacesPerBT / taskParam->surfacePerBT;
8536     renderHal->StateHeapSettings.iSurfacesPerBT           = taskParam->surfacePerBT;                           // Reconfigure the surface per BT
8537 
8538     if (execGroupParam->numKernels > (uint32_t)renderHal->StateHeapSettings.iBindingTables)
8539     {
8540         eStatus = MOS_STATUS_INVALID_PARAMETER;
8541         CM_ASSERTMESSAGE("Number of Kernels per task exceeds the number can be hold by binding table");
8542         goto finish;
8543     }
8544 
8545     if (execGroupParam->kernelDebugEnabled && Mos_ResourceIsNull(&state->sipResource.osResource))
8546     {
8547        HalCm_AllocateSipResource( state); // create  sip resource if it does not exist
8548     }
8549 
8550     // Assign a MediaState from the MediaStateHeap
8551     // !!!! THIS MUST BE BEFORE Getting the BATCH_BUFFER !!!
8552     // since this method syncs the batch buffer and media state.
8553     if (state->dshEnabled)
8554     {
8555         if ( execGroupParam->userDefinedMediaState != nullptr )
8556         {
8557             // Preload all kernels
8558             CM_CHK_MOSSTATUS_GOTOFINISH( HalCm_DSH_LoadKernelArray( state, execGroupParam->kernels, execGroupParam->numKernels, krnAllocations ) );
8559 
8560             // use exsiting media state as current state
8561             mediaState = static_cast< PRENDERHAL_MEDIA_STATE >( execGroupParam->userDefinedMediaState );
8562 
8563             // update current state to dsh
8564             renderHal->pStateHeap->pCurMediaState = mediaState;
8565             state->criticalSectionDSH->Acquire();
8566             // Refresh sync tag for all media states in submitted queue
8567             renderHal->pfnRefreshSync( renderHal );
8568             state->criticalSectionDSH->Release();
8569         }
8570         else
8571         {
8572             // Preload all kernels
8573             CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_DSH_LoadKernelArray(state, execGroupParam->kernels, execGroupParam->numKernels, krnAllocations));
8574 
8575             // Obtain media state configuration - Curbe, Samplers (3d/AVS/VA), 8x8 sampler table, Media IDs, Kernel Spill area
8576             RENDERHAL_DYNAMIC_MEDIA_STATE_PARAMS params;
8577 
8578             state->criticalSectionDSH->Acquire();
8579             HalCm_DSH_GetDynamicStateConfiguration(state, &params, execGroupParam->numKernels, execGroupParam->kernels, execGroupParam->kernelCurbeOffset);
8580             // Prepare Media States to accommodate all parameters
8581             mediaState = renderHal->pfnAssignDynamicState(renderHal, &params, RENDERHAL_COMPONENT_CM);
8582             state->criticalSectionDSH->Release();
8583         }
8584     }
8585     else
8586     {
8587         // Assign a MediaState from the MediaStateHeap
8588         // !!!! THIS MUST BE BEFORE Getting the BATCH_BUFFER !!!
8589         // since this method syncs the batch buffer and media state.
8590         mediaState = renderHal->pfnAssignMediaState(renderHal, RENDERHAL_COMPONENT_CM);
8591     }
8592     CM_CHK_NULL_GOTOFINISH_MOSERROR(mediaState);
8593 
8594     // Assign/Reset SSH instance
8595     CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnAssignSshInstance(renderHal));
8596 
8597     if (state->useNewSamplerHeap == false)
8598     {
8599         HalCm_AcquireSamplerStatistics(state);
8600     }
8601 
8602     for (i = 0; i < execGroupParam->numKernels; i++)
8603     {
8604         CM_HAL_INDEX_PARAM indexParam;
8605         MOS_ZeroMemory(&indexParam, sizeof(CM_HAL_INDEX_PARAM));
8606         kernelParam = execGroupParam->kernels[i];
8607 
8608         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupStatesForKernelInitial(state, mediaState, nullptr, taskId, kernelParam, &indexParam,
8609             execGroupParam->kernelCurbeOffset[i], bti, mediaID, krnAllocations[i]));
8610 
8611         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_FinishStatesForKernel(state, mediaState, nullptr, taskId, kernelParam, i, &indexParam,
8612             bti, mediaID, krnAllocations[i]));
8613 
8614         vfeCurbeSize += MOS_ALIGN_CEIL(kernelParam->totalCurbeSize, state->renderHal->dwCurbeBlockAlign);
8615 
8616         if (execGroupParam->conditionalEndBitmap & (uint64_t)1 << i)
8617         {
8618             CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetConditionalEndInfo(state, taskParam->conditionalEndInfo, taskParam->conditionalBBEndParams, i));
8619         }
8620     }
8621 
8622     // Store the Max Payload Sizes in the Task params
8623     state->taskParam->vfeCurbeSize = vfeCurbeSize;
8624     state->taskParam->urbEntrySize = 0;
8625 
8626     // We may have to send additional Binding table commands in command buffer.
8627     // This is needed because the surface offset (from the base on SSH)
8628     // calculation takes into account the max binding tables allocated in the
8629     // SSH.
8630     remBindingTables = renderHal->StateHeapSettings.iBindingTables - execGroupParam->numKernels;
8631 
8632     if (remBindingTables > 0)
8633     {
8634         for (i = 0; i < remBindingTables; i++)
8635         {
8636             CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnAssignBindingTable(
8637                     renderHal,
8638                     &bindingTable));
8639         }
8640     }
8641 
8642     // until now, we know binding table index for debug surface
8643     // let's get system thread
8644     if (execGroupParam->kernelDebugEnabled)
8645     {
8646         CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnGetSipBinary(state));
8647     }
8648 
8649     osInterface = state->osInterface;
8650     osInterface->pfnResetPerfBufferID(osInterface);
8651     if (osInterface->pfnIsPerfTagSet(osInterface) == false)
8652     {
8653         osInterface->pfnIncPerfFrameID(osInterface);
8654         int perfTag = HalCm_GetKernelPerfTag(state, execGroupParam->kernels, execGroupParam->numKernels);
8655         osInterface->pfnSetPerfTag(osInterface, (uint16_t)perfTag);
8656     }
8657 
8658     // Submit HW commands and states
8659     CM_CHK_MOSSTATUS_GOTOFINISH(state->cmHalInterface->SubmitCommands(
8660                      nullptr, taskId, execGroupParam->kernels, &cmdBuffer));
8661 
8662     // Set the Task ID
8663     execGroupParam->taskIdOut = taskId;
8664 
8665     // Set OS data
8666     if(cmdBuffer)
8667     {
8668         execGroupParam->osData = cmdBuffer;
8669     }
8670 
8671     // Update the task ID table
8672     state->taskStatusTable[taskId] = (char)taskId;
8673 
8674 finish:
8675 
8676     if (state->dshEnabled)
8677     {
8678         state->criticalSectionDSH->Acquire();
8679         if (mediaState && eStatus != MOS_STATUS_SUCCESS)
8680         {
8681             // Failed, release media state and heap resources
8682             renderHal->pfnReleaseDynamicState(renderHal, mediaState);
8683         }
8684         else
8685         {
8686             renderHal->pfnSubmitDynamicState(renderHal, mediaState);
8687         }
8688         state->criticalSectionDSH->Release();
8689     }
8690 
8691     return eStatus;
8692 }
8693 
HalCm_ExecuteHintsTask(PCM_HAL_STATE state,PCM_HAL_EXEC_HINTS_TASK_PARAM execHintsParam)8694 MOS_STATUS HalCm_ExecuteHintsTask(
8695     PCM_HAL_STATE                 state,                     // [in] Pointer to CM State
8696     PCM_HAL_EXEC_HINTS_TASK_PARAM execHintsParam)            // [in] Pointer to Task Param
8697 {
8698     MOS_STATUS              eStatus;
8699     PRENDERHAL_INTERFACE    renderHal;
8700     PRENDERHAL_MEDIA_STATE  mediaState;
8701     PMHW_BATCH_BUFFER       batchBuffer;
8702     PCM_HAL_BB_ARGS         bbCmArgs;
8703     PCM_HAL_KERNEL_PARAM    kernelParam;
8704     uint32_t                i;
8705     uint32_t                numTasks;
8706     uint64_t                origKernelIds[CM_MAX_KERNELS_PER_TASK];
8707     int32_t                 taskId;
8708     int32_t                 remBindingTables;
8709     int32_t                 bindingTable;
8710     uint32_t                vfeCurbeSize;
8711     uint32_t                maxInlineDataSize;
8712     uint32_t                maxIndirectDataSize;
8713     int32_t                 *bindingTableEntries;
8714     int32_t                 *mediaIds;
8715     PRENDERHAL_KRN_ALLOCATION *krnAllocations;
8716     PCM_HAL_INDEX_PARAM     indexParams;
8717     bool                    useMediaObjects;
8718     void                    *cmdBuffer;
8719     bool                    splitTask;
8720     bool                    lastTask;
8721     PMOS_INTERFACE          osInterface = nullptr;
8722 
8723     //------------------------------------
8724     CM_ASSERT(state);
8725     CM_ASSERT(execHintsParam);
8726     //------------------------------------
8727 
8728     eStatus             = MOS_STATUS_SUCCESS;
8729     renderHal           = state->renderHal;
8730     mediaState          = nullptr;
8731     batchBuffer         = nullptr;
8732     bindingTableEntries = nullptr;
8733     mediaIds            = nullptr;
8734     krnAllocations      = nullptr;
8735     indexParams         = nullptr;
8736     useMediaObjects      = false;
8737     cmdBuffer           = nullptr;
8738     splitTask            = false;
8739     lastTask             = false;
8740 
8741     if (execHintsParam->numKernels > state->cmDeviceParam.maxKernelsPerTask)
8742     {
8743         eStatus = MOS_STATUS_INVALID_PARAMETER;
8744         CM_ASSERTMESSAGE("Number of Kernels per task exceeds maximum");
8745         goto finish;
8746     }
8747 
8748     bindingTableEntries = (int*)MOS_AllocAndZeroMemory(sizeof(int)*execHintsParam->numKernels);
8749     mediaIds = (int*)MOS_AllocAndZeroMemory(sizeof(int)* execHintsParam->numKernels);
8750     krnAllocations = (PRENDERHAL_KRN_ALLOCATION *)MOS_AllocAndZeroMemory(sizeof(void *)* execHintsParam->numKernels);
8751     indexParams = (PCM_HAL_INDEX_PARAM)MOS_AllocAndZeroMemory(sizeof(CM_HAL_INDEX_PARAM)* execHintsParam->numKernels);
8752     if (!bindingTableEntries || !mediaIds || !krnAllocations || !indexParams)
8753     {
8754         eStatus = MOS_STATUS_INVALID_PARAMETER;
8755         CM_ASSERTMESSAGE("Memory allocation failed in ExecuteHints Task");
8756         goto finish;
8757     }
8758 
8759     // check hints to see if need to split into multiple tasks
8760     numTasks = ( execHintsParam->hints & CM_HINTS_MASK_NUM_TASKS ) >> CM_HINTS_NUM_BITS_TASK_POS;
8761     if( numTasks > 1 )
8762     {
8763         splitTask = true;
8764     }
8765 
8766     MOS_FillMemory(bindingTableEntries, sizeof(int) * execHintsParam->numKernels, CM_INVALID_INDEX);
8767     MOS_FillMemory(mediaIds, sizeof(int) * execHintsParam->numKernels, CM_INVALID_INDEX);
8768     MOS_FillMemory(krnAllocations, sizeof(void *)* execHintsParam->numKernels, 0);
8769 
8770     // Reset states before execute
8771     // (clear allocations, get GSH allocation index + any additional housekeeping)
8772     state->osInterface->pfnResetOsStates(state->osInterface);
8773     CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnReset(renderHal));
8774 
8775     MOS_ZeroMemory(state->taskParam, sizeof(CM_HAL_TASK_PARAM));
8776 
8777     MOS_FillMemory(
8778         state->bti2DIndexTable,
8779         state->cmDeviceParam.max2DSurfaceTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
8780         CM_INVALID_INDEX );
8781 
8782     MOS_FillMemory(
8783         state->bti2DUPIndexTable,
8784         state->cmDeviceParam.max2DSurfaceUPTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
8785         CM_INVALID_INDEX );
8786 
8787     MOS_FillMemory(
8788         state->bti3DIndexTable,
8789         state->cmDeviceParam.max3DSurfaceTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
8790         CM_INVALID_INDEX );
8791 
8792     MOS_FillMemory(
8793         state->btiBufferIndexTable,
8794         state->cmDeviceParam.maxBufferTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
8795         CM_INVALID_INDEX );
8796 
8797     MOS_FillMemory(
8798         state->samplerIndexTable,
8799         state->cmDeviceParam.maxSamplerTableSize,
8800         CM_INVALID_INDEX);
8801 
8802     MOS_FillMemory(
8803         state->sampler8x8IndexTable,
8804         state->cmDeviceParam.maxSampler8x8TableSize,
8805         CM_INVALID_INDEX);
8806 
8807     state->walkerParams.CmWalkerEnable = 0;
8808 
8809     vfeCurbeSize = 0;
8810     maxInlineDataSize = 0;
8811     maxIndirectDataSize = 0;
8812 
8813     MOS_ZeroMemory(&origKernelIds, CM_MAX_KERNELS_PER_TASK * sizeof(uint64_t));
8814 
8815     // Get the Task Id
8816     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetNewTaskId(state, &taskId));
8817 
8818     // Parse the task
8819     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_ParseHintsTask(state, execHintsParam));
8820 
8821     // Assign a MediaState from the MediaStateHeap
8822     // !!!! THIS MUST BE BEFORE Getting the BATCH_BUFFER !!!
8823     // since this method syncs the batch buffer and media state.
8824     if (state->dshEnabled)
8825     {
8826         if ( execHintsParam->userDefinedMediaState != nullptr )
8827         {
8828             // use exsiting media state as current state
8829             mediaState = static_cast< PRENDERHAL_MEDIA_STATE >( execHintsParam->userDefinedMediaState );
8830 
8831             // update current state to dsh
8832             renderHal->pStateHeap->pCurMediaState = mediaState;
8833             // Refresh sync tag for all media states in submitted queue
8834             state->criticalSectionDSH->Acquire();
8835             renderHal->pfnRefreshSync( renderHal );
8836             state->criticalSectionDSH->Release();
8837         }
8838         else
8839         {
8840             // Obtain media state configuration - Curbe, Samplers (3d/AVS/VA), 8x8 sampler table, Media IDs, Kernel Spill area
8841             RENDERHAL_DYNAMIC_MEDIA_STATE_PARAMS params;
8842             state->criticalSectionDSH->Acquire();
8843             HalCm_DSH_GetDynamicStateConfiguration(state, &params, execHintsParam->numKernels, execHintsParam->kernels, execHintsParam->kernelCurbeOffset);
8844 
8845             // Prepare Media States to accommodate all parameters - Curbe, Samplers (3d/AVS/VA), 8x8 sampler table, Media IDs
8846             mediaState = renderHal->pfnAssignDynamicState(renderHal, &params, RENDERHAL_COMPONENT_CM);
8847             state->criticalSectionDSH->Release();
8848         }
8849     }
8850     else
8851     {
8852         mediaState = renderHal->pfnAssignMediaState(renderHal, RENDERHAL_COMPONENT_CM);
8853     }
8854     CM_CHK_NULL_GOTOFINISH_MOSERROR(mediaState);
8855 
8856     if (state->useNewSamplerHeap == false)
8857     {
8858         HalCm_AcquireSamplerStatistics(state);
8859     }
8860 
8861     // Assign/Reset SSH instance
8862     CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnAssignSshInstance(renderHal));
8863 
8864     if (!state->walkerParams.CmWalkerEnable)
8865     {
8866         if( splitTask )
8867         {
8868             // save original kernel IDs for kernel binary re-use in GSH
8869             for( i = 0; i < execHintsParam->numKernels; ++i )
8870             {
8871                 origKernelIds[i] = execHintsParam->kernels[i]->kernelId;
8872             }
8873 
8874             // need to add tag to kernel IDs to distinguish batch buffer
8875             CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_AddKernelIDTag(execHintsParam->kernels, execHintsParam->numKernels, numTasks, execHintsParam->numTasksGenerated));
8876         }
8877 
8878         // Get the Batch buffer
8879         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetBatchBuffer(state, execHintsParam->numKernels, execHintsParam->kernels, &batchBuffer));
8880 
8881         if( splitTask )
8882         {
8883             // restore kernel IDs for kernel binary re-use in GSH
8884             for( i = 0; i < execHintsParam->numKernels; ++i )
8885             {
8886                 execHintsParam->kernels[i]->kernelId = origKernelIds[i];
8887             }
8888         }
8889 
8890         // Lock the batch buffer
8891         CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer->pPrivateData);
8892         bbCmArgs = (PCM_HAL_BB_ARGS)batchBuffer->pPrivateData;
8893         if ( (bbCmArgs->refCount == 1) ||
8894              ( state->taskParam->reuseBBUpdateMask == 1) )
8895         {
8896             CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnLockBB(renderHal, batchBuffer));
8897         }
8898     }
8899 
8900     // Load all kernels in the same state heap - expand ISH if necessary BEFORE programming media states.
8901     // This is better than having to expand ISH in the middle of loading, when part of MediaIDs are
8902     // already programmed - not a problem in the old implementation where it would simply remove old
8903     // kernels out of the way.
8904     if (state->dshEnabled)
8905     {
8906         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_DSH_LoadKernelArray(state, execHintsParam->kernels, execHintsParam->numKernels, krnAllocations));
8907     }
8908 
8909     // 0: media walker
8910     // 1: media object
8911     if( (execHintsParam->hints & CM_HINTS_MASK_MEDIAOBJECT) == CM_HINTS_MASK_MEDIAOBJECT )
8912     {
8913         for (i = 0; i < execHintsParam->numKernels; ++i)
8914         {
8915             CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupStatesForKernelInitial(state, mediaState, batchBuffer, taskId, execHintsParam->kernels[i], &indexParams[i],
8916                 execHintsParam->kernelCurbeOffset[i], bindingTableEntries[i], mediaIds[i], krnAllocations[i]));
8917         }
8918 
8919         CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer);
8920 
8921         CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_FinishStatesForKernelMix(state, batchBuffer, taskId, execHintsParam->kernels,
8922             indexParams, bindingTableEntries, mediaIds, krnAllocations, execHintsParam->numKernels, execHintsParam->hints, execHintsParam->isLastTask));
8923 
8924         for( i = 0; i < execHintsParam->numKernels; ++i)
8925         {
8926             kernelParam = execHintsParam->kernels[i];
8927             vfeCurbeSize += MOS_ALIGN_CEIL(kernelParam->totalCurbeSize, state->renderHal->dwCurbeBlockAlign);
8928             if( kernelParam->payloadSize > maxInlineDataSize)
8929             {
8930                 maxInlineDataSize = kernelParam->payloadSize;
8931             }
8932             if( kernelParam->indirectDataParam.indirectDataSize > maxIndirectDataSize )
8933             {
8934                 maxIndirectDataSize = kernelParam->indirectDataParam.indirectDataSize;
8935             }
8936         }
8937 
8938         // Store the Max Payload Sizes in the Task Param
8939         state->taskParam->vfeCurbeSize = vfeCurbeSize;
8940         if( maxIndirectDataSize)
8941         {
8942             state->taskParam->vfeCurbeSize = maxIndirectDataSize;
8943         }
8944         else
8945         {
8946             state->taskParam->urbEntrySize = maxInlineDataSize;
8947         }
8948 
8949         // We may have to send additional Binding table commands in command buffer.
8950         // This is needed because the surface offset (from the base on SSH)
8951         // calculation takes into account the max binding tables allocated in the
8952         // SSH.
8953         remBindingTables = state->cmDeviceParam.maxKernelsPerTask -
8954             execHintsParam->numKernels;
8955 
8956         if( remBindingTables > 0)
8957         {
8958             for( i = 0; i < (uint32_t)remBindingTables; ++i)
8959             {
8960                 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnAssignBindingTable(
8961                     renderHal,
8962                     &bindingTable));
8963             }
8964         }
8965 
8966         osInterface = state->osInterface;
8967         osInterface->pfnResetPerfBufferID(osInterface);
8968         if (osInterface->pfnIsPerfTagSet(osInterface) == false)
8969         {
8970             osInterface->pfnIncPerfFrameID(osInterface);
8971             int perfTag = HalCm_GetKernelPerfTag(state, execHintsParam->kernels, execHintsParam->numKernels);
8972             osInterface->pfnSetPerfTag(osInterface, (uint16_t)perfTag);
8973         }
8974 
8975         // Submit HW commands and states
8976         CM_CHK_MOSSTATUS_GOTOFINISH(state->cmHalInterface->SubmitCommands(
8977                         batchBuffer, taskId, execHintsParam->kernels, &cmdBuffer));
8978 
8979         // Set the Task ID
8980         execHintsParam->taskIdOut = taskId;
8981 
8982         // Set OS data
8983         if( cmdBuffer )
8984         {
8985             execHintsParam->osData = cmdBuffer;
8986         }
8987 
8988         // Update the task ID table
8989         state->taskStatusTable[taskId] = (char)taskId;
8990     }
8991     else
8992     {
8993         // use media walker
8994         // unimplemented for now
8995         CM_ASSERTMESSAGE("Error: Media walker is not supported.");
8996         eStatus = MOS_STATUS_UNKNOWN;
8997     }
8998 
8999 finish:
9000 
9001     if (state->dshEnabled)
9002     {
9003         state->criticalSectionDSH->Acquire();
9004         if (mediaState && eStatus != MOS_STATUS_SUCCESS)
9005         {
9006             // Failed, release media state and heap resources
9007             renderHal->pfnReleaseDynamicState(renderHal, mediaState);
9008         }
9009         else
9010         {
9011             renderHal->pfnSubmitDynamicState(renderHal, mediaState);
9012         }
9013         state->criticalSectionDSH->Release();
9014     }
9015 
9016     if (batchBuffer) // for MediaWalker, batchBuffer is empty
9017     {
9018         if (batchBuffer->bLocked)
9019         {
9020             // Only happens in Error cases
9021             if (batchBuffer->pPrivateData && ((PCM_HAL_BB_ARGS)batchBuffer->pPrivateData)->refCount == 1)
9022             {
9023                 renderHal->pfnUnlockBB(renderHal, batchBuffer);
9024             }
9025             else if (batchBuffer->pPrivateData == nullptr)
9026             {
9027                 eStatus = MOS_STATUS_NULL_POINTER;
9028             }
9029         }
9030     }
9031 
9032     // free memory
9033     if( bindingTableEntries )          MOS_FreeMemory(bindingTableEntries);
9034     if( mediaIds )                     MOS_FreeMemory(mediaIds);
9035     if( krnAllocations )               MOS_FreeMemory(krnAllocations);
9036     if( indexParams )                  MOS_FreeMemory( indexParams );
9037 
9038     return eStatus;
9039 }
9040 
9041 //*-----------------------------------------------------------------------------
9042 //| Purpose:    Send Commands to HW
9043 //| Returns:    Get the HAL Max values
9044 //*-----------------------------------------------------------------------------
HalCm_GetMaxValues(PCM_HAL_STATE state,PCM_HAL_MAX_VALUES maxValues)9045 MOS_STATUS HalCm_GetMaxValues(
9046     PCM_HAL_STATE           state,                                             // [in]  Pointer to CM State
9047     PCM_HAL_MAX_VALUES      maxValues)                                         // [out] Pointer to Max values
9048 {
9049     PRENDERHAL_INTERFACE  renderHal;
9050 
9051     renderHal = state->renderHal;
9052 
9053     maxValues->maxTasks                         = state->cmDeviceParam.maxTasks;
9054     maxValues->maxKernelsPerTask                = CM_MAX_KERNELS_PER_TASK;
9055     maxValues->maxKernelBinarySize              = state->cmDeviceParam.maxKernelBinarySize;
9056     maxValues->maxSpillSizePerHwThread          = state->cmDeviceParam.maxPerThreadScratchSpaceSize;
9057     maxValues->maxSamplerTableSize              = CM_MAX_SAMPLER_TABLE_SIZE;
9058     maxValues->maxBufferTableSize               = CM_MAX_BUFFER_SURFACE_TABLE_SIZE;
9059     maxValues->max2DSurfaceTableSize            = CM_MAX_2D_SURFACE_TABLE_SIZE;
9060     maxValues->max3DSurfaceTableSize            = CM_MAX_3D_SURFACE_TABLE_SIZE;
9061     maxValues->maxArgsPerKernel                 = CM_MAX_ARGS_PER_KERNEL;
9062     maxValues->maxUserThreadsPerTask            = CM_MAX_USER_THREADS;
9063     maxValues->maxUserThreadsPerTaskNoThreadArg = CM_MAX_USER_THREADS_NO_THREADARG;
9064     maxValues->maxArgByteSizePerKernel          = CM_MAX_ARG_BYTE_PER_KERNEL;
9065     maxValues->maxSurfacesPerKernel             = renderHal->pHwCaps->dwMaxBTIndex;
9066     maxValues->maxSamplersPerKernel             = renderHal->pHwCaps->dwMaxUnormSamplers;
9067     maxValues->maxHwThreads                     = renderHal->pHwCaps->dwMaxThreads;
9068 
9069     return MOS_STATUS_SUCCESS;
9070 }
9071 
9072 //*-----------------------------------------------------------------------------
9073 //| Purpose:    Get the HAL Max extended values
9074 //| Returns:    Get the HAL Max extended values
9075 //*-----------------------------------------------------------------------------
HalCm_GetMaxValuesEx(PCM_HAL_STATE state,PCM_HAL_MAX_VALUES_EX maxValuesEx)9076 MOS_STATUS HalCm_GetMaxValuesEx(
9077     PCM_HAL_STATE           state,                                             // [in]  Pointer to CM State
9078     PCM_HAL_MAX_VALUES_EX   maxValuesEx)                                       // [out] Pointer to extended Max values
9079 {
9080     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
9081     maxValuesEx->max2DUPSurfaceTableSize = CM_MAX_2D_SURFACE_UP_TABLE_SIZE;
9082     maxValuesEx->maxSampler8x8TableSize = CM_MAX_SAMPLER_8X8_TABLE_SIZE;
9083     maxValuesEx->maxCURBESizePerKernel = CM_MAX_CURBE_SIZE_PER_KERNEL;
9084     maxValuesEx->maxCURBESizePerTask = CM_MAX_CURBE_SIZE_PER_TASK;
9085     maxValuesEx->maxIndirectDataSizePerKernel = CM_MAX_INDIRECT_DATA_SIZE_PER_KERNEL;
9086 
9087     //MaxThreadWidth x MaxThreadHeight x ColorCount
9088     maxValuesEx->maxUserThreadsPerMediaWalker = \
9089                             state->cmHalInterface->GetMediaWalkerMaxThreadWidth()* \
9090                             state->cmHalInterface->GetMediaWalkerMaxThreadHeight() * \
9091                             CM_THREADSPACE_MAX_COLOR_COUNT;
9092 
9093     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetMaxThreadCountPerThreadGroup( state, &maxValuesEx->maxUserThreadsPerThreadGroup ) );
9094 
9095 finish:
9096     return eStatus;
9097 }
9098 
9099 //*-----------------------------------------------------------------------------
9100 //| Purpose:    Register Sampler
9101 //| Returns:    Result of the operation.
9102 //*-----------------------------------------------------------------------------
HalCm_RegisterSampler(PCM_HAL_STATE state,PCM_HAL_SAMPLER_PARAM param)9103 MOS_STATUS HalCm_RegisterSampler(
9104     PCM_HAL_STATE           state,                                             // [in]  Pointer to CM State
9105     PCM_HAL_SAMPLER_PARAM   param)                                             // [in]  Pointer to Sampler Param
9106 {
9107     MOS_STATUS              eStatus;
9108     PMHW_SAMPLER_STATE_PARAM entry;
9109     uint32_t                i;
9110 
9111     eStatus      = MOS_STATUS_SUCCESS;
9112     entry  = nullptr;
9113 
9114     // Find a free slot
9115     for (i = 0; i < state->cmDeviceParam.maxSamplerTableSize; i++)
9116     {
9117         if (!state->samplerTable[i].bInUse)
9118         {
9119             entry              = &state->samplerTable[i];
9120             param->handle      = (uint32_t)i;
9121             break;
9122         }
9123     }
9124 
9125     if (!entry)
9126     {
9127         eStatus = MOS_STATUS_INVALID_PARAMETER;
9128         CM_ASSERTMESSAGE("Sampler table is full");
9129         goto finish;
9130     }
9131 
9132     entry->SamplerType  = MHW_SAMPLER_TYPE_3D;
9133     if (state->useNewSamplerHeap == true)
9134     {
9135         entry->ElementType = MHW_Sampler1Element;
9136     }
9137     else
9138     {
9139         entry->ElementType = MHW_Sampler4Elements;
9140     }
9141     CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnGetGfxMapFilter(param->minFilter,  &entry->Unorm.MinFilter));
9142     CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnGetGfxMapFilter(param->magFilter,  &entry->Unorm.MagFilter));
9143     CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnGetGfxTextAddress(param->addressU, &entry->Unorm.AddressU));
9144     CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnGetGfxTextAddress(param->addressV, &entry->Unorm.AddressV));
9145     CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnGetGfxTextAddress(param->addressW, &entry->Unorm.AddressW));
9146 
9147     entry->Unorm.SurfaceFormat = (MHW_SAMPLER_SURFACE_PIXEL_TYPE)param->surfaceFormat;
9148     switch (entry->Unorm.SurfaceFormat)
9149     {
9150         case MHW_SAMPLER_SURFACE_PIXEL_UINT:
9151             entry->Unorm.BorderColorRedU = param->borderColorRedU;
9152             entry->Unorm.BorderColorGreenU = param->borderColorGreenU;
9153             entry->Unorm.BorderColorBlueU = param->borderColorBlueU;
9154             entry->Unorm.BorderColorAlphaU = param->borderColorAlphaU;
9155             break;
9156         case MHW_SAMPLER_SURFACE_PIXEL_SINT:
9157             entry->Unorm.BorderColorRedS = param->borderColorRedS;
9158             entry->Unorm.BorderColorGreenS = param->borderColorGreenS;
9159             entry->Unorm.BorderColorBlueS = param->borderColorBlueS;
9160             entry->Unorm.BorderColorAlphaS = param->borderColorAlphaS;
9161             break;
9162         default:
9163             entry->Unorm.BorderColorRedF = param->borderColorRedF;
9164             entry->Unorm.BorderColorGreenF = param->borderColorGreenF;
9165             entry->Unorm.BorderColorBlueF = param->borderColorBlueF;
9166             entry->Unorm.BorderColorAlphaF = param->borderColorAlphaF;
9167     }
9168     entry->Unorm.bBorderColorIsValid = true;
9169 
9170     entry->bInUse = true;
9171 
9172 finish:
9173     return eStatus;
9174 }
9175 
9176 //*-----------------------------------------------------------------------------
9177 //| Purpose:    UnRegister Sampler
9178 //| Returns:    Result of the operation.
9179 //*-----------------------------------------------------------------------------
HalCm_UnRegisterSampler(PCM_HAL_STATE state,uint32_t handle)9180 MOS_STATUS HalCm_UnRegisterSampler(
9181     PCM_HAL_STATE               state,                                         // [in]  Pointer to CM State
9182     uint32_t                    handle)                                       // [in]  Pointer to Sampler Param
9183 {
9184     MOS_STATUS              eStatus;
9185     PMHW_SAMPLER_STATE_PARAM entry;
9186 
9187     eStatus = MOS_STATUS_SUCCESS;
9188 
9189     if (handle >= state->cmDeviceParam.maxSamplerTableSize)
9190     {
9191         eStatus = MOS_STATUS_INVALID_HANDLE;
9192         CM_ASSERTMESSAGE("Invalid handle '%d'", handle);
9193         goto finish;
9194     }
9195 
9196     entry = &state->samplerTable[handle];
9197 
9198     // need to clear the state entirely instead of just setting bInUse to false
9199     MOS_ZeroMemory(entry, sizeof(MHW_SAMPLER_STATE_PARAM));
9200 
9201 finish:
9202     return eStatus;
9203 }
9204 
9205 //*-----------------------------------------------------------------------------
9206 //| Purpose:    Register Sampler8x8
9207 //| Returns:    Result of the operation.
9208 //*-----------------------------------------------------------------------------
HalCm_RegisterSampler8x8(PCM_HAL_STATE state,PCM_HAL_SAMPLER_8X8_PARAM param)9209 MOS_STATUS HalCm_RegisterSampler8x8(
9210     PCM_HAL_STATE                state,
9211     PCM_HAL_SAMPLER_8X8_PARAM    param)
9212 {
9213     return state->cmHalInterface->RegisterSampler8x8(param);
9214 }
9215 
9216 //*-----------------------------------------------------------------------------
9217 //| Purpose:    UnRegister Sampler
9218 //| Returns:    Result of the operation.
9219 //*-----------------------------------------------------------------------------
HalCm_UnRegisterSampler8x8(PCM_HAL_STATE state,uint32_t handle)9220 MOS_STATUS HalCm_UnRegisterSampler8x8(
9221     PCM_HAL_STATE               state,                                         // [in]  Pointer to CM State
9222     uint32_t                    handle)                                       // [in]  Pointer to Sampler8x8 Param
9223 {
9224     MOS_STATUS                  eStatus;
9225     uint32_t                    index8x8;
9226     PMHW_SAMPLER_STATE_PARAM    entry;
9227     PCM_HAL_SAMPLER_8X8_ENTRY   sampler8x8Entry;
9228 
9229     eStatus = MOS_STATUS_SUCCESS;
9230 
9231     if (handle >= state->cmDeviceParam.maxSamplerTableSize) {
9232         eStatus = MOS_STATUS_INVALID_HANDLE;
9233         CM_ASSERTMESSAGE("Invalid handle '%d'", handle);
9234         goto finish;
9235     }
9236 
9237     entry = &state->samplerTable[handle];
9238     entry->bInUse = false;
9239 
9240     if ( entry->SamplerType == MHW_SAMPLER_TYPE_AVS )
9241     {
9242         index8x8 = entry->Avs.stateID;
9243         if ( index8x8 >= state->cmDeviceParam.maxSampler8x8TableSize )
9244         {
9245             eStatus = MOS_STATUS_INVALID_HANDLE;
9246             CM_ASSERTMESSAGE( "Invalid 8x8 handle '%d'", handle );
9247             goto finish;
9248         }
9249 
9250         sampler8x8Entry = &state->sampler8x8Table[ index8x8 ];
9251     sampler8x8Entry->inUse = false;
9252     }
9253 
9254     // need to clear the state entirely instead of just setting bInUse to false
9255     MOS_ZeroMemory(entry, sizeof(MHW_SAMPLER_STATE_PARAM));
9256 finish:
9257     return eStatus;
9258 }
9259 
9260 //*-----------------------------------------------------------------------------
9261 //| Purpose:    Frees the buffer and removes from the table
9262 //| Returns:    Result of the operation.
9263 //*-----------------------------------------------------------------------------
HalCm_FreeBuffer(PCM_HAL_STATE state,uint32_t handle)9264 MOS_STATUS HalCm_FreeBuffer(
9265     PCM_HAL_STATE           state,                                             // [in]  Pointer to CM State
9266     uint32_t                handle)                                           // [in]  Pointer to Buffer Param
9267 {
9268     MOS_STATUS              eStatus;
9269     PCM_HAL_BUFFER_ENTRY    entry;
9270     PMOS_INTERFACE          osInterface;
9271     MOS_GFXRES_FREE_FLAGS   resFreeFlags = {0};
9272 
9273     resFreeFlags.AssumeNotInUse = 1;
9274     eStatus        = MOS_STATUS_SUCCESS;
9275     osInterface    = state->osInterface;
9276 
9277     // Get the Buffer Entry
9278     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetBufferEntry(state, handle, &entry));
9279 
9280     if (state->advExecutor)
9281     {
9282         state->advExecutor->DeleteBufferStateMgr(entry->surfStateMgr);
9283     }
9284     if (entry->isAllocatedbyCmrtUmd)
9285     {
9286         osInterface->pfnFreeResourceWithFlag(osInterface, &entry->osResource, resFreeFlags.Value);
9287     }
9288     else
9289     {
9290         HalCm_OsResource_Unreference(&entry->osResource);
9291     }
9292     osInterface->pfnResetResourceAllocationIndex(osInterface, &entry->osResource);
9293     entry->size = 0;
9294     entry->address = nullptr;
9295 
9296 finish:
9297     return eStatus;
9298 }
9299 
9300 //*-----------------------------------------------------------------------------
9301 //| Purpose:    Set surface read flag used in on demand sync
9302 //| Returns:    Result of the operation.
9303 //*-----------------------------------------------------------------------------
HalCm_SetSurfaceReadFlag(PCM_HAL_STATE state,uint32_t handle,bool readSync,MOS_GPU_CONTEXT gpuContext)9304 MOS_STATUS HalCm_SetSurfaceReadFlag(
9305     PCM_HAL_STATE           state,                                             // [in]  Pointer to CM State
9306     uint32_t                handle,                                           // [in]  index of surface 2d
9307     bool                    readSync,
9308     MOS_GPU_CONTEXT         gpuContext)
9309 {
9310     MOS_STATUS                 eStatus  = MOS_STATUS_SUCCESS;
9311     PCM_HAL_SURFACE2D_ENTRY    entry;
9312 
9313     // Get the Buffer Entry
9314     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurface2DEntry(state, handle, &entry));
9315 
9316     if (HalCm_IsValidGpuContext(gpuContext))
9317     {
9318         entry->readSyncs[gpuContext] = readSync;
9319         if (state->advExecutor)
9320         {
9321             state->advExecutor->Set2DRenderTarget(entry->surfStateMgr, !readSync);
9322         }
9323     }
9324     else
9325     {
9326         return MOS_STATUS_UNKNOWN;
9327     }
9328 
9329 finish:
9330     return eStatus;
9331 }
9332 
9333 //*-----------------------------------------------------------------------------
9334 //| Purpose:    Read the data from buffer and return
9335 //| Returns:    Result of the operation.
9336 //*-----------------------------------------------------------------------------
HalCm_LockBuffer(PCM_HAL_STATE state,PCM_HAL_BUFFER_PARAM param)9337 MOS_STATUS HalCm_LockBuffer(
9338     PCM_HAL_STATE           state,                                             // [in]  Pointer to CM State
9339     PCM_HAL_BUFFER_PARAM    param)                                             // [in]  Pointer to Buffer Param
9340 {
9341     MOS_STATUS              eStatus;
9342     PCM_HAL_BUFFER_ENTRY    entry;
9343     PMOS_INTERFACE          osInterface;
9344     MOS_LOCK_PARAMS         lockFlags;
9345     eStatus        = MOS_STATUS_SUCCESS;
9346     osInterface    = state->osInterface;
9347 
9348     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetBufferEntry(state, param->handle, &entry));
9349     if ((param->lockFlag != CM_HAL_LOCKFLAG_READONLY) && (param->lockFlag != CM_HAL_LOCKFLAG_WRITEONLY) )
9350     {
9351         eStatus = MOS_STATUS_INVALID_HANDLE;
9352         CM_ASSERTMESSAGE("Invalid lock flag!");
9353         eStatus = MOS_STATUS_UNKNOWN;
9354         goto finish;
9355     }
9356 
9357     // RegisterResource will be called in AddResourceToHWCmd. It is not allowed to be called by hal explicitly
9358     if (!osInterface->apoMosEnabled)
9359     {
9360         CM_CHK_HRESULT_GOTOFINISH_MOSERROR(
9361             osInterface->pfnRegisterResource(osInterface, &entry->osResource, true, true));
9362     }
9363 
9364     // Lock the resource
9365     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
9366 
9367     if (param->lockFlag == CM_HAL_LOCKFLAG_READONLY)
9368     {
9369         lockFlags.ReadOnly = true;
9370     }
9371     else
9372     {
9373         lockFlags.WriteOnly = true;
9374     }
9375 
9376     lockFlags.ForceCached = true;
9377     param->data = osInterface->pfnLockResource(
9378                     osInterface,
9379                     &entry->osResource,
9380                     &lockFlags);
9381     CM_CHK_NULL_GOTOFINISH_MOSERROR(param->data);
9382 
9383 finish:
9384     return eStatus;
9385 }
9386 
9387 //*-----------------------------------------------------------------------------
9388 //| Purpose:    Writes the data to buffer
9389 //| Returns:    Result of the operation.
9390 //*-----------------------------------------------------------------------------
HalCm_UnlockBuffer(PCM_HAL_STATE state,PCM_HAL_BUFFER_PARAM param)9391 MOS_STATUS HalCm_UnlockBuffer(
9392     PCM_HAL_STATE           state,                                             // [in]  Pointer to CM State
9393     PCM_HAL_BUFFER_PARAM    param)                                             // [in]  Pointer to Buffer Param
9394 {
9395     MOS_STATUS              eStatus;
9396     PCM_HAL_BUFFER_ENTRY    entry;
9397     PMOS_INTERFACE          osInterface;
9398 
9399     eStatus        = MOS_STATUS_SUCCESS;
9400     osInterface    = state->osInterface;
9401 
9402     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetBufferEntry(state, param->handle, &entry));
9403 
9404     CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnUnlockResource(osInterface, &entry->osResource));
9405 
9406 finish:
9407     return eStatus;
9408 }
9409 
9410 //*-----------------------------------------------------------------------------
9411 //| Purpose:    Frees the buffer and removes from the table
9412 //| Returns:    Result of the operation.
9413 //*-----------------------------------------------------------------------------
HalCm_FreeSurface2DUP(PCM_HAL_STATE state,uint32_t handle)9414 MOS_STATUS HalCm_FreeSurface2DUP(
9415     PCM_HAL_STATE           state,                                             // [in]  Pointer to CM State
9416     uint32_t                handle)                                           // [in]  Pointer to Buffer Param
9417 {
9418     MOS_STATUS                    eStatus;
9419     PCM_HAL_SURFACE2D_UP_ENTRY    entry;
9420     PMOS_INTERFACE                osInterface;
9421     MOS_GFXRES_FREE_FLAGS         resFreeFlags = {0};
9422 
9423     resFreeFlags.AssumeNotInUse = 1;
9424     eStatus        = MOS_STATUS_SUCCESS;
9425     osInterface    = state->osInterface;
9426 
9427     // Get the Buffer Entry
9428     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetResourceUPEntry(state, handle, &entry));
9429 
9430     if (state->advExecutor)
9431     {
9432         state->advExecutor->Delete2Dor3DStateMgr(entry->surfStateMgr);
9433     }
9434 
9435     osInterface->pfnFreeResourceWithFlag(osInterface, &entry->osResource, resFreeFlags.Value);
9436 
9437     osInterface->pfnResetResourceAllocationIndex(osInterface, &entry->osResource);
9438     entry->width = 0;
9439 
9440 finish:
9441     return eStatus;
9442 }
9443 
9444 //*-----------------------------------------------------------------------------
9445 //| Purpose:    Get 2D surface pitch and physical size
9446 //| Returns:    Result of the operation.
9447 //*-----------------------------------------------------------------------------
HalCm_GetSurface2DTileYPitch(PCM_HAL_STATE state,PCM_HAL_SURFACE2D_PARAM param)9448 MOS_STATUS HalCm_GetSurface2DTileYPitch(
9449     PCM_HAL_STATE                state,                                             // [in]  Pointer to CM State
9450     PCM_HAL_SURFACE2D_PARAM      param)                                        // [in]  Pointer to Buffer Param
9451 {
9452     MOS_STATUS                  eStatus;
9453     MOS_SURFACE                 surface;
9454     PRENDERHAL_INTERFACE        renderHal;
9455     uint32_t                    index;
9456     RENDERHAL_GET_SURFACE_INFO  info;
9457 
9458     //-----------------------------------------------
9459     CM_ASSERT(state);
9460     //-----------------------------------------------
9461 
9462     eStatus       = MOS_STATUS_UNKNOWN;
9463     renderHal     = state->renderHal;
9464     index         = param->handle;
9465 
9466     // Get Details of 2D surface and fill the surface
9467     MOS_ZeroMemory(&surface, sizeof(surface));
9468 
9469     surface.OsResource  = state->umdSurf2DTable[index].osResource;
9470     surface.dwWidth     = state->umdSurf2DTable[index].width;
9471     surface.dwHeight    = state->umdSurf2DTable[index].height;
9472     surface.Format      = state->umdSurf2DTable[index].format;
9473     surface.dwDepth     = 1;
9474 
9475     MOS_ZeroMemory(&info, sizeof(RENDERHAL_GET_SURFACE_INFO));
9476 
9477     CM_CHK_MOSSTATUS_GOTOFINISH(RenderHal_GetSurfaceInfo(
9478         state->osInterface,
9479         &info,
9480         &surface));
9481 
9482     param->pitch      = surface.dwPitch;
9483 
9484 finish:
9485     return eStatus;
9486 }
9487 
9488 //*-----------------------------------------------------------------------------
9489 //| Purpose:    Sets width and height values for 2D surface state
9490 //| Returns:    Result of the operation.
9491 //*-----------------------------------------------------------------------------
HalCm_Set2DSurfaceStateParam(PCM_HAL_STATE state,PCM_HAL_SURFACE2D_SURFACE_STATE_PARAM param,uint32_t aliasIndex,uint32_t handle)9492 MOS_STATUS HalCm_Set2DSurfaceStateParam(
9493      PCM_HAL_STATE                            state,
9494      PCM_HAL_SURFACE2D_SURFACE_STATE_PARAM    param,
9495      uint32_t                                 aliasIndex,
9496      uint32_t                                 handle)
9497 {
9498     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9499 
9500     CM_CHK_NULL_GOTOFINISH_MOSERROR(state);
9501     CM_CHK_NULL_GOTOFINISH_MOSERROR(param);
9502 
9503     if (aliasIndex < state->surfaceArraySize)
9504     {
9505         state->umdSurf2DTable[handle].surfStateSet = true;
9506     }
9507     state->umdSurf2DTable[handle].surfaceStateParam[
9508         aliasIndex / state->surfaceArraySize] = *param;
9509 
9510 finish:
9511     return eStatus;
9512 }
9513 
9514 //*-----------------------------------------------------------------------------
9515 //| Purpose:    Sets width and height values for 2D surface state
9516 //| Returns:    Result of the operation.
9517 //*-----------------------------------------------------------------------------
HalCm_SetBufferSurfaceStateParameters(PCM_HAL_STATE state,PCM_HAL_BUFFER_SURFACE_STATE_PARAM param)9518 MOS_STATUS HalCm_SetBufferSurfaceStateParameters(
9519      PCM_HAL_STATE                            state,
9520      PCM_HAL_BUFFER_SURFACE_STATE_PARAM       param)
9521 {
9522     MOS_STATUS eStatus;
9523     uint32_t size;
9524     uint32_t offset;
9525     uint32_t index;
9526     uint32_t aliasIndex;
9527 
9528     CM_CHK_NULL_GOTOFINISH_MOSERROR(state);
9529     CM_CHK_NULL_GOTOFINISH_MOSERROR(param);
9530 
9531     eStatus     = MOS_STATUS_SUCCESS;
9532     index = param->handle;
9533     aliasIndex = param->aliasIndex;
9534 
9535     if (aliasIndex < state->surfaceArraySize)
9536         state->bufferTable[index].surfStateSet = true;
9537 
9538     state->bufferTable[index].surfaceStateEntry[aliasIndex / state->surfaceArraySize].surfaceStateSize = param->size;
9539     state->bufferTable[index].surfaceStateEntry[aliasIndex / state->surfaceArraySize].surfaceStateOffset = param->offset;
9540     state->bufferTable[index].surfaceStateEntry[aliasIndex / state->surfaceArraySize].surfaceStateMOCS = param->mocs;
9541 
9542 finish:
9543     return eStatus;
9544 }
9545 
9546 //*-----------------------------------------------------------------------------
9547 //| Purpose:    Sets mocs value for surface
9548 //| Returns:    Result of the operation.
9549 //*-----------------------------------------------------------------------------
HalCm_SetSurfaceMOCS(PCM_HAL_STATE state,uint32_t handle,uint16_t mocs,uint32_t argKind)9550 MOS_STATUS HalCm_SetSurfaceMOCS(
9551      PCM_HAL_STATE                  state,
9552      uint32_t                       handle,
9553      uint16_t                       mocs,
9554      uint32_t                       argKind)
9555 {
9556     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9557 
9558     switch (argKind)
9559     {
9560         case CM_ARGUMENT_SURFACEBUFFER:
9561             state->bufferTable[handle].memObjCtl = mocs;
9562             if (state->advExecutor)
9563             {
9564                 state->advExecutor->SetBufferMemoryObjectControl(state->bufferTable[handle].surfStateMgr, mocs);
9565             }
9566             break;
9567         case CM_ARGUMENT_SURFACE2D:
9568         case CM_ARGUMENT_SURFACE2D_SAMPLER:
9569         case CM_ARGUMENT_SURFACE_SAMPLER8X8_AVS:
9570         case CM_ARGUMENT_SURFACE_SAMPLER8X8_VA:
9571             state->umdSurf2DTable[handle].memObjCtl = mocs;
9572             if (state->advExecutor)
9573             {
9574                 state->advExecutor->Set2Dor3DMemoryObjectControl(state->umdSurf2DTable[handle].surfStateMgr, mocs);
9575             }
9576             break;
9577         case CM_ARGUMENT_SURFACE2D_UP:
9578         case CM_ARGUMENT_SURFACE2DUP_SAMPLER:
9579             state->surf2DUPTable[handle].memObjCtl = mocs;
9580             if (state->advExecutor)
9581             {
9582                 state->advExecutor->Set2Dor3DMemoryObjectControl(state->surf2DUPTable[handle].surfStateMgr, mocs);
9583             }
9584             break;
9585         case CM_ARGUMENT_SURFACE3D:
9586             state->surf3DTable[handle].memObjCtl = mocs;
9587             if (state->advExecutor)
9588             {
9589                 state->advExecutor->Set2Dor3DMemoryObjectControl(state->surf3DTable[handle].surfStateMgr, mocs);
9590             }
9591             break;
9592         default:
9593             eStatus = MOS_STATUS_INVALID_PARAMETER;
9594             CM_ASSERTMESSAGE("Invalid argument type in MOCS settings");
9595             goto finish;
9596     }
9597 
9598 finish:
9599     return eStatus;
9600 }
9601 
9602 //*-----------------------------------------------------------------------------
9603 //| Purpose:    Allocate surface 2D
9604 //| Returns:    Result of the operation.
9605 //*-----------------------------------------------------------------------------
HalCm_AllocateSurface2D(PCM_HAL_STATE state,PCM_HAL_SURFACE2D_PARAM param)9606 MOS_STATUS HalCm_AllocateSurface2D(
9607     PCM_HAL_STATE                state,                                             // [in]  Pointer to CM State
9608     PCM_HAL_SURFACE2D_PARAM      param)                                             // [in]  Pointer to surface 2D Param
9609 {
9610     MOS_STATUS              eStatus;
9611     PMOS_INTERFACE          osInterface;
9612     PCM_HAL_SURFACE2D_ENTRY entry = nullptr;
9613     MOS_ALLOC_GFXRES_PARAMS allocParams;
9614     uint32_t                i;
9615 
9616     //-----------------------------------------------
9617     CM_ASSERT(param->width > 0);
9618     //-----------------------------------------------
9619 
9620     eStatus              = MOS_STATUS_SUCCESS;
9621     osInterface    = state->osInterface;
9622 
9623     // Find a free slot
9624     for (i = 0; i < state->cmDeviceParam.max2DSurfaceTableSize; i++)
9625     {
9626         if(Mos_ResourceIsNull(&state->umdSurf2DTable[i].osResource))
9627         {
9628             entry              = &state->umdSurf2DTable[i];
9629             param->handle      = (uint32_t)i;
9630             break;
9631         }
9632     }
9633 
9634     if (!entry)
9635     {
9636         eStatus = MOS_STATUS_INVALID_PARAMETER;
9637         CM_ASSERTMESSAGE("Surface2D table is full");
9638         goto finish;
9639     }
9640 
9641     if(param->isAllocatedbyCmrtUmd)
9642     {
9643         MOS_ZeroMemory(&allocParams, sizeof(MOS_ALLOC_GFXRES_PARAMS));
9644         allocParams.Type          = MOS_GFXRES_2D;
9645         allocParams.dwWidth       = param->width;
9646         allocParams.dwHeight      = param->height;
9647         allocParams.pSystemMemory = param->data;
9648         allocParams.Format        = param->format;
9649         allocParams.TileType      = MOS_TILE_Y;
9650         allocParams.pBufName      = "CmSurface2D";
9651 
9652         CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnAllocateResource(
9653             osInterface,
9654             &allocParams,
9655             &entry->osResource));
9656 
9657         entry->width                  = param->width;
9658         entry->height                 = param->height;
9659         entry->format                  = param->format;
9660         entry->isAllocatedbyCmrtUmd    = param->isAllocatedbyCmrtUmd;
9661     }
9662     else
9663     {
9664         entry->width  = param->width;
9665         entry->height = param->height;
9666         entry->format  = param->format;
9667         entry->isAllocatedbyCmrtUmd = false;
9668         entry->osResource = *param->mosResource;
9669         HalCm_OsResource_Reference(&entry->osResource);
9670     }
9671     // set default CM MOS usage
9672     entry->memObjCtl = (state->cmHalInterface->GetDefaultMOCS()) << 8;
9673 
9674     if (state->advExecutor)
9675     {
9676         entry->surfStateMgr = state->advExecutor->Create2DStateMgr(&entry->osResource);
9677         state->advExecutor->Set2Dor3DOrigFormat(entry->surfStateMgr, entry->format);
9678         state->advExecutor->Set2Dor3DOrigDimension(entry->surfStateMgr,
9679                                                  entry->width,
9680                                                  entry->height,
9681                                                  0); // no need to change depth in 2D surface
9682     }
9683 
9684     for (int i = 0; i < CM_HAL_GPU_CONTEXT_COUNT; i++)
9685     {
9686         entry->readSyncs[i] = false;
9687     }
9688 
9689 finish:
9690     return eStatus;
9691 }
9692 
9693 //*-----------------------------------------------------------------------------
9694 //| Purpose:    Allocate surface 2D
9695 //| Returns:    Result of the operation.
9696 //*-----------------------------------------------------------------------------
HalCm_UpdateSurface2D(PCM_HAL_STATE state,PCM_HAL_SURFACE2D_PARAM param)9697 MOS_STATUS HalCm_UpdateSurface2D(
9698     PCM_HAL_STATE                state,                                             // [in]  Pointer to CM State
9699     PCM_HAL_SURFACE2D_PARAM      param)                                             // [in]  Pointer to surface 2D Param
9700 {
9701     MOS_STATUS              hr;
9702     PMOS_INTERFACE          osInterface;
9703     PCM_HAL_SURFACE2D_ENTRY entry = nullptr;
9704     MOS_ALLOC_GFXRES_PARAMS allocParams;
9705     uint32_t                i = param->handle;
9706 
9707     //-----------------------------------------------
9708     CM_ASSERT(param->width > 0);
9709     //-----------------------------------------------
9710 
9711     hr              = MOS_STATUS_SUCCESS;
9712     osInterface    = state->osInterface;
9713 
9714     entry = &state->umdSurf2DTable[i];
9715 
9716     HalCm_OsResource_Unreference(&entry->osResource);
9717 
9718     entry->width  = param->width;
9719     entry->height = param->height;
9720     entry->format  = param->format;
9721     entry->isAllocatedbyCmrtUmd = false;
9722     entry->osResource = *param->mosResource;
9723 
9724     HalCm_OsResource_Reference(&entry->osResource);
9725 
9726     if (state->advExecutor)
9727     {
9728         state->advExecutor->Delete2Dor3DStateMgr(entry->surfStateMgr);
9729         entry->surfStateMgr = state->advExecutor->Create2DStateMgr(&entry->osResource);
9730         state->advExecutor->Set2Dor3DOrigFormat(entry->surfStateMgr, entry->format);
9731         state->advExecutor->Set2Dor3DOrigDimension(entry->surfStateMgr,
9732                                                  entry->width,
9733                                                  entry->height,
9734                                                  0); // no need to change depth in 2D surface
9735     }
9736 
9737     for (int i = 0; i < CM_HAL_GPU_CONTEXT_COUNT; i++)
9738     {
9739         entry->readSyncs[i] = false;
9740     }
9741 
9742     return hr;
9743 }
9744 
9745 //*-----------------------------------------------------------------------------
9746 //| Purpose:    Allocate Linear Buffer or BufferUP
9747 //| Returns:    Result of the operation.
9748 //*-----------------------------------------------------------------------------
HalCm_UpdateBuffer(PCM_HAL_STATE state,PCM_HAL_BUFFER_PARAM param)9749 MOS_STATUS HalCm_UpdateBuffer(
9750     PCM_HAL_STATE           state,                                             // [in]  Pointer to CM State
9751     PCM_HAL_BUFFER_PARAM    param)                                             // [in]  Pointer to Buffer Param
9752 {
9753     MOS_STATUS              hr;
9754     PMOS_INTERFACE          osInterface;
9755     PCM_HAL_BUFFER_ENTRY    entry = nullptr;
9756     MOS_ALLOC_GFXRES_PARAMS allocParams;
9757     uint32_t                i = param->handle;
9758     PMOS_RESOURCE           osResource;
9759 
9760     //-----------------------------------------------
9761     CM_ASSERT(param->size > 0);
9762     //-----------------------------------------------
9763 
9764     hr              = MOS_STATUS_SUCCESS;
9765     osInterface    = state->renderHal->pOsInterface;
9766 
9767     entry              = &state->bufferTable[i];
9768 
9769     HalCm_OsResource_Unreference(&entry->osResource);
9770     entry->osResource = *param->mosResource;
9771     HalCm_OsResource_Reference(&entry->osResource);
9772 
9773     entry->size = param->size;
9774     entry->isAllocatedbyCmrtUmd = false;
9775     entry->surfaceStateEntry[0].surfaceStateSize = entry->size;
9776     entry->surfaceStateEntry[0].surfaceStateOffset = 0;
9777     entry->surfaceStateEntry[0].surfaceStateMOCS = 0;
9778 
9779     if (state->advExecutor)
9780     {
9781         state->advExecutor->DeleteBufferStateMgr(entry->surfStateMgr);
9782         entry->surfStateMgr = state->advExecutor->CreateBufferStateMgr(&entry->osResource);
9783         state->advExecutor->SetBufferOrigSize(entry->surfStateMgr, entry->size);
9784     }
9785 
9786     return hr;
9787 }
9788 
9789 //*-----------------------------------------------------------------------------
9790 //| Purpose:    Frees the surface 2D and removes from the table
9791 //| Returns:    Result of the operation.
9792 //*-----------------------------------------------------------------------------
HalCm_FreeSurface2D(PCM_HAL_STATE state,uint32_t handle)9793 MOS_STATUS HalCm_FreeSurface2D(
9794     PCM_HAL_STATE           state,                                             // [in]  Pointer to CM State
9795     uint32_t                handle)                                           // [in]  Pointer to Buffer Param
9796 {
9797     MOS_STATUS                 eStatus;
9798     PCM_HAL_SURFACE2D_ENTRY    entry;
9799     PMOS_INTERFACE             osInterface;
9800     MOS_GFXRES_FREE_FLAGS      resFreeFlags = {0};
9801 
9802     resFreeFlags.AssumeNotInUse = 1;
9803     eStatus        = MOS_STATUS_SUCCESS;
9804     osInterface    = state->osInterface;
9805 
9806     // Get the Buffer Entry
9807     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurface2DEntry(state, handle, &entry));
9808     if (state->advExecutor)
9809     {
9810         state->advExecutor->Delete2Dor3DStateMgr(entry->surfStateMgr);
9811     }
9812 
9813     if(entry->isAllocatedbyCmrtUmd)
9814     {
9815         osInterface->pfnFreeResourceWithFlag(osInterface, &entry->osResource, resFreeFlags.Value);
9816     }
9817     else
9818     {
9819         HalCm_OsResource_Unreference(&entry->osResource);
9820     }
9821 
9822     MOS_ZeroMemory(&entry->osResource, sizeof(entry->osResource));
9823 
9824     entry->width = 0;
9825     entry->height = 0;
9826     entry->frameType = CM_FRAME;
9827 
9828     for (int i = 0; i < CM_HAL_GPU_CONTEXT_COUNT; i++)
9829     {
9830         entry->readSyncs[i] = false;
9831     }
9832 
9833 finish:
9834     return eStatus;
9835 }
9836 
9837 //*-----------------------------------------------------------------------------
9838 //| Purpose:    Allocate 3D resource
9839 //| Returns:    Result of the operation.
9840 //*-----------------------------------------------------------------------------
HalCm_AllocateSurface3D(CM_HAL_STATE * state,CM_HAL_3DRESOURCE_PARAM * param)9841 MOS_STATUS HalCm_AllocateSurface3D(CM_HAL_STATE *state, // [in]  Pointer to CM State
9842                                    CM_HAL_3DRESOURCE_PARAM *param) // [in]  Pointer to Buffer Param)
9843 {
9844     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9845 
9846     //-----------------------------------------------
9847     CM_ASSERT(state);
9848     CM_ASSERT(param->depth  > 1);
9849     CM_ASSERT(param->width  > 0);
9850     CM_ASSERT(param->height > 0);
9851     //-----------------------------------------------
9852 
9853     MOS_INTERFACE *osInterface = state->osInterface;
9854     // Finds a free slot.
9855     CM_HAL_3DRESOURCE_ENTRY *entry = nullptr;
9856     for (uint32_t i = 0; i < state->cmDeviceParam.max3DSurfaceTableSize; i++)
9857     {
9858         if (Mos_ResourceIsNull(&state->surf3DTable[i].osResource))
9859         {
9860             entry = &state->surf3DTable[i];
9861             param->handle = (uint32_t)i;
9862             break;
9863         }
9864     }
9865     if (!entry)
9866     {
9867         eStatus = MOS_STATUS_INVALID_PARAMETER;
9868         CM_ASSERTMESSAGE("3D surface table is full");
9869         return eStatus;
9870     }
9871     CM_CHK_NULL_GOTOFINISH_MOSERROR(osInterface);
9872     osInterface->pfnResetResource(&entry->osResource);  // Resets the Resource
9873 
9874     MOS_ALLOC_GFXRES_PARAMS alloc_params;
9875     MOS_ZeroMemory(&alloc_params, sizeof(alloc_params));
9876     alloc_params.Type          = MOS_GFXRES_VOLUME;
9877     alloc_params.TileType      = MOS_TILE_Y;
9878     alloc_params.dwWidth       = param->width;
9879     alloc_params.dwHeight      = param->height;
9880     alloc_params.dwDepth       = param->depth;
9881     alloc_params.pSystemMemory = param->data;
9882     alloc_params.Format        = param->format;
9883     alloc_params.pBufName      = "CmSurface3D";
9884 
9885     CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnAllocateResource(
9886         osInterface,
9887         &alloc_params,
9888         &entry->osResource));
9889     entry->width = param->width;
9890     entry->height = param->height;
9891     entry->depth = param->depth;
9892     entry->format = param->format;
9893 
9894     if (state->advExecutor)
9895     {
9896         entry->surfStateMgr = state->advExecutor->Create3DStateMgr(&entry->osResource);
9897         state->advExecutor->Set2Dor3DOrigDimension(entry->surfStateMgr,
9898                                                  entry->width,
9899                                                  entry->height,
9900                                                  entry->depth);
9901     }
9902 finish:
9903     return eStatus;
9904 }
9905 
9906 //*-----------------------------------------------------------------------------
9907 //| Purpose:    Frees the resource and removes from the table
9908 //| Returns:    Result of the operation.
9909 //*-----------------------------------------------------------------------------
HalCm_Free3DResource(PCM_HAL_STATE state,uint32_t handle)9910 MOS_STATUS HalCm_Free3DResource(
9911     PCM_HAL_STATE           state,                                             // [in]  Pointer to CM State
9912     uint32_t                handle)                                           // [in]  Pointer to Buffer Param
9913 {
9914     MOS_STATUS               eStatus;
9915     PCM_HAL_3DRESOURCE_ENTRY entry;
9916     PMOS_INTERFACE           osInterface;
9917     MOS_GFXRES_FREE_FLAGS    resFreeFlags = {0};
9918 
9919     resFreeFlags.AssumeNotInUse = 1;
9920     eStatus        = MOS_STATUS_SUCCESS;
9921     osInterface    = state->osInterface;
9922 
9923     // Get the Buffer Entry
9924     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Get3DResourceEntry(state, handle, &entry));
9925     if (state->advExecutor)
9926     {
9927         state->advExecutor->Delete2Dor3DStateMgr(entry->surfStateMgr);
9928     }
9929 
9930     osInterface->pfnFreeResourceWithFlag(osInterface, &entry->osResource, resFreeFlags.Value);
9931 
9932     osInterface->pfnResetResourceAllocationIndex(osInterface, &entry->osResource);
9933 
9934 finish:
9935     return eStatus;
9936 }
9937 
9938 //*-----------------------------------------------------------------------------
9939 //| Purpose:    Lock the resource and return
9940 //| Returns:    Result of the operation.
9941 //*-----------------------------------------------------------------------------
HalCm_Lock3DResource(PCM_HAL_STATE state,PCM_HAL_3DRESOURCE_PARAM param)9942 MOS_STATUS HalCm_Lock3DResource(
9943     PCM_HAL_STATE               state,                                         // [in]  Pointer to CM State
9944     PCM_HAL_3DRESOURCE_PARAM    param)                                         // [in]  Pointer to 3D Param
9945 {
9946     MOS_STATUS                  eStatus = MOS_STATUS_SUCCESS;
9947     PCM_HAL_3DRESOURCE_ENTRY    entry;
9948     MOS_LOCK_PARAMS             lockFlags;
9949     RENDERHAL_GET_SURFACE_INFO  info;
9950     PMOS_INTERFACE              osInterface = nullptr;
9951     MOS_SURFACE                 surface;
9952 
9953     // Get the 3D Resource Entry
9954     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Get3DResourceEntry(state, param->handle, &entry));
9955     if ((param->lockFlag != CM_HAL_LOCKFLAG_READONLY) && (param->lockFlag != CM_HAL_LOCKFLAG_WRITEONLY) )
9956     {
9957         CM_ASSERTMESSAGE("Invalid lock flag!");
9958         eStatus = MOS_STATUS_UNKNOWN;
9959         goto finish;
9960     }
9961 
9962     // Get resource information
9963     MOS_ZeroMemory(&surface, sizeof(surface));
9964     surface.OsResource = entry->osResource;
9965     surface.Format     = Format_Invalid;
9966     osInterface       = state->osInterface;
9967 
9968     MOS_ZeroMemory(&info, sizeof(RENDERHAL_GET_SURFACE_INFO));
9969 
9970     CM_CHK_MOSSTATUS_GOTOFINISH(RenderHal_GetSurfaceInfo(
9971               osInterface,
9972               &info,
9973               &surface));
9974 
9975     param->pitch  = surface.dwPitch;
9976     param->qpitch = surface.dwQPitch;
9977     param->qpitchEnabled = state->cmHalInterface->IsSurf3DQpitchSupportedbyHw();
9978 
9979     // Lock the resource
9980     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
9981 
9982     if (param->lockFlag == CM_HAL_LOCKFLAG_READONLY)
9983     {
9984         lockFlags.ReadOnly = true;
9985     }
9986     else
9987     {
9988         lockFlags.WriteOnly = true;
9989     }
9990 
9991     lockFlags.ForceCached = true;
9992     param->data = osInterface->pfnLockResource(
9993                     osInterface,
9994                     &entry->osResource,
9995                     &lockFlags);
9996     CM_CHK_NULL_GOTOFINISH_MOSERROR(param->data);
9997 
9998 finish:
9999     return eStatus;
10000 }
10001 
10002 //*-----------------------------------------------------------------------------
10003 //| Purpose:    Unlock the resource and return
10004 //| Returns:    Result of the operation.
10005 //*-----------------------------------------------------------------------------
HalCm_Unlock3DResource(PCM_HAL_STATE state,PCM_HAL_3DRESOURCE_PARAM param)10006 MOS_STATUS HalCm_Unlock3DResource(
10007     PCM_HAL_STATE               state,                                         // [in]  Pointer to CM State
10008     PCM_HAL_3DRESOURCE_PARAM    param)                                         // [in]  Pointer to 3D Param
10009 {
10010     MOS_STATUS                  eStatus;
10011     PCM_HAL_3DRESOURCE_ENTRY    entry;
10012     PMOS_INTERFACE              osInterface;
10013 
10014     eStatus        = MOS_STATUS_SUCCESS;
10015     osInterface    = state->osInterface;
10016 
10017     // Get the 3D Resource Entry
10018     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Get3DResourceEntry(state, param->handle, &entry));
10019 
10020     // Lock the resource
10021     CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnUnlockResource(osInterface, &entry->osResource));
10022 
10023 finish:
10024     return eStatus;
10025 }
10026 
HalCm_SetCompressionMode(PCM_HAL_STATE state,CM_HAL_SURFACE2D_COMPRESSIOM_PARAM mmcParam)10027 MOS_STATUS HalCm_SetCompressionMode(
10028     PCM_HAL_STATE               state,
10029     CM_HAL_SURFACE2D_COMPRESSIOM_PARAM  mmcParam)
10030 {
10031     MOS_STATUS              eStatus = MOS_STATUS_SUCCESS;
10032     PMOS_INTERFACE          osInterface = state->osInterface;
10033     PCM_HAL_SURFACE2D_ENTRY     entry;
10034 
10035     // Get the 2D Resource Entry
10036     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurface2DEntry(state, mmcParam.handle, &entry));
10037 
10038     //set compression bit passed down
10039     CM_CHK_MOSSTATUS_GOTOFINISH(osInterface->pfnSetMemoryCompressionMode(osInterface, &(entry->osResource), (MOS_MEMCOMP_STATE)mmcParam.mmcMode));
10040 
10041 finish:
10042     return eStatus;
10043 }
10044 
HalCm_SetL3Cache(const L3ConfigRegisterValues * l3Values,PCmHalL3Settings cmHalL3Cache)10045 MOS_STATUS HalCm_SetL3Cache(
10046     const L3ConfigRegisterValues            *l3Values,
10047     PCmHalL3Settings                      cmHalL3Cache )
10048 {
10049     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
10050 
10051     // in legacy platforms, we map:
10052     // ConfigRegister0->SqcReg1
10053     // ConfigRegister1->CntlReg2
10054     // ConfigRegister2->CntlReg3
10055     // ConfigRegister3->CntlReg
10056     CM_CHK_NULL_GOTOFINISH_MOSERROR( cmHalL3Cache );
10057     CM_CHK_NULL_GOTOFINISH_MOSERROR(l3Values);
10058 
10059     cmHalL3Cache->overrideSettings    =
10060                 (l3Values->config_register0  || l3Values->config_register1 ||
10061                  l3Values->config_register2 || l3Values->config_register3 );
10062     cmHalL3Cache->cntlRegOverride    = (l3Values->config_register3 != 0);
10063     cmHalL3Cache->cntlReg2Override   = (l3Values->config_register1 != 0);
10064     cmHalL3Cache->cntlReg3Override   = (l3Values->config_register2 != 0);
10065     cmHalL3Cache->sqcReg1Override    = (l3Values->config_register0 != 0);
10066     cmHalL3Cache->cntlReg             = l3Values->config_register3;
10067     cmHalL3Cache->cntlReg2            = l3Values->config_register1;
10068     cmHalL3Cache->cntlReg3            = l3Values->config_register2;
10069     cmHalL3Cache->sqcReg1             = l3Values->config_register0;
10070 
10071 finish:
10072     return MOS_STATUS_SUCCESS;
10073 }
10074 
10075 //*-----------------------------------------------------------------------------
10076 //| Purpose:    Set Cap values
10077 //| Returns:    Result of the operation.
10078 //*-----------------------------------------------------------------------------
HalCm_SetCaps(PCM_HAL_STATE state,PCM_HAL_MAX_SET_CAPS_PARAM setCapsParam)10079 MOS_STATUS HalCm_SetCaps(
10080     PCM_HAL_STATE              state,
10081     PCM_HAL_MAX_SET_CAPS_PARAM setCapsParam)
10082 {
10083     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
10084 
10085     CM_CHK_NULL_GOTOFINISH_MOSERROR(state);
10086     CM_CHK_NULL_GOTOFINISH_MOSERROR(setCapsParam);
10087     CM_CHK_NULL_GOTOFINISH_MOSERROR(state->renderHal);
10088     CM_CHK_NULL_GOTOFINISH_MOSERROR(state->renderHal->pHwCaps)
10089 
10090     switch (setCapsParam->type)
10091     {
10092     case CM_SET_MAX_HW_THREADS:
10093         if( setCapsParam->maxValue <= 0 ||
10094             setCapsParam->maxValue > state->renderHal->pHwCaps->dwMaxThreads )
10095         {
10096             eStatus = MOS_STATUS_UNKNOWN;
10097             goto finish;
10098         }
10099         else
10100         {
10101             state->maxHWThreadValues.apiValue = (setCapsParam->maxValue == 0) ? 0:
10102                     MOS_MAX(setCapsParam->maxValue, state->cmHalInterface->GetSmallestMaxThreadNum());
10103         }
10104         break;
10105 
10106     case CM_SET_HW_L3_CONFIG:
10107         eStatus = state->cmHalInterface->SetL3CacheConfig( &setCapsParam->l3CacheValues,
10108                     &state->l3Settings );
10109         break;
10110 
10111     default:
10112         eStatus = MOS_STATUS_UNKNOWN;
10113         goto finish;
10114     }
10115 
10116 finish:
10117     return eStatus;
10118 }
10119 
10120 //*-----------------------------------------------------------------------------
10121 //| Purpose:    Task sets the power option which will be used by this task
10122 //| Returns:    Result of the operation.
10123 //*-----------------------------------------------------------------------------
HalCm_SetPowerOption(PCM_HAL_STATE state,PCM_POWER_OPTION powerOption)10124 MOS_STATUS HalCm_SetPowerOption(
10125     PCM_HAL_STATE               state,
10126     PCM_POWER_OPTION            powerOption )
10127 {
10128     if (state->cmHalInterface->IsOverridePowerOptionPerGpuContext())
10129     {
10130         CM_NORMALMESSAGE("WARNING: Deprecated function due to per context SSEU overriding is enabled.\n");
10131         return MOS_STATUS_SUCCESS;
10132     }
10133 
10134     MOS_SecureMemcpy( &state->powerOption, sizeof( state->powerOption ), powerOption, sizeof( state->powerOption ) );
10135     return MOS_STATUS_SUCCESS;
10136 }
10137 
10138 //*-----------------------------------------------------------------------------
10139 // Purpose: Get the time in ns from QueryPerformanceCounter
10140 // Returns: Result of the operation
10141 //*-----------------------------------------------------------------------------
HalCm_GetGlobalTime(LARGE_INTEGER * globalTime)10142 MOS_STATUS HalCm_GetGlobalTime(LARGE_INTEGER *globalTime)
10143 {
10144     if(globalTime == nullptr)
10145     {
10146         return MOS_STATUS_NULL_POINTER;
10147     }
10148 
10149     if (MosUtilities::MosQueryPerformanceCounter((uint64_t *)&(globalTime->QuadPart)) == false)
10150     {
10151         return MOS_STATUS_UNKNOWN;
10152     }
10153 
10154     return MOS_STATUS_SUCCESS;
10155 }
10156 
10157 //*-----------------------------------------------------------------------------
10158 // Purpose: Convert time from nanosecond to QPC time
10159 // Returns: Result of the operation
10160 //*-----------------------------------------------------------------------------
HalCm_ConvertToQPCTime(uint64_t nanoseconds,LARGE_INTEGER * qpcTime)10161 MOS_STATUS HalCm_ConvertToQPCTime(uint64_t nanoseconds, LARGE_INTEGER *qpcTime)
10162 {
10163     LARGE_INTEGER     perfFreq;
10164 
10165     if(qpcTime == nullptr)
10166     {
10167         return MOS_STATUS_NULL_POINTER;
10168     }
10169 
10170     if (MosUtilities::MosQueryPerformanceFrequency((uint64_t*)&perfFreq.QuadPart) == false)
10171     {
10172         return MOS_STATUS_UNKNOWN;
10173     }
10174 
10175     qpcTime->QuadPart = (uint64_t)(nanoseconds * perfFreq.QuadPart / 1000000000.0);
10176 
10177     return MOS_STATUS_SUCCESS;
10178 }
10179 
10180 //------------------------------------------------------------------------------
10181 //| Purpose: Halcm updates power state to hw state
10182 //| Returns:
10183 //------------------------------------------------------------------------------
HalCm_UpdatePowerOption(PCM_HAL_STATE state,PCM_POWER_OPTION powerOption)10184 MOS_STATUS HalCm_UpdatePowerOption(
10185     PCM_HAL_STATE               state,
10186     PCM_POWER_OPTION            powerOption )
10187 {
10188     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
10189 
10190     if (state->cmHalInterface->IsOverridePowerOptionPerGpuContext())
10191     {
10192         CM_NORMALMESSAGE("WARNING: Deprecated function due to per context SSEU overriding is enabled.\n");
10193         return MOS_STATUS_SUCCESS;
10194     }
10195 
10196     PRENDERHAL_INTERFACE renderHal = state->renderHal;
10197 
10198     RENDERHAL_POWEROPTION renderPowerOption;
10199     renderPowerOption.nSlice     = (uint8_t)powerOption->nSlice;
10200     renderPowerOption.nSubSlice  = (uint8_t)powerOption->nSubSlice;
10201     renderPowerOption.nEU        = (uint8_t)powerOption->nEU;
10202 
10203     // option set in CM create device to use slice shutdown for life of CM device ( override previous value if necessary )
10204     if ( state->requestSingleSlice == true )
10205     {
10206         renderPowerOption.nSlice = 1;
10207     }
10208 
10209     renderHal->pfnSetPowerOptionMode( renderHal, &renderPowerOption );
10210 
10211     return eStatus;
10212 }
10213 
HalCm_InitPerfTagIndexMap(PCM_HAL_STATE cmState)10214 MOS_STATUS HalCm_InitPerfTagIndexMap(PCM_HAL_STATE cmState)
10215 {
10216     using namespace std;
10217     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
10218     CM_ASSERT(cmState);
10219     for (int i = 0; i < MAX_COMBINE_NUM_IN_PERFTAG; i++)
10220     {
10221         cmState->currentPerfTagIndex[i] = 1;
10222 #if MOS_MESSAGES_ENABLED
10223         cmState->perfTagIndexMap[i] = MosUtilities::MosNewUtil<map<string, int> >(__FUNCTION__, __FILE__, __LINE__);
10224 #else
10225         cmState->perfTagIndexMap[i] = MosUtilities::MosNewUtil<map<string, int> >();
10226 #endif
10227 
10228         CM_CHK_NULL_GOTOFINISH_MOSERROR(cmState->perfTagIndexMap[i]);
10229     }
10230 
10231     cmState->perfTagIndexMap[0]->insert(pair<string, int>("surfaceCopy_read_NV12_32x32", GPUCOPY_READ_PERFTAG_INDEX));
10232     cmState->perfTagIndexMap[0]->insert(pair<string, int>("surfaceCopy_read_NV12_aligned_32x32", GPUCOPY_READ_PERFTAG_INDEX));
10233     cmState->perfTagIndexMap[0]->insert(pair<string, int>("surfaceCopy_read_32x32", GPUCOPY_READ_PERFTAG_INDEX));
10234     cmState->perfTagIndexMap[0]->insert(pair<string, int>("surfaceCopy_read_aligned_32x32", GPUCOPY_READ_PERFTAG_INDEX));
10235 
10236     cmState->perfTagIndexMap[0]->insert(pair<string, int>("surfaceCopy_write_NV12_32x32", GPUCOPY_WRITE_PERFTAG_INDEX));
10237     cmState->perfTagIndexMap[0]->insert(pair<string, int>("surfaceCopy_write_32x32", GPUCOPY_WRITE_PERFTAG_INDEX));
10238 
10239     cmState->perfTagIndexMap[0]->insert(pair<string, int>("SurfaceCopy_2DTo2D_NV12_32x32", GPUCOPY_G2G_PERFTAG_INDEX));
10240     cmState->perfTagIndexMap[0]->insert(pair<string, int>("SurfaceCopy_2DTo2D_32x32", GPUCOPY_G2G_PERFTAG_INDEX));
10241 
10242     cmState->perfTagIndexMap[0]->insert(pair<string, int>("SurfaceCopy_BufferToBuffer_4k", GPUCOPY_C2C_PERFTAG_INDEX));
10243     cmState->perfTagIndexMap[0]->insert(pair<string, int>("SurfaceCopy_BufferToBuffer_4k", GPUCOPY_C2C_PERFTAG_INDEX));
10244 
10245     cmState->perfTagIndexMap[0]->insert(pair<string, int>("surfaceCopy_set_NV12", GPUINIT_PERFTAG_INDEX));
10246     cmState->perfTagIndexMap[0]->insert(pair<string, int>("surfaceCopy_set", GPUINIT_PERFTAG_INDEX));
10247 
10248 finish:
10249     return eStatus;
10250 }
10251 
HalCm_DeleteFromStateBufferList(PCM_HAL_STATE state,void * kernelPtr)10252 MOS_STATUS HalCm_DeleteFromStateBufferList(
10253     PCM_HAL_STATE               state,
10254     void                        *kernelPtr )
10255 {
10256     MOS_STATUS result = MOS_STATUS_SUCCESS;
10257 
10258     state->state_buffer_list_ptr->erase( kernelPtr );
10259 
10260     return result;
10261 }
10262 
HalCm_GetMediaStatePtrForKernel(PCM_HAL_STATE state,void * kernelPtr)10263 PRENDERHAL_MEDIA_STATE HalCm_GetMediaStatePtrForKernel(
10264     PCM_HAL_STATE               state,
10265     void                        *kernelPtr )
10266 {
10267     if ( state->state_buffer_list_ptr->find( kernelPtr ) != state->state_buffer_list_ptr->end() )
10268     {
10269         return ( *state->state_buffer_list_ptr )[ kernelPtr ].mediaStatePtr;
10270     }
10271     else
10272     {
10273         return nullptr;
10274     }
10275 }
10276 
HalCm_GetStateBufferVAPtrForSurfaceIndex(PCM_HAL_STATE state,uint32_t surfIndex)10277 uint64_t HalCm_GetStateBufferVAPtrForSurfaceIndex(
10278     PCM_HAL_STATE               state,
10279     uint32_t                    surfIndex )
10280 {
10281     for ( auto listItem = state->state_buffer_list_ptr->begin(); listItem != state->state_buffer_list_ptr->end(); listItem++ )
10282     {
10283         if ( listItem->second.stateBufferIndex == surfIndex )
10284         {
10285             return listItem->second.stateBufferVaPtr;
10286         }
10287     }
10288     return 0;
10289 }
10290 
HalCm_GetMediaStatePtrForSurfaceIndex(PCM_HAL_STATE state,uint32_t surfIndex)10291 PRENDERHAL_MEDIA_STATE HalCm_GetMediaStatePtrForSurfaceIndex(
10292     PCM_HAL_STATE               state,
10293     uint32_t                    surfIndex )
10294 {
10295     for ( auto listItem = state->state_buffer_list_ptr->begin(); listItem != state->state_buffer_list_ptr->end(); listItem++ )
10296     {
10297         if ( listItem->second.stateBufferIndex == surfIndex )
10298         {
10299             return listItem->second.mediaStatePtr;
10300         }
10301     }
10302     return nullptr;
10303 }
10304 
HalCm_GetStateBufferVAPtrForMediaStatePtr(PCM_HAL_STATE state,PRENDERHAL_MEDIA_STATE mediaStatePtr)10305 uint64_t HalCm_GetStateBufferVAPtrForMediaStatePtr(
10306     PCM_HAL_STATE               state,
10307     PRENDERHAL_MEDIA_STATE      mediaStatePtr )
10308 {
10309     for ( auto listItem = state->state_buffer_list_ptr->begin(); listItem != state->state_buffer_list_ptr->end(); listItem++ )
10310     {
10311         if ( listItem->second.mediaStatePtr == mediaStatePtr )
10312         {
10313             return listItem->second.stateBufferVaPtr;
10314         }
10315     }
10316     return 0;
10317 }
10318 
HalCm_GetStateBufferSizeForKernel(PCM_HAL_STATE state,void * kernelPtr)10319 uint32_t HalCm_GetStateBufferSizeForKernel(
10320     PCM_HAL_STATE               state,
10321     void                        *kernelPtr )
10322 {
10323     if ( state->state_buffer_list_ptr->find( kernelPtr ) != state->state_buffer_list_ptr->end() )
10324     {
10325         return ( *state->state_buffer_list_ptr )[ kernelPtr ].stateBufferSize;
10326     }
10327     else
10328     {
10329         return 0;
10330     }
10331 }
10332 
HalCm_GetStateBufferTypeForKernel(PCM_HAL_STATE state,void * kernelPtr)10333 CM_STATE_BUFFER_TYPE HalCm_GetStateBufferTypeForKernel(
10334     PCM_HAL_STATE               state,
10335     void                        *kernelPtr )
10336 {
10337     if ( state->state_buffer_list_ptr->find( kernelPtr ) != state->state_buffer_list_ptr->end() )
10338     {
10339         return ( *state->state_buffer_list_ptr )[ kernelPtr ].stateBufferType;
10340     }
10341     else
10342     {
10343         return CM_STATE_BUFFER_NONE;
10344     }
10345 }
10346 
LoadUserFeatures(CM_HAL_STATE * halState,MOS_GPUCTX_CREATOPTIONS * createOptions)10347 static void LoadUserFeatures(CM_HAL_STATE *halState,
10348                              MOS_GPUCTX_CREATOPTIONS *createOptions)
10349 {
10350 #if (_DEBUG || _RELEASE_INTERNAL)
10351     MOS_USER_FEATURE_VALUE_DATA  user_feature_data;
10352     MOS_ZeroMemory(&user_feature_data, sizeof(user_feature_data));
10353     MOS_STATUS result
10354             = MOS_UserFeature_ReadValue_ID(
10355                 nullptr, __MEDIA_USER_FEATURE_VALUE_MDF_FORCE_RAMODE,
10356                 &user_feature_data, halState->osInterface->pOsContext);
10357     if (MOS_STATUS_SUCCESS == result && user_feature_data.i32Data == 1)
10358     {
10359         createOptions->RAMode = 1;
10360     }
10361 
10362     MOS_USER_FEATURE_VALUE_WRITE_DATA userFeatureWriteData;
10363     userFeatureWriteData = __NULL_USER_FEATURE_VALUE_WRITE_DATA__;
10364     userFeatureWriteData.Value.i32Data = createOptions->RAMode;
10365     userFeatureWriteData.ValueID       = __MEDIA_USER_FEATURE_VALUE_MDF_FORCE_RAMODE;
10366     MOS_UserFeature_WriteValues_ID(nullptr, &userFeatureWriteData, 1, halState->osInterface->pOsContext);
10367 
10368 #endif
10369     return;
10370 }
10371 
HalCm_CreateGPUContext(PCM_HAL_STATE state,MOS_GPU_CONTEXT gpuContext,MOS_GPU_NODE gpuNode,PMOS_GPUCTX_CREATOPTIONS pMosGpuContextCreateOption)10372 MOS_STATUS HalCm_CreateGPUContext(
10373     PCM_HAL_STATE            state,
10374     MOS_GPU_CONTEXT          gpuContext,
10375     MOS_GPU_NODE             gpuNode,
10376     PMOS_GPUCTX_CREATOPTIONS pMosGpuContextCreateOption)
10377 {
10378     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
10379 
10380     LoadUserFeatures(state, pMosGpuContextCreateOption);
10381 
10382     // Create Compute Context on Compute Node
10383     CM_CHK_HRESULT_GOTOFINISH_MOSERROR(state->osInterface->pfnCreateGpuContext(
10384         state->osInterface,
10385         gpuContext,
10386         gpuNode,
10387         pMosGpuContextCreateOption));
10388 
10389     // Register Compute Context with the Batch Buffer completion event
10390     CM_CHK_HRESULT_GOTOFINISH_MOSERROR(state->osInterface->pfnRegisterBBCompleteNotifyEvent(
10391         state->osInterface,
10392         gpuContext));
10393 
10394 finish:
10395     return eStatus;
10396 }
10397 
10398 GPU_CONTEXT_HANDLE
HalCm_CreateGpuComputeContext(CM_HAL_STATE * state,MOS_GPUCTX_CREATOPTIONS * createOptions)10399 HalCm_CreateGpuComputeContext(CM_HAL_STATE *state,
10400                               MOS_GPUCTX_CREATOPTIONS *createOptions)
10401 {
10402     LoadUserFeatures(state, createOptions);
10403 
10404     GPU_CONTEXT_HANDLE context_handle
10405             = state->osInterface->pfnCreateGpuComputeContext(
10406                 state->osInterface, MOS_GPU_CONTEXT_CM_COMPUTE, createOptions);
10407     if (MOS_GPU_CONTEXT_INVALID_HANDLE != context_handle)
10408     {
10409         state->osInterface->pfnRegisterBBCompleteNotifyEvent(
10410             state->osInterface, MOS_GPU_CONTEXT_CM_COMPUTE);
10411     }
10412     return context_handle;
10413 }
10414 
HalCm_SetGpuContext(CM_HAL_STATE * halState,MOS_GPU_CONTEXT contextName,uint32_t streamIndex,GPU_CONTEXT_HANDLE contextHandle)10415 uint32_t HalCm_SetGpuContext(CM_HAL_STATE *halState,
10416                              MOS_GPU_CONTEXT contextName,
10417                              uint32_t streamIndex,
10418                              GPU_CONTEXT_HANDLE contextHandle)
10419 {
10420     uint32_t old_stream_idx = halState->osInterface->streamIndex;
10421     halState->osInterface->streamIndex = streamIndex;
10422     MOS_STATUS result = MOS_STATUS_SUCCESS;
10423 
10424     if (MOS_GPU_CONTEXT_INVALID_HANDLE == contextHandle)
10425     {
10426         result = halState->osInterface->pfnSetGpuContext(halState->osInterface,
10427                                                          contextName);
10428     }
10429     else
10430     {
10431         result = halState->osInterface->pfnSetGpuContextFromHandle(
10432             halState->osInterface, contextName, contextHandle);
10433     }
10434 
10435     if (MOS_STATUS_SUCCESS != result)
10436     {
10437         halState->osInterface->streamIndex = old_stream_idx;
10438         return INVALID_STREAM_INDEX;
10439     }
10440     return old_stream_idx;
10441 }
10442 
HalCm_SelectSyncBuffer(CM_HAL_STATE * halState,uint32_t bufferIdx)10443 MOS_STATUS HalCm_SelectSyncBuffer(CM_HAL_STATE *halState, uint32_t bufferIdx)
10444 {
10445     if (bufferIdx >= halState->cmDeviceParam.maxBufferTableSize)
10446     {
10447         halState->syncBuffer = nullptr;
10448         return MOS_STATUS_SUCCESS;
10449     }
10450     CM_HAL_BUFFER_ENTRY *entry = halState->bufferTable + bufferIdx;
10451     halState->syncBuffer = &entry->osResource;
10452     MOS_INTERFACE *os_interface = halState->osInterface;
10453     return os_interface->pfnRegisterResource(os_interface, halState->syncBuffer,
10454                                              true, true);
10455 }
10456 
10457 //*-----------------------------------------------------------------------------
10458 //| Purpose:    Creates instance of HAL CM State
10459 //| Returns:    Result of the operation
10460 //| Note:       Caller must call pfnAllocate to allocate all HalCm/Mhw states and objects.
10461 //|             Caller MUST call HalCm_Destroy to destroy the instance
10462 //*-----------------------------------------------------------------------------
HalCm_Create(PMOS_CONTEXT osDriverContext,PCM_HAL_CREATE_PARAM param,PCM_HAL_STATE * cmState)10463 MOS_STATUS HalCm_Create(
10464     PMOS_CONTEXT            osDriverContext,   // [in] OS Driver Context
10465     PCM_HAL_CREATE_PARAM     param,             // [in] Create Param
10466     PCM_HAL_STATE           *cmState)          // [out] double pointer to CM State
10467 {
10468     MOS_STATUS          eStatus;
10469     PCM_HAL_STATE       state = nullptr;
10470     uint32_t            numCmdBuffers = 0;
10471     MhwInterfaces       *mhwInterfaces = nullptr;
10472     MhwInterfaces::CreateParams params;
10473     MOS_GPUCTX_CREATOPTIONS createOption;
10474 
10475     //-----------------------------------------
10476     CM_ASSERT(osDriverContext);
10477     CM_ASSERT(param);
10478     CM_ASSERT(cmState);
10479     //-----------------------------------------
10480 
10481     eStatus  = MOS_STATUS_SUCCESS;
10482 
10483     // Allocate State structure
10484     state = (PCM_HAL_STATE)MOS_AllocAndZeroMemory(sizeof(CM_HAL_STATE));
10485     CM_CHK_NULL_GOTOFINISH_MOSERROR(state);
10486 
10487     // Allocate/Initialize OS Interface
10488     state->osInterface = (PMOS_INTERFACE)
10489                                 MOS_AllocAndZeroMemory(sizeof(MOS_INTERFACE));
10490     CM_CHK_NULL_GOTOFINISH_MOSERROR(state->osInterface);
10491     state->osInterface->bDeallocateOnExit = true;
10492     CM_CHK_HRESULT_GOTOFINISH_MOSERROR(Mos_InitInterface(state->osInterface, osDriverContext, COMPONENT_CM));
10493 #if (_RELEASE_INTERNAL || _DEBUG)
10494 #if defined(CM_DIRECT_GUC_SUPPORT)
10495     state->osInterface->m_pWorkQueueMngr = new CMRTWorkQueueMngr();
10496 #endif
10497 #endif
10498 
10499     state->osInterface->pfnGetPlatform(state->osInterface, &state->platform);
10500     state->skuTable = state->osInterface->pfnGetSkuTable(state->osInterface);
10501     state->waTable  = state->osInterface->pfnGetWaTable (state->osInterface);
10502 
10503     // Create VEBOX Context
10504     createOption.CmdBufferNumScale = MOS_GPU_CONTEXT_CREATE_DEFAULT;
10505     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_CreateGPUContext(
10506         state,
10507         MOS_GPU_CONTEXT_VEBOX,
10508         MOS_GPU_NODE_VE,
10509         &createOption));
10510 
10511     // Allocate/Initialize CM Rendering Interface
10512     state->renderHal = (PRENDERHAL_INTERFACE_LEGACY)
10513                                 MOS_AllocAndZeroMemory(sizeof(RENDERHAL_INTERFACE_LEGACY));
10514     CM_CHK_NULL_GOTOFINISH_MOSERROR(state->renderHal);
10515 
10516     state->dshEnabled                   = param->dynamicStateHeap;
10517     state->renderHal->bDynamicStateHeap = state->dshEnabled;
10518 
10519     if (state->dshEnabled)
10520     {
10521         CM_CHK_MOSSTATUS_GOTOFINISH(RenderHal_InitInterface_Dynamic(state->renderHal, &state->cpInterface, state->osInterface));
10522     }
10523     else
10524     {
10525         CM_CHK_MOSSTATUS_GOTOFINISH(RenderHal_InitInterface_Legacy(state->renderHal, &state->cpInterface, state->osInterface));
10526     }
10527 
10528     // Allocate/Initialize VEBOX Interface
10529     if (!param->disableVebox)
10530     {
10531         CmSafeMemSet(&params, 0, sizeof(params));
10532         params.Flags.m_vebox = 1;
10533         mhwInterfaces = MhwInterfaces::CreateFactory(params, state->osInterface);
10534         if (mhwInterfaces)
10535         {
10536             state->veboxInterface = mhwInterfaces->m_veboxInterface;
10537 
10538             // MhwInterfaces always create CP and MI interfaces, so we have to delete those we don't need.
10539             MOS_Delete(mhwInterfaces->m_miInterface);
10540             state->osInterface->pfnDeleteMhwCpInterface(mhwInterfaces->m_cpInterface);
10541             mhwInterfaces->m_cpInterface = nullptr;
10542             MOS_Delete(mhwInterfaces);
10543             CM_CHK_NULL_GOTOFINISH_MOSERROR(state->veboxInterface);
10544         }
10545         else
10546         {
10547             CM_ASSERTMESSAGE("Allocate MhwInterfaces failed");
10548             HalCm_Destroy(state);
10549             *cmState = nullptr;
10550             return MOS_STATUS_NO_SPACE;
10551         }
10552     }
10553     else
10554     {
10555         state->veboxInterface = nullptr;
10556     }
10557 
10558     // set IsMDFLoad to distinguish MDF context from other Media Contexts
10559     state->renderHal->IsMDFLoad = true;
10560 
10561     // disable YV12SinglePass as CMRT & compiler don't support it
10562     state->renderHal->bEnableYV12SinglePass = false;
10563 
10564     state->cmDeviceParam.maxKernelBinarySize      = CM_KERNEL_BINARY_BLOCK_SIZE;
10565 
10566     // set if the new sampler heap management is used or not
10567     // currently new sampler heap management depends on DSH
10568     if (state->dshEnabled)
10569     {
10570         state->useNewSamplerHeap = true;
10571     }
10572     else
10573     {
10574         state->useNewSamplerHeap = false;
10575     }
10576 
10577     //Get Max Scratch Space Size
10578     if( param->disableScratchSpace)
10579     {
10580         state->cmDeviceParam.maxPerThreadScratchSpaceSize = 0;
10581     }
10582     else
10583     {
10584          //Gen7_5 + : (MaxScratchSpaceSize + 1) *16k
10585           if(param->scratchSpaceSize == CM_DEVICE_CONFIG_SCRATCH_SPACE_SIZE_DEFAULT)
10586           { //By default, 128K for HSW
10587                state->cmDeviceParam.maxPerThreadScratchSpaceSize = 8 * CM_DEVICE_CONFIG_SCRATCH_SPACE_SIZE_16K_STEP;
10588           }
10589           else
10590           {
10591                state->cmDeviceParam.maxPerThreadScratchSpaceSize = (param->scratchSpaceSize)*
10592                                 CM_DEVICE_CONFIG_SCRATCH_SPACE_SIZE_16K_STEP;
10593           }
10594     }
10595 
10596     // Initialize kernel parameters
10597     state->kernelParamsRenderHal.pMhwKernelParam = &state->kernelParamsMhw;
10598 
10599     // Enable SLM in L3 Cache
10600     state->l3Settings.enableSlm = true;
10601 
10602     // Slice shutdown
10603     state->requestSingleSlice = param->requestSliceShutdown;
10604 
10605     //mid thread preemption on/off and SIP debug control
10606     state->midThreadPreemptionDisabled = param->disabledMidThreadPreemption;
10607     state->kernelDebugEnabled = param->enabledKernelDebug;
10608 
10609     // init mapping for the state buffer
10610 #if MOS_MESSAGES_ENABLED
10611     state->state_buffer_list_ptr = MosUtilities::MosNewUtil<std::map< void *, CM_HAL_STATE_BUFFER_ENTRY> >(__FUNCTION__, __FILE__, __LINE__);
10612 #else
10613     state->state_buffer_list_ptr = MosUtilities::MosNewUtil<std::map< void *, CM_HAL_STATE_BUFFER_ENTRY> >();
10614 #endif
10615 
10616     CM_CHK_NULL_GOTOFINISH_MOSERROR( state->state_buffer_list_ptr );
10617 
10618     MOS_ZeroMemory(&state->hintIndexes.kernelIndexes, sizeof(uint32_t) * CM_MAX_TASKS_EU_SATURATION);
10619     MOS_ZeroMemory(&state->hintIndexes.dispatchIndexes, sizeof(uint32_t) * CM_MAX_TASKS_EU_SATURATION);
10620 
10621     // get the global media profiler
10622     state->perfProfiler = MediaPerfProfiler::Instance();
10623     CM_CHK_NULL_GOTOFINISH_MOSERROR(state->perfProfiler);
10624     CM_CHK_MOSSTATUS_GOTOFINISH(state->perfProfiler->Initialize((void*)state, state->osInterface));
10625 
10626     state->criticalSectionDSH = MOS_New(CMRT_UMD::CSync);
10627     CM_CHK_NULL_GOTOFINISH_MOSERROR(state->criticalSectionDSH);
10628 
10629     state->cmDeviceParam.maxKernelsPerTask        = CM_MAX_KERNELS_PER_TASK;
10630     state->cmDeviceParam.maxSamplerTableSize      = CM_MAX_SAMPLER_TABLE_SIZE;
10631     state->cmDeviceParam.maxSampler8x8TableSize   = state->renderHal->pHwSizes->dwSizeSampler8x8Table;
10632     state->cmDeviceParam.maxBufferTableSize       = CM_MAX_BUFFER_SURFACE_TABLE_SIZE;
10633     state->cmDeviceParam.max2DSurfaceUPTableSize  = CM_MAX_2D_SURFACE_UP_TABLE_SIZE;
10634     state->cmDeviceParam.max2DSurfaceTableSize    = CM_MAX_2D_SURFACE_TABLE_SIZE;
10635     state->cmDeviceParam.max3DSurfaceTableSize    = CM_MAX_3D_SURFACE_TABLE_SIZE;
10636     state->cmDeviceParam.maxTasks                 = param->maxTaskNumber;
10637     state->cmDeviceParam.maxAvsSamplers           = CM_MAX_AVS_SAMPLER_SIZE;
10638     state->cmDeviceParam.maxGshKernelEntries      = param->kernelBinarySizeinGSH / (CM_32K);
10639 
10640     if (state->dshEnabled)
10641     {
10642         // Initialize Kernel Cache Hit/Miss counters
10643         state->dshKernelCacheMiss = 0;
10644         state->dshKernelCacheHit  = 0;
10645     }
10646 
10647     // Setup Function pointers
10648     state->pfnCmAllocate                  = HalCm_Allocate;
10649     state->pfnGetMaxValues                = HalCm_GetMaxValues;
10650     state->pfnGetMaxValuesEx              = HalCm_GetMaxValuesEx;
10651     state->pfnExecuteTask                 = HalCm_ExecuteTask;
10652     state->pfnExecuteGroupTask            = HalCm_ExecuteGroupTask;
10653     state->pfnExecuteHintsTask            = HalCm_ExecuteHintsTask;
10654     state->pfnRegisterSampler             = HalCm_RegisterSampler;
10655     state->pfnUnRegisterSampler           = HalCm_UnRegisterSampler;
10656     state->pfnRegisterSampler8x8          = HalCm_RegisterSampler8x8;
10657     state->pfnUnRegisterSampler8x8        = HalCm_UnRegisterSampler8x8;
10658     state->pfnFreeBuffer                  = HalCm_FreeBuffer;
10659     state->pfnLockBuffer                  = HalCm_LockBuffer;
10660     state->pfnUnlockBuffer                = HalCm_UnlockBuffer;
10661     state->pfnFreeSurface2DUP             = HalCm_FreeSurface2DUP;
10662     state->pfnGetSurface2DTileYPitch      = HalCm_GetSurface2DTileYPitch;
10663     state->pfnSet2DSurfaceStateParam      = HalCm_Set2DSurfaceStateParam;
10664     state->pfnSetBufferSurfaceStatePara   = HalCm_SetBufferSurfaceStateParameters;
10665     state->pfnSetSurfaceMOCS              = HalCm_SetSurfaceMOCS;
10666     /************************************************************/
10667     state->pfnAllocateSurface2D           = HalCm_AllocateSurface2D;
10668     state->pfnAllocate3DResource          = HalCm_AllocateSurface3D;
10669     state->pfnFreeSurface2D               = HalCm_FreeSurface2D;
10670     state->pfnLock2DResource              = HalCm_Lock2DResource;
10671     state->pfnUnlock2DResource            = HalCm_Unlock2DResource;
10672     state->pfnSetCompressionMode          = HalCm_SetCompressionMode;
10673     /************************************************************/
10674     state->pfnFree3DResource              = HalCm_Free3DResource;
10675     state->pfnLock3DResource              = HalCm_Lock3DResource;
10676     state->pfnUnlock3DResource            = HalCm_Unlock3DResource;
10677     state->pfnSetCaps                     = HalCm_SetCaps;
10678     state->pfnSetPowerOption              = HalCm_SetPowerOption;
10679     state->pfnUpdatePowerOption           = HalCm_UpdatePowerOption;
10680 
10681     state->pfnSendMediaWalkerState        = HalCm_SendMediaWalkerState;
10682     state->pfnSendGpGpuWalkerState        = HalCm_SendGpGpuWalkerState;
10683     state->pfnSetSurfaceReadFlag          = HalCm_SetSurfaceReadFlag;
10684     state->pfnSetVtuneProfilingFlag       = HalCm_SetVtuneProfilingFlag;
10685     state->pfnExecuteVeboxTask            = HalCm_ExecuteVeboxTask;
10686     state->pfnGetTaskSyncLocation         = HalCm_GetTaskSyncLocation;
10687 
10688     state->pfnGetGlobalTime               = HalCm_GetGlobalTime;
10689     state->pfnConvertToQPCTime            = HalCm_ConvertToQPCTime;
10690 
10691     state->pfnSyncOnResource              = HalCm_SyncOnResource;
10692 
10693     state->pfnDeleteFromStateBufferList = HalCm_DeleteFromStateBufferList;
10694     state->pfnGetMediaStatePtrForKernel = HalCm_GetMediaStatePtrForKernel;
10695     state->pfnGetStateBufferVAPtrForSurfaceIndex = HalCm_GetStateBufferVAPtrForSurfaceIndex;
10696     state->pfnGetMediaStatePtrForSurfaceIndex = HalCm_GetMediaStatePtrForSurfaceIndex;
10697     state->pfnGetStateBufferVAPtrForMediaStatePtr = HalCm_GetStateBufferVAPtrForMediaStatePtr;
10698     state->pfnGetStateBufferSizeForKernel = HalCm_GetStateBufferSizeForKernel;
10699     state->pfnGetStateBufferTypeForKernel = HalCm_GetStateBufferTypeForKernel;
10700     state->pfnCreateGPUContext            = HalCm_CreateGPUContext;
10701     state->pfnCreateGpuComputeContext     = HalCm_CreateGpuComputeContext;
10702     state->pfnSetGpuContext               = HalCm_SetGpuContext;
10703     state->pfnSelectSyncBuffer            = HalCm_SelectSyncBuffer;
10704     state->pfnDSHUnregisterKernel         = HalCm_DSH_UnregisterKernel;
10705 
10706     state->pfnUpdateBuffer                = HalCm_UpdateBuffer;
10707     state->pfnUpdateSurface2D             = HalCm_UpdateSurface2D;
10708 
10709     //==========<Initialize 5 OS-dependent DDI functions: pfnAllocate3DResource, pfnAllocateSurface2DUP====
10710     //                 pfnAllocateBuffer,pfnRegisterKMDNotifyEventHandle, pfnGetSurface2DPitchAndSize >====
10711     HalCm_OsInitInterface(state);
10712 
10713     state->osInterface->pfnInitCmInterface(state);
10714 
10715     HalCm_InitPerfTagIndexMap(state);
10716 
10717     state->maxHWThreadValues.userFeatureValue = 0;
10718     state->maxHWThreadValues.apiValue = 0;
10719 
10720     HalCm_GetUserFeatureSettings(state);
10721 
10722 #if MDF_COMMAND_BUFFER_DUMP
10723     HalCm_InitDumpCommandBuffer(state);
10724     state->pfnInitDumpCommandBuffer = HalCm_InitDumpCommandBuffer;
10725     state->pfnDumpCommadBuffer      = HalCm_DumpCommadBuffer;
10726 #endif //MDF_COMMAND_BUFFER_DUMP
10727 
10728 #if MDF_CURBE_DATA_DUMP
10729     HalCm_InitDumpCurbeData(state);
10730 #endif
10731 
10732 #if MDF_SURFACE_CONTENT_DUMP
10733     HalCm_InitSurfaceDump(state);
10734 #endif
10735 
10736 #if MDF_SURFACE_STATE_DUMP
10737     HalCm_InitDumpSurfaceState(state);
10738     state->pfnInitDumpSurfaceState = HalCm_InitDumpSurfaceState;
10739     state->pfnDumpSurfaceState = HalCm_DumpSurfaceState;
10740 #endif
10741 
10742 #if MDF_INTERFACE_DESCRIPTOR_DATA_DUMP
10743   HalCm_InitDumpInterfaceDescriporData(state);
10744 #endif
10745 
10746     state->cmHalInterface = CMHalDevice::CreateFactory(state);
10747     CM_CHK_NULL_GOTOFINISH_MOSERROR(state->cmHalInterface);
10748 
10749     if (param->refactor)
10750     {
10751         state->refactor = true;
10752     }
10753     else
10754     {
10755         state->refactor = false;
10756     }
10757 
10758     state->requestCustomGpuContext = param->requestCustomGpuContext;
10759 
10760 #if (_DEBUG || _RELEASE_INTERNAL)
10761     {
10762         MOS_USER_FEATURE_VALUE_DATA userFeatureData;
10763 
10764         MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
10765         MOS_UserFeature_ReadValue_ID(
10766             nullptr,
10767             __MEDIA_USER_FEATURE_VALUE_MDF_FORCE_EXECUTION_PATH_ID,
10768             &userFeatureData,
10769             state->osInterface->pOsContext);
10770 
10771         if (userFeatureData.i32Data == 1)
10772         {
10773             state->refactor = false;
10774         }
10775         else if (userFeatureData.i32Data == 2)
10776         {
10777             state->refactor = true;
10778             state->cmHalInterface->SetFastPathByDefault(true);
10779         }
10780 
10781         FILE *fp1 = nullptr;
10782         MosUtilities::MosSecureFileOpen(&fp1, "refactor.key", "r");
10783         if (fp1 != nullptr)
10784         {
10785             state->refactor = true;
10786             state->cmHalInterface->SetFastPathByDefault(true);
10787             fclose(fp1);
10788         }
10789 
10790         FILE *fp2 = nullptr;
10791         MosUtilities::MosSecureFileOpen(&fp2, "origin.key", "r");
10792         if (fp2 != nullptr)
10793         {
10794             state->refactor = false;
10795             fclose(fp2);
10796         }
10797     }
10798 #endif
10799 
10800     if (state->refactor)
10801     {
10802         CM_NORMALMESSAGE("Info: Fast path is enabled!\n");
10803     }
10804     else
10805     {
10806         CM_NORMALMESSAGE("Info: Fast path is disabled!\n");
10807     }
10808 
10809 finish:
10810     if (eStatus != MOS_STATUS_SUCCESS)
10811     {
10812         HalCm_Destroy(state);
10813         *cmState = nullptr;
10814     }
10815     else
10816     {
10817         *cmState = state;
10818     }
10819 
10820     return eStatus;
10821 }
10822 
10823 //*-----------------------------------------------------------------------------
10824 //| Purpose: Destroys instance of HAL CM State
10825 //| Returns: N/A
10826 //*-----------------------------------------------------------------------------
HalCm_Destroy(PCM_HAL_STATE state)10827 void HalCm_Destroy(
10828     PCM_HAL_STATE state)                                                       // [in] Pointer to CM State
10829 {
10830     MOS_STATUS eStatus;
10831     int32_t    i;
10832 
10833     if (state)
10834     {
10835         //Delete CmHal Interface
10836         MosSafeDelete(state->cmHalInterface);
10837         if (state->osInterface)
10838         {
10839             state->osInterface->pfnDeleteMhwCpInterface(state->cpInterface);
10840             state->cpInterface = nullptr;
10841         }
10842         else
10843         {
10844             CM_ASSERTMESSAGE("Failed to destroy cpInterface.");
10845         }
10846         MosSafeDelete(state->state_buffer_list_ptr);
10847         MosSafeDelete(state->criticalSectionDSH);
10848 
10849         // Delete the unified media profiler
10850         if (state->perfProfiler)
10851         {
10852             MediaPerfProfiler::Destroy(state->perfProfiler, (void*)state, state->osInterface);
10853             state->perfProfiler = nullptr;
10854         }
10855 
10856         // Delete Batch Buffers
10857         if (state->batchBuffers)
10858         {
10859             for (i=0; i < state->numBatchBuffers; i++)
10860             {
10861                 if (!Mos_ResourceIsNull(&state->batchBuffers[i].OsResource))
10862                 {
10863                     eStatus = (MOS_STATUS)state->renderHal->pfnFreeBB(
10864                                 state->renderHal,
10865                                 &state->batchBuffers[i]);
10866 
10867                     CM_ASSERT(eStatus == MOS_STATUS_SUCCESS);
10868                 }
10869 
10870                 MOS_FreeMemory(state->batchBuffers[i].pPrivateData);
10871             }
10872 
10873             MOS_FreeMemory(state->batchBuffers);
10874             state->batchBuffers = nullptr;
10875         }
10876 
10877         // Delete TimeStamp Buffer
10878         HalCm_FreeTsResource(state);
10879         if ((state->midThreadPreemptionDisabled == false) || (state->kernelDebugEnabled == true)) {
10880             // Delete CSR surface
10881             HalCm_FreeCsrResource(state);
10882 
10883             // Delete sip surface
10884             HalCm_FreeSipResource(state);
10885         }
10886 
10887         // Delete tracker resource
10888         HalCm_FreeTrackerResources(state);
10889 
10890         // Delete advance executor
10891         MOS_Delete(state->advExecutor);
10892 
10893         // Delete heap manager
10894         if (state->renderHal)
10895         {
10896             MOS_Delete(state->renderHal->dgsheapManager);
10897         }
10898 
10899         if (state->hLibModule)
10900         {
10901             MosUtilities::MosFreeLibrary(state->hLibModule);
10902             state->hLibModule = nullptr;
10903         }
10904 
10905         // Delete RenderHal Interface
10906         if (state->renderHal)
10907         {
10908             if (state->renderHal->pfnDestroy)
10909             {
10910                 state->renderHal->pfnDestroy(state->renderHal);
10911             }
10912             MOS_FreeMemory(state->renderHal);
10913             state->renderHal = nullptr;
10914         }
10915 
10916         // Delete VEBOX Interface
10917         if (state->veboxInterface
10918             && state->veboxInterface->m_veboxHeap)
10919         {
10920             state->veboxInterface->DestroyHeap( );
10921             MOS_Delete(state->veboxInterface);
10922             state->veboxInterface = nullptr;
10923         }
10924 
10925         // Delete OS Interface
10926         if (state->osInterface)
10927         {
10928             if (state->osInterface->pfnDestroy)
10929             {
10930                 state->osInterface->pfnDestroy(state->osInterface, true);
10931             }
10932             if (state->osInterface->bDeallocateOnExit)
10933             {
10934                 MOS_FreeMemory(state->osInterface);
10935                 state->osInterface = nullptr;
10936             }
10937         }
10938 
10939         // Delete the TaskParam
10940         MOS_FreeMemory(state->taskParam);
10941 
10942         // Delete the TaskTimeStamp
10943         MOS_FreeMemory(state->taskTimeStamp);
10944 
10945         // Delete Tables
10946         MOS_FreeMemory(state->tableMemories);
10947 
10948         // Delete the pTotalKernelSize table for GSH
10949         MOS_FreeMemory(state->totalKernelSize);
10950 
10951         // Delete the perfTag Map
10952         for (int i = 0; i < MAX_COMBINE_NUM_IN_PERFTAG; i++)
10953         {
10954             MosSafeDelete(state->perfTagIndexMap[i]);
10955         }
10956 
10957         // Delete the state
10958         MOS_FreeMemory(state);
10959     }
10960 }
10961 
HalCm_GetUserFeatureSettings(PCM_HAL_STATE cmState)10962 void HalCm_GetUserFeatureSettings(
10963     PCM_HAL_STATE                  cmState
10964 )
10965 {
10966 #if (_DEBUG || _RELEASE_INTERNAL)
10967     PMOS_INTERFACE osInterface = cmState->osInterface;
10968 
10969     MOS_USER_FEATURE_VALUE_DATA userFeatureData;
10970 
10971     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
10972     MOS_UserFeature_ReadValue_ID(
10973         nullptr,
10974         __MEDIA_USER_FEATURE_VALUE_MDF_MAX_THREAD_NUM_ID,
10975         &userFeatureData,
10976         cmState->osInterface->pOsContext);
10977 
10978     if (userFeatureData.i32Data != 0)
10979     {
10980         uint32_t data = userFeatureData.i32Data;
10981         if ((data > 0) && (data <= cmState->renderHal->pHwCaps->dwMaxThreads))
10982         {
10983             cmState->maxHWThreadValues.userFeatureValue = data;
10984         }
10985     }
10986 
10987 #else
10988     UNUSED(cmState);
10989 #endif // _DEBUG || _RELEASE_INTERNAL
10990 }
10991 
10992 //*-----------------------------------------------------------------------------
10993 //| Purpose: Gathers information about the surface - used by GT-Pin
10994 //| Returns: MOS_STATUS_SUCCESS if surface type recognized, S_FAIL otherwise
10995 //*-----------------------------------------------------------------------------
HalCm_GetSurfaceDetails(PCM_HAL_STATE cmState,PCM_HAL_INDEX_PARAM indexParam,uint32_t btIndex,MOS_SURFACE & surface,int16_t globalSurface,PRENDERHAL_SURFACE_STATE_ENTRY surfaceEntry,uint32_t tempPlaneIndex,RENDERHAL_SURFACE_STATE_PARAMS surfaceParam,CM_HAL_KERNEL_ARG_KIND argKind)10996 MOS_STATUS HalCm_GetSurfaceDetails(
10997     PCM_HAL_STATE                  cmState,
10998     PCM_HAL_INDEX_PARAM            indexParam,
10999     uint32_t                       btIndex,
11000     MOS_SURFACE&                   surface,
11001     int16_t                        globalSurface,
11002     PRENDERHAL_SURFACE_STATE_ENTRY surfaceEntry,
11003     uint32_t                       tempPlaneIndex,
11004     RENDERHAL_SURFACE_STATE_PARAMS surfaceParam,
11005     CM_HAL_KERNEL_ARG_KIND         argKind
11006     )
11007 {
11008     MOS_STATUS                 eStatus = MOS_STATUS_UNKNOWN;
11009     PCM_SURFACE_DETAILS        surfaceInfos  = nullptr;
11010     PCM_SURFACE_DETAILS        pgSurfaceInfos = nullptr;
11011     PCM_HAL_TASK_PARAM         taskParam     = cmState->taskParam;
11012     uint32_t                   curKernelIndex = taskParam->curKernelIndex;
11013     PMOS_PLANE_OFFSET          planeOffset   = 0;
11014     uint32_t                   maxEntryNum    = 0;
11015     MOS_OS_FORMAT              tempOsFormat   ;
11016 
11017     CM_SURFACE_BTI_INFO surfBTIInfo;
11018     CM_CHK_NULL_GOTOFINISH_MOSERROR(cmState);
11019     CM_CHK_NULL_GOTOFINISH_MOSERROR(cmState->cmHalInterface);
11020     CM_CHK_NULL_GOTOFINISH_MOSERROR(cmState->osInterface);
11021     cmState->cmHalInterface->GetHwSurfaceBTIInfo(&surfBTIInfo);
11022 
11023     UNUSED(indexParam);
11024 
11025     if(curKernelIndex+1>taskParam->surfEntryInfoArrays.kernelNum)
11026     {
11027         eStatus = MOS_STATUS_INVALID_PARAMETER;
11028         CM_ASSERTMESSAGE(
11029             "Mismatched kernel index: curKernelIndex '%d' vs krnNum '%d'",
11030             curKernelIndex,taskParam->surfEntryInfoArrays.kernelNum);
11031         goto finish;
11032     }
11033 
11034     surfaceInfos  = taskParam->surfEntryInfoArrays.surfEntryInfosArray[curKernelIndex].surfEntryInfos;
11035     pgSurfaceInfos = taskParam->surfEntryInfoArrays.surfEntryInfosArray[curKernelIndex].globalSurfInfos;
11036 
11037     tempOsFormat = (MOS_OS_FORMAT)cmState->osInterface->pfnMosFmtToOsFmt(surface.Format);
11038 
11039     switch (argKind)
11040     {
11041     case CM_ARGUMENT_SURFACEBUFFER:
11042 
11043         if((btIndex >= surfBTIInfo.reservedSurfaceStart) &&
11044             (btIndex < surfBTIInfo.reservedSurfaceStart + CM_MAX_GLOBAL_SURFACE_NUMBER))
11045         {
11046             btIndex = btIndex - surfBTIInfo.reservedSurfaceStart;
11047 
11048             maxEntryNum = taskParam->surfEntryInfoArrays.surfEntryInfosArray->globalSurfNum;
11049             if ( btIndex >= maxEntryNum )
11050             {
11051                 eStatus = MOS_STATUS_INVALID_PARAMETER;
11052                 CM_ASSERTMESSAGE(
11053                 "Array for surface details is full: Max number of entries '%d' and trying to add index '%d'",
11054                 maxEntryNum, btIndex);
11055                 goto finish;
11056             }
11057 
11058             MOS_ZeroMemory(&pgSurfaceInfos[btIndex], sizeof(CM_SURFACE_DETAILS));
11059             pgSurfaceInfos[btIndex].width  = surface.dwWidth;
11060             pgSurfaceInfos[btIndex].format = DDI_FORMAT_UNKNOWN;
11061         }
11062         else
11063         {
11064             btIndex = btIndex - surfBTIInfo.reservedSurfaceStart - CM_MAX_GLOBAL_SURFACE_NUMBER;
11065             maxEntryNum = taskParam->surfEntryInfoArrays.surfEntryInfosArray->maxEntryNum;
11066             if ( btIndex >= maxEntryNum )
11067             {
11068                 eStatus = MOS_STATUS_INVALID_PARAMETER;
11069                 CM_ASSERTMESSAGE(
11070                 "Array for surface details is full: Max number of entries '%d' and trying to add index '%d'",
11071                 maxEntryNum, btIndex);
11072                 goto finish;
11073             }
11074 
11075             MOS_ZeroMemory(&surfaceInfos[btIndex], sizeof(CM_SURFACE_DETAILS));
11076             surfaceInfos[btIndex].width  = surface.dwWidth;
11077             surfaceInfos[btIndex].format = DDI_FORMAT_UNKNOWN;
11078         }
11079 
11080         if (globalSurface < 0)
11081         {
11082             ++taskParam->surfEntryInfoArrays.surfEntryInfosArray[curKernelIndex].usedIndex;
11083         }
11084 
11085         eStatus = MOS_STATUS_SUCCESS;
11086         break;
11087 
11088     case CM_ARGUMENT_SURFACE2D_UP:
11089     case CM_ARGUMENT_SURFACE2D:
11090     // VME surface and sampler8x8 called with CM_ARGUMENT_SURFACE2D
11091          btIndex = btIndex - surfBTIInfo.reservedSurfaceStart - CM_MAX_GLOBAL_SURFACE_NUMBER;
11092          maxEntryNum = taskParam->surfEntryInfoArrays.surfEntryInfosArray->maxEntryNum;
11093 
11094          if ( btIndex >= maxEntryNum )
11095          {
11096              eStatus = MOS_STATUS_INVALID_PARAMETER;
11097              CM_ASSERTMESSAGE(
11098              "Array for surface details is full: Max number of entries '%d' and trying to add index '%d'",
11099              maxEntryNum, btIndex);
11100              goto finish;
11101          }
11102 
11103          surfaceInfos[btIndex].width              = surfaceEntry->dwWidth;
11104          surfaceInfos[btIndex].height             = surfaceEntry->dwHeight;
11105          surfaceInfos[btIndex].depth              = 0;
11106          surfaceInfos[btIndex].format             = (DdiSurfaceFormat)tempOsFormat;
11107          surfaceInfos[btIndex].planeIndex         = tempPlaneIndex;
11108          surfaceInfos[btIndex].pitch              = surfaceEntry->dwPitch;
11109          surfaceInfos[btIndex].slicePitch         = 0;
11110          surfaceInfos[btIndex].surfaceBaseAddress = 0;
11111          surfaceInfos[btIndex].tileWalk           = surfaceEntry->bTileWalk;
11112          surfaceInfos[btIndex].tiledSurface       = surfaceEntry->bTiledSurface;
11113 
11114          if (surfaceEntry->YUVPlane == MHW_U_PLANE ||
11115              surfaceEntry->YUVPlane == MHW_V_PLANE)
11116          {
11117              planeOffset = (surfaceEntry->YUVPlane == MHW_U_PLANE)
11118              ? &surface.UPlaneOffset
11119              : &surface.VPlaneOffset;
11120 
11121              surfaceInfos[btIndex].yOffset = planeOffset->iYOffset >> 1;
11122 
11123              if ( argKind == CM_ARGUMENT_SURFACE2D_UP )
11124              {
11125                  surfaceInfos[btIndex].xOffset = (planeOffset->iXOffset/(uint32_t)sizeof(uint32_t)) >> 2;
11126              }
11127              else
11128              {
11129                  uint32_t pixelsPerSampleUV = 0;
11130                  //Get Pixels Per Sample if we use dataport read
11131                  if(surfaceParam.bWidthInDword_UV)
11132                  {
11133                      RenderHal_GetPixelsPerSample(surface.Format, &pixelsPerSampleUV);
11134                  }
11135                  else
11136                  {
11137                      // If the kernel uses sampler - do not change width (it affects coordinates)
11138                      pixelsPerSampleUV = 1;
11139                  }
11140 
11141                  if(pixelsPerSampleUV == 1)
11142                  {
11143                      surfaceInfos[btIndex].xOffset = planeOffset->iXOffset >> 2;
11144                  }
11145                  else
11146                  {
11147                      surfaceInfos[btIndex].xOffset = (planeOffset->iXOffset/(uint32_t)sizeof(uint32_t)) >> 2;
11148                  }
11149              }
11150          }
11151          else
11152          {
11153              surfaceInfos[btIndex].xOffset = (surface.YPlaneOffset.iXOffset/(uint32_t)sizeof(uint32_t)) >> 2;
11154              surfaceInfos[btIndex].yOffset = surface.YPlaneOffset.iYOffset >> 1;
11155          }
11156 
11157          ++taskParam->surfEntryInfoArrays.surfEntryInfosArray[curKernelIndex].usedIndex;
11158          ++tempPlaneIndex;
11159 
11160          eStatus = MOS_STATUS_SUCCESS;
11161          break;
11162 
11163     case CM_ARGUMENT_SURFACE3D:
11164 
11165         btIndex = btIndex - surfBTIInfo.normalSurfaceStart - CM_MAX_GLOBAL_SURFACE_NUMBER;
11166         maxEntryNum = taskParam->surfEntryInfoArrays.surfEntryInfosArray->maxEntryNum;
11167 
11168         if ( btIndex >= maxEntryNum )
11169         {
11170             eStatus = MOS_STATUS_INVALID_PARAMETER;
11171             CM_ASSERTMESSAGE(
11172             "Array for surface details is full: Max number of entries '%d' and trying to add index '%d'",
11173             maxEntryNum, btIndex);
11174             goto finish;
11175         }
11176 
11177         surfaceInfos[btIndex].width              = surfaceEntry->dwWidth;
11178         surfaceInfos[btIndex].height             = surfaceEntry->dwHeight;
11179         surfaceInfos[btIndex].depth              = surface.dwDepth;
11180         surfaceInfos[btIndex].format             = (DdiSurfaceFormat)tempOsFormat;
11181         surfaceInfos[btIndex].pitch              = surfaceEntry->dwPitch;
11182         surfaceInfos[btIndex].planeIndex         = tempPlaneIndex;
11183         surfaceInfos[btIndex].slicePitch         = surface.dwSlicePitch;
11184         surfaceInfos[btIndex].surfaceBaseAddress = 0;
11185         surfaceInfos[btIndex].tileWalk           = surfaceEntry->bTileWalk;
11186         surfaceInfos[btIndex].tiledSurface       = surfaceEntry->bTiledSurface;
11187 
11188         if (surfaceEntry->YUVPlane == MHW_U_PLANE ||
11189             surfaceEntry->YUVPlane == MHW_V_PLANE)
11190         {
11191             planeOffset = (surfaceEntry->YUVPlane == MHW_U_PLANE)
11192             ? &surface.UPlaneOffset
11193             : &surface.VPlaneOffset;
11194 
11195             surfaceInfos[btIndex].yOffset = planeOffset->iYOffset >> 1;
11196             surfaceInfos[btIndex].xOffset = (planeOffset->iXOffset/(uint32_t)sizeof(uint32_t)) >> 2;
11197         }
11198         else
11199         {
11200             surfaceInfos[btIndex].xOffset = (surface.YPlaneOffset.iXOffset/(uint32_t)sizeof(uint32_t)) >> 2;
11201             surfaceInfos[btIndex].yOffset = surface.YPlaneOffset.iYOffset >> 1;
11202         }
11203 
11204         ++tempPlaneIndex;
11205         ++taskParam->surfEntryInfoArrays.surfEntryInfosArray[curKernelIndex].usedIndex;
11206 
11207         eStatus = MOS_STATUS_SUCCESS;
11208         break;
11209 
11210     default:
11211         break;
11212     }
11213 
11214  finish:
11215         return eStatus;
11216 }
11217 
HalCm_GetFreeBindingIndex(PCM_HAL_STATE state,PCM_HAL_INDEX_PARAM indexParam,uint32_t total)11218 uint32_t HalCm_GetFreeBindingIndex(
11219     PCM_HAL_STATE             state,
11220     PCM_HAL_INDEX_PARAM       indexParam,
11221     uint32_t                  total)
11222 {
11223     CM_SURFACE_BTI_INFO surfBTIInfo;
11224     state->cmHalInterface->GetHwSurfaceBTIInfo(&surfBTIInfo);
11225 
11226     uint32_t btIndex = surfBTIInfo.normalSurfaceStart;
11227     uint32_t unAllocated = total;
11228 
11229     while (btIndex < 256 && unAllocated > 0)
11230     {
11231         uint32_t arrayIndex = btIndex >> 5;
11232         uint32_t bitMask = (uint32_t)0x1 << (btIndex % 32);
11233         if (indexParam->btArray[arrayIndex] & bitMask)
11234         {
11235             // oops, occupied
11236             if (unAllocated != total)
11237             {
11238                 // clear previous allocation
11239                 uint32_t allocated = total - unAllocated;
11240                 uint32_t tmpIndex = btIndex - 1;
11241                 while (allocated > 0)
11242                 {
11243                     uint32_t arrayIndex = tmpIndex >> 5;
11244                     uint32_t bitMask = 1 << (tmpIndex % 32);
11245                     indexParam->btArray[arrayIndex] &= ~bitMask;
11246                     allocated--;
11247                     tmpIndex--;
11248                 }
11249                 // reset
11250                 unAllocated = total;
11251             }
11252         }
11253         else
11254         {
11255             indexParam->btArray[arrayIndex] |= bitMask;
11256             unAllocated--;
11257         }
11258         btIndex++;
11259     }
11260 
11261     if (unAllocated == 0)
11262     {
11263         // found slot
11264         return btIndex - total;
11265     }
11266 
11267     // no slot
11268     return 0;
11269 }
11270 
HalCm_PreSetBindingIndex(PCM_HAL_INDEX_PARAM indexParam,uint32_t start,uint32_t end)11271 void HalCm_PreSetBindingIndex(
11272     PCM_HAL_INDEX_PARAM     indexParam,
11273     uint32_t                start,
11274     uint32_t                end)
11275 {
11276     uint32_t btIndex;
11277     for ( btIndex = start; btIndex <= end ; btIndex++)
11278     {
11279         uint32_t arrayIndex = btIndex >> 5;
11280         uint32_t bitMask = 1 << (btIndex % 32);
11281         indexParam->btArray[arrayIndex] |= bitMask;
11282     }
11283 }
11284 
11285 //*-----------------------------------------------------------------------------
11286 //| Purpose: Setup surface State with BTIndex
11287 //| Returns: Result of the operation
11288 //*-----------------------------------------------------------------------------
HalCm_Setup2DSurfaceStateWithBTIndex(PCM_HAL_STATE state,int32_t bindingTable,uint32_t surfIndex,uint32_t btIndex,bool pixelPitch)11289 MOS_STATUS HalCm_Setup2DSurfaceStateWithBTIndex(
11290     PCM_HAL_STATE                      state,
11291     int32_t                            bindingTable,
11292     uint32_t                           surfIndex,
11293     uint32_t                           btIndex,
11294     bool                               pixelPitch)
11295 {
11296     PRENDERHAL_INTERFACE            renderHal = state->renderHal;
11297     MOS_STATUS                  eStatus;
11298     RENDERHAL_SURFACE               surface;
11299     RENDERHAL_SURFACE_STATE_PARAMS  surfaceParam;
11300     PRENDERHAL_SURFACE_STATE_ENTRY  surfaceEntries[MHW_MAX_SURFACE_PLANES];
11301     int32_t                     nSurfaceEntries, i;
11302     uint16_t                    memObjCtl;
11303     uint32_t                    offsetSrc;
11304     PRENDERHAL_STATE_HEAP       stateHeap;
11305 
11306     eStatus         = MOS_STATUS_UNKNOWN;
11307     nSurfaceEntries = 0;
11308 
11309     if (surfIndex == CM_NULL_SURFACE)
11310     {
11311         return MOS_STATUS_SUCCESS;
11312     }
11313 
11314     memObjCtl = CM_DEFAULT_CACHE_TYPE;
11315 
11316     // check the surfIndex
11317     if (surfIndex >= state->cmDeviceParam.max2DSurfaceTableSize ||
11318         Mos_ResourceIsNull(&state->umdSurf2DTable[surfIndex].osResource) )
11319     {
11320         CM_ASSERTMESSAGE(
11321             "Invalid 2D surface array index '%d'", surfIndex);
11322         return MOS_STATUS_UNKNOWN;
11323     }
11324 
11325     // Check to see if surface is already assigned
11326     uint32_t nBTInTable = ( unsigned char )CM_INVALID_INDEX;
11327     if ( pixelPitch )
11328     {
11329         nBTInTable = state->bti2DIndexTable[ surfIndex ].BTI.samplerSurfIndex;
11330     }
11331     else
11332     {
11333         nBTInTable = state->bti2DIndexTable[ surfIndex ].BTI.regularSurfIndex;
11334     }
11335 
11336     if ( btIndex == nBTInTable )
11337     {
11338         nSurfaceEntries = state->bti2DIndexTable[ surfIndex ].nPlaneNumber;
11339 
11340         stateHeap = renderHal->pStateHeap;
11341 
11342         // Get Offset to Current Binding Table
11343         uint32_t offsetDst = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +     // Points to the Base of Current SSH Buffer Instance
11344                             ( stateHeap->iBindingTableOffset ) +                       // Moves the pointer to Base of Array of Binding Tables
11345                             ( bindingTable * stateHeap->iBindingTableSize ) +         // Moves the pointer to a Particular Binding Table
11346                             ( btIndex * sizeof( uint32_t ) );                              // Move the pointer to correct entry
11347 
11348         uint32_t *bindingTableEntry = ( uint32_t *)( stateHeap->pSshBuffer + offsetDst );
11349 
11350         if ( pixelPitch )
11351         {
11352             MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * nSurfaceEntries, state->bti2DIndexTable[ surfIndex ].BTITableEntry.samplerBtiEntryPosition, sizeof( uint32_t ) * nSurfaceEntries );
11353         }
11354         else
11355         {
11356             MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * nSurfaceEntries, state->bti2DIndexTable[ surfIndex ].BTITableEntry.regularBtiEntryPosition, sizeof( uint32_t ) * nSurfaceEntries );
11357         }
11358 
11359         return MOS_STATUS_SUCCESS;
11360     }
11361 
11362     // Get Details of 2D surface and fill the surface
11363     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceAndRegister(state, &surface, CM_ARGUMENT_SURFACE2D, surfIndex, pixelPitch));
11364 
11365     // Setup 2D surface
11366     MOS_ZeroMemory(&surfaceParam, sizeof(surfaceParam));
11367     surfaceParam.Type       = renderHal->SurfaceTypeDefault;
11368     surfaceParam.Boundary   = RENDERHAL_SS_BOUNDARY_ORIGINAL;
11369     if (!pixelPitch) {
11370         surfaceParam.bWidthInDword_UV = true;
11371         surfaceParam.bWidthInDword_Y = true;
11372     }
11373 
11374     surfaceParam.isOutput = isRenderTarget(state, surfIndex);
11375 
11376     //Cache configurations
11377     state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam);
11378 
11379     CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSetupSurfaceState(
11380                   renderHal,
11381                   &surface,
11382                   &surfaceParam,
11383                   &nSurfaceEntries,
11384                   surfaceEntries,
11385                   nullptr));
11386 
11387     for (i = 0; i < nSurfaceEntries; i++)
11388     {
11389         // Bind the surface State
11390         CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnBindSurfaceState(
11391                         renderHal,
11392                         bindingTable,
11393                         btIndex + i,
11394                         surfaceEntries[i]));
11395     }
11396 
11397     state->bti2DIndexTable[ surfIndex ].nPlaneNumber = nSurfaceEntries;
11398     // Get Offset to Current Binding Table
11399     stateHeap = renderHal->pStateHeap;
11400     offsetSrc = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +     // Points to the Base of Current SSH Buffer Instance
11401                         ( stateHeap->iBindingTableOffset ) +                       // Moves the pointer to Base of Array of Binding Tables
11402                         ( bindingTable * stateHeap->iBindingTableSize ) +         // Moves the pointer to a Particular Binding Table
11403                         ( btIndex * sizeof( uint32_t ) );                              // Move the pointer to correct entry
11404 
11405     if ( pixelPitch )
11406     {
11407         state->bti2DIndexTable[ surfIndex ].BTI.samplerSurfIndex = btIndex;
11408         state->bti2DIndexTable[ surfIndex ].BTITableEntry.samplerBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
11409     }
11410     else
11411     {
11412         state->bti2DIndexTable[ surfIndex ].BTI.regularSurfIndex = btIndex;
11413         state->bti2DIndexTable[ surfIndex ].BTITableEntry.regularBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
11414     }
11415 
11416     eStatus = MOS_STATUS_SUCCESS;
11417 
11418 finish:
11419     return eStatus;
11420 }
11421 
11422 //*-----------------------------------------------------------------------------
11423 //| Purpose: Setup Buffer surface State with BTIndex
11424 //| Returns: Result of the operation
11425 //*-----------------------------------------------------------------------------
HalCm_SetupBufferSurfaceStateWithBTIndex(PCM_HAL_STATE state,int32_t bindingTable,uint32_t surfIndex,uint32_t btIndex,bool pixelPitch)11426 MOS_STATUS HalCm_SetupBufferSurfaceStateWithBTIndex(
11427     PCM_HAL_STATE                      state,
11428     int32_t                            bindingTable,
11429     uint32_t                           surfIndex,
11430     uint32_t                           btIndex,
11431     bool                               pixelPitch)
11432 {
11433     PRENDERHAL_INTERFACE            renderHal = state ? state->renderHal : nullptr;
11434     MOS_STATUS                      eStatus;
11435     RENDERHAL_SURFACE               surface;
11436     RENDERHAL_SURFACE_STATE_PARAMS  surfaceParam;
11437     PRENDERHAL_SURFACE_STATE_ENTRY  surfaceEntry;
11438     uint16_t                        memObjCtl;
11439     uint32_t                        offsetSrc;
11440     PRENDERHAL_STATE_HEAP           stateHeap;
11441     UNUSED(pixelPitch);
11442 
11443     eStatus              = MOS_STATUS_UNKNOWN;
11444 
11445     CM_CHK_NULL_RETURN_MOSERROR(state);
11446     CM_CHK_NULL_RETURN_MOSERROR(renderHal);
11447 
11448     if (surfIndex == CM_NULL_SURFACE)
11449     {
11450         return MOS_STATUS_SUCCESS;
11451     }
11452 
11453     memObjCtl = CM_DEFAULT_CACHE_TYPE;
11454 
11455     // Check to see if surface is already assigned
11456     if ( btIndex == ( uint32_t )state->btiBufferIndexTable[ surfIndex ].BTI.regularSurfIndex )
11457     {
11458         uint32_t nSurfaceEntries = state->btiBufferIndexTable[ surfIndex ].nPlaneNumber;
11459 
11460         stateHeap = renderHal->pStateHeap;
11461 
11462         // Get Offset to Current Binding Table
11463         uint32_t offsetDst = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +     // Points to the Base of Current SSH Buffer Instance
11464                             ( stateHeap->iBindingTableOffset ) +                       // Moves the pointer to Base of Array of Binding Tables
11465                             ( bindingTable * stateHeap->iBindingTableSize ) +               // Moves the pointer to a Particular Binding Table
11466                             ( btIndex * sizeof( uint32_t ) );                              // Move the pointer to correct entry
11467 
11468         uint32_t *bindingTableEntry = ( uint32_t *)( stateHeap->pSshBuffer + offsetDst );
11469         MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * nSurfaceEntries, state->btiBufferIndexTable[ surfIndex ].BTITableEntry.regularBtiEntryPosition, sizeof( uint32_t ) * nSurfaceEntries );
11470 
11471         return MOS_STATUS_SUCCESS;
11472     }
11473 
11474     // Get Details of Buffer surface and fill the surface
11475     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceAndRegister(state, &surface, CM_ARGUMENT_SURFACEBUFFER, surfIndex, 0));
11476 
11477     // set up buffer surface
11478     MOS_ZeroMemory(&surfaceParam, sizeof(surfaceParam));
11479 
11480     // Set isOutput by default
11481     surfaceParam.isOutput = true;
11482 
11483     //Cache configurations default
11484     state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam);
11485 
11486     // Setup Buffer surface
11487     CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSetupBufferSurfaceState(
11488             renderHal,
11489             &surface,
11490             &surfaceParam,
11491             &surfaceEntry));
11492 
11493     //Cache configurations
11494     state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam);
11495 
11496     // Bind the surface State
11497     CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnBindSurfaceState(
11498                renderHal,
11499                bindingTable,
11500                btIndex,
11501                surfaceEntry));
11502 
11503     state->btiBufferIndexTable[ surfIndex ].BTI.regularSurfIndex = btIndex;
11504     state->btiBufferIndexTable[ surfIndex ].nPlaneNumber = 1;
11505 
11506     stateHeap = renderHal->pStateHeap;
11507     offsetSrc = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +     // Points to the Base of Current SSH Buffer Instance
11508                         ( stateHeap->iBindingTableOffset ) +                       // Moves the pointer to Base of Array of Binding Tables
11509                         ( bindingTable * stateHeap->iBindingTableSize ) +         // Moves the pointer to a Particular Binding Table
11510                         ( btIndex * sizeof( uint32_t ) );                              // Move the pointer to correct entry
11511 
11512     state->btiBufferIndexTable[ surfIndex ].BTITableEntry.regularBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
11513 
11514     eStatus = MOS_STATUS_SUCCESS;
11515 
11516 finish:
11517     return eStatus;
11518 }
11519 
HalCm_Setup2DSurfaceUPStateWithBTIndex(PCM_HAL_STATE state,int32_t bindingTable,uint32_t surfIndex,uint32_t btIndex,bool pixelPitch)11520 MOS_STATUS HalCm_Setup2DSurfaceUPStateWithBTIndex(
11521     PCM_HAL_STATE                      state,
11522     int32_t                            bindingTable,
11523     uint32_t                           surfIndex,
11524     uint32_t                           btIndex,
11525     bool                               pixelPitch)
11526 {
11527     MOS_STATUS                     eStatus;
11528     RENDERHAL_SURFACE              surface;
11529     RENDERHAL_SURFACE_STATE_PARAMS surfaceParam;
11530     PRENDERHAL_INTERFACE           renderHal;
11531     PRENDERHAL_SURFACE_STATE_ENTRY surfaceEntries[MHW_MAX_SURFACE_PLANES];
11532     int32_t                        nSurfaceEntries, i;
11533     uint16_t                       memObjCtl;
11534     uint32_t                       offsetSrc;
11535     PRENDERHAL_STATE_HEAP          stateHeap;
11536 
11537     eStatus              = MOS_STATUS_UNKNOWN;
11538     renderHal    = state->renderHal;
11539 
11540     if (surfIndex == CM_NULL_SURFACE)
11541     {
11542         return MOS_STATUS_SUCCESS;
11543     }
11544 
11545     memObjCtl = CM_DEFAULT_CACHE_TYPE;
11546 
11547     // Check to see if surface is already assigned
11548     uint32_t nBTInTable = ( unsigned char )CM_INVALID_INDEX;
11549     if ( pixelPitch )
11550     {
11551         nBTInTable = state->bti2DUPIndexTable[ surfIndex ].BTI.samplerSurfIndex;
11552     }
11553     else
11554     {
11555         nBTInTable = state->bti2DUPIndexTable[ surfIndex ].BTI.regularSurfIndex;
11556     }
11557 
11558     if ( btIndex == nBTInTable )
11559     {
11560         uint32_t nSurfaceEntries = state->bti2DUPIndexTable[ surfIndex ].nPlaneNumber;
11561 
11562         stateHeap = renderHal->pStateHeap;
11563 
11564         // Get Offset to Current Binding Table
11565         uint32_t offsetDst = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +   // Points to the Base of Current SSH Buffer Instance
11566                             ( stateHeap->iBindingTableOffset ) +                       // Moves the pointer to Base of Array of Binding Tables
11567                             ( bindingTable * stateHeap->iBindingTableSize ) +         // Moves the pointer to a Particular Binding Table
11568                             ( btIndex * sizeof( uint32_t ) );                              // Move the pointer to correct entry
11569 
11570         uint32_t *bindingTableEntry = ( uint32_t *)( stateHeap->pSshBuffer + offsetDst );
11571         if ( pixelPitch )
11572         {
11573             MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * nSurfaceEntries, state->bti2DUPIndexTable[ surfIndex ].BTITableEntry.samplerBtiEntryPosition, sizeof( uint32_t ) * nSurfaceEntries );
11574         }
11575         else
11576         {
11577             MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * nSurfaceEntries, state->bti2DUPIndexTable[ surfIndex ].BTITableEntry.regularBtiEntryPosition, sizeof( uint32_t ) * nSurfaceEntries );
11578         }
11579 
11580         return MOS_STATUS_SUCCESS;
11581     }
11582 
11583     // Get Details of 2DUP surface and fill the surface
11584     CM_CHK_MOSSTATUS_GOTOFINISH( HalCm_GetSurfaceAndRegister( state, &surface, CM_ARGUMENT_SURFACE2D_UP, surfIndex, pixelPitch ) );
11585 
11586     // Setup 2D surface
11587     MOS_ZeroMemory( &surfaceParam, sizeof( surfaceParam ) );
11588     surfaceParam.Type = renderHal->SurfaceTypeDefault;
11589     surfaceParam.Boundary = RENDERHAL_SS_BOUNDARY_ORIGINAL;
11590 
11591     if ( !pixelPitch )
11592     {
11593         surfaceParam.bWidthInDword_UV = true;
11594         surfaceParam.bWidthInDword_Y = true;
11595     }
11596 
11597     surfaceParam.isOutput = true;
11598 
11599     //Cache configurations
11600     state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam);
11601 
11602     CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSetupSurfaceState(
11603                 renderHal,
11604                 &surface,
11605                 &surfaceParam,
11606                 &nSurfaceEntries,
11607                 surfaceEntries,
11608                 nullptr));
11609 
11610     for (i = 0; i < nSurfaceEntries; i++)
11611     {
11612         // Bind the surface State
11613         CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnBindSurfaceState(
11614                         renderHal,
11615                         bindingTable,
11616                         btIndex + i,
11617                         surfaceEntries[i]));
11618     }
11619 
11620     state->bti2DUPIndexTable[ surfIndex ].nPlaneNumber = nSurfaceEntries;
11621 
11622     stateHeap = renderHal->pStateHeap;
11623     offsetSrc = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +     // Points to the Base of Current SSH Buffer Instance
11624                         ( stateHeap->iBindingTableOffset ) +                       // Moves the pointer to Base of Array of Binding Tables
11625                         ( bindingTable * stateHeap->iBindingTableSize ) +         // Moves the pointer to a Particular Binding Table
11626                         ( btIndex * sizeof( uint32_t ) );                              // Move the pointer to correct entry
11627 
11628     if ( pixelPitch )
11629     {
11630         state->bti2DUPIndexTable[ surfIndex ].BTI.samplerSurfIndex = btIndex;
11631         state->bti2DUPIndexTable[ surfIndex ].BTITableEntry.samplerBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
11632     }
11633     else
11634     {
11635         state->bti2DUPIndexTable[ surfIndex ].BTI.regularSurfIndex = btIndex;
11636         state->bti2DUPIndexTable[ surfIndex ].BTITableEntry.regularBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
11637     }
11638 
11639     eStatus = MOS_STATUS_SUCCESS;
11640 
11641 finish:
11642     return eStatus;
11643 }
11644 
HalCm_SetupSampler8x8SurfaceStateWithBTIndex(PCM_HAL_STATE state,int32_t bindingTable,uint32_t surfIndex,uint32_t btIndex,bool pixelPitch,CM_HAL_KERNEL_ARG_KIND kind,uint32_t addressControl)11645 MOS_STATUS HalCm_SetupSampler8x8SurfaceStateWithBTIndex(
11646     PCM_HAL_STATE           state,
11647     int32_t                 bindingTable,
11648     uint32_t                surfIndex,
11649     uint32_t                btIndex,
11650     bool                    pixelPitch,
11651     CM_HAL_KERNEL_ARG_KIND  kind,
11652     uint32_t                addressControl )
11653 {
11654     MOS_STATUS                  eStatus;
11655     RENDERHAL_SURFACE               surface;
11656     RENDERHAL_SURFACE_STATE_PARAMS  surfaceParam;
11657     PRENDERHAL_INTERFACE            renderHal;
11658     PRENDERHAL_SURFACE_STATE_ENTRY  surfaceEntries[ MHW_MAX_SURFACE_PLANES ];
11659     int32_t                         nSurfaceEntries;
11660     uint16_t                        memObjCtl;
11661     int32_t                         i;
11662     uint32_t                        offsetSrc;
11663     PRENDERHAL_STATE_HEAP           stateHeap;
11664     UNUSED(pixelPitch);
11665 
11666     eStatus = MOS_STATUS_UNKNOWN;
11667     renderHal = state->renderHal;
11668 
11669     if ( surfIndex == CM_NULL_SURFACE )
11670     {
11671         eStatus = MOS_STATUS_SUCCESS;
11672         goto finish;
11673     }
11674 
11675     memObjCtl = CM_DEFAULT_CACHE_TYPE;
11676 
11677     // check to see if index is valid
11678     if ( surfIndex >= state->cmDeviceParam.max2DSurfaceTableSize ||
11679          Mos_ResourceIsNull( &state->umdSurf2DTable[ surfIndex ].osResource ) )
11680     {
11681         eStatus = MOS_STATUS_INVALID_PARAMETER;
11682         CM_ASSERTMESSAGE(
11683             "Invalid 2D surface array index '%d'", surfIndex );
11684         goto finish;
11685     }
11686 
11687     // Get Details of Sampler8x8 surface and fill the surface
11688     CM_CHK_MOSSTATUS_GOTOFINISH( HalCm_GetSurfaceAndRegister( state, &surface, kind, surfIndex, 0 ) );
11689 
11690     // Setup surface
11691     MOS_ZeroMemory( &surfaceParam, sizeof( surfaceParam ) );
11692     surfaceParam.Type = renderHal->SurfaceTypeAdvanced;
11693     surfaceParam.isOutput = true;
11694     surfaceParam.bWidthInDword_Y = false;
11695     surfaceParam.bWidthInDword_UV = false;
11696     surfaceParam.Boundary = RENDERHAL_SS_BOUNDARY_ORIGINAL;
11697     surfaceParam.bVASurface = ( kind == CM_ARGUMENT_SURFACE_SAMPLER8X8_VA ) ? 1 : 0;
11698     surfaceParam.AddressControl = addressControl;
11699     state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam );
11700     renderHal->bEnableP010SinglePass = state->cmHalInterface->IsP010SinglePassSupported();
11701     nSurfaceEntries = 0;
11702     CM_CHK_MOSSTATUS_GOTOFINISH( renderHal->pfnSetupSurfaceState(
11703         renderHal,
11704         &surface,
11705         &surfaceParam,
11706         &nSurfaceEntries,
11707         surfaceEntries,
11708         nullptr ) );
11709 
11710     CM_ASSERT( nSurfaceEntries == 1 );
11711 
11712     for ( i = 0; i < nSurfaceEntries; i++ )
11713     {
11714         // Bind the surface State
11715         CM_CHK_MOSSTATUS_GOTOFINISH( renderHal->pfnBindSurfaceState(
11716             renderHal,
11717             bindingTable,
11718             btIndex + i,
11719             surfaceEntries[ i ] ) );
11720     }
11721 
11722     stateHeap = renderHal->pStateHeap;
11723     offsetSrc = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) +     // Points to the Base of Current SSH Buffer Instance
11724         ( stateHeap->iBindingTableOffset ) +                       // Moves the pointer to Base of Array of Binding Tables
11725         ( bindingTable * stateHeap->iBindingTableSize ) +         // Moves the pointer to a Particular Binding Table
11726         ( btIndex * sizeof( uint32_t ) );                              // Move the pointer to correct entry
11727 
11728     state->bti2DIndexTable[ surfIndex ].nPlaneNumber = nSurfaceEntries;
11729     state->bti2DIndexTable[ surfIndex ].BTITableEntry.sampler8x8BtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
11730     state->bti2DIndexTable[ surfIndex ].BTI.sampler8x8SurfIndex = btIndex;
11731 
11732     eStatus = MOS_STATUS_SUCCESS;
11733 
11734 finish:
11735     renderHal->bEnableP010SinglePass = false;
11736     return eStatus;
11737 }
11738 
11739 //*-----------------------------------------------------------------------------
11740 //| Purpose: Setup 3D surface State with BTIndex
11741 //| Returns: Result of the operation
11742 //*-----------------------------------------------------------------------------
HalCm_Setup3DSurfaceStateWithBTIndex(PCM_HAL_STATE state,int32_t bindingTable,uint32_t surfIndex,uint32_t btIndex)11743 MOS_STATUS HalCm_Setup3DSurfaceStateWithBTIndex(
11744     PCM_HAL_STATE                      state,
11745     int32_t                            bindingTable,
11746     uint32_t                           surfIndex,
11747     uint32_t                           btIndex)
11748 {
11749     PRENDERHAL_INTERFACE            renderHal = state->renderHal;
11750     MOS_STATUS                      eStatus;
11751     RENDERHAL_SURFACE               surface;
11752     RENDERHAL_SURFACE_STATE_PARAMS  surfaceParam;
11753     PRENDERHAL_SURFACE_STATE_ENTRY  surfaceEntries[MHW_MAX_SURFACE_PLANES];
11754     int32_t                     nSurfaceEntries, i;
11755     uint16_t                    memObjCtl;
11756     uint32_t                    offsetSrc;
11757     PRENDERHAL_STATE_HEAP       stateHeap;
11758 
11759     eStatus = MOS_STATUS_UNKNOWN;
11760     nSurfaceEntries = 0;
11761 
11762     if (surfIndex == CM_NULL_SURFACE)
11763     {
11764         return MOS_STATUS_SUCCESS;
11765     }
11766 
11767     memObjCtl = CM_DEFAULT_CACHE_TYPE;
11768 
11769     // check the surfIndex
11770     if (surfIndex >= state->cmDeviceParam.max3DSurfaceTableSize ||
11771         Mos_ResourceIsNull(&state->surf3DTable[surfIndex].osResource))
11772     {
11773         eStatus = MOS_STATUS_INVALID_PARAMETER;
11774         CM_ASSERTMESSAGE(
11775             "Invalid 3D surface array index '%d'", surfIndex);
11776         return MOS_STATUS_UNKNOWN;
11777     }
11778 
11779     // Check to see if surface is already assigned
11780     uint32_t nBTInTable = (unsigned char)CM_INVALID_INDEX;
11781     nBTInTable = state->bti3DIndexTable[surfIndex].BTI.regularSurfIndex;
11782 
11783     if (btIndex == nBTInTable)
11784     {
11785         nSurfaceEntries = state->bti3DIndexTable[surfIndex].nPlaneNumber;
11786 
11787         stateHeap = renderHal->pStateHeap;
11788 
11789         // Get Offset to Current Binding Table
11790         uint32_t offsetDst = (stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize) +     // Points to the Base of Current SSH Buffer Instance
11791             (stateHeap->iBindingTableOffset) +                       // Moves the pointer to Base of Array of Binding Tables
11792             (bindingTable * stateHeap->iBindingTableSize) +         // Moves the pointer to a Particular Binding Table
11793             (btIndex * sizeof(uint32_t));                              // Move the pointer to correct entry
11794 
11795         uint32_t *bindingTableEntry = (uint32_t*)(stateHeap->pSshBuffer + offsetDst);
11796 
11797         MOS_SecureMemcpy(bindingTableEntry, sizeof(uint32_t)* nSurfaceEntries, state->bti3DIndexTable[surfIndex].BTITableEntry.regularBtiEntryPosition, sizeof(uint32_t)* nSurfaceEntries);
11798 
11799         return MOS_STATUS_SUCCESS;
11800     }
11801 
11802     // Get Details of 3D surface and fill the surface
11803     CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceAndRegister(state, &surface, CM_ARGUMENT_SURFACE3D, surfIndex, false));
11804 
11805     // Setup 3D surface
11806     MOS_ZeroMemory(&surfaceParam, sizeof(surfaceParam));
11807     surfaceParam.Type = renderHal->SurfaceTypeDefault;
11808     surfaceParam.Boundary = RENDERHAL_SS_BOUNDARY_ORIGINAL;
11809 
11810     //Cache configurations
11811     state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam);
11812 
11813     //Set isOutput by default
11814     surfaceParam.isOutput = true;
11815 
11816     CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSetupSurfaceState(
11817         renderHal,
11818         &surface,
11819         &surfaceParam,
11820         &nSurfaceEntries,
11821         surfaceEntries,
11822         nullptr));
11823 
11824     for (i = 0; i < nSurfaceEntries; i++)
11825     {
11826         // Bind the surface State
11827         CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnBindSurfaceState(
11828             renderHal,
11829             bindingTable,
11830             btIndex + i,
11831             surfaceEntries[i]));
11832     }
11833     state->bti3DIndexTable[surfIndex].BTI.regularSurfIndex = btIndex;
11834     state->bti3DIndexTable[surfIndex].nPlaneNumber = nSurfaceEntries;
11835     // Get Offset to Current Binding Table
11836     stateHeap = renderHal->pStateHeap;
11837     offsetSrc = (stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize) +     // Points to the Base of Current SSH Buffer Instance
11838         (stateHeap->iBindingTableOffset) +                       // Moves the pointer to Base of Array of Binding Tables
11839         (bindingTable * stateHeap->iBindingTableSize) +         // Moves the pointer to a Particular Binding Table
11840         (btIndex * sizeof(uint32_t));                              // Move the pointer to correct entry
11841 
11842     state->bti3DIndexTable[surfIndex].BTI.regularSurfIndex = btIndex;
11843     state->bti3DIndexTable[surfIndex].BTITableEntry.regularBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
11844 
11845     eStatus = MOS_STATUS_SUCCESS;
11846 
11847 finish:
11848     return eStatus;
11849 }
11850 
11851 //|-----------------------------------------------------------------------------
11852 //| Purpose   : Tag-based Synchronization on Resource
11853 //| Input     : state   - Hal CM State
11854 //|             surface    surface
11855 //|             isWrite  - Write or Read
11856 //| Returns   : Result of the operation
11857 //|-----------------------------------------------------------------------------
HalCm_SyncOnResource(PCM_HAL_STATE state,PMOS_SURFACE surface,bool isWrite)11858 MOS_STATUS HalCm_SyncOnResource(
11859     PCM_HAL_STATE           state,
11860     PMOS_SURFACE            surface,
11861     bool                    isWrite)
11862 {
11863     MOS_STATUS              eStatus;
11864     PMOS_INTERFACE          osInterface;
11865 
11866     eStatus           = MOS_STATUS_SUCCESS;
11867     osInterface = state->osInterface;
11868 
11869     if (surface == nullptr || Mos_ResourceIsNull(&surface->OsResource))
11870     {
11871         CM_ASSERTMESSAGE("Input resource is not valid.");
11872         eStatus = MOS_STATUS_UNKNOWN;
11873         return eStatus;
11874     }
11875 
11876     osInterface->pfnSyncOnResource(
11877             osInterface,
11878             &(surface->OsResource),
11879             state->osInterface->CurrentGpuContextOrdinal, //state->GpuContext,
11880             isWrite);
11881 
11882     // Sync Render Target with Overlay Context
11883     if (surface->bOverlay)
11884     {
11885         osInterface->pfnSyncOnOverlayResource(
11886             osInterface,
11887             &(surface->OsResource),
11888             state->osInterface->CurrentGpuContextOrdinal);
11889     }
11890 
11891     return eStatus;
11892 }
11893 
11894 //!
11895 //! \brief    Send Media Walker State
11896 //! \details  Send MEDIA_OBJECT_WALKER command
11897 //! \param    PCM_HAL_STATE state
11898 //!           [in] Pointer to CM_HAL_STATE Structure
11899 //! \param    PRENDERHAL_INTERFACE renderHal
11900 //!           [in] Pointer to Hardware Interface Structure
11901 //! \param    PMOS_COMMAND_BUFFER cmdBuffer
11902 //!           [in] Pointer to Command Buffer
11903 //! \return   MOS_STATUS
11904 //!
HalCm_SendMediaWalkerState(PCM_HAL_STATE state,PCM_HAL_KERNEL_PARAM kernelParam,PMOS_COMMAND_BUFFER cmdBuffer)11905 MOS_STATUS HalCm_SendMediaWalkerState(
11906     PCM_HAL_STATE               state,
11907     PCM_HAL_KERNEL_PARAM        kernelParam,
11908     PMOS_COMMAND_BUFFER         cmdBuffer)
11909 {
11910     PRENDERHAL_INTERFACE_LEGACY     renderHal;
11911     MHW_WALKER_PARAMS               mediaWalkerParams;
11912     MOS_STATUS                      eStatus;
11913 
11914     eStatus         = MOS_STATUS_SUCCESS;
11915     renderHal = state->renderHal;
11916 
11917     MOS_SecureMemcpy(&mediaWalkerParams, sizeof(MHW_WALKER_PARAMS), &kernelParam->walkerParams, sizeof(CM_HAL_WALKER_PARAMS));
11918 
11919     if (kernelParam->kernelThreadSpaceParam.threadSpaceWidth)
11920     {
11921         //per-kernel thread space is set, need use its own dependency mask
11922         mediaWalkerParams.UseScoreboard  = renderHal->VfeScoreboard.ScoreboardEnable;
11923         mediaWalkerParams.ScoreboardMask = kernelParam->kernelThreadSpaceParam.globalDependencyMask;
11924     }
11925     else
11926     {
11927         //No per-kernel thread space setting, need use per-task depedency mask
11928         mediaWalkerParams.UseScoreboard  = renderHal->VfeScoreboard.ScoreboardEnable;
11929         mediaWalkerParams.ScoreboardMask = renderHal->VfeScoreboard.ScoreboardMask;
11930     }
11931 
11932     eStatus = renderHal->pMhwRenderInterface->AddMediaObjectWalkerCmd(
11933                                   cmdBuffer, &mediaWalkerParams);
11934 
11935     return eStatus;
11936 }
11937 
11938 //!
11939 //! \brief    Send GpGpu Walker State
11940 //! \details  Send GPGPU_WALKER state
11941 //! \param    PCM_HAL_STATE state
11942 //!           [in] Pointer to CM_HAL_STATE Structure
11943 //! \param    PRENDERHAL_INTERFACE renderHal
11944 //!           [in] Pointer to Hardware Interface Structure
11945 //! \param    PMOS_COMMAND_BUFFER cmdBuffer
11946 //!           [in] Pointer to Command Buffer
11947 //! \return   MOS_STATUS
11948 //!
HalCm_SendGpGpuWalkerState(PCM_HAL_STATE state,PCM_HAL_KERNEL_PARAM kernelParam,PMOS_COMMAND_BUFFER cmdBuffer)11949 MOS_STATUS HalCm_SendGpGpuWalkerState(
11950     PCM_HAL_STATE               state,
11951     PCM_HAL_KERNEL_PARAM        kernelParam,
11952     PMOS_COMMAND_BUFFER         cmdBuffer)
11953 {
11954     MhwRenderInterface           *mhwRender;
11955     MHW_GPGPU_WALKER_PARAMS      gpGpuWalkerParams;
11956     MOS_STATUS                   eStatus;
11957 
11958     eStatus           = MOS_STATUS_SUCCESS;
11959     mhwRender = state->renderHal->pMhwRenderInterface;
11960 
11961     gpGpuWalkerParams.InterfaceDescriptorOffset = kernelParam->gpgpuWalkerParams.interfaceDescriptorOffset;
11962     gpGpuWalkerParams.GpGpuEnable               = kernelParam->gpgpuWalkerParams.gpgpuEnabled;
11963     gpGpuWalkerParams.GroupWidth                = kernelParam->gpgpuWalkerParams.groupWidth;
11964     gpGpuWalkerParams.GroupHeight               = kernelParam->gpgpuWalkerParams.groupHeight;
11965     gpGpuWalkerParams.GroupDepth               = kernelParam->gpgpuWalkerParams.groupDepth;
11966     gpGpuWalkerParams.ThreadWidth               = kernelParam->gpgpuWalkerParams.threadWidth;
11967     gpGpuWalkerParams.ThreadHeight              = kernelParam->gpgpuWalkerParams.threadHeight;
11968     gpGpuWalkerParams.ThreadDepth               = kernelParam->gpgpuWalkerParams.threadDepth;
11969     gpGpuWalkerParams.SLMSize                   = kernelParam->slmSize;
11970 
11971     eStatus = mhwRender->AddGpGpuWalkerStateCmd(cmdBuffer, &gpGpuWalkerParams);
11972 
11973     return eStatus;
11974 }
11975 
11976 //!
11977 //! \brief    surface Format Convert
11978 //! \details  Convert RENDERHAL_SURFACE to MHW_VEBOX_SURFACE
11979 //! \param    PRENDERHAL_SURFACE            renderHalSurface
11980 //!           [in] Pointer to RENDERHAL_SURFACE Structure
11981 //! \param    PMHW_VEBOX_SURFACE_PARAMS    mhwVeboxSurface
11982 //!           [in] Pointer to PMHW_VEBOX_SURFACE_PARAMS
11983 //! \return   MOS_STATUS
11984 //!
HalCm_Convert_RENDERHAL_SURFACE_To_MHW_VEBOX_SURFACE(PRENDERHAL_SURFACE renderHalSurface,PMHW_VEBOX_SURFACE_PARAMS mhwVeboxSurface)11985 MOS_STATUS HalCm_Convert_RENDERHAL_SURFACE_To_MHW_VEBOX_SURFACE(
11986     PRENDERHAL_SURFACE              renderHalSurface,
11987     PMHW_VEBOX_SURFACE_PARAMS    mhwVeboxSurface)
11988 {
11989     PMOS_SURFACE                    surface;
11990     MOS_STATUS                      eStatus = MOS_STATUS_SUCCESS;
11991 
11992     CM_CHK_NULL_GOTOFINISH_MOSERROR(renderHalSurface);
11993     CM_CHK_NULL_GOTOFINISH_MOSERROR(mhwVeboxSurface);
11994 
11995     surface = &renderHalSurface->OsSurface;
11996     mhwVeboxSurface->Format        = surface->Format;
11997     mhwVeboxSurface->dwWidth       = surface->dwWidth;
11998     mhwVeboxSurface->dwHeight      = surface->dwHeight;
11999     mhwVeboxSurface->dwPitch       = surface->dwPitch;
12000     if (surface->dwPitch > 0)
12001     {
12002         mhwVeboxSurface->dwUYoffset = ((surface->UPlaneOffset.iSurfaceOffset - surface->YPlaneOffset.iSurfaceOffset) / surface->dwPitch)
12003                                       + surface->UPlaneOffset.iYOffset;
12004     }
12005     mhwVeboxSurface->TileType      = surface->TileType;
12006     mhwVeboxSurface->TileModeGMM   = surface->TileModeGMM;
12007     mhwVeboxSurface->bGMMTileEnabled = surface->bGMMTileEnabled;
12008     mhwVeboxSurface->rcMaxSrc      = renderHalSurface->rcMaxSrc;
12009     mhwVeboxSurface->pOsResource   = &surface->OsResource;
12010 
12011 finish:
12012     return eStatus;
12013 }
12014 
12015 //!
12016 //! \brief    Set Vtune Profiling Flag
12017 //! \details  Trun Vtune Profiling Flag On or off
12018 //! \param    PCM_HAL_STATE state
12019 //!           [in] Pointer to CM_HAL_STATE Structure
12020 //! \return   MOS_STATUS_SUCCESS
12021 //!
HalCm_SetVtuneProfilingFlag(PCM_HAL_STATE state,bool vtuneOn)12022 MOS_STATUS HalCm_SetVtuneProfilingFlag(
12023     PCM_HAL_STATE               state,
12024     bool                        vtuneOn)
12025 {
12026 
12027     state->vtuneProfilerOn   = vtuneOn;
12028 
12029     return MOS_STATUS_SUCCESS;
12030 }
12031 
12032 //*-----------------------------------------------------------------------------
12033 //| Purpose:    Get the offset for the Task Sync Location given the task ID
12034 //| Returns:    Sync Location
12035 //*-----------------------------------------------------------------------------
HalCm_GetTaskSyncLocation(PCM_HAL_STATE state,int32_t taskId)12036 int32_t HalCm_GetTaskSyncLocation(
12037     PCM_HAL_STATE       state,
12038     int32_t             taskId)        // [in] Task ID
12039 {
12040     return (taskId * state->cmHalInterface->GetTimeStampResourceSize());
12041 }
12042 
HalCm_GetLegacyRenderHalL3Setting(CmHalL3Settings * l3SettingsPtr,RENDERHAL_L3_CACHE_SETTINGS * l3SettingsLegacyPtr)12043 void HalCm_GetLegacyRenderHalL3Setting( CmHalL3Settings *l3SettingsPtr, RENDERHAL_L3_CACHE_SETTINGS *l3SettingsLegacyPtr )
12044 {
12045     *l3SettingsLegacyPtr = {};
12046     l3SettingsLegacyPtr->bOverride = l3SettingsPtr->overrideSettings;
12047     l3SettingsLegacyPtr->bEnableSLM = l3SettingsPtr->enableSlm;
12048     l3SettingsLegacyPtr->bL3CachingEnabled = l3SettingsPtr->l3CachingEnabled;
12049     l3SettingsLegacyPtr->bCntlRegOverride = l3SettingsPtr->cntlRegOverride;
12050     l3SettingsLegacyPtr->bCntlReg2Override = l3SettingsPtr->cntlReg2Override;
12051     l3SettingsLegacyPtr->bCntlReg3Override = l3SettingsPtr->cntlReg3Override;
12052     l3SettingsLegacyPtr->bSqcReg1Override = l3SettingsPtr->sqcReg1Override;
12053     l3SettingsLegacyPtr->bSqcReg4Override = l3SettingsPtr->sqcReg4Override;
12054     l3SettingsLegacyPtr->bLra1RegOverride = l3SettingsPtr->lra1RegOverride;
12055     l3SettingsLegacyPtr->dwCntlReg = l3SettingsPtr->cntlReg;
12056     l3SettingsLegacyPtr->dwCntlReg2 = l3SettingsPtr->cntlReg2;
12057     l3SettingsLegacyPtr->dwCntlReg3 = l3SettingsPtr->cntlReg3;
12058     l3SettingsLegacyPtr->dwSqcReg1 = l3SettingsPtr->sqcReg1;
12059     l3SettingsLegacyPtr->dwSqcReg4 = l3SettingsPtr->sqcReg4;
12060     l3SettingsLegacyPtr->dwLra1Reg = l3SettingsPtr->lra1Reg;
12061 
12062     return;
12063 }
12064 
HalCm_ConvertTicksToNanoSeconds(PCM_HAL_STATE state,uint64_t ticks)12065 uint64_t HalCm_ConvertTicksToNanoSeconds(
12066     PCM_HAL_STATE               state,
12067     uint64_t                    ticks)
12068 {
12069     if (state->tsFrequency == 0)
12070     {
12071         // if KMD doesn't report an valid value, fall back to default configs
12072         return state->cmHalInterface->ConverTicksToNanoSecondsDefault(ticks);
12073     }
12074     return (ticks * 1000000000) / (state->tsFrequency);
12075 }
12076 
12077 //!
12078 //! \brief    Check GPU context
12079 //! \details  Check if the GPU context is valid for CM layer
12080 //! \param    MOS_GPU_CONTEXT gpuContext
12081 //!           [in] GPU Context ordinal
12082 //! \return   true/false
12083 //!
HalCm_IsValidGpuContext(MOS_GPU_CONTEXT gpuContext)12084 bool HalCm_IsValidGpuContext(
12085     MOS_GPU_CONTEXT             gpuContext)
12086 {
12087     if( gpuContext == MOS_GPU_CONTEXT_RENDER3
12088      || gpuContext == MOS_GPU_CONTEXT_RENDER4
12089      || gpuContext == MOS_GPU_CONTEXT_CM_COMPUTE
12090      || gpuContext == MOS_GPU_CONTEXT_VEBOX)
12091     {
12092         return true;
12093     }
12094     else
12095     {
12096         CM_ASSERTMESSAGE("Invalid GPU context for CM.");
12097         return false;
12098     }
12099 }
12100