/*
* Copyright (c) 2018-2019, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
//!
//! \file      cm_kernel_ex.cpp
//! \brief     Contains Class CmKernelEx definitions
//!

#include "cm_kernel_ex.h"
#include "cm_surface.h"
#include "cm_surface_manager.h"
#include "cm_surface_sampler8x8.h"
#include "cm_surface_sampler.h"
#include "cm_mem.h"
#include "cm_surface_2d_rt.h"
#include "cm_surface_2d_up_rt.h"
#include "cm_surface_3d_rt.h"
#include "cm_buffer_rt.h"
#include "cm_device_rt.h"
#include "cm_hal.h"
#include "cm_surface_state.h"
#include "cm_surface_state_manager.h"
#include "cm_surface_vme.h"
#include "cm_ssh.h"
#include "cm_thread_space_rt.h"
#include "cm_media_state.h"

#include "mhw_state_heap.h"

using namespace CMRT_UMD;

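//!
//! \brief    Destructor: destroys the dummy thread space / thread group space if they
//!           were created, and frees the per-kernel bookkeeping buffers (index map,
//!           flattened args, property/surface indexes, arg data and curbe).
//!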
CmKernelEx::~CmKernelEx()
{
    if (m_dummyThreadSpace)
    {
        m_device->DestroyThreadSpace(m_dummyThreadSpace);
    }
    if (m_dummyThreadGroupSpace)
    {
        m_device->DestroyThreadGroupSpace(m_dummyThreadGroupSpace);
    }
    MOS_DeleteArray(m_indexMap);
    MOS_DeleteArray(m_flatArgs);
    MOS_DeleteArray(m_propertyIndexes);
    MOS_DeleteArray(m_cmSurfIndexes);
    MOS_DeleteArray(m_data);
    MOS_DeleteArray(m_surfaceInArg);
    MOS_DeleteArray(m_curbe);
}

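//!
//! \brief    Initializes the kernel for the fast path: runs the legacy CmKernelRT
//!           initialization, then flattens the argument list (one entry per surface
//!           element of a surface-array arg), normalizes payload offsets, computes
//!           the explicit curbe size and allocates the local argument buffers.
//!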
int32_t CmKernelEx::Initialize(const char *kernelName, const char *options)
{
    int ret = CmKernelRT::Initialize(kernelName, options);
    if (ret != CM_SUCCESS)
    {
        return ret;
    }

    m_indexMap = MOS_NewArray(uint32_t, (m_argCount+1));
    CM_CHK_NULL_RETURN_CMERROR(m_indexMap);
    MOS_ZeroMemory(m_indexMap, (m_argCount+1)*sizeof(uint32_t));
    m_flatArgCount = 0;
    bool isGpgpuKernel = false;
    uint32_t minPayload = 0;
    for (uint32_t i = 0; i < m_argCount; i++)
    {
        if (ArgArraySupported(m_args[i].unitKind))
        {
            int numSurfaces = m_args[i].unitSize/sizeof(int);
            m_flatArgCount += numSurfaces;
        }
        else
        {
            ++m_flatArgCount;
        }

        if (!isGpgpuKernel &&
            ( m_args[i].unitKind == CM_ARGUMENT_IMPLICT_LOCALSIZE
            ||m_args[i].unitKind == CM_ARGUMENT_IMPLICT_GROUPSIZE
            ||m_args[i].unitKind == CM_ARGUMENT_IMPLICIT_LOCALID))
        {
            isGpgpuKernel = true;
        }
        if (i == 0 || (m_args[i].unitKind != CM_ARGUMENT_IMPLICIT_LOCALID && minPayload > m_args[i].unitOffsetInPayload))
        {
            minPayload = m_args[i].unitOffsetInPayload;
        }
    }

    if (!isGpgpuKernel)
    {
        minPayload = CM_PAYLOAD_OFFSET;
    }

    if (m_flatArgCount == 0)
    {
        return CM_SUCCESS;
    }

    m_flatArgs = MOS_NewArray(_CmArg, m_flatArgCount);
    CM_CHK_NULL_RETURN_CMERROR(m_flatArgs);
    MOS_ZeroMemory(m_flatArgs, m_flatArgCount * sizeof(_CmArg));
    m_propertyIndexes = MOS_NewArray(uint8_t, m_flatArgCount);
    CM_CHK_NULL_RETURN_CMERROR(m_propertyIndexes);
    MOS_ZeroMemory(m_propertyIndexes, m_flatArgCount);
    m_cmSurfIndexes = MOS_NewArray(uint32_t, m_flatArgCount);
    CM_CHK_NULL_RETURN_CMERROR(m_cmSurfIndexes);
    MOS_ZeroMemory(m_cmSurfIndexes, m_flatArgCount * sizeof(uint32_t));

    int j = 0;
    uint32_t offset = 0; // offset in the local buffer
    int localIDIndex = -1;
    for (uint32_t i = 0; i < m_argCount; i++)
    {
        if (ArgArraySupported(m_args[i].unitKind))
        {
            m_indexMap[i] = j;
            int numSurfaces = m_args[i].unitSize/sizeof(int);
            for (int k = 0; k < numSurfaces; k ++)
            {
                m_flatArgs[j].isaKind = m_args[i].unitKind;
                m_flatArgs[j].kind = m_args[i].unitKind;
                m_flatArgs[j].unitSize = sizeof(void *); // we can either store the pointer to CmSurfaceState or pointer to mos_resource here
                m_flatArgs[j].payloadOffset = m_args[i].unitOffsetInPayload + k*4 - minPayload; // each bte index has 4 bytes
                m_flatArgs[j].offset = offset;
                m_flatArgs[j].sizeInCurbe = 4;
                offset += m_flatArgs[j].unitSize;

                // update curbe size
                if (m_explicitCurbeSize < (uint32_t)(m_flatArgs[j].payloadOffset + m_flatArgs[j].sizeInCurbe))
                {
                    m_explicitCurbeSize = m_flatArgs[j].payloadOffset + m_flatArgs[j].sizeInCurbe;
                }
                ++ j;
            }
        }
        else
        {
            m_indexMap[i] = j;
            m_flatArgs[j].isaKind = m_args[i].unitKind;
            m_flatArgs[j].kind = m_args[i].unitKind;
            m_flatArgs[j].unitSize = m_args[i].unitSize;
            m_flatArgs[j].payloadOffset = m_args[i].unitOffsetInPayload - minPayload;
            m_flatArgs[j].offset = offset;
            m_flatArgs[j].sizeInCurbe = m_flatArgs[j].unitSize;
            offset += m_flatArgs[j].unitSize;

            // update curbe size
            if (m_args[i].unitKind == CM_ARGUMENT_IMPLICIT_LOCALID)
            {
                localIDIndex = j;
            }
            else
            {
                if (m_explicitCurbeSize < (uint32_t)(m_flatArgs[j].payloadOffset + m_flatArgs[j].sizeInCurbe))
                {
                    m_explicitCurbeSize = m_flatArgs[j].payloadOffset + m_flatArgs[j].sizeInCurbe;
                }
            }
            ++ j;
        }
        m_indexMap[m_argCount] = j;
    }

    // adjust the payload of local id
    if (localIDIndex >= 0)
    {
        m_flatArgs[localIDIndex].payloadOffset = MOS_ALIGN_CEIL(m_explicitCurbeSize, 32);
    }

    m_data = MOS_NewArray(uint8_t, offset);
    CM_CHK_NULL_RETURN_CMERROR(m_data);
    m_surfaceInArg = MOS_NewArray(uint8_t, offset);
    CM_CHK_NULL_RETURN_CMERROR(m_surfaceInArg);
    MOS_ZeroMemory(m_data, sizeof(uint8_t)*offset);
    MOS_ZeroMemory(m_surfaceInArg, sizeof(uint8_t)*offset);

    m_hashValue = m_kernelInfo->hashValue;

    return CM_SUCCESS;
}

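//!
//! \brief    Allocates the curbe buffer when only the explicit payload is needed:
//!           the whole curbe is treated as per-thread data and its size is the
//!           explicit curbe size aligned up to 64 bytes.
//!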
MOS_STATUS CmKernelEx::AllocateCurbe()
{
    MOS_DeleteArray(m_curbe);
    if (m_explicitCurbeSize > 0)
    {
        m_curbeSize = MOS_ALIGN_CEIL(m_explicitCurbeSize, 64);
        m_curbeSizePerThread = m_curbeSize;
        m_curbeSizeCrossThread = 0;
        m_curbe = MOS_NewArray(uint8_t, m_curbeSize);
        CM_CHK_NULL_RETURN_MOSERROR(m_curbe);
        MOS_ZeroMemory(m_curbe, m_curbeSize);
    }
    return MOS_STATUS_SUCCESS;
}

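//!
//! \brief    Allocates the curbe for the thread-group (GPGPU) path: a cross-thread
//!           section holding the explicit arguments plus a per-thread section for
//!           every thread in a group, then fills the implicit group-size, local-size
//!           and local-id arguments.
//!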
MOS_STATUS CmKernelEx::AllocateCurbeAndFillImplicitArgs(CmThreadGroupSpace *globalGroupSpace)
{
    CmThreadGroupSpace *tgs = (globalGroupSpace == nullptr)?m_threadGroupSpace:globalGroupSpace;

    uint32_t thrdSpaceWidth = 0;
    uint32_t thrdSpaceHeight = 0;
    uint32_t thrdSpaceDepth = 0;
    uint32_t grpSpaceWidth = 0;
    uint32_t grpSpaceHeight = 0;
    uint32_t grpSpaceDepth = 0;

    if (tgs)
    {
        tgs->GetThreadGroupSpaceSize(thrdSpaceWidth, thrdSpaceHeight, thrdSpaceDepth, grpSpaceWidth, grpSpaceHeight, grpSpaceDepth);
    }

    MOS_DeleteArray(m_curbe);
    m_curbeSizePerThread = (m_explicitCurbeSize%32 == 4)? 64:32;
    m_curbeSizeCrossThread = MOS_ALIGN_CEIL(m_explicitCurbeSize, 32);
    m_curbeSize = m_curbeSizeCrossThread + m_curbeSizePerThread * thrdSpaceWidth * thrdSpaceHeight * thrdSpaceDepth;
    m_curbeSize = MOS_ALIGN_CEIL(m_curbeSize, 64);
    m_curbe = MOS_NewArray(uint8_t, m_curbeSize);
    CM_CHK_NULL_RETURN_MOSERROR(m_curbe);
    MOS_ZeroMemory(m_curbe, m_curbeSize);

    int localIdPayload = -1;
    int groupSizePayload = -1;
    int localSizePayload = -1;

    for (uint32_t i = 0; i < m_flatArgCount; i++)
    {
        if (m_flatArgs[i].kind == ARG_KIND_IMPLICT_LOCALSIZE)
            localSizePayload = m_flatArgs[i].payloadOffset;
        if (m_flatArgs[i].kind == ARG_KIND_IMPLICT_GROUPSIZE)
            groupSizePayload = m_flatArgs[i].payloadOffset;
        if (m_flatArgs[i].kind == ARG_KIND_IMPLICIT_LOCALID)
            localIdPayload = m_flatArgs[i].payloadOffset;
    }

    // set group size implicit args
    if (groupSizePayload >= 0)
    {
        *(uint32_t *)(m_curbe + groupSizePayload) = grpSpaceWidth;
        *(uint32_t *)(m_curbe + groupSizePayload + 4) = grpSpaceHeight;
        *(uint32_t *)(m_curbe + groupSizePayload + 8) = grpSpaceDepth;
    }

    // set local size implicit args
    if (localSizePayload >= 0)
    {
        *(uint32_t *)(m_curbe + localSizePayload) = thrdSpaceWidth;
        *(uint32_t *)(m_curbe + localSizePayload + 4) = thrdSpaceHeight;
        *(uint32_t *)(m_curbe + localSizePayload + 8) = thrdSpaceDepth;
    }

    // set local id data per thread
    if (localIdPayload >= 0)
    {
        int offset = localIdPayload;
        for (uint32_t idZ = 0; idZ < thrdSpaceDepth; idZ++)
        {
            for (uint32_t idY = 0; idY < thrdSpaceHeight; idY++)
            {
                for (uint32_t idX = 0; idX < thrdSpaceWidth; idX++)
                {
                    *(uint32_t *)(m_curbe + offset) = idX;
                    *(uint32_t *)(m_curbe + offset + 4) = idY;
                    *(uint32_t *)(m_curbe + offset + 8) = idZ;
                    offset += m_curbeSizePerThread;
                }
            }
        }
    }

    return MOS_STATUS_SUCCESS;
}

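//!
//! \brief    Returns true if the given argument kind denotes a surface argument.
//!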
bool CmKernelEx::IsSurface(uint16_t kind)
{
    switch (kind)
    {
        case ARG_KIND_SURFACE:
        case ARG_KIND_SURFACE_1D:
        case ARG_KIND_SURFACE_2D:
        case ARG_KIND_SURFACE_2D_UP:
        case ARG_KIND_SURFACE_SAMPLER:
        case ARG_KIND_SURFACE2DUP_SAMPLER:
        case ARG_KIND_SURFACE_3D:
        case ARG_KIND_SURFACE_SAMPLER8X8_AVS:
        case ARG_KIND_SURFACE_SAMPLER8X8_VA:
        case ARG_KIND_SURFACE_2D_SCOREBOARD:
        case ARG_KIND_STATE_BUFFER:
        case ARG_KIND_SURFACE_VME:
            return true;
        default:
            return false;
    }
    return false;
}

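//!
//! \brief    Fast-path version of SetKernelArg: resolves surface indexes to
//!           CmSurfaceState pointers and sampler indexes to sampler state parameters,
//!           and copies plain data arguments into the local argument buffer. The
//!           legacy CmKernelRT path is also updated except for GPU-copy kernels.
//!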
int32_t CmKernelEx::SetKernelArg(uint32_t index, size_t size, const void * value)
{
    if (!m_blCreatingGPUCopyKernel) // gpucopy kernels are only executed by the fast path, no need to set args on the legacy kernel
    {
        CmKernelRT::SetKernelArg(index, size, value);
    }
    if( index >= m_argCount )
    {
        CM_ASSERTMESSAGE("Error: Invalid kernel arg count.");
        return CM_INVALID_ARG_INDEX;
    }

    if( !value)
    {
        CM_ASSERTMESSAGE("Error: Invalid kernel arg value.");
        return CM_INVALID_ARG_VALUE;
    }

    if( size == 0)
    {
        CM_ASSERTMESSAGE("Error: Invalid kernel arg size.");
        return CM_INVALID_ARG_SIZE;
    }

    uint32_t start = m_indexMap[index];
    uint32_t len = m_indexMap[index + 1] - start;

    if (IsSurface(m_flatArgs[start].isaKind))
    {
        CMRT_UMD::SurfaceIndex *surfIndexes = (CMRT_UMD::SurfaceIndex *)value;
        if (surfIndexes == (CMRT_UMD::SurfaceIndex *)CM_NULL_SURFACE)
        {
            for (uint32_t i = 0; i < len; i++)
            {
                *(void **)(m_data + m_flatArgs[start + i].offset) = nullptr;
                *(void **)(m_surfaceInArg + m_flatArgs[start + i].offset) = nullptr;
                m_flatArgs[start + i].isSet = true;
            }
            return CM_SUCCESS;
        }
        // sanity check
        if (len * sizeof(CMRT_UMD::SurfaceIndex) != size)
        {
            CM_ASSERTMESSAGE("Error: Invalid kernel arg size.");
            return CM_INVALID_ARG_SIZE;
        }

        for (uint32_t i = 0; i < len; i++)
        {
            uint32_t index = surfIndexes[i].get_data();

            m_flatArgs[start + i].isSet = true;
            if (index == CM_NULL_SURFACE)
            {
                *(void **)(m_data + m_flatArgs[start+i].offset) = nullptr;
                *(void **)(m_surfaceInArg + m_flatArgs[start+i].offset) = nullptr;
            }
            else
            {
                CmSurface* surface = nullptr;
                m_surfaceMgr->GetSurface(index, surface);
                if (nullptr == surface)
                {
                    *(void **)(m_data + m_flatArgs[start+i].offset) = nullptr;
                    *(void **)(m_surfaceInArg + m_flatArgs[start+i].offset) = nullptr;
                }
                else
                {
                    m_flatArgs[start + i].kind = ToArgKind(surface);

                    // get the CmSurfaceState from the surface index; this will be changed if surfmgr is optimized
                    // most likely, this will be moved to CmSurface
                    CmSurfaceState *temp = GetSurfaceState(surface, index);
                    *(CmSurfaceState **)(m_data + m_flatArgs[start + i].offset) = temp;
                    *(CmSurface **)(m_surfaceInArg + m_flatArgs[start+i].offset) = surface;
                    m_propertyIndexes[start + i] = surface->GetPropertyIndex();
                    m_cmSurfIndexes[start + i] = index;
                }
            }
        }
    }
    else if (m_flatArgs[start].isaKind == ARG_KIND_SAMPLER) // only 3D samplers and AVS samplers are supported in the fast path
    {
        CMRT_UMD::SamplerIndex *samplerIndexes = (CMRT_UMD::SamplerIndex *)value;
        // sanity check
        if (len * sizeof(CMRT_UMD::SurfaceIndex) != size)
        {
            CM_ASSERTMESSAGE("Error: Invalid kernel arg size.");
            return CM_INVALID_ARG_SIZE;
        }

        for (uint32_t i = 0; i < len; i++)
        {
            uint32_t index = samplerIndexes[i].get_data();
            MHW_SAMPLER_STATE_PARAM *temp = (MHW_SAMPLER_STATE_PARAM *)GetSamplerParam(index);
            *(MHW_SAMPLER_STATE_PARAM **)(m_data + m_flatArgs[start + i].offset) = temp;
        }
    }
    else
    {
        if (size != m_flatArgs[start].unitSize)
        {
            CM_ASSERTMESSAGE("Error: Invalid kernel arg size.");
            return CM_INVALID_ARG_SIZE;
        }
        CmSafeMemCopy((void *)(m_data + m_flatArgs[start].offset), value, size);
    }
    return CM_SUCCESS;
}

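//!
//! \brief    Maps a CmSurface object to the corresponding CM_ARG_KIND, taking the
//!           underlying surface type of sampler and sampler8x8 surfaces into account.
//!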
CM_ARG_KIND CmKernelEx::ToArgKind(CmSurface *surface)
{
    switch(surface->Type())
    {
        case CM_ENUM_CLASS_TYPE_CMBUFFER_RT:
            return ARG_KIND_SURFACE_1D;
        case CM_ENUM_CLASS_TYPE_CMSURFACE2D:
            return ARG_KIND_SURFACE_2D;
        case CM_ENUM_CLASS_TYPE_CMSURFACE2DUP:
            return ARG_KIND_SURFACE_2D_UP;
        case CM_ENUM_CLASS_TYPE_CMSURFACE3D:
            return ARG_KIND_SURFACE_3D;
        case CM_ENUM_CLASS_TYPE_CMSURFACESAMPLER:
        {
            CmSurfaceSampler* surfSampler = static_cast <CmSurfaceSampler *> (surface);
            SAMPLER_SURFACE_TYPE type;
            surfSampler->GetSurfaceType(type);
            if (type == SAMPLER_SURFACE_TYPE_2D)
            {
                return ARG_KIND_SURFACE_SAMPLER;
            }
            else if (type == SAMPLER_SURFACE_TYPE_2DUP)
            {
                return ARG_KIND_SURFACE2DUP_SAMPLER;
            }
            else
            {
                return ARG_KIND_SURFACE_3D;
            }
        }
        case CM_ENUM_CLASS_TYPE_CMSURFACESAMPLER8X8:
        {
            CmSurfaceSampler8x8* surfSampler8x8 = static_cast <CmSurfaceSampler8x8 *> (surface);
            if (surfSampler8x8->GetSampler8x8SurfaceType() == CM_VA_SURFACE)
            {
                return ARG_KIND_SURFACE_SAMPLER8X8_VA;
            }
            else
            {
                return ARG_KIND_SURFACE_SAMPLER8X8_AVS;
            }
        }
        case CM_ENUM_CLASS_TYPE_CMSURFACEVME:
            return ARG_KIND_SURFACE_VME;
        case CM_ENUM_CLASS_TYPE_CM_STATE_BUFFER:
            return ARG_KIND_STATE_BUFFER;
        default:
            return ARG_KIND_GENERAL;
    }
}

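//!
//! \brief    Looks up the CmSurfaceState for a surface and its alias index in the CM
//!           HAL tables; for VME surfaces the state is created on first use and cached
//!           in the surface. Returns nullptr for unsupported surface types.
//!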
CmSurfaceState* CmKernelEx::GetSurfaceState(CmSurface *surface, uint32_t index)
{
    CM_HAL_STATE *cmHalState = ((PCM_CONTEXT_DATA)m_device->GetAccelData())->cmHalState;
    uint32_t surfaceArraySize = 0;
    m_surfaceMgr->GetSurfaceArraySize(surfaceArraySize);
    CM_CHK_COND_RETURN((surfaceArraySize == 0), nullptr, "Surface Array is empty.");
    uint32_t aliasIndex = index/surfaceArraySize;

    switch (surface->Type())
    {
        case CM_ENUM_CLASS_TYPE_CMSURFACE2D:
        {
            CmSurface2DRT* surf2D = static_cast<CmSurface2DRT*>(surface);
            uint32_t halIndex = 0;
            surf2D->GetIndexFor2D(halIndex);
            PCM_HAL_SURFACE2D_SURFACE_STATE_PARAM surfStateParam = nullptr;
            if (aliasIndex > 0 || cmHalState->umdSurf2DTable[halIndex].surfStateSet)
            {
                surfStateParam = &(cmHalState->umdSurf2DTable[halIndex].surfaceStateParam[aliasIndex]);
            }
            return cmHalState->umdSurf2DTable[halIndex].surfStateMgr->GetSurfaceState(0, 0, surfStateParam);
        }
        case CM_ENUM_CLASS_TYPE_CMSURFACE2DUP:
        {
            CmSurface2DUPRT* surf2DUP = static_cast<CmSurface2DUPRT*>(surface);
            uint32_t halIndex = 0;
            surf2DUP->GetHandle(halIndex);
            return cmHalState->surf2DUPTable[halIndex].surfStateMgr->GetSurfaceState();
        }
        case CM_ENUM_CLASS_TYPE_CMBUFFER_RT:
        {
            CmBuffer_RT* surf1D = static_cast<CmBuffer_RT*>(surface);
            uint32_t halIndex = 0;
            surf1D->GetHandle(halIndex);
            CM_HAL_BUFFER_SURFACE_STATE_ENTRY *surfStateParam = nullptr;
            if (aliasIndex > 0 || cmHalState->bufferTable[halIndex].surfStateSet)
            {
                surfStateParam = &(cmHalState->bufferTable[halIndex].surfaceStateEntry[aliasIndex]);
            }
            return cmHalState->bufferTable[halIndex].surfStateMgr->GetSurfaceState(surfStateParam);
        }
        case CM_ENUM_CLASS_TYPE_CMSURFACE3D:
        {
            CmSurface3DRT *surf3D = static_cast<CmSurface3DRT *>(surface);
            uint32_t halIndex = 0;
            surf3D->GetHandle(halIndex);
            return cmHalState->surf3DTable[halIndex].surfStateMgr->GetSurfaceState(0, 1);
        }
        case CM_ENUM_CLASS_TYPE_CMSURFACEVME:
        {
            CmSurfaceVme *surfVme = static_cast<CmSurfaceVme*>(surface);
            CmSurfaceStateVME *surfState = surfVme->GetSurfaceState();
            if (surfState == nullptr)
            {
                int argSize = surfVme->GetVmeCmArgSize();
                int surfCount = surfVme->GetTotalSurfacesCount();

                uint8_t *vmeValue = MOS_NewArray(uint8_t, argSize);
                if (vmeValue == nullptr)
                {
                    return nullptr;
                }
                uint16_t surfIndexes[17];
                SetArgsSingleVme(surfVme, vmeValue, surfIndexes);
                surfState = MOS_New(CmSurfaceStateVME, cmHalState);
                if (surfState == nullptr)
                {
                    MOS_DeleteArray(vmeValue);
                    return nullptr;
                }
                surfState->Initialize((CM_HAL_VME_ARG_VALUE *)vmeValue);

                surfVme->SetSurfState(cmHalState->advExecutor, vmeValue, surfState); // set for destroy later
            }
            return surfState;
        }
        case CM_ENUM_CLASS_TYPE_CMSURFACESAMPLER:
        {
            uint32_t halIndex = 0;
            uint16_t cmIndex = 0;
            CmSurfaceSampler* surfSampler = static_cast <CmSurfaceSampler *> (surface);
            surfSampler->GetHandle(halIndex);
            surfSampler->GetCmIndexCurrent(cmIndex);
            SAMPLER_SURFACE_TYPE type;
            surfSampler->GetSurfaceType(type);
            switch (type)
            {
                case SAMPLER_SURFACE_TYPE_2D:
                {
                    // re-calculate the aliasIndex
                    aliasIndex = cmIndex/surfaceArraySize;

                    PCM_HAL_SURFACE2D_SURFACE_STATE_PARAM surfStateParam = nullptr;
                    if (aliasIndex > 0 || cmHalState->umdSurf2DTable[halIndex].surfStateSet)
                    {
                        surfStateParam = &(cmHalState->umdSurf2DTable[halIndex].surfaceStateParam[aliasIndex]);
                    }
                    return cmHalState->umdSurf2DTable[halIndex].surfStateMgr->GetSurfaceState(0, 1, surfStateParam);
                }
                case SAMPLER_SURFACE_TYPE_2DUP:
                {
                    return cmHalState->surf2DUPTable[halIndex].surfStateMgr->GetSurfaceState(0, 1);
                }
                case SAMPLER_SURFACE_TYPE_3D:
                {
                    return cmHalState->surf3DTable[halIndex].surfStateMgr->GetSurfaceState(0, 1);
                }
                default:
                {
                    break;
                }
            }
            break;
        }
        case CM_ENUM_CLASS_TYPE_CMSURFACESAMPLER8X8:
        {
            CmSurfaceSampler8x8* surfSampler8x8 = static_cast <CmSurfaceSampler8x8 *> (surface);
            uint32_t halIndex = 0;
            uint16_t cmIndex = 0;

            surfSampler8x8->GetIndexCurrent(halIndex);
            surfSampler8x8->GetCmIndex(cmIndex);
            // re-calculate the aliasIndex
            aliasIndex = cmIndex/surfaceArraySize;

            PCM_HAL_SURFACE2D_SURFACE_STATE_PARAM surfStateParam = nullptr;
            if (aliasIndex > 0 || cmHalState->umdSurf2DTable[halIndex].surfStateSet)
            {
                surfStateParam = &(cmHalState->umdSurf2DTable[halIndex].surfaceStateParam[aliasIndex]);
            }
            return cmHalState->umdSurf2DTable[halIndex].surfStateMgr->GetSurfaceState(1, 1, surfStateParam);
        }
        default: // not implemented yet
            return nullptr;
    }
    return nullptr;
}

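//!
//! \brief    Returns the total number of binding table entries required by the surface
//!           arguments currently set on this kernel (null surfaces are skipped).
//!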
uint32_t CmKernelEx::GetMaxBteNum()
{
    uint32_t bteCount = 0;
    for (uint32_t i = 0; i < m_flatArgCount; i++)
    {
        if (IsSurface(m_flatArgs[i].kind))
        {
            CmSurfaceState *surfState = *(CmSurfaceState **)(m_data + m_flatArgs[i].offset);
            if (surfState == nullptr) // CM_NULL_SURFACE
            {
                continue;
            }
            bteCount += surfState->GetNumBte();
        }
    }
    return bteCount;
}

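//!
//! \brief    Writes the final curbe data: refreshes surface states whose properties
//!           changed, adds surface states to the SSH and sampler states to the media
//!           state, and patches the resulting binding table / sampler indexes and the
//!           plain data arguments into the curbe.
//!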
MOS_STATUS CmKernelEx::UpdateCurbe(CmSSH *ssh, CmMediaState *mediaState, uint32_t kernelIdx)
{
    for (uint32_t i = 0; i < m_flatArgCount; i++)
    {
        if (IsSurface(m_flatArgs[i].kind))
        {
            CmSurface *surface = *(CmSurface **)(m_surfaceInArg + m_flatArgs[i].offset);
            if (surface != nullptr && m_propertyIndexes[i] != surface->GetPropertyIndex())
            {
                // need to update the surface state
                CmSurfaceState *temp = GetSurfaceState(surface, m_cmSurfIndexes[i]);
                m_propertyIndexes[i] = surface->GetPropertyIndex();
                *(CmSurfaceState **)(m_data + m_flatArgs[i].offset) = temp;
            }
            CmSurfaceState *surfState = *(CmSurfaceState **)(m_data + m_flatArgs[i].offset);
            if (surfState == nullptr)
            {
                continue;
            }
            uint32_t bteIdx = ssh->AddSurfaceState(surfState);
            *(uint32_t *)(m_curbe + m_flatArgs[i].payloadOffset) = bteIdx;
        }
        else if (m_flatArgs[i].kind == ARG_KIND_SAMPLER)
        {
            MHW_SAMPLER_STATE_PARAM *param = *(MHW_SAMPLER_STATE_PARAM **)(m_data + m_flatArgs[i].offset);
            uint32_t bteIdx = mediaState->AddSampler(param, kernelIdx);
            *(uint32_t *)(m_curbe + m_flatArgs[i].payloadOffset) = bteIdx;
        }
        else if (m_flatArgs[i].kind != ARG_KIND_IMPLICT_LOCALSIZE
                 && m_flatArgs[i].kind != ARG_KIND_IMPLICT_GROUPSIZE
                 && m_flatArgs[i].kind != ARG_KIND_IMPLICIT_LOCALID)
        {
            MOS_SecureMemcpy(m_curbe + m_flatArgs[i].payloadOffset, m_flatArgs[i].sizeInCurbe,
                m_data + m_flatArgs[i].offset, m_flatArgs[i].unitSize);
        }
    }

    // dump
    /*
    for (int i = 0; i < m_curbeSize/4; i++)
    {
        printf("0x%x, ", *((uint32_t *)m_curbe + i));
    }
    printf("\n");
    */
    return MOS_STATUS_SUCCESS;
}

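//!
//! \brief    Propagates the fast-path tracker (index, value) to every surface argument
//!           of this kernel.
//!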
MOS_STATUS CmKernelEx::UpdateFastTracker(uint32_t trackerIndex, uint32_t tracker)
{
    for (uint32_t i = 0; i < m_flatArgCount; i++)
    {
        if (IsSurface(m_flatArgs[i].kind))
        {
            CmSurface *surface = *(CmSurface **)(m_surfaceInArg + m_flatArgs[i].offset);
            if (surface == nullptr)
            {
                continue;
            }
            surface->SetFastTracker(trackerIndex, tracker);
        }
    }
    return MOS_STATUS_SUCCESS;
}


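//!
//! \brief    Asks the thread space to set its scoreboard (SWSB) dependency arguments
//!           on this kernel; returns success when no thread space is attached.
//!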
MOS_STATUS CmKernelEx::UpdateSWSBArgs(CmThreadSpaceRT *threadSpace)
{
    CmThreadSpaceRT *ts = (threadSpace == nullptr)?m_threadSpace:threadSpace;
    if (ts == nullptr)
    {
        return MOS_STATUS_SUCCESS;
    }
    int ret = ts->SetDependencyArgToKernel(this);
    return (ret == 0)? MOS_STATUS_SUCCESS : MOS_STATUS_UNKNOWN;
}

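//!
//! \brief    Fast-path version of SetStaticBuffer: besides the legacy path, records
//!           the surface state of the global (static) buffer at its reserved binding
//!           table index.
//!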
int32_t CmKernelEx::SetStaticBuffer(uint32_t index, const void *value)
{
    CM_CHK_CMSTATUS_RETURN(CmKernelRT::SetStaticBuffer(index, value));

    if(index >= CM_GLOBAL_SURFACE_NUMBER)
    {
        CM_ASSERTMESSAGE("Error: Surface Index exceeds max global surface number.");
        return CM_INVALID_GLOBAL_BUFFER_INDEX;
    }

    if(!value)
    {
        CM_ASSERTMESSAGE("Error: Invalid StaticBuffer arg value.");
        return CM_INVALID_BUFFER_HANDLER;
    }

    SurfaceIndex* surfIndex = (SurfaceIndex* )value;
    uint32_t indexData = surfIndex->get_data();

    CmSurface* surface = nullptr;
    m_surfaceMgr->GetSurface(indexData, surface);
    if (surface != nullptr)
    {
        // for gen9+ platforms, index + 1 is the BTI
        m_reservedSurfaceBteIndexes[index + CM_GLOBAL_SURFACE_INDEX_START_GEN9_PLUS]
                                                = GetSurfaceState(surface, indexData);
    }
    return CM_SUCCESS;
}

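//!
//! \brief    Fast-path version of SetSurfaceBTI: records the surface state at the
//!           user-reserved binding table index in addition to the legacy path.
//!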
int32_t CmKernelEx::SetSurfaceBTI(SurfaceIndex *surfIndex, uint32_t bti)
{
    CM_CHK_CMSTATUS_RETURN(CmKernelRT::SetSurfaceBTI(surfIndex, bti));

    CM_CHK_NULL_RETURN_CMERROR(surfIndex);
    uint32_t index = surfIndex->get_data();

    CmSurface* surface = nullptr;
    m_surfaceMgr->GetSurface(index, surface);
    if (surface != nullptr)
    {
        m_reservedSurfaceBteIndexes[bti] = GetSurfaceState(surface, index);
    }
    return CM_SUCCESS;
}

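//!
//! \brief    Fast-path version of SetSamplerBTI: records the sampler state parameters
//!           at the user-reserved sampler binding table index in addition to the
//!           legacy path.
//!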
int32_t CmKernelEx::SetSamplerBTI(SamplerIndex* sampler, uint32_t nIndex)
{
    CM_CHK_CMSTATUS_RETURN(CmKernelRT::SetSamplerBTI(sampler, nIndex));

    uint32_t index = sampler->get_data();
    m_reservedSamplerBteIndexes[nIndex] = (MHW_SAMPLER_STATE_PARAM *)GetSamplerParam(index);
    return CM_SUCCESS;
}

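//!
//! \brief    Adds all reserved (fixed-BTI) surface states to the SSH and resets the
//!           legacy indirect surface table so the binding table indexes can be reused.
//!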
MOS_STATUS CmKernelEx::LoadReservedSurfaces(CmSSH *ssh)
{
    for (auto it = m_reservedSurfaceBteIndexes.begin(); it != m_reservedSurfaceBteIndexes.end(); ++ it)
    {
        ssh->AddSurfaceState(it->second, it->first);
    }

    // reset the table in legacy kernel for bti reuse
    if (m_usKernelPayloadSurfaceCount)
    {
        CmSafeMemSet(m_IndirectSurfaceInfoArray, 0, m_usKernelPayloadSurfaceCount * sizeof(CM_INDIRECT_SURFACE_INFO));
        m_usKernelPayloadSurfaceCount = 0;
    }
    return MOS_STATUS_SUCCESS;
}

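//!
//! \brief    Adds all reserved (fixed-BTI) sampler states to the media state for this
//!           kernel.
//!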
MOS_STATUS CmKernelEx::LoadReservedSamplers(CmMediaState *mediaState, uint32_t kernelIdx)
{
    for (auto it = m_reservedSamplerBteIndexes.begin(); it != m_reservedSamplerBteIndexes.end(); ++ it)
    {
        mediaState->AddSampler((MHW_SAMPLER_STATE_PARAM *)it->second, kernelIdx, it->first);
    }
    return MOS_STATUS_SUCCESS;
}

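//!
//! \brief    Returns a pointer to the HAL sampler state parameters for the given
//!           sampler index.
//!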
void* CmKernelEx::GetSamplerParam(uint32_t index)
{
    CM_HAL_STATE *cmHalState = ((PCM_CONTEXT_DATA)m_device->GetAccelData())->cmHalState;
    return (void *)&cmHalState->samplerTable[index];
}

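//!
//! \brief    Counts the 3D and AVS sampler arguments set on this kernel; any other
//!           sampler type is rejected because the fast path does not support it yet.
//!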
MOS_STATUS CmKernelEx::GetSamplerCount(uint32_t *count3D, uint32_t *countAVS)
{
    *count3D = 0;
    *countAVS = 0;
    for (uint32_t i = 0; i < m_flatArgCount; i++)
    {
        if (m_flatArgs[i].kind == ARG_KIND_SAMPLER)
        {
            MHW_SAMPLER_STATE_PARAM *temp = *(MHW_SAMPLER_STATE_PARAM **)(m_data + m_flatArgs[i].offset);
            if (temp->SamplerType == MHW_SAMPLER_TYPE_3D)
            {
                ++ (*count3D);
            }
            else if (temp->SamplerType == MHW_SAMPLER_TYPE_AVS)
            {
                ++ (*countAVS);
            }
            else
            {
                // only 3D and AVS samplers are supported in the fast path for now
                return MOS_STATUS_INVALID_PARAMETER;
            }
        }
    }
    return MOS_STATUS_SUCCESS;
}

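//!
//! \brief    Returns the thread space attached to this kernel; if none was set,
//!           creates (or re-creates) a dummy m_threadCount x 1 thread space and
//!           returns it.
//!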
CmThreadSpaceRT* CmKernelEx::GetThreadSpaceEx()
{
    int status = CM_SUCCESS;
    if (m_threadSpace)
    {
        return m_threadSpace;
    }
    if (m_dummyThreadSpace)
    {
        status = m_device->DestroyThreadSpace(m_dummyThreadSpace);
        if (status != CM_SUCCESS)
        {
            CM_ASSERTMESSAGE("Error: Failed to destroy thread space data.");
        }
    }
    if (m_threadCount)
    {
        status = m_device->CreateThreadSpace(m_threadCount, 1, m_dummyThreadSpace);
        if (status != CM_SUCCESS)
        {
            CM_ASSERTMESSAGE("Error: Failed to create thread space data.");
        }
    }
    return static_cast<CmThreadSpaceRT *>(m_dummyThreadSpace);
}

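//!
//! \brief    Returns the thread group space attached to this kernel; if none was set,
//!           creates (or re-creates) a dummy thread group space covering m_threadCount
//!           threads and returns it.
//!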
CmThreadGroupSpace* CmKernelEx::GetThreadGroupSpaceEx()
{
    if (m_threadGroupSpace)
    {
        return m_threadGroupSpace;
    }
    if (m_dummyThreadGroupSpace)
    {
        m_device->DestroyThreadGroupSpace(m_dummyThreadGroupSpace);
    }

    if (m_threadCount)
    {
        m_device->CreateThreadGroupSpace(1, 1, m_threadCount, 1, m_dummyThreadGroupSpace);
    }
    return m_dummyThreadGroupSpace;
}

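//!
//! \brief    Dumps the content of every surface argument of this kernel for debugging,
//!           tagged with the kernel number, kernel name, task id and argument index.
//!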
void CmKernelEx::SurfaceDumpEx(uint32_t kernelNumber, int32_t taskId)
{
    for(uint32_t argIdx = 0; argIdx < m_argCount; argIdx++)
    {
        uint32_t start = m_indexMap[argIdx];
        uint32_t len = m_indexMap[argIdx + 1] - start;

        for (uint32_t v = 0; v < len; v ++)
        {
            uint32_t i = start + v;
            if (IsSurface(m_flatArgs[i].kind))
            {
                CmSurface *surface = *(CmSurface **)(m_surfaceInArg + m_flatArgs[i].offset);
                if (surface == nullptr)
                {
                    continue;
                }
                surface->DumpContent(kernelNumber, m_kernelInfo->kernelName, taskId, argIdx, v);
            }
        }
    }
}

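//!
//! \brief    Reports whether this kernel can run on the fast path: per-thread
//!           arguments and the WAVEFRONT26Z / WAVEFRONT26ZI dependency patterns
//!           (media object path) are not supported.
//!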
bool CmKernelEx::IsFastPathSupported()
{
    // current fast path doesn't support media object
    bool specialDependency = false;
    if (m_threadSpace)
    {
        CM_DEPENDENCY_PATTERN dependencyPatternType = CM_NONE_DEPENDENCY;
        m_threadSpace->GetDependencyPatternType(dependencyPatternType);
        specialDependency = (dependencyPatternType == CM_WAVEFRONT26Z || dependencyPatternType == CM_WAVEFRONT26ZI);
    }

    return !(m_perThreadArgExists || specialDependency);
}
