1 /*
2 * Copyright (c) 2018-2019, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file cm_kernel_ex.cpp
24 //! \brief Contains Class CmKernelEx definitions
25 //!
26
27 #include "cm_kernel_ex.h"
28 #include "cm_surface.h"
29 #include "cm_surface_manager.h"
30 #include "cm_surface_sampler8x8.h"
31 #include "cm_surface_sampler.h"
32 #include "cm_mem.h"
33 #include "cm_surface_2d_rt.h"
34 #include "cm_surface_2d_up_rt.h"
35 #include "cm_surface_3d_rt.h"
36 #include "cm_buffer_rt.h"
37 #include "cm_device_rt.h"
38 #include "cm_hal.h"
39 #include "cm_surface_state.h"
40 #include "cm_surface_state_manager.h"
41 #include "cm_surface_vme.h"
42 #include "cm_ssh.h"
43 #include "cm_thread_space_rt.h"
44 #include "cm_surface_sampler.h"
45 #include "cm_media_state.h"
46
47 #include "mhw_state_heap.h"
48
49 using namespace CMRT_UMD;
50
~CmKernelEx()51 CmKernelEx::~CmKernelEx()
52 {
53 if (m_dummyThreadSpace)
54 {
55 m_device->DestroyThreadSpace(m_dummyThreadSpace);
56 }
57 if (m_dummyThreadGroupSpace)
58 {
59 m_device->DestroyThreadGroupSpace(m_dummyThreadGroupSpace);
60 }
61 MOS_DeleteArray(m_indexMap);
62 MOS_DeleteArray(m_flatArgs);
63 MOS_DeleteArray(m_propertyIndexes);
64 MOS_DeleteArray(m_cmSurfIndexes);
65 MOS_DeleteArray(m_data);
66 MOS_DeleteArray(m_surfaceInArg);
67 MOS_DeleteArray(m_curbe);
68 }
69
//! \brief   Initializes the kernel and builds the flattened-argument tables
//!          used by the fast path.
//! \details Runs the legacy CmKernelRT initialization first, then walks the
//!          ISA args twice: pass 1 counts flattened args (surface arrays
//!          expand to one slot per element) and finds the minimum payload
//!          offset; pass 2 fills the per-slot descriptors and computes the
//!          explicit curbe size.
//! \return  CM_SUCCESS, or the failure code from the base initialization /
//!          allocation checks.
int32_t CmKernelEx::Initialize(const char *kernelName, const char *options)
{
    int ret = CmKernelRT::Initialize(kernelName, options);
    if (ret != CM_SUCCESS)
    {
        return ret;
    }

    // m_indexMap[i] = index of the first flattened slot belonging to ISA arg
    // i; the extra slot m_indexMap[m_argCount] ends up holding the total
    // flattened count (see the assignment at the bottom of the second loop).
    m_indexMap = MOS_NewArray(uint32_t, (m_argCount+1));
    CM_CHK_NULL_RETURN_CMERROR(m_indexMap);
    MOS_ZeroMemory(m_indexMap, (m_argCount+1)*sizeof(uint32_t));
    m_flatArgCount= 0;
    bool isGpgpuKernel = false;
    uint32_t minPayload = 0;
    // Pass 1: count flattened args, detect gpgpu kernels, find min payload.
    for (uint32_t i = 0; i < m_argCount; i++)
    {
        if (ArgArraySupported(m_args[i].unitKind))
        {
            // Surface-array arg: unitSize is numSurfaces * sizeof(int).
            int numSurfaces = m_args[i].unitSize/sizeof(int);
            m_flatArgCount += numSurfaces;
        }
        else
        {
            ++m_flatArgCount;
        }

        // Any implicit local/group-size or local-id arg marks a gpgpu kernel.
        if (!isGpgpuKernel &&
            ( m_args[i].unitKind == CM_ARGUMENT_IMPLICT_LOCALSIZE
            ||m_args[i].unitKind == CM_ARGUMENT_IMPLICT_GROUPSIZE
            ||m_args[i].unitKind == CM_ARGUMENT_IMPLICIT_LOCALID))
        {
            isGpgpuKernel = true;
        }
        // Track the smallest payload offset; local-id args are excluded
        // because their payload is relocated to the per-thread section later.
        if (i == 0 || (m_args[i].unitKind != CM_ARGUMENT_IMPLICIT_LOCALID && minPayload > m_args[i].unitOffsetInPayload))
        {
            minPayload = m_args[i].unitOffsetInPayload;
        }
    }

    // Non-gpgpu (media) kernels use the fixed legacy payload base instead.
    if (!isGpgpuKernel)
    {
        minPayload = CM_PAYLOAD_OFFSET;
    }

    if (m_flatArgCount == 0)
    {
        return CM_SUCCESS;
    }

    // Per-slot descriptors plus two parallel arrays: m_propertyIndexes lets
    // UpdateCurbe() detect stale surface states; m_cmSurfIndexes remembers
    // the CM surface index each slot was last set with.
    m_flatArgs = MOS_NewArray(_CmArg, m_flatArgCount);
    CM_CHK_NULL_RETURN_CMERROR(m_flatArgs);
    MOS_ZeroMemory(m_flatArgs, m_flatArgCount * sizeof(_CmArg));
    m_propertyIndexes = MOS_NewArray(uint8_t, m_flatArgCount);
    CM_CHK_NULL_RETURN_CMERROR(m_propertyIndexes);
    MOS_ZeroMemory(m_propertyIndexes, m_flatArgCount);
    m_cmSurfIndexes = MOS_NewArray(uint32_t, m_flatArgCount);
    CM_CHK_NULL_RETURN_CMERROR(m_cmSurfIndexes);
    MOS_ZeroMemory(m_cmSurfIndexes, m_flatArgCount * sizeof(uint32_t));

    int j = 0;
    uint32_t offset = 0; //offset in the local buffer
    int localIDIndex = -1;
    // Pass 2: fill the flattened descriptors and compute curbe layout.
    for (uint32_t i = 0; i < m_argCount; i++)
    {
        if (ArgArraySupported(m_args[i].unitKind))
        {
            m_indexMap[i] = j;
            int numSurfaces = m_args[i].unitSize/sizeof(int);
            for (int k = 0; k < numSurfaces; k ++)
            {
                m_flatArgs[j].isaKind = m_args[i].unitKind;
                m_flatArgs[j].kind = m_args[i].unitKind;
                m_flatArgs[j].unitSize = sizeof(void *); // we can either store the pointer to CmSurfaceState or pointer to mos_resource here
                m_flatArgs[j].payloadOffset = m_args[i].unitOffsetInPayload + k*4 - minPayload; //each bte index has 4 bytes
                m_flatArgs[j].offset = offset;
                m_flatArgs[j].sizeInCurbe = 4;
                offset += m_flatArgs[j].unitSize;

                // update curbe size
                if (m_explicitCurbeSize < (uint32_t)(m_flatArgs[j].payloadOffset + m_flatArgs[j].sizeInCurbe))
                {
                    m_explicitCurbeSize = m_flatArgs[j].payloadOffset + m_flatArgs[j].sizeInCurbe;
                }
                ++ j;
            }
        }
        else
        {
            m_indexMap[i] = j;
            m_flatArgs[j].isaKind = m_args[i].unitKind;
            m_flatArgs[j].kind = m_args[i].unitKind;
            m_flatArgs[j].unitSize = m_args[i].unitSize;
            m_flatArgs[j].payloadOffset = m_args[i].unitOffsetInPayload - minPayload;
            m_flatArgs[j].offset = offset;
            m_flatArgs[j].sizeInCurbe = m_flatArgs[j].unitSize;
            offset += m_flatArgs[j].unitSize;

            // update curbe size
            // Local-id args don't extend the explicit curbe; their payload
            // offset is patched after the loop instead.
            if (m_args[i].unitKind == CM_ARGUMENT_IMPLICIT_LOCALID)
            {
                localIDIndex = j;
            }
            else
            {
                if (m_explicitCurbeSize < (uint32_t)(m_flatArgs[j].payloadOffset + m_flatArgs[j].sizeInCurbe))
                {
                    m_explicitCurbeSize = m_flatArgs[j].payloadOffset + m_flatArgs[j].sizeInCurbe;
                }
            }
            ++ j;
        }
        // Overwritten each iteration; final value = total flattened count.
        m_indexMap[m_argCount] = j;
    }

    // adjust the payload of local id
    // Local-id data starts right after the explicit curbe, 32-byte aligned
    // (beginning of the per-thread section).
    if (localIDIndex >= 0)
    {
        m_flatArgs[localIDIndex].payloadOffset = MOS_ALIGN_CEIL(m_explicitCurbeSize, 32);
    }

    // m_data holds the flattened values (pointers for surfaces/samplers, raw
    // bytes otherwise); m_surfaceInArg mirrors it with the CmSurface*
    // originals for surface slots.
    m_data = MOS_NewArray(uint8_t, offset);
    CM_CHK_NULL_RETURN_CMERROR(m_data);
    m_surfaceInArg = MOS_NewArray(uint8_t, offset);
    CM_CHK_NULL_RETURN_CMERROR(m_surfaceInArg);
    MOS_ZeroMemory(m_data, sizeof(uint8_t)*offset);
    MOS_ZeroMemory(m_surfaceInArg, sizeof(uint8_t)*offset);

    m_hashValue = m_kernelInfo->hashValue;

    return CM_SUCCESS;
}
201
AllocateCurbe()202 MOS_STATUS CmKernelEx::AllocateCurbe()
203 {
204 MOS_DeleteArray(m_curbe);
205 if (m_explicitCurbeSize > 0)
206 {
207 m_curbeSize = MOS_ALIGN_CEIL(m_explicitCurbeSize, 64);
208 m_curbeSizePerThread = m_curbeSize;
209 m_curbeSizeCrossThread = 0;
210 m_curbe = MOS_NewArray(uint8_t, m_curbeSize);
211 CM_CHK_NULL_RETURN_MOSERROR(m_curbe);
212 MOS_ZeroMemory(m_curbe, m_curbeSize);
213 }
214 return MOS_STATUS_SUCCESS;
215 }
216
//! \brief   Allocates the curbe for a GPGPU (thread-group) launch and fills
//!          the implicit arguments.
//! \details Layout: a cross-thread section (explicit args, 32-byte aligned)
//!          followed by one per-thread record per thread carrying the local
//!          ID; group-size and local-size implicit args are written into the
//!          cross-thread section.
//! \param   globalGroupSpace [in] overriding group space; falls back to the
//!          kernel's own m_threadGroupSpace when nullptr.
MOS_STATUS CmKernelEx::AllocateCurbeAndFillImplicitArgs(CmThreadGroupSpace *globalGroupSpace)
{
    CmThreadGroupSpace *tgs = (globalGroupSpace == nullptr)?m_threadGroupSpace:globalGroupSpace;

    uint32_t thrdSpaceWidth = 0;
    uint32_t thrdSpaceHeight = 0;
    uint32_t thrdSpaceDepth = 0;
    uint32_t grpSpaceWidth = 0;
    uint32_t grpSpaceHeight = 0;
    uint32_t grpSpaceDepth = 0;

    if (tgs)
    {
        tgs->GetThreadGroupSpaceSize(thrdSpaceWidth, thrdSpaceHeight, thrdSpaceDepth, grpSpaceWidth, grpSpaceHeight, grpSpaceDepth);
    }

    MOS_DeleteArray(m_curbe);
    // Per-thread record is one or two 32-byte rows. NOTE(review): the
    // (size % 32 == 4) special case appears tied to payload row packing —
    // confirm against the GPGPU walker curbe layout rules.
    m_curbeSizePerThread = (m_explicitCurbeSize%32 == 4)? 64:32;
    m_curbeSizeCrossThread = MOS_ALIGN_CEIL(m_explicitCurbeSize, 32);
    m_curbeSize = m_curbeSizeCrossThread + m_curbeSizePerThread * thrdSpaceWidth * thrdSpaceHeight * thrdSpaceDepth;
    m_curbeSize = MOS_ALIGN_CEIL(m_curbeSize, 64);
    m_curbe = MOS_NewArray(uint8_t, m_curbeSize);
    CM_CHK_NULL_RETURN_MOSERROR(m_curbe);
    MOS_ZeroMemory(m_curbe, m_curbeSize);

    // Locate the payload offsets of the implicit args (-1 = not present).
    int localIdPayload = -1;
    int groupSizePayload = -1;
    int localSizePayload = -1;

    for (uint32_t i = 0; i < m_flatArgCount; i++)
    {
        if (m_flatArgs[i].kind == ARG_KIND_IMPLICT_LOCALSIZE)
            localSizePayload = m_flatArgs[i].payloadOffset;
        if (m_flatArgs[i].kind == ARG_KIND_IMPLICT_GROUPSIZE)
            groupSizePayload = m_flatArgs[i].payloadOffset;
        if (m_flatArgs[i].kind == ARG_KIND_IMPLICIT_LOCALID)
            localIdPayload = m_flatArgs[i].payloadOffset;
    }

    // set group size implicit args
    if (groupSizePayload >= 0)
    {
        *(uint32_t *)(m_curbe + groupSizePayload) = grpSpaceWidth;
        *(uint32_t *)(m_curbe + groupSizePayload + 4) = grpSpaceHeight;
        *(uint32_t *)(m_curbe + groupSizePayload + 8) = grpSpaceDepth;
    }

    // set local size implicit args
    if (localSizePayload >= 0)
    {
        *(uint32_t *)(m_curbe + localSizePayload) = thrdSpaceWidth;
        *(uint32_t *)(m_curbe + localSizePayload + 4) = thrdSpaceHeight;
        *(uint32_t *)(m_curbe + localSizePayload + 8) = thrdSpaceDepth;
    }

    // set local id data per thread
    // One (x, y, z) triple per thread, X varying fastest, each record
    // m_curbeSizePerThread bytes apart.
    if (localIdPayload >= 0)
    {
        int offset = localIdPayload;
        for (uint32_t idZ = 0; idZ < thrdSpaceDepth; idZ++)
        {
            for (uint32_t idY = 0; idY < thrdSpaceHeight; idY++)
            {
                for (uint32_t idX = 0; idX < thrdSpaceWidth; idX++)
                {
                    *(uint32_t *)(m_curbe + offset) = idX;
                    *(uint32_t *)(m_curbe + offset + 4) = idY;
                    *(uint32_t *)(m_curbe + offset + 8) = idZ;
                    offset += m_curbeSizePerThread;
                }
            }
        }
    }

    return MOS_STATUS_SUCCESS;
}
293
IsSurface(uint16_t kind)294 bool CmKernelEx::IsSurface(uint16_t kind)
295 {
296 switch (kind)
297 {
298 case ARG_KIND_SURFACE:
299 case ARG_KIND_SURFACE_1D:
300 case ARG_KIND_SURFACE_2D:
301 case ARG_KIND_SURFACE_2D_UP:
302 case ARG_KIND_SURFACE_SAMPLER:
303 case ARG_KIND_SURFACE2DUP_SAMPLER:
304 case ARG_KIND_SURFACE_3D:
305 case ARG_KIND_SURFACE_SAMPLER8X8_AVS:
306 case ARG_KIND_SURFACE_SAMPLER8X8_VA:
307 case ARG_KIND_SURFACE_2D_SCOREBOARD:
308 case ARG_KIND_STATE_BUFFER:
309 case ARG_KIND_SURFACE_VME:
310 return true;
311 default:
312 return false;
313 }
314 return false;
315 }
316
SetKernelArg(uint32_t index,size_t size,const void * value)317 int32_t CmKernelEx::SetKernelArg(uint32_t index, size_t size, const void * value)
318 {
319 if (!m_blCreatingGPUCopyKernel) // gpucopy kernels only executed by fastpath, no need to set legacy kernels
320 {
321 CmKernelRT::SetKernelArg(index, size, value);
322 }
323 if( index >= m_argCount )
324 {
325 CM_ASSERTMESSAGE("Error: Invalid kernel arg count.");
326 return CM_INVALID_ARG_INDEX;
327
328 }
329
330 if( !value)
331 {
332 CM_ASSERTMESSAGE("Error: Invalid kernel arg value.");
333 return CM_INVALID_ARG_VALUE;
334 }
335
336 if( size == 0)
337 {
338 CM_ASSERTMESSAGE("Error: Invalid kernel arg size.");
339 return CM_INVALID_ARG_SIZE;
340 }
341
342 uint32_t start = m_indexMap[index];
343 uint32_t len = m_indexMap[index + 1] - start;
344
345 if (IsSurface(m_flatArgs[start].isaKind))
346 {
347 CMRT_UMD::SurfaceIndex *surfIndexes = (CMRT_UMD::SurfaceIndex *)value;
348 if (surfIndexes == (CMRT_UMD::SurfaceIndex *)CM_NULL_SURFACE)
349 {
350 for (uint32_t i = 0; i < len; i++)
351 {
352 *(void **)(m_data + m_flatArgs[start + i].offset) = nullptr;
353 *(void **)(m_surfaceInArg + m_flatArgs[start + i].offset) = nullptr;
354 m_flatArgs[start + i].isSet = true;
355 }
356 return CM_SUCCESS;
357 }
358 // sanity check
359 if (len * sizeof(CMRT_UMD::SurfaceIndex) != size)
360 {
361 CM_ASSERTMESSAGE("Error: Invalid kernel arg size.");
362 return CM_INVALID_ARG_SIZE;
363 }
364
365 for (uint32_t i = 0; i < len; i++)
366 {
367 uint32_t index = surfIndexes[i].get_data();
368
369 m_flatArgs[start + i].isSet = true;
370 if (index == CM_NULL_SURFACE)
371 {
372 *(void **)(m_data + m_flatArgs[start+i].offset) = nullptr;
373 *(void **)(m_surfaceInArg + m_flatArgs[start+i].offset) = nullptr;
374 }
375 else
376 {
377 CmSurface* surface = nullptr;
378 m_surfaceMgr->GetSurface(index, surface);
379 if (nullptr == surface)
380 {
381 *(void **)(m_data + m_flatArgs[start+i].offset) = nullptr;
382 *(void **)(m_surfaceInArg + m_flatArgs[start+i].offset) = nullptr;
383 }
384 else
385 {
386 m_flatArgs[start + i].kind = ToArgKind(surface);
387
388 // get the CmSurfaceState from the surface index, this will be changed if surfmgr optimized
389 // most likely, this will be moved to CmSurface
390 CmSurfaceState *temp = GetSurfaceState(surface, index);
391 *(CmSurfaceState **)(m_data + m_flatArgs[start + i].offset) = temp;
392 *(CmSurface **)(m_surfaceInArg + m_flatArgs[start+i].offset) = surface;
393 m_propertyIndexes[start + i] = surface->GetPropertyIndex();
394 m_cmSurfIndexes[start + i] = index;
395 }
396 }
397 }
398 }
399 else if (m_flatArgs[start].isaKind == ARG_KIND_SAMPLER) // only support 3D sampler and AVS sampler in fastpath
400 {
401 CMRT_UMD::SamplerIndex *samplerIndexes = (CMRT_UMD::SamplerIndex *)value;
402 // sanity check
403 if (len * sizeof(CMRT_UMD::SurfaceIndex) != size)
404 {
405 CM_ASSERTMESSAGE("Error: Invalid kernel arg size.");
406 return CM_INVALID_ARG_SIZE;
407 }
408
409 for (uint32_t i = 0; i < len; i++)
410 {
411 uint32_t index = samplerIndexes[i].get_data();
412 MHW_SAMPLER_STATE_PARAM *temp = (MHW_SAMPLER_STATE_PARAM *)GetSamplerParam(index);
413 *(MHW_SAMPLER_STATE_PARAM **)(m_data + m_flatArgs[start + i].offset) = temp;
414 }
415 }
416 else
417 {
418 if (size != m_flatArgs[start].unitSize)
419 {
420 CM_ASSERTMESSAGE("Error: Invalid kernel arg size.");
421 return CM_INVALID_ARG_SIZE;
422 }
423 CmSafeMemCopy((void *)(m_data + m_flatArgs[start].offset), value, size);
424 }
425 return CM_SUCCESS;
426 }
427
ToArgKind(CmSurface * surface)428 CM_ARG_KIND CmKernelEx::ToArgKind(CmSurface *surface)
429 {
430 switch(surface->Type())
431 {
432 case CM_ENUM_CLASS_TYPE_CMBUFFER_RT:
433 return ARG_KIND_SURFACE_1D;
434 case CM_ENUM_CLASS_TYPE_CMSURFACE2D:
435 return ARG_KIND_SURFACE_2D;
436 case CM_ENUM_CLASS_TYPE_CMSURFACE2DUP:
437 return ARG_KIND_SURFACE_2D_UP;
438 case CM_ENUM_CLASS_TYPE_CMSURFACE3D:
439 return ARG_KIND_SURFACE_3D;
440 case CM_ENUM_CLASS_TYPE_CMSURFACESAMPLER:
441 {
442 CmSurfaceSampler* surfSampler = static_cast <CmSurfaceSampler *> (surface);
443 SAMPLER_SURFACE_TYPE type;
444 surfSampler->GetSurfaceType(type);
445 if (type == SAMPLER_SURFACE_TYPE_2D)
446 {
447 return ARG_KIND_SURFACE_SAMPLER;
448 }
449 else if (type == SAMPLER_SURFACE_TYPE_2DUP)
450 {
451 return ARG_KIND_SURFACE2DUP_SAMPLER;
452 }
453 else
454 {
455 return ARG_KIND_SURFACE_3D;
456 }
457 }
458 case CM_ENUM_CLASS_TYPE_CMSURFACESAMPLER8X8:
459 {
460 CmSurfaceSampler8x8* surfSampler8x8 = static_cast <CmSurfaceSampler8x8 *> (surface);
461 if (surfSampler8x8->GetSampler8x8SurfaceType() == CM_VA_SURFACE)
462 {
463 return ARG_KIND_SURFACE_SAMPLER8X8_VA;
464 }
465 else
466 {
467 return ARG_KIND_SURFACE_SAMPLER8X8_AVS;
468 }
469 }
470 case CM_ENUM_CLASS_TYPE_CMSURFACEVME:
471 return ARG_KIND_SURFACE_VME;
472 case CM_ENUM_CLASS_TYPE_CM_STATE_BUFFER:
473 return ARG_KIND_STATE_BUFFER;
474 default:
475 return ARG_KIND_GENERAL;
476 }
477 }
478
//! \brief   Resolves a CmSurface (plus its CM surface index) to the
//!          CmSurfaceState to program into the binding table.
//! \details Looks up the surface's HAL-side table entry and asks its surface
//!          state manager for the state. Aliased surfaces (index beyond the
//!          base surface-array size) select a per-alias state parameter.
//!          VME states are built lazily and cached on the surface.
//! \param   surface [in] runtime surface object
//! \param   index   [in] CM surface index the argument was set with
//! \return  the surface state, or nullptr for unsupported types/failures
CmSurfaceState* CmKernelEx::GetSurfaceState(CmSurface *surface, uint32_t index)
{
    CM_HAL_STATE *cmHalState = ((PCM_CONTEXT_DATA)m_device->GetAccelData())->cmHalState;
    uint32_t surfaceArraySize = 0;
    m_surfaceMgr->GetSurfaceArraySize(surfaceArraySize);
    CM_CHK_COND_RETURN((surfaceArraySize == 0), nullptr, "Surface Array is empty.");
    // Aliases share one HAL entry; alias ordinal = index / array size.
    uint32_t aliasIndex = index/surfaceArraySize;

    switch (surface->Type())
    {
        case CM_ENUM_CLASS_TYPE_CMSURFACE2D:
        {
            CmSurface2DRT* surf2D = static_cast<CmSurface2DRT*>(surface);
            uint32_t halIndex = 0;
            surf2D->GetIndexFor2D(halIndex);
            // Use the per-alias state parameter only when this is an alias or
            // a custom state was explicitly set on the surface.
            PCM_HAL_SURFACE2D_SURFACE_STATE_PARAM surfStateParam = nullptr;
            if (aliasIndex > 0 || cmHalState->umdSurf2DTable[halIndex].surfStateSet)
            {
                surfStateParam = &(cmHalState->umdSurf2DTable[halIndex].surfaceStateParam[aliasIndex]);
            }
            return cmHalState->umdSurf2DTable[halIndex].surfStateMgr->GetSurfaceState(0, 0, surfStateParam);
        }
        case CM_ENUM_CLASS_TYPE_CMSURFACE2DUP:
        {
            CmSurface2DUPRT* surf2DUP = static_cast<CmSurface2DUPRT*>(surface);
            uint32_t halIndex = 0;
            surf2DUP->GetHandle(halIndex);
            return cmHalState->surf2DUPTable[halIndex].surfStateMgr->GetSurfaceState();
        }
        case CM_ENUM_CLASS_TYPE_CMBUFFER_RT:
        {
            CmBuffer_RT* surf1D = static_cast<CmBuffer_RT*>(surface);
            uint32_t halIndex = 0;
            surf1D->GetHandle(halIndex);
            // Same alias/custom-state selection as the 2D case above.
            CM_HAL_BUFFER_SURFACE_STATE_ENTRY *surfStateParam = nullptr;
            if (aliasIndex > 0 || cmHalState->bufferTable[halIndex].surfStateSet)
            {
                surfStateParam = &(cmHalState->bufferTable[halIndex].surfaceStateEntry[aliasIndex]);
            }
            return cmHalState->bufferTable[halIndex].surfStateMgr->GetSurfaceState(surfStateParam);
        }
        case CM_ENUM_CLASS_TYPE_CMSURFACE3D:
        {
            CmSurface3DRT *surf3D = static_cast<CmSurface3DRT *>(surface);
            uint32_t halIndex = 0;
            surf3D->GetHandle(halIndex);
            return cmHalState->surf3DTable[halIndex].surfStateMgr->GetSurfaceState(0, 1);
        }
        case CM_ENUM_CLASS_TYPE_CMSURFACEVME:
        {
            // VME states are built on first use and cached on the surface.
            CmSurfaceVme *surfVme = static_cast<CmSurfaceVme*>(surface);
            CmSurfaceStateVME *surfState = surfVme->GetSurfaceState();
            if (surfState == nullptr)
            {
                int argSize = surfVme->GetVmeCmArgSize();
                int surfCount = surfVme->GetTotalSurfacesCount();

                uint8_t *vmeValue = MOS_NewArray(uint8_t, argSize);
                if (vmeValue == nullptr)
                {
                    return nullptr;
                }
                uint16_t surfIndexes[17];
                SetArgsSingleVme(surfVme, vmeValue, surfIndexes);
                surfState = MOS_New(CmSurfaceStateVME, cmHalState);
                if (surfState == nullptr)
                {
                    MOS_DeleteArray(vmeValue);
                    return nullptr;
                }
                surfState->Initialize((CM_HAL_VME_ARG_VALUE *)vmeValue);

                surfVme->SetSurfState(cmHalState->advExecutor, vmeValue, surfState); // set for destroy later
            }
            return surfState;
        }
        case CM_ENUM_CLASS_TYPE_CMSURFACESAMPLER:
        {
            uint32_t halIndex = 0;
            uint16_t cmIndex = 0;
            CmSurfaceSampler* surfSampler = static_cast <CmSurfaceSampler *> (surface);
            surfSampler->GetHandle(halIndex);
            surfSampler->GetCmIndexCurrent(cmIndex);
            SAMPLER_SURFACE_TYPE type;
            surfSampler->GetSurfaceType(type);
            switch (type)
            {
                case SAMPLER_SURFACE_TYPE_2D:
                {
                    // re-calculate the aliasIndex
                    // (the function argument "index" was the sampler surface
                    // index; the alias is derived from the underlying 2D's
                    // own CM index instead)
                    aliasIndex = cmIndex/surfaceArraySize;

                    PCM_HAL_SURFACE2D_SURFACE_STATE_PARAM surfStateParam = nullptr;
                    if (aliasIndex > 0 || cmHalState->umdSurf2DTable[halIndex].surfStateSet)
                    {
                        surfStateParam = &(cmHalState->umdSurf2DTable[halIndex].surfaceStateParam[aliasIndex]);
                    }
                    return cmHalState->umdSurf2DTable[halIndex].surfStateMgr->GetSurfaceState(0, 1, surfStateParam);
                }
                case SAMPLER_SURFACE_TYPE_2DUP:
                {
                    return cmHalState->surf2DUPTable[halIndex].surfStateMgr->GetSurfaceState(0, 1);
                }
                case SAMPLER_SURFACE_TYPE_3D:
                {
                    return cmHalState->surf3DTable[halIndex].surfStateMgr->GetSurfaceState(0, 1);
                }
                default:
                {
                    break;
                }
            }
            break;
        }
        case CM_ENUM_CLASS_TYPE_CMSURFACESAMPLER8X8:
        {
            CmSurfaceSampler8x8* surfSampler8x8 = static_cast <CmSurfaceSampler8x8 *> (surface);
            uint32_t halIndex = 0;
            uint16_t cmIndex = 0;

            surfSampler8x8->GetIndexCurrent(halIndex);
            surfSampler8x8->GetCmIndex(cmIndex);
            // re-calculate the aliasIndex
            aliasIndex = cmIndex/surfaceArraySize;

            PCM_HAL_SURFACE2D_SURFACE_STATE_PARAM surfStateParam = nullptr;
            if (aliasIndex > 0 || cmHalState->umdSurf2DTable[halIndex].surfStateSet)
            {
                surfStateParam = &(cmHalState->umdSurf2DTable[halIndex].surfaceStateParam[aliasIndex]);
            }
            return cmHalState->umdSurf2DTable[halIndex].surfStateMgr->GetSurfaceState(1, 1, surfStateParam);
        }
        default: //not implemented yet
            return nullptr;

    }
    return nullptr;
}
617
GetMaxBteNum()618 uint32_t CmKernelEx::GetMaxBteNum()
619 {
620 uint32_t bteCount = 0;
621 for (uint32_t i = 0; i < m_flatArgCount; i++)
622 {
623 if (IsSurface(m_flatArgs[i].kind))
624 {
625 CmSurfaceState *surfState = *(CmSurfaceState **)(m_data + m_flatArgs[i].offset);
626 if (surfState == nullptr) //CM_NULL_SURFACE
627 {
628 continue;
629 }
630 bteCount += surfState->GetNumBte();
631 }
632 }
633 return bteCount;
634 }
635
//! \brief   Finalizes the curbe for submission: binds surfaces/samplers and
//!          copies plain argument data into their payload slots.
//! \details For surface args the state is lazily refreshed when the surface's
//!          property index changed since SetKernelArg, then added to the SSH
//!          and its binding-table index written into the curbe. Samplers get
//!          their BTI from the media state. Implicit args were already
//!          written by AllocateCurbeAndFillImplicitArgs.
//! \param   ssh        [in] surface state heap to receive surface states
//! \param   mediaState [in] media state to receive samplers
//! \param   kernelIdx  [in] kernel slot in the media state
MOS_STATUS CmKernelEx::UpdateCurbe(CmSSH *ssh, CmMediaState *mediaState, uint32_t kernelIdx)
{
    for (uint32_t i = 0; i < m_flatArgCount; i++)
    {
        if (IsSurface(m_flatArgs[i].kind))
        {
            CmSurface *surface = *(CmSurface **)(m_surfaceInArg + m_flatArgs[i].offset);
            if (surface != nullptr && m_propertyIndexes[i] != surface->GetPropertyIndex())
            {
                // need to update the surface state
                CmSurfaceState *temp = GetSurfaceState(surface, m_cmSurfIndexes[i]);
                m_propertyIndexes[i] = surface->GetPropertyIndex();
                *(CmSurfaceState **)(m_data + m_flatArgs[i].offset) = temp;
            }
            CmSurfaceState *surfState = *(CmSurfaceState **)(m_data + m_flatArgs[i].offset);
            if (surfState == nullptr)
            {
                // CM_NULL_SURFACE: no BTE, payload slot left as-is (zeroed).
                continue;
            }
            uint32_t bteIdx = ssh->AddSurfaceState(surfState);
            *(uint32_t *)(m_curbe + m_flatArgs[i].payloadOffset) = bteIdx;
        }
        else if (m_flatArgs[i].kind == ARG_KIND_SAMPLER)
        {
            MHW_SAMPLER_STATE_PARAM *param = *(MHW_SAMPLER_STATE_PARAM **)(m_data + m_flatArgs[i].offset);
            uint32_t bteIdx = mediaState->AddSampler(param, kernelIdx);
            *(uint32_t *)(m_curbe + m_flatArgs[i].payloadOffset) = bteIdx;
        }
        else if (m_flatArgs[i].kind != ARG_KIND_IMPLICT_LOCALSIZE
                && m_flatArgs[i].kind != ARG_KIND_IMPLICT_GROUPSIZE
                && m_flatArgs[i].kind != ARG_KIND_IMPLICIT_LOCALID)
        {
            // Plain data arg: copy the stored bytes into the payload slot.
            MOS_SecureMemcpy(m_curbe + m_flatArgs[i].payloadOffset, m_flatArgs[i].sizeInCurbe,
                        m_data + m_flatArgs[i].offset, m_flatArgs[i].unitSize);
        }
    }

    // dump
    /*
    for (int i = 0; i < m_curbeSize/4; i++)
    {
        printf("0x%x, ", *((uint32_t *)m_curbe + i));
    }
    printf("\n");
    */
    return MOS_STATUS_SUCCESS;
}
683
UpdateFastTracker(uint32_t trackerIndex,uint32_t tracker)684 MOS_STATUS CmKernelEx::UpdateFastTracker(uint32_t trackerIndex, uint32_t tracker)
685 {
686 for (uint32_t i = 0; i < m_flatArgCount; i++)
687 {
688 if (IsSurface(m_flatArgs[i].kind))
689 {
690 CmSurface *surface = *(CmSurface **)(m_surfaceInArg + m_flatArgs[i].offset);
691 if (surface == nullptr)
692 {
693 continue;
694 }
695 surface->SetFastTracker(trackerIndex, tracker);
696 }
697 }
698 return MOS_STATUS_SUCCESS;
699 }
700
701
UpdateSWSBArgs(CmThreadSpaceRT * threadSpace)702 MOS_STATUS CmKernelEx::UpdateSWSBArgs(CmThreadSpaceRT *threadSpace)
703 {
704 CmThreadSpaceRT *ts = (threadSpace == nullptr)?m_threadSpace:threadSpace;
705 if (ts == nullptr)
706 {
707 return MOS_STATUS_SUCCESS;
708 }
709 int ret = ts->SetDependencyArgToKernel(this);
710 return (ret == 0)? MOS_STATUS_SUCCESS : MOS_STATUS_UNKNOWN;
711 }
712
//! \brief   Binds a global (static) buffer and reserves its fixed
//!          binding-table index for the fast path.
//! \param   index [in] global surface slot (0 .. CM_GLOBAL_SURFACE_NUMBER-1)
//! \param   value [in] pointer to a SurfaceIndex identifying the buffer
//! \return  CM_SUCCESS, or CM_INVALID_GLOBAL_BUFFER_INDEX /
//!          CM_INVALID_BUFFER_HANDLER on bad input
int32_t CmKernelEx::SetStaticBuffer(uint32_t index, const void *value)
{
    // Keep the legacy kernel in sync first; it does its own bookkeeping.
    CM_CHK_CMSTATUS_RETURN(CmKernelRT::SetStaticBuffer(index, value));

    if(index >= CM_GLOBAL_SURFACE_NUMBER)
    {
        CM_ASSERTMESSAGE("Error: Surface Index exceeds max global surface number.");
        return CM_INVALID_GLOBAL_BUFFER_INDEX;
    }

    if(!value)
    {
        CM_ASSERTMESSAGE("Error: Invalid StaticBuffer arg value.");
        return CM_INVALID_BUFFER_HANDLER;
    }

    SurfaceIndex* surfIndex = (SurfaceIndex* )value;
    uint32_t indexData = surfIndex->get_data();

    CmSurface* surface = nullptr;
    m_surfaceMgr->GetSurface(indexData, surface);
    if (surface != nullptr)
    {
        // for gen9+ platforms, index + 1 is the BTI
        m_reservedSurfaceBteIndexes[index + CM_GLOBAL_SURFACE_INDEX_START_GEN9_PLUS]
            = GetSurfaceState(surface, indexData);
    }
    return CM_SUCCESS;
}
742
SetSurfaceBTI(SurfaceIndex * surfIndex,uint32_t bti)743 int32_t CmKernelEx::SetSurfaceBTI(SurfaceIndex *surfIndex, uint32_t bti)
744 {
745 CM_CHK_CMSTATUS_RETURN(CmKernelRT::SetSurfaceBTI(surfIndex, bti));
746
747 CM_CHK_NULL_RETURN_CMERROR(surfIndex);
748 uint32_t index = surfIndex->get_data();
749
750 CmSurface* surface = nullptr;
751 m_surfaceMgr->GetSurface(index, surface);
752 if (surface != nullptr)
753 {
754 m_reservedSurfaceBteIndexes[bti] = GetSurfaceState(surface, index);
755 }
756 return CM_SUCCESS;
757 }
758
SetSamplerBTI(SamplerIndex * sampler,uint32_t nIndex)759 int32_t CmKernelEx::SetSamplerBTI(SamplerIndex* sampler, uint32_t nIndex)
760 {
761 CM_CHK_CMSTATUS_RETURN(CmKernelRT::SetSamplerBTI(sampler, nIndex));
762
763 uint32_t index = sampler->get_data();
764 m_reservedSamplerBteIndexes[nIndex] = (MHW_SAMPLER_STATE_PARAM *)GetSamplerParam(index);
765 return MOS_STATUS_SUCCESS;
766 }
767
LoadReservedSurfaces(CmSSH * ssh)768 MOS_STATUS CmKernelEx::LoadReservedSurfaces(CmSSH *ssh)
769 {
770 for (auto it = m_reservedSurfaceBteIndexes.begin(); it != m_reservedSurfaceBteIndexes.end(); ++ it)
771 {
772 ssh->AddSurfaceState(it->second, it->first);
773 }
774
775 // reset the table in legacy kernel for bti reuse
776 if (m_usKernelPayloadSurfaceCount)
777 {
778 CmSafeMemSet(m_IndirectSurfaceInfoArray, 0, m_usKernelPayloadSurfaceCount * sizeof(CM_INDIRECT_SURFACE_INFO));
779 m_usKernelPayloadSurfaceCount = 0;
780 }
781 return MOS_STATUS_SUCCESS;
782 }
783
LoadReservedSamplers(CmMediaState * mediaState,uint32_t kernelIdx)784 MOS_STATUS CmKernelEx::LoadReservedSamplers(CmMediaState *mediaState, uint32_t kernelIdx)
785 {
786 for (auto it = m_reservedSamplerBteIndexes.begin(); it != m_reservedSamplerBteIndexes.end(); ++ it)
787 {
788 mediaState->AddSampler((MHW_SAMPLER_STATE_PARAM *)it->second, kernelIdx, it->first);
789 }
790 return MOS_STATUS_SUCCESS;
791 }
792
GetSamplerParam(uint32_t index)793 void* CmKernelEx::GetSamplerParam(uint32_t index)
794 {
795 CM_HAL_STATE *cmHalState = ((PCM_CONTEXT_DATA)m_device->GetAccelData())->cmHalState;
796 return (void *)&cmHalState->samplerTable[index];
797 }
798
GetSamplerCount(uint32_t * count3D,uint32_t * countAVS)799 MOS_STATUS CmKernelEx::GetSamplerCount(uint32_t *count3D, uint32_t *countAVS)
800 {
801 *count3D = 0;
802 *countAVS = 0;
803 for (uint32_t i = 0; i < m_flatArgCount; i++)
804 {
805 if (m_flatArgs[i].kind == ARG_KIND_SAMPLER)
806 {
807 MHW_SAMPLER_STATE_PARAM *temp = *(MHW_SAMPLER_STATE_PARAM **)(m_data + m_flatArgs[i].offset);
808 if (temp->SamplerType == MHW_SAMPLER_TYPE_3D)
809 {
810 ++ (*count3D);
811 }
812 else if (temp->SamplerType == MHW_SAMPLER_TYPE_AVS)
813 {
814 ++ (*countAVS);
815 }
816 else
817 {
818 // only support 3D and AVS samplers by now in fast path
819 return MOS_STATUS_INVALID_PARAMETER;
820 }
821 }
822 }
823 return MOS_STATUS_SUCCESS;
824 }
825
GetThreadSpaceEx()826 CmThreadSpaceRT* CmKernelEx::GetThreadSpaceEx()
827 {
828 int status = CM_SUCCESS;
829 if (m_threadSpace)
830 {
831 return m_threadSpace;
832 }
833 if (m_dummyThreadSpace)
834 {
835 status = m_device->DestroyThreadSpace(m_dummyThreadSpace);
836 if (status != CM_SUCCESS)
837 {
838 CM_ASSERTMESSAGE("Error: Failed to destroy thread space data.");
839 }
840 }
841 if (m_threadCount)
842 {
843 status = m_device->CreateThreadSpace(m_threadCount, 1, m_dummyThreadSpace);
844 if (status != CM_SUCCESS)
845 {
846 CM_ASSERTMESSAGE("Error: Failed to create thread space data.");
847 }
848 }
849 return static_cast<CmThreadSpaceRT *>(m_dummyThreadSpace);
850 }
851
GetThreadGroupSpaceEx()852 CmThreadGroupSpace* CmKernelEx::GetThreadGroupSpaceEx()
853 {
854 if (m_threadGroupSpace)
855 {
856 return m_threadGroupSpace;
857 }
858 if (m_dummyThreadGroupSpace)
859 {
860 m_device->DestroyThreadGroupSpace(m_dummyThreadGroupSpace);
861 }
862
863 if (m_threadCount)
864 {
865 m_device->CreateThreadGroupSpace(1, 1, m_threadCount, 1, m_dummyThreadGroupSpace);
866 }
867 return m_dummyThreadGroupSpace;
868 }
869
SurfaceDumpEx(uint32_t kernelNumber,int32_t taskId)870 void CmKernelEx::SurfaceDumpEx(uint32_t kernelNumber, int32_t taskId)
871 {
872 for(uint32_t argIdx = 0; argIdx < m_argCount; argIdx++)
873 {
874 uint32_t start = m_indexMap[argIdx];
875 uint32_t len = m_indexMap[argIdx + 1] - start;
876
877 for (uint32_t v = 0; v < len; v ++)
878 {
879 uint32_t i = start + v;
880 if (IsSurface(m_flatArgs[i].kind))
881 {
882 CmSurface *surface = *(CmSurface **)(m_surfaceInArg + m_flatArgs[i].offset);
883 if (surface == nullptr)
884 {
885 continue;
886 }
887 surface->DumpContent(kernelNumber, m_kernelInfo->kernelName, taskId, argIdx, v);
888 }
889 }
890 }
891 }
892
IsFastPathSupported()893 bool CmKernelEx::IsFastPathSupported()
894 {
895 // current fast path doesn't support media object
896 bool specialDependency = false;
897 if (m_threadSpace)
898 {
899 CM_DEPENDENCY_PATTERN dependencyPatternType = CM_NONE_DEPENDENCY;
900 m_threadSpace->GetDependencyPatternType(dependencyPatternType);
901 specialDependency = (dependencyPatternType == CM_WAVEFRONT26Z || dependencyPatternType == CM_WAVEFRONT26ZI);
902 }
903
904 return !(m_perThreadArgExists || specialDependency);
905 }
906
907