1 /*
2 * Copyright (c) 2021-2022, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file vp_render_cmd_packet.cpp
//! \brief    Render packet which is used by the media pipeline.
//! \details  The render packet provides the structures and generates the command buffer which the media pipeline will use.
26 //!
27 #include <iomanip>
28
29 #include "vp_render_cmd_packet.h"
30 #include "vp_platform_interface.h"
31 #include "vp_pipeline_common.h"
32 #include "vp_render_kernel_obj.h"
33 #include "vp_pipeline.h"
34 #include "vp_packet_pipe.h"
35 #include "vp_user_feature_control.h"
36 #include "mhw_mi_itf.h"
37 #include "mhw_mi_cmdpar.h"
38 #include "vp_platform_interface.h"
39 #include "hal_oca_interface_next.h"
40 #include "renderhal_platform_interface.h"
41
42 namespace vp
43 {
InitRenderHalSurfType(VPHAL_SURFACE_TYPE vpSurfType)44 static inline RENDERHAL_SURFACE_TYPE InitRenderHalSurfType(VPHAL_SURFACE_TYPE vpSurfType)
45 {
46 VP_FUNC_CALL();
47
48 switch (vpSurfType)
49 {
50 case SURF_IN_BACKGROUND:
51 return RENDERHAL_SURF_IN_BACKGROUND;
52
53 case SURF_IN_PRIMARY:
54 return RENDERHAL_SURF_IN_PRIMARY;
55
56 case SURF_IN_SUBSTREAM:
57 return RENDERHAL_SURF_IN_SUBSTREAM;
58
59 case SURF_IN_REFERENCE:
60 return RENDERHAL_SURF_IN_REFERENCE;
61
62 case SURF_OUT_RENDERTARGET:
63 return RENDERHAL_SURF_OUT_RENDERTARGET;
64
65 case SURF_NONE:
66 default:
67 return RENDERHAL_SURF_NONE;
68 }
69 }
70
VpRenderCmdPacket(MediaTask * task,PVP_MHWINTERFACE hwInterface,PVpAllocator & allocator,VPMediaMemComp * mmc,VpKernelSet * kernelSet)71 VpRenderCmdPacket::VpRenderCmdPacket(MediaTask *task, PVP_MHWINTERFACE hwInterface, PVpAllocator &allocator, VPMediaMemComp *mmc, VpKernelSet *kernelSet) : CmdPacket(task),
72 RenderCmdPacket(task, hwInterface->m_osInterface, hwInterface->m_renderHal),
73 VpCmdPacket(task, hwInterface, allocator, mmc, VP_PIPELINE_PACKET_RENDER),
74 m_firstFrame(true),
75 m_kernelSet(kernelSet)
76 {
77 if (m_hwInterface && m_hwInterface->m_userFeatureControl)
78 {
79 bool computeContextEnabled = m_hwInterface->m_userFeatureControl->IsComputeContextEnabled();
80 m_PacketId = computeContextEnabled ? VP_PIPELINE_PACKET_COMPUTE : VP_PIPELINE_PACKET_RENDER;
81 m_vpUserFeatureControl = m_hwInterface->m_userFeatureControl;
82 }
83 else
84 {
85 VP_RENDER_ASSERTMESSAGE("m_hwInterface or m_hwInterface->m_userFeatureControl is nullptr!");
86 }
87 }
88
~VpRenderCmdPacket()89 VpRenderCmdPacket::~VpRenderCmdPacket()
90 {
91 for (auto &samplerstate : m_kernelSamplerStateGroup)
92 {
93 if (samplerstate.second.SamplerType == MHW_SAMPLER_TYPE_AVS)
94 {
95 MOS_FreeMemAndSetNull(samplerstate.second.Avs.pMhwSamplerAvsTableParam);
96 }
97 }
98
99 MOS_Delete(m_surfMemCacheCtl);
100 }
101
Init()102 MOS_STATUS VpRenderCmdPacket::Init()
103 {
104 VP_RENDER_CHK_STATUS_RETURN(RenderCmdPacket::Init());
105
106 return MOS_STATUS_SUCCESS;
107 }
108
LoadKernel()109 MOS_STATUS VpRenderCmdPacket::LoadKernel()
110 {
111 int32_t iKrnAllocation = 0;
112 MHW_KERNEL_PARAM MhwKernelParam = {};
113 RENDERHAL_KERNEL_PARAM KernelParam = m_renderData.KernelParam;
114 // Load kernel to GSH
115 INIT_MHW_KERNEL_PARAM(MhwKernelParam, &m_renderData.KernelEntry);
116 UpdateKernelConfigParam(KernelParam);
117 iKrnAllocation = m_renderHal->pfnLoadKernel(
118 m_renderHal,
119 &KernelParam,
120 &MhwKernelParam,
121 m_kernel->GetCachedEntryForKernelLoad());
122
123 if (iKrnAllocation < 0)
124 {
125 RENDER_PACKET_ASSERTMESSAGE("kernel load failed");
126 return MOS_STATUS_UNKNOWN;
127 }
128
129 m_renderData.kernelAllocationID = iKrnAllocation;
130
131 if (m_renderData.iCurbeOffset < 0)
132 {
133 RENDER_PACKET_ASSERTMESSAGE("Curbe Set Fail, return error");
134 return MOS_STATUS_UNKNOWN;
135 }
136 // Allocate Media ID, link to kernel
137 m_renderData.mediaID = m_renderHal->pfnAllocateMediaID(
138 m_renderHal,
139 iKrnAllocation,
140 m_renderData.bindingTable,
141 m_renderData.iCurbeOffset,
142 (m_renderData.iCurbeLength),
143 0,
144 nullptr);
145
146 if (m_renderData.mediaID < 0)
147 {
148 RENDER_PACKET_ASSERTMESSAGE("Allocate Media ID failed, return error");
149 return MOS_STATUS_UNKNOWN;
150 }
151
152 return MOS_STATUS_SUCCESS;
153 }
154
SetEuThreadSchedulingMode(uint32_t mode)155 MOS_STATUS VpRenderCmdPacket::SetEuThreadSchedulingMode(uint32_t mode)
156 {
157 VP_FUNC_CALL();
158 VP_RENDER_CHK_NULL_RETURN(m_renderHal);
159 uint32_t curMode = m_renderHal->euThreadSchedulingMode;
160 if (curMode != mode)
161 {
162 if (curMode != 0)
163 {
164 RENDER_PACKET_ASSERTMESSAGE("Not support different modes in same kernelObjs!");
165 }
166 else
167 {
168 m_renderHal->euThreadSchedulingMode = mode;
169 }
170 }
171 return MOS_STATUS_SUCCESS;
172 }
173
Prepare()174 MOS_STATUS VpRenderCmdPacket::Prepare()
175 {
176 VP_FUNC_CALL();
177 VP_RENDER_CHK_NULL_RETURN(m_renderHal);
178 VP_RENDER_CHK_NULL_RETURN(m_kernelSet);
179 VP_RENDER_CHK_NULL_RETURN(m_surfMemCacheCtl);
180
181 if (m_renderHal->pStateHeap == nullptr)
182 {
183 VP_RENDER_CHK_STATUS_RETURN(m_renderHal->pfnAllocateStateHeaps(m_renderHal, &m_renderHal->StateHeapSettings));
184 if (m_renderHal->pStateHeap)
185 {
186 MHW_STATE_BASE_ADDR_PARAMS *pStateBaseParams = &m_renderHal->StateBaseAddressParams;
187
188 pStateBaseParams->presGeneralState = &m_renderHal->pStateHeap->GshOsResource;
189 pStateBaseParams->dwGeneralStateSize = m_renderHal->pStateHeap->dwSizeGSH;
190 pStateBaseParams->presDynamicState = &m_renderHal->pStateHeap->GshOsResource;
191 pStateBaseParams->dwDynamicStateSize = m_renderHal->pStateHeap->dwSizeGSH;
192 pStateBaseParams->bDynamicStateRenderTarget = false;
193 pStateBaseParams->presIndirectObjectBuffer = &m_renderHal->pStateHeap->GshOsResource;
194 pStateBaseParams->dwIndirectObjectBufferSize = m_renderHal->pStateHeap->dwSizeGSH;
195 pStateBaseParams->presInstructionBuffer = &m_renderHal->pStateHeap->IshOsResource;
196 pStateBaseParams->dwInstructionBufferSize = m_renderHal->pStateHeap->dwSizeISH;
197 }
198 }
199
200 if (m_packetResourcesPrepared)
201 {
202 VP_RENDER_NORMALMESSAGE("Resource Prepared, skip this time");
203 return MOS_STATUS_SUCCESS;
204 }
205
206 m_renderHal->euThreadSchedulingMode = 0;
207
208 VP_RENDER_CHK_STATUS_RETURN(m_kernelSet->CreateKernelObjects(
209 m_renderKernelParams,
210 m_surfSetting.surfGroup,
211 m_kernelSamplerStateGroup,
212 m_kernelConfigs,
213 m_kernelObjs,
214 *m_surfMemCacheCtl,
215 m_packetSharedContext));
216
217 if (m_submissionMode == SINGLE_KERNEL_ONLY)
218 {
219 m_kernelRenderData.clear();
220
221 VP_RENDER_CHK_NULL_RETURN(m_renderHal->pStateHeap);
222
223 m_renderHal->pStateHeap->iCurrentBindingTable = 0;
224 m_renderHal->pStateHeap->iCurrentSurfaceState = 0;
225
226 for (auto it = m_kernelObjs.begin(); it != m_kernelObjs.end(); it++)
227 {
228 m_kernel = it->second;
229 VP_RENDER_CHK_NULL_RETURN(m_kernel);
230
231 m_kernel->SetCacheCntl(m_surfMemCacheCtl);
232 VP_RENDER_CHK_STATUS_RETURN(SetEuThreadSchedulingMode(m_kernel->GetEuThreadSchedulingMode()));
233
234 // reset render Data for current kernel
235 MOS_ZeroMemory(&m_renderData, sizeof(KERNEL_PACKET_RENDER_DATA));
236
237 if (m_submissionMode != SINGLE_KERNEL_ONLY)
238 {
239 m_isMultiBindingTables = true;
240 }
241 else
242 {
243 m_isMultiBindingTables = false;
244 }
245
246 VP_RENDER_CHK_STATUS_RETURN(RenderEngineSetup());
247
248 VP_RENDER_CHK_STATUS_RETURN(KernelStateSetup());
249
250 VP_RENDER_CHK_STATUS_RETURN(SetupSurfaceState()); // once Surface setup done, surface index should be created here
251
252 VP_RENDER_CHK_STATUS_RETURN(SetupCurbeState()); // Set Curbe with updated surface index
253
254 VP_RENDER_CHK_STATUS_RETURN(LoadKernel());
255
256 VP_RENDER_CHK_STATUS_RETURN(SetupSamplerStates());
257
258 VP_RENDER_CHK_STATUS_RETURN(SetupWalkerParams());
259
260 VP_RENDER_CHK_STATUS_RETURN(m_renderHal->pfnSetVfeStateParams(
261 m_renderHal,
262 MEDIASTATE_DEBUG_COUNTER_FREE_RUNNING,
263 m_renderData.KernelParam.Thread_Count,
264 m_renderData.iCurbeLength,
265 m_renderData.iInlineLength,
266 m_renderData.scoreboardParams));
267
268 m_kernelRenderData.insert(std::make_pair(it->first, m_renderData));
269 }
270 }
271 else if (m_submissionMode == MULTI_KERNELS_SINGLE_MEDIA_STATE)
272 {
273 bool bAllocated = false;
274
275 VP_RENDER_CHK_STATUS_RETURN(m_renderHal->pfnReAllocateStateHeapsforAdvFeatureWithAllHeapsEnlarged(m_renderHal, bAllocated));
276 if (bAllocated && m_renderHal->pStateHeap)
277 {
278 MHW_STATE_BASE_ADDR_PARAMS *pStateBaseParams = &m_renderHal->StateBaseAddressParams;
279 pStateBaseParams->presGeneralState = &m_renderHal->pStateHeap->GshOsResource;
280 pStateBaseParams->dwGeneralStateSize = m_renderHal->pStateHeap->dwSizeGSH;
281 pStateBaseParams->presDynamicState = &m_renderHal->pStateHeap->GshOsResource;
282 pStateBaseParams->dwDynamicStateSize = m_renderHal->pStateHeap->dwSizeGSH;
283 pStateBaseParams->bDynamicStateRenderTarget = false;
284 pStateBaseParams->presIndirectObjectBuffer = &m_renderHal->pStateHeap->GshOsResource;
285 pStateBaseParams->dwIndirectObjectBufferSize = m_renderHal->pStateHeap->dwSizeGSH;
286 pStateBaseParams->presInstructionBuffer = &m_renderHal->pStateHeap->IshOsResource;
287 pStateBaseParams->dwInstructionBufferSize = m_renderHal->pStateHeap->dwSizeISH;
288 uint32_t heapMocs = m_renderHal->pOsInterface->pfnCachePolicyGetMemoryObject(MOS_HW_RESOURCE_USAGE_VP_INPUT_PICTURE_RENDER,
289 m_renderHal->pOsInterface->pfnGetGmmClientContext(m_renderHal->pOsInterface)).DwordValue;
290 pStateBaseParams->mocs4SurfaceState = heapMocs;
291 pStateBaseParams->mocs4GeneralState = heapMocs;
292 pStateBaseParams->mocs4DynamicState = heapMocs;
293 pStateBaseParams->mocs4InstructionCache = heapMocs;
294 pStateBaseParams->mocs4IndirectObjectBuffer = heapMocs;
295 pStateBaseParams->mocs4StatelessDataport = heapMocs;
296 }
297
298 MOS_ZeroMemory(&m_renderData, sizeof(KERNEL_PACKET_RENDER_DATA));
299 m_isMultiBindingTables = true;
300 m_isMultiKernelOneMediaState = true;
301 VP_RENDER_CHK_STATUS_RETURN(RenderEngineSetup());
302
303 m_kernelRenderData.clear();
304
305 // for multi-kernel prepare together
306 for (auto it = m_kernelObjs.begin(); it != m_kernelObjs.end(); it++)
307 {
308 m_kernel = it->second;
309 VP_RENDER_CHK_NULL_RETURN(m_kernel);
310 m_kernel->SetPerfTag();
311 VP_RENDER_CHK_STATUS_RETURN(SetEuThreadSchedulingMode(m_kernel->GetEuThreadSchedulingMode()));
312
313 if (it != m_kernelObjs.begin())
314 {
315 // reset render Data for current kernel
316 PRENDERHAL_MEDIA_STATE pMediaState = m_renderData.mediaState;
317 MOS_ZeroMemory(&m_renderData, sizeof(KERNEL_PACKET_RENDER_DATA));
318 m_renderData.mediaState = pMediaState;
319 // Assign and Reset binding table
320 RENDER_PACKET_CHK_STATUS_RETURN(m_renderHal->pfnAssignBindingTable(
321 m_renderHal,
322 &m_renderData.bindingTable));
323 }
324
325 VP_RENDER_CHK_STATUS_RETURN(KernelStateSetup());
326
327 VP_RENDER_CHK_STATUS_RETURN(SetupSurfaceState()); // once Surface setup done, surface index should be created here
328
329 VP_RENDER_CHK_STATUS_RETURN(SetupCurbeState()); // Set Curbe with updated surface index
330
331 VP_RENDER_CHK_STATUS_RETURN(LoadKernel());
332
333 VP_RENDER_CHK_STATUS_RETURN(SetupSamplerStates());
334
335 VP_RENDER_CHK_STATUS_RETURN(SetupWalkerParams());
336
337 m_kernelRenderData.insert(std::make_pair(it->first, m_renderData));
338 }
339
340 VP_RENDER_CHK_STATUS_RETURN(m_renderHal->pfnSetVfeStateParams(
341 m_renderHal,
342 MEDIASTATE_DEBUG_COUNTER_FREE_RUNNING,
343 RENDERHAL_USE_MEDIA_THREADS_MAX,
344 m_totalCurbeSize,
345 m_totoalInlineSize,
346 m_renderData.scoreboardParams));
347 }
348 else
349 {
350 return MOS_STATUS_INVALID_PARAMETER;
351 }
352
353 return MOS_STATUS_SUCCESS;
354 }
355
SetupSamplerStates()356 MOS_STATUS VpRenderCmdPacket::SetupSamplerStates()
357 {
358 VP_FUNC_CALL();
359 VP_RENDER_CHK_NULL_RETURN(m_renderHal);
360 VP_RENDER_CHK_NULL_RETURN(m_kernel);
361
362 KERNEL_SAMPLER_STATES samplerStates = {};
363
364 // For AdvKernel, SetSamplerStates is called by VpRenderKernelObj::SetKernelConfigs
365 // For some AdvKernels, when UseIndependentSamplerGroup is true, each kernel in one media state submission uses a stand alone sampler state group
366 if (!m_kernel->IsAdvKernel() || m_kernel->UseIndependentSamplerGroup())
367 {
368 // Initialize m_kernelSamplerStateGroup.
369 VP_RENDER_CHK_STATUS_RETURN(m_kernel->SetSamplerStates(m_kernelSamplerStateGroup));
370 }
371
372 for (int samplerIndex = 0, activeSamplerLeft = m_kernelSamplerStateGroup.size(); activeSamplerLeft > 0; ++samplerIndex)
373 {
374 auto it = m_kernelSamplerStateGroup.find(samplerIndex);
375 if (m_kernelSamplerStateGroup.end() != it)
376 {
377 --activeSamplerLeft;
378 samplerStates.push_back(it->second);
379 }
380 else
381 {
382 MHW_SAMPLER_STATE_PARAM param = {};
383 samplerStates.push_back(param);
384 }
385 }
386
387 if (!samplerStates.empty())
388 {
389 if (samplerStates.size() > MHW_RENDER_ENGINE_SAMPLERS_MAX)
390 {
391 MOS_STATUS_INVALID_PARAMETER;
392 }
393
394 VP_RENDER_CHK_STATUS_RETURN(m_renderHal->pfnSetAndGetSamplerStates(
395 m_renderHal,
396 m_renderData.mediaID,
397 &samplerStates[0],
398 samplerStates.size(),
399 m_kernel->GetBindlessSamplers()));
400
401 }
402
403 return MOS_STATUS_SUCCESS;
404 }
405
Submit(MOS_COMMAND_BUFFER * commandBuffer,uint8_t packetPhase)406 MOS_STATUS VpRenderCmdPacket::Submit(MOS_COMMAND_BUFFER *commandBuffer, uint8_t packetPhase)
407 {
408 VP_FUNC_CALL();
409 if (m_kernelObjs.empty())
410 {
411 VP_RENDER_ASSERTMESSAGE("No Kernel Object Creation");
412 return MOS_STATUS_NULL_POINTER;
413 }
414
415 if (m_submissionMode == SINGLE_KERNEL_ONLY)
416 {
417 VP_RENDER_CHK_STATUS_RETURN(SetupMediaWalker());
418
419 VP_RENDER_CHK_STATUS_RETURN(RenderCmdPacket::Submit(commandBuffer, packetPhase));
420 }
421 else if (m_submissionMode == MULTI_KERNELS_SINGLE_MEDIA_STATE)
422 {
423 VP_RENDER_CHK_STATUS_RETURN(SubmitWithMultiKernel(commandBuffer, packetPhase));
424 }
425 else
426 {
427 return MOS_STATUS_INVALID_PARAMETER;
428 }
429
430
431 if (!m_surfSetting.dumpLaceSurface &&
432 !m_surfSetting.dumpPostSurface)
433 {
434 VP_RENDER_CHK_STATUS_RETURN(m_kernelSet->DestroyKernelObjects(m_kernelObjs));
435 }
436
437 return MOS_STATUS_SUCCESS;
438 }
439
InitFcMemCacheControlForTarget(PVP_RENDER_CACHE_CNTL settings)440 MOS_STATUS VpRenderCmdPacket::InitFcMemCacheControlForTarget(PVP_RENDER_CACHE_CNTL settings)
441 {
442 MOS_HW_RESOURCE_DEF Usage = MOS_HW_RESOURCE_DEF_MAX;
443 MEMORY_OBJECT_CONTROL_STATE MemObjCtrl = {};
444 PMOS_INTERFACE pOsInterface = m_osInterface;
445
446 VP_RENDER_CHK_NULL_RETURN(pOsInterface);
447 VP_RENDER_CHK_NULL_RETURN(settings);
448
449 VPHAL_SET_SURF_MEMOBJCTL(settings->Composite.TargetSurfMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_INTERNAL_READ_WRITE_RENDER);
450
451 return MOS_STATUS_SUCCESS;
452 }
453
InitFcMemCacheControl(PVP_RENDER_CACHE_CNTL settings)454 MOS_STATUS VpRenderCmdPacket::InitFcMemCacheControl(PVP_RENDER_CACHE_CNTL settings)
455 {
456 MOS_HW_RESOURCE_DEF Usage = MOS_HW_RESOURCE_DEF_MAX;
457 MEMORY_OBJECT_CONTROL_STATE MemObjCtrl = {};
458 PMOS_INTERFACE pOsInterface = m_osInterface;
459
460 VP_RENDER_CHK_NULL_RETURN(settings);
461
462 if (!settings->bCompositing)
463 {
464 return MOS_STATUS_SUCCESS;
465 }
466
467 settings->Composite.bL3CachingEnabled = true;
468
469 VPHAL_SET_SURF_MEMOBJCTL(settings->Composite.PrimaryInputSurfMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_INPUT_PICTURE_RENDER);
470 VPHAL_SET_SURF_MEMOBJCTL(settings->Composite.InputSurfMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_INPUT_PICTURE_RENDER);
471
472 VP_RENDER_CHK_STATUS_RETURN(InitFcMemCacheControlForTarget(settings));
473
474 return MOS_STATUS_SUCCESS;
475 }
476
InitSurfMemCacheControl(VP_EXECUTE_CAPS packetCaps)477 MOS_STATUS VpRenderCmdPacket::InitSurfMemCacheControl(VP_EXECUTE_CAPS packetCaps)
478 {
479 MOS_HW_RESOURCE_DEF Usage = MOS_HW_RESOURCE_DEF_MAX;
480 MEMORY_OBJECT_CONTROL_STATE MemObjCtrl = {};
481 PMOS_INTERFACE pOsInterface = nullptr;
482 PVP_RENDER_CACHE_CNTL pSettings = nullptr;
483
484 VP_FUNC_CALL();
485
486 if (nullptr == m_surfMemCacheCtl)
487 {
488 m_surfMemCacheCtl = MOS_New(VP_RENDER_CACHE_CNTL);
489 VP_PUBLIC_CHK_NULL_RETURN(m_surfMemCacheCtl);
490 }
491
492 VP_PUBLIC_CHK_NULL_RETURN(m_hwInterface);
493 VP_PUBLIC_CHK_NULL_RETURN(m_hwInterface->m_osInterface);
494
495 MOS_ZeroMemory(m_surfMemCacheCtl, sizeof(*m_surfMemCacheCtl));
496
497 pOsInterface = m_hwInterface->m_osInterface;
498 pSettings = m_surfMemCacheCtl;
499
500 pSettings->bCompositing = packetCaps.bComposite;
501 pSettings->bDnDi = true;
502 pSettings->bLace = MEDIA_IS_SKU(m_hwInterface->m_skuTable, FtrLace);
503 pSettings->bHdr = MEDIA_IS_SKU(m_hwInterface->m_skuTable, FtrHDR);
504
505 VP_RENDER_CHK_STATUS_RETURN(InitFcMemCacheControl(pSettings));
506
507 if (pSettings->bDnDi)
508 {
509 pSettings->DnDi.bL3CachingEnabled = true;
510
511 VPHAL_SET_SURF_MEMOBJCTL(pSettings->DnDi.CurrentInputSurfMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_INPUT_PICTURE_RENDER);
512 VPHAL_SET_SURF_MEMOBJCTL(pSettings->DnDi.PreviousInputSurfMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_INPUT_PICTURE_RENDER);
513 VPHAL_SET_SURF_MEMOBJCTL(pSettings->DnDi.STMMInputSurfMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_INPUT_PICTURE_RENDER);
514 VPHAL_SET_SURF_MEMOBJCTL(pSettings->DnDi.STMMOutputSurfMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_OUTPUT_PICTURE_RENDER);
515 VPHAL_SET_SURF_MEMOBJCTL(pSettings->DnDi.DnOutSurfMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_OUTPUT_PICTURE_RENDER);
516
517 if (packetCaps.bVebox && !packetCaps.bSFC && !packetCaps.bRender)
518 {
519 // Disable cache for output surface in vebox only condition
520 VPHAL_SET_SURF_MEMOBJCTL(pSettings->DnDi.CurrentOutputSurfMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_OUTPUT_PICTURE_RENDER);
521 }
522 else
523 {
524 VPHAL_SET_SURF_MEMOBJCTL(pSettings->DnDi.CurrentOutputSurfMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_OUTPUT_PICTURE_RENDER);
525 }
526
527 VPHAL_SET_SURF_MEMOBJCTL(pSettings->DnDi.StatisticsOutputSurfMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_OUTPUT_PICTURE_RENDER);
528 VPHAL_SET_SURF_MEMOBJCTL(pSettings->DnDi.AlphaOrVignetteSurfMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_INTERNAL_READ_WRITE_RENDER);
529 VPHAL_SET_SURF_MEMOBJCTL(pSettings->DnDi.LaceOrAceOrRgbHistogramSurfCtrl, MOS_HW_RESOURCE_USAGE_VP_INTERNAL_READ_WRITE_RENDER);
530 VPHAL_SET_SURF_MEMOBJCTL(pSettings->DnDi.SkinScoreSurfMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_INTERNAL_READ_WRITE_RENDER);
531 VPHAL_SET_SURF_MEMOBJCTL(pSettings->DnDi.LaceLookUpTablesSurfMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_INTERNAL_READ_WRITE_RENDER);
532 VPHAL_SET_SURF_MEMOBJCTL(pSettings->DnDi.Vebox3DLookUpTablesSurfMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_INTERNAL_READ_WRITE_RENDER);
533 }
534 else
535 {
536 pSettings->DnDi.bL3CachingEnabled = false;
537
538 VPHAL_SET_SURF_MEMOBJCTL(pSettings->DnDi.CurrentInputSurfMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_INPUT_PICTURE_RENDER);
539 VPHAL_SET_SURF_MEMOBJCTL(pSettings->DnDi.PreviousInputSurfMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_INPUT_PICTURE_RENDER);
540 VPHAL_SET_SURF_MEMOBJCTL(pSettings->DnDi.STMMInputSurfMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_INPUT_PICTURE_RENDER);
541 VPHAL_SET_SURF_MEMOBJCTL(pSettings->DnDi.STMMOutputSurfMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_OUTPUT_PICTURE_RENDER);
542 VPHAL_SET_SURF_MEMOBJCTL(pSettings->DnDi.DnOutSurfMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_OUTPUT_PICTURE_RENDER);
543 VPHAL_SET_SURF_MEMOBJCTL(pSettings->DnDi.CurrentOutputSurfMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_OUTPUT_PICTURE_RENDER);
544 VPHAL_SET_SURF_MEMOBJCTL(pSettings->DnDi.StatisticsOutputSurfMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_OUTPUT_PICTURE_RENDER);
545 VPHAL_SET_SURF_MEMOBJCTL(pSettings->DnDi.AlphaOrVignetteSurfMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_INTERNAL_READ_WRITE_RENDER);
546 VPHAL_SET_SURF_MEMOBJCTL(pSettings->DnDi.LaceOrAceOrRgbHistogramSurfCtrl, MOS_HW_RESOURCE_USAGE_VP_INTERNAL_READ_WRITE_RENDER);
547 VPHAL_SET_SURF_MEMOBJCTL(pSettings->DnDi.SkinScoreSurfMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_INTERNAL_READ_WRITE_RENDER);
548 VPHAL_SET_SURF_MEMOBJCTL(pSettings->DnDi.LaceLookUpTablesSurfMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_INTERNAL_READ_WRITE_RENDER);
549 VPHAL_SET_SURF_MEMOBJCTL(pSettings->DnDi.Vebox3DLookUpTablesSurfMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_INTERNAL_READ_WRITE_RENDER);
550 }
551
552 if (pSettings->bLace)
553 {
554 VPHAL_SET_SURF_MEMOBJCTL(pSettings->Lace.FrameHistogramSurfaceMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_INTERNAL_READ_WRITE_RENDER);
555 VPHAL_SET_SURF_MEMOBJCTL(pSettings->Lace.AggregatedHistogramSurfaceMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_INTERNAL_READ_WRITE_RENDER);
556 VPHAL_SET_SURF_MEMOBJCTL(pSettings->Lace.StdStatisticsSurfaceMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_INTERNAL_READ_WRITE_RENDER);
557 VPHAL_SET_SURF_MEMOBJCTL(pSettings->Lace.PwlfInSurfaceMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_INPUT_PICTURE_RENDER);
558 VPHAL_SET_SURF_MEMOBJCTL(pSettings->Lace.PwlfOutSurfaceMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_OUTPUT_PICTURE_RENDER);
559 VPHAL_SET_SURF_MEMOBJCTL(pSettings->Lace.WeitCoefSurfaceMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_INTERNAL_READ_WRITE_RENDER);
560 }
561 else
562 {
563 VPHAL_SET_SURF_MEMOBJCTL(pSettings->Lace.FrameHistogramSurfaceMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_INTERNAL_READ_WRITE_RENDER);
564 VPHAL_SET_SURF_MEMOBJCTL(pSettings->Lace.AggregatedHistogramSurfaceMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_INTERNAL_READ_WRITE_RENDER);
565 VPHAL_SET_SURF_MEMOBJCTL(pSettings->Lace.StdStatisticsSurfaceMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_INTERNAL_READ_WRITE_RENDER);
566 VPHAL_SET_SURF_MEMOBJCTL(pSettings->Lace.PwlfInSurfaceMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_INPUT_PICTURE_RENDER);
567 VPHAL_SET_SURF_MEMOBJCTL(pSettings->Lace.PwlfOutSurfaceMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_OUTPUT_PICTURE_RENDER);
568 VPHAL_SET_SURF_MEMOBJCTL(pSettings->Lace.WeitCoefSurfaceMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_INTERNAL_READ_WRITE_RENDER);
569 VPHAL_SET_SURF_MEMOBJCTL(pSettings->Lace.GlobalToneMappingCurveLUTSurfaceMemObjCtl, MOS_HW_RESOURCE_USAGE_VP_INTERNAL_READ_WRITE_RENDER);
570 }
571
572 if (pSettings->bHdr)
573 {
574 VPHAL_SET_SURF_MEMOBJCTL(pSettings->Hdr.SourceSurfMemObjCtl, MOS_MP_RESOURCE_USAGE_SurfaceState_FF);
575 VPHAL_SET_SURF_MEMOBJCTL(pSettings->Hdr.TargetSurfMemObjCtl, MOS_MP_RESOURCE_USAGE_DEFAULT_FF);
576 VPHAL_SET_SURF_MEMOBJCTL(pSettings->Hdr.Lut2DSurfMemObjCtl, MOS_MP_RESOURCE_USAGE_SurfaceState_FF);
577 VPHAL_SET_SURF_MEMOBJCTL(pSettings->Hdr.Lut3DSurfMemObjCtl, MOS_MP_RESOURCE_USAGE_SurfaceState_FF);
578 VPHAL_SET_SURF_MEMOBJCTL(pSettings->Hdr.CoeffSurfMemObjCtl, MOS_MP_RESOURCE_USAGE_SurfaceState_FF);
579 }
580 else
581 {
582 pSettings->Hdr.bL3CachingEnabled = false;
583 VPHAL_SET_SURF_MEMOBJCTL(pSettings->Hdr.SourceSurfMemObjCtl, MOS_MP_RESOURCE_USAGE_DEFAULT);
584 VPHAL_SET_SURF_MEMOBJCTL(pSettings->Hdr.TargetSurfMemObjCtl, MOS_MP_RESOURCE_USAGE_DEFAULT);
585 VPHAL_SET_SURF_MEMOBJCTL(pSettings->Hdr.Lut2DSurfMemObjCtl, MOS_MP_RESOURCE_USAGE_DEFAULT);
586 VPHAL_SET_SURF_MEMOBJCTL(pSettings->Hdr.Lut3DSurfMemObjCtl, MOS_MP_RESOURCE_USAGE_DEFAULT);
587 VPHAL_SET_SURF_MEMOBJCTL(pSettings->Hdr.CoeffSurfMemObjCtl, MOS_MP_RESOURCE_USAGE_DEFAULT);
588 }
589
590 return MOS_STATUS_SUCCESS;
591 }
592
PacketInit(VP_SURFACE * inputSurface,VP_SURFACE * outputSurface,VP_SURFACE * previousSurface,VP_SURFACE_SETTING & surfSetting,VP_EXECUTE_CAPS packetCaps)593 MOS_STATUS VpRenderCmdPacket::PacketInit(
594 VP_SURFACE * inputSurface,
595 VP_SURFACE * outputSurface,
596 VP_SURFACE * previousSurface,
597 VP_SURFACE_SETTING &surfSetting,
598 VP_EXECUTE_CAPS packetCaps)
599 {
600 VP_FUNC_CALL();
601
602 // will remodify when normal render path enabled
603 VP_UNUSED(inputSurface);
604 VP_UNUSED(outputSurface);
605 VP_UNUSED(previousSurface);
606 VP_RENDER_CHK_NULL_RETURN(m_renderHal);
607
608 m_PacketCaps = packetCaps;
609
610 // Init packet surface params.
611 m_surfSetting = surfSetting;
612
613 m_packetResourcesPrepared = false;
614 m_kernelConfigs.clear();
615 m_renderKernelParams.clear();
616
617 m_renderHal->eufusionBypass = false;
618 m_totoalInlineSize = 0;
619
620 VP_RENDER_CHK_STATUS_RETURN(InitSurfMemCacheControl(packetCaps));
621
622 return MOS_STATUS_SUCCESS;
623 }
624
KernelStateSetup()625 MOS_STATUS VpRenderCmdPacket::KernelStateSetup()
626 {
627 VP_FUNC_CALL();
628 VP_RENDER_CHK_NULL_RETURN(m_kernel);
629
630 // Initialize States
631 MOS_ZeroMemory(&m_renderData.KernelEntry, sizeof(Kdll_CacheEntry));
632
633 // Store pointer to Kernel Parameter
634 VP_RENDER_CHK_STATUS_RETURN(m_kernel->GetKernelSettings(m_renderData.KernelParam));
635
636 // Set Parameters for Kernel Entry
637 VP_RENDER_CHK_STATUS_RETURN(m_kernel->GetKernelEntry(m_renderData.KernelEntry));
638
639 // set the Inline Data length
640 m_renderData.iInlineLength = (int32_t)m_kernel->GetInlineDataSize();
641 m_totoalInlineSize += m_renderData.iInlineLength;
642
643 VP_RENDER_CHK_STATUS_RETURN(m_kernel->GetScoreboardParams(m_renderData.scoreboardParams));
644
645 return MOS_STATUS_SUCCESS;
646 }
647
SetupSurfaceState()648 MOS_STATUS VpRenderCmdPacket::SetupSurfaceState()
649 {
650 VP_FUNC_CALL();
651 VP_RENDER_CHK_NULL_RETURN(m_kernel);
652 VP_RENDER_CHK_NULL_RETURN(m_renderHal);
653 VP_RENDER_CHK_NULL_RETURN(m_renderHal->pOsInterface);
654
655
656 if (!m_kernel->GetKernelSurfaceConfig().empty())
657 {
658 for (auto surface = m_kernel->GetKernelSurfaceConfig().begin(); surface != m_kernel->GetKernelSurfaceConfig().end(); surface++)
659 {
660 KERNEL_SURFACE_STATE_PARAM *kernelSurfaceParam = &surface->second;
661 SurfaceType type = surface->first;
662
663 RENDERHAL_SURFACE_NEXT renderHalSurface;
664 MOS_ZeroMemory(&renderHalSurface, sizeof(RENDERHAL_SURFACE_NEXT));
665
666 RENDERHAL_SURFACE_STATE_PARAMS renderSurfaceParams;
667 MOS_ZeroMemory(&renderSurfaceParams, sizeof(RENDERHAL_SURFACE_STATE_PARAMS));
668 if (kernelSurfaceParam->surfaceOverwriteParams.updatedRenderSurfaces)
669 {
670 renderSurfaceParams = kernelSurfaceParam->surfaceOverwriteParams.renderSurfaceParams;
671 }
672 else
673 {
674 renderSurfaceParams.isOutput = (kernelSurfaceParam->isOutput == true) ? 1 : 0;
675 renderSurfaceParams.Boundary = RENDERHAL_SS_BOUNDARY_ORIGINAL; // Add conditional in future for Surfaces out of range
676 renderSurfaceParams.bWidth16Align = false;
677 renderSurfaceParams.bWidthInDword_Y = true;
678 renderSurfaceParams.bWidthInDword_UV = true;
679
680 //set mem object control for cache
681 renderSurfaceParams.MemObjCtl = (m_renderHal->pOsInterface->pfnCachePolicyGetMemoryObject(
682 MOS_HW_RESOURCE_USAGE_VP_INTERNAL_READ_WRITE_RENDER,
683 m_renderHal->pOsInterface->pfnGetGmmClientContext(m_renderHal->pOsInterface))).DwordValue;
684 }
685
686 VP_SURFACE *vpSurface = nullptr;
687
688 if (m_surfSetting.surfGroup.find(type) != m_surfSetting.surfGroup.end())
689 {
690 vpSurface = m_surfSetting.surfGroup.find(type)->second;
691 }
692
693 if (vpSurface)
694 {
695 MOS_STATUS status = m_kernel->InitRenderHalSurface(type, vpSurface, &renderHalSurface);
696 if (MOS_STATUS_UNIMPLEMENTED == status)
697 {
698 // Prepare surfaces tracked in Resource manager
699 VP_RENDER_CHK_STATUS_RETURN(InitRenderHalSurface(*vpSurface, renderHalSurface));
700 VP_RENDER_CHK_STATUS_RETURN(UpdateRenderSurface(renderHalSurface, *kernelSurfaceParam));
701 }
702 else
703 {
704 VP_RENDER_CHK_STATUS_RETURN(status);
705 }
706 if (SurfaceTypeFcCscCoeff == type)
707 {
708 m_renderHal->bCmfcCoeffUpdate = true;
709 m_renderHal->pCmfcCoeffSurface = &vpSurface->osSurface->OsResource;
710 }
711 else
712 {
713 m_renderHal->bCmfcCoeffUpdate = false;
714 m_renderHal->pCmfcCoeffSurface = nullptr;
715 }
716 }
717 else
718 {
719 // State Heaps are not tracked in resource manager till now
720 VP_RENDER_CHK_STATUS_RETURN(InitStateHeapSurface(type, renderHalSurface));
721 VP_RENDER_CHK_STATUS_RETURN(UpdateRenderSurface(renderHalSurface, *kernelSurfaceParam));
722 }
723
724 if (m_hwInterface->m_vpPlatformInterface->IsRenderMMCLimitationCheckNeeded())
725 {
726 RenderMMCLimitationCheck(vpSurface, renderHalSurface, type);
727 }
728
729 uint32_t index = 0;
730 bool bWrite = renderSurfaceParams.isOutput;
731 if (renderSurfaceParams.bSurfaceTypeDefined)
732 {
733 bWrite = false;
734 }
735
736 std::set<uint32_t> stateOffsets;
737 if (kernelSurfaceParam->surfaceOverwriteParams.bindedKernel && !kernelSurfaceParam->surfaceOverwriteParams.bufferResource)
738 {
739 auto bindingMap = m_kernel->GetSurfaceBindingIndex(type);
740 if (bindingMap.empty())
741 {
742 VP_RENDER_CHK_STATUS_RETURN(MOS_STATUS_INVALID_PARAMETER);
743 }
744 VP_RENDER_CHK_STATUS_RETURN(SetSurfaceForHwAccess(
745 &renderHalSurface.OsSurface,
746 &renderHalSurface,
747 &renderSurfaceParams,
748 bindingMap,
749 bWrite,
750 stateOffsets,
751 kernelSurfaceParam->iCapcityOfSurfaceEntry,
752 kernelSurfaceParam->surfaceEntries,
753 kernelSurfaceParam->sizeOfSurfaceEntries));
754 for (uint32_t const& bti : bindingMap)
755 {
756 VP_RENDER_NORMALMESSAGE("Using Binded Index Surface. KernelID %d, SurfType %d, bti %d", m_kernel->GetKernelId(), type, bti);
757 }
758 }
759 else
760 {
761 if ((kernelSurfaceParam->surfaceOverwriteParams.updatedSurfaceParams &&
762 kernelSurfaceParam->surfaceOverwriteParams.bufferResource &&
763 kernelSurfaceParam->surfaceOverwriteParams.bindedKernel))
764 {
765 auto bindingMap = m_kernel->GetSurfaceBindingIndex(type);
766 if (bindingMap.empty())
767 {
768 VP_RENDER_CHK_STATUS_RETURN(MOS_STATUS_INVALID_PARAMETER);
769 }
770 VP_RENDER_CHK_STATUS_RETURN(SetBufferForHwAccess(
771 &renderHalSurface.OsSurface,
772 &renderHalSurface,
773 &renderSurfaceParams,
774 bindingMap,
775 bWrite,
776 stateOffsets));
777 for (uint32_t const &bti : bindingMap)
778 {
779 VP_RENDER_NORMALMESSAGE("Using Binded Index Buffer. KernelID %d, SurfType %d, bti %d", m_kernel->GetKernelId(), type, bti);
780 }
781 }
782 else if ((kernelSurfaceParam->surfaceOverwriteParams.updatedSurfaceParams &&
783 kernelSurfaceParam->surfaceOverwriteParams.bufferResource &&
784 !kernelSurfaceParam->surfaceOverwriteParams.bindedKernel) ||
785 (!kernelSurfaceParam->surfaceOverwriteParams.updatedSurfaceParams &&
786 (renderHalSurface.OsSurface.Type == MOS_GFXRES_BUFFER ||
787 renderHalSurface.OsSurface.Type == MOS_GFXRES_INVALID)))
788 {
789 index = SetBufferForHwAccess(
790 &renderHalSurface.OsSurface,
791 &renderHalSurface,
792 &renderSurfaceParams,
793 bWrite,
794 stateOffsets);
795 VP_RENDER_CHK_STATUS_RETURN(m_kernel->UpdateCurbeBindingIndex(type, index));
796 VP_RENDER_NORMALMESSAGE("Using UnBinded Index Buffer. KernelID %d, SurfType %d, bti %d", m_kernel->GetKernelId(), type, index);
797 }
798 else
799 {
800 index = SetSurfaceForHwAccess(
801 &renderHalSurface.OsSurface,
802 &renderHalSurface,
803 &renderSurfaceParams,
804 bWrite,
805 stateOffsets);
806 VP_RENDER_CHK_STATUS_RETURN(m_kernel->UpdateCurbeBindingIndex(type, index));
807 VP_RENDER_NORMALMESSAGE("Using UnBinded Index Surface. KernelID %d, SurfType %d, bti %d. If 1D buffer overwrite to 2D for use, it will go SetSurfaceForHwAccess()", m_kernel->GetKernelId(), type, index);
808 }
809 }
810
811 if (stateOffsets.size() > 0)
812 {
813 m_kernel->UpdateBindlessSurfaceResource(type, stateOffsets);
814 }
815 }
816 VP_RENDER_CHK_STATUS_RETURN(m_kernel->UpdateCompParams());
817 }
818 else
819 {
820 // Reset status
821 m_renderHal->bCmfcCoeffUpdate = false;
822 m_renderHal->pCmfcCoeffSurface = nullptr;
823 }
824
825 return MOS_STATUS_SUCCESS;
826 }
827
RenderMMCLimitationCheck(VP_SURFACE * vpSurface,RENDERHAL_SURFACE_NEXT & renderHalSurface,SurfaceType type)828 void VpRenderCmdPacket::RenderMMCLimitationCheck(VP_SURFACE *vpSurface, RENDERHAL_SURFACE_NEXT &renderHalSurface, SurfaceType type)
829 {
830 if (vpSurface &&
831 vpSurface->osSurface &&
832 (type == SurfaceTypeFcTarget0 ||
833 type == SurfaceTypeFcTarget1 ||
834 type == SurfaceTypeRenderOutput))
835 {
836 if (!vpSurface->osSurface->OsResource.bUncompressedWriteNeeded &&
837 vpSurface->osSurface->CompressionMode == MOS_MMC_MC &&
838 IsRenderUncompressedWriteNeeded(vpSurface))
839 {
840 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
841 eStatus = m_renderHal->pOsInterface->pfnDecompResource(m_renderHal->pOsInterface, &vpSurface->osSurface->OsResource);
842
843 if (eStatus != MOS_STATUS_SUCCESS)
844 {
845 VP_RENDER_NORMALMESSAGE("inplace decompression failed for render target.");
846 }
847 else
848 {
849 VP_RENDER_NORMALMESSAGE("inplace decompression enabled for render target RECT is not compression block align.");
850 vpSurface->osSurface->OsResource.bUncompressedWriteNeeded = 1;
851 }
852 }
853 if (vpSurface->osSurface->OsResource.bUncompressedWriteNeeded)
854 {
855 renderHalSurface.OsSurface.CompressionMode = MOS_MMC_MC;
856 }
857 }
858 }
859
IsRenderUncompressedWriteNeeded(PVP_SURFACE VpSurface)860 bool VpRenderCmdPacket::IsRenderUncompressedWriteNeeded(PVP_SURFACE VpSurface)
861 {
862 VP_FUNC_CALL();
863
864 if ((!VpSurface) ||
865 (!VpSurface->osSurface))
866 {
867 return false;
868 }
869
870 auto *skuTable = m_renderHal->pOsInterface->pfnGetSkuTable(m_renderHal->pOsInterface);
871 if (!MEDIA_IS_SKU(skuTable, FtrE2ECompression))
872 {
873 return false;
874 }
875
876 if (m_renderHal->pOsInterface && m_renderHal->pOsInterface->bSimIsActive)
877 {
878 return false;
879 }
880
881 uint32_t byteInpixel = 1;
882 #if !EMUL
883 if (!VpSurface->osSurface->OsResource.pGmmResInfo)
884 {
885 VP_RENDER_NORMALMESSAGE("IsSFCUncompressedWriteNeeded cannot support non GMM info cases");
886 return false;
887 }
888
889 byteInpixel = VpSurface->osSurface->OsResource.pGmmResInfo->GetBitsPerPixel() >> 3;
890
891 if (byteInpixel == 0)
892 {
893 VP_RENDER_NORMALMESSAGE("surface format is not a valid format for Render");
894 return false;
895 }
896 #endif // !EMUL
897
898 uint32_t writeAlignInWidth = 32 / byteInpixel;
899 uint32_t writeAlignInHeight = 8;
900
901
902 if ((VpSurface->rcSrc.top % writeAlignInHeight) ||
903 ((VpSurface->rcSrc.bottom - VpSurface->rcSrc.top) % writeAlignInHeight) ||
904 (VpSurface->rcSrc.left % writeAlignInWidth) ||
905 ((VpSurface->rcSrc.right - VpSurface->rcSrc.left) % writeAlignInWidth))
906 {
907 VP_RENDER_NORMALMESSAGE(
908 "Render Target Uncompressed write needed, \
909 VpSurface->rcSrc.top % d, \
910 VpSurface->rcSrc.bottom % d, \
911 VpSurface->rcSrc.left % d, \
912 VpSurface->rcSrc.right % d \
913 VpSurface->Format % d",
914 VpSurface->rcSrc.top,
915 VpSurface->rcSrc.bottom,
916 VpSurface->rcSrc.left,
917 VpSurface->rcSrc.right,
918 VpSurface->osSurface->Format);
919
920 return true;
921 }
922
923 return false;
924 }
925
SetupCurbeState()926 MOS_STATUS VpRenderCmdPacket::SetupCurbeState()
927 {
928 VP_FUNC_CALL();
929 MT_LOG1(MT_VP_HAL_RENDER_SETUP_CURBE_STATE, MT_NORMAL, MT_FUNC_START, 1);
930 VP_RENDER_CHK_NULL_RETURN(m_kernel);
931
932 // set the Curbe Data length
933 void * curbeData = nullptr;
934 uint32_t curbeLength = 0;
935 uint32_t curbeLengthAligned = 0;
936
937 VP_RENDER_CHK_STATUS_RETURN(m_kernel->GetCurbeState(curbeData, curbeLength, curbeLengthAligned, m_renderData.KernelParam, m_renderHal->dwCurbeBlockAlign));
938
939 m_renderData.iCurbeOffset = m_renderHal->pfnLoadCurbeData(
940 m_renderHal,
941 m_renderData.mediaState,
942 curbeData,
943 curbeLength);
944
945 if (m_renderData.iCurbeOffset < 0)
946 {
947 RENDER_PACKET_ASSERTMESSAGE("Curbe Set Fail, return error");
948 return MOS_STATUS_UNKNOWN;
949 }
950
951 m_renderData.iCurbeLength = curbeLengthAligned;
952
953 m_totalCurbeSize += m_renderData.iCurbeLength;
954
955 m_kernel->FreeCurbe(curbeData);
956 MT_LOG2(MT_VP_HAL_RENDER_SETUP_CURBE_STATE, MT_NORMAL, MT_FUNC_END, 1, MT_MOS_STATUS, MOS_STATUS_SUCCESS);
957
958 return MOS_STATUS_SUCCESS;
959 }
960
GetSurface(SurfaceType type)961 VP_SURFACE *VpRenderCmdPacket::GetSurface(SurfaceType type)
962 {
963 VP_FUNC_CALL();
964
965 auto it = m_surfSetting.surfGroup.find(type);
966 VP_SURFACE *surf = (m_surfSetting.surfGroup.end() != it) ? it->second : nullptr;
967
968 return surf;
969 }
970
SetupMediaWalker()971 MOS_STATUS VpRenderCmdPacket::SetupMediaWalker()
972 {
973 VP_FUNC_CALL();
974
975 switch (m_walkerType)
976 {
977 case WALKER_TYPE_MEDIA:
978 MOS_ZeroMemory(&m_mediaWalkerParams, sizeof(MHW_WALKER_PARAMS));
979 // Prepare Media Walker Params
980 VP_RENDER_CHK_STATUS_RETURN(PrepareMediaWalkerParams(m_renderData.walkerParam, m_mediaWalkerParams));
981 break;
982 case WALKER_TYPE_COMPUTE:
983 // Parepare Compute Walker Param
984 MOS_ZeroMemory(&m_gpgpuWalkerParams, sizeof(MHW_GPGPU_WALKER_PARAMS));
985 VP_RENDER_CHK_STATUS_RETURN(PrepareComputeWalkerParams(m_renderData.walkerParam, m_gpgpuWalkerParams));
986 break;
987 case WALKER_TYPE_DISABLED:
988 default:
989 // using BB for walker setting
990 return MOS_STATUS_UNIMPLEMENTED;
991 }
992
993 return MOS_STATUS_SUCCESS;
994 }
995
SetupWalkerParams()996 MOS_STATUS VpRenderCmdPacket::SetupWalkerParams()
997 {
998 VP_FUNC_CALL();
999 MT_LOG1(MT_VP_HAL_RENDER_SETUP_WALKER_PARAM, MT_NORMAL, MT_FUNC_START, 1);
1000 VP_RENDER_CHK_NULL_RETURN(m_kernel);
1001
1002 VP_RENDER_CHK_STATUS_RETURN(m_kernel->GetWalkerSetting(m_renderData.walkerParam, m_renderData));
1003 MT_LOG2(MT_VP_CREATE, MT_NORMAL, MT_FUNC_END, 1, MT_MOS_STATUS, MOS_STATUS_SUCCESS);
1004
1005 return MOS_STATUS_SUCCESS;
1006 }
1007
//!
//! \brief    Hook for adjusting the kernel configuration parameters
//! \details  Intentionally empty in this class: kernelParam is left untouched.
//! \param    [in,out] kernelParam
//!           Kernel parameters; not modified by this implementation.
//!
void VpRenderCmdPacket::UpdateKernelConfigParam(RENDERHAL_KERNEL_PARAM &kernelParam)
{
    // In VP, 32 alignment with 5 bits right shift has already been done for CURBE_Length.
    // No need update here.
}
1013
OcaDumpDbgInfo(MOS_COMMAND_BUFFER & cmdBuffer,MOS_CONTEXT & mosContext)1014 void VpRenderCmdPacket::OcaDumpDbgInfo(MOS_COMMAND_BUFFER &cmdBuffer, MOS_CONTEXT &mosContext)
1015 {
1016 // Add kernel info to log.
1017 for (auto it = m_kernelObjs.begin(); it != m_kernelObjs.end(); it++)
1018 {
1019 auto kernel = it->second;
1020 if (kernel)
1021 {
1022 kernel->OcaDumpKernelInfo(cmdBuffer, mosContext);
1023 }
1024 else
1025 {
1026 VP_RENDER_ASSERTMESSAGE("nullptr in m_kernelObjs!");
1027 }
1028 }
1029 // Add vphal param to log.
1030 HalOcaInterfaceNext::DumpVphalParam(cmdBuffer, (MOS_CONTEXT_HANDLE)&mosContext, m_renderHal->pVphalOcaDumper);
1031
1032 if (m_vpUserFeatureControl)
1033 {
1034 HalOcaInterfaceNext::DumpVpUserFeautreControlInfo(cmdBuffer, &mosContext, m_vpUserFeatureControl->GetOcaFeautreControlInfo());
1035 }
1036 }
1037
//!
//! \brief    Set up media frame tracking in the generic prolog parameters
//! \details  Forwards directly to VpCmdPacket::SetMediaFrameTracking().
//! \param    [in,out] genericPrologParams
//!           Prolog parameters handed to the base implementation.
//! \return   MOS_STATUS
//!           Status returned by the base implementation.
//!
MOS_STATUS VpRenderCmdPacket::SetMediaFrameTracking(RENDERHAL_GENERIC_PROLOG_PARAMS &genericPrologParams)
{
    return VpCmdPacket::SetMediaFrameTracking(genericPrologParams);
}
1042
InitRenderHalSurface(VP_SURFACE & surface,RENDERHAL_SURFACE & renderSurface)1043 MOS_STATUS VpRenderCmdPacket::InitRenderHalSurface(VP_SURFACE &surface, RENDERHAL_SURFACE &renderSurface)
1044 {
1045 VP_FUNC_CALL();
1046 VP_RENDER_CHK_NULL_RETURN(surface.osSurface);
1047 PMOS_INTERFACE pOsInterface = nullptr;
1048 RENDER_PACKET_CHK_NULL_RETURN(m_renderHal->pOsInterface);
1049 pOsInterface = m_renderHal->pOsInterface;
1050 RENDER_PACKET_CHK_NULL_RETURN(pOsInterface->pfnGetMemoryCompressionMode);
1051 RENDER_PACKET_CHK_NULL_RETURN(pOsInterface->pfnGetMemoryCompressionFormat);
1052
1053 // Update compression status
1054 VP_PUBLIC_CHK_STATUS_RETURN(pOsInterface->pfnGetMemoryCompressionMode(pOsInterface,
1055 &surface.osSurface->OsResource,
1056 &surface.osSurface->MmcState));
1057
1058 VP_PUBLIC_CHK_STATUS_RETURN(pOsInterface->pfnGetMemoryCompressionFormat(pOsInterface,
1059 &surface.osSurface->OsResource,
1060 &surface.osSurface->CompressionFormat));
1061
1062 if (Mos_ResourceIsNull(&renderSurface.OsSurface.OsResource))
1063 {
1064 renderSurface.OsSurface = *surface.osSurface;
1065 }
1066
1067 renderSurface.rcSrc = surface.rcSrc;
1068 renderSurface.rcDst = surface.rcDst;
1069 renderSurface.rcMaxSrc = surface.rcMaxSrc;
1070 renderSurface.SurfType =
1071 InitRenderHalSurfType(surface.SurfType);
1072
1073 return MOS_STATUS_SUCCESS;
1074 }
InitStateHeapSurface(SurfaceType type,RENDERHAL_SURFACE & renderSurface)1075 MOS_STATUS VpRenderCmdPacket::InitStateHeapSurface(SurfaceType type, RENDERHAL_SURFACE &renderSurface)
1076 {
1077 VP_FUNC_CALL();
1078 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1079 MOS_SURFACE mosSurface;
1080
1081 MOS_ZeroMemory(&mosSurface, sizeof(MOS_SURFACE));
1082
1083 // Check for Vebox Heap readiness
1084 const MHW_VEBOX_HEAP *pVeboxHeap = nullptr;
1085 std::shared_ptr<mhw::vebox::Itf> veboxItf = nullptr;
1086
1087 VP_RENDER_CHK_NULL_RETURN(m_hwInterface);
1088
1089 VP_PUBLIC_CHK_STATUS_RETURN(m_hwInterface->m_vpPlatformInterface->GetVeboxHeapInfo(
1090 m_hwInterface,
1091 &pVeboxHeap));
1092
1093 VP_RENDER_CHK_NULL_RETURN(pVeboxHeap);
1094
1095 switch (type)
1096 {
1097 case SurfaceTypeVeboxStateHeap_Drv:
1098 mosSurface.OsResource = pVeboxHeap->DriverResource;
1099 break;
1100 case SurfaceTypeVeboxStateHeap_Knr:
1101 mosSurface.OsResource = pVeboxHeap->KernelResource;
1102 break;
1103 default:
1104 eStatus = MOS_STATUS_UNIMPLEMENTED;
1105 VP_RENDER_ASSERTMESSAGE("Not Inplenmented in driver now, return fail, surfacetype %d", type);
1106 break;
1107 }
1108
1109 VP_RENDER_CHK_STATUS_RETURN(RenderCmdPacket::InitRenderHalSurface(mosSurface, &renderSurface));
1110
1111 return eStatus;
1112 }
UpdateRenderSurface(RENDERHAL_SURFACE_NEXT & renderSurface,KERNEL_SURFACE_STATE_PARAM & kernelParams)1113 MOS_STATUS VpRenderCmdPacket::UpdateRenderSurface(RENDERHAL_SURFACE_NEXT &renderSurface, KERNEL_SURFACE_STATE_PARAM &kernelParams)
1114 {
1115 VP_FUNC_CALL();
1116 auto &overwriteParam = kernelParams.surfaceOverwriteParams;
1117 if (overwriteParam.updatedSurfaceParams)
1118 {
1119 if (overwriteParam.width && overwriteParam.height)
1120 {
1121 renderSurface.OsSurface.dwWidth = overwriteParam.width;
1122 renderSurface.OsSurface.dwHeight = overwriteParam.height;
1123 renderSurface.OsSurface.dwQPitch = overwriteParam.height;
1124 }
1125
1126 renderSurface.OsSurface.dwPitch = overwriteParam.pitch != 0 ? overwriteParam.pitch : renderSurface.OsSurface.dwPitch;
1127
1128 if (renderSurface.OsSurface.dwPitch < renderSurface.OsSurface.dwWidth)
1129 {
1130 VP_RENDER_ASSERTMESSAGE("Invalid Surface where Pitch < Width, return invalid Overwrite Params");
1131 return MOS_STATUS_INVALID_PARAMETER;
1132 }
1133
1134 renderSurface.OsSurface.Format = (overwriteParam.format != 0) ? overwriteParam.format : renderSurface.OsSurface.Format;
1135
1136 if (0 == renderSurface.OsSurface.dwQPitch)
1137 {
1138 renderSurface.OsSurface.dwQPitch = renderSurface.OsSurface.dwHeight;
1139 }
1140 }
1141
1142 return MOS_STATUS_SUCCESS;
1143 }
1144
SamplerAvsCalcScalingTable(MHW_AVS_PARAMS & avsParameters,MOS_FORMAT SrcFormat,bool bVertical,float fLumaScale,float fChromaScale,uint32_t dwChromaSiting,bool b8TapAdaptiveEnable)1145 MOS_STATUS VpRenderCmdPacket::SamplerAvsCalcScalingTable(
1146 MHW_AVS_PARAMS &avsParameters,
1147 MOS_FORMAT SrcFormat,
1148 bool bVertical,
1149 float fLumaScale,
1150 float fChromaScale,
1151 uint32_t dwChromaSiting,
1152 bool b8TapAdaptiveEnable)
1153 {
1154 VP_FUNC_CALL();
1155 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1156 MHW_PLANE Plane;
1157 int32_t iUvPhaseOffset;
1158 uint32_t dwHwPhrase;
1159 uint32_t YCoefTableSize;
1160 uint32_t UVCoefTableSize;
1161 float fLumaScaleParam;
1162 float fChromaScaleParam;
1163 int32_t * piYCoefsParam;
1164 int32_t * piUVCoefsParam;
1165 float fHPStrength;
1166
1167 VP_PUBLIC_CHK_NULL_RETURN(avsParameters.piYCoefsY);
1168 VP_PUBLIC_CHK_NULL_RETURN(avsParameters.piYCoefsX);
1169 VP_PUBLIC_CHK_NULL_RETURN(avsParameters.piUVCoefsY);
1170 VP_PUBLIC_CHK_NULL_RETURN(avsParameters.piUVCoefsX);
1171
1172 YCoefTableSize = (NUM_POLYPHASE_Y_ENTRIES * NUM_HW_POLYPHASE_TABLES_G9 * sizeof(float));
1173 UVCoefTableSize = (NUM_POLYPHASE_UV_ENTRIES * NUM_HW_POLYPHASE_TABLES_G9 * sizeof(float));
1174 dwHwPhrase = NUM_HW_POLYPHASE_TABLES_G9;
1175
1176 fHPStrength = 0.0F;
1177 piYCoefsParam = bVertical ? avsParameters.piYCoefsY : avsParameters.piYCoefsX;
1178 piUVCoefsParam = bVertical ? avsParameters.piUVCoefsY : avsParameters.piUVCoefsX;
1179
1180 // Recalculate Horizontal or Vertical scaling table
1181 if (SrcFormat != avsParameters.Format) //|| fLumaScale != fLumaScaleParam || fChromaScale != fChromaScaleParam
1182 {
1183 MOS_ZeroMemory(piYCoefsParam, YCoefTableSize);
1184 MOS_ZeroMemory(piUVCoefsParam, UVCoefTableSize);
1185
1186 // 4-tap filtering for RGformat G-channel if 8tap adaptive filter is not enabled.
1187 Plane = (IS_RGB32_FORMAT(SrcFormat) && !b8TapAdaptiveEnable) ? MHW_U_PLANE : MHW_Y_PLANE;
1188
1189 // For 1x scaling in horizontal direction, use special coefficients for filtering
1190 // we don't do this when bForcePolyPhaseCoefs flag is set
1191 if (fLumaScale == 1.0F && !avsParameters.bForcePolyPhaseCoefs)
1192 {
1193 VP_RENDER_CHK_STATUS_RETURN(SetNearestModeTable(
1194 piYCoefsParam,
1195 Plane,
1196 true));
1197 // If the 8-tap adaptive is enabled for all channel, then UV/RB use the same coefficient as Y/G
1198 // So, coefficient for UV/RB channels caculation can be passed
1199 if (!b8TapAdaptiveEnable)
1200 {
1201 if (fChromaScale == 1.0F)
1202 {
1203 VP_RENDER_CHK_STATUS_RETURN(SetNearestModeTable(
1204 piUVCoefsParam,
1205 MHW_U_PLANE,
1206 true));
1207 }
1208 else
1209 {
1210 if (dwChromaSiting & (bVertical ? MHW_CHROMA_SITING_VERT_TOP : MHW_CHROMA_SITING_HORZ_LEFT))
1211 {
1212 // No Chroma Siting
1213 VP_RENDER_CHK_STATUS_RETURN(CalcPolyphaseTablesUV(
1214 piUVCoefsParam,
1215 2.0F,
1216 fChromaScale));
1217 }
1218 else
1219 {
1220 // Chroma siting offset needs to be added
1221 if (dwChromaSiting & (bVertical ? MHW_CHROMA_SITING_VERT_CENTER : MHW_CHROMA_SITING_HORZ_CENTER))
1222 {
1223 iUvPhaseOffset = MOS_UF_ROUND(0.5F * 16.0F); // U0.4
1224 }
1225 else //if (ChromaSiting & (bVertical ? MHW_CHROMA_SITING_VERT_BOTTOM : MHW_CHROMA_SITING_HORZ_RIGHT))
1226 {
1227 iUvPhaseOffset = MOS_UF_ROUND(1.0F * 16.0F); // U0.4
1228 }
1229
1230 VP_RENDER_CHK_STATUS_RETURN(CalcPolyphaseTablesUVOffset(
1231 piUVCoefsParam,
1232 3.0F,
1233 fChromaScale,
1234 iUvPhaseOffset));
1235 }
1236 }
1237 }
1238 }
1239 else
1240 {
1241 // Clamp the Scaling Factor if > 1.0x
1242 fLumaScale = MOS_MIN(1.0F, fLumaScale);
1243
1244 VP_RENDER_CHK_STATUS_RETURN(CalcPolyphaseTablesY(
1245 piYCoefsParam,
1246 fLumaScale,
1247 Plane,
1248 SrcFormat,
1249 fHPStrength,
1250 true,
1251 dwHwPhrase));
1252
1253 // If the 8-tap adaptive is enabled for all channel, then UV/RB use the same coefficient as Y/G
1254 // So, coefficient for UV/RB channels caculation can be passed
1255 if (!b8TapAdaptiveEnable)
1256 {
1257 {
1258 if (fChromaScale == 1.0F)
1259 {
1260 VP_RENDER_CHK_STATUS_RETURN(SetNearestModeTable(
1261 piUVCoefsParam,
1262 MHW_U_PLANE,
1263 true));
1264 }
1265 else
1266 {
1267 // If Chroma Siting info is present
1268 if (dwChromaSiting & (bVertical ? MHW_CHROMA_SITING_VERT_TOP : MHW_CHROMA_SITING_HORZ_LEFT))
1269 {
1270 // No Chroma Siting
1271 VP_RENDER_CHK_STATUS_RETURN(CalcPolyphaseTablesUV(
1272 piUVCoefsParam,
1273 2.0F,
1274 fChromaScale));
1275 }
1276 else
1277 {
1278 // Chroma siting offset needs to be added
1279 if (dwChromaSiting & (bVertical ? MHW_CHROMA_SITING_VERT_CENTER : MHW_CHROMA_SITING_HORZ_CENTER))
1280 {
1281 iUvPhaseOffset = MOS_UF_ROUND(0.5F * 16.0F); // U0.4
1282 }
1283 else //if (ChromaSiting & (bVertical ? MHW_CHROMA_SITING_VERT_BOTTOM : MHW_CHROMA_SITING_HORZ_RIGHT))
1284 {
1285 iUvPhaseOffset = MOS_UF_ROUND(1.0F * 16.0F); // U0.4
1286 }
1287
1288 VP_RENDER_CHK_STATUS_RETURN(CalcPolyphaseTablesUVOffset(
1289 piUVCoefsParam,
1290 3.0F,
1291 fChromaScale,
1292 iUvPhaseOffset));
1293 }
1294 }
1295 }
1296 }
1297 }
1298 }
1299 return MOS_STATUS_SUCCESS;
1300 }
1301
SetNearestModeTable(int32_t * iCoefs,uint32_t dwPlane,bool bBalancedFilter)1302 MOS_STATUS VpRenderCmdPacket::SetNearestModeTable(
1303 int32_t *iCoefs,
1304 uint32_t dwPlane,
1305 bool bBalancedFilter)
1306 {
1307 VP_FUNC_CALL();
1308 uint32_t dwNumEntries;
1309 uint32_t dwOffset;
1310 uint32_t i;
1311 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1312
1313 MHW_FUNCTION_ENTER;
1314
1315 MHW_CHK_NULL(iCoefs);
1316
1317 if (dwPlane == MHW_GENERIC_PLANE || dwPlane == MHW_Y_PLANE)
1318 {
1319 dwNumEntries = NUM_POLYPHASE_Y_ENTRIES;
1320 dwOffset = 3;
1321 }
1322 else // if (dwPlane == MHW_U_PLANE || dwPlane == MHW_V_PLANE)
1323 {
1324 dwNumEntries = NUM_POLYPHASE_UV_ENTRIES;
1325 dwOffset = 1;
1326 }
1327
1328 for (i = 0; i <= NUM_HW_POLYPHASE_TABLES / 2; i++)
1329 {
1330 iCoefs[i * dwNumEntries + dwOffset] = 0x40;
1331 }
1332
1333 if (bBalancedFilter)
1334 {
1335 // Fix offset so that filter is balanced
1336 for (i = (NUM_HW_POLYPHASE_TABLES / 2 + 1); i < NUM_HW_POLYPHASE_TABLES; i++)
1337 {
1338 iCoefs[i * dwNumEntries + dwOffset + 1] = 0x40;
1339 }
1340 }
1341
1342 finish:
1343 return eStatus;
1344 }
1345
CalcPolyphaseTablesUV(int32_t * piCoefs,float fLanczosT,float fInverseScaleFactor)1346 MOS_STATUS VpRenderCmdPacket::CalcPolyphaseTablesUV(
1347 int32_t *piCoefs,
1348 float fLanczosT,
1349 float fInverseScaleFactor)
1350 {
1351 VP_FUNC_CALL();
1352 int32_t phaseCount, tableCoefUnit, centerPixel, sumQuantCoefs;
1353 double phaseCoefs[MHW_SCALER_UV_WIN_SIZE];
1354 double startOffset, sf, base, sumCoefs, pos;
1355 int32_t minCoef[MHW_SCALER_UV_WIN_SIZE];
1356 int32_t maxCoef[MHW_SCALER_UV_WIN_SIZE];
1357 int32_t i, j;
1358 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1359
1360 MHW_FUNCTION_ENTER;
1361
1362 MHW_CHK_NULL(piCoefs);
1363
1364 phaseCount = MHW_TABLE_PHASE_COUNT;
1365 centerPixel = (MHW_SCALER_UV_WIN_SIZE / 2) - 1;
1366 startOffset = (double)(-centerPixel);
1367 tableCoefUnit = 1 << MHW_TBL_COEF_PREC;
1368 sf = MOS_MIN(1.0, fInverseScaleFactor); // Sf isn't used for upscaling
1369
1370 MOS_ZeroMemory(piCoefs, sizeof(int32_t) * MHW_SCALER_UV_WIN_SIZE * phaseCount);
1371 MOS_ZeroMemory(minCoef, sizeof(minCoef));
1372 MOS_ZeroMemory(maxCoef, sizeof(maxCoef));
1373
1374 if (sf < 1.0F)
1375 {
1376 fLanczosT = 2.0F;
1377 }
1378
1379 for (i = 0; i < phaseCount; ++i, piCoefs += MHW_SCALER_UV_WIN_SIZE)
1380 {
1381 // Write all
1382 // Note - to shift by a half you need to a half to each phase.
1383 base = startOffset - (double)(i) / (double)(phaseCount);
1384 sumCoefs = 0.0;
1385
1386 for (j = 0; j < MHW_SCALER_UV_WIN_SIZE; ++j)
1387 {
1388 pos = base + (double)j;
1389 phaseCoefs[j] = MosUtilities::MosLanczos((float)(pos * sf), MHW_SCALER_UV_WIN_SIZE, fLanczosT);
1390 sumCoefs += phaseCoefs[j];
1391 }
1392 // Normalize coefs and save
1393 for (j = 0; j < MHW_SCALER_UV_WIN_SIZE; ++j)
1394 {
1395 piCoefs[j] = (int32_t)floor((0.5 + (double)(tableCoefUnit) * (phaseCoefs[j] / sumCoefs)));
1396
1397 //For debug purposes:
1398 minCoef[j] = MOS_MIN(minCoef[j], piCoefs[j]);
1399 maxCoef[j] = MOS_MAX(maxCoef[j], piCoefs[j]);
1400 }
1401
1402 // Recalc center coef
1403 sumQuantCoefs = 0;
1404 for (j = 0; j < MHW_SCALER_UV_WIN_SIZE; ++j)
1405 {
1406 sumQuantCoefs += piCoefs[j];
1407 }
1408
1409 // Fix center coef so that filter is balanced
1410 if (i <= phaseCount / 2)
1411 {
1412 piCoefs[centerPixel] -= sumQuantCoefs - tableCoefUnit;
1413 }
1414 else
1415 {
1416 piCoefs[centerPixel + 1] -= sumQuantCoefs - tableCoefUnit;
1417 }
1418 }
1419
1420 finish:
1421 return eStatus;
1422 }
1423
//!
//! \brief    Calculate the luma (Y) polyphase coefficient tables
//! \details  For each of dwHwPhase phases a row of dwNumEntries Lanczos taps
//!           is evaluated, optionally convolved with a 3-tap high-pass kernel
//!           (generic / Y planes only), normalized by the sum of the
//!           unconvolved taps, quantized to 1 << MHW_AVS_TBL_COEF_PREC and
//!           rounded. The rounding residue is then removed from the center tap
//!           (phases up to NUM_POLYPHASE_TABLES / 2) or the tap after it.
//! \param    [out] iCoefs
//!           Destination table, dwNumEntries entries per phase; must not be null.
//! \param    [in] fScaleFactor
//!           Scale factor applied to the tap positions.
//! \param    [in] dwPlane
//!           Plane selector; generic / Y planes use NUM_POLYPHASE_Y_ENTRIES
//!           taps, other planes NUM_POLYPHASE_UV_ENTRIES.
//! \param    [in] srcFmt
//!           Source format, used to select the Lanczos parameter.
//! \param    [in] fHPStrength
//!           Strength of the high-pass convolution.
//! \param    [in] bUse8x8Filter
//!           true selects MosLanczos over dwNumEntries taps, false selects
//!           MosLanczosG over NUM_POLYPHASE_5x5_Y_ENTRIES.
//! \param    [in] dwHwPhase
//!           Number of phases to generate.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS, or the status set by the null check.
//!
MOS_STATUS VpRenderCmdPacket::CalcPolyphaseTablesY(
    int32_t *  iCoefs,
    float      fScaleFactor,
    uint32_t   dwPlane,
    MOS_FORMAT srcFmt,
    float      fHPStrength,
    bool       bUse8x8Filter,
    uint32_t   dwHwPhase)
{
    VP_FUNC_CALL();
    uint32_t   dwNumEntries;
    uint32_t   dwTableCoefUnit;
    uint32_t   i, j;
    int32_t    k;
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
    float      fPhaseCoefs[NUM_POLYPHASE_Y_ENTRIES];
    float      fPhaseCoefsCopy[NUM_POLYPHASE_Y_ENTRIES];
    float      fStartOffset;
    float      fHPFilter[3], fHPSum, fHPHalfPhase;  // Only used for Y_PLANE
    float      fBase, fPos, fSumCoefs;
    float      fLanczosT;
    int32_t    iCenterPixel;
    int32_t    iSumQuantCoefs;

    MHW_FUNCTION_ENTER;

    MHW_CHK_NULL(iCoefs);
    MHW_ASSERT((dwHwPhase == MHW_NUM_HW_POLYPHASE_TABLES) || (dwHwPhase == NUM_HW_POLYPHASE_TABLES));

    // Number of taps per phase depends on the plane.
    if (dwPlane == MHW_GENERIC_PLANE || dwPlane == MHW_Y_PLANE)
    {
        dwNumEntries = NUM_POLYPHASE_Y_ENTRIES;
    }
    else  // if (dwPlane == MHW_U_PLANE || dwPlane == MHW_V_PLANE)
    {
        dwNumEntries = NUM_POLYPHASE_UV_ENTRIES;
    }

    MOS_ZeroMemory(fPhaseCoefs, sizeof(fPhaseCoefs));
    MOS_ZeroMemory(fPhaseCoefsCopy, sizeof(fPhaseCoefsCopy));

    dwTableCoefUnit = 1 << MHW_AVS_TBL_COEF_PREC;
    iCenterPixel    = dwNumEntries / 2 - 1;
    fStartOffset    = (float)(-iCenterPixel);

    // Lanczos parameter: 4 (downscale) or 8 for the luma-like cases below,
    // 2 for everything else.
    if ((IS_YUV_FORMAT(srcFmt) &&
            dwPlane != MHW_U_PLANE &&
            dwPlane != MHW_V_PLANE) ||
        ((IS_RGB32_FORMAT(srcFmt) ||
             srcFmt == Format_Y410 ||
             srcFmt == Format_AYUV) &&
            dwPlane == MHW_Y_PLANE))
    {
        if (fScaleFactor < 1.0F)
        {
            fLanczosT = 4.0F;
        }
        else
        {
            fLanczosT = 8.0F;
        }
    }
    else  // if (dwPlane == MHW_U_PLANE || dwPlane == MHW_V_PLANE || (IS_RGB_FORMAT(srcFmt) && dwPlane != MHW_V_PLANE))
    {
        fLanczosT = 2.0F;
    }

    for (i = 0; i < dwHwPhase; i++)
    {
        // Position of the first tap for this phase.
        fBase     = fStartOffset - (float)i / (float)NUM_POLYPHASE_TABLES;
        fSumCoefs = 0.0F;

        for (j = 0; j < dwNumEntries; j++)
        {
            fPos = fBase + (float)j;

            if (bUse8x8Filter)
            {
                fPhaseCoefs[j] = fPhaseCoefsCopy[j] = MosUtilities::MosLanczos(fPos * fScaleFactor, dwNumEntries, fLanczosT);
            }
            else
            {
                fPhaseCoefs[j] = fPhaseCoefsCopy[j] = MosUtilities::MosLanczosG(fPos * fScaleFactor, NUM_POLYPHASE_5x5_Y_ENTRIES, fLanczosT);
            }

            // The normalization sum is taken before the high-pass convolution.
            fSumCoefs += fPhaseCoefs[j];
        }

        // Convolve with HP
        if (dwPlane == MHW_GENERIC_PLANE || dwPlane == MHW_Y_PLANE)
        {
            // Half phase is mirrored around the middle of the phase range.
            if (i <= NUM_POLYPHASE_TABLES / 2)
            {
                fHPHalfPhase = (float)i / (float)NUM_POLYPHASE_TABLES;
            }
            else
            {
                fHPHalfPhase = (float)(NUM_POLYPHASE_TABLES - i) / (float)NUM_POLYPHASE_TABLES;
            }
            fHPFilter[0] = fHPFilter[2] = -fHPStrength * MosUtilities::MosSinc(fHPHalfPhase * MOS_PI);
            fHPFilter[1]                = 1.0F + 2.0F * fHPStrength;

            for (j = 0; j < dwNumEntries; j++)
            {
                fHPSum = 0.0F;
                for (k = -1; k <= 1; k++)
                {
                    // Neighbours outside the tap row are skipped. The signed
                    // check comes first, so the unsigned j + k is only
                    // evaluated when it cannot wrap below zero.
                    if ((((long)j + k) >= 0) && (j + k < dwNumEntries))
                    {
                        fHPSum += fPhaseCoefsCopy[(int32_t)j + k] * fHPFilter[k + 1];
                    }
                    fPhaseCoefs[j] = fHPSum;
                }
            }
        }

        // Normalize coefs and save
        iSumQuantCoefs = 0;
        for (j = 0; j < dwNumEntries; j++)
        {
            iCoefs[i * dwNumEntries + j] = (int32_t)floor(0.5F + (float)dwTableCoefUnit * fPhaseCoefs[j] / fSumCoefs);
            iSumQuantCoefs += iCoefs[i * dwNumEntries + j];
        }

        // Fix center coef so that filter is balanced
        if (i <= NUM_POLYPHASE_TABLES / 2)
        {
            iCoefs[i * dwNumEntries + iCenterPixel] -= iSumQuantCoefs - dwTableCoefUnit;
        }
        else
        {
            iCoefs[i * dwNumEntries + iCenterPixel + 1] -= iSumQuantCoefs - dwTableCoefUnit;
        }
    }

finish:
    return eStatus;
}
1562
CalcPolyphaseTablesUVOffset(int32_t * piCoefs,float fLanczosT,float fInverseScaleFactor,int32_t iUvPhaseOffset)1563 MOS_STATUS VpRenderCmdPacket::CalcPolyphaseTablesUVOffset(
1564 int32_t *piCoefs,
1565 float fLanczosT,
1566 float fInverseScaleFactor,
1567 int32_t iUvPhaseOffset)
1568 {
1569 VP_FUNC_CALL();
1570 int32_t phaseCount, tableCoefUnit, centerPixel, sumQuantCoefs;
1571 double phaseCoefs[MHW_SCALER_UV_WIN_SIZE];
1572 double startOffset, sf, pos, sumCoefs, base;
1573 int32_t minCoef[MHW_SCALER_UV_WIN_SIZE];
1574 int32_t maxCoef[MHW_SCALER_UV_WIN_SIZE];
1575 int32_t i, j;
1576 int32_t adjusted_phase;
1577 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1578
1579 MHW_FUNCTION_ENTER;
1580
1581 MHW_CHK_NULL(piCoefs);
1582
1583 phaseCount = MHW_TABLE_PHASE_COUNT;
1584 centerPixel = (MHW_SCALER_UV_WIN_SIZE / 2) - 1;
1585 startOffset = (double)(-centerPixel +
1586 (double)iUvPhaseOffset / (double)(phaseCount));
1587 tableCoefUnit = 1 << MHW_TBL_COEF_PREC;
1588
1589 MOS_ZeroMemory(minCoef, sizeof(minCoef));
1590 MOS_ZeroMemory(maxCoef, sizeof(maxCoef));
1591 MOS_ZeroMemory(piCoefs, sizeof(int32_t) * MHW_SCALER_UV_WIN_SIZE * phaseCount);
1592
1593 sf = MOS_MIN(1.0, fInverseScaleFactor); // Sf isn't used for upscaling
1594 if (sf < 1.0)
1595 {
1596 fLanczosT = 3.0;
1597 }
1598
1599 for (i = 0; i < phaseCount; ++i, piCoefs += MHW_SCALER_UV_WIN_SIZE)
1600 {
1601 // Write all
1602 // Note - to shift by a half you need to a half to each phase.
1603 base = startOffset - (double)(i) / (double)(phaseCount);
1604 sumCoefs = 0.0;
1605
1606 for (j = 0; j < MHW_SCALER_UV_WIN_SIZE; ++j)
1607 {
1608 pos = base + (double)j;
1609 phaseCoefs[j] = MosUtilities::MosLanczos((float)(pos * sf), 6 /*MHW_SCALER_UV_WIN_SIZE*/, fLanczosT);
1610 sumCoefs += phaseCoefs[j];
1611 }
1612 // Normalize coefs and save
1613 for (j = 0; j < MHW_SCALER_UV_WIN_SIZE; ++j)
1614 {
1615 piCoefs[j] = (int32_t)floor((0.5 + (double)(tableCoefUnit) * (phaseCoefs[j] / sumCoefs)));
1616
1617 // For debug purposes:
1618 minCoef[j] = MOS_MIN(minCoef[j], piCoefs[j]);
1619 maxCoef[j] = MOS_MAX(maxCoef[j], piCoefs[j]);
1620 }
1621
1622 // Recalc center coef
1623 sumQuantCoefs = 0;
1624 for (j = 0; j < MHW_SCALER_UV_WIN_SIZE; ++j)
1625 {
1626 sumQuantCoefs += piCoefs[j];
1627 }
1628
1629 // Fix center coef so that filter is balanced
1630 adjusted_phase = i - iUvPhaseOffset;
1631 if (adjusted_phase <= phaseCount / 2)
1632 {
1633 piCoefs[centerPixel] -= sumQuantCoefs - tableCoefUnit;
1634 }
1635 else // if(adjusted_phase < phaseCount)
1636 {
1637 piCoefs[centerPixel + 1] -= sumQuantCoefs - tableCoefUnit;
1638 }
1639 }
1640
1641 finish:
1642 return eStatus;
1643 }
1644
SubmitWithMultiKernel(MOS_COMMAND_BUFFER * commandBuffer,uint8_t packetPhase)1645 MOS_STATUS VpRenderCmdPacket::SubmitWithMultiKernel(MOS_COMMAND_BUFFER *commandBuffer, uint8_t packetPhase)
1646 {
1647 VP_FUNC_CALL();
1648 PMOS_INTERFACE pOsInterface = nullptr;
1649 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1650 uint32_t dwSyncTag = 0;
1651 int32_t i = 0, iRemaining = 0;
1652 MHW_MEDIA_STATE_FLUSH_PARAM FlushParam = {};
1653 bool bEnableSLM = false;
1654 RENDERHAL_GENERIC_PROLOG_PARAMS GenericPrologParams = {};
1655 MOS_RESOURCE GpuStatusBuffer = {};
1656 MOS_CONTEXT * pOsContext = nullptr;
1657 PMHW_MI_MMIOREGISTERS pMmioRegisters = nullptr;
1658 std::shared_ptr<mhw::mi::Itf> m_miItf = nullptr;
1659
1660 RENDER_PACKET_CHK_NULL_RETURN(m_renderHal);
1661 RENDER_PACKET_CHK_NULL_RETURN(m_renderHal->pRenderHalPltInterface);
1662 RENDER_PACKET_CHK_NULL_RETURN(m_renderHal->pRenderHalPltInterface->GetMmioRegisters(m_renderHal));
1663 RENDER_PACKET_CHK_NULL_RETURN(m_renderHal->pOsInterface);
1664 RENDER_PACKET_CHK_NULL_RETURN(m_renderHal->pOsInterface->pOsContext);
1665
1666 eStatus = MOS_STATUS_UNKNOWN;
1667 pOsInterface = m_renderHal->pOsInterface;
1668 iRemaining = 0;
1669 FlushParam = g_cRenderHal_InitMediaStateFlushParams;
1670 pOsContext = pOsInterface->pOsContext;
1671 pMmioRegisters = m_renderHal->pRenderHalPltInterface->GetMmioRegisters(m_renderHal);
1672
1673 RENDER_PACKET_CHK_STATUS_RETURN(SetPowerMode(kernelCombinedFc));
1674
1675 m_renderHal->pRenderHalPltInterface->On1stLevelBBStart(m_renderHal, commandBuffer, pOsContext, pOsInterface->CurrentGpuContextHandle, pMmioRegisters);
1676
1677 OcaDumpDbgInfo(*commandBuffer, *pOsContext);
1678
1679 RENDER_PACKET_CHK_STATUS_RETURN(SetMediaFrameTracking(GenericPrologParams));
1680
1681 // Initialize command buffer and insert prolog
1682 RENDER_PACKET_CHK_STATUS_RETURN(m_renderHal->pfnInitCommandBuffer(m_renderHal, commandBuffer, &GenericPrologParams));
1683
1684 RENDER_PACKET_CHK_STATUS_RETURN(m_renderHal->pRenderHalPltInterface->AddPerfCollectStartCmd(m_renderHal, pOsInterface, commandBuffer));
1685
1686 RENDER_PACKET_CHK_STATUS_RETURN(m_renderHal->pRenderHalPltInterface->StartPredicate(m_renderHal, commandBuffer));
1687
1688 // Write timing data for 3P budget
1689 RENDER_PACKET_CHK_STATUS_RETURN(m_renderHal->pfnSendTimingData(m_renderHal, commandBuffer, true));
1690
1691 bEnableSLM = false; // Media walker first
1692 RENDER_PACKET_CHK_STATUS_RETURN(m_renderHal->pfnSetCacheOverrideParams(
1693 m_renderHal,
1694 &m_renderHal->L3CacheSettings,
1695 bEnableSLM));
1696
1697 // Flush media states
1698 VP_RENDER_CHK_STATUS_RETURN(SendMediaStates(m_renderHal, commandBuffer));
1699
1700 // Write back GPU Status tag
1701 if (!pOsInterface->bEnableKmdMediaFrameTracking)
1702 {
1703 RENDER_PACKET_CHK_STATUS_RETURN(m_renderHal->pfnSendRcsStatusTag(m_renderHal, commandBuffer));
1704 }
1705
1706 RENDER_PACKET_CHK_STATUS_RETURN(m_renderHal->pRenderHalPltInterface->StopPredicate(m_renderHal, commandBuffer));
1707
1708 RENDER_PACKET_CHK_STATUS_RETURN(m_renderHal->pRenderHalPltInterface->AddPerfCollectEndCmd(m_renderHal, pOsInterface, commandBuffer));
1709
1710 // Write timing data for 3P budget
1711 RENDER_PACKET_CHK_STATUS_RETURN(m_renderHal->pfnSendTimingData(m_renderHal, commandBuffer, false));
1712
1713 MHW_PIPE_CONTROL_PARAMS PipeControlParams;
1714
1715 MOS_ZeroMemory(&PipeControlParams, sizeof(PipeControlParams));
1716 PipeControlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE;
1717 PipeControlParams.bGenericMediaStateClear = true;
1718 PipeControlParams.bIndirectStatePointersDisable = true;
1719 PipeControlParams.bDisableCSStall = false;
1720
1721 RENDER_PACKET_CHK_NULL_RETURN(pOsInterface->pfnGetSkuTable);
1722 auto *skuTable = pOsInterface->pfnGetSkuTable(pOsInterface);
1723 if (skuTable && MEDIA_IS_SKU(skuTable, FtrEnablePPCFlush))
1724 {
1725 // Add PPC fulsh
1726 PipeControlParams.bPPCFlush = true;
1727 }
1728 RENDER_PACKET_CHK_STATUS_RETURN(m_renderHal->pRenderHalPltInterface->AddMiPipeControl(m_renderHal, commandBuffer, &PipeControlParams));
1729
1730 if (MEDIA_IS_WA(m_renderHal->pWaTable, WaSendDummyVFEafterPipelineSelect))
1731 {
1732 MHW_VFE_PARAMS VfeStateParams = {};
1733 VfeStateParams.dwNumberofURBEntries = 1;
1734 RENDER_PACKET_CHK_STATUS_RETURN(m_renderHal->pRenderHalPltInterface->AddMediaVfeCmd(m_renderHal, commandBuffer, &VfeStateParams));
1735 }
1736
1737 // Add media flush command in case HW not cleaning the media state
1738 if (MEDIA_IS_WA(m_renderHal->pWaTable, WaMSFWithNoWatermarkTSGHang))
1739 {
1740 FlushParam.bFlushToGo = true;
1741 if (m_walkerType == WALKER_TYPE_MEDIA)
1742 {
1743 FlushParam.ui8InterfaceDescriptorOffset = m_mediaWalkerParams.InterfaceDescriptorOffset;
1744 }
1745 else
1746 {
1747 RENDER_PACKET_ASSERTMESSAGE("ERROR, pWalkerParams is nullptr and cannot get InterfaceDescriptorOffset.");
1748 }
1749
1750 RENDER_PACKET_CHK_STATUS_RETURN(m_renderHal->pRenderHalPltInterface->AddMediaStateFlush(m_renderHal, commandBuffer, &FlushParam));
1751 }
1752 else if (MEDIA_IS_WA(m_renderHal->pWaTable, WaAddMediaStateFlushCmd))
1753 {
1754 RENDER_PACKET_CHK_STATUS_RETURN(m_renderHal->pRenderHalPltInterface->AddMediaStateFlush(m_renderHal, commandBuffer, &FlushParam));
1755 }
1756
1757 #if (_DEBUG || _RELEASE_INTERNAL)
1758 RENDER_PACKET_CHK_STATUS_RETURN(StallBatchBuffer(commandBuffer));
1759 #endif
1760
1761 HalOcaInterfaceNext::On1stLevelBBEnd(*commandBuffer, *pOsInterface);
1762
1763 if (pBatchBuffer)
1764 {
1765 // Send Batch Buffer end command (HW/OS dependent)
1766 RENDER_PACKET_CHK_STATUS_RETURN(m_renderHal->pRenderHalPltInterface->AddMiBatchBufferEnd(m_renderHal, commandBuffer, nullptr));
1767 }
1768 else if (IsMiBBEndNeeded(pOsInterface))
1769 {
1770 // Send Batch Buffer end command for 1st level Batch Buffer
1771 RENDER_PACKET_CHK_STATUS_RETURN(m_renderHal->pRenderHalPltInterface->AddMiBatchBufferEnd(m_renderHal, commandBuffer, nullptr));
1772 }
1773 else if (m_renderHal->pOsInterface->bNoParsingAssistanceInKmd)
1774 {
1775 RENDER_PACKET_CHK_STATUS_RETURN(m_renderHal->pRenderHalPltInterface->AddMiBatchBufferEnd(m_renderHal, commandBuffer, nullptr));
1776 }
1777
1778 MOS_NULL_RENDERING_FLAGS NullRenderingFlags;
1779
1780 NullRenderingFlags =
1781 pOsInterface->pfnGetNullHWRenderFlags(pOsInterface);
1782
1783 if ((NullRenderingFlags.VPLgca ||
1784 NullRenderingFlags.VPGobal) == false)
1785 {
1786 dwSyncTag = m_renderHal->pStateHeap->dwNextTag++;
1787
1788 // Set media state and batch buffer as busy
1789 m_renderHal->pStateHeap->pCurMediaState->bBusy = true;
1790 if (pBatchBuffer)
1791 {
1792 pBatchBuffer->bBusy = true;
1793 pBatchBuffer->dwSyncTag = dwSyncTag;
1794 }
1795 }
1796
1797 return MOS_STATUS_SUCCESS;
1798 }
1799
DumpOutput()1800 MOS_STATUS VpRenderCmdPacket::DumpOutput()
1801 {
1802 VP_FUNC_CALL();
1803
1804 return MOS_STATUS_SUCCESS;
1805 }
1806
PrintWalkerParas(MHW_GPGPU_WALKER_PARAMS & WalkerParams)1807 void VpRenderCmdPacket::PrintWalkerParas(MHW_GPGPU_WALKER_PARAMS& WalkerParams)
1808 {
1809 #if (_DEBUG || _RELEASE_INTERNAL)
1810 std::string inlineData = "";
1811 if (WalkerParams.inlineDataLength > 0 && WalkerParams.inlineData != nullptr)
1812 {
1813 for (uint32_t i = 0; i < WalkerParams.inlineDataLength; ++i)
1814 {
1815 uint8_t iData = WalkerParams.inlineData[i];
1816 std::stringstream hex;
1817 hex << "0x" << std::hex << std::setfill('0') << std::setw(2) << static_cast<int>(iData) << " ";
1818 inlineData += hex.str();
1819 }
1820 }
1821 VP_RENDER_VERBOSEMESSAGE("GpGPU WalkerParams: InterfaceDescriptorOffset = %x, GpGpuEnable = %d, IndirectDataLength = %d, IndirectDataStartAddress = %x, BindingTableID %d, ForcePreferredSLMZero %d",
1822 WalkerParams.InterfaceDescriptorOffset,
1823 WalkerParams.GpGpuEnable,
1824 WalkerParams.IndirectDataLength,
1825 WalkerParams.IndirectDataStartAddress,
1826 WalkerParams.BindingTableID,
1827 WalkerParams.ForcePreferredSLMZero);
1828 VP_RENDER_VERBOSEMESSAGE("GpGPU WalkerParams: ThreadWidth = %d, ThreadHeight = %d, ThreadDepth = %d, GroupWidth = %d, GroupHeight = %d, GroupDepth = %d, GroupStartingX = %d, GroupStartingY = %d, GroupStartingZ = %d, SLMSize %d",
1829 WalkerParams.ThreadWidth,
1830 WalkerParams.ThreadHeight,
1831 WalkerParams.ThreadDepth,
1832 WalkerParams.GroupWidth,
1833 WalkerParams.GroupHeight,
1834 WalkerParams.GroupDepth,
1835 WalkerParams.GroupStartingX,
1836 WalkerParams.GroupStartingY,
1837 WalkerParams.GroupStartingZ,
1838 WalkerParams.SLMSize);
1839 VP_RENDER_VERBOSEMESSAGE("GpGPU WalkerParams: GenerateLocalId %d, EmitLocal %d, EmitInlineParameter %d, HasBarrier %d",
1840 WalkerParams.isGenerateLocalID,
1841 WalkerParams.emitLocal,
1842 WalkerParams.isEmitInlineParameter,
1843 WalkerParams.hasBarrier);
1844 VP_RENDER_VERBOSEMESSAGE("GpGPU WalkerParams: InlineDataLength = %d, InlineData = %s",
1845 WalkerParams.inlineDataLength,
1846 inlineData.c_str());
1847 #endif
1848 }
1849
PrintWalkerParas(MHW_WALKER_PARAMS & WalkerParams)1850 void VpRenderCmdPacket::PrintWalkerParas(MHW_WALKER_PARAMS &WalkerParams)
1851 {
1852 #if (_DEBUG || _RELEASE_INTERNAL)
1853 VP_RENDER_VERBOSEMESSAGE("WalkerParams: InterfaceDescriptorOffset = %x, CmWalkerEnable = %x, ColorCountMinusOne = %x, UseScoreboard = %x, ScoreboardMask = %x, MidLoopUnitX = %x, MidLoopUnitY = %x, MiddleLoopExtraSteps = %x",
1854 WalkerParams.InterfaceDescriptorOffset,
1855 WalkerParams.CmWalkerEnable,
1856 WalkerParams.ColorCountMinusOne,
1857 WalkerParams.UseScoreboard,
1858 WalkerParams.ScoreboardMask,
1859 WalkerParams.MidLoopUnitX,
1860 WalkerParams.MidLoopUnitY,
1861 WalkerParams.MiddleLoopExtraSteps);
1862 VP_RENDER_VERBOSEMESSAGE("WalkerParams: GroupIdLoopSelect = %x, InlineDataLength = %x, pInlineData = %x, dwLocalLoopExecCount = %x, dwGlobalLoopExecCount = %x, WalkerMode = %x, BlockResolution = %x, LocalStart = %x",
1863 WalkerParams.GroupIdLoopSelect,
1864 WalkerParams.InlineDataLength,
1865 WalkerParams.pInlineData,
1866 WalkerParams.dwLocalLoopExecCount,
1867 WalkerParams.dwGlobalLoopExecCount,
1868 WalkerParams.WalkerMode,
1869 WalkerParams.BlockResolution,
1870 WalkerParams.LocalStart);
1871 VP_RENDER_VERBOSEMESSAGE("WalkerParams: LocalEnd = %x, LocalOutLoopStride = %x, LocalInnerLoopUnit = %x, GlobalResolution = %x, GlobalStart = %x, GlobalOutlerLoopStride = %x, GlobalInnerLoopUnit = %x, bAddMediaFlush = %x, bRequestSingleSlice = %x, IndirectDataLength = %x, IndirectDataStartAddress = %x",
1872 WalkerParams.LocalEnd,
1873 WalkerParams.LocalOutLoopStride,
1874 WalkerParams.LocalInnerLoopUnit,
1875 WalkerParams.GlobalResolution,
1876 WalkerParams.GlobalStart,
1877 WalkerParams.GlobalOutlerLoopStride,
1878 WalkerParams.GlobalInnerLoopUnit,
1879 WalkerParams.bAddMediaFlush,
1880 WalkerParams.bRequestSingleSlice,
1881 WalkerParams.IndirectDataLength,
1882 WalkerParams.IndirectDataStartAddress);
1883 #endif
1884 }
1885
SendMediaStates(PRENDERHAL_INTERFACE pRenderHal,PMOS_COMMAND_BUFFER pCmdBuffer)1886 MOS_STATUS VpRenderCmdPacket::SendMediaStates(
1887 PRENDERHAL_INTERFACE pRenderHal,
1888 PMOS_COMMAND_BUFFER pCmdBuffer)
1889 {
1890 VP_FUNC_CALL();
1891 PMOS_INTERFACE pOsInterface = nullptr;
1892 PRENDERHAL_STATE_HEAP pStateHeap = nullptr;
1893 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1894 MHW_VFE_PARAMS * pVfeStateParams = nullptr;
1895 MOS_CONTEXT * pOsContext = nullptr;
1896 MHW_MI_LOAD_REGISTER_IMM_PARAMS loadRegisterImmParams = {};
1897 PMHW_MI_MMIOREGISTERS pMmioRegisters = nullptr;
1898 MOS_OCA_BUFFER_HANDLE hOcaBuf = 0;
1899 bool flushL1 = false;
1900 //---------------------------------------
1901 MHW_RENDERHAL_CHK_NULL(pRenderHal);
1902 MHW_RENDERHAL_CHK_NULL(pRenderHal->pStateHeap);
1903 MHW_RENDERHAL_CHK_NULL(pRenderHal->pRenderHalPltInterface);
1904 MHW_RENDERHAL_ASSERT(pRenderHal->pStateHeap->bGshLocked);
1905 RENDER_PACKET_CHK_NULL_RETURN(pRenderHal->pRenderHalPltInterface);
1906 RENDER_PACKET_CHK_NULL_RETURN(pRenderHal->pRenderHalPltInterface->GetMmioRegisters(pRenderHal));
1907
1908 //---------------------------------------
1909 pOsInterface = pRenderHal->pOsInterface;
1910 pStateHeap = pRenderHal->pStateHeap;
1911 pOsContext = pOsInterface->pOsContext;
1912 pMmioRegisters = pRenderHal->pRenderHalPltInterface->GetMmioRegisters(pRenderHal);
1913
1914 // Setup L3$ Config, LRI commands used here & hence must be launched from a secure bb
1915 pRenderHal->L3CacheSettings.bEnableSLM = (m_walkerType == WALKER_TYPE_COMPUTE && m_slmSize > 0) ? true : false;
1916 MHW_RENDERHAL_CHK_STATUS(pRenderHal->pfnEnableL3Caching(pRenderHal, &pRenderHal->L3CacheSettings));
1917
1918 // Send L3 Cache Configuration
1919 MHW_RENDERHAL_CHK_STATUS(pRenderHal->pRenderHalPltInterface->SetL3Cache(pRenderHal, pCmdBuffer));
1920
1921 MHW_RENDERHAL_CHK_STATUS(pRenderHal->pRenderHalPltInterface->EnablePreemption(pRenderHal, pCmdBuffer));
1922
1923 // Send Pipeline Select command
1924 MHW_RENDERHAL_CHK_STATUS(pRenderHal->pRenderHalPltInterface->AddPipelineSelectCmd(pRenderHal, pCmdBuffer, (m_walkerType == WALKER_TYPE_COMPUTE) ? true : false));
1925
1926 // The binding table for surface states is at end of command buffer. No need to add it to indirect state heap.
1927 HalOcaInterfaceNext::OnIndirectState(*pCmdBuffer, (MOS_CONTEXT_HANDLE)pOsContext, pRenderHal->StateBaseAddressParams.presInstructionBuffer, pStateHeap->CurIDEntryParams.dwKernelOffset, false, pStateHeap->iKernelUsedForDump);
1928
1929 // Send State Base Address command
1930 MHW_RENDERHAL_CHK_STATUS(pRenderHal->pfnSendStateBaseAddress(pRenderHal, pCmdBuffer));
1931
1932 if (pRenderHal->bComputeContextInUse && !pRenderHal->isBindlessHeapInUse)
1933 {
1934 pRenderHal->pRenderHalPltInterface->SendTo3DStateBindingTablePoolAlloc(pRenderHal, pCmdBuffer);
1935 }
1936
1937 // Send Surface States
1938 MHW_RENDERHAL_CHK_STATUS(pRenderHal->pfnSendSurfaces(pRenderHal, pCmdBuffer));
1939
1940 // Send SIP State if ASM debug enabled
1941 if (pRenderHal->bIsaAsmDebugEnable)
1942 {
1943 MHW_RENDERHAL_CHK_STATUS(pRenderHal->pRenderHalPltInterface->AddSipStateCmd(pRenderHal, pCmdBuffer));
1944 }
1945
1946 pVfeStateParams = pRenderHal->pRenderHalPltInterface->GetVfeStateParameters();
1947 if (!pRenderHal->bComputeContextInUse)
1948 {
1949 // set VFE State
1950 MHW_RENDERHAL_CHK_STATUS(pRenderHal->pRenderHalPltInterface->AddMediaVfeCmd(pRenderHal, pCmdBuffer, pVfeStateParams));
1951 }
1952 else
1953 {
1954 if (!pRenderHal->isBindlessHeapInUse)
1955 {
1956 // set CFE State
1957 MHW_RENDERHAL_CHK_STATUS(pRenderHal->pRenderHalPltInterface->AddCfeStateCmd(pRenderHal, pCmdBuffer, pVfeStateParams));
1958 }
1959 }
1960
1961 // Send CURBE Load
1962 if (!pRenderHal->bComputeContextInUse)
1963 {
1964 MHW_RENDERHAL_CHK_STATUS(pRenderHal->pfnSendCurbeLoad(pRenderHal, pCmdBuffer));
1965 }
1966
1967 // Send Interface Descriptor Load
1968 if (!pRenderHal->bComputeContextInUse)
1969 {
1970 MHW_RENDERHAL_CHK_STATUS(pRenderHal->pfnSendMediaIdLoad(pRenderHal, pCmdBuffer));
1971 }
1972
1973 // Send Chroma Keys
1974 MHW_RENDERHAL_CHK_STATUS(pRenderHal->pfnSendChromaKey(pRenderHal, pCmdBuffer));
1975
1976 // Send Palettes in use
1977 MHW_RENDERHAL_CHK_STATUS(pRenderHal->pfnSendPalette(pRenderHal, pCmdBuffer));
1978
1979 pRenderHal->pRenderHalPltInterface->OnDispatch(pRenderHal, pCmdBuffer, pOsInterface, pMmioRegisters);
1980
1981 for (uint32_t kernelIndex = 0; kernelIndex < m_kernelRenderData.size(); kernelIndex++)
1982 {
1983 auto it = m_kernelRenderData.find(kernelIndex);
1984 if (it == m_kernelRenderData.end())
1985 {
1986 eStatus = MOS_STATUS_INVALID_PARAMETER;
1987 goto finish;
1988 }
1989
1990 if (kernelIndex > 0 && it->second.walkerParam.bSyncFlag)
1991 {
1992 MHW_PIPE_CONTROL_PARAMS pipeCtlParams = g_cRenderHal_InitPipeControlParams;
1993 pipeCtlParams.dwPostSyncOp = MHW_FLUSH_NOWRITE;
1994 pipeCtlParams.dwFlushMode = MHW_FLUSH_CUSTOM;
1995 pipeCtlParams.bInvalidateTextureCache = true;
1996 pipeCtlParams.bFlushRenderTargetCache = true;
1997
1998 if (it->second.walkerParam.pipeControlParams.bUpdateNeeded)
1999 {
2000 pipeCtlParams.bHdcPipelineFlush = it->second.walkerParam.pipeControlParams.bEnableDataPortFlush;
2001 pipeCtlParams.bUnTypedDataPortCacheFlush = it->second.walkerParam.pipeControlParams.bUnTypedDataPortCacheFlush;
2002 pipeCtlParams.bFlushRenderTargetCache = it->second.walkerParam.pipeControlParams.bFlushRenderTargetCache;
2003 pipeCtlParams.bInvalidateTextureCache = it->second.walkerParam.pipeControlParams.bInvalidateTextureCache;
2004 }
2005
2006 if (flushL1)
2007 { //Flush L1 cache after consumer walker when there is a producer-consumer relationship walker.
2008 pipeCtlParams.bUnTypedDataPortCacheFlush = true;
2009 pipeCtlParams.bHdcPipelineFlush = true;
2010 }
2011 MHW_RENDERHAL_CHK_STATUS(pRenderHal->pRenderHalPltInterface->AddMiPipeControl(pRenderHal,
2012 pCmdBuffer,
2013 &pipeCtlParams));
2014 }
2015
2016 if (m_walkerType == WALKER_TYPE_MEDIA)
2017 {
2018 MOS_ZeroMemory(&m_mediaWalkerParams, sizeof(m_mediaWalkerParams));
2019
2020 MHW_RENDERHAL_CHK_STATUS(PrepareMediaWalkerParams(it->second.walkerParam, m_mediaWalkerParams));
2021
2022 MHW_RENDERHAL_CHK_STATUS(pRenderHal->pRenderHalPltInterface->AddMediaObjectWalkerCmd(pRenderHal,
2023 pCmdBuffer,
2024 &m_mediaWalkerParams));
2025
2026 PrintWalkerParas(m_mediaWalkerParams);
2027 }
2028 else if (m_walkerType == WALKER_TYPE_COMPUTE)
2029 {
2030 MOS_ZeroMemory(&m_gpgpuWalkerParams, sizeof(m_gpgpuWalkerParams));
2031
2032 MHW_RENDERHAL_CHK_STATUS(PrepareComputeWalkerParams(it->second.walkerParam, m_gpgpuWalkerParams));
2033
2034 if (m_submissionMode == MULTI_KERNELS_SINGLE_MEDIA_STATE)
2035 {
2036 pRenderHal->iKernelAllocationID = it->second.kernelAllocationID;
2037 }
2038
2039 MHW_RENDERHAL_CHK_STATUS(pRenderHal->pRenderHalPltInterface->SendComputeWalker(
2040 pRenderHal,
2041 pCmdBuffer,
2042 &m_gpgpuWalkerParams));
2043
2044 flushL1 = it->second.walkerParam.bFlushL1;
2045 PrintWalkerParas(m_gpgpuWalkerParams);
2046 }
2047 else
2048 {
2049 eStatus = MOS_STATUS_UNIMPLEMENTED;
2050 goto finish;
2051 }
2052 }
2053
2054 // This need not be secure, since PPGTT will be used here. But moving this after
2055 // L3 cache configuration will delay UMD from fetching another media state.
2056 // Send Sync Tag
2057 MHW_RENDERHAL_CHK_STATUS(pRenderHal->pfnSendSyncTag(pRenderHal, pCmdBuffer));
2058
2059 m_kernelRenderData.clear();
2060
2061 finish:
2062 return eStatus;
2063 }
2064
SetFcParams(PRENDER_FC_PARAMS params)2065 MOS_STATUS VpRenderCmdPacket::SetFcParams(PRENDER_FC_PARAMS params)
2066 {
2067 VP_FUNC_CALL();
2068 VP_RENDER_CHK_NULL_RETURN(params);
2069
2070 m_kernelConfigs.insert(std::make_pair(params->kernelId, (void *)params));
2071
2072 KERNEL_PARAMS kernelParams = {};
2073 kernelParams.kernelId = params->kernelId;
2074 m_renderKernelParams.push_back(kernelParams);
2075 m_isMultiBindingTables = false;
2076 m_submissionMode = SINGLE_KERNEL_ONLY;
2077 return MOS_STATUS_SUCCESS;
2078 }
2079
SetL0FcParams(PRENDER_L0_FC_PARAMS params)2080 MOS_STATUS VpRenderCmdPacket::SetL0FcParams(PRENDER_L0_FC_PARAMS params)
2081 {
2082 VP_FUNC_CALL();
2083 VP_RENDER_CHK_NULL_RETURN(params);
2084
2085 KERNEL_PARAMS kernelParam = {};
2086 for (auto &krnParams : params->fc_kernelParams)
2087 {
2088 kernelParam.kernelId = krnParams.kernelId;
2089 kernelParam.kernelArgs = krnParams.kernelArgs;
2090 kernelParam.kernelThreadSpace.uWidth = krnParams.threadWidth;
2091 kernelParam.kernelThreadSpace.uHeight = krnParams.threadHeight;
2092 kernelParam.kernelThreadSpace.uLocalWidth = krnParams.localWidth;
2093 kernelParam.kernelThreadSpace.uLocalHeight = krnParams.localHeight;
2094 kernelParam.syncFlag = true;
2095 kernelParam.kernelStatefulSurfaces = krnParams.kernelStatefulSurfaces;
2096
2097 m_renderKernelParams.push_back(kernelParam);
2098
2099 m_kernelConfigs.insert(std::make_pair(krnParams.kernelId, (void *)(&krnParams.kernelConfig)));
2100 }
2101
2102 m_submissionMode = MULTI_KERNELS_SINGLE_MEDIA_STATE;
2103 m_isMultiBindingTables = true;
2104 m_isLargeSurfaceStateNeeded = true;
2105 return MOS_STATUS_SUCCESS;
2106 }
2107
SetHdr3DLutParams(PRENDER_HDR_3DLUT_CAL_PARAMS params)2108 MOS_STATUS VpRenderCmdPacket::SetHdr3DLutParams(
2109 PRENDER_HDR_3DLUT_CAL_PARAMS params)
2110 {
2111 VP_FUNC_CALL();
2112 VP_RENDER_CHK_NULL_RETURN(params);
2113
2114 m_kernelConfigs.insert(std::make_pair(params->kernelId, (void *)params));
2115
2116 KERNEL_PARAMS kernelParams = {};
2117 kernelParams.kernelId = params->kernelId;
2118 // kernelArgs will be initialized in VpRenderHdr3DLutKernel::Init with
2119 // kernel.GetKernelArgs().
2120 kernelParams.kernelThreadSpace.uWidth = params->threadWidth;
2121 kernelParams.kernelThreadSpace.uHeight = params->threadHeight;
2122 kernelParams.kernelThreadSpace.uLocalWidth = params->localWidth;
2123 kernelParams.kernelThreadSpace.uLocalHeight = params->localHeight;
2124 kernelParams.kernelArgs = params->kernelArgs;
2125 kernelParams.syncFlag = true;
2126 m_renderKernelParams.push_back(kernelParams);
2127
2128 return MOS_STATUS_SUCCESS;
2129 }
2130
//!
//! \brief    Fill the PIPE_CONTROL parameters used by the render packet
//! \details  Clears params, then requests a write-cache flush with generic
//!           media state clear and indirect state pointers disable, keeping
//!           the CS stall enabled. A PPC flush is requested in addition when
//!           the SKU table reports FtrEnablePPCFlush.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS when the parameters were filled; a null
//!           m_osInterface or pfnGetSkuTable is rejected by
//!           RENDER_PACKET_CHK_NULL_RETURN
//!
MHW_SETPAR_DECL_SRC(PIPE_CONTROL, VpRenderCmdPacket)
{
    // Start from a fully cleared parameter set.
    MOS_ZeroMemory(&params, sizeof(params));
    params.dwFlushMode                   = MHW_FLUSH_WRITE_CACHE;
    params.bGenericMediaStateClear       = true;
    params.bIndirectStatePointersDisable = true;
    params.bDisableCSStall               = false;

    RENDER_PACKET_CHK_NULL_RETURN(m_osInterface);
    RENDER_PACKET_CHK_NULL_RETURN(m_osInterface->pfnGetSkuTable);
    auto *skuTable = m_osInterface->pfnGetSkuTable(m_osInterface);
    if (skuTable && MEDIA_IS_SKU(skuTable, FtrEnablePPCFlush))
    {
        // Add PPC flush
        params.bPPCFlush = true;
    }

    return MOS_STATUS_SUCCESS;
}
2150
SetDnHVSParams(PRENDER_DN_HVS_CAL_PARAMS params)2151 MOS_STATUS VpRenderCmdPacket::SetDnHVSParams(
2152 PRENDER_DN_HVS_CAL_PARAMS params)
2153 {
2154 VP_FUNC_CALL();
2155 VP_RENDER_CHK_NULL_RETURN(params);
2156
2157 m_kernelConfigs.insert(std::make_pair(params->kernelId, (void *)params));
2158
2159 KERNEL_PARAMS kernelParams = {};
2160 kernelParams.kernelId = params->kernelId;
2161 // kernelArgs will be initialized in VpRenderHVSKernel::Init with
2162 // kernel.GetKernelArgs().
2163 kernelParams.kernelThreadSpace.uWidth = params->threadWidth;
2164 kernelParams.kernelThreadSpace.uHeight = params->threadHeight;
2165 kernelParams.kernelArgs = params->kernelArgs;
2166 kernelParams.syncFlag = true;
2167 m_renderKernelParams.push_back(kernelParams);
2168
2169 return MOS_STATUS_SUCCESS;
2170 }
2171
SetHdrParams(PRENDER_HDR_PARAMS params)2172 MOS_STATUS VpRenderCmdPacket::SetHdrParams(PRENDER_HDR_PARAMS params)
2173 {
2174 VP_FUNC_CALL();
2175 VP_RENDER_CHK_NULL_RETURN(params);
2176
2177 KERNEL_PARAMS kernelParams = {};
2178 VP_SURFACE *surf = GetSurface(SurfaceTypeHdrInputLayer0);
2179 PMHW_SAMPLER_STATE_PARAM pSamplerStateParams = nullptr;
2180 uint32_t i = 0;
2181
2182 params->coeffAllocated = m_surfSetting.coeffAllocated;
2183 params->OETF1DLUTAllocated = m_surfSetting.OETF1DLUTAllocated;
2184 params->Cri3DLUTAllocated = m_surfSetting.Cri3DLUTAllocated;
2185 params->pHDRStageConfigTable = m_surfSetting.pHDRStageConfigTable;
2186
2187 //MOS_SURFACE *surface = surf->osSurface;
2188 params->dwSurfaceHeight = surf->rcSrc.bottom - surf->rcSrc.top;
2189 params->dwSurfaceWidth = surf->rcSrc.right - surf->rcSrc.left;
2190
2191 for (i = 0; i < 16; i++)
2192 {
2193 MHW_SAMPLER_STATE_PARAM samplerStateParam = {};
2194 MOS_ZeroMemory(&samplerStateParam, sizeof(samplerStateParam));
2195
2196 pSamplerStateParams = &samplerStateParam;
2197
2198 switch (i)
2199 {
2200 case 13:
2201 pSamplerStateParams->bInUse = true;
2202 pSamplerStateParams->SamplerType = MHW_SAMPLER_TYPE_3D;
2203 pSamplerStateParams->Unorm.SamplerFilterMode = MHW_SAMPLER_FILTER_NEAREST;
2204 pSamplerStateParams->Unorm.AddressU = MHW_GFX3DSTATE_TEXCOORDMODE_CLAMP;
2205 pSamplerStateParams->Unorm.AddressV = MHW_GFX3DSTATE_TEXCOORDMODE_CLAMP;
2206 pSamplerStateParams->Unorm.AddressW = MHW_GFX3DSTATE_TEXCOORDMODE_CLAMP;
2207 break;
2208 case 14:
2209 pSamplerStateParams->bInUse = true;
2210 pSamplerStateParams->SamplerType = MHW_SAMPLER_TYPE_3D;
2211 pSamplerStateParams->Unorm.SamplerFilterMode = MHW_SAMPLER_FILTER_BILINEAR;
2212 pSamplerStateParams->Unorm.AddressU = MHW_GFX3DSTATE_TEXCOORDMODE_CLAMP;
2213 pSamplerStateParams->Unorm.AddressV = MHW_GFX3DSTATE_TEXCOORDMODE_CLAMP;
2214 pSamplerStateParams->Unorm.AddressW = MHW_GFX3DSTATE_TEXCOORDMODE_CLAMP;
2215 break;
2216 default:
2217 break;
2218 }
2219 m_kernelSamplerStateGroup.insert(std::make_pair(i, samplerStateParam));
2220 }
2221
2222 m_kernelConfigs.insert(std::make_pair(params->kernelId, (void *)params));
2223
2224 kernelParams.kernelId = params->kernelId;
2225 kernelParams.kernelThreadSpace.uWidth = params->threadWidth;
2226 kernelParams.kernelThreadSpace.uHeight = params->threadHeight;
2227 kernelParams.syncFlag = true;
2228 m_renderKernelParams.push_back(kernelParams);
2229
2230 return MOS_STATUS_SUCCESS;
2231
2232 }
2233
2234
2235 } // namespace vp
2236