1 /*
2 * Copyright (c) 2017-2020, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file cm_hal_g11.cpp
24 //! \brief Common HAL CM Gen11 functions
25 //!
26
27 #include "cm_hal_g11.h"
28 #include "mhw_render_hwcmd_g11_X.h"
29 #include "mhw_state_heap_hwcmd_g11_X.h"
30 #include "mhw_render_g11_X.h"
31 #include "mhw_utilities.h"
32 #include "cm_def.h"
33 #include "renderhal_platform_interface.h"
34 #include "hal_oca_interface.h"
35 #include "mhw_mmio_g11.h"
36 #if defined(ENABLE_KERNELS) && (!defined(_FULL_OPEN_SOURCE))
37 #include "cm_gpucopy_kernel_g11lp.h"
38 #include "cm_gpuinit_kernel_g11lp.h"
39 #else
// Fallback definitions for builds that do not include the prebuilt Gen11LP
// GPU copy / GPU init kernel headers: each kernel is represented by a null
// ISA pointer paired with a zero size.
unsigned char *pGPUCopy_kernel_isa_gen11lp      = nullptr;
unsigned int   iGPUCopy_kernel_isa_size_gen11lp = 0;
unsigned char *pGPUInit_kernel_isa_gen11lp      = nullptr;
unsigned int   iGPUInit_kernel_isa_size_gen11lp = 0;
44 #endif
45
46 // Gen11 Surface state tokenized commands - a SURFACE_STATE_G11 command and
47 // a surface state command, either SURFACE_STATE_G11 or SURFACE_STATE_ADV_G11
48 struct PACKET_SURFACE_STATE
49 {
50 SURFACE_STATE_TOKEN_COMMON token;
51 union
52 {
53 mhw_state_heap_g11_X::RENDER_SURFACE_STATE_CMD cmdSurfaceState;
54 mhw_state_heap_g11_X::MEDIA_SURFACE_STATE_CMD cmdSurfaceStateAdv;
55 };
56 };
57
GetCopyKernelIsa(void * & isa,uint32_t & isaSize)58 MOS_STATUS CM_HAL_G11_X::GetCopyKernelIsa(void *&isa, uint32_t &isaSize)
59 {
60 isa = ( void *)pGPUCopy_kernel_isa_gen11lp;
61 isaSize = iGPUCopy_kernel_isa_size_gen11lp;
62
63 return MOS_STATUS_SUCCESS;
64 }
65
GetInitKernelIsa(void * & isa,uint32_t & isaSize)66 MOS_STATUS CM_HAL_G11_X::GetInitKernelIsa(void *&isa, uint32_t &isaSize)
67 {
68 isa = (void *)pGPUInit_kernel_isa_gen11lp;
69 isaSize = iGPUInit_kernel_isa_size_gen11lp;
70
71 return MOS_STATUS_SUCCESS;
72 }
73
SetMediaWalkerParams(CM_WALKING_PARAMETERS engineeringParams,PCM_HAL_WALKER_PARAMS walkerParams)74 MOS_STATUS CM_HAL_G11_X::SetMediaWalkerParams(
75 CM_WALKING_PARAMETERS engineeringParams,
76 PCM_HAL_WALKER_PARAMS walkerParams)
77 {
78 mhw_render_g11_X::MEDIA_OBJECT_WALKER_CMD mediaWalkerCmd;
79 mediaWalkerCmd.DW5.Value = engineeringParams.Value[0];
80
81 mediaWalkerCmd.DW6.Value = engineeringParams.Value[1];
82 walkerParams->colorCountMinusOne = mediaWalkerCmd.DW6.ColorCountMinusOne;
83 walkerParams->midLoopUnitX = mediaWalkerCmd.DW6.MidLoopUnitX;
84 walkerParams->midLoopUnitY = mediaWalkerCmd.DW6.LocalMidLoopUnitY;
85 walkerParams->middleLoopExtraSteps = mediaWalkerCmd.DW6.MiddleLoopExtraSteps;
86
87 mediaWalkerCmd.DW7.Value = engineeringParams.Value[2];
88 walkerParams->localLoopExecCount = mediaWalkerCmd.DW7.LocalLoopExecCount;
89 walkerParams->globalLoopExecCount = mediaWalkerCmd.DW7.GlobalLoopExecCount;
90
91 mediaWalkerCmd.DW8.Value = engineeringParams.Value[3];
92 walkerParams->blockResolution.x = mediaWalkerCmd.DW8.BlockResolutionX;
93 walkerParams->blockResolution.y = mediaWalkerCmd.DW8.BlockResolutionY;
94
95 mediaWalkerCmd.DW9.Value = engineeringParams.Value[4];
96 walkerParams->localStart.x = mediaWalkerCmd.DW9.LocalStartX;
97 walkerParams->localStart.y = mediaWalkerCmd.DW9.LocalStartY;
98
99 mediaWalkerCmd.DW11.Value = engineeringParams.Value[6];
100 walkerParams->localOutLoopStride.x = mediaWalkerCmd.DW11.LocalOuterLoopStrideX;
101 walkerParams->localOutLoopStride.y = mediaWalkerCmd.DW11.LocalOuterLoopStrideY;
102
103 mediaWalkerCmd.DW12.Value = engineeringParams.Value[7];
104 walkerParams->localInnerLoopUnit.x = mediaWalkerCmd.DW12.LocalInnerLoopUnitX;
105 walkerParams->localInnerLoopUnit.y = mediaWalkerCmd.DW12.LocalInnerLoopUnitY;
106
107 mediaWalkerCmd.DW13.Value = engineeringParams.Value[8];
108 walkerParams->globalResolution.x = mediaWalkerCmd.DW13.GlobalResolutionX;
109 walkerParams->globalResolution.y = mediaWalkerCmd.DW13.GlobalResolutionY;
110
111 mediaWalkerCmd.DW14.Value = engineeringParams.Value[9];
112 walkerParams->globalStart.x = mediaWalkerCmd.DW14.GlobalStartX;
113 walkerParams->globalStart.y = mediaWalkerCmd.DW14.GlobalStartY;
114
115 mediaWalkerCmd.DW15.Value = engineeringParams.Value[10];
116 walkerParams->globalOutlerLoopStride.x = mediaWalkerCmd.DW15.GlobalOuterLoopStrideX;
117 walkerParams->globalOutlerLoopStride.y = mediaWalkerCmd.DW15.GlobalOuterLoopStrideY;
118
119 mediaWalkerCmd.DW16.Value = engineeringParams.Value[11];
120 walkerParams->globalInnerLoopUnit.x = mediaWalkerCmd.DW16.GlobalInnerLoopUnitX;
121 walkerParams->globalInnerLoopUnit.y = mediaWalkerCmd.DW16.GlobalInnerLoopUnitY;
122
123 walkerParams->localEnd.x = 0;
124 walkerParams->localEnd.y = 0;
125
126 return MOS_STATUS_SUCCESS;
127 }
128
HwSetSurfaceMemoryObjectControl(uint16_t memObjCtl,PRENDERHAL_SURFACE_STATE_PARAMS surfStateParams)129 MOS_STATUS CM_HAL_G11_X::HwSetSurfaceMemoryObjectControl(
130 uint16_t memObjCtl,
131 PRENDERHAL_SURFACE_STATE_PARAMS surfStateParams)
132 {
133 PRENDERHAL_INTERFACE renderHal = m_cmState->renderHal;
134 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
135 MOS_HW_RESOURCE_DEF mosUsage;
136 // The memory object control uint16_t is composed with cache type(8:15), memory type(4:7), ages(0:3)
137 mosUsage = (MOS_HW_RESOURCE_DEF)((memObjCtl & CM_MEMOBJCTL_CACHE_MASK) >> 8);
138 if (mosUsage >= MOS_HW_RESOURCE_DEF_MAX)
139 mosUsage = GetDefaultMOCS();
140
141 surfStateParams->MemObjCtl = renderHal->pOsInterface->pfnCachePolicyGetMemoryObject(mosUsage,
142 renderHal->pOsInterface->pfnGetGmmClientContext(renderHal->pOsInterface)).DwordValue;
143
144 return eStatus;
145 }
146
147
RegisterSampler8x8AVSTable(PCM_HAL_SAMPLER_8X8_TABLE sampler8x8AvsTable,PCM_AVS_TABLE_STATE_PARAMS avsTable)148 MOS_STATUS CM_HAL_G11_X::RegisterSampler8x8AVSTable(
149 PCM_HAL_SAMPLER_8X8_TABLE sampler8x8AvsTable,
150 PCM_AVS_TABLE_STATE_PARAMS avsTable)
151 {
152 MOS_ZeroMemory(&sampler8x8AvsTable->mhwSamplerAvsTableParam, sizeof(sampler8x8AvsTable->mhwSamplerAvsTableParam));
153
154 sampler8x8AvsTable->mhwSamplerAvsTableParam.byteTransitionArea8Pixels = MEDIASTATE_AVS_TRANSITION_AREA_8_PIXELS;
155 sampler8x8AvsTable->mhwSamplerAvsTableParam.byteTransitionArea4Pixels = MEDIASTATE_AVS_TRANSITION_AREA_4_PIXELS;
156 sampler8x8AvsTable->mhwSamplerAvsTableParam.byteMaxDerivative8Pixels = MEDIASTATE_AVS_MAX_DERIVATIVE_8_PIXELS;
157 sampler8x8AvsTable->mhwSamplerAvsTableParam.byteMaxDerivative4Pixels = MEDIASTATE_AVS_MAX_DERIVATIVE_4_PIXELS;
158
159 sampler8x8AvsTable->mhwSamplerAvsTableParam.bEnableRGBAdaptive = avsTable->enableRgbAdaptive;
160 sampler8x8AvsTable->mhwSamplerAvsTableParam.bAdaptiveFilterAllChannels = avsTable->adaptiveFilterAllChannels;
161
162 // Assign the coefficient table;
163 for (uint32_t i = 0; i < CM_NUM_HW_POLYPHASE_TABLES_G11; i++)
164 {
165 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroXFilterCoefficient[0] = (uint8_t)avsTable->tbl0X[i].FilterCoeff_0_0;
166 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroXFilterCoefficient[1] = (uint8_t)avsTable->tbl0X[i].FilterCoeff_0_1;
167
168 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroXFilterCoefficient[2] = (uint8_t)avsTable->tbl0X[i].FilterCoeff_0_2;
169 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroXFilterCoefficient[3] = (uint8_t)avsTable->tbl0X[i].FilterCoeff_0_3;
170
171 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroXFilterCoefficient[4] = (uint8_t)avsTable->tbl0X[i].FilterCoeff_0_4;
172 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroXFilterCoefficient[5] = (uint8_t)avsTable->tbl0X[i].FilterCoeff_0_5;
173
174 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroXFilterCoefficient[6] = (uint8_t)avsTable->tbl0X[i].FilterCoeff_0_6;
175 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroXFilterCoefficient[7] = (uint8_t)avsTable->tbl0X[i].FilterCoeff_0_7;
176
177 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroYFilterCoefficient[0] = (uint8_t)avsTable->tbl0Y[i].FilterCoeff_0_0;
178 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroYFilterCoefficient[1] = (uint8_t)avsTable->tbl0Y[i].FilterCoeff_0_1;
179
180 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroYFilterCoefficient[2] = (uint8_t)avsTable->tbl0Y[i].FilterCoeff_0_2;
181 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroYFilterCoefficient[3] = (uint8_t)avsTable->tbl0Y[i].FilterCoeff_0_3;
182
183 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroYFilterCoefficient[4] = (uint8_t)avsTable->tbl0Y[i].FilterCoeff_0_4;
184 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroYFilterCoefficient[5] = (uint8_t)avsTable->tbl0Y[i].FilterCoeff_0_5;
185
186 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroYFilterCoefficient[6] = (uint8_t)avsTable->tbl0Y[i].FilterCoeff_0_6;
187 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].ZeroYFilterCoefficient[7] = (uint8_t)avsTable->tbl0Y[i].FilterCoeff_0_7;
188
189 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].OneXFilterCoefficient[0] = (uint8_t)avsTable->tbl1X[i].FilterCoeff_0_2;
190 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].OneXFilterCoefficient[1] = (uint8_t)avsTable->tbl1X[i].FilterCoeff_0_3;
191 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].OneXFilterCoefficient[2] = (uint8_t)avsTable->tbl1X[i].FilterCoeff_0_4;
192 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].OneXFilterCoefficient[3] = (uint8_t)avsTable->tbl1X[i].FilterCoeff_0_5;
193
194 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].OneYFilterCoefficient[0] = (uint8_t)avsTable->tbl1Y[i].FilterCoeff_0_2;
195 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].OneYFilterCoefficient[1] = (uint8_t)avsTable->tbl1Y[i].FilterCoeff_0_3;
196 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].OneYFilterCoefficient[2] = (uint8_t)avsTable->tbl1Y[i].FilterCoeff_0_4;
197 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParam[i].OneYFilterCoefficient[3] = (uint8_t)avsTable->tbl1Y[i].FilterCoeff_0_5;
198 }
199
200 sampler8x8AvsTable->mhwSamplerAvsTableParam.byteDefaultSharpnessLevel = avsTable->defaultSharpLevel;
201 sampler8x8AvsTable->mhwSamplerAvsTableParam.bBypassXAdaptiveFiltering = avsTable->bypassXAF;
202 sampler8x8AvsTable->mhwSamplerAvsTableParam.bBypassYAdaptiveFiltering = avsTable->bypassYAF;
203
204 if (!avsTable->bypassXAF && !avsTable->bypassYAF)
205 {
206 sampler8x8AvsTable->mhwSamplerAvsTableParam.byteMaxDerivative8Pixels = avsTable->maxDerivative8Pixels;
207 sampler8x8AvsTable->mhwSamplerAvsTableParam.byteMaxDerivative4Pixels = avsTable->maxDerivative4Pixels;
208 sampler8x8AvsTable->mhwSamplerAvsTableParam.byteTransitionArea8Pixels = avsTable->transitionArea8Pixels;
209 sampler8x8AvsTable->mhwSamplerAvsTableParam.byteTransitionArea4Pixels = avsTable->transitionArea4Pixels;
210 }
211
212 for (int i = 0; i < CM_NUM_HW_POLYPHASE_EXTRA_TABLES_G11; i++)
213 {
214 int src = i + CM_NUM_HW_POLYPHASE_TABLES_G11;
215 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[i].ZeroXFilterCoefficient[0] = (uint8_t)avsTable->tbl0X[src].FilterCoeff_0_0;
216 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[i].ZeroXFilterCoefficient[1] = (uint8_t)avsTable->tbl0X[src].FilterCoeff_0_1;
217
218 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[i].ZeroXFilterCoefficient[2] = (uint8_t)avsTable->tbl0X[src].FilterCoeff_0_2;
219 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[i].ZeroXFilterCoefficient[3] = (uint8_t)avsTable->tbl0X[src].FilterCoeff_0_3;
220
221 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[i].ZeroXFilterCoefficient[4] = (uint8_t)avsTable->tbl0X[src].FilterCoeff_0_4;
222 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[i].ZeroXFilterCoefficient[5] = (uint8_t)avsTable->tbl0X[src].FilterCoeff_0_5;
223
224 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[i].ZeroXFilterCoefficient[6] = (uint8_t)avsTable->tbl0X[src].FilterCoeff_0_6;
225 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[i].ZeroXFilterCoefficient[7] = (uint8_t)avsTable->tbl0X[src].FilterCoeff_0_7;
226
227 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[i].ZeroYFilterCoefficient[0] = (uint8_t)avsTable->tbl0Y[src].FilterCoeff_0_0;
228 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[i].ZeroYFilterCoefficient[1] = (uint8_t)avsTable->tbl0Y[src].FilterCoeff_0_1;
229
230 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[i].ZeroYFilterCoefficient[2] = (uint8_t)avsTable->tbl0Y[src].FilterCoeff_0_2;
231 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[i].ZeroYFilterCoefficient[3] = (uint8_t)avsTable->tbl0Y[src].FilterCoeff_0_3;
232
233 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[i].ZeroYFilterCoefficient[4] = (uint8_t)avsTable->tbl0Y[src].FilterCoeff_0_4;
234 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[i].ZeroYFilterCoefficient[5] = (uint8_t)avsTable->tbl0Y[src].FilterCoeff_0_5;
235
236 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[i].ZeroYFilterCoefficient[6] = (uint8_t)avsTable->tbl0Y[src].FilterCoeff_0_6;
237 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[i].ZeroYFilterCoefficient[7] = (uint8_t)avsTable->tbl0Y[src].FilterCoeff_0_7;
238
239 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[i].OneXFilterCoefficient[0] = (uint8_t)avsTable->tbl1X[src].FilterCoeff_0_2;
240 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[i].OneXFilterCoefficient[1] = (uint8_t)avsTable->tbl1X[src].FilterCoeff_0_3;
241 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[i].OneXFilterCoefficient[2] = (uint8_t)avsTable->tbl1X[src].FilterCoeff_0_4;
242 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[i].OneXFilterCoefficient[3] = (uint8_t)avsTable->tbl1X[src].FilterCoeff_0_5;
243
244 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[i].OneYFilterCoefficient[0] = (uint8_t)avsTable->tbl1Y[src].FilterCoeff_0_2;
245 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[i].OneYFilterCoefficient[1] = (uint8_t)avsTable->tbl1Y[src].FilterCoeff_0_3;
246 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[i].OneYFilterCoefficient[2] = (uint8_t)avsTable->tbl1Y[src].FilterCoeff_0_4;
247 sampler8x8AvsTable->mhwSamplerAvsTableParam.paMhwAvsCoeffParamExtra[i].OneYFilterCoefficient[3] = (uint8_t)avsTable->tbl1Y[src].FilterCoeff_0_5;
248
249 }
250
251 return MOS_STATUS_SUCCESS;
252 }
253
RegisterSampler8x8(PCM_HAL_SAMPLER_8X8_PARAM param)254 MOS_STATUS CM_HAL_G11_X::RegisterSampler8x8(
255 PCM_HAL_SAMPLER_8X8_PARAM param)
256 {
257 PCM_HAL_STATE state = m_cmState;
258 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
259 int16_t samplerIndex = 0;
260 PMHW_SAMPLER_STATE_PARAM samplerEntry = nullptr;
261 PCM_HAL_SAMPLER_8X8_ENTRY sampler8x8Entry = nullptr;
262
263 if (param->sampler8x8State.stateType == CM_SAMPLER8X8_AVS)
264 {
265 for (uint32_t i = 0; i < state->cmDeviceParam.maxSamplerTableSize; i++) {
266 if (!state->samplerTable[i].bInUse) {
267 samplerEntry = &state->samplerTable[i];
268 param->handle = (uint32_t)i << 16;
269 samplerEntry->bInUse = true;
270 break;
271 }
272 }
273
274 for (uint32_t i = 0; i < state->cmDeviceParam.maxSampler8x8TableSize; i++) {
275 if (!state->sampler8x8Table[i].inUse) {
276 sampler8x8Entry = &state->sampler8x8Table[i];
277 samplerIndex = (int16_t)i;
278 param->handle |= (uint32_t)(i & 0xffff);
279 sampler8x8Entry->inUse = true;
280 break;
281 }
282 }
283
284 if (!samplerEntry || !sampler8x8Entry) {
285 eStatus = MOS_STATUS_INVALID_PARAMETER;
286 CM_ASSERTMESSAGE("Sampler or AVS table is full");
287 goto finish;
288 }
289
290 //State data from application
291 samplerEntry->SamplerType = MHW_SAMPLER_TYPE_AVS;
292 samplerEntry->ElementType = MHW_Sampler128Elements;
293 samplerEntry->Avs = param->sampler8x8State.avsParam.avsState;
294 samplerEntry->Avs.stateID = samplerIndex;
295 samplerEntry->Avs.iTable8x8_Index = samplerIndex; // Used for calculating the Media offset of 8x8 table
296 samplerEntry->Avs.pMhwSamplerAvsTableParam = &sampler8x8Entry->sampler8x8State.mhwSamplerAvsTableParam;
297
298 if (samplerEntry->Avs.EightTapAFEnable)
299 param->sampler8x8State.avsParam.avsTable.adaptiveFilterAllChannels = true;
300 else
301 param->sampler8x8State.avsParam.avsTable.adaptiveFilterAllChannels = false;
302
303 CM_CHK_MOSSTATUS_GOTOFINISH(RegisterSampler8x8AVSTable(&sampler8x8Entry->sampler8x8State,
304 ¶m->sampler8x8State.avsParam.avsTable));
305
306 sampler8x8Entry->sampler8x8State.stateType = CM_SAMPLER8X8_AVS;
307 }
308
309 finish:
310 return eStatus;
311 }
312
#if (_RELEASE_INTERNAL || _DEBUG)
#if defined(CM_DIRECT_GUC_SUPPORT)
//!
//! \brief    Dummy command submission entry point (stub in this implementation)
//! \details  Compiled only for release-internal or debug builds that also
//!           define CM_DIRECT_GUC_SUPPORT. None of the arguments are used.
//! \param    [in] batchBuffer
//!           Ignored
//! \param    [in] taskId
//!           Ignored
//! \param    [in] kernelParam
//!           Ignored
//! \param    [in] cmdBuffer
//!           Ignored
//! \return   MOS_STATUS
//!           Always MOS_STATUS_UNIMPLEMENTED
//!
MOS_STATUS CM_HAL_G11_X::SubmitDummyCommands(
    PMHW_BATCH_BUFFER     batchBuffer,
    int32_t               taskId,
    PCM_HAL_KERNEL_PARAM *kernelParam,
    void                **cmdBuffer)
{
    // Nothing is submitted; the caller is told the feature is not implemented.
    return MOS_STATUS_UNIMPLEMENTED;
}
#endif  // defined(CM_DIRECT_GUC_SUPPORT)
#endif  // (_RELEASE_INTERNAL || _DEBUG)
326
SubmitCommands(PMHW_BATCH_BUFFER batchBuffer,int32_t taskId,PCM_HAL_KERNEL_PARAM * kernelParam,void ** cmdBuffer)327 MOS_STATUS CM_HAL_G11_X::SubmitCommands(
328 PMHW_BATCH_BUFFER batchBuffer,
329 int32_t taskId,
330 PCM_HAL_KERNEL_PARAM *kernelParam,
331 void **cmdBuffer)
332 {
333 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
334 PCM_HAL_STATE state = m_cmState;
335 PRENDERHAL_INTERFACE_LEGACY renderHal = state->renderHal;
336 PMOS_INTERFACE osInterface = renderHal->pOsInterface;
337 MhwRenderInterface *mhwRender = renderHal->pMhwRenderInterface;
338 PMHW_MI_INTERFACE mhwMiInterface = renderHal->pMhwMiInterface;
339 PRENDERHAL_STATE_HEAP stateHeap = renderHal->pStateHeap;
340 MHW_PIPE_CONTROL_PARAMS pipeCtlParams = g_cRenderHal_InitPipeControlParams;
341 MHW_MEDIA_STATE_FLUSH_PARAM flushParam = g_cRenderHal_InitMediaStateFlushParams;
342 MHW_ID_LOAD_PARAMS idLoadParams;
343 int32_t remaining = 0;
344 bool enableWalker = state->walkerParams.CmWalkerEnable;
345 bool enableGpGpu = state->taskParam->blGpGpuWalkerEnabled;
346 PCM_HAL_TASK_PARAM taskParam = state->taskParam;
347 PCM_HAL_BB_ARGS bbCmArgs;
348 MOS_COMMAND_BUFFER mosCmdBuffer;
349 uint32_t syncTag;
350 int64_t *taskSyncLocation;
351 int32_t syncOffset;
352 int32_t tmp;
353 bool sipEnable = renderHal->bSIPKernel?true: false;
354 bool csrEnable = renderHal->bCSRKernel?true: false;
355 RENDERHAL_GENERIC_PROLOG_PARAMS genericPrologParams = {};
356 MOS_RESOURCE *osResource;
357 uint32_t tag;
358 uint32_t tagOffset = 0;
359 MHW_RENDER_ENGINE_L3_CACHE_SETTINGS_G11 cacheSettings = {};
360 MOS_CONTEXT *pOsContext = renderHal->pOsInterface->pOsContext;
361 PMHW_MI_MMIOREGISTERS pMmioRegisters = renderHal->pMhwRenderInterface->GetMmioRegisters();
362 CM_HAL_MI_REG_OFFSETS miRegG11 = { REG_TIMESTAMP_BASE_G11, REG_GPR_BASE_G11 };
363
364 MOS_ZeroMemory(&mosCmdBuffer, sizeof(MOS_COMMAND_BUFFER));
365
366 // get the tag
367 tag = renderHal->trackerProducer.GetNextTracker(renderHal->currentTrackerIndex);
368
369 // Get the task sync offset
370 syncOffset = state->pfnGetTaskSyncLocation(state, taskId);
371
372 // Initialize the location
373 taskSyncLocation = (int64_t*)(state->renderTimeStampResource.data + syncOffset);
374 *taskSyncLocation = CM_INVALID_INDEX;
375 *(taskSyncLocation + 1) = CM_INVALID_INDEX;
376 if (state->cbbEnabled)
377 {
378 *(taskSyncLocation + 2) = tag;
379 *(taskSyncLocation + 3) = state->renderHal->currentTrackerIndex;
380 }
381
382 // Register batch buffer for rendering
383 if (!enableWalker && !enableGpGpu)
384 {
385 CM_CHK_MOSSTATUS_GOTOFINISH(osInterface->pfnRegisterResource(
386 osInterface,
387 &batchBuffer->OsResource,
388 true,
389 true));
390 }
391
392 // Register Timestamp Buffer
393 CM_CHK_MOSSTATUS_GOTOFINISH(osInterface->pfnRegisterResource(
394 osInterface,
395 &state->renderTimeStampResource.osResource,
396 true,
397 true));
398
399 // Allocate all available space, unused buffer will be returned later
400 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnGetCommandBuffer(osInterface, &mosCmdBuffer, 0));
401 remaining = mosCmdBuffer.iRemaining;
402
403 // Update power option of this command;
404 CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnUpdatePowerOption(state, &state->powerOption));
405
406 // use frame tracking to write the tracker ID to CM tracker resource
407 renderHal->trackerProducer.GetLatestTrackerResource(renderHal->currentTrackerIndex, &osResource, &tagOffset);
408 renderHal->pfnSetupPrologParams(renderHal, &genericPrologParams, osResource, tagOffset, tag);
409 FrameTrackerTokenFlat_SetProducer(&stateHeap->pCurMediaState->trackerToken, &renderHal->trackerProducer);
410 FrameTrackerTokenFlat_Merge(&stateHeap->pCurMediaState->trackerToken, renderHal->currentTrackerIndex, tag);
411
412 // Record registers by unified media profiler in the beginning
413 if (state->perfProfiler != nullptr)
414 {
415 CM_CHK_MOSSTATUS_GOTOFINISH(state->perfProfiler->AddPerfCollectStartCmd((void *)state, state->osInterface, mhwMiInterface, &mosCmdBuffer));
416 }
417
418 //Send the First PipeControl Command to indicate the beginning of execution
419 pipeCtlParams = g_cRenderHal_InitPipeControlParams;
420 pipeCtlParams.presDest = &state->renderTimeStampResource.osResource;
421 pipeCtlParams.dwResourceOffset = syncOffset;
422 pipeCtlParams.dwPostSyncOp = MHW_FLUSH_WRITE_TIMESTAMP_REG;
423 pipeCtlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE;
424 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtlParams));
425
426 // Initialize command buffer and insert prolog
427 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnInitCommandBuffer(renderHal, &mosCmdBuffer, &genericPrologParams));
428
429 HalOcaInterface::On1stLevelBBStart(mosCmdBuffer, *pOsContext, osInterface->CurrentGpuContextHandle,
430 *renderHal->pMhwMiInterface, *pMmioRegisters);
431
432 // update tracker tag used with CM tracker resource
433 renderHal->trackerProducer.StepForward(renderHal->currentTrackerIndex);
434
435 // Increment sync tag
436 syncTag = stateHeap->dwNextTag++;
437
438 //enable ICL L3 config
439
440 if (state->l3Settings.overrideSettings != 0)
441 {
442 cacheSettings.dwCntlReg = state->l3Settings.cntlReg;
443 cacheSettings.dwTcCntlReg = state->l3Settings.tcCntlReg;
444 mhwRender->EnableL3Caching(&cacheSettings);
445 }
446 else
447 {
448 mhwRender->EnableL3Caching(nullptr);
449 HalCm_GetLegacyRenderHalL3Setting(&state->l3Settings, &renderHal->L3CacheSettings);
450 renderHal->pfnEnableL3Caching(renderHal, &renderHal->L3CacheSettings);
451 }
452 mhwRender->SetL3Cache(&mosCmdBuffer);
453
454 if ( sipEnable )
455 {
456 CM_CHK_MOSSTATUS_GOTOFINISH( SetupHwDebugControl( renderHal, &mosCmdBuffer ) );
457 }
458
459 // add granularity control for preemption for ICL
460 // Supporting Preemption granularity control reg for 3D and GPGPU mode for per ctx and with non-privileged access
461 if (MEDIA_IS_SKU(state->skuTable, FtrPerCtxtPreemptionGranularityControl))
462 {
463 MHW_MI_LOAD_REGISTER_IMM_PARAMS loadRegImm;
464 MOS_ZeroMemory(&loadRegImm, sizeof(MHW_MI_LOAD_REGISTER_IMM_PARAMS));
465
466 loadRegImm.dwRegister = MHW_RENDER_ENGINE_PREEMPTION_CONTROL_OFFSET;
467
468 // Same reg offset and value for gpgpu pipe and media pipe
469 if (enableGpGpu)
470 {
471 if (MEDIA_IS_SKU(state->skuTable, FtrGpGpuMidThreadLevelPreempt))
472 {
473 if (csrEnable)
474 loadRegImm.dwData = MHW_RENDER_ENGINE_MID_THREAD_PREEMPT_VALUE;
475 else
476 loadRegImm.dwData = MHW_RENDER_ENGINE_THREAD_GROUP_PREEMPT_VALUE;
477 }
478 else if (MEDIA_IS_SKU(state->skuTable, FtrGpGpuThreadGroupLevelPreempt))
479 {
480 loadRegImm.dwData = MHW_RENDER_ENGINE_THREAD_GROUP_PREEMPT_VALUE;
481 state->renderHal->pfnEnableGpgpuMiddleBatchBufferPreemption(state->renderHal);
482 }
483 else if (MEDIA_IS_SKU(state->skuTable, FtrGpGpuMidBatchPreempt))
484 {
485 loadRegImm.dwData = MHW_RENDER_ENGINE_MID_BATCH_PREEMPT_VALUE;
486 state->renderHal->pfnEnableGpgpuMiddleBatchBufferPreemption(state->renderHal);
487 }
488 else
489 {
490 // if hit this branch then platform does not support any media preemption in render engine. Still program the register to avoid GPU hang
491 loadRegImm.dwData = MHW_RENDER_ENGINE_MID_BATCH_PREEMPT_VALUE;
492 }
493 }
494 else
495 {
496 if (MEDIA_IS_SKU(state->skuTable, FtrMediaMidThreadLevelPreempt))
497 {
498 loadRegImm.dwData = MHW_RENDER_ENGINE_MID_THREAD_PREEMPT_VALUE;
499 }
500 else if (MEDIA_IS_SKU(state->skuTable, FtrMediaThreadGroupLevelPreempt))
501 {
502 loadRegImm.dwData = MHW_RENDER_ENGINE_THREAD_GROUP_PREEMPT_VALUE;
503 }
504 else if (MEDIA_IS_SKU(state->skuTable, FtrMediaMidBatchPreempt))
505 {
506 loadRegImm.dwData = MHW_RENDER_ENGINE_MID_BATCH_PREEMPT_VALUE;
507 }
508 else
509 {
510 // if hit this branch then platform does not support any media preemption in render engine. Still program the register to avoid GPU hang
511 loadRegImm.dwData = MHW_RENDER_ENGINE_MID_BATCH_PREEMPT_VALUE;
512 }
513 }
514 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddMiLoadRegisterImmCmd(&mosCmdBuffer, &loadRegImm));
515 }
516
517 // Send Pipeline Select command
518 CM_CHK_MOSSTATUS_GOTOFINISH( mhwRender->AddPipelineSelectCmd(&mosCmdBuffer, enableGpGpu ) );
519
520 // Send State Base Address command
521 CM_CHK_MOSSTATUS_GOTOFINISH( renderHal->pfnSendStateBaseAddress( renderHal, &mosCmdBuffer ) );
522
523 // Send Surface States
524 CM_CHK_MOSSTATUS_GOTOFINISH( renderHal->pfnSendSurfaces( renderHal, &mosCmdBuffer ) );
525
526 if (enableGpGpu)
527 {
528 if (csrEnable)
529 {
530 // Send CS_STALL pipe control
531 //Insert a pipe control as synchronization
532 pipeCtlParams = g_cRenderHal_InitPipeControlParams;
533 pipeCtlParams.presDest = &state->renderTimeStampResource.osResource;
534 pipeCtlParams.dwPostSyncOp = MHW_FLUSH_NOWRITE;
535 pipeCtlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE;
536 pipeCtlParams.bDisableCSStall = 0;
537 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeCtlParams));
538 }
539
540 if (sipEnable || csrEnable)
541 {
542 // Send SIP State
543 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSendSipStateCmd(renderHal, &mosCmdBuffer));
544
545 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnRegisterResource(
546 osInterface,
547 &state->csrResource,
548 true,
549 true));
550
551 // Send csr base addr command
552 CM_CHK_MOSSTATUS_GOTOFINISH(mhwRender->AddGpgpuCsrBaseAddrCmd(&mosCmdBuffer, &state->csrResource));
553 }
554 }
555
556 // Setup VFE State params. Each Renderer MUST call pfnSetVfeStateParams().
557 // See comment in VpHal_HwSetVfeStateParams() for details.
558 tmp = RENDERHAL_USE_MEDIA_THREADS_MAX;
559 if ( state->maxHWThreadValues.userFeatureValue != 0 )
560 {
561 if ( state->maxHWThreadValues.userFeatureValue < renderHal->pHwCaps->dwMaxThreads )
562 {
563 tmp = state->maxHWThreadValues.userFeatureValue;
564 }
565 }
566 else if ( state->maxHWThreadValues.apiValue != 0 )
567 {
568 if ( state->maxHWThreadValues.apiValue < renderHal->pHwCaps->dwMaxThreads )
569 {
570 tmp = state->maxHWThreadValues.apiValue;
571 }
572 }
573
574 renderHal->pfnSetVfeStateParams(
575 renderHal,
576 MEDIASTATE_DEBUG_COUNTER_FREE_RUNNING,
577 tmp,
578 state->taskParam->vfeCurbeSize,
579 state->taskParam->urbEntrySize,
580 nullptr);
581
582 // Send VFE State
583 CM_CHK_MOSSTATUS_GOTOFINISH( mhwRender->AddMediaVfeCmd(&mosCmdBuffer,
584 renderHal->pRenderHalPltInterface->GetVfeStateParameters() ) );
585
586 // Send CURBE Load
587 if ( state->taskParam->vfeCurbeSize > 0 )
588 {
589 CM_CHK_MOSSTATUS_GOTOFINISH( renderHal->pfnSendCurbeLoad( renderHal, &mosCmdBuffer ) );
590 }
591
592 // Send Interface Descriptor Load
593 if ( state->dshEnabled )
594 {
595 PRENDERHAL_DYNAMIC_STATE dynamicState = ((PRENDERHAL_MEDIA_STATE_LEGACY)stateHeap->pCurMediaState)->pDynamicState;
596 idLoadParams.dwInterfaceDescriptorStartOffset = dynamicState->memoryBlock.GetOffset() +
597 dynamicState->MediaID.dwOffset;
598 idLoadParams.dwInterfaceDescriptorLength = dynamicState->MediaID.iCount * stateHeap->dwSizeMediaID;
599 }
600 else
601 {
602 idLoadParams.dwInterfaceDescriptorStartOffset = stateHeap->pCurMediaState->dwOffset + stateHeap->dwOffsetMediaID;
603 idLoadParams.dwInterfaceDescriptorLength = renderHal->StateHeapSettings.iMediaIDs * stateHeap->dwSizeMediaID;
604 }
605 idLoadParams.pKernelState = nullptr;
606 CM_CHK_MOSSTATUS_GOTOFINISH( mhwRender->AddMediaIDLoadCmd(&mosCmdBuffer, &idLoadParams ) );
607
608 HalOcaInterface::OnDispatch(mosCmdBuffer, *osInterface, *renderHal->pMhwMiInterface, *pMmioRegisters);
609
610 if ( enableWalker )
611 {
612 // send media walker command, if required
613
614 for ( uint32_t i = 0; i < state->taskParam->numKernels; i++ )
615 {
616 // Insert CONDITIONAL_BATCH_BUFFER_END
617 if ( taskParam->conditionalEndBitmap & ( ( uint64_t )1 << ( i ) ) )
618 {
619 // this could be batch buffer end so need to update sync tag, media state flush, write end timestamp
620
621 CM_CHK_MOSSTATUS_GOTOFINISH( renderHal->pfnSendSyncTag( renderHal, &mosCmdBuffer ) );
622
623 // conditionally write timestamp
624 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_OsAddArtifactConditionalPipeControl(&miRegG11, state, &mosCmdBuffer, syncOffset, &taskParam->conditionalBBEndParams[i], tag));
625
626 // Insert conditional batch buffer end
627 mhwMiInterface->AddMiConditionalBatchBufferEndCmd(&mosCmdBuffer, &taskParam->conditionalBBEndParams[ i ] );
628 }
629
630 //Insert PIPE_CONTROL at two cases:
631 // 1. synchronization is set
632 // 2. the next kernel has dependency pattern
633 if ( ( i > 0 ) && ( ( taskParam->syncBitmap & ( ( uint64_t )1 << ( i - 1 ) ) ) ||
634 ( kernelParam[ i ]->kernelThreadSpaceParam.patternType != CM_NONE_DEPENDENCY ) ) )
635 {
636 //Insert a pipe control as synchronization
637 pipeCtlParams = g_cRenderHal_InitPipeControlParams;
638 pipeCtlParams.presDest = &state->renderTimeStampResource.osResource;
639 pipeCtlParams.dwPostSyncOp = MHW_FLUSH_NOWRITE;
640 pipeCtlParams.dwFlushMode = MHW_FLUSH_CUSTOM;
641 pipeCtlParams.bInvalidateTextureCache = true;
642 pipeCtlParams.bFlushRenderTargetCache = true;
643 CM_CHK_MOSSTATUS_GOTOFINISH( mhwMiInterface->AddPipeControl( &mosCmdBuffer, nullptr, &pipeCtlParams ) );
644 }
645
646 // send media walker command, if required
647 CM_CHK_MOSSTATUS_GOTOFINISH( state->pfnSendMediaWalkerState( state, kernelParam[ i ], &mosCmdBuffer ) );
648 }
649
650 }
651 else if ( enableGpGpu )
652 {
653 // send GPGPU walker command, if required
654
655 for ( uint32_t i = 0; i < state->taskParam->numKernels; i++ )
656 {
657 MHW_GPGPU_WALKER_PARAMS gpGpuWalkerParams;
658
659 gpGpuWalkerParams.InterfaceDescriptorOffset = kernelParam[ i ]->gpgpuWalkerParams.interfaceDescriptorOffset;
660 gpGpuWalkerParams.GpGpuEnable = kernelParam[ i ]->gpgpuWalkerParams.gpgpuEnabled;
661 gpGpuWalkerParams.GroupWidth = kernelParam[ i ]->gpgpuWalkerParams.groupWidth;
662 gpGpuWalkerParams.GroupHeight = kernelParam[ i ]->gpgpuWalkerParams.groupHeight;
663 gpGpuWalkerParams.GroupDepth = kernelParam[i]->gpgpuWalkerParams.groupDepth;
664 gpGpuWalkerParams.ThreadWidth = kernelParam[ i ]->gpgpuWalkerParams.threadWidth;
665 gpGpuWalkerParams.ThreadHeight = kernelParam[ i ]->gpgpuWalkerParams.threadHeight;
666 gpGpuWalkerParams.ThreadDepth = kernelParam[i]->gpgpuWalkerParams.threadDepth;
667 gpGpuWalkerParams.SLMSize = kernelParam[ i ]->slmSize;
668 //Insert PIPE_CONTROL at two cases:
669 // 1. synchronization is set
670 // 2. the next kernel has dependency pattern
671 if ( ( i > 0 ) && ( ( taskParam->syncBitmap & ( ( uint64_t )1 << ( i - 1 ) ) ) ||
672 ( kernelParam[ i ]->kernelThreadSpaceParam.patternType != CM_NONE_DEPENDENCY ) ) )
673 {
674 //Insert a pipe control as synchronization
675 pipeCtlParams = g_cRenderHal_InitPipeControlParams;
676 pipeCtlParams.presDest = &state->renderTimeStampResource.osResource;
677 pipeCtlParams.dwPostSyncOp = MHW_FLUSH_NOWRITE;
678 pipeCtlParams.dwFlushMode = MHW_FLUSH_CUSTOM;
679 pipeCtlParams.bInvalidateTextureCache = true;
680 pipeCtlParams.bFlushRenderTargetCache = true;
681 CM_CHK_MOSSTATUS_GOTOFINISH( mhwMiInterface->AddPipeControl( &mosCmdBuffer, nullptr, &pipeCtlParams ) );
682 }
683
684 // send media walker command, if required
685 MOS_SecureMemcpy( &state->walkerParams, sizeof( MHW_WALKER_PARAMS ), &kernelParam[ i ]->walkerParams, sizeof( CM_HAL_WALKER_PARAMS ) );
686 CM_CHK_MOSSTATUS_GOTOFINISH( mhwRender->AddGpGpuWalkerStateCmd(&mosCmdBuffer, &gpGpuWalkerParams ) );
687 }
688
689 }
690 else
691 {
692 // Send Start batch buffer command
693 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddMiBatchBufferStartCmd(
694 &mosCmdBuffer,
695 batchBuffer ) );
696
697 CM_CHK_NULL_GOTOFINISH_MOSERROR( batchBuffer->pPrivateData );
698 bbCmArgs = ( PCM_HAL_BB_ARGS )batchBuffer->pPrivateData;
699
700 if ( ( bbCmArgs->refCount == 1 ) ||
701 ( state->taskParam->reuseBBUpdateMask == 1 ) )
702 {
703 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddMiBatchBufferEnd( nullptr, batchBuffer ) );
704 }
705 else
706 {
707 // Skip BB end command
708 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->SkipMiBatchBufferEndBb(batchBuffer ) );
709 }
710
711 // UnLock the batch buffer
712 if ( ( bbCmArgs->refCount == 1 ) ||
713 ( state->taskParam->reuseBBUpdateMask == 1 ) )
714 {
715 CM_CHK_MOSSTATUS_GOTOFINISH( renderHal->pfnUnlockBB( renderHal, batchBuffer ) );
716 }
717 }
718
719 // issue a PIPE_CONTROL to flush all caches and the stall the CS before
720 // issuing a PIPE_CONTROL to write the timestamp
721 pipeCtlParams = g_cRenderHal_InitPipeControlParams;
722 pipeCtlParams.presDest = &state->renderTimeStampResource.osResource;
723 pipeCtlParams.dwPostSyncOp = MHW_FLUSH_NOWRITE;
724 pipeCtlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE;
725 CM_CHK_MOSSTATUS_GOTOFINISH( mhwMiInterface->AddPipeControl( &mosCmdBuffer, nullptr, &pipeCtlParams ) );
726
727 if (state->svmBufferUsed || state->statelessBufferUsed)
728 {
729 // Find the SVM slot, patch it into this dummy pipe_control
730 for (uint32_t i = 0; i < state->cmDeviceParam.maxBufferTableSize; i++ )
731 {
732 //register resource here
733 if ( state->bufferTable[ i ].address )
734 {
735 CM_CHK_HRESULT_GOTOFINISH_MOSERROR( osInterface->pfnRegisterResource(
736 osInterface,
737 &state->bufferTable[ i ].osResource,
738 true,
739 false ) );
740 }
741
742 // sync resource
743 MOS_SURFACE mosSurface;
744 MOS_ZeroMemory(&mosSurface, sizeof(mosSurface));
745 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnGetResourceInfo(
746 osInterface,
747 &state->bufferTable[i].osResource,
748 &mosSurface));
749 mosSurface.OsResource = state->bufferTable[i].osResource;
750
751 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(state->pfnSurfaceSync(state, &mosSurface, false));
752 }
753 }
754
755 // Send Sync Tag
756 if (!state->dshEnabled || !(enableWalker || enableGpGpu))
757 {
758 CM_CHK_MOSSTATUS_GOTOFINISH( renderHal->pfnSendSyncTag( renderHal, &mosCmdBuffer ) );
759 }
760
761 // Update tracker resource
762 CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnUpdateTrackerResource(state, &mosCmdBuffer, tag));
763
764 // issue a PIPE_CONTROL to write timestamp
765 syncOffset += sizeof( uint64_t );
766 pipeCtlParams = g_cRenderHal_InitPipeControlParams;
767 pipeCtlParams.presDest = &state->renderTimeStampResource.osResource;
768 pipeCtlParams.dwResourceOffset = syncOffset;
769 pipeCtlParams.dwPostSyncOp = MHW_FLUSH_WRITE_TIMESTAMP_REG;
770 pipeCtlParams.dwFlushMode = MHW_FLUSH_READ_CACHE;
771 CM_CHK_MOSSTATUS_GOTOFINISH( mhwMiInterface->AddPipeControl( &mosCmdBuffer, nullptr, &pipeCtlParams ) );
772
773 // Record registers by unified media profiler in the end
774 if (state->perfProfiler != nullptr)
775 {
776 CM_CHK_MOSSTATUS_GOTOFINISH(state->perfProfiler->AddPerfCollectEndCmd((void *)state, state->osInterface, mhwMiInterface, &mosCmdBuffer));
777 }
778
779 // Add PipeControl to invalidate ISP and MediaState to avoid PageFault issue
780 MHW_PIPE_CONTROL_PARAMS pipeControlParams;
781
782 MOS_ZeroMemory(&pipeControlParams, sizeof(pipeControlParams));
783 pipeControlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE;
784 pipeControlParams.bGenericMediaStateClear = true;
785 pipeControlParams.bIndirectStatePointersDisable = true;
786 pipeControlParams.bDisableCSStall = false;
787 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddPipeControl(&mosCmdBuffer, nullptr, &pipeControlParams));
788
789 HalOcaInterface::On1stLevelBBEnd(mosCmdBuffer, *osInterface);
790 //Couple to the BB_START , otherwise GPU Hang without it
791 CM_CHK_MOSSTATUS_GOTOFINISH(mhwMiInterface->AddMiBatchBufferEnd(&mosCmdBuffer, nullptr ) );
792
793 // Return unused command buffer space to OS
794 osInterface->pfnReturnCommandBuffer( osInterface, &mosCmdBuffer, 0);
795
796 #if MDF_COMMAND_BUFFER_DUMP
797 if ( state->dumpCommandBuffer )
798 {
799 state->pfnDumpCommadBuffer(
800 state,
801 &mosCmdBuffer,
802 offsetof(PACKET_SURFACE_STATE, cmdSurfaceState),
803 mhw_state_heap_g11_X::RENDER_SURFACE_STATE_CMD::byteSize);
804 }
805 #endif
806
807
808 #if MDF_SURFACE_STATE_DUMP
809 if (state->dumpSurfaceState)
810 {
811 state->pfnDumpSurfaceState(
812 state,
813 offsetof(PACKET_SURFACE_STATE, cmdSurfaceState),
814 mhw_state_heap_g11_X::RENDER_SURFACE_STATE_CMD::byteSize);
815 }
816 #endif
817
818 CM_CHK_MOSSTATUS_GOTOFINISH( state->pfnGetGlobalTime( &state->taskTimeStamp->submitTimeInCpu[ taskId ] ) );
819 CM_CHK_MOSSTATUS_GOTOFINISH( state->pfnGetGpuTime( state, &state->taskTimeStamp->submitTimeInGpu[ taskId ] ) );
820
821 // Submit command buffer
822 CM_CHK_HRESULT_GOTOFINISH_MOSERROR( osInterface->pfnSubmitCommandBuffer( osInterface,
823 &mosCmdBuffer,
824 state->nullHwRenderCm ) );
825
826 if ( state->nullHwRenderCm == false )
827 {
828 stateHeap->pCurMediaState->bBusy = true;
829 if ( !enableWalker && !enableGpGpu )
830 {
831 batchBuffer->bBusy = true;
832 batchBuffer->dwSyncTag = syncTag;
833 }
834 }
835
836 // reset API call number of HW threads
837 state->maxHWThreadValues.apiValue = 0;
838
839 state->pfnReferenceCommandBuffer( &mosCmdBuffer.OsResource, cmdBuffer );
840
841 eStatus = MOS_STATUS_SUCCESS;
842
843 finish:
844 // Failed -> discard all changes in Command Buffer
845 if ( eStatus != MOS_STATUS_SUCCESS )
846 {
847 // Buffer overflow - display overflow size
848 if ( mosCmdBuffer.iRemaining < 0 )
849 {
850 CM_ASSERTMESSAGE( "Command Buffer overflow by %d bytes.", -mosCmdBuffer.iRemaining );
851 }
852
853 // Move command buffer back to beginning
854 tmp = remaining - mosCmdBuffer.iRemaining;
855 mosCmdBuffer.iRemaining = remaining;
856 mosCmdBuffer.iOffset -= tmp;
857 mosCmdBuffer.pCmdPtr = mosCmdBuffer.pCmdBase + mosCmdBuffer.iOffset / sizeof( uint32_t );
858
859 // Return unused command buffer space to OS
860 osInterface->pfnReturnCommandBuffer( osInterface, &mosCmdBuffer, 0);
861 }
862
863 return eStatus;
864 }
865
GetMediaWalkerMaxThreadWidth()866 uint32_t CM_HAL_G11_X::GetMediaWalkerMaxThreadWidth()
867 {
868 return CM_MAX_THREADSPACE_WIDTH_SKLUP_FOR_MW;
869 }
870
GetMediaWalkerMaxThreadHeight()871 uint32_t CM_HAL_G11_X::GetMediaWalkerMaxThreadHeight()
872 {
873 return CM_MAX_THREADSPACE_HEIGHT_SKLUP_FOR_MW;
874 }
875
GetHwSurfaceBTIInfo(PCM_SURFACE_BTI_INFO btiInfo)876 MOS_STATUS CM_HAL_G11_X::GetHwSurfaceBTIInfo(
877 PCM_SURFACE_BTI_INFO btiInfo)
878 {
879
880 if (btiInfo == nullptr)
881 {
882 return MOS_STATUS_NULL_POINTER;
883 }
884
885 btiInfo->normalSurfaceStart = CM_GLOBAL_SURFACE_INDEX_START_GEN9_PLUS + \
886 CM_GLOBAL_SURFACE_NUMBER + CM_GTPIN_SURFACE_NUMBER ;
887 btiInfo->normalSurfaceEnd = GT_RESERVED_INDEX_START_GEN9_PLUS - 1;
888 btiInfo->reservedSurfaceStart = CM_GLOBAL_SURFACE_INDEX_START_GEN9_PLUS;
889 btiInfo->reservedSurfaceEnd = CM_GLOBAL_SURFACE_NUMBER + CM_GTPIN_SURFACE_NUMBER;
890
891 return MOS_STATUS_SUCCESS;
892 }
893
UpdatePlatformInfoFromPower(PCM_PLATFORM_INFO platformInfo,bool euSaturated)894 MOS_STATUS CM_HAL_G11_X::UpdatePlatformInfoFromPower(
895 PCM_PLATFORM_INFO platformInfo,
896 bool euSaturated)
897 {
898 PCM_HAL_STATE state = m_cmState;
899 PRENDERHAL_INTERFACE renderHal = state->renderHal;
900 CM_POWER_OPTION cmPower;
901
902 if (state->requestSingleSlice ||
903 renderHal->bRequestSingleSlice ||
904 (state->powerOption.nSlice != 0 && state->powerOption.nSlice < platformInfo->numSlices))
905 {
906 platformInfo->numSubSlices = platformInfo->numSubSlices / platformInfo->numSlices;
907 if (state->powerOption.nSlice > 1)
908 {
909 platformInfo->numSubSlices *= state->powerOption.nSlice;
910 platformInfo->numSlices = state->powerOption.nSlice;
911 }
912 else
913 {
914 platformInfo->numSlices = 1;
915 }
916 }
917 else if (euSaturated)
918 {
919 // No SSD and EU Saturation, request maximum number of slices/subslices/EUs
920 cmPower.nSlice = (uint16_t)platformInfo->numSlices;
921 cmPower.nSubSlice = (uint16_t)platformInfo->numSubSlices;
922 cmPower.nEU = (uint16_t)(platformInfo->numEUsPerSubSlice * platformInfo->numSubSlices);
923
924 state->pfnSetPowerOption(state, &cmPower);
925 }
926
927 return MOS_STATUS_SUCCESS;
928 }
929
AllocateSIPCSRResource()930 MOS_STATUS CM_HAL_G11_X::AllocateSIPCSRResource()
931 {
932 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
933 if (Mos_ResourceIsNull(&m_cmState->sipResource.osResource))
934 {
935 // create sip resource if it does not exist
936 CM_CHK_MOSSTATUS_RETURN(HalCm_AllocateSipResource(m_cmState));
937 CM_CHK_MOSSTATUS_RETURN(HalCm_AllocateCSRResource(m_cmState));
938 }
939 return eStatus;
940 }
941
SetupHwDebugControl(PRENDERHAL_INTERFACE renderHal,PMOS_COMMAND_BUFFER cmdBuffer)942 MOS_STATUS CM_HAL_G11_X::SetupHwDebugControl(
943 PRENDERHAL_INTERFACE renderHal,
944 PMOS_COMMAND_BUFFER cmdBuffer)
945 {
946 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
947 MHW_MI_LOAD_REGISTER_IMM_PARAMS loadRegImm;
948
949 //---------------------------------------
950 CM_CHK_NULL_RETURN_MOSERROR( renderHal );
951 CM_CHK_NULL_RETURN_MOSERROR( renderHal->pMhwMiInterface );
952 CM_CHK_NULL_RETURN_MOSERROR( cmdBuffer );
953 //---------------------------------------
954
955 MOS_ZeroMemory( &loadRegImm, sizeof( MHW_MI_LOAD_REGISTER_IMM_PARAMS ) );
956
957 // CS_DEBUG_MODE2, global debug enable
958 loadRegImm.dwRegister = CS_DEBUG_MODE2;
959 loadRegImm.dwData = ( CS_DEBUG_MODE2_GLOBAL_DEBUG << 16 ) | CS_DEBUG_MODE2_GLOBAL_DEBUG;
960 CM_CHK_MOSSTATUS_RETURN( renderHal->pMhwMiInterface->AddMiLoadRegisterImmCmd( cmdBuffer, &loadRegImm ) );
961
962 // TD_CTL, force thread breakpoint enable
963 // Also enable external exception, because the source-level debugger has to
964 // be able to interrupt runing EU threads.
965 loadRegImm.dwRegister = TD_CTL;
966 loadRegImm.dwData = TD_CTL_FORCE_THREAD_BKPT_ENABLE | TD_CTL_FORCE_EXT_EXCEPTION_ENABLE;
967 CM_CHK_MOSSTATUS_RETURN( renderHal->pMhwMiInterface->AddMiLoadRegisterImmCmd( cmdBuffer, &loadRegImm ) );
968
969 return eStatus;
970 }
971
SetSuggestedL3Conf(L3_SUGGEST_CONFIG l3Config)972 MOS_STATUS CM_HAL_G11_X::SetSuggestedL3Conf(
973 L3_SUGGEST_CONFIG l3Config)
974 {
975 if (l3Config >= sizeof(ICL_L3_PLANE)/sizeof(L3ConfigRegisterValues))
976 {
977 return MOS_STATUS_INVALID_PARAMETER;
978 }
979 SetL3CacheConfig( &ICL_L3_PLANE[ l3Config ], &m_cmState->l3Settings );
980
981 return MOS_STATUS_SUCCESS;
982 }
983
GetGenStepInfo(char * & stepInfoStr)984 MOS_STATUS CM_HAL_G11_X::GetGenStepInfo(char*& stepInfoStr)
985 {
986 const char *cmSteppingInfoICL[] = { "A0", "B0", "C0", "D0", "E0" };
987
988 uint32_t genStepId = m_cmState->platform.usRevId;
989
990 uint32_t tablesize = sizeof(cmSteppingInfoICL) / sizeof(char *);
991
992 if (genStepId < tablesize)
993 {
994 stepInfoStr = (char *)cmSteppingInfoICL[genStepId];
995 }
996 else
997 {
998 stepInfoStr = nullptr;
999 }
1000 return MOS_STATUS_SUCCESS;
1001 }
1002
ColorCountSanityCheck(uint32_t colorCount)1003 int32_t CM_HAL_G11_X::ColorCountSanityCheck(uint32_t colorCount)
1004 {
1005 if (colorCount == CM_INVALID_COLOR_COUNT || colorCount > CM_THREADSPACE_MAX_COLOR_COUNT_GEN11)
1006 {
1007 CM_ASSERTMESSAGE("Error: Invalid color count.");
1008 return CM_INVALID_ARG_VALUE;
1009 }
1010 return CM_SUCCESS;
1011 }
1012
MemoryObjectCtrlPolicyCheck(uint32_t memCtrl)1013 bool CM_HAL_G11_X::MemoryObjectCtrlPolicyCheck(uint32_t memCtrl)
1014 {
1015 if (memCtrl >= MEMORY_OBJECT_CONTROL_TOTAL)
1016 {
1017 return false;
1018 }
1019 return true;
1020 }
1021
GetConvSamplerIndex(PMHW_SAMPLER_STATE_PARAM samplerParam,char * samplerIndexTable,int32_t nSamp8X8Num,int32_t nSampConvNum)1022 int32_t CM_HAL_G11_X::GetConvSamplerIndex(
1023 PMHW_SAMPLER_STATE_PARAM samplerParam,
1024 char *samplerIndexTable,
1025 int32_t nSamp8X8Num,
1026 int32_t nSampConvNum)
1027 {
1028 int32_t samplerIndex = 0;
1029
1030 if ((samplerParam->Convolve.ui8ConvolveType == CM_CONVOLVE_SKL_TYPE_2D) &&
1031 (samplerParam->Convolve.skl_mode))
1032 {
1033 // 2D convolve & SKL+
1034 samplerIndex = 1 + nSampConvNum + nSamp8X8Num;
1035 }
1036 else if (samplerParam->Convolve.ui8ConvolveType == CM_CONVOLVE_SKL_TYPE_1D)
1037 {
1038 // 1D convolve & SKL+
1039 samplerIndex = nSampConvNum;
1040 }
1041 else
1042 {
1043 // 1P convolve SKL+
1044 samplerIndex = 1 + (nSamp8X8Num + nSampConvNum) * 2;
1045 while (samplerIndexTable[samplerIndex] != CM_INVALID_INDEX)
1046 {
1047 samplerIndex += 2;
1048 }
1049
1050 }
1051 return samplerIndex;
1052 }
1053
SetL3CacheConfig(const L3ConfigRegisterValues * values,PCmHalL3Settings cmHalL3Setting)1054 MOS_STATUS CM_HAL_G11_X::SetL3CacheConfig(
1055 const L3ConfigRegisterValues *values,
1056 PCmHalL3Settings cmHalL3Setting )
1057 {
1058 // currently we have the following mapping for gen11:
1059 // config_register0->L3CntlReg
1060 // config_register1->L3TcCntlReg
1061
1062 cmHalL3Setting->overrideSettings = ( values->config_register0 || values->config_register1 );
1063 cmHalL3Setting->cntlRegOverride = ( values->config_register0 != 0 );
1064 cmHalL3Setting->tcCntlRegOverride = ( values->config_register1 != 0 );
1065 cmHalL3Setting->cntlReg = values->config_register0;
1066 cmHalL3Setting->tcCntlReg = values->config_register1;
1067
1068 return MOS_STATUS_SUCCESS;
1069 }
1070
GetSamplerParamInfoForSamplerType(PMHW_SAMPLER_STATE_PARAM mhwSamplerParam,SamplerParam & samplerParam)1071 MOS_STATUS CM_HAL_G11_X::GetSamplerParamInfoForSamplerType(
1072 PMHW_SAMPLER_STATE_PARAM mhwSamplerParam,
1073 SamplerParam &samplerParam)
1074 {
1075 const unsigned int samplerElementSize[MAX_ELEMENT_TYPE_COUNT] = {16, 32, 64, 128, 1024, 2048};
1076
1077 // gets element_type
1078 switch (mhwSamplerParam->SamplerType)
1079 {
1080 case MHW_SAMPLER_TYPE_3D:
1081 samplerParam.elementType = MHW_Sampler1Element;
1082 break;
1083 case MHW_SAMPLER_TYPE_AVS:
1084 samplerParam.elementType = MHW_Sampler128Elements;
1085 break;
1086 default:
1087 return MOS_STATUS_UNIMPLEMENTED;
1088 break;
1089 }
1090
1091 samplerParam.btiStepping = 1;
1092 samplerParam.btiMultiplier = samplerElementSize[samplerParam.elementType] / samplerParam.btiStepping;
1093 samplerParam.size = samplerElementSize[samplerParam.elementType];
1094
1095 return MOS_STATUS_SUCCESS;
1096 }
1097
GetExpectedGtSystemConfig(PCM_EXPECTED_GT_SYSTEM_INFO expectedConfig)1098 MOS_STATUS CM_HAL_G11_X::GetExpectedGtSystemConfig(
1099 PCM_EXPECTED_GT_SYSTEM_INFO expectedConfig)
1100 {
1101 expectedConfig->numSlices = 0;
1102 expectedConfig->numSubSlices = 0;
1103
1104 return MOS_STATUS_UNIMPLEMENTED;
1105 }
1106
ConverTicksToNanoSecondsDefault(uint64_t ticks)1107 uint64_t CM_HAL_G11_X::ConverTicksToNanoSecondsDefault(uint64_t ticks)
1108 {
1109 return (uint64_t)(ticks * CM_NS_PER_TICK_RENDER_G11_DEFAULT);
1110 }
1111
1112