1 /*
2 * Copyright (c) 2017-2020, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file codechal_encode_wp.cpp
24 //! \brief Defines base class for weighted prediction kernel
25 //!
26
27 #include "codechal_encoder_base.h"
28 #include "codechal_encode_wp.h"
29 #include "hal_oca_interface.h"
30
AllocateResources()31 MOS_STATUS CodechalEncodeWP::AllocateResources()
32 {
33 CODECHAL_ENCODE_FUNCTION_ENTER;
34
35 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
36
37 if (Mos_ResourceIsNull(&m_surfaceParams.weightedPredOutputPicList[m_surfaceParams.wpOutListIdx].OsResource))
38 {
39 MOS_ZeroMemory(&m_surfaceParams.weightedPredOutputPicList[m_surfaceParams.wpOutListIdx], sizeof(MOS_SURFACE));
40
41 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferNV12;
42 MOS_ZeroMemory(&allocParamsForBufferNV12, sizeof(MOS_ALLOC_GFXRES_PARAMS));
43 allocParamsForBufferNV12.Type = MOS_GFXRES_2D;
44 allocParamsForBufferNV12.TileType = MOS_TILE_Y;
45 allocParamsForBufferNV12.Format = Format_NV12;
46 allocParamsForBufferNV12.dwWidth = m_frameWidth;
47 allocParamsForBufferNV12.dwHeight = m_frameHeight;
48 allocParamsForBufferNV12.pBufName = "WP Scaled output Buffer";
49 CODECHAL_ENCODE_CHK_STATUS_MESSAGE_RETURN(m_osInterface->pfnAllocateResource(
50 m_osInterface,
51 &allocParamsForBufferNV12,
52 &m_surfaceParams.weightedPredOutputPicList[m_surfaceParams.wpOutListIdx].OsResource),
53 "Failed to allocate WP Scaled output Buffer.");
54
55 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetResourceInfo(m_osInterface,
56 &m_surfaceParams.weightedPredOutputPicList[m_surfaceParams.wpOutListIdx]));
57 }
58
59 return eStatus;
60 }
61
ReleaseResources()62 void CodechalEncodeWP::ReleaseResources()
63 {
64 for (auto i = 0; i < CODEC_NUM_WP_FRAME; i++)
65 {
66 if (!Mos_ResourceIsNull(&m_surfaceParams.weightedPredOutputPicList[i].OsResource))
67 {
68 m_osInterface->pfnFreeResource(
69 m_osInterface,
70 &m_surfaceParams.weightedPredOutputPicList[i].OsResource);
71 }
72 }
73 }
74
GetBTCount()75 uint8_t CodechalEncodeWP::GetBTCount()
76 {
77 return (uint8_t)wpNumSurfaces;
78 }
79
InitKernelState()80 MOS_STATUS CodechalEncodeWP::InitKernelState()
81 {
82 CODECHAL_ENCODE_FUNCTION_ENTER;
83
84 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
85
86 if (!m_kernelState)
87 {
88 CODECHAL_ENCODE_CHK_NULL_RETURN(m_kernelState = MOS_New(MHW_KERNEL_STATE));
89 }
90
91 uint8_t* binary;
92 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalGetKernelBinaryAndSize(
93 m_kernelBase,
94 m_kernelUID,
95 &binary,
96 &m_combinedKernelSize));
97
98 auto kernelSize = m_combinedKernelSize;
99 CODECHAL_KERNEL_HEADER currKrnHeader;
100
101 CODECHAL_ENCODE_CHK_STATUS_RETURN(pfnGetKernelHeaderAndSize(
102 binary,
103 ENC_WP,
104 0,
105 &currKrnHeader,
106 &kernelSize));
107
108 m_kernelState->KernelParams.iBTCount = wpNumSurfaces;
109 m_kernelState->KernelParams.iThreadCount = m_renderInterface->GetHwCaps()->dwMaxThreads;
110 m_kernelState->KernelParams.iCurbeLength = m_curbeLength;
111 m_kernelState->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;
112 m_kernelState->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;
113 m_kernelState->KernelParams.iIdCount = 1;
114 m_kernelState->KernelParams.iInlineDataLength = 0;
115 m_kernelState->dwCurbeOffset = m_stateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
116 m_kernelState->KernelParams.pBinary = binary + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
117 m_kernelState->KernelParams.iSize = kernelSize;
118 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->CalculateSshAndBtSizesRequested(
119 m_kernelState->KernelParams.iBTCount,
120 &m_kernelState->dwSshSize,
121 &m_kernelState->dwBindingTableSize));
122
123 CODECHAL_ENCODE_CHK_NULL_RETURN(m_renderInterface->m_stateHeapInterface);
124 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->MhwInitISH(m_renderInterface->m_stateHeapInterface, m_kernelState));
125
126 return eStatus;
127 }
128
SetCurbe()129 MOS_STATUS CodechalEncodeWP::SetCurbe()
130 {
131 CODECHAL_ENCODE_FUNCTION_ENTER;
132
133 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
134 CurbeData curbe;
135
136 MOS_ZeroMemory(&curbe, sizeof(CurbeData));
137 /* Weights[i][j][k][m] is interpreted as:
138
139 i refers to reference picture list 0 or 1;
140 j refers to reference list entry 0-31;
141 k refers to data for the luma (Y) component when it is 0, the Cb chroma component when it is 1 and the Cr chroma component when it is 2;
142 m refers to weight when it is 0 and offset when it is 1
143 */
144 //C Model hard code log2WeightDenom = 6. No need to send WD paramters to WP Kernel.
145 curbe.DW0.defaultWeight = m_curbeParams.slcParams->weights[m_curbeParams.refPicListIdx][m_curbeParams.wpIdx][0][0];
146 curbe.DW0.defaultOffset = m_curbeParams.slcParams->weights[m_curbeParams.refPicListIdx][m_curbeParams.wpIdx][0][1];
147
148 curbe.DW49.inputSurface = wpInputRefSurface;
149 curbe.DW50.outputSurface = wpOutputScaledSurface;
150
151 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_kernelState->m_dshRegion.AddData(
152 &curbe,
153 m_kernelState->dwCurbeOffset,
154 sizeof(curbe)));
155
156 return eStatus;
157 }
158
SendSurface(PMOS_COMMAND_BUFFER cmdBuffer)159 MOS_STATUS CodechalEncodeWP::SendSurface(PMOS_COMMAND_BUFFER cmdBuffer)
160 {
161 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
162
163 CODECHAL_ENCODE_FUNCTION_ENTER;
164
165 CODECHAL_ENCODE_CHK_NULL_RETURN(cmdBuffer);
166
167 auto currFieldPicture = CodecHal_PictureIsField(m_currOriginalPic);
168 // Program the surface based on current picture's field/frame mode
169 uint32_t refBindingTableOffset;
170 uint32_t refVerticalLineStride;
171 uint32_t refVerticalLineStrideOffset;
172 uint8_t refVDirection;
173 if (currFieldPicture) // if current picture is field
174 {
175 if (m_surfaceParams.refIsBottomField)
176 {
177 refVDirection = CODECHAL_VDIRECTION_BOT_FIELD;
178 refVerticalLineStride = CODECHAL_VLINESTRIDE_FIELD;
179 refVerticalLineStrideOffset = CODECHAL_VLINESTRIDEOFFSET_BOT_FIELD;
180 }
181 else
182 {
183 refVDirection = CODECHAL_VDIRECTION_TOP_FIELD;
184 refVerticalLineStride = CODECHAL_VLINESTRIDE_FIELD;
185 refVerticalLineStrideOffset = CODECHAL_VLINESTRIDEOFFSET_TOP_FIELD;
186 }
187 }
188 else // if current picture is frame
189 {
190 refVDirection = CODECHAL_VDIRECTION_FRAME;
191 refVerticalLineStride = CODECHAL_VLINESTRIDE_FRAME;
192 refVerticalLineStrideOffset = CODECHAL_VLINESTRIDEOFFSET_TOP_FIELD;
193 }
194
195 CODECHAL_SURFACE_CODEC_PARAMS surfaceCodecParams;
196 MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
197 surfaceCodecParams.bIs2DSurface = true;
198 surfaceCodecParams.bMediaBlockRW = true;
199 surfaceCodecParams.psSurface = m_surfaceParams.refFrameInput; // Input surface
200 surfaceCodecParams.bIsWritable = false;
201 surfaceCodecParams.bRenderTarget = false;
202 surfaceCodecParams.dwBindingTableOffset = wpInputRefSurface;
203 surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_WP_DOWNSAMPLED_ENCODE].Value;
204 surfaceCodecParams.dwVerticalLineStride = refVerticalLineStride;
205 surfaceCodecParams.dwVerticalLineStrideOffset = refVerticalLineStrideOffset;
206 surfaceCodecParams.ucVDirection = refVDirection;
207 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
208 m_hwInterface,
209 cmdBuffer,
210 &surfaceCodecParams,
211 m_kernelState));
212
213 MOS_ZeroMemory(&surfaceCodecParams, sizeof(surfaceCodecParams));
214 surfaceCodecParams.bIs2DSurface = true;
215 surfaceCodecParams.bMediaBlockRW = true;
216 surfaceCodecParams.psSurface = &m_surfaceParams.weightedPredOutputPicList[m_surfaceParams.wpOutListIdx]; // output surface
217 surfaceCodecParams.bIsWritable = true;
218 surfaceCodecParams.bRenderTarget = true;
219 surfaceCodecParams.dwBindingTableOffset = wpOutputScaledSurface;
220 surfaceCodecParams.dwCacheabilityControl = m_hwInterface->GetCacheabilitySettings()[MOS_CODEC_RESOURCE_USAGE_SURFACE_WP_DOWNSAMPLED_ENCODE].Value;
221 surfaceCodecParams.dwVerticalLineStride = refVerticalLineStride;
222 surfaceCodecParams.dwVerticalLineStrideOffset = refVerticalLineStrideOffset;
223 surfaceCodecParams.ucVDirection = refVDirection;
224 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalSetRcsSurfaceState(
225 m_hwInterface,
226 cmdBuffer,
227 &surfaceCodecParams,
228 m_kernelState));
229
230 return eStatus;
231 }
232
Execute(KernelParams * params)233 MOS_STATUS CodechalEncodeWP::Execute(KernelParams *params)
234 {
235 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
236
237 CODECHAL_ENCODE_FUNCTION_ENTER;
238
239 CODECHAL_ENCODE_CHK_NULL_RETURN(params);
240
241 if (params->slcWPParams && params->slcWPParams->luma_log2_weight_denom != 6)
242 {
243 eStatus = MOS_STATUS_INVALID_PARAMETER;
244 CODECHAL_ENCODE_ASSERTMESSAGE("Weighted Prediction Kernel does not support Log2LumaWeightDenom != 6!");
245 return eStatus;
246 }
247
248 PerfTagSetting perfTag;
249 CODECHAL_ENCODE_SET_PERFTAG_INFO(perfTag, CODECHAL_ENCODE_PERFTAG_CALL_WP_KERNEL);
250
251 if (params->useRefPicList1)
252 {
253 *(params->useWeightedSurfaceForL1) = true;
254 m_surfaceParams.wpOutListIdx = CODEC_WP_OUTPUT_L1_START + params->wpIndex;
255 }
256 else
257 {
258 *(params->useWeightedSurfaceForL0) = true;
259 m_surfaceParams.wpOutListIdx = CODEC_WP_OUTPUT_L0_START + params->wpIndex;
260 }
261 if (m_surfaceParams.wpOutListIdx >= CODEC_NUM_WP_FRAME)
262 {
263 eStatus = MOS_STATUS_INVALID_PARAMETER;
264 CODECHAL_ENCODE_ASSERTMESSAGE("index exceeds maximum value of array weightedPredOutputPicList.");
265 return eStatus;
266 }
267
268 // Allocate output surface
269 CODECHAL_ENCODE_CHK_STATUS_RETURN(AllocateResources());
270
271 // If Single Task Phase is not enabled, use BT count for the kernel state.
272 if (m_firstTaskInPhase == true || !m_singleTaskPhaseSupported)
273 {
274 auto maxBtCount = m_singleTaskPhaseSupported ?
275 m_maxBtCount : m_kernelState->KernelParams.iBTCount;
276 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->RequestSshSpaceForCmdBuf(maxBtCount));
277 m_vmeStatesSize = m_hwInterface->GetKernelLoadCommandSize(maxBtCount);
278 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->VerifySpaceAvailable());
279 }
280
281 // setup DSH and Interface Descriptor
282 auto stateHeapInterface = m_renderInterface->m_stateHeapInterface;
283 CODECHAL_ENCODE_CHK_NULL_RETURN(stateHeapInterface);
284 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->AssignDshAndSshSpace(
285 stateHeapInterface,
286 m_kernelState,
287 false,
288 0,
289 false,
290 m_storeData));
291
292 MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
293 MOS_ZeroMemory(&idParams, sizeof(idParams));
294 idParams.pKernelState = m_kernelState;
295 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->SetInterfaceDescriptor(1, &idParams));
296
297 // Setup Curbe
298 m_curbeParams.refPicListIdx = (params->useRefPicList1) ? LIST_1 : LIST_0;
299 m_curbeParams.wpIdx = params->wpIndex;
300 m_curbeParams.slcParams = params->slcWPParams;
301 CODECHAL_ENCODE_CHK_STATUS_RETURN(SetCurbe());
302
303 auto encFunctionType = CODECHAL_MEDIA_STATE_ENC_WP;
304 CODECHAL_DEBUG_TOOL(
305 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
306 encFunctionType,
307 MHW_DSH_TYPE,
308 m_kernelState));
309 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCurbe(
310 encFunctionType,
311 m_kernelState));
312 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
313 encFunctionType,
314 MHW_ISH_TYPE,
315 m_kernelState));
316 )
317
318 MOS_COMMAND_BUFFER cmdBuffer;
319 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
320
321 SendKernelCmdsParams sendKernelCmdsParams = SendKernelCmdsParams();
322 sendKernelCmdsParams.EncFunctionType = encFunctionType;
323 sendKernelCmdsParams.pKernelState = m_kernelState;
324 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->SendGenericKernelCmds(&cmdBuffer, &sendKernelCmdsParams));
325
326 // add binding table
327 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->SetBindingTable(m_kernelState));
328
329 (params->useRefPicList1) ? (*params->useWeightedSurfaceForL1 = true) : (*params->useWeightedSurfaceForL0 = true);
330 CodecHalGetResourceInfo(m_osInterface, params->refFrameInput);
331
332 //Set Surface States
333 m_surfaceParams.refFrameInput = params->refFrameInput;
334 m_surfaceParams.refIsBottomField = params->refIsBottomField;
335 CODECHAL_ENCODE_CHK_STATUS_RETURN(SendSurface(&cmdBuffer));
336
337 CODECHAL_DEBUG_TOOL(
338 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpKernelRegion(
339 encFunctionType,
340 MHW_SSH_TYPE,
341 m_kernelState));
342 )
343
344 // Thread Dispatch Pattern - MEDIA OBJECT WALKER
345 if (m_hwWalker)
346 {
347 auto resolutionX = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_frameWidth);
348 auto resolutionY = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_frameFieldHeight);
349
350 CODECHAL_WALKER_CODEC_PARAMS walkerCodecParams;
351 MOS_ZeroMemory(&walkerCodecParams, sizeof(walkerCodecParams));
352 walkerCodecParams.WalkerMode = m_walkerMode;
353 walkerCodecParams.bUseScoreboard = m_useHwScoreboard;
354 walkerCodecParams.dwResolutionX = resolutionX;
355 walkerCodecParams.dwResolutionY = resolutionY;
356 walkerCodecParams.bGroupIdSelectSupported = m_groupIdSelectSupported;
357 walkerCodecParams.ucGroupId = m_groupId;
358 walkerCodecParams.bNoDependency = true;
359
360 MHW_WALKER_PARAMS walkerParams;
361 CODECHAL_ENCODE_CHK_STATUS_RETURN(CodecHalInitMediaObjectWalkerParams(m_hwInterface, &walkerParams, &walkerCodecParams));
362
363 HalOcaInterface::TraceMessage(cmdBuffer, (MOS_CONTEXT_HANDLE)m_osInterface->pOsContext, __FUNCTION__, sizeof(__FUNCTION__));
364 HalOcaInterface::OnDispatch(cmdBuffer, *m_osInterface, *m_miInterface, *m_renderInterface->GetMmioRegisters());
365
366 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_renderInterface->AddMediaObjectWalkerCmd(&cmdBuffer, &walkerParams));
367 }
368
369 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_encoder->EndStatusReport(&cmdBuffer, encFunctionType));
370
371 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->SubmitBlocks(m_kernelState));
372 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
373 {
374 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_stateHeapInterface->UpdateGlobalCmdBufId());
375 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(&cmdBuffer, nullptr));
376 }
377
378 CODECHAL_DEBUG_TOOL(CODECHAL_ENCODE_CHK_STATUS_RETURN(m_debugInterface->DumpCmdBuffer(
379 &cmdBuffer,
380 encFunctionType,
381 nullptr)));
382
383 CODECHAL_ENCODE_CHK_STATUS_RETURN(m_hwInterface->UpdateSSEuForCmdBuffer(
384 &cmdBuffer, m_singleTaskPhaseSupported, m_lastTaskInPhase));
385
386 m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
387
388 if (!m_singleTaskPhaseSupported || m_lastTaskInPhase)
389 {
390 HalOcaInterface::On1stLevelBBEnd(cmdBuffer, *m_osInterface);
391 m_osInterface->pfnSubmitCommandBuffer(m_osInterface, &cmdBuffer, m_renderContextUsesNullHw);
392 m_lastTaskInPhase = false;
393 }
394
395 return eStatus;
396 }
397
CodechalEncodeWP(CodechalEncoderState * encoder)398 CodechalEncodeWP::CodechalEncodeWP(CodechalEncoderState *encoder)
399 : m_useHwScoreboard(encoder->m_useHwScoreboard),
400 m_renderContextUsesNullHw(encoder->m_renderContextUsesNullHw),
401 m_groupIdSelectSupported(encoder->m_groupIdSelectSupported),
402 m_singleTaskPhaseSupported(encoder->m_singleTaskPhaseSupported),
403 m_firstTaskInPhase(encoder->m_firstTaskInPhase),
404 m_lastTaskInPhase(encoder->m_lastTaskInPhase),
405 m_hwWalker(encoder->m_hwWalker),
406 m_groupId(encoder->m_groupId),
407 m_pictureCodingType(encoder->m_pictureCodingType),
408 m_mode(encoder->m_mode),
409 m_verticalLineStride(encoder->m_verticalLineStride),
410 m_maxBtCount(encoder->m_maxBtCount),
411 m_vmeStatesSize(encoder->m_vmeStatesSize),
412 m_storeData(encoder->m_storeData),
413 m_frameWidth(encoder->m_frameWidth),
414 m_frameHeight(encoder->m_frameHeight),
415 m_frameFieldHeight(encoder->m_frameFieldHeight),
416 m_currOriginalPic(encoder->m_currOriginalPic),
417 m_walkerMode(encoder->m_walkerMode)
418 {
419 CODECHAL_ENCODE_CHK_NULL_NO_STATUS_RETURN(encoder);
420
421 // Initilize interface pointers
422 m_encoder = encoder;
423 m_osInterface = encoder->GetOsInterface();
424 m_hwInterface = encoder->GetHwInterface();
425 m_debugInterface = encoder->GetDebugInterface();
426 m_miInterface = m_hwInterface->GetMiInterface();
427 m_renderInterface = m_hwInterface->GetRenderInterface();
428 m_stateHeapInterface = m_renderInterface->m_stateHeapInterface->pStateHeapInterface;
429 m_curbeLength = sizeof(CurbeData);
430 }
431
~CodechalEncodeWP()432 CodechalEncodeWP::~CodechalEncodeWP()
433 {
434 // free weighted prediction surface
435 ReleaseResources();
436
437 MOS_Delete(m_kernelState);
438 m_kernelState = nullptr;
439 }