1 /*
2 * Copyright (c) 2021, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file encode_hevc_vdenc_packet.cpp
24 //! \brief Defines the interface for hevc encode vdenc packet
25 //!
26 #include "encode_hevc_vdenc_packet.h"
27 #include "mos_solo_generic.h"
28 #include "encode_vdenc_lpla_analysis.h"
29 #include "encode_hevc_vdenc_weighted_prediction.h"
30 #include "mhw_mi_itf.h"
31 #include "media_perf_profiler.h"
32 #include "codec_hw_next.h"
33 #include "hal_oca_interface_next.h"
34
35 using namespace mhw::vdbox;
36
37 namespace encode
38 {
AllocateResources()39 MOS_STATUS HevcVdencPkt::AllocateResources()
40 {
41 ENCODE_FUNC_CALL();
42
43 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
44
45 ENCODE_CHK_NULL_RETURN(m_allocator);
46 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
47 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
48 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
49 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
50 allocParamsForBufferLinear.Format = Format_Buffer;
51
52 allocParamsForBufferLinear.dwBytes = MOS_ROUNDUP_DIVIDE(m_basicFeature->m_frameWidth, m_basicFeature->m_maxLCUSize) * CODECHAL_CACHELINE_SIZE * 2 * 2;
53 allocParamsForBufferLinear.pBufName = "vdencIntraRowStoreScratch";
54 allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ;
55 m_vdencIntraRowStoreScratch = m_allocator->AllocateResource(allocParamsForBufferLinear, false);
56
57 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
58 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
59 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
60 allocParamsForBufferLinear.Format = Format_Buffer;
61
62 // VDENC tile row store buffer
63 allocParamsForBufferLinear.dwBytes = MOS_ROUNDUP_DIVIDE(m_basicFeature->m_frameWidth, 32) * CODECHAL_CACHELINE_SIZE * 2;
64 allocParamsForBufferLinear.pBufName = "VDENC Tile Row Store Buffer";
65 allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ;
66 m_vdencTileRowStoreBuffer = m_allocator->AllocateResource(allocParamsForBufferLinear,false);
67
68 hcp::HcpBufferSizePar hcpBufSizePar;
69 MOS_ZeroMemory(&hcpBufSizePar, sizeof(hcpBufSizePar));
70
71 hcpBufSizePar.ucMaxBitDepth = m_basicFeature->m_bitDepth;
72 hcpBufSizePar.ucChromaFormat = m_basicFeature->m_chromaFormat;
73 // We should move the buffer allocation to picture level if the size is dependent on LCU size
74 hcpBufSizePar.dwCtbLog2SizeY = 6; //assume Max LCU size
75 hcpBufSizePar.dwPicWidth = MOS_ALIGN_CEIL(m_basicFeature->m_frameWidth, m_basicFeature->m_maxLCUSize);
76 hcpBufSizePar.dwPicHeight = MOS_ALIGN_CEIL(m_basicFeature->m_frameHeight, m_basicFeature->m_maxLCUSize);
77
78 auto AllocateHcpBuffer = [&](PMOS_RESOURCE &res, const hcp::HCP_INTERNAL_BUFFER_TYPE bufferType, const char *bufferName) {
79 uint32_t bufSize = 0;
80 hcpBufSizePar.bufferType = bufferType;
81 eStatus = m_hcpItf->GetHcpBufSize(hcpBufSizePar, bufSize);
82 if (eStatus != MOS_STATUS_SUCCESS)
83 {
84 ENCODE_ASSERTMESSAGE("Failed to get hcp buffer size.");
85 return eStatus;
86 }
87 allocParamsForBufferLinear.dwBytes = bufSize;
88 allocParamsForBufferLinear.pBufName = bufferName;
89 allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ;
90 res = m_allocator->AllocateResource(allocParamsForBufferLinear, false);
91 return MOS_STATUS_SUCCESS;
92 };
93
94 // Metadata Line buffer
95 ENCODE_CHK_STATUS_RETURN(AllocateHcpBuffer(m_resMetadataLineBuffer, hcp::HCP_INTERNAL_BUFFER_TYPE::META_LINE, "MetadataLineBuffer"));
96 // Metadata Tile Line buffer
97 ENCODE_CHK_STATUS_RETURN(AllocateHcpBuffer(m_resMetadataTileLineBuffer, hcp::HCP_INTERNAL_BUFFER_TYPE::META_TILE_LINE, "MetadataTileLineBuffer"));
98 // Metadata Tile Column buffer
99 ENCODE_CHK_STATUS_RETURN(AllocateHcpBuffer(m_resMetadataTileColumnBuffer, hcp::HCP_INTERNAL_BUFFER_TYPE::META_TILE_COL, "MetadataTileColumnBuffer"));
100
101 // Lcu ILDB StreamOut buffer
102 // This is not enabled with HCP_PIPE_MODE_SELECT yet, placeholder here
103 allocParamsForBufferLinear.dwBytes = CODECHAL_CACHELINE_SIZE;
104 allocParamsForBufferLinear.pBufName = "LcuILDBStreamOutBuffer";
105 allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ;
106 m_resLCUIldbStreamOutBuffer = m_allocator->AllocateResource(allocParamsForBufferLinear,false);
107
108 // Allocate SSE Source Pixel Row Store Buffer
109 uint32_t maxTileColumns = MOS_ROUNDUP_DIVIDE(m_basicFeature->m_frameWidth, CODECHAL_HEVC_MIN_TILE_SIZE);
110 allocParamsForBufferLinear.dwBytes = 2 * m_basicFeature->m_sizeOfSseSrcPixelRowStoreBufferPerLcu * (m_basicFeature->m_widthAlignedMaxLCU + 3 * maxTileColumns);
111 allocParamsForBufferLinear.pBufName = "SseSrcPixelRowStoreBuffer";
112 allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ;
113 m_resSSESrcPixelRowStoreBuffer = m_allocator->AllocateResource(allocParamsForBufferLinear,false);
114
115 uint32_t frameWidthInCus = CODECHAL_GET_WIDTH_IN_BLOCKS(m_basicFeature->m_frameWidth, CODECHAL_HEVC_MIN_CU_SIZE);
116 uint32_t frameHeightInCus = CODECHAL_GET_WIDTH_IN_BLOCKS(m_basicFeature->m_frameHeight, CODECHAL_HEVC_MIN_CU_SIZE);
117 uint32_t frameWidthInLCUs = CODECHAL_GET_WIDTH_IN_BLOCKS(m_basicFeature->m_frameWidth, CODECHAL_HEVC_VDENC_LCU_SIZE);
118 uint32_t frameHeightInLCUs = CODECHAL_GET_WIDTH_IN_BLOCKS(m_basicFeature->m_frameHeight, CODECHAL_HEVC_VDENC_LCU_SIZE);
119 // PAK CU Level Streamout Data: DW57-59 in HCP pipe buffer address command
120 // One CU has 16-byte. But, each tile needs to be aliged to the cache line
121 auto size = MOS_ALIGN_CEIL(frameWidthInCus * frameHeightInCus * 16, CODECHAL_CACHELINE_SIZE);
122 allocParamsForBufferLinear.dwBytes = size;
123 allocParamsForBufferLinear.pBufName = "PAK CU Level Streamout Data";
124 allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ;
125 m_resPakcuLevelStreamOutData = m_allocator->AllocateResource(allocParamsForBufferLinear,false);
126
127 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
128 allocParamsForBufferLinear.Format = Format_Buffer;
129 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
130 allocParamsForBufferLinear.dwBytes = frameWidthInLCUs * frameHeightInLCUs * 4;
131 allocParamsForBufferLinear.pBufName = "VDEnc Cumulative CU Count Streamout Surface";
132 m_resCumulativeCuCountStreamoutBuffer = m_allocator->AllocateResource(allocParamsForBufferLinear, false);
133
134 if(m_osInterface->bInlineCodecStatusUpdate)
135 {
136 m_atomicScratchBuf.size = MOS_ALIGN_CEIL(sizeof(AtomicScratchBuffer), sizeof(uint64_t));
137 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
138 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
139 allocParamsForBufferLinear.Format = Format_Buffer;
140 allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ_WRITE_CACHE;
141 size = MHW_CACHELINE_SIZE * 4 * 2; // each set of scratch is 4 cacheline size, and allocate 2 set.
142 allocParamsForBufferLinear.dwBytes = size;
143 allocParamsForBufferLinear.pBufName = "atomic sratch buffer";
144
145 if (MEDIA_IS_SKU(m_hwInterface->GetSkuTable(), FtrLocalMemory))
146 {
147 allocParamsForBufferLinear.dwMemType = MOS_MEMPOOL_DEVICEMEMORY;
148 }
149 else
150 {
151 allocParamsForBufferLinear.dwMemType = MOS_MEMPOOL_VIDEOMEMORY;
152 }
153
154 m_atomicScratchBuf.resAtomicScratchBuffer = m_allocator->AllocateResource(allocParamsForBufferLinear, false);
155
156 ENCODE_CHK_NULL_RETURN(m_atomicScratchBuf.resAtomicScratchBuffer);
157
158 m_atomicScratchBuf.size = size;
159 m_atomicScratchBuf.zeroValueOffset = 0;
160 m_atomicScratchBuf.operand1Offset = MHW_CACHELINE_SIZE;
161 m_atomicScratchBuf.operand2Offset = MHW_CACHELINE_SIZE * 2;
162 m_atomicScratchBuf.operand3Offset = MHW_CACHELINE_SIZE * 3;
163 m_atomicScratchBuf.encodeUpdateIndex = 0;
164 m_atomicScratchBuf.tearDownIndex = 1;
165 m_atomicScratchBuf.operandSetSize = MHW_CACHELINE_SIZE * 4;
166 }
167
168 #if USE_CODECHAL_DEBUG_TOOL && _ENCODE_RESERVED
169 m_hevcParDump = std::make_shared<HevcVdencParDump>(m_pipeline);
170 #endif // USE_CODECHAL_DEBUG_TOOL
171
172 return eStatus;
173 }
174
Prepare()175 MOS_STATUS HevcVdencPkt::Prepare()
176 {
177 ENCODE_FUNC_CALL();
178
179 m_pictureStatesSize = m_defaultPictureStatesSize;
180 m_picturePatchListSize = m_defaultPicturePatchListSize;
181 m_sliceStatesSize = m_defaultSliceStatesSize;
182 m_slicePatchListSize = m_defaultSlicePatchListSize;
183
184 HevcPipeline *pipeline = dynamic_cast<HevcPipeline *>(m_pipeline);
185 ENCODE_CHK_NULL_RETURN(pipeline);
186
187 m_hevcSeqParams = m_basicFeature->m_hevcSeqParams;
188 m_hevcPicParams = m_basicFeature->m_hevcPicParams;
189 m_hevcSliceParams = m_basicFeature->m_hevcSliceParams;
190 m_hevcIqMatrixParams = m_basicFeature->m_hevcIqMatrixParams;
191 m_nalUnitParams = m_basicFeature->m_nalUnitParams;
192
193 ENCODE_CHK_STATUS_RETURN(ValidateVdboxIdx(m_vdboxIndex));
194
195 m_pakOnlyPass = false;
196
197 ENCODE_CHK_STATUS_RETURN(SetBatchBufferForPakSlices());
198
199 SetRowstoreCachingOffsets();
200
201 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, SetPipeNumber, m_pipeline->GetPipeNum());
202
203 return MOS_STATUS_SUCCESS;
204 }
205
SubmitPictureLevel(MOS_COMMAND_BUFFER * commandBuffer,uint8_t packetPhase)206 MOS_STATUS HevcVdencPkt::SubmitPictureLevel(
207 MOS_COMMAND_BUFFER* commandBuffer,
208 uint8_t packetPhase)
209 {
210 ENCODE_FUNC_CALL();
211
212 MOS_COMMAND_BUFFER& cmdBuffer = *commandBuffer;
213 ENCODE_CHK_STATUS_RETURN(Mos_Solo_PreProcessEncode(m_osInterface, &m_basicFeature->m_resBitstreamBuffer, &m_basicFeature->m_reconSurface));
214
215 // Ensure the input is ready to be read.
216 // Currently, mos RegisterResource has sync limitation for Raw resource.
217 // Temporaly, call Resource Wait to do the sync explicitly.
218 if(m_pipeline->IsFirstPass())
219 {
220 MOS_SYNC_PARAMS syncParams;
221 syncParams = g_cInitSyncParams;
222 syncParams.GpuContext = m_osInterface->pfnGetGpuContext(m_osInterface);
223 syncParams.presSyncResource = &m_basicFeature->m_rawSurface.OsResource;
224 syncParams.bReadOnly = true;
225 ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnResourceWait(m_osInterface, &syncParams));
226 m_osInterface->pfnSetResourceSyncTag(m_osInterface, &syncParams);
227 }
228
229 ENCODE_CHK_STATUS_RETURN(PatchPictureLevelCommands(packetPhase, cmdBuffer));
230
231 return MOS_STATUS_SUCCESS;
232 }
233
SubmitTileLevel(MOS_COMMAND_BUFFER * commandBuffer,uint8_t packetPhase)234 MOS_STATUS HevcVdencPkt::SubmitTileLevel(
235 MOS_COMMAND_BUFFER* commandBuffer,
236 uint8_t packetPhase)
237 {
238 ENCODE_FUNC_CALL();
239 auto eStatus = MOS_STATUS_SUCCESS;
240
241 if (!m_hevcPicParams->tiles_enabled_flag)
242 {
243 return MOS_STATUS_INVALID_PARAMETER;
244 }
245
246 MOS_COMMAND_BUFFER& cmdBuffer = *commandBuffer;
247
248 ENCODE_CHK_STATUS_RETURN(Construct3rdLevelBatch());
249
250 uint16_t numTileColumns = 1;
251 uint16_t numTileRows = 1;
252 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetTileRowColumns, numTileRows, numTileColumns);
253
254 if (m_pipeline->GetPipeNum() == 2)
255 {
256 ENCODE_CHK_STATUS_RETURN(AddOneTileCommands(
257 cmdBuffer,
258 m_pipeline->GetCurrentRow(),
259 m_pipeline->GetCurrentPipe(),
260 m_pipeline->GetCurrentSubPass()));
261 }
262 else
263 {
264 for (uint16_t tileCol = 0; tileCol < numTileColumns; tileCol++)
265 {
266 ENCODE_CHK_STATUS_RETURN(AddOneTileCommands(
267 cmdBuffer,
268 m_pipeline->GetCurrentRow(),
269 tileCol,
270 m_pipeline->GetCurrentPass()));
271 }
272 }
273
274 // Insert end of sequence/stream if set
275 if ((m_basicFeature->m_lastPicInSeq || m_basicFeature->m_lastPicInStream) && m_pipeline->IsLastPipe())
276 {
277 ENCODE_CHK_STATUS_RETURN(InsertSeqStreamEnd(cmdBuffer));
278 }
279
280 if (m_pipeline->GetCurrentRow() == (numTileRows - 1))
281 {
282 // Send VD_CONTROL_STATE (Memory Implict Flush)
283 auto &vdControlStateParams = m_miItf->MHW_GETPAR_F(VD_CONTROL_STATE)();
284 vdControlStateParams = {};
285 vdControlStateParams.memoryImplicitFlush = true;
286 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(VD_CONTROL_STATE)(&cmdBuffer));
287
288 m_flushCmd = waitHevc;
289 SETPAR_AND_ADDCMD(VD_PIPELINE_FLUSH, m_vdencItf, &cmdBuffer);
290
291 ENCODE_CHK_STATUS_RETURN(EnsureAllCommandsExecuted(cmdBuffer));
292
293 // Wait all pipe cmds done for the packet
294 auto scalability = m_pipeline->GetMediaScalability();
295 ENCODE_CHK_STATUS_RETURN(scalability->SyncPipe(syncOnePipeWaitOthers, 0, &cmdBuffer));
296
297 // Store PAK frameSize MMIO to PakInfo buffer
298 auto mmioRegisters = m_hcpItf->GetMmioRegisters(m_vdboxIndex);
299 auto &miStoreRegMemParams = m_miItf->MHW_GETPAR_F(MI_STORE_REGISTER_MEM)();
300 miStoreRegMemParams = {};
301 miStoreRegMemParams.presStoreBuffer = m_basicFeature->m_recycleBuf->GetBuffer(PakInfo, 0);;
302 miStoreRegMemParams.dwOffset = 0;
303 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncBitstreamBytecountFrameRegOffset;
304 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(&cmdBuffer));
305
306 ENCODE_CHK_STATUS_RETURN(Mos_Solo_PostProcessEncode(m_osInterface, &m_basicFeature->m_resBitstreamBuffer, &m_basicFeature->m_reconSurface));
307 }
308 // post-operations are done by pak integrate pkt
309
310 return MOS_STATUS_SUCCESS;
311 }
312
Submit(MOS_COMMAND_BUFFER * commandBuffer,uint8_t packetPhase)313 MOS_STATUS HevcVdencPkt::Submit(
314 MOS_COMMAND_BUFFER* commandBuffer,
315 uint8_t packetPhase)
316 {
317 ENCODE_FUNC_CALL();
318
319 if (m_submitState == submitFrameByDefault)
320 {
321 ENCODE_CHK_STATUS_RETURN(SubmitPictureLevel(commandBuffer, packetPhase));
322
323 MOS_COMMAND_BUFFER& cmdBuffer = *commandBuffer;
324
325 bool tileEnabled = false;
326 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, IsEnabled, tileEnabled);
327 if (!tileEnabled)
328 {
329 ENCODE_CHK_STATUS_RETURN(PatchSliceLevelCommands(cmdBuffer, packetPhase));
330 }
331 else
332 {
333 ENCODE_CHK_STATUS_RETURN(PatchTileLevelCommands(cmdBuffer, packetPhase));
334 }
335
336 ENCODE_CHK_STATUS_RETURN(Mos_Solo_PostProcessEncode(m_osInterface, &m_basicFeature->m_resBitstreamBuffer, &m_basicFeature->m_reconSurface));
337 }
338 else
339 {
340 switch (m_submitState)
341 {
342 case submitPic:
343 {
344 ENCODE_CHK_STATUS_RETURN(SubmitPictureLevel(commandBuffer, packetPhase));
345 m_submitState = submitInvalid;
346 break;
347 };
348 case submitTile:
349 {
350 ENCODE_FUNC_CALL();
351
352 bool tileEnabled = false;
353 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, IsEnabled, tileEnabled);
354 ENCODE_CHK_STATUS_RETURN(SubmitTileLevel(commandBuffer, packetPhase));
355
356 m_submitState = submitInvalid;
357 break;
358 };
359 default:
360 m_submitState = submitInvalid;
361 break;
362 }
363 }
364
365 m_enableVdencStatusReport = true;
366
367 #if USE_CODECHAL_DEBUG_TOOL && _ENCODE_RESERVED
368 m_hevcParDump->SetParFile();
369 ENCODE_CHK_STATUS_RETURN(DumpInput());
370 #endif
371
372 return MOS_STATUS_SUCCESS;
373 }
374
PatchPictureLevelCommands(const uint8_t & packetPhase,MOS_COMMAND_BUFFER & cmdBuffer)375 MOS_STATUS HevcVdencPkt::PatchPictureLevelCommands(const uint8_t &packetPhase, MOS_COMMAND_BUFFER &cmdBuffer)
376 {
377 ENCODE_FUNC_CALL();
378
379 cmdBuffer.Attributes.bFrequencyBoost = (m_basicFeature->m_hevcSeqParams->ScenarioInfo == ESCENARIO_REMOTEGAMING);
380
381 ENCODE_CHK_STATUS_RETURN(m_miItf->SetWatchdogTimerThreshold(m_basicFeature->m_frameWidth, m_basicFeature->m_frameHeight, true));
382
383 SetPerfTag();
384
385 auto feature = dynamic_cast<HEVCEncodeBRC*>(m_featureManager->GetFeature(HevcFeatureIDs::hevcBrcFeature));
386 ENCODE_CHK_NULL_RETURN(feature);
387 bool firstTaskInPhase = packetPhase & firstPacket;
388 if (!m_pipeline->IsSingleTaskPhaseSupported() || firstTaskInPhase)//(m_pipeline->IsFirstPass() && !feature->IsVdencHucUsed())) && m_pipeline->GetPipeNum() == 1) || m_pipeline->GetPipeNum() >= 2)
389 {
390 ENCODE_CHK_STATUS_RETURN(AddForceWakeup(cmdBuffer));
391
392 // Send command buffer header at the beginning (OS dependent)
393 ENCODE_CHK_STATUS_RETURN(SendPrologCmds(cmdBuffer));
394 }
395
396 if (m_pipeline->GetPipeNum() >= 2)
397 {
398 auto scalability = m_pipeline->GetMediaScalability();
399 if (m_pipeline->IsFirstPass())
400 {
401 // Reset multi-pipe sync semaphores
402 ENCODE_CHK_STATUS_RETURN(scalability->ResetSemaphore(syncOnePipeWaitOthers, m_pipeline->GetCurrentPipe(), &cmdBuffer));
403 }
404
405 // For brc case, other pipes wait for BRCupdate done on first pipe
406 // For cqp case, pipes also need sync
407 ENCODE_CHK_STATUS_RETURN(scalability->SyncPipe(syncOtherPipesForOne, 0, &cmdBuffer));
408 }
409
410 m_streamInEnabled = false;
411 RUN_FEATURE_INTERFACE_RETURN(VdencLplaAnalysis, HevcFeatureIDs::vdencLplaAnalysisFeature,
412 EnableStreamIn, m_pipeline->IsFirstPass(), m_pipeline->IsLastPass(), m_streamInEnabled);
413
414 ENCODE_CHK_STATUS_RETURN(AddCondBBEndForLastPass(cmdBuffer));
415
416 if (m_pipeline->IsFirstPipe())
417 {
418 ENCODE_CHK_STATUS_RETURN(StartStatusReport(statusReportMfx, &cmdBuffer));
419 }
420
421 MediaPerfProfiler *perfProfiler = MediaPerfProfiler::Instance();
422 ENCODE_CHK_NULL_RETURN(perfProfiler);
423 ENCODE_CHK_STATUS_RETURN(perfProfiler->AddPerfCollectStartCmd(
424 (void *)m_pipeline, m_osInterface, m_miItf, &cmdBuffer));
425
426 ENCODE_CHK_STATUS_RETURN(AddPictureHcpCommands(cmdBuffer));
427
428 ENCODE_CHK_STATUS_RETURN(AddPictureVdencCommands(cmdBuffer));
429
430 ENCODE_CHK_STATUS_RETURN(AddPicStateWithNoTile(cmdBuffer));
431
432 return MOS_STATUS_SUCCESS;
433 }
434
PatchSliceLevelCommands(MOS_COMMAND_BUFFER & cmdBuffer,uint8_t packetPhase)435 MOS_STATUS HevcVdencPkt::PatchSliceLevelCommands(MOS_COMMAND_BUFFER &cmdBuffer, uint8_t packetPhase)
436 {
437 ENCODE_FUNC_CALL();
438
439 if (m_hevcPicParams->tiles_enabled_flag)
440 {
441 return MOS_STATUS_SUCCESS;
442 }
443
444 auto feature = dynamic_cast<HEVCEncodeBRC*>(m_featureManager->GetFeature(HevcFeatureIDs::hevcBrcFeature));
445 ENCODE_CHK_NULL_RETURN(feature);
446 auto vdenc2ndLevelBatchBuffer = feature->GetVdenc2ndLevelBatchBuffer(m_pipeline->m_currRecycledBufIdx);
447
448 // starting location for executing slice level cmds
449 vdenc2ndLevelBatchBuffer->dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize + m_hwInterface->m_vdencBatchBuffer2ndGroupSize;
450
451 PCODEC_ENCODER_SLCDATA slcData = m_basicFeature->m_slcData;
452 for (uint32_t startLcu = 0, slcCount = 0; slcCount < m_basicFeature->m_numSlices; slcCount++)
453 {
454 if (m_pipeline->IsFirstPass())
455 {
456 slcData[slcCount].CmdOffset = startLcu * (m_hcpItf->GetHcpPakObjSize()) * sizeof(uint32_t);
457 }
458 m_basicFeature->m_curNumSlices = slcCount;
459
460 ENCODE_CHK_STATUS_RETURN(SendHwSliceEncodeCommand(nullptr, slcCount, cmdBuffer));
461
462 startLcu += m_hevcSliceParams[slcCount].NumLCUsInSlice;
463
464 m_batchBufferForPakSlicesStartOffset = (uint32_t)m_batchBufferForPakSlices[m_basicFeature->m_currPakSliceIdx].iCurrent;
465 if (feature->IsACQPEnabled() || feature->IsBRCEnabled())
466 {
467 // save offset for next 2nd level batch buffer usage
468 // This is because we don't know how many times HCP_WEIGHTOFFSET_STATE & HCP_PAK_INSERT_OBJECT will be inserted for each slice
469 // dwVdencBatchBufferPerSliceConstSize: constant size for each slice
470 // m_vdencBatchBufferPerSliceVarSize: variable size for each slice
471 vdenc2ndLevelBatchBuffer->dwOffset += m_hwInterface->m_vdencBatchBufferPerSliceConstSize + m_basicFeature->m_vdencBatchBufferPerSliceVarSize[slcCount];
472 }
473
474 m_flushCmd = waitVdenc;
475 SETPAR_AND_ADDCMD(VD_PIPELINE_FLUSH, m_vdencItf, &cmdBuffer);
476 }
477
478 if (m_useBatchBufferForPakSlices)
479 {
480 ENCODE_CHK_STATUS_RETURN(Mhw_UnlockBb(
481 m_osInterface,
482 &m_batchBufferForPakSlices[m_basicFeature->m_currPakSliceIdx],
483 m_lastTaskInPhase));
484 }
485
486 // Insert end of sequence/stream if set
487 if (m_basicFeature->m_lastPicInSeq || m_basicFeature->m_lastPicInStream)
488 {
489 ENCODE_CHK_STATUS_RETURN(InsertSeqStreamEnd(cmdBuffer));
490 }
491
492 ENCODE_CHK_STATUS_RETURN(EnsureAllCommandsExecuted(cmdBuffer));
493
494 m_flushCmd = waitHevc;
495 SETPAR_AND_ADDCMD(VD_PIPELINE_FLUSH, m_vdencItf, &cmdBuffer);
496
497 ENCODE_CHK_STATUS_RETURN(EnsureAllCommandsExecuted(cmdBuffer));
498
499 ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(m_vdboxIndex, m_statusReport, cmdBuffer));
500 // BRC PAK statistics different for each pass
501 if (feature->IsBRCEnabled())
502 {
503 uint8_t ucPass = (uint8_t)m_pipeline->GetCurrentPass();
504 EncodeReadBrcPakStatsParams readBrcPakStatsParams;
505 MOS_RESOURCE * osResource = nullptr;
506 uint32_t offset = 0;
507 m_statusReport->GetAddress(statusReportNumberPasses, osResource, offset);
508 RUN_FEATURE_INTERFACE_RETURN(HEVCEncodeBRC, HevcFeatureIDs::hevcBrcFeature, SetReadBrcPakStatsParams, ucPass, offset, osResource, readBrcPakStatsParams);
509 ReadBrcPakStatistics(&cmdBuffer, &readBrcPakStatsParams);
510 }
511 ENCODE_CHK_STATUS_RETURN(ReadExtStatistics(cmdBuffer));
512 ENCODE_CHK_STATUS_RETURN(ReadSliceSize(cmdBuffer));
513 ENCODE_CHK_STATUS_RETURN(PrepareHWMetaData(&cmdBuffer));
514 RUN_FEATURE_INTERFACE_RETURN(VdencLplaAnalysis, HevcFeatureIDs::vdencLplaAnalysisFeature, StoreLookaheadStatistics, cmdBuffer, m_vdboxIndex);
515
516 #if USE_CODECHAL_DEBUG_TOOL
517 uint32_t sizeInByte = 0;
518 bool isIframe = m_basicFeature->m_pictureCodingType == I_TYPE;
519 ENCODE_CHK_NULL_RETURN(m_packetUtilities);
520 if (m_packetUtilities->GetFakeHeaderSettings(sizeInByte, isIframe))
521 {
522 ENCODE_CHK_NULL_RETURN(m_basicFeature->m_recycleBuf);
523 ENCODE_CHK_STATUS_RETURN(m_packetUtilities->ModifyEncodedFrameSizeWithFakeHeaderSize(
524 &cmdBuffer,
525 sizeInByte,
526 m_basicFeature->m_recycleBuf->GetBuffer(PakInfo, 0),
527 0,
528 m_basicFeature->m_recycleBuf->GetBuffer(FrameStatStreamOutBuffer, 0),
529 sizeof(uint32_t) * 4));
530 }
531 #endif
532
533 ENCODE_CHK_STATUS_RETURN(EndStatusReport(statusReportMfx, &cmdBuffer));
534
535 if (Mos_Solo_Extension((MOS_CONTEXT_HANDLE)m_osInterface->pOsContext))
536 {
537 if (m_pipeline->IsLastPass() && m_pipeline->IsFirstPipe())
538 {
539 MediaPacket::UpdateStatusReportNext(statusReportGlobalCount, &cmdBuffer);
540 }
541 }
542 else if (m_osInterface->bInlineCodecStatusUpdate
543 && !(m_basicFeature->m_422State && m_basicFeature->m_422State->GetFeature422Flag())
544 )
545 {
546 if (feature->IsBRCEnabled())
547 {
548 ENCODE_CHK_STATUS_RETURN(UpdateStatusReport(statusReportGlobalCount, &cmdBuffer));
549 }
550 else
551 {
552 ENCODE_CHK_STATUS_RETURN(MediaPacket::UpdateStatusReportNext(statusReportGlobalCount, &cmdBuffer));
553 }
554 }
555
556 CODECHAL_DEBUG_TOOL(
557 if (m_mmcState) {
558 m_mmcState->UpdateUserFeatureKey(&(m_basicFeature->m_reconSurface));
559 })
560 // Reset parameters for next PAK execution
561 if (false == m_pipeline->IsFrameTrackingEnabled() && m_pipeline->IsLastPass() && m_pipeline->IsLastPipe())
562 {
563 UpdateParameters();
564 }
565
566 return MOS_STATUS_SUCCESS;
567 }
568
Construct3rdLevelBatch()569 MOS_STATUS HevcVdencPkt::Construct3rdLevelBatch()
570 {
571 ENCODE_FUNC_CALL();
572
573 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
574
575 // Begin patching 3rd level batch cmds
576 MOS_COMMAND_BUFFER constructedCmdBuf;
577 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, BeginPatch3rdLevelBatch, constructedCmdBuf);
578
579 SETPAR_AND_ADDCMD(VDENC_CMD1, m_vdencItf, &constructedCmdBuf);
580
581 SETPAR_AND_ADDCMD(HCP_PIC_STATE, m_hcpItf, &constructedCmdBuf);
582
583 SETPAR_AND_ADDCMD(VDENC_CMD2, m_vdencItf, &constructedCmdBuf);
584
585 // set MI_BATCH_BUFFER_END command
586 ENCODE_CHK_STATUS_RETURN(m_miItf->AddMiBatchBufferEnd(&constructedCmdBuf, nullptr));
587
588 // End patching 3rd level batch cmds
589 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, EndPatch3rdLevelBatch);
590
591 return eStatus;
592 }
593
AddSlicesCommandsInTile(MOS_COMMAND_BUFFER & cmdBuffer)594 MOS_STATUS HevcVdencPkt::AddSlicesCommandsInTile(
595 MOS_COMMAND_BUFFER &cmdBuffer)
596 {
597 ENCODE_FUNC_CALL();
598
599 PCODEC_ENCODER_SLCDATA slcData = m_basicFeature->m_slcData;
600
601 uint32_t slcCount, sliceNumInTile = 0;
602 for (slcCount = 0; slcCount < m_basicFeature->m_numSlices; slcCount++)
603 {
604 m_basicFeature->m_curNumSlices = slcCount;
605 bool sliceInTile = false;
606 m_lastSliceInTile = false;
607
608 EncodeTileData curTileData = {};
609 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetCurrentTile, curTileData);
610 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, IsSliceInTile, slcCount, &curTileData, &sliceInTile, &m_lastSliceInTile);
611
612 m_basicFeature->m_lastSliceInTile = m_lastSliceInTile;
613 if (!sliceInTile)
614 {
615 continue;
616 }
617
618 ENCODE_CHK_STATUS_RETURN(SendHwSliceEncodeCommand(nullptr, slcCount, cmdBuffer));
619
620 m_flushCmd = waitHevcVdenc;
621 SETPAR_AND_ADDCMD(VD_PIPELINE_FLUSH, m_vdencItf, &cmdBuffer);
622
623 sliceNumInTile++;
624 } // end of slice
625
626 if (0 == sliceNumInTile)
627 {
628 // One tile must have at least one slice
629 ENCODE_ASSERT(false);
630 return MOS_STATUS_INVALID_PARAMETER;
631 }
632
633 uint16_t numTileRows = 1;
634 uint16_t numTileColumns = 1;
635 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetTileRowColumns, numTileRows, numTileColumns);
636
637 if (sliceNumInTile > 1 && (numTileColumns > 1 || numTileRows > 1))
638 {
639 ENCODE_ASSERTMESSAGE("Multi-slices in a tile is not supported!");
640 return MOS_STATUS_INVALID_PARAMETER;
641 }
642 return MOS_STATUS_SUCCESS;
643 }
644
AddOneTileCommands(MOS_COMMAND_BUFFER & cmdBuffer,uint32_t tileRow,uint32_t tileCol,uint32_t tileRowPass)645 MOS_STATUS HevcVdencPkt::AddOneTileCommands(
646 MOS_COMMAND_BUFFER &cmdBuffer,
647 uint32_t tileRow,
648 uint32_t tileCol,
649 uint32_t tileRowPass)
650 {
651 ENCODE_FUNC_CALL();
652 PMOS_COMMAND_BUFFER tempCmdBuffer = &cmdBuffer;
653 PMHW_BATCH_BUFFER tileLevelBatchBuffer = nullptr;
654 auto eStatus = MOS_STATUS_SUCCESS;
655 MOS_COMMAND_BUFFER constructTileBatchBuf = {};
656
657 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, SetCurrentTile, tileRow, tileCol, m_pipeline);
658
659 if ((m_pipeline->GetPipeNum() > 1) && (tileCol != m_pipeline->GetCurrentPipe()))
660 {
661 return MOS_STATUS_SUCCESS;
662 }
663
664 if (!m_osInterface->bUsesPatchList)
665 {
666 // Begin patching tile level batch cmds
667 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, BeginPatchTileLevelBatch, tileRowPass, constructTileBatchBuf);
668
669 // Add batch buffer start for tile
670 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetTileLevelBatchBuffer, tileLevelBatchBuffer);
671 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_BATCH_BUFFER_START)(&cmdBuffer, tileLevelBatchBuffer));
672
673 tempCmdBuffer = &constructTileBatchBuf;
674 MHW_MI_MMIOREGISTERS mmioRegister;
675 if (m_vdencItf->ConvertToMiRegister(MHW_VDBOX_NODE_1, mmioRegister))
676 {
677 HalOcaInterfaceNext::On1stLevelBBStart(
678 *tempCmdBuffer,
679 (MOS_CONTEXT_HANDLE)m_osInterface->pOsContext,
680 m_osInterface->CurrentGpuContextHandle,
681 m_miItf,
682 mmioRegister);
683 }
684 }
685
686 // HCP Lock for multiple pipe mode
687 if (m_pipeline->GetPipeNum() > 1)
688 {
689 auto &vdControlStateParams = m_miItf->MHW_GETPAR_F(VD_CONTROL_STATE)();
690 vdControlStateParams = {};
691 vdControlStateParams.scalableModePipeLock = true;
692 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(VD_CONTROL_STATE)(tempCmdBuffer));
693 }
694
695 SETPAR_AND_ADDCMD(VDENC_PIPE_MODE_SELECT, m_vdencItf, tempCmdBuffer);
696
697 // for Gen11+, we need to add MFX wait for both KIN and VRT before and after HCP Pipemode select...
698 auto &mfxWaitParams = m_miItf->MHW_GETPAR_F(MFX_WAIT)();
699 mfxWaitParams = {};
700 mfxWaitParams.iStallVdboxPipeline = true;
701 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MFX_WAIT)(tempCmdBuffer));
702
703 SETPAR_AND_ADDCMD(HCP_PIPE_MODE_SELECT, m_hcpItf, tempCmdBuffer);
704
705 // for Gen11+, we need to add MFX wait for both KIN and VRT before and after HCP Pipemode select...
706 mfxWaitParams = {};
707 mfxWaitParams.iStallVdboxPipeline = true;
708 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MFX_WAIT)(tempCmdBuffer));
709
710 ENCODE_CHK_STATUS_RETURN(AddPicStateWithTile(*tempCmdBuffer));
711
712 SETPAR_AND_ADDCMD(HCP_TILE_CODING, m_hcpItf, tempCmdBuffer);
713
714 ENCODE_CHK_STATUS_RETURN(AddSlicesCommandsInTile(*tempCmdBuffer));
715
716 //HCP unLock for multiple pipe mode
717 if (m_pipeline->GetPipeNum() > 1)
718 {
719 auto &vdControlStateParams = m_miItf->MHW_GETPAR_F(VD_CONTROL_STATE)();
720 vdControlStateParams = {};
721 vdControlStateParams.scalableModePipeUnlock = true;
722 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(VD_CONTROL_STATE)(tempCmdBuffer));
723 }
724
725 m_flushCmd = waitHevc;
726 SETPAR_AND_ADDCMD(VD_PIPELINE_FLUSH, m_vdencItf, tempCmdBuffer);
727
728 ENCODE_CHK_STATUS_RETURN(EnsureAllCommandsExecuted(*tempCmdBuffer));
729
730 if (!m_osInterface->bUsesPatchList)
731 {
732 // For 2nd level BB, we must use tileLevelBatchBuffer to prevent adding Epilogue before MI_BATCH_BUFFER_END
733 ENCODE_CHK_NULL_RETURN(tileLevelBatchBuffer);
734 tileLevelBatchBuffer->iCurrent = tempCmdBuffer->iOffset;
735 tileLevelBatchBuffer->iRemaining = tempCmdBuffer->iRemaining;
736 ENCODE_CHK_STATUS_RETURN(m_miItf->AddMiBatchBufferEnd(nullptr, tileLevelBatchBuffer));
737 HalOcaInterfaceNext::OnSubLevelBBStart(
738 cmdBuffer,
739 m_osInterface->pOsContext,
740 &tempCmdBuffer->OsResource,
741 0,
742 false,
743 tempCmdBuffer->iOffset);
744 HalOcaInterfaceNext::On1stLevelBBEnd(*tempCmdBuffer, *m_osInterface);
745
746 #if USE_CODECHAL_DEBUG_TOOL
747 if (tempCmdBuffer->pCmdPtr && tempCmdBuffer->pCmdBase &&
748 tempCmdBuffer->pCmdPtr > tempCmdBuffer->pCmdBase)
749 {
750 CodechalDebugInterface *debugInterface = m_pipeline->GetDebugInterface();
751 std::string name("TileLevelBatchBuffer");
752 name += "Row" + std::to_string(tileRow) + "Col" + std::to_string(tileCol);
753
754 ENCODE_CHK_STATUS_RETURN(debugInterface->DumpData(
755 tempCmdBuffer->pCmdBase,
756 (uint32_t)(4 * (tempCmdBuffer->pCmdPtr - tempCmdBuffer->pCmdBase)),
757 CodechalDbgAttr::attrCmdBufferMfx,
758 name.c_str()));
759 }
760 #endif
761 }
762
763 // End patching tile level batch cmds
764 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, EndPatchTileLevelBatch);
765
766 return eStatus;
767 }
768
PatchTileLevelCommands(MOS_COMMAND_BUFFER & cmdBuffer,uint8_t packetPhase)769 MOS_STATUS HevcVdencPkt::PatchTileLevelCommands(MOS_COMMAND_BUFFER &cmdBuffer, uint8_t packetPhase)
770 {
771 ENCODE_FUNC_CALL();
772 auto eStatus = MOS_STATUS_SUCCESS;
773
774 if (!m_hevcPicParams->tiles_enabled_flag)
775 {
776 return MOS_STATUS_INVALID_PARAMETER;
777 }
778
779 // multi tiles cases on Liunx, 3rd level batch buffer is 2nd level.
780 ENCODE_CHK_STATUS_RETURN(Construct3rdLevelBatch());
781
782 uint16_t numTileColumns = 1;
783 uint16_t numTileRows = 1;
784 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetTileRowColumns, numTileRows, numTileColumns);
785
786 for (uint32_t tileRow = 0; tileRow < numTileRows; tileRow++)
787 {
788 uint32_t Pass = m_pipeline->GetCurrentPass();
789 for (uint32_t tileCol = 0; tileCol < numTileColumns; tileCol++)
790 {
791 ENCODE_CHK_STATUS_RETURN(AddOneTileCommands(
792 cmdBuffer,
793 tileRow,
794 tileCol,
795 Pass));
796 }
797 }
798
799 if(m_pipeline->IsLastPipe())
800 {
801 // increment the 3rd lvl bb to break successive frames dependency
802 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, IncrementThirdLevelBatchBuffer);
803
804 // Insert end of sequence/stream if set
805 if (m_basicFeature->m_lastPicInSeq || m_basicFeature->m_lastPicInStream)
806 {
807 ENCODE_CHK_STATUS_RETURN(InsertSeqStreamEnd(cmdBuffer));
808 }
809 }
810
811 // Send VD_CONTROL_STATE (Memory Implict Flush)
812 auto &vdControlStateParams = m_miItf->MHW_GETPAR_F(VD_CONTROL_STATE)();
813 vdControlStateParams = {};
814 vdControlStateParams.memoryImplicitFlush = true;
815 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(VD_CONTROL_STATE)(&cmdBuffer));
816
817 m_flushCmd = waitHevc;
818 SETPAR_AND_ADDCMD(VD_PIPELINE_FLUSH, m_vdencItf, &cmdBuffer);
819
820 ENCODE_CHK_STATUS_RETURN(EnsureAllCommandsExecuted(cmdBuffer));
821
822 // read info from MMIO register in VDENC, incase pak int can't get info
823 auto feature = dynamic_cast<HEVCEncodeBRC *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcBrcFeature));
824 ENCODE_CHK_NULL_RETURN(feature);
825 if (m_pipeline->GetPipeNum() <= 1 && !m_pipeline->IsSingleTaskPhaseSupported())
826 {
827 ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(m_vdboxIndex, m_statusReport, cmdBuffer));
828 // BRC PAK statistics different for each pass
829 if (feature->IsBRCEnabled())
830 {
831 uint8_t ucPass = (uint8_t)m_pipeline->GetCurrentPass();
832 EncodeReadBrcPakStatsParams readBrcPakStatsParams;
833 MOS_RESOURCE *osResource = nullptr;
834 uint32_t offset = 0;
835 m_statusReport->GetAddress(statusReportNumberPasses, osResource, offset);
836 RUN_FEATURE_INTERFACE_RETURN(HEVCEncodeBRC, HevcFeatureIDs::hevcBrcFeature, SetReadBrcPakStatsParams, ucPass, offset, osResource, readBrcPakStatsParams);
837 ReadBrcPakStatistics(&cmdBuffer, &readBrcPakStatsParams);
838 }
839 }
840
841 // Wait all pipe cmds done for the packet
842 auto scalability = m_pipeline->GetMediaScalability();
843 ENCODE_CHK_STATUS_RETURN(scalability->SyncPipe(syncOnePipeWaitOthers, 0, &cmdBuffer));
844
845 MediaPerfProfiler *perfProfiler = MediaPerfProfiler::Instance();
846 ENCODE_CHK_NULL_RETURN(perfProfiler);
847 ENCODE_CHK_STATUS_RETURN(perfProfiler->AddPerfCollectEndCmd(
848 (void *)m_pipeline, m_osInterface, m_miItf, &cmdBuffer));
849
850 // post-operations are done by pak integrate pkt
851
852 return MOS_STATUS_SUCCESS;
853 }
854
AddPicStateWithNoTile(MOS_COMMAND_BUFFER & cmdBuffer)855 MOS_STATUS HevcVdencPkt::AddPicStateWithNoTile(
856 MOS_COMMAND_BUFFER &cmdBuffer)
857 {
858 ENCODE_FUNC_CALL();
859
860 bool tileEnabled = false;
861 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, IsEnabled, tileEnabled);
862 if (tileEnabled)
863 {
864 return MOS_STATUS_SUCCESS;
865 }
866
867 auto brcFeature = dynamic_cast<HEVCEncodeBRC *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcBrcFeature));
868 ENCODE_CHK_NULL_RETURN(brcFeature);
869 auto vdenc2ndLevelBatchBuffer = brcFeature->GetVdenc2ndLevelBatchBuffer(m_pipeline->m_currRecycledBufIdx);
870 vdenc2ndLevelBatchBuffer->dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize;
871
872 if (brcFeature->IsBRCUpdateRequired())
873 {
874 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_BATCH_BUFFER_START(&cmdBuffer, vdenc2ndLevelBatchBuffer)));
875 HalOcaInterfaceNext::OnSubLevelBBStart(
876 cmdBuffer,
877 m_osInterface->pOsContext,
878 &vdenc2ndLevelBatchBuffer->OsResource,
879 vdenc2ndLevelBatchBuffer->dwOffset,
880 false,
881 m_hwInterface->m_vdencBatchBuffer2ndGroupSize);
882 }
883 // When tile is enabled, below commands are needed for each tile instead of each picture
884 else
885 {
886 SETPAR_AND_ADDCMD(VDENC_CMD1, m_vdencItf, &cmdBuffer);
887
888 SETPAR_AND_ADDCMD(HCP_PIC_STATE, m_hcpItf, &cmdBuffer);
889
890 SETPAR_AND_ADDCMD(VDENC_CMD2, m_vdencItf, &cmdBuffer);
891 }
892
893 auto rdoqFeature = dynamic_cast<HevcEncodeCqp *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcCqpFeature));
894 ENCODE_CHK_NULL_RETURN(rdoqFeature);
895 if (rdoqFeature->IsRDOQEnabled())
896 {
897 SETPAR_AND_ADDCMD(HEVC_VP9_RDOQ_STATE, m_hcpItf, &cmdBuffer);
898 }
899
900 return MOS_STATUS_SUCCESS;
901 }
902
AddPicStateWithTile(MOS_COMMAND_BUFFER & cmdBuffer)903 MOS_STATUS HevcVdencPkt::AddPicStateWithTile(
904 MOS_COMMAND_BUFFER &cmdBuffer)
905 {
906 ENCODE_FUNC_CALL();
907
908 bool tileEnabled = false;
909 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, IsEnabled, tileEnabled);
910 if (!tileEnabled)
911 {
912 return MOS_STATUS_SUCCESS;
913 }
914
915 auto brcFeature = dynamic_cast<HEVCEncodeBRC *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcBrcFeature));
916 ENCODE_CHK_NULL_RETURN(brcFeature);
917 auto vdenc2ndLevelBatchBuffer = brcFeature->GetVdenc2ndLevelBatchBuffer(m_pipeline->m_currRecycledBufIdx);
918 vdenc2ndLevelBatchBuffer->dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize;
919
920 if (brcFeature->IsBRCUpdateRequired())
921 {
922 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_BATCH_BUFFER_START(&cmdBuffer, vdenc2ndLevelBatchBuffer)));
923 HalOcaInterfaceNext::OnSubLevelBBStart(
924 cmdBuffer,
925 m_osInterface->pOsContext,
926 &vdenc2ndLevelBatchBuffer->OsResource,
927 vdenc2ndLevelBatchBuffer->dwOffset,
928 false,
929 m_hwInterface->m_vdencBatchBuffer2ndGroupSize);
930 }
931 // When tile is enabled, below commands are needed for each tile instead of each picture
932 else
933 {
934 PMHW_BATCH_BUFFER thirdLevelBatchBuffer = nullptr;
935 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetThirdLevelBatchBuffer, thirdLevelBatchBuffer);
936 ENCODE_CHK_NULL_RETURN(thirdLevelBatchBuffer);
937 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_BATCH_BUFFER_START(&cmdBuffer, thirdLevelBatchBuffer)));
938 HalOcaInterfaceNext::OnSubLevelBBStart(
939 cmdBuffer,
940 m_osInterface->pOsContext,
941 &thirdLevelBatchBuffer->OsResource,
942 thirdLevelBatchBuffer->dwOffset,
943 false,
944 m_hwInterface->m_vdencBatchBuffer2ndGroupSize);
945 }
946
947 // Send HEVC_VP9_RDOQ_STATE command
948 SETPAR_AND_ADDCMD(HEVC_VP9_RDOQ_STATE, m_hcpItf, &cmdBuffer);
949
950 return MOS_STATUS_SUCCESS;
951 }
952
UpdateParameters()953 void HevcVdencPkt::UpdateParameters()
954 {
955 ENCODE_FUNC_CALL();
956
957 if (!m_pipeline->IsSingleTaskPhaseSupported())
958 {
959 m_osInterface->pfnResetPerfBufferID(m_osInterface);
960 }
961
962 m_basicFeature->m_currPakSliceIdx = (m_basicFeature->m_currPakSliceIdx + 1) % m_basicFeature->m_codecHalHevcNumPakSliceBatchBuffers;
963 }
964
UpdateStatusReport(uint32_t srType,MOS_COMMAND_BUFFER * cmdBuffer)965 MOS_STATUS HevcVdencPkt::UpdateStatusReport(uint32_t srType, MOS_COMMAND_BUFFER *cmdBuffer)
966 {
967 ENCODE_FUNC_CALL();
968 ENCODE_CHK_NULL_RETURN(cmdBuffer);
969
970 //initialize following
971 MOS_RESOURCE *osResourceInline = nullptr;
972 uint32_t offsetInline = 0;
973 m_statusReport->GetAddress(statusReportGlobalCount, osResourceInline, offsetInline);
974 offsetInline = m_atomicScratchBuf.operandSetSize * m_atomicScratchBuf.encodeUpdateIndex;
975 uint32_t zeroValueOffset = offsetInline;
976 uint32_t operand1Offset = offsetInline + m_atomicScratchBuf.operand1Offset;
977 uint32_t operand2Offset = offsetInline + m_atomicScratchBuf.operand2Offset;
978 uint32_t operand3Offset = offsetInline + m_atomicScratchBuf.operand3Offset;
979 auto mmioRegisters = m_hwInterface->GetVdencInterfaceNext()->GetMmioRegisters(m_vdboxIndex);
980
981 // Make Flush DW call to make sure all previous work is done
982 auto &flushDwParams = m_miItf->MHW_GETPAR_F(MI_FLUSH_DW)();
983 flushDwParams = {};
984 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_FLUSH_DW)(cmdBuffer));
985
986 // n1_lo = 0x00
987 auto &storeDataParams = m_miItf->MHW_GETPAR_F(MI_STORE_DATA_IMM)();
988 storeDataParams = {};
989 storeDataParams.pOsResource = m_atomicScratchBuf.resAtomicScratchBuffer;
990 storeDataParams.dwResourceOffset = operand1Offset;
991 storeDataParams.dwValue = 0x00;
992 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(cmdBuffer));
993
994 // n2_lo = dwImageStatusMask
995 auto ©MemMemParams = m_miItf->MHW_GETPAR_F(MI_COPY_MEM_MEM)();
996 copyMemMemParams = {};
997 copyMemMemParams.presSrc = m_basicFeature->m_recycleBuf->GetBuffer(VdencBrcPakMmioBuffer, 0);
998 copyMemMemParams.dwSrcOffset = (sizeof(uint32_t) * 1);
999 copyMemMemParams.presDst = m_atomicScratchBuf.resAtomicScratchBuffer;
1000 copyMemMemParams.dwDstOffset = operand2Offset;
1001 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_COPY_MEM_MEM)(cmdBuffer));
1002
1003 // VCS_GPR0_Lo = ImageStatusCtrl
1004 auto ®isterMemParams = m_miItf->MHW_GETPAR_F(MI_LOAD_REGISTER_MEM)();
1005 registerMemParams = {};
1006 registerMemParams.presStoreBuffer = m_basicFeature->m_recycleBuf->GetBuffer(VdencBrcPakMmioBuffer, 0);
1007 registerMemParams.dwOffset = (sizeof(uint32_t) * 0);
1008 registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister0LoOffset; // VCS_GPR0_Lo
1009 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer));
1010
1011 // Reset GPR4_Lo
1012 registerMemParams = {};
1013 registerMemParams.presStoreBuffer = m_atomicScratchBuf.resAtomicScratchBuffer;
1014 registerMemParams.dwOffset = zeroValueOffset; //Offset 0, has value of 0.
1015 registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister4LoOffset; // VCS_GPR4
1016 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer));
1017
1018 // Make Flush DW call to make sure all previous work is done
1019 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_FLUSH_DW)(cmdBuffer));
1020
1021 // step-1: n2_lo = n2_lo & VCS_GPR0_Lo = dwImageStatusMask & ImageStatusCtrl
1022 auto &atomicParams = m_miItf->MHW_GETPAR_F(MI_ATOMIC)();
1023 atomicParams = {};
1024 atomicParams.pOsResource = m_atomicScratchBuf.resAtomicScratchBuffer;
1025 atomicParams.dwResourceOffset = operand2Offset;
1026 atomicParams.dwDataSize = sizeof(uint32_t);
1027 atomicParams.Operation = mhw::mi::MHW_MI_ATOMIC_AND;
1028 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_ATOMIC)(cmdBuffer));
1029
1030 // n3_lo = 0
1031 storeDataParams = {};
1032 storeDataParams.pOsResource = m_atomicScratchBuf.resAtomicScratchBuffer;
1033 storeDataParams.dwResourceOffset = operand3Offset;
1034 storeDataParams.dwValue = 0;
1035 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(cmdBuffer));
1036
1037 // Make Flush DW call to make sure all previous work is done
1038 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_FLUSH_DW)(cmdBuffer));
1039
1040 // GPR0_lo = n1_lo = 0
1041 registerMemParams = {};
1042 registerMemParams.presStoreBuffer = m_atomicScratchBuf.resAtomicScratchBuffer;
1043 registerMemParams.dwOffset = operand1Offset;
1044 registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister0LoOffset; // VCS_GPR0
1045 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer));
1046
1047 // Reset GPR4_Lo
1048 registerMemParams = {};
1049 registerMemParams.presStoreBuffer = m_atomicScratchBuf.resAtomicScratchBuffer;
1050 registerMemParams.dwOffset = zeroValueOffset; //Offset 0, has value of 0.
1051 registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister4LoOffset; // VCS_GPR4
1052 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer));
1053
1054 // Make Flush DW call to make sure all previous work is done
1055 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_FLUSH_DW)(cmdBuffer));
1056
1057 // step-2: n2_lo == n1_lo ? 0 : n2_lo
1058 // compare n1 vs n2. i.e. GRP0 vs. memory of operand2
1059 atomicParams = {};
1060 atomicParams.pOsResource = m_atomicScratchBuf.resAtomicScratchBuffer;
1061 atomicParams.dwResourceOffset = operand2Offset;
1062 atomicParams.dwDataSize = sizeof(uint32_t);
1063 atomicParams.Operation = mhw::mi::MHW_MI_ATOMIC_CMP;
1064 atomicParams.bReturnData = true;
1065 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_ATOMIC)(cmdBuffer));
1066
1067 // n2_hi = 1
1068 storeDataParams = {};
1069 storeDataParams.pOsResource = m_atomicScratchBuf.resAtomicScratchBuffer;
1070 storeDataParams.dwResourceOffset = operand2Offset + sizeof(uint32_t);
1071 storeDataParams.dwValue = 1;
1072 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(cmdBuffer));
1073
1074 // n3_hi = 1
1075 storeDataParams = {};
1076 storeDataParams.pOsResource = m_atomicScratchBuf.resAtomicScratchBuffer;
1077 storeDataParams.dwResourceOffset = operand3Offset + sizeof(uint32_t);
1078 storeDataParams.dwValue = 1;
1079 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(cmdBuffer));
1080
1081 // VCS_GPR0_Lo = n3_lo = 0
1082 registerMemParams = {};
1083 registerMemParams.presStoreBuffer = m_atomicScratchBuf.resAtomicScratchBuffer;
1084 registerMemParams.dwOffset = operand3Offset;
1085 registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister0LoOffset; // VCS_GPR0_Lo
1086 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer));
1087
1088 // GPR0_Hi = n2_hi = 1
1089 registerMemParams = {};
1090 registerMemParams.presStoreBuffer = m_atomicScratchBuf.resAtomicScratchBuffer;
1091 registerMemParams.dwOffset = operand2Offset + sizeof(uint32_t); // update 1
1092 registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister0HiOffset; // VCS_GPR0_Hi
1093 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer));
1094
1095 // Reset GPR4_Lo and GPR4_Hi
1096 registerMemParams = {};
1097 registerMemParams.presStoreBuffer = m_atomicScratchBuf.resAtomicScratchBuffer;
1098 registerMemParams.dwOffset = zeroValueOffset;
1099 registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister4LoOffset; // VCS_GPR4_Hi
1100 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer));
1101
1102 registerMemParams = {};
1103 registerMemParams.presStoreBuffer = m_atomicScratchBuf.resAtomicScratchBuffer;
1104 registerMemParams.dwOffset = zeroValueOffset;
1105 registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister4HiOffset; // VCS_GPR4_Hi
1106 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer));
1107
1108 // Make Flush DW call to make sure all previous work is done
1109 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_FLUSH_DW)(cmdBuffer));
1110
1111 // step-3: n2 = (n2 == 0:1) ? 0:0 : n2 // uint64_t CMP
1112 // If n2==0 (Lo) and 1 (Hi), covert n2 to 0 (Lo)and 0 (Hi), else no change.
1113 // n2 == 0:1 means encoding completsion. the n2 memory will be updated with 0:0, otherwise, no change.
1114 atomicParams = {};
1115 atomicParams.pOsResource = m_atomicScratchBuf.resAtomicScratchBuffer;
1116 atomicParams.dwResourceOffset = operand2Offset;
1117 atomicParams.dwDataSize = sizeof(uint64_t);
1118 atomicParams.Operation = mhw::mi::MHW_MI_ATOMIC_CMP;
1119 atomicParams.bReturnData = true;
1120 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_ATOMIC)(cmdBuffer));
1121
1122 // Make Flush DW call to make sure all previous work is done
1123 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_FLUSH_DW)(cmdBuffer));
1124
1125 // VCS_GPR0_Lo = n3_hi = 1
1126 registerMemParams = {};
1127 registerMemParams.presStoreBuffer = m_atomicScratchBuf.resAtomicScratchBuffer;
1128 registerMemParams.dwOffset = operand3Offset + sizeof(uint32_t);
1129 registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister0LoOffset; // VCS_GPR0_Lo
1130 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer));
1131
1132 // Make Flush DW call to make sure all previous work is done
1133 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_FLUSH_DW)(cmdBuffer));
1134
1135 // step-4: n2_hi = n2_hi ^ VCS_GPR0_Lo = n2_hi ^ n3_hi
1136 atomicParams = {};
1137 atomicParams.pOsResource = m_atomicScratchBuf.resAtomicScratchBuffer;
1138 atomicParams.dwResourceOffset = operand2Offset + sizeof(uint32_t);
1139 atomicParams.dwDataSize = sizeof(uint32_t);
1140 atomicParams.Operation = mhw::mi::MHW_MI_ATOMIC_XOR;
1141 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_ATOMIC)(cmdBuffer));
1142
1143 // VCS_GPR0_Lo = n2_hi
1144 registerMemParams = {};
1145 registerMemParams.presStoreBuffer = m_atomicScratchBuf.resAtomicScratchBuffer;
1146 registerMemParams.dwOffset = operand2Offset + sizeof(uint32_t);
1147 registerMemParams.dwRegister = mmioRegisters->generalPurposeRegister0LoOffset; // VCS_GPR0_Lo
1148 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer));
1149
1150 // step-5: m_storeData = m_storeData + VCS_GPR0_Lo = m_storeData + n2_hi
1151 // if not completed n2_hi should be 0, then m_storeData = m_storeData + 0
1152 // if completed, n2_hi should be 1, then m_storeData = m_storeData + 1
1153 auto &miLoadRegMemParams = m_miItf->MHW_GETPAR_F(MI_LOAD_REGISTER_MEM)();
1154 miLoadRegMemParams = {};
1155 miLoadRegMemParams.presStoreBuffer = osResourceInline;
1156 miLoadRegMemParams.dwOffset = 0;
1157 miLoadRegMemParams.dwRegister = mmioRegisters->generalPurposeRegister4LoOffset;
1158 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer));
1159
1160 mhw::mi::MHW_MI_ALU_PARAMS aluParams[4] = { 0 };
1161
1162 int aluCount = 0;
1163
1164 //load1 srca, reg1
1165 aluParams[aluCount].AluOpcode = MHW_MI_ALU_LOAD;
1166 aluParams[aluCount].Operand1 = MHW_MI_ALU_SRCA;
1167 aluParams[aluCount].Operand2 = MHW_MI_ALU_GPREG0;
1168 ++aluCount;
1169 //load srcb, reg2
1170 aluParams[aluCount].AluOpcode = MHW_MI_ALU_LOAD;
1171 aluParams[aluCount].Operand1 = MHW_MI_ALU_SRCB;
1172 aluParams[aluCount].Operand2 = MHW_MI_ALU_GPREG4;
1173 ++aluCount;
1174 //add
1175 aluParams[aluCount].AluOpcode = MHW_MI_ALU_ADD;
1176 ++aluCount;
1177 //store reg1, accu
1178 aluParams[aluCount].AluOpcode = MHW_MI_ALU_STORE;
1179 aluParams[aluCount].Operand1 = MHW_MI_ALU_GPREG0;
1180 aluParams[aluCount].Operand2 = MHW_MI_ALU_ACCU;
1181 ++aluCount;
1182
1183 auto &miMathParams = m_miItf->MHW_GETPAR_F(MI_MATH)();
1184 miMathParams = {};
1185 miMathParams.dwNumAluParams = aluCount;
1186 miMathParams.pAluPayload = aluParams;
1187 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_MATH)(cmdBuffer));
1188
1189 auto &miStoreRegMemParams = m_miItf->MHW_GETPAR_F(MI_STORE_REGISTER_MEM)();
1190 miStoreRegMemParams = {};
1191 miStoreRegMemParams.presStoreBuffer = osResourceInline;
1192 miStoreRegMemParams.dwOffset = 0;
1193 miStoreRegMemParams.dwRegister = mmioRegisters->generalPurposeRegister0LoOffset;
1194 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(cmdBuffer));
1195
1196 // Make Flush DW call to make sure all previous work is done
1197 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_FLUSH_DW)(cmdBuffer));
1198
1199 return MOS_STATUS_SUCCESS;
1200 }
1201
EnsureAllCommandsExecuted(MOS_COMMAND_BUFFER & cmdBuffer)1202 MOS_STATUS HevcVdencPkt::EnsureAllCommandsExecuted(MOS_COMMAND_BUFFER &cmdBuffer)
1203 {
1204 ENCODE_FUNC_CALL();
1205
1206 // Send MI_FLUSH command
1207 auto &flushDwParams = m_miItf->MHW_GETPAR_F(MI_FLUSH_DW)();
1208 flushDwParams = {};
1209 flushDwParams.bVideoPipelineCacheInvalidate = true;
1210 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_FLUSH_DW)(&cmdBuffer));
1211
1212 return MOS_STATUS_SUCCESS;
1213 }
1214
InsertSeqStreamEnd(MOS_COMMAND_BUFFER & cmdBuffer)1215 MOS_STATUS HevcVdencPkt::InsertSeqStreamEnd(MOS_COMMAND_BUFFER &cmdBuffer)
1216 {
1217 ENCODE_FUNC_CALL();
1218
1219 ENCODE_CHK_STATUS_RETURN(AddAllCmds_HCP_PAK_INSERT_OBJECT(&cmdBuffer));
1220
1221 return MOS_STATUS_SUCCESS;
1222 }
1223
AddPictureVdencCommands(MOS_COMMAND_BUFFER & cmdBuffer)1224 MOS_STATUS HevcVdencPkt::AddPictureVdencCommands(MOS_COMMAND_BUFFER & cmdBuffer)
1225 {
1226 ENCODE_FUNC_CALL();
1227
1228 SETPAR_AND_ADDCMD(VDENC_PIPE_MODE_SELECT, m_vdencItf, &cmdBuffer);
1229 SETPAR_AND_ADDCMD(VDENC_SRC_SURFACE_STATE, m_vdencItf, &cmdBuffer);
1230 SETPAR_AND_ADDCMD(VDENC_REF_SURFACE_STATE, m_vdencItf, &cmdBuffer);
1231 SETPAR_AND_ADDCMD(VDENC_DS_REF_SURFACE_STATE, m_vdencItf, &cmdBuffer);
1232 SETPAR_AND_ADDCMD(VDENC_PIPE_BUF_ADDR_STATE, m_vdencItf, &cmdBuffer);
1233
1234 return MOS_STATUS_SUCCESS;
1235 }
1236
AddPictureHcpCommands(MOS_COMMAND_BUFFER & cmdBuffer)1237 MOS_STATUS HevcVdencPkt::AddPictureHcpCommands(
1238 MOS_COMMAND_BUFFER &cmdBuffer)
1239 {
1240 ENCODE_FUNC_CALL();
1241
1242 ENCODE_CHK_STATUS_RETURN(AddHcpPipeModeSelect(cmdBuffer));
1243
1244 ENCODE_CHK_STATUS_RETURN(AddAllCmds_HCP_SURFACE_STATE(&cmdBuffer));
1245
1246 SETPAR_AND_ADDCMD(HCP_PIPE_BUF_ADDR_STATE, m_hcpItf, &cmdBuffer);
1247
1248 SETPAR_AND_ADDCMD(HCP_IND_OBJ_BASE_ADDR_STATE, m_hcpItf, &cmdBuffer);
1249
1250 ENCODE_CHK_STATUS_RETURN(AddAllCmds_HCP_FQM_STATE(&cmdBuffer));
1251 ENCODE_CHK_STATUS_RETURN(AddAllCmds_HCP_QM_STATE(&cmdBuffer));
1252
1253 return MOS_STATUS_SUCCESS;
1254 }
1255
AddHcpPipeModeSelect(MOS_COMMAND_BUFFER & cmdBuffer)1256 MOS_STATUS HevcVdencPkt::AddHcpPipeModeSelect(
1257 MOS_COMMAND_BUFFER &cmdBuffer)
1258 {
1259 ENCODE_FUNC_CALL();
1260
1261 SETPAR_AND_ADDCMD(VDENC_CONTROL_STATE, m_vdencItf, &cmdBuffer);
1262
1263 auto &vdControlStateParams = m_miItf->MHW_GETPAR_F(VD_CONTROL_STATE)();
1264 vdControlStateParams = {};
1265 vdControlStateParams.initialization = true;
1266 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(VD_CONTROL_STATE)(&cmdBuffer));
1267
1268 // for Gen11+, we need to add MFX wait for both KIN and VRT before and after HCP Pipemode select...
1269 auto &mfxWaitParams = m_miItf->MHW_GETPAR_F(MFX_WAIT)();
1270 mfxWaitParams = {};
1271 mfxWaitParams.iStallVdboxPipeline = true;
1272 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MFX_WAIT)(&cmdBuffer));
1273
1274 SETPAR_AND_ADDCMD(HCP_PIPE_MODE_SELECT, m_hcpItf, &cmdBuffer);
1275
1276 mfxWaitParams = {};
1277 mfxWaitParams.iStallVdboxPipeline = true;
1278 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MFX_WAIT)(&cmdBuffer));
1279
1280 return MOS_STATUS_SUCCESS;
1281 }
1282
CalculatePictureStateCommandSize()1283 MOS_STATUS HevcVdencPkt::CalculatePictureStateCommandSize()
1284 {
1285 ENCODE_FUNC_CALL();
1286
1287 uint32_t hcpCommandsSize = 0;
1288 uint32_t hcpPatchListSize = 0;
1289 uint32_t cpCmdsize = 0;
1290 uint32_t cpPatchListSize = 0;
1291 uint32_t hucCommandsSize = 0;
1292 uint32_t hucPatchListSize = 0;
1293
1294 MHW_VDBOX_STATE_CMDSIZE_PARAMS stateCmdSizeParams;
1295
1296 hcpCommandsSize =
1297 m_vdencItf->MHW_GETSIZE_F(VD_PIPELINE_FLUSH)() +
1298 m_miItf->MHW_GETSIZE_F(MI_FLUSH_DW)() +
1299 m_hcpItf->MHW_GETSIZE_F(HCP_PIPE_MODE_SELECT)() +
1300 m_hcpItf->MHW_GETSIZE_F(HCP_SURFACE_STATE)() +
1301 m_hcpItf->MHW_GETSIZE_F(HCP_PIPE_BUF_ADDR_STATE)() +
1302 m_hcpItf->MHW_GETSIZE_F(HCP_IND_OBJ_BASE_ADDR_STATE)() +
1303 m_miItf->MHW_GETSIZE_F(MI_LOAD_REGISTER_REG)() * 8;
1304
1305 hcpPatchListSize =
1306 PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::VD_PIPELINE_FLUSH_CMD) +
1307 PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::MI_FLUSH_DW_CMD) +
1308 PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::HCP_PIPE_MODE_SELECT_CMD) +
1309 PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::HCP_SURFACE_STATE_CMD) +
1310 PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::HCP_PIPE_BUF_ADDR_STATE_CMD) +
1311 PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::HCP_IND_OBJ_BASE_ADDR_STATE_CMD);
1312
1313 // HCP_QM_STATE_CMD may be issued up to 20 times: 3x Colour Component plus 2x intra/inter plus 4x SizeID minus 4 for the 32x32 chroma components.
1314 // HCP_FQP_STATE_CMD may be issued up to 8 times: 4 scaling list per intra and inter.
1315 hcpCommandsSize +=
1316 2 * m_miItf->MHW_GETSIZE_F(VD_CONTROL_STATE)() +
1317 m_hcpItf->MHW_GETSIZE_F(HCP_SURFACE_STATE)() + // encoder needs two surface state commands. One is for raw and another one is for recon surfaces.
1318 20 * m_hcpItf->MHW_GETSIZE_F(HCP_QM_STATE)() +
1319 8 * m_hcpItf->MHW_GETSIZE_F(HCP_FQM_STATE)() +
1320 m_hcpItf->MHW_GETSIZE_F(HCP_PIC_STATE)() +
1321 m_hcpItf->MHW_GETSIZE_F(HEVC_VP9_RDOQ_STATE)() + // RDOQ
1322 2 * m_miItf->MHW_GETSIZE_F(MI_STORE_DATA_IMM)() + // Slice level commands
1323 2 * m_miItf->MHW_GETSIZE_F(MI_FLUSH_DW)() + // need for Status report, Mfc Status and
1324 10 * m_miItf->MHW_GETSIZE_F(MI_STORE_REGISTER_MEM)() + // 8 for BRCStatistics and 2 for RC6 WAs
1325 m_miItf->MHW_GETSIZE_F(MI_LOAD_REGISTER_MEM)() + // 1 for RC6 WA
1326 2 * m_hcpItf->MHW_GETSIZE_F(HCP_PAK_INSERT_OBJECT)() + // Two PAK insert object commands are for headers before the slice header and the header for the end of stream
1327 4 * m_miItf->MHW_GETSIZE_F(MI_STORE_DATA_IMM)() + // two (BRC+reference frame) for clean-up HW semaphore memory and another two for signal it
1328 17 * m_miItf->MHW_GETSIZE_F(MI_SEMAPHORE_WAIT)() + // Use HW wait command for each reference and one wait for current semaphore object
1329 m_miItf->MHW_GETSIZE_F(MI_SEMAPHORE_WAIT)() + // Use HW wait command for each BRC pass
1330 +m_miItf->MHW_GETSIZE_F(MI_SEMAPHORE_WAIT)() // Use HW wait command for each VDBOX
1331 + 2 * m_miItf->MHW_GETSIZE_F(MI_STORE_DATA_IMM)() // One is for reset and another one for set per VDBOX
1332 + 8 * m_miItf->MHW_GETSIZE_F(MI_COPY_MEM_MEM)() // Need to copy SSE statistics/ Slice Size overflow into memory
1333 ;
1334
1335 hcpPatchListSize +=
1336 20 * PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::HCP_QM_STATE_CMD) +
1337 8 * PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::HCP_FQM_STATE_CMD) +
1338 PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::HCP_PIC_STATE_CMD) +
1339 PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::MI_BATCH_BUFFER_START_CMD) + // When BRC is on, HCP_PIC_STATE_CMD command is in the BB
1340 2 * PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::MI_STORE_DATA_IMM_CMD) + // Slice level commands
1341 2 * PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::MI_FLUSH_DW_CMD) + // need for Status report, Mfc Status and
1342 11 * PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::MI_STORE_REGISTER_MEM_CMD) + // 8 for BRCStatistics and 3 for RC6 WAs
1343 22 * PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::MI_STORE_DATA_IMM_CMD) // Use HW wait commands plus its memory clean-up and signal (4+ 16 + 1 + 1)
1344 + 8 * PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::MI_BATCH_BUFFER_START_CMD) // At maximal, there are 8 batch buffers for 8 VDBOXes for VE. Each box has one BB.
1345 + PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::MI_FLUSH_DW_CMD) // Need one flush before copy command
1346 + PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::MFX_WAIT_CMD) // Need one wait after copy command
1347 + 3 * PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::MI_STORE_DATA_IMM_CMD) // one wait commands and two for reset and set semaphore memory
1348 + 8 * PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::MI_COPY_MEM_MEM_CMD) // Need to copy SSE statistics/ Slice Size overflow into memory
1349 ;
1350
1351 auto cpInterface = m_hwInterface->GetCpInterface();
1352 cpInterface->GetCpStateLevelCmdSize(cpCmdsize, cpPatchListSize);
1353
1354 ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucStateCommandSize(
1355 m_basicFeature->m_mode, (uint32_t *)&hucCommandsSize, (uint32_t *)&hucPatchListSize, &stateCmdSizeParams));
1356
1357 m_defaultPictureStatesSize = hcpCommandsSize + hucCommandsSize + (uint32_t)cpCmdsize;
1358 m_defaultPicturePatchListSize = hcpPatchListSize + hucPatchListSize + (uint32_t)cpPatchListSize;
1359
1360 return MOS_STATUS_SUCCESS;
1361 }
1362
SendHwSliceEncodeCommand(const PCODEC_ENCODER_SLCDATA slcData,const uint32_t currSlcIdx,MOS_COMMAND_BUFFER & cmdBuffer)1363 MOS_STATUS HevcVdencPkt::SendHwSliceEncodeCommand(const PCODEC_ENCODER_SLCDATA slcData, const uint32_t currSlcIdx, MOS_COMMAND_BUFFER &cmdBuffer)
1364 {
1365 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1366
1367 ENCODE_FUNC_CALL();
1368
1369 // VDENC does not use batch buffer for slice state
1370 // add HCP_REF_IDX command
1371 ENCODE_CHK_STATUS_RETURN(AddAllCmds_HCP_REF_IDX_STATE(&cmdBuffer));
1372
1373 bool vdencHucInUse = false;
1374 PMHW_BATCH_BUFFER vdencBatchBuffer = nullptr;
1375
1376 RUN_FEATURE_INTERFACE_RETURN(HEVCEncodeBRC, HevcFeatureIDs::hevcBrcFeature, SetVdencBatchBufferState, m_pipeline->m_currRecycledBufIdx, currSlcIdx, vdencBatchBuffer, vdencHucInUse);
1377
1378 if (vdencHucInUse)
1379 {
1380 // 2nd level batch buffer
1381 PMHW_BATCH_BUFFER secondLevelBatchBufferUsed = vdencBatchBuffer;
1382 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_BATCH_BUFFER_START(&cmdBuffer, secondLevelBatchBufferUsed)));
1383 HalOcaInterfaceNext::OnSubLevelBBStart(
1384 cmdBuffer,
1385 m_osInterface->pOsContext,
1386 &secondLevelBatchBufferUsed->OsResource,
1387 secondLevelBatchBufferUsed->dwOffset,
1388 false,
1389 m_basicFeature->m_vdencBatchBufferPerSlicePart2Start[currSlcIdx] - secondLevelBatchBufferUsed->dwOffset);
1390 ENCODE_CHK_STATUS_RETURN(AddAllCmds_HCP_PAK_INSERT_OBJECT_BRC(&cmdBuffer));
1391 secondLevelBatchBufferUsed->dwOffset = m_basicFeature->m_vdencBatchBufferPerSlicePart2Start[currSlcIdx];
1392 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_BATCH_BUFFER_START(&cmdBuffer, secondLevelBatchBufferUsed)));
1393 HalOcaInterfaceNext::OnSubLevelBBStart(
1394 cmdBuffer,
1395 m_osInterface->pOsContext,
1396 &secondLevelBatchBufferUsed->OsResource,
1397 secondLevelBatchBufferUsed->dwOffset,
1398 false,
1399 m_basicFeature->m_vdencBatchBufferPerSlicePart2Size[currSlcIdx]);
1400 }
1401 else
1402 {
1403 // Weighted Prediction
1404 // This slice level command is issued, if the weighted_pred_flag or weighted_bipred_flag equals one.
1405 // If zero, then this command is not issued.
1406 ENCODE_CHK_STATUS_RETURN(AddAllCmds_HCP_WEIGHTOFFSET_STATE(&cmdBuffer));
1407
1408 m_basicFeature->m_useDefaultRoundingForHcpSliceState = false;
1409 SETPAR_AND_ADDCMD(HCP_SLICE_STATE, m_hcpItf, &cmdBuffer);
1410
1411 // add HCP_PAK_INSERT_OBJECTS command
1412 ENCODE_CHK_STATUS_RETURN(AddAllCmds_HCP_PAK_INSERT_OBJECT(&cmdBuffer));
1413
1414 SETPAR_AND_ADDCMD(VDENC_WEIGHTSOFFSETS_STATE, m_vdencItf, &cmdBuffer);
1415 }
1416 SETPAR_AND_ADDCMD(VDENC_HEVC_VP9_TILE_SLICE_STATE, m_vdencItf, &cmdBuffer);
1417 SETPAR_AND_ADDCMD(VDENC_WALKER_STATE, m_vdencItf, &cmdBuffer);
1418 return eStatus;
1419 }
1420
AddAllCmds_HCP_PAK_INSERT_OBJECT_BRC(PMOS_COMMAND_BUFFER cmdBuffer) const1421 MOS_STATUS HevcVdencPkt::AddAllCmds_HCP_PAK_INSERT_OBJECT_BRC(PMOS_COMMAND_BUFFER cmdBuffer) const
1422 {
1423 ENCODE_FUNC_CALL();
1424
1425 ENCODE_CHK_NULL_RETURN(cmdBuffer);
1426
1427 auto ¶ms = m_hcpItf->MHW_GETPAR_F(HCP_PAK_INSERT_OBJECT)();
1428 params = {};
1429
1430 PCODECHAL_NAL_UNIT_PARAMS *ppNalUnitParams = (CODECHAL_NAL_UNIT_PARAMS **)m_nalUnitParams;
1431
1432 auto brcFeature = dynamic_cast<HEVCEncodeBRC *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcBrcFeature));
1433 ENCODE_CHK_NULL_RETURN(brcFeature);
1434
1435 PBSBuffer pBsBuffer = &(m_basicFeature->m_bsBuffer);
1436 uint32_t bitSize = 0;
1437 uint32_t offSet = 0;
1438
1439 //insert AU, SPS, PSP headers before first slice header
1440 if (m_basicFeature->m_curNumSlices == 0)
1441 {
1442 uint32_t maxBytesInPakInsertObjCmd = ((2 << 11) - 1) * 4; // 12 bits for Length field in PAK_INSERT_OBJ cmd
1443
1444 for (auto i = 0; i < HEVC_MAX_NAL_UNIT_TYPE; i++)
1445 {
1446 uint32_t nalunitPosiSize = ppNalUnitParams[i]->uiSize;
1447 uint32_t nalunitPosiOffset = ppNalUnitParams[i]->uiOffset;
1448
1449 while (nalunitPosiSize > 0)
1450 {
1451 bitSize = MOS_MIN(maxBytesInPakInsertObjCmd * 8, nalunitPosiSize * 8);
1452 offSet = nalunitPosiOffset;
1453
1454 params = {};
1455
1456 params.dwPadding = (MOS_ALIGN_CEIL((bitSize + 7) >> 3, sizeof(uint32_t))) / sizeof(uint32_t);
1457 params.bEmulationByteBitsInsert = ppNalUnitParams[i]->bInsertEmulationBytes;
1458 params.uiSkipEmulationCheckCount = ppNalUnitParams[i]->uiSkipEmulationCheckCount;
1459 params.dataBitsInLastDw = bitSize % 32;
1460 if (params.dataBitsInLastDw == 0)
1461 {
1462 params.dataBitsInLastDw = 32;
1463 }
1464
1465 if (nalunitPosiSize > maxBytesInPakInsertObjCmd)
1466 {
1467 nalunitPosiSize -= maxBytesInPakInsertObjCmd;
1468 nalunitPosiOffset += maxBytesInPakInsertObjCmd;
1469 }
1470 else
1471 {
1472 nalunitPosiSize = 0;
1473 }
1474 m_hcpItf->MHW_ADDCMD_F(HCP_PAK_INSERT_OBJECT)(cmdBuffer);
1475 uint32_t byteSize = (bitSize + 7) >> 3;
1476 if (byteSize)
1477 {
1478 MHW_MI_CHK_NULL(pBsBuffer);
1479 MHW_MI_CHK_NULL(pBsBuffer->pBase);
1480 uint8_t *data = (uint8_t *)(pBsBuffer->pBase + offSet);
1481 MHW_MI_CHK_STATUS(Mhw_AddCommandCmdOrBB(m_osInterface, cmdBuffer, nullptr, data, byteSize));
1482 }
1483 }
1484 }
1485 }
1486
1487 return MOS_STATUS_SUCCESS;
1488 }
1489
AddCondBBEndForLastPass(MOS_COMMAND_BUFFER & cmdBuffer)1490 MOS_STATUS HevcVdencPkt::AddCondBBEndForLastPass(MOS_COMMAND_BUFFER &cmdBuffer)
1491 {
1492 ENCODE_FUNC_CALL();
1493
1494 if (m_pipeline->IsFirstPass() || m_pipeline->GetPassNum() == 1)
1495 {
1496 return MOS_STATUS_SUCCESS;
1497 }
1498
1499 bool conditionalPass = true;
1500 RUN_FEATURE_INTERFACE_RETURN(VdencLplaAnalysis, HevcFeatureIDs::vdencLplaAnalysisFeature,
1501 SetConditionalPass, m_pipeline->IsLastPass(), conditionalPass);
1502
1503 if (conditionalPass)
1504 {
1505 auto &miConditionalBatchBufferEndParams = m_miItf->MHW_GETPAR_F(MI_CONDITIONAL_BATCH_BUFFER_END)();
1506 miConditionalBatchBufferEndParams = {};
1507
1508 // VDENC uses HuC FW generated semaphore for conditional 2nd pass
1509 miConditionalBatchBufferEndParams.presSemaphoreBuffer =
1510 m_basicFeature->m_recycleBuf->GetBuffer(VdencBrcPakMmioBuffer, 0);
1511
1512 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_CONDITIONAL_BATCH_BUFFER_END)(&cmdBuffer));
1513 }
1514
1515 // where is m_encodeStatusBuf?
1516 auto mmioRegisters = m_hcpItf->GetMmioRegisters(m_vdboxIndex);
1517 MOS_RESOURCE *osResource = nullptr;
1518 uint32_t offset = 0;
1519 m_statusReport->GetAddress(statusReportImageStatusCtrl, osResource, offset);
1520 //uint32_t baseOffset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2; // encodeStatus is offset by 2 DWs in the resource
1521
1522 // Write back the HCP image control register for RC6 may clean it out
1523 auto ®isterMemParams = m_miItf->MHW_GETPAR_F(MI_LOAD_REGISTER_MEM)();
1524 registerMemParams = {};
1525 registerMemParams.presStoreBuffer = osResource;
1526 registerMemParams.dwOffset = offset;
1527 registerMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
1528 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(&cmdBuffer));
1529
1530 HevcVdencBrcBuffers *vdencBrcBuffers = nullptr;
1531 auto feature = dynamic_cast<HEVCEncodeBRC *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcBrcFeature));
1532 ENCODE_CHK_NULL_RETURN(feature);
1533 vdencBrcBuffers = feature->GetHevcVdencBrcBuffers();
1534 ENCODE_CHK_NULL_RETURN(vdencBrcBuffers);
1535
1536 auto &miStoreRegMemParams = m_miItf->MHW_GETPAR_F(MI_STORE_REGISTER_MEM)();
1537 miStoreRegMemParams = {};
1538 miStoreRegMemParams.presStoreBuffer = vdencBrcBuffers->resBrcPakStatisticBuffer[vdencBrcBuffers->currBrcPakStasIdxForWrite];
1539 miStoreRegMemParams.dwOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS);
1540 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
1541 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(&cmdBuffer));
1542
1543 m_statusReport->GetAddress(statusReportImageStatusCtrlOfLastBRCPass, osResource, offset);
1544 miStoreRegMemParams = {};
1545 miStoreRegMemParams.presStoreBuffer = osResource;
1546 miStoreRegMemParams.dwOffset = offset;
1547 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
1548 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(&cmdBuffer));
1549
1550 return MOS_STATUS_SUCCESS;
1551 }
1552
FreeResources()1553 MOS_STATUS HevcVdencPkt::FreeResources()
1554 {
1555 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1556
1557 ENCODE_FUNC_CALL();
1558
1559 #if USE_CODECHAL_DEBUG_TOOL && _ENCODE_RESERVED
1560 CODECHAL_DEBUG_TOOL(
1561 CodechalDebugInterface *debugInterface = m_pipeline->GetDebugInterface();
1562 if (debugInterface && debugInterface->DumpIsEnabled(CodechalDbgAttr::attrDumpEncodePar)) {
1563 m_hevcParDump->DumpParFile();
1564 })
1565 #endif
1566 for (auto j = 0; j < HevcBasicFeature::m_codecHalHevcNumPakSliceBatchBuffers; j++)
1567 {
1568 eStatus = Mhw_FreeBb(m_osInterface, &m_batchBufferForPakSlices[j], nullptr);
1569 ENCODE_ASSERT(eStatus == MOS_STATUS_SUCCESS);
1570 }
1571
1572 return eStatus;
1573 }
1574
Init()1575 MOS_STATUS HevcVdencPkt::Init()
1576 {
1577 ENCODE_FUNC_CALL();
1578 ENCODE_CHK_NULL_RETURN(m_statusReport);
1579
1580 ENCODE_CHK_STATUS_RETURN(CmdPacket::Init());
1581 m_basicFeature = dynamic_cast<HevcBasicFeature *>(m_featureManager->GetFeature(HevcFeatureIDs::basicFeature));
1582 ENCODE_CHK_NULL_RETURN(m_basicFeature);
1583
1584 #ifdef _MMC_SUPPORTED
1585 m_mmcState = m_pipeline->GetMmcState();
1586 ENCODE_CHK_NULL_RETURN(m_mmcState);
1587 m_basicFeature->m_mmcState = m_mmcState;
1588 m_basicFeature->m_ref.m_mmcState = m_mmcState;
1589 #endif
1590 m_allocator = m_pipeline->GetEncodeAllocator();
1591 ENCODE_CHK_STATUS_RETURN(AllocateResources());
1592
1593 ENCODE_CHK_STATUS_RETURN(m_statusReport->RegistObserver(this));
1594
1595 CalculatePictureStateCommandSize();
1596
1597 uint32_t vdencPictureStatesSize = 0, vdencPicturePatchListSize = 0;
1598 GetVdencStateCommandsDataSize(vdencPictureStatesSize, vdencPicturePatchListSize);
1599 m_defaultPictureStatesSize += vdencPictureStatesSize;
1600 m_defaultPicturePatchListSize += vdencPicturePatchListSize;
1601
1602 GetHxxPrimitiveCommandSize();
1603
1604 m_usePatchList = m_osInterface->bUsesPatchList;
1605
1606 m_packetUtilities = m_pipeline->GetPacketUtilities();
1607 ENCODE_CHK_NULL_RETURN(m_packetUtilities);
1608
1609 return MOS_STATUS_SUCCESS;
1610 }
1611
SetRowstoreCachingOffsets()1612 MOS_STATUS HevcVdencPkt::SetRowstoreCachingOffsets()
1613 {
1614 MHW_VDBOX_ROWSTORE_PARAMS rowStoreParams;
1615
1616 rowStoreParams.Mode = m_basicFeature->m_mode;
1617 rowStoreParams.dwPicWidth = m_basicFeature->m_frameWidth;
1618 rowStoreParams.ucChromaFormat = m_basicFeature->m_chromaFormat;
1619 rowStoreParams.ucBitDepthMinus8 = m_hevcSeqParams->bit_depth_luma_minus8;
1620 rowStoreParams.ucLCUSize = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
1621 // VDEnc only support LCU64 for now
1622 ENCODE_ASSERT(rowStoreParams.ucLCUSize == m_basicFeature->m_maxLCUSize);
1623 ENCODE_CHK_STATUS_RETURN(m_hwInterface->SetRowstoreCachingOffsets(&rowStoreParams));
1624
1625 if (m_vdencItf)
1626 {
1627 mhw::vdbox::vdenc::RowStorePar par = {};
1628
1629 par.mode = mhw::vdbox::vdenc::RowStorePar::HEVC;
1630 par.bitDepth = mhw::vdbox::vdenc::RowStorePar::DEPTH_8;
1631 if (rowStoreParams.ucBitDepthMinus8 == 1 || rowStoreParams.ucBitDepthMinus8 == 2)
1632 {
1633 par.bitDepth = mhw::vdbox::vdenc::RowStorePar::DEPTH_10;
1634 }
1635 else if (rowStoreParams.ucBitDepthMinus8 > 2)
1636 {
1637 par.bitDepth = mhw::vdbox::vdenc::RowStorePar::DEPTH_12;
1638 }
1639 par.lcuSize = mhw ::vdbox::vdenc::RowStorePar::SIZE_OTHER;
1640 if (rowStoreParams.ucLCUSize == 32)
1641 {
1642 par.lcuSize = mhw ::vdbox::vdenc::RowStorePar::SIZE_32;
1643 }
1644 else if (rowStoreParams.ucLCUSize == 64)
1645 {
1646 par.lcuSize = mhw ::vdbox::vdenc::RowStorePar::SIZE_64;
1647 }
1648 par.frameWidth = rowStoreParams.dwPicWidth;
1649 switch (rowStoreParams.ucChromaFormat)
1650 {
1651 case HCP_CHROMA_FORMAT_MONOCHROME:
1652 par.format = mhw ::vdbox::vdenc::RowStorePar::MONOCHROME;
1653 break;
1654 case HCP_CHROMA_FORMAT_YUV420:
1655 par.format = mhw ::vdbox::vdenc::RowStorePar::YUV420;
1656 break;
1657 case HCP_CHROMA_FORMAT_YUV422:
1658 par.format = mhw ::vdbox::vdenc::RowStorePar::YUV422;
1659 break;
1660 case HCP_CHROMA_FORMAT_YUV444:
1661 par.format = mhw ::vdbox::vdenc::RowStorePar::YUV444;
1662 break;
1663 }
1664
1665 ENCODE_CHK_STATUS_RETURN(m_vdencItf->SetRowstoreCachingOffsets(par));
1666 }
1667
1668 hcp::HcpVdboxRowStorePar rowstoreParams = {};
1669 rowstoreParams.Mode = m_basicFeature->m_mode;
1670 rowstoreParams.dwPicWidth = m_basicFeature->m_frameWidth;
1671 rowstoreParams.ucChromaFormat = m_basicFeature->m_chromaFormat;
1672 rowstoreParams.ucBitDepthMinus8 = m_hevcSeqParams->bit_depth_luma_minus8;
1673 rowstoreParams.ucLCUSize = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
1674 // VDEnc only support LCU64 for now
1675 ENCODE_ASSERT(rowstoreParams.ucLCUSize == m_basicFeature->m_maxLCUSize);
1676 m_hcpItf->SetRowstoreCachingOffsets(rowstoreParams);
1677
1678 return MOS_STATUS_SUCCESS;
1679 }
1680
Destroy()1681 MOS_STATUS HevcVdencPkt::Destroy()
1682 {
1683 m_statusReport->UnregistObserver(this);
1684 return MOS_STATUS_SUCCESS;
1685 }
1686
SetPakPassType()1687 void HevcVdencPkt::SetPakPassType()
1688 {
1689 ENCODE_FUNC_CALL();
1690
1691 // default: VDEnc+PAK pass
1692 m_pakOnlyPass = false;
1693
1694 return;
1695 }
1696
1697 // Inline functions
ValidateVdboxIdx(const MHW_VDBOX_NODE_IND & vdboxIndex)1698 MOS_STATUS HevcVdencPkt::ValidateVdboxIdx(const MHW_VDBOX_NODE_IND &vdboxIndex)
1699 {
1700 ENCODE_FUNC_CALL();
1701
1702 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1703 if (vdboxIndex > m_hwInterface->GetMaxVdboxIndex())
1704 {
1705 ENCODE_ASSERTMESSAGE("ERROR - vdbox index exceed the maximum");
1706 eStatus = MOS_STATUS_INVALID_PARAMETER;
1707 }
1708
1709 return eStatus;
1710 }
1711
SetPerfTag()1712 void HevcVdencPkt::SetPerfTag()
1713 {
1714 ENCODE_FUNC_CALL();
1715
1716 uint16_t callType = m_pipeline->IsFirstPass() ? CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE : CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE_SECOND_PASS;
1717 uint16_t picType = m_basicFeature->m_pictureCodingType;
1718 if (m_basicFeature->m_pictureCodingType == B_TYPE && m_basicFeature->m_ref.IsLowDelay())
1719 {
1720 picType = 0;
1721 }
1722
1723 PerfTagSetting perfTag;
1724 perfTag.Value = 0;
1725 perfTag.Mode = (uint16_t)m_basicFeature->m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
1726 perfTag.CallType = callType;
1727 perfTag.PictureCodingType = picType;
1728 m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
1729 m_osInterface->pfnIncPerfBufferID(m_osInterface);
1730 }
1731
SetSemaphoreMem(MOS_RESOURCE & semaphoreMem,uint32_t value,MOS_COMMAND_BUFFER & cmdBuffer)1732 MOS_STATUS HevcVdencPkt::SetSemaphoreMem(
1733 MOS_RESOURCE & semaphoreMem,
1734 uint32_t value,
1735 MOS_COMMAND_BUFFER &cmdBuffer)
1736 {
1737 ENCODE_FUNC_CALL();
1738
1739 auto &storeDataParams = m_miItf->MHW_GETPAR_F(MI_STORE_DATA_IMM)();
1740 storeDataParams = {};
1741 storeDataParams.pOsResource = &semaphoreMem;
1742 storeDataParams.dwResourceOffset = 0;
1743 storeDataParams.dwValue = value;
1744
1745 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(&cmdBuffer));
1746
1747 return MOS_STATUS_SUCCESS;
1748 }
1749
SendPrologCmds(MOS_COMMAND_BUFFER & cmdBuffer)1750 MOS_STATUS HevcVdencPkt::SendPrologCmds(
1751 MOS_COMMAND_BUFFER &cmdBuffer)
1752 {
1753 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1754
1755 ENCODE_FUNC_CALL();
1756
1757 auto packetUtilities = m_pipeline->GetPacketUtilities();
1758 ENCODE_CHK_NULL_RETURN(packetUtilities);
1759 if (m_basicFeature->m_setMarkerEnabled)
1760 {
1761 PMOS_RESOURCE presSetMarker = m_osInterface->pfnGetMarkerResource(m_osInterface);
1762 ENCODE_CHK_STATUS_RETURN(packetUtilities->SendMarkerCommand(&cmdBuffer, presSetMarker));
1763 }
1764
1765 #ifdef _MMC_SUPPORTED
1766 ENCODE_CHK_NULL_RETURN(m_mmcState);
1767 ENCODE_CHK_STATUS_RETURN(m_mmcState->SendPrologCmd(&cmdBuffer, false));
1768 #endif
1769
1770 MHW_GENERIC_PROLOG_PARAMS genericPrologParams;
1771 MOS_ZeroMemory(&genericPrologParams, sizeof(genericPrologParams));
1772 genericPrologParams.pOsInterface = m_osInterface;
1773 genericPrologParams.pvMiInterface = nullptr;
1774 genericPrologParams.bMmcEnabled = m_mmcState ? m_mmcState->IsMmcEnabled() : false;
1775 ENCODE_CHK_STATUS_RETURN(Mhw_SendGenericPrologCmdNext(&cmdBuffer, &genericPrologParams, m_miItf));
1776
1777 // Send predication command
1778 if (m_basicFeature->m_predicationEnabled)
1779 {
1780 ENCODE_CHK_STATUS_RETURN(packetUtilities->SendPredicationCommand(&cmdBuffer));
1781 }
1782
1783 return eStatus;
1784 }
1785
AllocateBatchBufferForPakSlices(uint32_t numSlices,uint16_t numPakPasses)1786 MOS_STATUS HevcVdencPkt::AllocateBatchBufferForPakSlices(
1787 uint32_t numSlices,
1788 uint16_t numPakPasses)
1789 {
1790 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1791
1792 ENCODE_FUNC_CALL();
1793
1794 MOS_ZeroMemory(
1795 &m_batchBufferForPakSlices[m_basicFeature->m_currPakSliceIdx],
1796 sizeof(MHW_BATCH_BUFFER));
1797
1798 // Get the slice size
1799 uint32_t size = numPakPasses * numSlices * m_sliceStatesSize;
1800
1801 m_batchBufferForPakSlices[m_basicFeature->m_currPakSliceIdx].bSecondLevel = true;
1802 ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb(
1803 m_osInterface,
1804 &m_batchBufferForPakSlices[m_basicFeature->m_currPakSliceIdx],
1805 nullptr,
1806 size));
1807
1808 MOS_LOCK_PARAMS lockFlags;
1809 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1810 lockFlags.WriteOnly = 1;
1811 uint8_t *data = (uint8_t *)m_allocator->LockResourceForWrite(&m_batchBufferForPakSlices[m_basicFeature->m_currPakSliceIdx].OsResource);
1812
1813 if (data == nullptr)
1814 {
1815 ENCODE_ASSERTMESSAGE("Failed to lock batch buffer for PAK slices.");
1816 eStatus = MOS_STATUS_UNKNOWN;
1817 return eStatus;
1818 }
1819
1820 m_allocator->UnLock(&m_batchBufferForPakSlices[m_basicFeature->m_currPakSliceIdx].OsResource);
1821
1822 return eStatus;
1823 }
1824
ReadExtStatistics(MOS_COMMAND_BUFFER & cmdBuffer)1825 MOS_STATUS HevcVdencPkt::ReadExtStatistics(MOS_COMMAND_BUFFER &cmdBuffer)
1826 {
1827 ENCODE_FUNC_CALL();
1828
1829 PMOS_RESOURCE osResource = nullptr;
1830 uint32_t offset = 0;
1831
1832 m_statusReport->GetAddress(statusReportSumSquareError, osResource, offset);
1833
1834 for (auto i = 0; i < 3; i++) // 64 bit SSE values for luma/ chroma channels need to be copied
1835 {
1836 auto &miCpyMemMemParams = m_miItf->MHW_GETPAR_F(MI_COPY_MEM_MEM)();
1837 miCpyMemMemParams = {};
1838 MOS_RESOURCE *resHuCPakAggregatedFrameStatsBuffer = nullptr;
1839 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetHucPakAggregatedFrameStatsBuffer, resHuCPakAggregatedFrameStatsBuffer);
1840 ENCODE_CHK_NULL_RETURN(resHuCPakAggregatedFrameStatsBuffer);
1841 miCpyMemMemParams.presSrc = m_hevcPicParams->tiles_enabled_flag && (m_pipeline->GetPipeNum() > 1) ? resHuCPakAggregatedFrameStatsBuffer : m_basicFeature->m_recycleBuf->GetBuffer(FrameStatStreamOutBuffer, 0);
1842 miCpyMemMemParams.dwSrcOffset = (m_basicFeature->m_hevcPakStatsSSEOffset + i) * sizeof(uint32_t); // SSE luma offset is located at DW32 in Frame statistics, followed by chroma
1843 miCpyMemMemParams.presDst = osResource;
1844 miCpyMemMemParams.dwDstOffset = offset + i * sizeof(uint32_t);
1845 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_COPY_MEM_MEM)(&cmdBuffer));
1846 }
1847 return MOS_STATUS_SUCCESS;
1848 }
1849
AddForceWakeup(MOS_COMMAND_BUFFER & cmdBuffer)1850 MOS_STATUS HevcVdencPkt::AddForceWakeup(MOS_COMMAND_BUFFER &cmdBuffer)
1851 {
1852 ENCODE_FUNC_CALL();
1853
1854 auto &forceWakeupParams = m_miItf->MHW_GETPAR_F(MI_FORCE_WAKEUP)();
1855 forceWakeupParams = {};
1856 forceWakeupParams.bMFXPowerWellControl = true;
1857 forceWakeupParams.bMFXPowerWellControlMask = true;
1858 forceWakeupParams.bHEVCPowerWellControl = true;
1859 forceWakeupParams.bHEVCPowerWellControlMask = true;
1860
1861 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_FORCE_WAKEUP)(&cmdBuffer));
1862
1863 return MOS_STATUS_SUCCESS;
1864 }
1865
SetBatchBufferForPakSlices()1866 MOS_STATUS HevcVdencPkt::SetBatchBufferForPakSlices()
1867 {
1868 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1869
1870 ENCODE_FUNC_CALL();
1871
1872 if (m_hevcPicParams->tiles_enabled_flag)
1873 {
1874 return eStatus;
1875 }
1876
1877 m_useBatchBufferForPakSlices = m_pipeline->IsSingleTaskPhaseSupported() && m_pipeline->IsSingleTaskPhaseSupportedInPak();
1878 m_batchBufferForPakSlicesStartOffset = 0;
1879
1880 if (m_useBatchBufferForPakSlices)
1881 {
1882 if (m_pipeline->IsFirstPass())
1883 {
1884 // The same buffer is used for all slices for all passes
1885 uint32_t batchBufferForPakSlicesSize =
1886 m_pipeline->GetPassNum() * m_basicFeature->m_numSlices * m_sliceStatesSize;
1887
1888 ENCODE_ASSERT(batchBufferForPakSlicesSize);
1889
1890 if (batchBufferForPakSlicesSize >
1891 (uint32_t)m_batchBufferForPakSlices[m_basicFeature->m_currPakSliceIdx].iSize)
1892 {
1893 if (m_batchBufferForPakSlices[m_basicFeature->m_currPakSliceIdx].iSize)
1894 {
1895 Mhw_FreeBb(m_osInterface, &m_batchBufferForPakSlices[m_basicFeature->m_currPakSliceIdx], nullptr);
1896 m_batchBufferForPakSlices[m_basicFeature->m_currPakSliceIdx].iSize = 0;
1897 }
1898
1899 ENCODE_CHK_STATUS_RETURN(AllocateBatchBufferForPakSlices(
1900 m_basicFeature->m_numSlices,
1901 m_pipeline->GetPassNum()));
1902 }
1903 }
1904
1905 ENCODE_CHK_STATUS_RETURN(Mhw_LockBb(
1906 m_osInterface,
1907 &m_batchBufferForPakSlices[m_basicFeature->m_currPakSliceIdx]));
1908
1909 m_batchBufferForPakSlicesStartOffset =
1910 m_pipeline->IsFirstPass() ? 0 : (uint32_t)m_batchBufferForPakSlices[m_basicFeature->m_currPakSliceIdx].iCurrent;
1911 }
1912
1913 return eStatus;
1914 }
1915
StartStatusReport(uint32_t srType,MOS_COMMAND_BUFFER * cmdBuffer)1916 MOS_STATUS HevcVdencPkt::StartStatusReport(
1917 uint32_t srType,
1918 MOS_COMMAND_BUFFER *cmdBuffer)
1919 {
1920 ENCODE_FUNC_CALL();
1921 ENCODE_CHK_NULL_RETURN(cmdBuffer);
1922
1923 ENCODE_CHK_STATUS_RETURN(MediaPacket::StartStatusReportNext(srType, cmdBuffer));
1924 m_encodecp->StartCpStatusReport(cmdBuffer);
1925
1926 return MOS_STATUS_SUCCESS;
1927 }
1928
EndStatusReport(uint32_t srType,MOS_COMMAND_BUFFER * cmdBuffer)1929 MOS_STATUS HevcVdencPkt::EndStatusReport(
1930 uint32_t srType,
1931 MOS_COMMAND_BUFFER *cmdBuffer)
1932 {
1933 ENCODE_FUNC_CALL();
1934 ENCODE_CHK_NULL_RETURN(cmdBuffer);
1935
1936 ENCODE_CHK_STATUS_RETURN(MediaPacket::EndStatusReportNext(srType, cmdBuffer));
1937
1938 MediaPerfProfiler *perfProfiler = MediaPerfProfiler::Instance();
1939 ENCODE_CHK_NULL_RETURN(perfProfiler);
1940 ENCODE_CHK_STATUS_RETURN(perfProfiler->AddPerfCollectEndCmd(
1941 (void *)m_pipeline, m_osInterface, m_miItf, cmdBuffer));
1942
1943 return MOS_STATUS_SUCCESS;
1944 }
1945
ReadHcpStatus(MHW_VDBOX_NODE_IND vdboxIndex,MediaStatusReport * statusReport,MOS_COMMAND_BUFFER & cmdBuffer)1946 MOS_STATUS HevcVdencPkt::ReadHcpStatus(
1947 MHW_VDBOX_NODE_IND vdboxIndex,
1948 MediaStatusReport * statusReport,
1949 MOS_COMMAND_BUFFER &cmdBuffer)
1950 {
1951 ENCODE_FUNC_CALL();
1952
1953 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1954
1955 CODEC_HW_FUNCTION_ENTER;
1956
1957 ENCODE_CHK_NULL_RETURN(statusReport);
1958 ENCODE_CHK_NULL_RETURN(m_hwInterface);
1959
1960 MOS_RESOURCE *osResource;
1961 uint32_t offset;
1962
1963 EncodeStatusReadParams params;
1964 MOS_ZeroMemory(¶ms, sizeof(params));
1965
1966 ENCODE_CHK_STATUS_RETURN(statusReport->GetAddress(encode::statusReportMfxBitstreamByteCountPerFrame, osResource, offset));
1967 params.resBitstreamByteCountPerFrame = osResource;
1968 params.bitstreamByteCountPerFrameOffset = offset;
1969
1970 ENCODE_CHK_STATUS_RETURN(statusReport->GetAddress(encode::statusReportMfxBitstreamSyntaxElementOnlyBitCount, osResource, offset));
1971 params.resBitstreamSyntaxElementOnlyBitCount = osResource;
1972 params.bitstreamSyntaxElementOnlyBitCountOffset = offset;
1973
1974 ENCODE_CHK_STATUS_RETURN(statusReport->GetAddress(encode::statusReportQPStatusCount, osResource, offset));
1975 params.resQpStatusCount = osResource;
1976 params.qpStatusCountOffset = offset;
1977
1978 ENCODE_CHK_STATUS_RETURN(statusReport->GetAddress(encode::statusReportImageStatusMask, osResource, offset));
1979 params.resImageStatusMask = osResource;
1980 params.imageStatusMaskOffset = offset;
1981
1982 ENCODE_CHK_STATUS_RETURN(statusReport->GetAddress(encode::statusReportImageStatusCtrl, osResource, offset));
1983 params.resImageStatusCtrl = osResource;
1984 params.imageStatusCtrlOffset = offset;
1985
1986 ENCODE_CHK_STATUS_RETURN(statusReport->GetAddress(encode::statusReportNumSlices, osResource, offset));
1987 params.resNumSlices = osResource;
1988 params.numSlicesOffset = offset;
1989
1990 ENCODE_CHK_STATUS_RETURN(m_hwInterface->ReadHcpStatus(vdboxIndex, params, &cmdBuffer));
1991
1992 // Slice Size Conformance
1993 if (m_hevcSeqParams->SliceSizeControl)
1994 {
1995 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeDss, HevcFeatureIDs::hevcVdencDssFeature, ReadHcpStatus, vdboxIndex, cmdBuffer);
1996 }
1997
1998 auto brcFeature = dynamic_cast<HEVCEncodeBRC *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcBrcFeature));
1999 ENCODE_CHK_NULL_RETURN(brcFeature);
2000 bool vdencHucUsed = brcFeature->IsVdencHucUsed();
2001 auto mmioRegisters = m_hcpItf->GetMmioRegisters(m_vdboxIndex);
2002 if (vdencHucUsed)
2003 {
2004 // Store PAK frameSize MMIO to PakInfo buffer
2005 auto &miStoreRegMemParams = m_miItf->MHW_GETPAR_F(MI_STORE_REGISTER_MEM)();
2006 miStoreRegMemParams = {};
2007 miStoreRegMemParams.presStoreBuffer = m_basicFeature->m_recycleBuf->GetBuffer(PakInfo, 0);
2008 miStoreRegMemParams.dwOffset = 0;
2009 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncBitstreamBytecountFrameRegOffset;
2010 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(&cmdBuffer));
2011 }
2012 ENCODE_CHK_STATUS_RETURN(m_hwInterface->ReadImageStatusForHcp(vdboxIndex, params, &cmdBuffer));
2013 return eStatus;
2014 }
2015
ReadSliceSizeForSinglePipe(MOS_COMMAND_BUFFER & cmdBuffer)2016 MOS_STATUS HevcVdencPkt::ReadSliceSizeForSinglePipe(MOS_COMMAND_BUFFER &cmdBuffer)
2017 {
2018 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2019
2020 ENCODE_FUNC_CALL();
2021
2022 // Report slice size to app only when dynamic slice is enabled
2023 if (!m_hevcSeqParams->SliceSizeControl)
2024 {
2025 return eStatus;
2026 }
2027 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeDss, HevcFeatureIDs::hevcVdencDssFeature, ReadSliceSizeForSinglePipe, m_pipeline, cmdBuffer);
2028
2029 return eStatus;
2030 }
2031
ReadSliceSize(MOS_COMMAND_BUFFER & cmdBuffer)2032 MOS_STATUS HevcVdencPkt::ReadSliceSize(MOS_COMMAND_BUFFER &cmdBuffer)
2033 {
2034 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2035
2036 ENCODE_FUNC_CALL();
2037
2038 // Use FrameStats buffer if in single pipe mode.
2039 if (m_pipeline->GetPipeNum() == 1)
2040 {
2041 return ReadSliceSizeForSinglePipe(cmdBuffer);
2042 }
2043
2044 // In multi-tile multi-pipe mode, use PAK integration kernel output
2045 // PAK integration kernel accumulates frame statistics across tiles, which should be used to setup slice size report
2046 // Report slice size to app only when dynamic scaling is enabled
2047 if (!m_hevcSeqParams->SliceSizeControl)
2048 {
2049 return eStatus;
2050 }
2051
2052 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeDss, HevcFeatureIDs::hevcVdencDssFeature, ReadSliceSize, m_pipeline, cmdBuffer);
2053
2054 return eStatus;
2055 }
2056
Completed(void * mfxStatus,void * rcsStatus,void * statusReport)2057 MOS_STATUS HevcVdencPkt::Completed(void *mfxStatus, void *rcsStatus, void *statusReport)
2058 {
2059 ENCODE_FUNC_CALL();
2060
2061 if (!m_enableVdencStatusReport)
2062 {
2063 return MOS_STATUS_SUCCESS;
2064 }
2065
2066 ENCODE_CHK_NULL_RETURN(mfxStatus);
2067 ENCODE_CHK_NULL_RETURN(statusReport);
2068 ENCODE_CHK_NULL_RETURN(m_basicFeature);
2069
2070 EncodeStatusMfx * encodeStatusMfx = (EncodeStatusMfx *)mfxStatus;
2071 EncodeStatusReportData *statusReportData = (EncodeStatusReportData *)statusReport;
2072 if (statusReportData->hwCtr)
2073 {
2074 m_encodecp->UpdateCpStatusReport(statusReport);
2075 }
2076
2077 // The last pass of BRC may have a zero value of hcpCumulativeFrameDeltaQp
2078 if (encodeStatusMfx->imageStatusCtrl.hcpTotalPass && encodeStatusMfx->imageStatusCtrl.hcpCumulativeFrameDeltaQP == 0)
2079 {
2080 encodeStatusMfx->imageStatusCtrl.hcpCumulativeFrameDeltaQP = encodeStatusMfx->imageStatusCtrlOfLastBRCPass.hcpCumulativeFrameDeltaQP;
2081 }
2082 encodeStatusMfx->imageStatusCtrlOfLastBRCPass.hcpCumulativeFrameDeltaQP = 0;
2083
2084 statusReportData->codecStatus = CODECHAL_STATUS_SUCCESSFUL;
2085 statusReportData->bitstreamSize = encodeStatusMfx->mfcBitstreamByteCountPerFrame + encodeStatusMfx->headerBytesInserted;
2086
2087 statusReportData->numberSlices = 0;
2088 statusReportData->panicMode = encodeStatusMfx->imageStatusCtrl.panic;
2089 statusReportData->averageQP = 0;
2090 statusReportData->qpY = 0;
2091 statusReportData->suggestedQPYDelta = encodeStatusMfx->imageStatusCtrl.hcpCumulativeFrameDeltaQP;
2092 statusReportData->numberPasses = (unsigned char)encodeStatusMfx->imageStatusCtrl.hcpTotalPass + 1; //initial pass is considered to be 0,hence +1 to report;
2093 ENCODE_VERBOSEMESSAGE("Exectued PAK Pass number: %d\n", encodeStatusMfx->numberPasses);
2094
2095 if (m_basicFeature->m_frameWidth != 0 && m_basicFeature->m_frameHeight != 0)
2096 {
2097 ENCODE_CHK_NULL_RETURN(m_basicFeature->m_hevcSeqParams);
2098
2099 uint32_t log2CBSize = 2;
2100
2101 // Based on HW team:
2102 // The CumulativeQp from the PAK accumulated at TU level and normalized to TU4x4
2103 // qp(for TU 8x8) = qp*4
2104 // qp(for TU 16x16) = qp *16
2105 // qp(for TU 32x32) = qp*64
2106 // all these qp are accumulated for entire frame.
2107 // the HW will ceil the CumulativeQp number to max (24 bit)
2108
2109 uint32_t log2McuSize = m_basicFeature->m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
2110
2111 uint32_t numLumaPixels = ((m_basicFeature->m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1) << log2McuSize) *
2112 ((m_basicFeature->m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1) << log2McuSize);
2113
2114 statusReportData->qpY = statusReportData->averageQP = static_cast<uint8_t>(
2115 static_cast<double>(encodeStatusMfx->qpStatusCount.hcpCumulativeQP)
2116 / (numLumaPixels / 16) - (m_basicFeature->m_hevcSeqParams->bit_depth_luma_minus8 != 0) * 12);
2117 }
2118
2119 // When tile replay is enabled with tile replay, need to report out the tile size and the bit stream is not continous
2120 if (m_pipeline->GetPipeNum() == 1)
2121 {
2122 //ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::GetStatusReport(encodeStatus, encodeStatusReport));
2123 MOS_LOCK_PARAMS lockFlags;
2124 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
2125 lockFlags.ReadOnly = 1;
2126
2127 uint32_t *sliceSize = nullptr;
2128 // pSliceSize is set/ allocated only when dynamic slice is enabled. Cannot use SSC flag here, as it is an asynchronous call
2129 if (encodeStatusMfx->sliceReport.sliceSize)
2130 {
2131 sliceSize = (uint32_t *)m_osInterface->pfnLockResource(m_osInterface, encodeStatusMfx->sliceReport.sliceSize, &lockFlags);
2132 ENCODE_CHK_NULL_RETURN(sliceSize);
2133
2134 statusReportData->numberSlices = encodeStatusMfx->sliceReport.numberSlices;
2135 statusReportData->sizeOfSliceSizesBuffer = sizeof(uint16_t) * encodeStatusMfx->sliceReport.numberSlices;
2136 statusReportData->sliceSizeOverflow = (encodeStatusMfx->sliceReport.sliceSizeOverflow >> 16) & 1;
2137 statusReportData->sliceSizes = (uint16_t *)sliceSize;
2138
2139 uint16_t prevCumulativeSliceSize = 0;
2140 // HW writes out a DW for each slice size. Copy in place the DW into 16bit fields expected by App
2141 for (auto sliceCount = 0; sliceCount < encodeStatusMfx->sliceReport.numberSlices; sliceCount++)
2142 {
2143 // PAK output the sliceSize at 16DW intervals.
2144 ENCODE_CHK_NULL_RETURN(&sliceSize[sliceCount * 16]);
2145 uint32_t CurrAccumulatedSliceSize = sliceSize[sliceCount * 16];
2146
2147 //convert cummulative slice size to individual, first slice may have PPS/SPS,
2148 statusReportData->sliceSizes[sliceCount] = CurrAccumulatedSliceSize - prevCumulativeSliceSize;
2149 prevCumulativeSliceSize += statusReportData->sliceSizes[sliceCount];
2150 }
2151 m_osInterface->pfnUnlockResource(m_osInterface, encodeStatusMfx->sliceReport.sliceSize);
2152 }
2153 }
2154
2155 ENCODE_CHK_STATUS_RETURN(ReportExtStatistics(*encodeStatusMfx, *statusReportData));
2156
2157 CODECHAL_DEBUG_TOOL(
2158 ENCODE_CHK_STATUS_RETURN(DumpResources(encodeStatusMfx, statusReportData)););
2159
2160 if (statusReportData->numberTilesInFrame > 1)
2161 {
2162 // When Tile feature enabled, Reset is not in vdenc packet
2163 return MOS_STATUS_SUCCESS;
2164 }
2165
2166 m_basicFeature->Reset((CODEC_REF_LIST *)statusReportData->currRefList);
2167 return MOS_STATUS_SUCCESS;
2168 }
2169
ReportExtStatistics(EncodeStatusMfx & encodeStatusMfx,EncodeStatusReportData & statusReportData)2170 MOS_STATUS HevcVdencPkt::ReportExtStatistics(
2171 EncodeStatusMfx &encodeStatusMfx,
2172 EncodeStatusReportData &statusReportData)
2173 {
2174 ENCODE_FUNC_CALL();
2175
2176 ENCODE_CHK_NULL_RETURN(m_basicFeature);
2177
2178 uint32_t numLumaPixels = 0, numPixelsPerChromaChannel = 0;
2179
2180 numLumaPixels = m_basicFeature->m_frameHeight * m_basicFeature->m_frameWidth;
2181 switch (m_basicFeature->m_hevcSeqParams->chroma_format_idc)
2182 {
2183 case HCP_CHROMA_FORMAT_MONOCHROME:
2184 numPixelsPerChromaChannel = 0;
2185 break;
2186 case HCP_CHROMA_FORMAT_YUV420:
2187 numPixelsPerChromaChannel = numLumaPixels / 4;
2188 break;
2189 case HCP_CHROMA_FORMAT_YUV422:
2190 numPixelsPerChromaChannel = numLumaPixels / 2;
2191 break;
2192 case HCP_CHROMA_FORMAT_YUV444:
2193 numPixelsPerChromaChannel = numLumaPixels;
2194 break;
2195 default:
2196 numPixelsPerChromaChannel = numLumaPixels / 2;
2197 break;
2198 }
2199
2200 double squarePeakPixelValue = pow((1 << (m_basicFeature->m_hevcSeqParams->bit_depth_luma_minus8 + 8)) - 1, 2);
2201
2202 for (auto i = 0; i < 3; i++)
2203 {
2204 uint32_t numPixels = i ? numPixelsPerChromaChannel : numLumaPixels;
2205
2206 if (m_basicFeature->m_hevcSeqParams->bit_depth_luma_minus8 == 0)
2207 {
2208 //8bit pixel data is represented in 10bit format in HW. so SSE should right shift by 4.
2209 encodeStatusMfx.sumSquareError[i] >>= 4;
2210 }
2211 statusReportData.psnrX100[i] = (uint16_t)CodecHal_Clip3(0, 10000, (uint16_t)(encodeStatusMfx.sumSquareError[i] ? 1000 * log10(squarePeakPixelValue * numPixels / encodeStatusMfx.sumSquareError[i]) : -1));
2212
2213 ENCODE_VERBOSEMESSAGE("psnrX100[%d]:%d.\n", i, statusReportData.psnrX100[i]);
2214 }
2215
2216 return MOS_STATUS_SUCCESS;
2217 }
2218
GetVdencStateCommandsDataSize(uint32_t & vdencPictureStatesSize,uint32_t & vdencPicturePatchListSize)2219 MOS_STATUS HevcVdencPkt::GetVdencStateCommandsDataSize(uint32_t &vdencPictureStatesSize, uint32_t &vdencPicturePatchListSize)
2220 {
2221 vdencPictureStatesSize =
2222 m_vdencItf->MHW_GETSIZE_F(VDENC_PIPE_MODE_SELECT)() +
2223 m_vdencItf->MHW_GETSIZE_F(VDENC_SRC_SURFACE_STATE)() +
2224 m_vdencItf->MHW_GETSIZE_F(VDENC_REF_SURFACE_STATE)() +
2225 m_vdencItf->MHW_GETSIZE_F(VDENC_DS_REF_SURFACE_STATE)() +
2226 m_vdencItf->MHW_GETSIZE_F(VDENC_PIPE_BUF_ADDR_STATE)() +
2227 m_vdencItf->MHW_GETSIZE_F(VDENC_WEIGHTSOFFSETS_STATE)() +
2228 m_vdencItf->MHW_GETSIZE_F(VDENC_WALKER_STATE)() +
2229 m_vdencItf->MHW_GETSIZE_F(VD_PIPELINE_FLUSH)() +
2230 m_miItf->MHW_GETSIZE_F(MI_LOAD_REGISTER_IMM)()*8 +
2231 m_miItf->MHW_GETSIZE_F(MI_FLUSH_DW)() +
2232 m_miItf->MHW_GETSIZE_F(MI_BATCH_BUFFER_START)() +
2233 m_hcpItf->MHW_GETSIZE_F(HEVC_VP9_RDOQ_STATE)() +
2234 m_miItf->MHW_GETSIZE_F(MI_BATCH_BUFFER_END)();
2235
2236 vdencPicturePatchListSize = PATCH_LIST_COMMAND(mhw::vdbox::vdenc::Itf::VDENC_PIPE_BUF_ADDR_STATE_CMD);
2237
2238 return MOS_STATUS_SUCCESS;
2239 }
2240
GetHxxPrimitiveCommandSize()2241 MOS_STATUS HevcVdencPkt::GetHxxPrimitiveCommandSize()
2242 {
2243 uint32_t hcpCommandsSize = 0;
2244 uint32_t hcpPatchListSize = 0;
2245 hcpCommandsSize =
2246 m_hcpItf->MHW_GETSIZE_F(HCP_REF_IDX_STATE)() * 2 +
2247 m_hcpItf->MHW_GETSIZE_F(HCP_WEIGHTOFFSET_STATE)() * 2 +
2248 m_hcpItf->MHW_GETSIZE_F(HCP_SLICE_STATE)() +
2249 m_hcpItf->MHW_GETSIZE_F(HCP_PAK_INSERT_OBJECT)() +
2250 m_miItf->MHW_GETSIZE_F(MI_BATCH_BUFFER_START)() * 2 +
2251 m_hcpItf->MHW_GETSIZE_F(HCP_TILE_CODING)(); // one slice cannot be with more than one tile
2252
2253 hcpPatchListSize =
2254 mhw::vdbox::hcp::Itf::HCP_REF_IDX_STATE_CMD_NUMBER_OF_ADDRESSES * 2 +
2255 mhw::vdbox::hcp::Itf::HCP_WEIGHTOFFSET_STATE_CMD_NUMBER_OF_ADDRESSES * 2 +
2256 mhw::vdbox::hcp::Itf::HCP_SLICE_STATE_CMD_NUMBER_OF_ADDRESSES +
2257 mhw::vdbox::hcp::Itf::HCP_PAK_INSERT_OBJECT_CMD_NUMBER_OF_ADDRESSES +
2258 mhw::vdbox::hcp::Itf::MI_BATCH_BUFFER_START_CMD_NUMBER_OF_ADDRESSES * 2 + // One is for the PAK command and another one is for the BB when BRC and single task mode are on
2259 mhw::vdbox::hcp::Itf::HCP_TILE_CODING_COMMAND_NUMBER_OF_ADDRESSES; // HCP_TILE_CODING_STATE command
2260
2261 uint32_t cpCmdsize = 0;
2262 uint32_t cpPatchListSize = 0;
2263 if (m_hwInterface->GetCpInterface())
2264 {
2265 m_hwInterface->GetCpInterface()->GetCpSliceLevelCmdSize(cpCmdsize, cpPatchListSize);
2266 }
2267
2268 m_defaultSliceStatesSize = hcpCommandsSize + (uint32_t)cpCmdsize;
2269 m_defaultSlicePatchListSize = hcpPatchListSize + (uint32_t)cpPatchListSize;
2270
2271 return MOS_STATUS_SUCCESS;
2272 }
2273
CalculateCommandSize(uint32_t & commandBufferSize,uint32_t & requestedPatchListSize)2274 MOS_STATUS HevcVdencPkt::CalculateCommandSize(uint32_t &commandBufferSize, uint32_t &requestedPatchListSize)
2275 {
2276 m_pictureStatesSize = m_defaultPictureStatesSize;
2277 m_picturePatchListSize = m_defaultPicturePatchListSize;
2278 m_sliceStatesSize = m_defaultSliceStatesSize;
2279 m_slicePatchListSize = m_defaultSlicePatchListSize;
2280
2281 commandBufferSize = CalculateCommandBufferSize();
2282 requestedPatchListSize = CalculatePatchListSize();
2283 return MOS_STATUS_SUCCESS;
2284 }
2285
CalculateCommandBufferSize()2286 uint32_t HevcVdencPkt::CalculateCommandBufferSize()
2287 {
2288 ENCODE_FUNC_CALL();
2289 uint32_t commandBufferSize = 0;
2290
2291 // To be refined later, differentiate BRC and CQP
2292 commandBufferSize =
2293 m_pictureStatesSize +
2294 (m_sliceStatesSize * m_basicFeature->m_numSlices);
2295
2296 // 4K align since allocation is in chunks of 4K bytes.
2297 commandBufferSize = MOS_ALIGN_CEIL(commandBufferSize, CODECHAL_PAGE_SIZE);
2298
2299 return commandBufferSize;
2300 }
2301
CalculatePatchListSize()2302 uint32_t HevcVdencPkt::CalculatePatchListSize()
2303 {
2304 ENCODE_FUNC_CALL();
2305 uint32_t requestedPatchListSize = 0;
2306 if (m_usePatchList)
2307 {
2308 requestedPatchListSize =
2309 m_picturePatchListSize +
2310 (m_slicePatchListSize * m_basicFeature->m_numSlices);
2311
2312 // Multi pipes are sharing one patchlist
2313 requestedPatchListSize *= m_pipeline->GetPipeNum();
2314 }
2315 return requestedPatchListSize;
2316 }
2317
ReadBrcPakStatistics(PMOS_COMMAND_BUFFER cmdBuffer,EncodeReadBrcPakStatsParams * params)2318 MOS_STATUS HevcVdencPkt::ReadBrcPakStatistics(
2319 PMOS_COMMAND_BUFFER cmdBuffer,
2320 EncodeReadBrcPakStatsParams *params)
2321 {
2322 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2323
2324 ENCODE_FUNC_CALL();
2325
2326 ENCODE_CHK_NULL_RETURN(cmdBuffer);
2327 ENCODE_CHK_NULL_RETURN(params);
2328 ENCODE_CHK_NULL_RETURN(params->presBrcPakStatisticBuffer);
2329 ENCODE_CHK_NULL_RETURN(params->presStatusBuffer);
2330
2331 ENCODE_CHK_STATUS_RETURN(ValidateVdboxIdx(m_vdboxIndex));
2332 auto mmioRegisters = m_hcpItf->GetMmioRegisters(m_vdboxIndex);
2333
2334 auto &miStoreRegMemParams = m_miItf->MHW_GETPAR_F(MI_STORE_REGISTER_MEM)();
2335 miStoreRegMemParams = {};
2336 miStoreRegMemParams.presStoreBuffer = params->presBrcPakStatisticBuffer;
2337 miStoreRegMemParams.dwOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_BITSTREAM_BYTECOUNT_FRAME);
2338 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncBitstreamBytecountFrameRegOffset;
2339 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(cmdBuffer));
2340
2341 miStoreRegMemParams = {};
2342 miStoreRegMemParams.presStoreBuffer = params->presBrcPakStatisticBuffer;
2343 miStoreRegMemParams.dwOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_BITSTREAM_BYTECOUNT_FRAME_NOHEADER);
2344 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncBitstreamBytecountFrameNoHeaderRegOffset;
2345 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(cmdBuffer));
2346
2347 miStoreRegMemParams = {};
2348 miStoreRegMemParams.presStoreBuffer = params->presBrcPakStatisticBuffer;
2349 miStoreRegMemParams.dwOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL);
2350 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
2351 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(cmdBuffer));
2352
2353 auto &storeDataParams = m_miItf->MHW_GETPAR_F(MI_STORE_DATA_IMM)();
2354 storeDataParams = {};
2355 storeDataParams.pOsResource = params->presStatusBuffer;
2356 storeDataParams.dwResourceOffset = params->dwStatusBufNumPassesOffset;
2357 storeDataParams.dwValue = params->ucPass;
2358 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(cmdBuffer));
2359
2360 return eStatus;
2361 }
2362
//! Parameter setter for VDENC_CONTROL_STATE: sets the vdencInitialization flag.
MHW_SETPAR_DECL_SRC(VDENC_CONTROL_STATE, HevcVdencPkt)
{
    params.vdencInitialization = true;
    return MOS_STATUS_SUCCESS;
}
2369
//!
//! Parameter setter for VDENC_PIPE_MODE_SELECT.
//!
//! Configures PAK object stream-out, scalability mode and the prefetch
//! settings that depend on hardware workarounds. Returns a null-pointer
//! error (via ENCODE_CHK_NULL_RETURN) when the workaround table is not
//! available, otherwise MOS_STATUS_SUCCESS.
//!
MHW_SETPAR_DECL_SRC(VDENC_PIPE_MODE_SELECT, HevcVdencPkt)
{
    // Fetch the workaround table once and validate it before the first query,
    // so that no MEDIA_IS_WA lookup is ever made on a null table.
    auto waTable = m_osInterface->pfnGetWaTable(m_osInterface);
    ENCODE_CHK_NULL_RETURN(waTable);

    //params.tlbPrefetch = true;

    params.pakObjCmdStreamOut = m_vdencPakObjCmdStreamOutForceEnabled ? true : m_hevcPicParams->StatusReportEnable.fields.BlockStats;

    // needs to be enabled for 1st pass in multi-pass case
    // This bit is ignored if PAK only second pass is enabled.
    // Stream-out is also forced when the 4:2:2 feature flag is set.
    if (((m_pipeline->GetCurrentPass() == 0) && !m_pipeline->IsLastPass()) ||
        (m_basicFeature->m_422State && m_basicFeature->m_422State->GetFeature422Flag()))
    {
        params.pakObjCmdStreamOut = true;
    }

    if (!MEDIA_IS_WA(waTable, WaEnableOnlyASteppingFeatures))
    {
        params.VdencPipeModeSelectPar0 = 1;
    }

    // RGB encoding is not enabled
    params.rgbEncodingMode = false;

    // Scalability mode is used whenever more than one pipe (VDBOX) is running;
    // a single pipe corresponds to the legacy (non-scalable) engine mode.
    params.scalabilityMode = (m_pipeline->GetPipeNum() > 1);

    if (MEDIA_IS_WA(waTable, Wa_22011549751) &&
        !m_osInterface->bSimIsActive &&
        !m_basicFeature->m_hevcPicParams->pps_curr_pic_ref_enabled_flag)
    {
        params.hmeRegionPrefetch = m_basicFeature->m_hevcPicParams->CodingType != I_TYPE;
    }

    // This workaround takes precedence over the one above: prefetch is turned off
    if (MEDIA_IS_WA(waTable, Wa_14012254246))
    {
        params.hmeRegionPrefetch        = 0;
        params.leftPrefetchAtWrapAround = 0;
    }

    return MOS_STATUS_SUCCESS;
}
2434
//! Parameter setter for VDENC_PIPE_BUF_ADDR_STATE: supplies the packet-owned
//! intra row store scratch, tile row store and cumulative CU count stream-out buffers.
MHW_SETPAR_DECL_SRC(VDENC_PIPE_BUF_ADDR_STATE, HevcVdencPkt)
{
    params.cumulativeCuCountStreamOutBuffer = m_resCumulativeCuCountStreamoutBuffer;
    params.tileRowStoreBuffer               = m_vdencTileRowStoreBuffer;
    params.intraRowStoreScratchBuffer       = m_vdencIntraRowStoreScratch;

    return MOS_STATUS_SUCCESS;
}
2443
//! Parameter setter for VD_PIPELINE_FLUSH: raises the wait/flush flags that
//! correspond to m_flushCmd. Flags are only ever set to true here; for any
//! other m_flushCmd value params is left untouched.
MHW_SETPAR_DECL_SRC(VD_PIPELINE_FLUSH, HevcVdencPkt)
{
    const bool hevcOnly     = (m_flushCmd == waitHevc);
    const bool vdencOnly    = (m_flushCmd == waitVdenc);
    const bool hevcAndVdenc = (m_flushCmd == waitHevcVdenc);

    if (hevcOnly)
    {
        params.waitDoneHEVC = true;
    }

    if (hevcOnly || hevcAndVdenc)
    {
        params.flushHEVC = true;
    }

    if (vdencOnly || hevcAndVdenc)
    {
        params.waitDoneMFX   = true;
        params.waitDoneVDENC = true;
        params.flushVDENC    = true;
    }

    if (hevcOnly || vdencOnly || hevcAndVdenc)
    {
        params.waitDoneVDCmdMsgParser = true;
    }

    return MOS_STATUS_SUCCESS;
}
2470
//! Parameter setter for HCP_SURFACE_STATE: forwards the currently selected
//! surface state id (m_curHcpSurfStateId).
MHW_SETPAR_DECL_SRC(HCP_SURFACE_STATE, HevcVdencPkt)
{
    params.surfaceStateId = m_curHcpSurfStateId;
    return MOS_STATUS_SUCCESS;
}
2477
//! Parameter setter for VDENC_HEVC_VP9_TILE_SLICE_STATE: maps the pipeline's
//! pipe count (0/1, 2 or 4) to the matching VDENC_PIPE_* value. Any other
//! count asserts and reports VDENC_PIPE_INVALID.
MHW_SETPAR_DECL_SRC(VDENC_HEVC_VP9_TILE_SLICE_STATE, HevcVdencPkt)
{
    const auto pipeCount = m_pipeline->GetPipeNum();
    uint32_t   pipeMode  = 0;

    if (pipeCount == 0 || pipeCount == 1)
    {
        pipeMode = VDENC_PIPE_SINGLE_PIPE;
    }
    else if (pipeCount == 2)
    {
        pipeMode = VDENC_PIPE_TWO_PIPE;
    }
    else if (pipeCount == 4)
    {
        pipeMode = VDENC_PIPE_FOUR_PIPE;
    }
    else
    {
        pipeMode = VDENC_PIPE_INVALID;
        ENCODE_ASSERT(false);
    }

    params.numPipe = pipeMode;

    return MOS_STATUS_SUCCESS;
}
2503
PrepareHWMetaData(MOS_COMMAND_BUFFER * cmdBuffer)2504 MOS_STATUS HevcVdencPkt::PrepareHWMetaData(MOS_COMMAND_BUFFER *cmdBuffer)
2505 {
2506 ENCODE_FUNC_CALL();
2507 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2508
2509 ENCODE_CHK_NULL_RETURN(m_basicFeature);
2510 if (!m_basicFeature->m_resMetadataBuffer)
2511 {
2512 return MOS_STATUS_SUCCESS;
2513 }
2514
2515 // Intra/Inter/Skip CU Cnt
2516 auto xCalAtomic = [&](PMOS_RESOURCE presDst, uint32_t dstOffset, PMOS_RESOURCE presSrc, uint32_t srcOffset, mhw::mi::MHW_COMMON_MI_ATOMIC_OPCODE opCode) {
2517 auto mmioRegisters = m_hwInterface->GetVdencInterfaceNext()->GetMmioRegisters(m_vdboxIndex);
2518 auto &miLoadRegMemParams = m_miItf->MHW_GETPAR_F(MI_LOAD_REGISTER_MEM)();
2519 auto &flushDwParams = m_miItf->MHW_GETPAR_F(MI_FLUSH_DW)();
2520 auto &atomicParams = m_miItf->MHW_GETPAR_F(MI_ATOMIC)();
2521
2522 miLoadRegMemParams = {};
2523 flushDwParams = {};
2524 atomicParams = {};
2525
2526 miLoadRegMemParams.presStoreBuffer = presSrc;
2527 miLoadRegMemParams.dwOffset = srcOffset;
2528 miLoadRegMemParams.dwRegister = mmioRegisters->generalPurposeRegister0LoOffset;
2529 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer));
2530
2531 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_FLUSH_DW)(cmdBuffer));
2532
2533 atomicParams.pOsResource = presDst;
2534 atomicParams.dwResourceOffset = dstOffset;
2535 atomicParams.dwDataSize = sizeof(uint32_t);
2536 atomicParams.Operation = opCode;
2537 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_ATOMIC)(cmdBuffer));
2538
2539 return MOS_STATUS_SUCCESS;
2540 };
2541
2542 MetaDataOffset resourceOffset = m_basicFeature->m_metaDataOffset;
2543 PMOS_RESOURCE resLcuBaseAddressBuffer = m_basicFeature->m_recycleBuf->GetBuffer(LcuBaseAddressBuffer, 0);
2544 ENCODE_CHK_NULL_RETURN(resLcuBaseAddressBuffer);
2545
2546 auto &storeDataParams = m_miItf->MHW_GETPAR_F(MI_STORE_DATA_IMM)();
2547 storeDataParams = {};
2548 storeDataParams.pOsResource = m_basicFeature->m_resMetadataBuffer;
2549 storeDataParams.dwResourceOffset = resourceOffset.dwEncodeErrorFlags;
2550 storeDataParams.dwValue = 0;
2551 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(cmdBuffer));
2552
2553 storeDataParams.dwResourceOffset = resourceOffset.dwWrittenSubregionsCount;
2554 storeDataParams.dwValue = m_basicFeature->m_numSlices;
2555 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(cmdBuffer));
2556
2557 auto &miCpyMemMemParams = m_miItf->MHW_GETPAR_F(MI_COPY_MEM_MEM)();
2558 miCpyMemMemParams = {};
2559 miCpyMemMemParams.presSrc = resLcuBaseAddressBuffer;
2560 miCpyMemMemParams.presDst = m_basicFeature->m_resMetadataBuffer;
2561
2562 for (uint16_t slcCount = 0; slcCount < m_basicFeature->m_numSlices; slcCount++)
2563 {
2564 uint32_t subRegionStartOffset = resourceOffset.dwMetaDataSize + slcCount * resourceOffset.dwMetaDataSubRegionSize;
2565
2566 storeDataParams.dwResourceOffset = subRegionStartOffset + resourceOffset.dwbStartOffset;
2567 storeDataParams.dwValue = 0;
2568 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(cmdBuffer));
2569
2570 storeDataParams.dwResourceOffset = subRegionStartOffset + resourceOffset.dwbHeaderSize;
2571 storeDataParams.dwValue = m_basicFeature->m_slcData[slcCount].BitSize;
2572 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(cmdBuffer));
2573
2574 miCpyMemMemParams.presSrc = resLcuBaseAddressBuffer;
2575 miCpyMemMemParams.presDst = m_basicFeature->m_resMetadataBuffer;
2576 miCpyMemMemParams.dwSrcOffset = slcCount * 16 * sizeof(uint32_t); //slice size offset in resLcuBaseAddressBuffer is 16DW
2577 miCpyMemMemParams.dwDstOffset = subRegionStartOffset + resourceOffset.dwbSize;
2578 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_COPY_MEM_MEM)(cmdBuffer));
2579 if (slcCount)
2580 {
2581 ENCODE_CHK_STATUS_RETURN(xCalAtomic(
2582 m_basicFeature->m_resMetadataBuffer,
2583 subRegionStartOffset + resourceOffset.dwbSize,
2584 resLcuBaseAddressBuffer,
2585 (slcCount - 1) * 16 * sizeof(uint32_t),
2586 mhw::mi::MHW_MI_ATOMIC_SUB));
2587 }
2588 }
2589
2590 auto mmioRegisters = m_hcpItf->GetMmioRegisters(m_vdboxIndex);
2591 auto &storeRegMemParams = m_miItf->MHW_GETPAR_F(MI_STORE_REGISTER_MEM)();
2592 storeRegMemParams = {};
2593 storeRegMemParams.presStoreBuffer = m_basicFeature->m_resMetadataBuffer;
2594 storeRegMemParams.dwOffset = resourceOffset.dwEncodedBitstreamWrittenBytesCount;
2595 storeRegMemParams.dwRegister = mmioRegisters->hcpEncBitstreamBytecountFrameRegOffset;
2596 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(cmdBuffer));
2597
2598 // Statistics
2599 // Average QP
2600 if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP)
2601 {
2602 storeDataParams.dwResourceOffset = resourceOffset.dwEncodeStats + resourceOffset.dwAverageQP;
2603 storeDataParams.dwValue = m_hevcPicParams->QpY + m_hevcSliceParams->slice_qp_delta;
2604 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(cmdBuffer));
2605 }
2606 else
2607 {
2608 auto brcFeature = dynamic_cast<HEVCEncodeBRC *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcBrcFeature));
2609 ENCODE_CHK_NULL_RETURN(brcFeature);
2610
2611 miCpyMemMemParams.presSrc = brcFeature->GetHevcVdenc2ndLevelBatchBuffer(m_pipeline->m_currRecycledBufIdx);
2612 miCpyMemMemParams.dwSrcOffset = 0x6F * sizeof(uint32_t);
2613 miCpyMemMemParams.presDst = m_basicFeature->m_resMetadataBuffer;
2614 miCpyMemMemParams.dwDstOffset = resourceOffset.dwEncodeStats + resourceOffset.dwAverageQP;
2615 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_COPY_MEM_MEM)(cmdBuffer));
2616
2617 auto &atomicParams = m_miItf->MHW_GETPAR_F(MI_ATOMIC)();
2618 atomicParams = {};
2619 atomicParams.pOsResource = m_basicFeature->m_resMetadataBuffer;
2620 atomicParams.dwResourceOffset = resourceOffset.dwEncodeStats + resourceOffset.dwAverageQP;
2621 atomicParams.dwDataSize = sizeof(uint32_t);
2622 atomicParams.Operation = mhw::mi::MHW_MI_ATOMIC_AND;
2623 atomicParams.bInlineData = true;
2624 atomicParams.dwOperand1Data[0] = 0xFF;
2625 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_ATOMIC)(cmdBuffer));
2626 }
2627
2628 PMOS_RESOURCE resFrameStatStreamOutBuffer = m_basicFeature->m_recycleBuf->GetBuffer(FrameStatStreamOutBuffer, 0);
2629 ENCODE_CHK_NULL_RETURN(resFrameStatStreamOutBuffer);
2630
2631 // LCUSkipIn8x8Unit
2632 miCpyMemMemParams.presSrc = resFrameStatStreamOutBuffer;
2633 miCpyMemMemParams.dwSrcOffset = 7 * sizeof(uint32_t);
2634 miCpyMemMemParams.presDst = m_basicFeature->m_resMetadataBuffer;
2635 miCpyMemMemParams.dwDstOffset = resourceOffset.dwEncodeStats + resourceOffset.dwSkipCodingUnitsCount;
2636 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_COPY_MEM_MEM)(cmdBuffer));
2637 ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_basicFeature->m_resMetadataBuffer, resourceOffset.dwEncodeStats + resourceOffset.dwSkipCodingUnitsCount, resFrameStatStreamOutBuffer, 7 * sizeof(uint32_t), mhw::mi::MHW_MI_ATOMIC_ADD));
2638 ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_basicFeature->m_resMetadataBuffer, resourceOffset.dwEncodeStats + resourceOffset.dwSkipCodingUnitsCount, resFrameStatStreamOutBuffer, 7 * sizeof(uint32_t), mhw::mi::MHW_MI_ATOMIC_ADD));
2639 ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_basicFeature->m_resMetadataBuffer, resourceOffset.dwEncodeStats + resourceOffset.dwSkipCodingUnitsCount, resFrameStatStreamOutBuffer, 7 * sizeof(uint32_t), mhw::mi::MHW_MI_ATOMIC_ADD));
2640
2641 // NumCU_IntraDC, NumCU_IntraPlanar, NumCU_IntraAngular
2642 miCpyMemMemParams.presSrc = resFrameStatStreamOutBuffer;
2643 miCpyMemMemParams.dwSrcOffset = 20 * sizeof(uint32_t);
2644 miCpyMemMemParams.dwDstOffset = resourceOffset.dwEncodeStats + resourceOffset.dwIntraCodingUnitsCount;
2645 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_COPY_MEM_MEM)(cmdBuffer));
2646 ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_basicFeature->m_resMetadataBuffer, resourceOffset.dwEncodeStats + resourceOffset.dwIntraCodingUnitsCount, resFrameStatStreamOutBuffer, 21 * sizeof(uint32_t), mhw::mi::MHW_MI_ATOMIC_ADD));
2647 ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_basicFeature->m_resMetadataBuffer, resourceOffset.dwEncodeStats + resourceOffset.dwIntraCodingUnitsCount, resFrameStatStreamOutBuffer, 22 * sizeof(uint32_t), mhw::mi::MHW_MI_ATOMIC_ADD));
2648
2649 //NumCU_Merge (LCUSkipIn8x8Unit), NumCU_MVdirL0, NumCU_MVdirL1, NumCU_MVdirBi
2650 miCpyMemMemParams.presSrc = resFrameStatStreamOutBuffer;
2651 miCpyMemMemParams.dwSrcOffset = 27 * sizeof(uint32_t);
2652 miCpyMemMemParams.dwDstOffset = resourceOffset.dwEncodeStats + resourceOffset.dwInterCodingUnitsCount;
2653 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_COPY_MEM_MEM)(cmdBuffer));
2654 ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_basicFeature->m_resMetadataBuffer, resourceOffset.dwEncodeStats + resourceOffset.dwInterCodingUnitsCount, resFrameStatStreamOutBuffer, 28 * sizeof(uint32_t), mhw::mi::MHW_MI_ATOMIC_ADD));
2655 ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_basicFeature->m_resMetadataBuffer, resourceOffset.dwEncodeStats + resourceOffset.dwInterCodingUnitsCount, resFrameStatStreamOutBuffer, 29 * sizeof(uint32_t), mhw::mi::MHW_MI_ATOMIC_ADD));
2656 ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_basicFeature->m_resMetadataBuffer, resourceOffset.dwEncodeStats + resourceOffset.dwInterCodingUnitsCount, resFrameStatStreamOutBuffer, 30 * sizeof(uint32_t), mhw::mi::MHW_MI_ATOMIC_ADD));
2657 ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_basicFeature->m_resMetadataBuffer, resourceOffset.dwEncodeStats + resourceOffset.dwInterCodingUnitsCount, m_basicFeature->m_resMetadataBuffer, resourceOffset.dwEncodeStats + resourceOffset.dwSkipCodingUnitsCount, mhw::mi::MHW_MI_ATOMIC_SUB));
2658
2659 // Average MV_X/MV_Y, report (0,0) as temp solution, later may need kernel involved
2660 storeDataParams.dwResourceOffset = resourceOffset.dwEncodeStats + resourceOffset.dwAverageMotionEstimationXDirection;
2661 storeDataParams.dwValue = 0;
2662 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(cmdBuffer));
2663
2664 storeDataParams.dwResourceOffset = resourceOffset.dwEncodeStats + resourceOffset.dwAverageMotionEstimationYDirection;
2665 storeDataParams.dwValue = 0;
2666 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(cmdBuffer));
2667
2668 return eStatus;
2669 }
2670
2671 #if USE_CODECHAL_DEBUG_TOOL
DumpInput()2672 MOS_STATUS HevcVdencPkt::DumpInput()
2673 {
2674 ENCODE_FUNC_CALL();
2675 ENCODE_CHK_NULL_RETURN(m_pipeline);
2676 ENCODE_CHK_NULL_RETURN(m_basicFeature);
2677
2678 CodechalDebugInterface *debugInterface = m_pipeline->GetDebugInterface();
2679 ENCODE_CHK_NULL_RETURN(debugInterface);
2680
2681 debugInterface->m_DumpInputNum = m_basicFeature->m_frameNum - 1;
2682
2683 ENCODE_CHK_NULL_RETURN(m_basicFeature->m_ref.GetCurrRefList());
2684 CODEC_REF_LIST currRefList = *((CODEC_REF_LIST *)m_basicFeature->m_ref.GetCurrRefList());
2685
2686 ENCODE_CHK_STATUS_RETURN(debugInterface->DumpYUVSurface(
2687 &currRefList.sRefRawBuffer,
2688 CodechalDbgAttr::attrEncodeRawInputSurface,
2689 "SrcSurf"))
2690 return MOS_STATUS_SUCCESS;
2691 }
2692
DumpResources(EncodeStatusMfx * encodeStatusMfx,EncodeStatusReportData * statusReportData)2693 MOS_STATUS HevcVdencPkt::DumpResources(
2694 EncodeStatusMfx * encodeStatusMfx,
2695 EncodeStatusReportData *statusReportData)
2696 {
2697 ENCODE_FUNC_CALL();
2698 ENCODE_CHK_NULL_RETURN(encodeStatusMfx);
2699 ENCODE_CHK_NULL_RETURN(statusReportData);
2700 ENCODE_CHK_NULL_RETURN(m_pipeline);
2701 ENCODE_CHK_NULL_RETURN(m_statusReport);
2702 ENCODE_CHK_NULL_RETURN(m_basicFeature);
2703 ENCODE_CHK_NULL_RETURN(m_basicFeature->m_trackedBuf);
2704
2705 CodechalDebugInterface *debugInterface = m_pipeline->GetStatusReportDebugInterface();
2706 ENCODE_CHK_NULL_RETURN(debugInterface);
2707
2708 CODEC_REF_LIST currRefList = *((CODEC_REF_LIST *)statusReportData->currRefList);
2709 currRefList.RefPic = statusReportData->currOriginalPic;
2710
2711 debugInterface->m_currPic = statusReportData->currOriginalPic;
2712 debugInterface->m_bufferDumpFrameNum = m_basicFeature->m_frameNum - 1;
2713 debugInterface->m_frameType = encodeStatusMfx->pictureCodingType;
2714
2715 ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBuffer(
2716 &currRefList.resBitstreamBuffer,
2717 CodechalDbgAttr::attrBitstream,
2718 "_PAK",
2719 statusReportData->bitstreamSize,
2720 0,
2721 CODECHAL_NUM_MEDIA_STATES));
2722
2723 ENCODE_CHK_STATUS_RETURN(debugInterface->DumpData(
2724 statusReportData,
2725 sizeof(EncodeStatusReportData),
2726 CodechalDbgAttr::attrStatusReport,
2727 "EncodeStatusReport_Buffer"));
2728
2729 PMOS_RESOURCE frameStatStreamOutBuffer = m_basicFeature->m_recycleBuf->GetBuffer(FrameStatStreamOutBuffer, 0);
2730 ENCODE_CHK_NULL_RETURN(frameStatStreamOutBuffer);
2731 ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBuffer(
2732 frameStatStreamOutBuffer,
2733 CodechalDbgAttr::attrFrameState,
2734 "FrameStatus",
2735 frameStatStreamOutBuffer->iSize,
2736 0,
2737 CODECHAL_NUM_MEDIA_STATES));
2738
2739 MOS_SURFACE *ds4xSurface = m_basicFeature->m_trackedBuf->GetSurface(
2740 BufferType::ds4xSurface, currRefList.ucScalingIdx);
2741
2742 if (ds4xSurface != nullptr)
2743 {
2744 ENCODE_CHK_STATUS_RETURN(debugInterface->DumpYUVSurface(
2745 ds4xSurface,
2746 CodechalDbgAttr::attrReconstructedSurface,
2747 "4xScaledSurf"))
2748 }
2749
2750 MOS_SURFACE *ds8xSurface = m_basicFeature->m_trackedBuf->GetSurface(
2751 BufferType::ds8xSurface, currRefList.ucScalingIdx);
2752
2753 if (ds8xSurface != nullptr)
2754 {
2755 ENCODE_CHK_STATUS_RETURN(debugInterface->DumpYUVSurface(
2756 ds8xSurface,
2757 CodechalDbgAttr::attrReconstructedSurface,
2758 "8xScaledSurf"))
2759 }
2760
2761 MOS_RESOURCE *mbCodedBuffer = m_basicFeature->m_trackedBuf->GetBuffer(
2762 BufferType::mbCodedBuffer, currRefList.ucScalingIdx);
2763 if (mbCodedBuffer != nullptr)
2764 {
2765 ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBuffer(
2766 mbCodedBuffer,
2767 CodechalDbgAttr::attrVdencOutput,
2768 "_MbCode",
2769 m_basicFeature->m_mbCodeSize,
2770 0,
2771 CODECHAL_NUM_MEDIA_STATES));
2772 }
2773
2774 // Slice Size Conformance
2775 if (m_hevcSeqParams->SliceSizeControl)
2776 {
2777 uint32_t dwSize = CODECHAL_HEVC_MAX_NUM_SLICES_LVL_6 * CODECHAL_CACHELINE_SIZE;
2778 if (!m_hevcPicParams->tiles_enabled_flag || m_pipeline->GetPipeNum() <= 1)
2779 {
2780 // Slice Size StreamOut Surface
2781 ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBuffer(
2782 m_basicFeature->m_recycleBuf->GetBuffer(LcuBaseAddressBuffer, 0),
2783 CodechalDbgAttr::attrVdencOutput,
2784 "_SliceSize",
2785 dwSize,
2786 0,
2787 CODECHAL_NUM_MEDIA_STATES));
2788 }
2789
2790 dwSize = MOS_ALIGN_CEIL(4, CODECHAL_CACHELINE_SIZE);
2791 auto dssFeature = dynamic_cast<HevcEncodeDss *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcVdencDssFeature));
2792 ENCODE_CHK_NULL_RETURN(dssFeature);
2793 PMOS_RESOURCE resSliceCountBuffer = nullptr;
2794 PMOS_RESOURCE resVDEncModeTimerBuffer = nullptr;
2795 ENCODE_CHK_STATUS_RETURN(dssFeature->GetDssBuffer(resSliceCountBuffer, resVDEncModeTimerBuffer));
2796 // Slice Count buffer 1 DW = 4 Bytes
2797 ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBuffer(
2798 resSliceCountBuffer,
2799 CodechalDbgAttr::attrVdencOutput,
2800 "_SliceCount",
2801 dwSize,
2802 0,
2803 CODECHAL_NUM_MEDIA_STATES));
2804
2805 // VDEncMode Timer buffer 1 DW = 4 Bytes
2806 ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBuffer(
2807 resVDEncModeTimerBuffer,
2808 CodechalDbgAttr::attrVdencOutput,
2809 "_ModeTimer",
2810 dwSize,
2811 0,
2812 CODECHAL_NUM_MEDIA_STATES));
2813 }
2814
2815 auto streamInBufferSize = (MOS_ALIGN_CEIL(m_basicFeature->m_frameWidth, 64) / 32) * (MOS_ALIGN_CEIL(m_basicFeature->m_frameHeight, 64) / 32) * CODECHAL_CACHELINE_SIZE;
2816 PMOS_RESOURCE streamInbuffer = m_basicFeature->m_recycleBuf->GetBuffer(RecycleResId::StreamInBuffer, debugInterface->m_bufferDumpFrameNum);
2817 if (streamInbuffer)
2818 {
2819 ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBuffer(
2820 streamInbuffer,
2821 CodechalDbgAttr::attrStreamIn,
2822 "_ROIStreamin",
2823 streamInBufferSize,
2824 0,
2825 CODECHAL_NUM_MEDIA_STATES))
2826 }
2827
2828 ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBltOutput(
2829 &currRefList.sRefReconBuffer,
2830 CodechalDbgAttr::attrDecodeBltOutput));
2831 ENCODE_CHK_STATUS_RETURN(debugInterface->DumpYUVSurface(
2832 &currRefList.sRefReconBuffer,
2833 CodechalDbgAttr::attrReconstructedSurface,
2834 "ReconSurf"))
2835
2836 ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBltOutput(
2837 &currRefList.sRefRawBuffer,
2838 CodechalDbgAttr::attrDecodeBltOutput));
2839
2840 return MOS_STATUS_SUCCESS;
2841 }
2842
2843 #endif
2844
//!
//! Parameter setter for HCP_PIPE_MODE_SELECT.
//!
//! Selects the codec standard, enables stream-out and VDENC, picks the multi
//! engine / pipe work mode from the pipe count, queries tile replay when tiles
//! are enabled, installs the CP protection-settings callback and applies the
//! "DisableTlbPrefetch" user setting under Wa_14012254246.
//!
MHW_SETPAR_DECL_SRC(HCP_PIPE_MODE_SELECT, HevcVdencPkt)
{
    params.codecStandardSelect = CodecHal_GetStandardFromMode(m_basicFeature->m_mode) - CODECHAL_HCP_BASE;
    params.bStreamOutEnabled   = true;
    params.bVdencEnabled       = true;
    params.codecSelect         = 1;

    if (m_pipeline->GetPipeNum() <= 1)
    {
        // Single pipe: legacy mode
        params.multiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_FE_LEGACY;
        params.pipeWorkMode    = MHW_VDBOX_HCP_PIPE_WORK_MODE_LEGACY;
    }
    else
    {
        // Running in the multiple VDBOX mode: first pipe is left, last is right,
        // everything in between is middle
        params.multiEngineMode = m_pipeline->IsFirstPipe()
                                     ? MHW_VDBOX_HCP_MULTI_ENGINE_MODE_LEFT
                                     : (m_pipeline->IsLastPipe()
                                               ? MHW_VDBOX_HCP_MULTI_ENGINE_MODE_RIGHT
                                               : MHW_VDBOX_HCP_MULTI_ENGINE_MODE_MIDDLE);
        params.pipeWorkMode    = MHW_VDBOX_HCP_PIPE_WORK_MODE_CODEC_BE;
    }

    if (!m_hevcPicParams->tiles_enabled_flag)
    {
        params.bTileBasedReplayMode = 0;
    }
    else
    {
        RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, IsTileReplayEnabled, params.bTileBasedReplayMode);
    }

    auto cpInterface = m_hwInterface->GetCpInterface();
    ENCODE_CHK_NULL_RETURN(cpInterface);

    // Two-pass scalable: any non-legacy engine mode without tile based replay
    bool twoPassScalable = !params.bTileBasedReplayMode && (params.multiEngineMode != MHW_VDBOX_HCP_MULTI_ENGINE_MODE_FE_LEGACY);

    params.setProtectionSettings = [cpInterface, twoPassScalable](uint32_t *data) {
        return cpInterface->SetProtectionSettingsForHcpPipeModeSelect(data, twoPassScalable);
    };

    auto waTable = m_osInterface->pfnGetWaTable(m_osInterface);
    ENCODE_CHK_NULL_RETURN(waTable);

    if (MEDIA_IS_WA(waTable, Wa_14012254246))
    {
        MediaUserSetting::Value disableTlbPrefetch;
        ReadUserSetting(
            m_userSettingPtr,
            disableTlbPrefetch,
            "DisableTlbPrefetch",
            MediaUserSetting::Group::Sequence);
        params.prefetchDisable = disableTlbPrefetch.Get<bool>();
    }

    return MOS_STATUS_SUCCESS;
}
2906
//! Parameter setter for HCP_TILE_CODING: reports the pipeline's pipe count as
//! the number of active BE pipes.
MHW_SETPAR_DECL_SRC(HCP_TILE_CODING, HevcVdencPkt)
{
    ENCODE_FUNC_CALL();

    params.numberOfActiveBePipes = m_pipeline->GetPipeNum();

    return MOS_STATUS_SUCCESS;
}
2913
AddAllCmds_HCP_PAK_INSERT_OBJECT(PMOS_COMMAND_BUFFER cmdBuffer) const2914 MOS_STATUS HevcVdencPkt::AddAllCmds_HCP_PAK_INSERT_OBJECT(PMOS_COMMAND_BUFFER cmdBuffer) const
2915 {
2916 ENCODE_FUNC_CALL();
2917
2918 ENCODE_CHK_NULL_RETURN(cmdBuffer);
2919
2920 bool bLastPicInSeq = m_basicFeature->m_lastPicInSeq;
2921 bool bLastPicInStream = m_basicFeature->m_lastPicInStream;
2922 auto ¶ms = m_hcpItf->MHW_GETPAR_F(HCP_PAK_INSERT_OBJECT)();
2923 params = {};
2924
2925 if (bLastPicInSeq && bLastPicInStream)
2926 {
2927 params = {};
2928
2929 uint32_t dwPadding[3];
2930
2931 params.dwPadding = sizeof(dwPadding) / sizeof(dwPadding[0]);
2932 params.bHeaderLengthExcludeFrmSize = 0;
2933 params.bEndOfSlice = 1;
2934 params.bLastHeader = 1;
2935 params.bEmulationByteBitsInsert = 0;
2936 params.uiSkipEmulationCheckCount = 0;
2937 params.dataBitsInLastDw = 16;
2938 params.databyteoffset = 0;
2939 params.bIndirectPayloadEnable = 0;
2940
2941 m_hcpItf->MHW_ADDCMD_F(HCP_PAK_INSERT_OBJECT)(cmdBuffer);
2942
2943 dwPadding[0] = (uint32_t)((1 << 16) | ((HEVC_NAL_UT_EOS << 1) << 24));
2944 dwPadding[1] = (1L | (1L << 24));
2945 dwPadding[2] = (HEVC_NAL_UT_EOB << 1) | (1L << 8);
2946 MHW_MI_CHK_STATUS(Mhw_AddCommandCmdOrBB(m_osInterface, cmdBuffer, nullptr, &dwPadding[0], sizeof(dwPadding)));
2947 }
2948 else if (bLastPicInSeq || bLastPicInStream)
2949 {
2950 params = {};
2951 uint32_t dwLastPicInSeqData[2], dwLastPicInStreamData[2];
2952
2953 params.dwPadding = bLastPicInSeq * 2 + bLastPicInStream * 2;
2954 params.bHeaderLengthExcludeFrmSize = 0;
2955 params.bEndOfSlice = 1;
2956 params.bLastHeader = 1;
2957 params.bEmulationByteBitsInsert = 0;
2958 params.uiSkipEmulationCheckCount = 0;
2959 params.dataBitsInLastDw = 8;
2960 params.databyteoffset = 0;
2961 params.bIndirectPayloadEnable = 0;
2962
2963 m_hcpItf->MHW_ADDCMD_F(HCP_PAK_INSERT_OBJECT)(cmdBuffer);
2964
2965 if (bLastPicInSeq)
2966 {
2967 dwLastPicInSeqData[0] = (uint32_t)((1 << 16) | ((HEVC_NAL_UT_EOS << 1) << 24));
2968 dwLastPicInSeqData[1] = 1; // nuh_temporal_id_plus1
2969 MHW_MI_CHK_STATUS(Mhw_AddCommandCmdOrBB(m_osInterface, cmdBuffer, nullptr, &dwLastPicInSeqData[0], sizeof(dwLastPicInSeqData)));
2970 }
2971
2972 if (bLastPicInStream)
2973 {
2974 dwLastPicInStreamData[0] = (uint32_t)((1 << 16) | ((HEVC_NAL_UT_EOB << 1) << 24));
2975 dwLastPicInStreamData[1] = 1; // nuh_temporal_id_plus1
2976 MHW_MI_CHK_STATUS(Mhw_AddCommandCmdOrBB(m_osInterface, cmdBuffer, nullptr, &dwLastPicInStreamData[0], sizeof(dwLastPicInStreamData)));
2977 }
2978 }
2979 else
2980 {
2981 PCODECHAL_NAL_UNIT_PARAMS *ppNalUnitParams = (CODECHAL_NAL_UNIT_PARAMS **)m_nalUnitParams;
2982
2983 auto brcFeature = dynamic_cast<HEVCEncodeBRC *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcBrcFeature));
2984 ENCODE_CHK_NULL_RETURN(brcFeature);
2985
2986 PMHW_BATCH_BUFFER batchBuffer = brcFeature->GetVdenc2ndLevelBatchBuffer(m_pipeline->m_currRecycledBufIdx);
2987 PBSBuffer pBsBuffer = &(m_basicFeature->m_bsBuffer);
2988 uint32_t bitSize = 0;
2989 uint32_t offSet = 0;
2990
2991 //insert AU, SPS, PSP headers before first slice header
2992 if (m_basicFeature->m_curNumSlices == 0)
2993 {
2994 uint32_t maxBytesInPakInsertObjCmd = ((2 << 11) - 1) * 4; // 12 bits for Length field in PAK_INSERT_OBJ cmd
2995
2996 for (auto i = 0; i < HEVC_MAX_NAL_UNIT_TYPE; i++)
2997 {
2998 uint32_t nalunitPosiSize = ppNalUnitParams[i]->uiSize;
2999 uint32_t nalunitPosiOffset = ppNalUnitParams[i]->uiOffset;
3000
3001 while (nalunitPosiSize > 0)
3002 {
3003 bitSize = MOS_MIN(maxBytesInPakInsertObjCmd * 8, nalunitPosiSize * 8);
3004 offSet = nalunitPosiOffset;
3005
3006 params = {};
3007
3008 params.dwPadding = (MOS_ALIGN_CEIL((bitSize + 7) >> 3, sizeof(uint32_t))) / sizeof(uint32_t);
3009 params.bEmulationByteBitsInsert = ppNalUnitParams[i]->bInsertEmulationBytes;
3010 params.uiSkipEmulationCheckCount = ppNalUnitParams[i]->uiSkipEmulationCheckCount;
3011 params.dataBitsInLastDw = bitSize % 32;
3012 if (params.dataBitsInLastDw == 0)
3013 {
3014 params.dataBitsInLastDw = 32;
3015 }
3016
3017 if (nalunitPosiSize > maxBytesInPakInsertObjCmd)
3018 {
3019 nalunitPosiSize -= maxBytesInPakInsertObjCmd;
3020 nalunitPosiOffset += maxBytesInPakInsertObjCmd;
3021 }
3022 else
3023 {
3024 nalunitPosiSize = 0;
3025 }
3026 m_hcpItf->MHW_ADDCMD_F(HCP_PAK_INSERT_OBJECT)(cmdBuffer);
3027 uint32_t byteSize = (bitSize + 7) >> 3;
3028 if (byteSize)
3029 {
3030 MHW_MI_CHK_NULL(pBsBuffer);
3031 MHW_MI_CHK_NULL(pBsBuffer->pBase);
3032 uint8_t *data = (uint8_t *)(pBsBuffer->pBase + offSet);
3033 MHW_MI_CHK_STATUS(Mhw_AddCommandCmdOrBB(m_osInterface, cmdBuffer, batchBuffer, data, byteSize));
3034 }
3035 }
3036 }
3037 }
3038
3039 params = {};
3040 // Insert slice header
3041 params.bLastHeader = true;
3042 params.bEmulationByteBitsInsert = true;
3043
3044 // App does the slice header packing, set the skip count passed by the app
3045 PCODEC_ENCODER_SLCDATA slcData = m_basicFeature->m_slcData;
3046 uint32_t currSlcIdx = m_basicFeature->m_curNumSlices;
3047
3048 params.uiSkipEmulationCheckCount = slcData[currSlcIdx].SkipEmulationByteCount;
3049 bitSize = slcData[currSlcIdx].BitSize;
3050 offSet = slcData[currSlcIdx].SliceOffset;
3051
3052 if (m_hevcSeqParams->SliceSizeControl)
3053 {
3054 params.bLastHeader = false;
3055 params.bEmulationByteBitsInsert = false;
3056 bitSize = m_hevcSliceParams->BitLengthSliceHeaderStartingPortion;
3057 params.bResetBitstreamStartingPos = true;
3058 params.dwPadding = (MOS_ALIGN_CEIL((bitSize + 7) >> 3, sizeof(uint32_t))) / sizeof(uint32_t);
3059 params.dataBitsInLastDw = bitSize % 32;
3060 if (params.dataBitsInLastDw == 0)
3061 {
3062 params.dataBitsInLastDw = 32;
3063 }
3064
3065 m_hcpItf->MHW_ADDCMD_F(HCP_PAK_INSERT_OBJECT)(cmdBuffer);
3066 uint32_t byteSize = (bitSize + 7) >> 3;
3067 if (byteSize)
3068 {
3069 MHW_MI_CHK_NULL(pBsBuffer);
3070 MHW_MI_CHK_NULL(pBsBuffer->pBase);
3071 uint8_t *data = (uint8_t *)(pBsBuffer->pBase + offSet);
3072 MHW_MI_CHK_STATUS(Mhw_AddCommandCmdOrBB(m_osInterface, cmdBuffer, batchBuffer, data, byteSize));
3073 }
3074
3075 // Send HCP_PAK_INSERT_OBJ command. For dynamic slice, we are skipping the beginning part of slice header.
3076 params.bLastHeader = true;
3077 bitSize = bitSize - m_hevcSliceParams->BitLengthSliceHeaderStartingPortion;
3078 offSet += ((m_hevcSliceParams->BitLengthSliceHeaderStartingPortion + 7) / 8); // Skips the first 5 bytes which is Start Code + Nal Unit Header
3079 params.dwPadding = (MOS_ALIGN_CEIL((bitSize + 7) >> 3, sizeof(uint32_t))) / sizeof(uint32_t);
3080 params.dataBitsInLastDw = bitSize % 32;
3081 if (params.dataBitsInLastDw == 0)
3082 {
3083 params.dataBitsInLastDw = 32;
3084 }
3085 params.bResetBitstreamStartingPos = true;
3086 m_hcpItf->MHW_ADDCMD_F(HCP_PAK_INSERT_OBJECT)(cmdBuffer);
3087 byteSize = (bitSize + 7) >> 3;
3088 if (byteSize)
3089 {
3090 MHW_MI_CHK_NULL(pBsBuffer);
3091 MHW_MI_CHK_NULL(pBsBuffer->pBase);
3092 uint8_t *data = (uint8_t *)(pBsBuffer->pBase + offSet);
3093 MHW_MI_CHK_STATUS(Mhw_AddCommandCmdOrBB(m_osInterface, cmdBuffer, batchBuffer, data, byteSize));
3094 }
3095 }
3096 else
3097 {
3098 params.dwPadding = (MOS_ALIGN_CEIL((bitSize + 7) >> 3, sizeof(uint32_t))) / sizeof(uint32_t);
3099 params.dataBitsInLastDw = bitSize % 32;
3100 if (params.dataBitsInLastDw == 0)
3101 {
3102 params.dataBitsInLastDw = 32;
3103 }
3104 m_hcpItf->MHW_ADDCMD_F(HCP_PAK_INSERT_OBJECT)(cmdBuffer);
3105 uint32_t byteSize = (bitSize + 7) >> 3;
3106 if (byteSize)
3107 {
3108 MHW_MI_CHK_NULL(pBsBuffer);
3109 MHW_MI_CHK_NULL(pBsBuffer->pBase);
3110 uint8_t *data = (uint8_t *)(pBsBuffer->pBase + offSet);
3111 MHW_MI_CHK_STATUS(Mhw_AddCommandCmdOrBB(m_osInterface, cmdBuffer, batchBuffer, data, byteSize));
3112 }
3113 }
3114 }
3115
3116 return MOS_STATUS_SUCCESS;
3117 }
3118
//!
//! \brief    Fill the HCP_PIPE_BUF_ADDR_STATE parameters owned by this packet
//! \details  Sets the codec mode, the recon/raw surfaces, the metadata,
//!           MV-temporal and stream-out buffers, and (when _MMC_SUPPORTED is
//!           defined) the MMC states of the recon and raw surfaces. Finally the
//!           reference-frame helper (m_basicFeature->m_ref) is asked to add its
//!           own fields; a failure reported by that helper is returned to the
//!           caller instead of being discarded.
//! \param    [in,out] params
//!           Command parameters to populate
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, else fail reason
//!
MHW_SETPAR_DECL_SRC(HCP_PIPE_BUF_ADDR_STATE, HevcVdencPkt)
{
    ENCODE_FUNC_CALL();
    ENCODE_CHK_NULL_RETURN(m_basicFeature);

    params.Mode = m_basicFeature->m_mode;
    // The same reconstructed surface is used for both the pre- and post-deblock outputs.
    params.psPreDeblockSurface  = &m_basicFeature->m_reconSurface;
    params.psPostDeblockSurface = &m_basicFeature->m_reconSurface;
    params.psRawSurface         = m_basicFeature->m_rawSurfaceToPak;

    params.presMetadataLineBuffer       = m_resMetadataLineBuffer;
    params.presMetadataTileLineBuffer   = m_resMetadataTileLineBuffer;
    params.presMetadataTileColumnBuffer = m_resMetadataTileColumnBuffer;

    params.presCurMvTempBuffer           = m_basicFeature->m_resMvTemporalBuffer;
    params.dwLcuStreamOutOffset          = 0;
    params.presLcuILDBStreamOutBuffer    = m_resLCUIldbStreamOutBuffer;
    params.dwFrameStatStreamOutOffset    = 0;
    params.presSseSrcPixelRowStoreBuffer = m_resSSESrcPixelRowStoreBuffer;
    params.presPakCuLevelStreamoutBuffer = m_resPakcuLevelStreamOutData;
    // Mos_ResourceIsNull(&m_resPakcuLevelStreamoutData.sResource) ? nullptr : &m_resPakcuLevelStreamoutData.sResource;

    params.bRawIs10Bit = m_basicFeature->m_is10Bit;

#ifdef _MMC_SUPPORTED
    ENCODE_CHK_NULL_RETURN(m_mmcState);
    if (m_mmcState->IsMmcEnabled())
    {
        ENCODE_CHK_STATUS_RETURN(m_mmcState->GetSurfaceMmcState(&m_basicFeature->m_reconSurface, &params.PreDeblockSurfMmcState));
        ENCODE_CHK_STATUS_RETURN(m_mmcState->GetSurfaceMmcState(&m_basicFeature->m_rawSurface, &params.RawSurfMmcState));
    }
    else
    {
        params.PreDeblockSurfMmcState = MOS_MEMCOMP_DISABLED;
        params.RawSurfMmcState        = MOS_MEMCOMP_DISABLED;
    }

    // Debug builds mirror the programmed MMC state onto the recon surface.
    CODECHAL_DEBUG_TOOL(
        m_basicFeature->m_reconSurface.MmcState = params.PreDeblockSurfMmcState;)
#endif

    // Propagate a failure from the reference-frame helper rather than
    // reporting success with partially filled parameters.
    ENCODE_CHK_STATUS_RETURN(m_basicFeature->m_ref.MHW_SETPAR_F(HCP_PIPE_BUF_ADDR_STATE)(params));

    return MOS_STATUS_SUCCESS;
}
3163
//!
//! \brief    Fill the HCP_IND_OBJ_BASE_ADDR_STATE parameters
//! \details  Describes the MV object region inside the MB-code buffer and the
//!           PAK base object (the bitstream buffer and its size).
//! \param    [in,out] params
//!           Command parameters to populate
//! \return   MOS_STATUS
//!           Always MOS_STATUS_SUCCESS
//!
MHW_SETPAR_DECL_SRC(HCP_IND_OBJ_BASE_ADDR_STATE, HevcVdencPkt)
{
    ENCODE_FUNC_CALL();

    auto *const basicFeature    = m_basicFeature;
    const auto  mvRegionOffset  = m_mvOffset;

    // PAK base object: the output bitstream buffer.
    params.presPakBaseObjectBuffer = &basicFeature->m_resBitstreamBuffer;
    params.dwPakBaseObjectSize     = basicFeature->m_bitstreamSize;

    // MV object: starts at m_mvOffset inside the MB-code buffer and covers
    // whatever remains of it (m_mbCodeSize - m_mvOffset).
    params.presMvObjectBuffer = basicFeature->m_resMbCodeBuffer;
    params.dwMvObjectOffset   = mvRegionOffset;
    params.dwMvObjectSize     = basicFeature->m_mbCodeSize - mvRegionOffset;

    return MOS_STATUS_SUCCESS;
}
3176
//!
//! \brief    Fill the packet-level HCP_SLICE_STATE parameter
//! \param    [in,out] params
//!           Command parameters to populate
//! \return   MOS_STATUS
//!           Always MOS_STATUS_SUCCESS
//!
MHW_SETPAR_DECL_SRC(HCP_SLICE_STATE, HevcVdencPkt)
{
    ENCODE_FUNC_CALL();

    // The intra-reference-fetch-disable field simply follows the PAK-only pass flag.
    const auto pakOnlyPass      = m_pakOnlyPass;
    params.intrareffetchdisable = pakOnlyPass;

    return MOS_STATUS_SUCCESS;
}
3185
AddAllCmds_HCP_SURFACE_STATE(PMOS_COMMAND_BUFFER cmdBuffer) const3186 MOS_STATUS HevcVdencPkt::AddAllCmds_HCP_SURFACE_STATE(PMOS_COMMAND_BUFFER cmdBuffer) const
3187 {
3188 ENCODE_FUNC_CALL();
3189 ENCODE_CHK_NULL_RETURN(cmdBuffer);
3190
3191 m_curHcpSurfStateId = CODECHAL_HCP_SRC_SURFACE_ID;
3192 SETPAR_AND_ADDCMD(HCP_SURFACE_STATE, m_hcpItf, cmdBuffer);
3193
3194 m_curHcpSurfStateId = CODECHAL_HCP_DECODED_SURFACE_ID;
3195 SETPAR_AND_ADDCMD(HCP_SURFACE_STATE, m_hcpItf, cmdBuffer);
3196
3197 m_curHcpSurfStateId = CODECHAL_HCP_REF_SURFACE_ID;
3198 SETPAR_AND_ADDCMD(HCP_SURFACE_STATE, m_hcpItf, cmdBuffer);
3199
3200 return MOS_STATUS_SUCCESS;
3201 }
3202
AddAllCmds_HCP_REF_IDX_STATE(PMOS_COMMAND_BUFFER cmdBuffer) const3203 MOS_STATUS HevcVdencPkt::AddAllCmds_HCP_REF_IDX_STATE(PMOS_COMMAND_BUFFER cmdBuffer) const
3204 {
3205 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3206 ENCODE_FUNC_CALL();
3207 ENCODE_CHK_NULL_RETURN(cmdBuffer);
3208
3209 auto ¶ms = m_hcpItf->MHW_GETPAR_F(HCP_REF_IDX_STATE)();
3210 params = {};
3211
3212 uint32_t currSlcIdx = m_basicFeature->m_curNumSlices;
3213 PCODEC_HEVC_ENCODE_PICTURE_PARAMS hevcPicParams = (CODEC_HEVC_ENCODE_PICTURE_PARAMS *)m_hevcPicParams;
3214 PCODEC_HEVC_ENCODE_SLICE_PARAMS hevcSlcParams = (CODEC_HEVC_ENCODE_SLICE_PARAMS *)&m_hevcSliceParams[currSlcIdx];
3215
3216 CODEC_PICTURE currPic = {};
3217 CODEC_PICTURE refPicList[2][CODEC_MAX_NUM_REF_FRAME_HEVC] = {};
3218 void ** hevcRefList = nullptr;
3219 int32_t pocCurrPic = 0;
3220 int8_t * pRefIdxMapping = nullptr;
3221 int32_t pocList[CODEC_MAX_NUM_REF_FRAME_HEVC] = {};
3222
3223 if (hevcSlcParams->slice_type != encodeHevcISlice)
3224 {
3225 currPic = hevcPicParams->CurrReconstructedPic;
3226 params.ucList = LIST_0;
3227 params.numRefIdxLRefpiclistnumActiveMinus1 = hevcSlcParams->num_ref_idx_l0_active_minus1;
3228 eStatus = MOS_SecureMemcpy(&refPicList, sizeof(refPicList), &hevcSlcParams->RefPicList, sizeof(hevcSlcParams->RefPicList));
3229 if (eStatus != MOS_STATUS_SUCCESS)
3230 {
3231 ENCODE_ASSERTMESSAGE("Failed to copy memory.");
3232 return eStatus;
3233 }
3234
3235 hevcRefList = (void **)m_basicFeature->m_ref.GetRefList();
3236 pocCurrPic = hevcPicParams->CurrPicOrderCnt;
3237 for (auto i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
3238 {
3239 pocList[i] = hevcPicParams->RefFramePOCList[i];
3240 }
3241
3242 pRefIdxMapping = m_basicFeature->m_ref.GetRefIdxMapping();
3243
3244 MHW_ASSERT(currPic.FrameIdx != 0x7F);
3245
3246 for (uint8_t i = 0; i <= params.numRefIdxLRefpiclistnumActiveMinus1; i++)
3247 {
3248 uint8_t refFrameIDx = refPicList[params.ucList][i].FrameIdx;
3249 if (refFrameIDx < CODEC_MAX_NUM_REF_FRAME_HEVC)
3250 {
3251 MHW_ASSERT(*(pRefIdxMapping + refFrameIDx) >= 0);
3252
3253 params.listEntryLxReferencePictureFrameIdRefaddr07[i] = *(pRefIdxMapping + refFrameIDx);
3254 int32_t pocDiff = pocCurrPic - pocList[refFrameIDx];
3255 params.referencePictureTbValue[i] = (uint8_t)CodecHal_Clip3(-128, 127, pocDiff);
3256 CODEC_REF_LIST **refList = (CODEC_REF_LIST **)hevcRefList;
3257 params.longtermreference[i] = CodecHal_PictureIsLongTermRef(refList[currPic.FrameIdx]->RefList[refFrameIDx]);
3258 params.bottomFieldFlag[i] = 1;
3259 }
3260 else
3261 {
3262 params.listEntryLxReferencePictureFrameIdRefaddr07[i] = 0;
3263 params.referencePictureTbValue[i] = 0;
3264 params.longtermreference[i] = false;
3265 params.bottomFieldFlag[i] = 0;
3266 }
3267 }
3268
3269 for (uint8_t i = (uint8_t)(params.numRefIdxLRefpiclistnumActiveMinus1 + 1); i < 16; i++)
3270 {
3271 params.listEntryLxReferencePictureFrameIdRefaddr07[i] = 0;
3272 params.referencePictureTbValue[i] = 0;
3273 params.longtermreference[i] = false;
3274 params.bottomFieldFlag[i] = 0;
3275 }
3276
3277 ENCODE_CHK_NULL_RETURN(m_featureManager);
3278 auto sccFeature = dynamic_cast<HevcVdencScc *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcVdencSccFeature));
3279 ENCODE_CHK_NULL_RETURN(sccFeature);
3280
3281 MHW_CHK_STATUS_RETURN(sccFeature->MHW_SETPAR_F(HCP_REF_IDX_STATE)(params));
3282
3283 m_hcpItf->MHW_ADDCMD_F(HCP_REF_IDX_STATE)(cmdBuffer);
3284
3285 params = {};
3286
3287 if (hevcSlcParams->slice_type == encodeHevcBSlice)
3288 {
3289 params.ucList = LIST_1;
3290 params.numRefIdxLRefpiclistnumActiveMinus1 = hevcSlcParams->num_ref_idx_l1_active_minus1;
3291 for (uint8_t i = 0; i <= params.numRefIdxLRefpiclistnumActiveMinus1; i++)
3292 {
3293 uint8_t refFrameIDx = refPicList[params.ucList][i].FrameIdx;
3294 if (refFrameIDx < CODEC_MAX_NUM_REF_FRAME_HEVC)
3295 {
3296 MHW_ASSERT(*(pRefIdxMapping + refFrameIDx) >= 0);
3297
3298 params.listEntryLxReferencePictureFrameIdRefaddr07[i] = *(pRefIdxMapping + refFrameIDx);
3299 int32_t pocDiff = pocCurrPic - pocList[refFrameIDx];
3300 params.referencePictureTbValue[i] = (uint8_t)CodecHal_Clip3(-128, 127, pocDiff);
3301 CODEC_REF_LIST **refList = (CODEC_REF_LIST **)hevcRefList;
3302 params.longtermreference[i] = CodecHal_PictureIsLongTermRef(refList[currPic.FrameIdx]->RefList[refFrameIDx]);
3303 params.bottomFieldFlag[i] = 1;
3304 }
3305 else
3306 {
3307 params.listEntryLxReferencePictureFrameIdRefaddr07[i] = 0;
3308 params.referencePictureTbValue[i] = 0;
3309 params.longtermreference[i] = false;
3310 params.bottomFieldFlag[i] = 0;
3311 }
3312 }
3313
3314 for (uint8_t i = (uint8_t)(params.numRefIdxLRefpiclistnumActiveMinus1 + 1); i < 16; i++)
3315 {
3316 params.listEntryLxReferencePictureFrameIdRefaddr07[i] = 0;
3317 params.referencePictureTbValue[i] = 0;
3318 params.longtermreference[i] = false;
3319 params.bottomFieldFlag[i] = 0;
3320 }
3321 m_hcpItf->MHW_ADDCMD_F(HCP_REF_IDX_STATE)(cmdBuffer);
3322 }
3323 }
3324
3325 return MOS_STATUS_SUCCESS;
3326 }
3327
AddAllCmds_HCP_FQM_STATE(PMOS_COMMAND_BUFFER cmdBuffer) const3328 MOS_STATUS HevcVdencPkt::AddAllCmds_HCP_FQM_STATE(PMOS_COMMAND_BUFFER cmdBuffer) const
3329 {
3330 ENCODE_FUNC_CALL();
3331 ENCODE_CHK_NULL_RETURN(cmdBuffer);
3332
3333 MHW_MI_CHK_NULL(m_hevcIqMatrixParams);
3334
3335 auto ¶ms = m_hcpItf->MHW_GETPAR_F(HCP_FQM_STATE)();
3336 params = {};
3337
3338 auto iqMatrix = (PMHW_VDBOX_HEVC_QM_PARAMS)m_hevcIqMatrixParams;
3339 uint16_t *fqMatrix = (uint16_t *)params.quantizermatrix;
3340
3341 /* 4x4 */
3342 for (uint8_t i = 0; i < 32; i++)
3343 {
3344 params.quantizermatrix[i] = 0;
3345 }
3346 for (uint8_t intraInter = 0; intraInter <= 1; intraInter++)
3347 {
3348 params.intraInter = intraInter;
3349 params.sizeid = 0;
3350 params.colorComponent = 0;
3351
3352 for (uint8_t i = 0; i < 16; i++)
3353 {
3354 fqMatrix[i] =
3355 GetReciprocalScalingValue(iqMatrix->List4x4[3 * intraInter][i]);
3356 }
3357
3358 m_hcpItf->MHW_ADDCMD_F(HCP_FQM_STATE)(cmdBuffer);
3359 }
3360
3361 /* 8x8, 16x16 and 32x32 */
3362 for (uint8_t i = 0; i < 32; i++)
3363 {
3364 params.quantizermatrix[i] = 0;
3365 }
3366 for (uint8_t intraInter = 0; intraInter <= 1; intraInter++)
3367 {
3368 params.intraInter = intraInter;
3369 params.sizeid = 1;
3370 params.colorComponent = 0;
3371
3372 for (uint8_t i = 0; i < 64; i++)
3373 {
3374 fqMatrix[i] =
3375 GetReciprocalScalingValue(iqMatrix->List8x8[3 * intraInter][i]);
3376 }
3377
3378 m_hcpItf->MHW_ADDCMD_F(HCP_FQM_STATE)(cmdBuffer);
3379 }
3380
3381 /* 16x16 DC */
3382 for (uint8_t i = 0; i < 32; i++)
3383 {
3384 params.quantizermatrix[i] = 0;
3385 }
3386 for (uint8_t intraInter = 0; intraInter <= 1; intraInter++)
3387 {
3388 params.intraInter = intraInter;
3389 params.sizeid = 2;
3390 params.colorComponent = 0;
3391 params.fqmDcValue1Dc = GetReciprocalScalingValue(iqMatrix->ListDC16x16[3 * intraInter]);
3392
3393 for (uint8_t i = 0; i < 64; i++)
3394 {
3395 fqMatrix[i] =
3396 GetReciprocalScalingValue(iqMatrix->List16x16[3 * intraInter][i]);
3397 }
3398
3399 m_hcpItf->MHW_ADDCMD_F(HCP_FQM_STATE)(cmdBuffer);
3400 }
3401
3402 /* 32x32 DC */
3403 for (uint8_t i = 0; i < 32; i++)
3404 {
3405 params.quantizermatrix[i] = 0;
3406 }
3407 for (uint8_t intraInter = 0; intraInter <= 1; intraInter++)
3408 {
3409 params.intraInter = intraInter;
3410 params.sizeid = 3;
3411 params.colorComponent = 0;
3412 params.fqmDcValue1Dc = GetReciprocalScalingValue(iqMatrix->ListDC32x32[intraInter]);
3413
3414 for (uint8_t i = 0; i < 64; i++)
3415 {
3416 fqMatrix[i] =
3417 GetReciprocalScalingValue(iqMatrix->List32x32[intraInter][i]);
3418 }
3419
3420 m_hcpItf->MHW_ADDCMD_F(HCP_FQM_STATE)(cmdBuffer);
3421 }
3422
3423 return MOS_STATUS_SUCCESS;
3424 }
3425
AddAllCmds_HCP_QM_STATE(PMOS_COMMAND_BUFFER cmdBuffer) const3426 MOS_STATUS HevcVdencPkt::AddAllCmds_HCP_QM_STATE(PMOS_COMMAND_BUFFER cmdBuffer) const
3427 {
3428 ENCODE_FUNC_CALL();
3429 ENCODE_CHK_NULL_RETURN(cmdBuffer);
3430
3431 MHW_MI_CHK_NULL(m_hevcIqMatrixParams);
3432
3433 auto ¶ms = m_hcpItf->MHW_GETPAR_F(HCP_QM_STATE)();
3434 params = {};
3435
3436 auto iqMatrix = (PMHW_VDBOX_HEVC_QM_PARAMS)m_hevcIqMatrixParams;
3437 uint8_t *qMatrix = (uint8_t *)params.quantizermatrix;
3438
3439 for (uint8_t sizeId = 0; sizeId < 4; sizeId++) // 4x4, 8x8, 16x16, 32x32
3440 {
3441 for (uint8_t predType = 0; predType < 2; predType++) // Intra, Inter
3442 {
3443 for (uint8_t color = 0; color < 3; color++) // Y, Cb, Cr
3444 {
3445 if ((sizeId == 3) && (color != 0))
3446 break;
3447
3448 params.sizeid = sizeId;
3449 params.predictionType = predType;
3450 params.colorComponent = color;
3451 switch (sizeId)
3452 {
3453 case 0:
3454 case 1:
3455 default:
3456 params.dcCoefficient = 0;
3457 break;
3458 case 2:
3459 params.dcCoefficient = iqMatrix->ListDC16x16[3 * predType + color];
3460 break;
3461 case 3:
3462 params.dcCoefficient = iqMatrix->ListDC32x32[predType];
3463 break;
3464 }
3465
3466 if (sizeId == 0)
3467 {
3468 for (uint8_t i = 0; i < 4; i++)
3469 {
3470 for (uint8_t ii = 0; ii < 4; ii++)
3471 {
3472 qMatrix[4 * i + ii] = iqMatrix->List4x4[3 * predType + color][4 * i + ii];
3473 }
3474 }
3475 }
3476 else if (sizeId == 1)
3477 {
3478 for (uint8_t i = 0; i < 8; i++)
3479 {
3480 for (uint8_t ii = 0; ii < 8; ii++)
3481 {
3482 qMatrix[8 * i + ii] = iqMatrix->List8x8[3 * predType + color][8 * i + ii];
3483 }
3484 }
3485 }
3486 else if (sizeId == 2)
3487 {
3488 for (uint8_t i = 0; i < 8; i++)
3489 {
3490 for (uint8_t ii = 0; ii < 8; ii++)
3491 {
3492 qMatrix[8 * i + ii] = iqMatrix->List16x16[3 * predType + color][8 * i + ii];
3493 }
3494 }
3495 }
3496 else // 32x32
3497 {
3498 for (uint8_t i = 0; i < 8; i++)
3499 {
3500 for (uint8_t ii = 0; ii < 8; ii++)
3501 {
3502 qMatrix[8 * i + ii] = iqMatrix->List32x32[predType][8 * i + ii];
3503 }
3504 }
3505 }
3506
3507 m_hcpItf->MHW_ADDCMD_F(HCP_QM_STATE)(cmdBuffer);
3508 }
3509 }
3510 }
3511
3512 return MOS_STATUS_SUCCESS;
3513 }
3514
AddAllCmds_HCP_WEIGHTOFFSET_STATE(PMOS_COMMAND_BUFFER cmdBuffer) const3515 MOS_STATUS HevcVdencPkt::AddAllCmds_HCP_WEIGHTOFFSET_STATE(PMOS_COMMAND_BUFFER cmdBuffer) const
3516 {
3517 ENCODE_FUNC_CALL();
3518
3519 auto wpFeature = dynamic_cast<HevcVdencWeightedPred *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcVdencWpFeature));
3520 ENCODE_CHK_NULL_RETURN(wpFeature);
3521 if (wpFeature->IsEnabled())
3522 {
3523 auto ¶ms = m_hcpItf->MHW_GETPAR_F(HCP_WEIGHTOFFSET_STATE)();
3524 params = {};
3525 CODEC_HEVC_ENCODE_SLICE_PARAMS *pEncodeHevcSliceParams = (CODEC_HEVC_ENCODE_SLICE_PARAMS *)&m_hevcSliceParams[m_basicFeature->m_curNumSlices];
3526 if (pEncodeHevcSliceParams->slice_type == encodeHevcPSlice ||
3527 pEncodeHevcSliceParams->slice_type == encodeHevcBSlice)
3528 {
3529 params.ucList = LIST_0;
3530 MHW_CHK_STATUS_RETURN(wpFeature->MHW_SETPAR_F(HCP_WEIGHTOFFSET_STATE)(params));
3531 m_hcpItf->MHW_ADDCMD_F(HCP_WEIGHTOFFSET_STATE)(cmdBuffer);
3532 }
3533
3534 if (pEncodeHevcSliceParams->slice_type == encodeHevcBSlice)
3535 {
3536 params.ucList = LIST_1;
3537 MHW_CHK_STATUS_RETURN(wpFeature->MHW_SETPAR_F(HCP_WEIGHTOFFSET_STATE)(params));
3538 m_hcpItf->MHW_ADDCMD_F(HCP_WEIGHTOFFSET_STATE)(cmdBuffer);
3539 }
3540 }
3541 return MOS_STATUS_SUCCESS;
3542 }
3543 }
3544
3545