1 /*
2 * Copyright (c) 2021, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file     encode_hevc_vdenc_packet.cpp
24 //! \brief    Defines the interface for hevc encode vdenc packet
25 //!
26 #include "encode_hevc_vdenc_packet.h"
27 #include "mos_solo_generic.h"
28 #include "encode_vdenc_lpla_analysis.h"
29 #include "encode_hevc_vdenc_weighted_prediction.h"
30 #include "mhw_mi_itf.h"
31 #include "media_perf_profiler.h"
32 #include "codec_hw_next.h"
33 #include "hal_oca_interface_next.h"
34 
35 using namespace mhw::vdbox;
36 
37 namespace encode
38 {
AllocateResources()39     MOS_STATUS HevcVdencPkt::AllocateResources()
40     {
41         ENCODE_FUNC_CALL();
42 
43         MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
44 
45         ENCODE_CHK_NULL_RETURN(m_allocator);
46         MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
47         MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
48         allocParamsForBufferLinear.Type     = MOS_GFXRES_BUFFER;
49         allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
50         allocParamsForBufferLinear.Format   = Format_Buffer;
51 
52         allocParamsForBufferLinear.dwBytes  = MOS_ROUNDUP_DIVIDE(m_basicFeature->m_frameWidth, m_basicFeature->m_maxLCUSize) * CODECHAL_CACHELINE_SIZE * 2 * 2;
53         allocParamsForBufferLinear.pBufName = "vdencIntraRowStoreScratch";
54         allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ;
55         m_vdencIntraRowStoreScratch         = m_allocator->AllocateResource(allocParamsForBufferLinear, false);
56 
57         MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
58         allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER;
59         allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
60         allocParamsForBufferLinear.Format = Format_Buffer;
61 
62         // VDENC tile row store buffer
63         allocParamsForBufferLinear.dwBytes  = MOS_ROUNDUP_DIVIDE(m_basicFeature->m_frameWidth, 32) * CODECHAL_CACHELINE_SIZE * 2;
64         allocParamsForBufferLinear.pBufName = "VDENC Tile Row Store Buffer";
65         allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ;
66         m_vdencTileRowStoreBuffer = m_allocator->AllocateResource(allocParamsForBufferLinear,false);
67 
68         hcp::HcpBufferSizePar hcpBufSizePar;
69         MOS_ZeroMemory(&hcpBufSizePar, sizeof(hcpBufSizePar));
70 
71         hcpBufSizePar.ucMaxBitDepth  = m_basicFeature->m_bitDepth;
72         hcpBufSizePar.ucChromaFormat = m_basicFeature->m_chromaFormat;
73         // We should move the buffer allocation to picture level if the size is dependent on LCU size
74         hcpBufSizePar.dwCtbLog2SizeY = 6;  //assume Max LCU size
75         hcpBufSizePar.dwPicWidth     = MOS_ALIGN_CEIL(m_basicFeature->m_frameWidth, m_basicFeature->m_maxLCUSize);
76         hcpBufSizePar.dwPicHeight    = MOS_ALIGN_CEIL(m_basicFeature->m_frameHeight, m_basicFeature->m_maxLCUSize);
77 
78         auto AllocateHcpBuffer = [&](PMOS_RESOURCE &res, const hcp::HCP_INTERNAL_BUFFER_TYPE bufferType, const char *bufferName) {
79             uint32_t bufSize = 0;
80             hcpBufSizePar.bufferType = bufferType;
81             eStatus                  = m_hcpItf->GetHcpBufSize(hcpBufSizePar, bufSize);
82             if (eStatus != MOS_STATUS_SUCCESS)
83             {
84                 ENCODE_ASSERTMESSAGE("Failed to get hcp buffer size.");
85                 return eStatus;
86             }
87             allocParamsForBufferLinear.dwBytes  = bufSize;
88             allocParamsForBufferLinear.pBufName = bufferName;
89             allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ;
90             res                                 = m_allocator->AllocateResource(allocParamsForBufferLinear, false);
91             return MOS_STATUS_SUCCESS;
92         };
93 
94         // Metadata Line buffer
95         ENCODE_CHK_STATUS_RETURN(AllocateHcpBuffer(m_resMetadataLineBuffer, hcp::HCP_INTERNAL_BUFFER_TYPE::META_LINE, "MetadataLineBuffer"));
96         // Metadata Tile Line buffer
97         ENCODE_CHK_STATUS_RETURN(AllocateHcpBuffer(m_resMetadataTileLineBuffer, hcp::HCP_INTERNAL_BUFFER_TYPE::META_TILE_LINE, "MetadataTileLineBuffer"));
98         // Metadata Tile Column buffer
99         ENCODE_CHK_STATUS_RETURN(AllocateHcpBuffer(m_resMetadataTileColumnBuffer, hcp::HCP_INTERNAL_BUFFER_TYPE::META_TILE_COL, "MetadataTileColumnBuffer"));
100 
101         // Lcu ILDB StreamOut buffer
102         // This is not enabled with HCP_PIPE_MODE_SELECT yet, placeholder here
103         allocParamsForBufferLinear.dwBytes = CODECHAL_CACHELINE_SIZE;
104         allocParamsForBufferLinear.pBufName = "LcuILDBStreamOutBuffer";
105         allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ;
106         m_resLCUIldbStreamOutBuffer = m_allocator->AllocateResource(allocParamsForBufferLinear,false);
107 
108         // Allocate SSE Source Pixel Row Store Buffer
109         uint32_t maxTileColumns    = MOS_ROUNDUP_DIVIDE(m_basicFeature->m_frameWidth, CODECHAL_HEVC_MIN_TILE_SIZE);
110         allocParamsForBufferLinear.dwBytes  = 2 * m_basicFeature->m_sizeOfSseSrcPixelRowStoreBufferPerLcu * (m_basicFeature->m_widthAlignedMaxLCU + 3 * maxTileColumns);
111         allocParamsForBufferLinear.pBufName = "SseSrcPixelRowStoreBuffer";
112         allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ;
113         m_resSSESrcPixelRowStoreBuffer = m_allocator->AllocateResource(allocParamsForBufferLinear,false);
114 
115         uint32_t frameWidthInCus = CODECHAL_GET_WIDTH_IN_BLOCKS(m_basicFeature->m_frameWidth, CODECHAL_HEVC_MIN_CU_SIZE);
116         uint32_t frameHeightInCus = CODECHAL_GET_WIDTH_IN_BLOCKS(m_basicFeature->m_frameHeight, CODECHAL_HEVC_MIN_CU_SIZE);
117         uint32_t frameWidthInLCUs = CODECHAL_GET_WIDTH_IN_BLOCKS(m_basicFeature->m_frameWidth, CODECHAL_HEVC_VDENC_LCU_SIZE);
118         uint32_t frameHeightInLCUs = CODECHAL_GET_WIDTH_IN_BLOCKS(m_basicFeature->m_frameHeight, CODECHAL_HEVC_VDENC_LCU_SIZE);
119         // PAK CU Level Streamout Data:   DW57-59 in HCP pipe buffer address command
120         // One CU has 16-byte. But, each tile needs to be aliged to the cache line
121         auto size = MOS_ALIGN_CEIL(frameWidthInCus * frameHeightInCus * 16, CODECHAL_CACHELINE_SIZE);
122         allocParamsForBufferLinear.dwBytes = size;
123         allocParamsForBufferLinear.pBufName = "PAK CU Level Streamout Data";
124         allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ;
125         m_resPakcuLevelStreamOutData = m_allocator->AllocateResource(allocParamsForBufferLinear,false);
126 
127         MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
128         allocParamsForBufferLinear.Format = Format_Buffer;
129         allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
130         allocParamsForBufferLinear.dwBytes  = frameWidthInLCUs * frameHeightInLCUs * 4;
131         allocParamsForBufferLinear.pBufName = "VDEnc Cumulative CU Count Streamout Surface";
132         m_resCumulativeCuCountStreamoutBuffer = m_allocator->AllocateResource(allocParamsForBufferLinear, false);
133 
134         if(m_osInterface->bInlineCodecStatusUpdate)
135         {
136             m_atomicScratchBuf.size = MOS_ALIGN_CEIL(sizeof(AtomicScratchBuffer), sizeof(uint64_t));
137             allocParamsForBufferLinear.Type     = MOS_GFXRES_BUFFER;
138             allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
139             allocParamsForBufferLinear.Format   = Format_Buffer;
140             allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ_WRITE_CACHE;
141             size  = MHW_CACHELINE_SIZE * 4 * 2; //  each set of scratch is 4 cacheline size, and allocate 2 set.
142             allocParamsForBufferLinear.dwBytes  = size;
143             allocParamsForBufferLinear.pBufName = "atomic sratch buffer";
144 
145             if (MEDIA_IS_SKU(m_hwInterface->GetSkuTable(), FtrLocalMemory))
146             {
147                 allocParamsForBufferLinear.dwMemType = MOS_MEMPOOL_DEVICEMEMORY;
148             }
149             else
150             {
151                 allocParamsForBufferLinear.dwMemType = MOS_MEMPOOL_VIDEOMEMORY;
152             }
153 
154             m_atomicScratchBuf.resAtomicScratchBuffer = m_allocator->AllocateResource(allocParamsForBufferLinear, false);
155 
156             ENCODE_CHK_NULL_RETURN(m_atomicScratchBuf.resAtomicScratchBuffer);
157 
158             m_atomicScratchBuf.size               = size;
159             m_atomicScratchBuf.zeroValueOffset    = 0;
160             m_atomicScratchBuf.operand1Offset     = MHW_CACHELINE_SIZE;
161             m_atomicScratchBuf.operand2Offset     = MHW_CACHELINE_SIZE * 2;
162             m_atomicScratchBuf.operand3Offset     = MHW_CACHELINE_SIZE * 3;
163             m_atomicScratchBuf.encodeUpdateIndex  = 0;
164             m_atomicScratchBuf.tearDownIndex      = 1;
165             m_atomicScratchBuf.operandSetSize     = MHW_CACHELINE_SIZE * 4;
166         }
167 
168 #if USE_CODECHAL_DEBUG_TOOL && _ENCODE_RESERVED
169         m_hevcParDump = std::make_shared<HevcVdencParDump>(m_pipeline);
170 #endif  // USE_CODECHAL_DEBUG_TOOL
171 
172         return eStatus;
173     }
174 
Prepare()175     MOS_STATUS HevcVdencPkt::Prepare()
176     {
177         ENCODE_FUNC_CALL();
178 
179         m_pictureStatesSize    = m_defaultPictureStatesSize;
180         m_picturePatchListSize = m_defaultPicturePatchListSize;
181         m_sliceStatesSize      = m_defaultSliceStatesSize;
182         m_slicePatchListSize   = m_defaultSlicePatchListSize;
183 
184         HevcPipeline *pipeline = dynamic_cast<HevcPipeline *>(m_pipeline);
185         ENCODE_CHK_NULL_RETURN(pipeline);
186 
187         m_hevcSeqParams      = m_basicFeature->m_hevcSeqParams;
188         m_hevcPicParams      = m_basicFeature->m_hevcPicParams;
189         m_hevcSliceParams    = m_basicFeature->m_hevcSliceParams;
190         m_hevcIqMatrixParams = m_basicFeature->m_hevcIqMatrixParams;
191         m_nalUnitParams      = m_basicFeature->m_nalUnitParams;
192 
193         ENCODE_CHK_STATUS_RETURN(ValidateVdboxIdx(m_vdboxIndex));
194 
195         m_pakOnlyPass = false;
196 
197         ENCODE_CHK_STATUS_RETURN(SetBatchBufferForPakSlices());
198 
199         SetRowstoreCachingOffsets();
200 
201         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, SetPipeNumber, m_pipeline->GetPipeNum());
202 
203         return MOS_STATUS_SUCCESS;
204     }
205 
SubmitPictureLevel(MOS_COMMAND_BUFFER * commandBuffer,uint8_t packetPhase)206     MOS_STATUS HevcVdencPkt::SubmitPictureLevel(
207         MOS_COMMAND_BUFFER* commandBuffer,
208         uint8_t packetPhase)
209     {
210         ENCODE_FUNC_CALL();
211 
212         MOS_COMMAND_BUFFER& cmdBuffer = *commandBuffer;
213         ENCODE_CHK_STATUS_RETURN(Mos_Solo_PreProcessEncode(m_osInterface, &m_basicFeature->m_resBitstreamBuffer, &m_basicFeature->m_reconSurface));
214 
215         // Ensure the input is ready to be read.
216         // Currently, mos RegisterResource has sync limitation for Raw resource.
217         // Temporaly, call Resource Wait to do the sync explicitly.
218         if(m_pipeline->IsFirstPass())
219         {
220             MOS_SYNC_PARAMS       syncParams;
221             syncParams = g_cInitSyncParams;
222             syncParams.GpuContext = m_osInterface->pfnGetGpuContext(m_osInterface);
223             syncParams.presSyncResource = &m_basicFeature->m_rawSurface.OsResource;
224             syncParams.bReadOnly = true;
225             ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnResourceWait(m_osInterface, &syncParams));
226             m_osInterface->pfnSetResourceSyncTag(m_osInterface, &syncParams);
227         }
228 
229         ENCODE_CHK_STATUS_RETURN(PatchPictureLevelCommands(packetPhase, cmdBuffer));
230 
231         return MOS_STATUS_SUCCESS;
232     }
233 
SubmitTileLevel(MOS_COMMAND_BUFFER * commandBuffer,uint8_t packetPhase)234     MOS_STATUS HevcVdencPkt::SubmitTileLevel(
235         MOS_COMMAND_BUFFER* commandBuffer,
236         uint8_t packetPhase)
237     {
238         ENCODE_FUNC_CALL();
239         auto eStatus = MOS_STATUS_SUCCESS;
240 
241         if (!m_hevcPicParams->tiles_enabled_flag)
242         {
243             return MOS_STATUS_INVALID_PARAMETER;
244         }
245 
246         MOS_COMMAND_BUFFER& cmdBuffer = *commandBuffer;
247 
248         ENCODE_CHK_STATUS_RETURN(Construct3rdLevelBatch());
249 
250         uint16_t numTileColumns = 1;
251         uint16_t numTileRows = 1;
252         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetTileRowColumns, numTileRows, numTileColumns);
253 
254         if (m_pipeline->GetPipeNum() == 2)
255         {
256             ENCODE_CHK_STATUS_RETURN(AddOneTileCommands(
257                 cmdBuffer,
258                 m_pipeline->GetCurrentRow(),
259                 m_pipeline->GetCurrentPipe(),
260                 m_pipeline->GetCurrentSubPass()));
261         }
262         else
263         {
264             for (uint16_t tileCol = 0; tileCol < numTileColumns; tileCol++)
265             {
266                 ENCODE_CHK_STATUS_RETURN(AddOneTileCommands(
267                     cmdBuffer,
268                     m_pipeline->GetCurrentRow(),
269                     tileCol,
270                     m_pipeline->GetCurrentPass()));
271             }
272         }
273 
274         // Insert end of sequence/stream if set
275         if ((m_basicFeature->m_lastPicInSeq || m_basicFeature->m_lastPicInStream) && m_pipeline->IsLastPipe())
276         {
277             ENCODE_CHK_STATUS_RETURN(InsertSeqStreamEnd(cmdBuffer));
278         }
279 
280         if (m_pipeline->GetCurrentRow() == (numTileRows - 1))
281         {
282             // Send VD_CONTROL_STATE (Memory Implict Flush)
283             auto &vdControlStateParams = m_miItf->MHW_GETPAR_F(VD_CONTROL_STATE)();
284             vdControlStateParams = {};
285             vdControlStateParams.memoryImplicitFlush = true;
286             ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(VD_CONTROL_STATE)(&cmdBuffer));
287 
288             m_flushCmd = waitHevc;
289             SETPAR_AND_ADDCMD(VD_PIPELINE_FLUSH, m_vdencItf, &cmdBuffer);
290 
291             ENCODE_CHK_STATUS_RETURN(EnsureAllCommandsExecuted(cmdBuffer));
292 
293             // Wait all pipe cmds done for the packet
294             auto scalability = m_pipeline->GetMediaScalability();
295             ENCODE_CHK_STATUS_RETURN(scalability->SyncPipe(syncOnePipeWaitOthers, 0, &cmdBuffer));
296 
297             // Store PAK frameSize MMIO to PakInfo buffer
298             auto mmioRegisters                  = m_hcpItf->GetMmioRegisters(m_vdboxIndex);
299             auto &miStoreRegMemParams           = m_miItf->MHW_GETPAR_F(MI_STORE_REGISTER_MEM)();
300             miStoreRegMemParams                 = {};
301             miStoreRegMemParams.presStoreBuffer = m_basicFeature->m_recycleBuf->GetBuffer(PakInfo, 0);;
302             miStoreRegMemParams.dwOffset        = 0;
303             miStoreRegMemParams.dwRegister      = mmioRegisters->hcpEncBitstreamBytecountFrameRegOffset;
304             ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(&cmdBuffer));
305 
306             ENCODE_CHK_STATUS_RETURN(Mos_Solo_PostProcessEncode(m_osInterface, &m_basicFeature->m_resBitstreamBuffer, &m_basicFeature->m_reconSurface));
307         }
308         // post-operations are done by pak integrate pkt
309 
310         return MOS_STATUS_SUCCESS;
311     }
312 
Submit(MOS_COMMAND_BUFFER * commandBuffer,uint8_t packetPhase)313     MOS_STATUS HevcVdencPkt::Submit(
314         MOS_COMMAND_BUFFER* commandBuffer,
315         uint8_t packetPhase)
316     {
317         ENCODE_FUNC_CALL();
318 
319         if (m_submitState == submitFrameByDefault)
320         {
321             ENCODE_CHK_STATUS_RETURN(SubmitPictureLevel(commandBuffer, packetPhase));
322 
323             MOS_COMMAND_BUFFER& cmdBuffer = *commandBuffer;
324 
325             bool tileEnabled = false;
326             RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, IsEnabled, tileEnabled);
327             if (!tileEnabled)
328             {
329                 ENCODE_CHK_STATUS_RETURN(PatchSliceLevelCommands(cmdBuffer, packetPhase));
330             }
331             else
332             {
333                 ENCODE_CHK_STATUS_RETURN(PatchTileLevelCommands(cmdBuffer, packetPhase));
334             }
335 
336             ENCODE_CHK_STATUS_RETURN(Mos_Solo_PostProcessEncode(m_osInterface, &m_basicFeature->m_resBitstreamBuffer, &m_basicFeature->m_reconSurface));
337         }
338         else
339         {
340             switch (m_submitState)
341             {
342                 case submitPic:
343                 {
344                     ENCODE_CHK_STATUS_RETURN(SubmitPictureLevel(commandBuffer, packetPhase));
345                     m_submitState = submitInvalid;
346                     break;
347                 };
348                 case submitTile:
349                 {
350                     ENCODE_FUNC_CALL();
351 
352                     bool tileEnabled = false;
353                     RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, IsEnabled, tileEnabled);
354                     ENCODE_CHK_STATUS_RETURN(SubmitTileLevel(commandBuffer, packetPhase));
355 
356                     m_submitState = submitInvalid;
357                     break;
358                 };
359                 default:
360                     m_submitState = submitInvalid;
361                     break;
362             }
363         }
364 
365         m_enableVdencStatusReport = true;
366 
367     #if USE_CODECHAL_DEBUG_TOOL && _ENCODE_RESERVED
368         m_hevcParDump->SetParFile();
369         ENCODE_CHK_STATUS_RETURN(DumpInput());
370     #endif
371 
372         return MOS_STATUS_SUCCESS;
373     }
374 
PatchPictureLevelCommands(const uint8_t & packetPhase,MOS_COMMAND_BUFFER & cmdBuffer)375     MOS_STATUS HevcVdencPkt::PatchPictureLevelCommands(const uint8_t &packetPhase, MOS_COMMAND_BUFFER  &cmdBuffer)
376     {
377         ENCODE_FUNC_CALL();
378 
379         cmdBuffer.Attributes.bFrequencyBoost = (m_basicFeature->m_hevcSeqParams->ScenarioInfo == ESCENARIO_REMOTEGAMING);
380 
381         ENCODE_CHK_STATUS_RETURN(m_miItf->SetWatchdogTimerThreshold(m_basicFeature->m_frameWidth, m_basicFeature->m_frameHeight, true));
382 
383         SetPerfTag();
384 
385         auto feature = dynamic_cast<HEVCEncodeBRC*>(m_featureManager->GetFeature(HevcFeatureIDs::hevcBrcFeature));
386         ENCODE_CHK_NULL_RETURN(feature);
387         bool firstTaskInPhase = packetPhase & firstPacket;
388         if (!m_pipeline->IsSingleTaskPhaseSupported() || firstTaskInPhase)//(m_pipeline->IsFirstPass() && !feature->IsVdencHucUsed())) && m_pipeline->GetPipeNum() == 1) || m_pipeline->GetPipeNum() >= 2)
389         {
390             ENCODE_CHK_STATUS_RETURN(AddForceWakeup(cmdBuffer));
391 
392             // Send command buffer header at the beginning (OS dependent)
393             ENCODE_CHK_STATUS_RETURN(SendPrologCmds(cmdBuffer));
394         }
395 
396         if (m_pipeline->GetPipeNum() >= 2)
397         {
398             auto scalability = m_pipeline->GetMediaScalability();
399             if (m_pipeline->IsFirstPass())
400             {
401                 // Reset multi-pipe sync semaphores
402                 ENCODE_CHK_STATUS_RETURN(scalability->ResetSemaphore(syncOnePipeWaitOthers, m_pipeline->GetCurrentPipe(), &cmdBuffer));
403             }
404 
405             // For brc case, other pipes wait for BRCupdate done on first pipe
406             // For cqp case, pipes also need sync
407             ENCODE_CHK_STATUS_RETURN(scalability->SyncPipe(syncOtherPipesForOne, 0, &cmdBuffer));
408         }
409 
410         m_streamInEnabled = false;
411         RUN_FEATURE_INTERFACE_RETURN(VdencLplaAnalysis, HevcFeatureIDs::vdencLplaAnalysisFeature,
412             EnableStreamIn, m_pipeline->IsFirstPass(), m_pipeline->IsLastPass(), m_streamInEnabled);
413 
414         ENCODE_CHK_STATUS_RETURN(AddCondBBEndForLastPass(cmdBuffer));
415 
416         if (m_pipeline->IsFirstPipe())
417         {
418             ENCODE_CHK_STATUS_RETURN(StartStatusReport(statusReportMfx, &cmdBuffer));
419         }
420 
421         MediaPerfProfiler *perfProfiler = MediaPerfProfiler::Instance();
422         ENCODE_CHK_NULL_RETURN(perfProfiler);
423         ENCODE_CHK_STATUS_RETURN(perfProfiler->AddPerfCollectStartCmd(
424             (void *)m_pipeline, m_osInterface, m_miItf, &cmdBuffer));
425 
426         ENCODE_CHK_STATUS_RETURN(AddPictureHcpCommands(cmdBuffer));
427 
428         ENCODE_CHK_STATUS_RETURN(AddPictureVdencCommands(cmdBuffer));
429 
430         ENCODE_CHK_STATUS_RETURN(AddPicStateWithNoTile(cmdBuffer));
431 
432         return MOS_STATUS_SUCCESS;
433     }
434 
PatchSliceLevelCommands(MOS_COMMAND_BUFFER & cmdBuffer,uint8_t packetPhase)435     MOS_STATUS HevcVdencPkt::PatchSliceLevelCommands(MOS_COMMAND_BUFFER  &cmdBuffer, uint8_t packetPhase)
436     {
437         ENCODE_FUNC_CALL();
438 
439         if (m_hevcPicParams->tiles_enabled_flag)
440         {
441             return MOS_STATUS_SUCCESS;
442         }
443 
444         auto feature = dynamic_cast<HEVCEncodeBRC*>(m_featureManager->GetFeature(HevcFeatureIDs::hevcBrcFeature));
445         ENCODE_CHK_NULL_RETURN(feature);
446         auto vdenc2ndLevelBatchBuffer = feature->GetVdenc2ndLevelBatchBuffer(m_pipeline->m_currRecycledBufIdx);
447 
448         // starting location for executing slice level cmds
449         vdenc2ndLevelBatchBuffer->dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize + m_hwInterface->m_vdencBatchBuffer2ndGroupSize;
450 
451         PCODEC_ENCODER_SLCDATA slcData = m_basicFeature->m_slcData;
452         for (uint32_t startLcu = 0, slcCount = 0; slcCount < m_basicFeature->m_numSlices; slcCount++)
453         {
454             if (m_pipeline->IsFirstPass())
455             {
456                 slcData[slcCount].CmdOffset = startLcu * (m_hcpItf->GetHcpPakObjSize()) * sizeof(uint32_t);
457             }
458             m_basicFeature->m_curNumSlices = slcCount;
459 
460             ENCODE_CHK_STATUS_RETURN(SendHwSliceEncodeCommand(nullptr, slcCount, cmdBuffer));
461 
462             startLcu += m_hevcSliceParams[slcCount].NumLCUsInSlice;
463 
464             m_batchBufferForPakSlicesStartOffset = (uint32_t)m_batchBufferForPakSlices[m_basicFeature->m_currPakSliceIdx].iCurrent;
465             if (feature->IsACQPEnabled() || feature->IsBRCEnabled())
466             {
467                 // save offset for next 2nd level batch buffer usage
468                 // This is because we don't know how many times HCP_WEIGHTOFFSET_STATE & HCP_PAK_INSERT_OBJECT will be inserted for each slice
469                 // dwVdencBatchBufferPerSliceConstSize: constant size for each slice
470                 // m_vdencBatchBufferPerSliceVarSize:   variable size for each slice
471                 vdenc2ndLevelBatchBuffer->dwOffset += m_hwInterface->m_vdencBatchBufferPerSliceConstSize + m_basicFeature->m_vdencBatchBufferPerSliceVarSize[slcCount];
472             }
473 
474             m_flushCmd = waitVdenc;
475             SETPAR_AND_ADDCMD(VD_PIPELINE_FLUSH, m_vdencItf, &cmdBuffer);
476         }
477 
478         if (m_useBatchBufferForPakSlices)
479         {
480             ENCODE_CHK_STATUS_RETURN(Mhw_UnlockBb(
481                 m_osInterface,
482                 &m_batchBufferForPakSlices[m_basicFeature->m_currPakSliceIdx],
483                 m_lastTaskInPhase));
484         }
485 
486         // Insert end of sequence/stream if set
487         if (m_basicFeature->m_lastPicInSeq || m_basicFeature->m_lastPicInStream)
488         {
489             ENCODE_CHK_STATUS_RETURN(InsertSeqStreamEnd(cmdBuffer));
490         }
491 
492         ENCODE_CHK_STATUS_RETURN(EnsureAllCommandsExecuted(cmdBuffer));
493 
494         m_flushCmd = waitHevc;
495         SETPAR_AND_ADDCMD(VD_PIPELINE_FLUSH, m_vdencItf, &cmdBuffer);
496 
497         ENCODE_CHK_STATUS_RETURN(EnsureAllCommandsExecuted(cmdBuffer));
498 
499         ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(m_vdboxIndex, m_statusReport, cmdBuffer));
500         // BRC PAK statistics different for each pass
501         if (feature->IsBRCEnabled())
502         {
503             uint8_t                     ucPass = (uint8_t)m_pipeline->GetCurrentPass();
504             EncodeReadBrcPakStatsParams readBrcPakStatsParams;
505             MOS_RESOURCE *              osResource = nullptr;
506             uint32_t                    offset = 0;
507             m_statusReport->GetAddress(statusReportNumberPasses, osResource, offset);
508             RUN_FEATURE_INTERFACE_RETURN(HEVCEncodeBRC, HevcFeatureIDs::hevcBrcFeature, SetReadBrcPakStatsParams, ucPass, offset, osResource, readBrcPakStatsParams);
509             ReadBrcPakStatistics(&cmdBuffer, &readBrcPakStatsParams);
510         }
511         ENCODE_CHK_STATUS_RETURN(ReadExtStatistics(cmdBuffer));
512         ENCODE_CHK_STATUS_RETURN(ReadSliceSize(cmdBuffer));
513         ENCODE_CHK_STATUS_RETURN(PrepareHWMetaData(&cmdBuffer));
514         RUN_FEATURE_INTERFACE_RETURN(VdencLplaAnalysis, HevcFeatureIDs::vdencLplaAnalysisFeature, StoreLookaheadStatistics, cmdBuffer, m_vdboxIndex);
515 
516 #if USE_CODECHAL_DEBUG_TOOL
517         uint32_t sizeInByte = 0;
518         bool     isIframe   = m_basicFeature->m_pictureCodingType == I_TYPE;
519         ENCODE_CHK_NULL_RETURN(m_packetUtilities);
520         if (m_packetUtilities->GetFakeHeaderSettings(sizeInByte, isIframe))
521         {
522             ENCODE_CHK_NULL_RETURN(m_basicFeature->m_recycleBuf);
523             ENCODE_CHK_STATUS_RETURN(m_packetUtilities->ModifyEncodedFrameSizeWithFakeHeaderSize(
524                 &cmdBuffer,
525                 sizeInByte,
526                 m_basicFeature->m_recycleBuf->GetBuffer(PakInfo, 0),
527                 0,
528                 m_basicFeature->m_recycleBuf->GetBuffer(FrameStatStreamOutBuffer, 0),
529                 sizeof(uint32_t) * 4));
530         }
531 #endif
532 
533         ENCODE_CHK_STATUS_RETURN(EndStatusReport(statusReportMfx, &cmdBuffer));
534 
535         if (Mos_Solo_Extension((MOS_CONTEXT_HANDLE)m_osInterface->pOsContext))
536         {
537             if (m_pipeline->IsLastPass() && m_pipeline->IsFirstPipe())
538             {
539                 MediaPacket::UpdateStatusReportNext(statusReportGlobalCount, &cmdBuffer);
540             }
541         }
542         else if (m_osInterface->bInlineCodecStatusUpdate
543             && !(m_basicFeature->m_422State && m_basicFeature->m_422State->GetFeature422Flag())
544             )
545         {
546             if (feature->IsBRCEnabled())
547             {
548                 ENCODE_CHK_STATUS_RETURN(UpdateStatusReport(statusReportGlobalCount, &cmdBuffer));
549             }
550             else
551             {
552                 ENCODE_CHK_STATUS_RETURN(MediaPacket::UpdateStatusReportNext(statusReportGlobalCount, &cmdBuffer));
553             }
554         }
555 
556         CODECHAL_DEBUG_TOOL(
557             if (m_mmcState) {
558                 m_mmcState->UpdateUserFeatureKey(&(m_basicFeature->m_reconSurface));
559             })
560         // Reset parameters for next PAK execution
561         if (false == m_pipeline->IsFrameTrackingEnabled() && m_pipeline->IsLastPass() && m_pipeline->IsLastPipe())
562         {
563             UpdateParameters();
564         }
565 
566         return MOS_STATUS_SUCCESS;
567     }
568 
Construct3rdLevelBatch()569     MOS_STATUS HevcVdencPkt::Construct3rdLevelBatch()
570     {
571         ENCODE_FUNC_CALL();
572 
573         MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
574 
575         // Begin patching 3rd level batch cmds
576         MOS_COMMAND_BUFFER constructedCmdBuf;
577         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, BeginPatch3rdLevelBatch, constructedCmdBuf);
578 
579         SETPAR_AND_ADDCMD(VDENC_CMD1, m_vdencItf, &constructedCmdBuf);
580 
581         SETPAR_AND_ADDCMD(HCP_PIC_STATE, m_hcpItf, &constructedCmdBuf);
582 
583         SETPAR_AND_ADDCMD(VDENC_CMD2, m_vdencItf, &constructedCmdBuf);
584 
585         // set MI_BATCH_BUFFER_END command
586         ENCODE_CHK_STATUS_RETURN(m_miItf->AddMiBatchBufferEnd(&constructedCmdBuf, nullptr));
587 
588         // End patching 3rd level batch cmds
589         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, EndPatch3rdLevelBatch);
590 
591         return eStatus;
592     }
593 
AddSlicesCommandsInTile(MOS_COMMAND_BUFFER & cmdBuffer)594     MOS_STATUS HevcVdencPkt::AddSlicesCommandsInTile(
595         MOS_COMMAND_BUFFER &cmdBuffer)
596     {
597         ENCODE_FUNC_CALL();
598 
599         PCODEC_ENCODER_SLCDATA         slcData = m_basicFeature->m_slcData;
600 
601         uint32_t slcCount, sliceNumInTile = 0;
602         for (slcCount = 0; slcCount < m_basicFeature->m_numSlices; slcCount++)
603         {
604             m_basicFeature->m_curNumSlices = slcCount;
605             bool sliceInTile               = false;
606             m_lastSliceInTile              = false;
607 
608             EncodeTileData curTileData = {};
609             RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetCurrentTile, curTileData);
610             RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, IsSliceInTile, slcCount, &curTileData, &sliceInTile, &m_lastSliceInTile);
611 
612             m_basicFeature->m_lastSliceInTile = m_lastSliceInTile;
613             if (!sliceInTile)
614             {
615                 continue;
616             }
617 
618             ENCODE_CHK_STATUS_RETURN(SendHwSliceEncodeCommand(nullptr, slcCount, cmdBuffer));
619 
620             m_flushCmd = waitHevcVdenc;
621             SETPAR_AND_ADDCMD(VD_PIPELINE_FLUSH, m_vdencItf, &cmdBuffer);
622 
623             sliceNumInTile++;
624         }  // end of slice
625 
626         if (0 == sliceNumInTile)
627         {
628             // One tile must have at least one slice
629             ENCODE_ASSERT(false);
630             return MOS_STATUS_INVALID_PARAMETER;
631         }
632 
633         uint16_t numTileRows    = 1;
634         uint16_t numTileColumns = 1;
635         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetTileRowColumns, numTileRows, numTileColumns);
636 
637         if (sliceNumInTile > 1 && (numTileColumns > 1 || numTileRows > 1))
638         {
639             ENCODE_ASSERTMESSAGE("Multi-slices in a tile is not supported!");
640             return MOS_STATUS_INVALID_PARAMETER;
641         }
642         return MOS_STATUS_SUCCESS;
643     }
644 
AddOneTileCommands(MOS_COMMAND_BUFFER & cmdBuffer,uint32_t tileRow,uint32_t tileCol,uint32_t tileRowPass)645     MOS_STATUS HevcVdencPkt::AddOneTileCommands(
646         MOS_COMMAND_BUFFER &cmdBuffer,
647         uint32_t            tileRow,
648         uint32_t            tileCol,
649         uint32_t            tileRowPass)
650     {
651         ENCODE_FUNC_CALL();
652         PMOS_COMMAND_BUFFER tempCmdBuffer         = &cmdBuffer;
653         PMHW_BATCH_BUFFER   tileLevelBatchBuffer  = nullptr;
654         auto                eStatus               = MOS_STATUS_SUCCESS;
655         MOS_COMMAND_BUFFER constructTileBatchBuf = {};
656 
657         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, SetCurrentTile, tileRow, tileCol, m_pipeline);
658 
659         if ((m_pipeline->GetPipeNum() > 1) && (tileCol != m_pipeline->GetCurrentPipe()))
660         {
661             return MOS_STATUS_SUCCESS;
662         }
663 
664         if (!m_osInterface->bUsesPatchList)
665         {
666             // Begin patching tile level batch cmds
667             RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, BeginPatchTileLevelBatch, tileRowPass, constructTileBatchBuf);
668 
669             // Add batch buffer start for tile
670             RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetTileLevelBatchBuffer, tileLevelBatchBuffer);
671             ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_BATCH_BUFFER_START)(&cmdBuffer, tileLevelBatchBuffer));
672 
673             tempCmdBuffer = &constructTileBatchBuf;
674             MHW_MI_MMIOREGISTERS mmioRegister;
675             if (m_vdencItf->ConvertToMiRegister(MHW_VDBOX_NODE_1, mmioRegister))
676             {
677                 HalOcaInterfaceNext::On1stLevelBBStart(
678                     *tempCmdBuffer,
679                     (MOS_CONTEXT_HANDLE)m_osInterface->pOsContext,
680                     m_osInterface->CurrentGpuContextHandle,
681                     m_miItf,
682                     mmioRegister);
683             }
684         }
685 
686         // HCP Lock for multiple pipe mode
687         if (m_pipeline->GetPipeNum() > 1)
688         {
689             auto &vdControlStateParams                = m_miItf->MHW_GETPAR_F(VD_CONTROL_STATE)();
690             vdControlStateParams                      = {};
691             vdControlStateParams.scalableModePipeLock = true;
692             ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(VD_CONTROL_STATE)(tempCmdBuffer));
693         }
694 
695         SETPAR_AND_ADDCMD(VDENC_PIPE_MODE_SELECT, m_vdencItf, tempCmdBuffer);
696 
697         // for Gen11+, we need to add MFX wait for both KIN and VRT before and after HCP Pipemode select...
698         auto &mfxWaitParams                 = m_miItf->MHW_GETPAR_F(MFX_WAIT)();
699         mfxWaitParams                       = {};
700         mfxWaitParams.iStallVdboxPipeline   = true;
701         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MFX_WAIT)(tempCmdBuffer));
702 
703         SETPAR_AND_ADDCMD(HCP_PIPE_MODE_SELECT, m_hcpItf, tempCmdBuffer);
704 
705         // for Gen11+, we need to add MFX wait for both KIN and VRT before and after HCP Pipemode select...
706         mfxWaitParams                       = {};
707         mfxWaitParams.iStallVdboxPipeline   = true;
708         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MFX_WAIT)(tempCmdBuffer));
709 
710         ENCODE_CHK_STATUS_RETURN(AddPicStateWithTile(*tempCmdBuffer));
711 
712         SETPAR_AND_ADDCMD(HCP_TILE_CODING, m_hcpItf, tempCmdBuffer);
713 
714         ENCODE_CHK_STATUS_RETURN(AddSlicesCommandsInTile(*tempCmdBuffer));
715 
716         //HCP unLock for multiple pipe mode
717         if (m_pipeline->GetPipeNum() > 1)
718         {
719             auto &vdControlStateParams                  = m_miItf->MHW_GETPAR_F(VD_CONTROL_STATE)();
720             vdControlStateParams                        = {};
721             vdControlStateParams.scalableModePipeUnlock = true;
722             ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(VD_CONTROL_STATE)(tempCmdBuffer));
723         }
724 
725         m_flushCmd = waitHevc;
726         SETPAR_AND_ADDCMD(VD_PIPELINE_FLUSH, m_vdencItf, tempCmdBuffer);
727 
728         ENCODE_CHK_STATUS_RETURN(EnsureAllCommandsExecuted(*tempCmdBuffer));
729 
730         if (!m_osInterface->bUsesPatchList)
731         {
732             // For 2nd level BB, we must use tileLevelBatchBuffer to prevent adding Epilogue before MI_BATCH_BUFFER_END
733             ENCODE_CHK_NULL_RETURN(tileLevelBatchBuffer);
734             tileLevelBatchBuffer->iCurrent   = tempCmdBuffer->iOffset;
735             tileLevelBatchBuffer->iRemaining = tempCmdBuffer->iRemaining;
736             ENCODE_CHK_STATUS_RETURN(m_miItf->AddMiBatchBufferEnd(nullptr, tileLevelBatchBuffer));
737             HalOcaInterfaceNext::OnSubLevelBBStart(
738                 cmdBuffer,
739                 m_osInterface->pOsContext,
740                 &tempCmdBuffer->OsResource,
741                 0,
742                 false,
743                 tempCmdBuffer->iOffset);
744             HalOcaInterfaceNext::On1stLevelBBEnd(*tempCmdBuffer, *m_osInterface);
745 
746         #if USE_CODECHAL_DEBUG_TOOL
747             if (tempCmdBuffer->pCmdPtr && tempCmdBuffer->pCmdBase &&
748                 tempCmdBuffer->pCmdPtr > tempCmdBuffer->pCmdBase)
749             {
750                 CodechalDebugInterface *debugInterface = m_pipeline->GetDebugInterface();
751                 std::string             name("TileLevelBatchBuffer");
752                 name += "Row" + std::to_string(tileRow) + "Col" + std::to_string(tileCol);
753 
754                 ENCODE_CHK_STATUS_RETURN(debugInterface->DumpData(
755                     tempCmdBuffer->pCmdBase,
756                     (uint32_t)(4 * (tempCmdBuffer->pCmdPtr - tempCmdBuffer->pCmdBase)),
757                     CodechalDbgAttr::attrCmdBufferMfx,
758                     name.c_str()));
759             }
760         #endif
761         }
762 
763         // End patching tile level batch cmds
764         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, EndPatchTileLevelBatch);
765 
766         return eStatus;
767     }
768 
PatchTileLevelCommands(MOS_COMMAND_BUFFER & cmdBuffer,uint8_t packetPhase)769     MOS_STATUS HevcVdencPkt::PatchTileLevelCommands(MOS_COMMAND_BUFFER &cmdBuffer, uint8_t packetPhase)
770     {
771         ENCODE_FUNC_CALL();
772         auto eStatus = MOS_STATUS_SUCCESS;
773 
774         if (!m_hevcPicParams->tiles_enabled_flag)
775         {
776             return MOS_STATUS_INVALID_PARAMETER;
777         }
778 
779         // multi tiles cases on Liunx, 3rd level batch buffer is 2nd level.
780         ENCODE_CHK_STATUS_RETURN(Construct3rdLevelBatch());
781 
782         uint16_t numTileColumns = 1;
783         uint16_t numTileRows    = 1;
784         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetTileRowColumns, numTileRows, numTileColumns);
785 
786         for (uint32_t tileRow = 0; tileRow < numTileRows; tileRow++)
787         {
788             uint32_t Pass = m_pipeline->GetCurrentPass();
789             for (uint32_t tileCol = 0; tileCol < numTileColumns; tileCol++)
790             {
791                 ENCODE_CHK_STATUS_RETURN(AddOneTileCommands(
792                     cmdBuffer,
793                     tileRow,
794                     tileCol,
795                     Pass));
796             }
797         }
798 
799         if(m_pipeline->IsLastPipe())
800         {
801             // increment the 3rd lvl bb to break successive frames dependency
802             RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, IncrementThirdLevelBatchBuffer);
803 
804             // Insert end of sequence/stream if set
805             if (m_basicFeature->m_lastPicInSeq || m_basicFeature->m_lastPicInStream)
806             {
807                 ENCODE_CHK_STATUS_RETURN(InsertSeqStreamEnd(cmdBuffer));
808             }
809         }
810 
811         // Send VD_CONTROL_STATE (Memory Implict Flush)
812         auto &vdControlStateParams               = m_miItf->MHW_GETPAR_F(VD_CONTROL_STATE)();
813         vdControlStateParams                     = {};
814         vdControlStateParams.memoryImplicitFlush = true;
815         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(VD_CONTROL_STATE)(&cmdBuffer));
816 
817         m_flushCmd = waitHevc;
818         SETPAR_AND_ADDCMD(VD_PIPELINE_FLUSH, m_vdencItf, &cmdBuffer);
819 
820         ENCODE_CHK_STATUS_RETURN(EnsureAllCommandsExecuted(cmdBuffer));
821 
822         // read info from MMIO register in VDENC, incase pak int can't get info
823         auto feature = dynamic_cast<HEVCEncodeBRC *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcBrcFeature));
824         ENCODE_CHK_NULL_RETURN(feature);
825         if (m_pipeline->GetPipeNum() <= 1 && !m_pipeline->IsSingleTaskPhaseSupported())
826         {
827             ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(m_vdboxIndex, m_statusReport, cmdBuffer));
828             // BRC PAK statistics different for each pass
829             if (feature->IsBRCEnabled())
830             {
831                 uint8_t                     ucPass = (uint8_t)m_pipeline->GetCurrentPass();
832                 EncodeReadBrcPakStatsParams readBrcPakStatsParams;
833                 MOS_RESOURCE               *osResource = nullptr;
834                 uint32_t                    offset     = 0;
835                 m_statusReport->GetAddress(statusReportNumberPasses, osResource, offset);
836                 RUN_FEATURE_INTERFACE_RETURN(HEVCEncodeBRC, HevcFeatureIDs::hevcBrcFeature, SetReadBrcPakStatsParams, ucPass, offset, osResource, readBrcPakStatsParams);
837                 ReadBrcPakStatistics(&cmdBuffer, &readBrcPakStatsParams);
838             }
839         }
840 
841         // Wait all pipe cmds done for the packet
842         auto scalability = m_pipeline->GetMediaScalability();
843         ENCODE_CHK_STATUS_RETURN(scalability->SyncPipe(syncOnePipeWaitOthers, 0, &cmdBuffer));
844 
845         MediaPerfProfiler *perfProfiler = MediaPerfProfiler::Instance();
846         ENCODE_CHK_NULL_RETURN(perfProfiler);
847         ENCODE_CHK_STATUS_RETURN(perfProfiler->AddPerfCollectEndCmd(
848             (void *)m_pipeline, m_osInterface, m_miItf, &cmdBuffer));
849 
850         // post-operations are done by pak integrate pkt
851 
852         return MOS_STATUS_SUCCESS;
853     }
854 
AddPicStateWithNoTile(MOS_COMMAND_BUFFER & cmdBuffer)855     MOS_STATUS HevcVdencPkt::AddPicStateWithNoTile(
856         MOS_COMMAND_BUFFER &cmdBuffer)
857     {
858         ENCODE_FUNC_CALL();
859 
860         bool tileEnabled = false;
861         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, IsEnabled, tileEnabled);
862         if (tileEnabled)
863         {
864             return MOS_STATUS_SUCCESS;
865         }
866 
867         auto brcFeature = dynamic_cast<HEVCEncodeBRC *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcBrcFeature));
868         ENCODE_CHK_NULL_RETURN(brcFeature);
869         auto vdenc2ndLevelBatchBuffer      = brcFeature->GetVdenc2ndLevelBatchBuffer(m_pipeline->m_currRecycledBufIdx);
870         vdenc2ndLevelBatchBuffer->dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize;
871 
872         if (brcFeature->IsBRCUpdateRequired())
873         {
874             ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_BATCH_BUFFER_START(&cmdBuffer, vdenc2ndLevelBatchBuffer)));
875             HalOcaInterfaceNext::OnSubLevelBBStart(
876                 cmdBuffer,
877                 m_osInterface->pOsContext,
878                 &vdenc2ndLevelBatchBuffer->OsResource,
879                 vdenc2ndLevelBatchBuffer->dwOffset,
880                 false,
881                 m_hwInterface->m_vdencBatchBuffer2ndGroupSize);
882         }
883         // When tile is enabled, below commands are needed for each tile instead of each picture
884         else
885         {
886             SETPAR_AND_ADDCMD(VDENC_CMD1, m_vdencItf, &cmdBuffer);
887 
888             SETPAR_AND_ADDCMD(HCP_PIC_STATE, m_hcpItf, &cmdBuffer);
889 
890             SETPAR_AND_ADDCMD(VDENC_CMD2, m_vdencItf, &cmdBuffer);
891         }
892 
893         auto rdoqFeature = dynamic_cast<HevcEncodeCqp *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcCqpFeature));
894         ENCODE_CHK_NULL_RETURN(rdoqFeature);
895         if (rdoqFeature->IsRDOQEnabled())
896         {
897             SETPAR_AND_ADDCMD(HEVC_VP9_RDOQ_STATE, m_hcpItf, &cmdBuffer);
898         }
899 
900         return MOS_STATUS_SUCCESS;
901     }
902 
AddPicStateWithTile(MOS_COMMAND_BUFFER & cmdBuffer)903     MOS_STATUS HevcVdencPkt::AddPicStateWithTile(
904         MOS_COMMAND_BUFFER &cmdBuffer)
905     {
906         ENCODE_FUNC_CALL();
907 
908         bool tileEnabled = false;
909         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, IsEnabled, tileEnabled);
910         if (!tileEnabled)
911         {
912             return MOS_STATUS_SUCCESS;
913         }
914 
915         auto brcFeature = dynamic_cast<HEVCEncodeBRC *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcBrcFeature));
916         ENCODE_CHK_NULL_RETURN(brcFeature);
917         auto vdenc2ndLevelBatchBuffer      = brcFeature->GetVdenc2ndLevelBatchBuffer(m_pipeline->m_currRecycledBufIdx);
918         vdenc2ndLevelBatchBuffer->dwOffset = m_hwInterface->m_vdencBatchBuffer1stGroupSize;
919 
920         if (brcFeature->IsBRCUpdateRequired())
921         {
922             ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_BATCH_BUFFER_START(&cmdBuffer, vdenc2ndLevelBatchBuffer)));
923             HalOcaInterfaceNext::OnSubLevelBBStart(
924                 cmdBuffer,
925                 m_osInterface->pOsContext,
926                 &vdenc2ndLevelBatchBuffer->OsResource,
927                 vdenc2ndLevelBatchBuffer->dwOffset,
928                 false,
929                 m_hwInterface->m_vdencBatchBuffer2ndGroupSize);
930         }
931         // When tile is enabled, below commands are needed for each tile instead of each picture
932         else
933         {
934             PMHW_BATCH_BUFFER thirdLevelBatchBuffer = nullptr;
935             RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetThirdLevelBatchBuffer, thirdLevelBatchBuffer);
936             ENCODE_CHK_NULL_RETURN(thirdLevelBatchBuffer);
937             ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_BATCH_BUFFER_START(&cmdBuffer, thirdLevelBatchBuffer)));
938             HalOcaInterfaceNext::OnSubLevelBBStart(
939                 cmdBuffer,
940                 m_osInterface->pOsContext,
941                 &thirdLevelBatchBuffer->OsResource,
942                 thirdLevelBatchBuffer->dwOffset,
943                 false,
944                 m_hwInterface->m_vdencBatchBuffer2ndGroupSize);
945         }
946 
947         // Send HEVC_VP9_RDOQ_STATE command
948         SETPAR_AND_ADDCMD(HEVC_VP9_RDOQ_STATE, m_hcpItf, &cmdBuffer);
949 
950         return MOS_STATUS_SUCCESS;
951     }
952 
UpdateParameters()953     void HevcVdencPkt::UpdateParameters()
954     {
955         ENCODE_FUNC_CALL();
956 
957         if (!m_pipeline->IsSingleTaskPhaseSupported())
958         {
959             m_osInterface->pfnResetPerfBufferID(m_osInterface);
960         }
961 
962         m_basicFeature->m_currPakSliceIdx = (m_basicFeature->m_currPakSliceIdx + 1) % m_basicFeature->m_codecHalHevcNumPakSliceBatchBuffers;
963     }
964 
UpdateStatusReport(uint32_t srType,MOS_COMMAND_BUFFER * cmdBuffer)965     MOS_STATUS HevcVdencPkt::UpdateStatusReport(uint32_t srType, MOS_COMMAND_BUFFER *cmdBuffer)
966     {
967         ENCODE_FUNC_CALL();
968         ENCODE_CHK_NULL_RETURN(cmdBuffer);
969 
970         //initialize following
971         MOS_RESOURCE *osResourceInline = nullptr;
972         uint32_t      offsetInline     = 0;
973         m_statusReport->GetAddress(statusReportGlobalCount, osResourceInline, offsetInline);
974         offsetInline             = m_atomicScratchBuf.operandSetSize * m_atomicScratchBuf.encodeUpdateIndex;
975         uint32_t zeroValueOffset = offsetInline;
976         uint32_t operand1Offset  = offsetInline + m_atomicScratchBuf.operand1Offset;
977         uint32_t operand2Offset  = offsetInline + m_atomicScratchBuf.operand2Offset;
978         uint32_t operand3Offset  = offsetInline + m_atomicScratchBuf.operand3Offset;
979         auto     mmioRegisters   = m_hwInterface->GetVdencInterfaceNext()->GetMmioRegisters(m_vdboxIndex);
980 
981         // Make Flush DW call to make sure all previous work is done
982         auto &flushDwParams = m_miItf->MHW_GETPAR_F(MI_FLUSH_DW)();
983         flushDwParams       = {};
984         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_FLUSH_DW)(cmdBuffer));
985 
986         // n1_lo = 0x00
987         auto &storeDataParams            = m_miItf->MHW_GETPAR_F(MI_STORE_DATA_IMM)();
988         storeDataParams                  = {};
989         storeDataParams.pOsResource      = m_atomicScratchBuf.resAtomicScratchBuffer;
990         storeDataParams.dwResourceOffset = operand1Offset;
991         storeDataParams.dwValue          = 0x00;
992         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(cmdBuffer));
993 
994         // n2_lo = dwImageStatusMask
995         auto &copyMemMemParams       = m_miItf->MHW_GETPAR_F(MI_COPY_MEM_MEM)();
996         copyMemMemParams             = {};
997         copyMemMemParams.presSrc     = m_basicFeature->m_recycleBuf->GetBuffer(VdencBrcPakMmioBuffer, 0);
998         copyMemMemParams.dwSrcOffset = (sizeof(uint32_t) * 1);
999         copyMemMemParams.presDst     = m_atomicScratchBuf.resAtomicScratchBuffer;
1000         copyMemMemParams.dwDstOffset = operand2Offset;
1001         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_COPY_MEM_MEM)(cmdBuffer));
1002 
1003         // VCS_GPR0_Lo = ImageStatusCtrl
1004         auto &registerMemParams           = m_miItf->MHW_GETPAR_F(MI_LOAD_REGISTER_MEM)();
1005         registerMemParams                 = {};
1006         registerMemParams.presStoreBuffer = m_basicFeature->m_recycleBuf->GetBuffer(VdencBrcPakMmioBuffer, 0);
1007         registerMemParams.dwOffset        = (sizeof(uint32_t) * 0);
1008         registerMemParams.dwRegister      = mmioRegisters->generalPurposeRegister0LoOffset;  // VCS_GPR0_Lo
1009         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer));
1010 
1011         // Reset GPR4_Lo
1012         registerMemParams                 = {};
1013         registerMemParams.presStoreBuffer = m_atomicScratchBuf.resAtomicScratchBuffer;
1014         registerMemParams.dwOffset        = zeroValueOffset;                                 //Offset 0, has value of 0.
1015         registerMemParams.dwRegister      = mmioRegisters->generalPurposeRegister4LoOffset;  // VCS_GPR4
1016         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer));
1017 
1018         // Make Flush DW call to make sure all previous work is done
1019         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_FLUSH_DW)(cmdBuffer));
1020 
1021         // step-1: n2_lo = n2_lo & VCS_GPR0_Lo = dwImageStatusMask & ImageStatusCtrl
1022         auto &atomicParams            = m_miItf->MHW_GETPAR_F(MI_ATOMIC)();
1023         atomicParams                  = {};
1024         atomicParams.pOsResource      = m_atomicScratchBuf.resAtomicScratchBuffer;
1025         atomicParams.dwResourceOffset = operand2Offset;
1026         atomicParams.dwDataSize       = sizeof(uint32_t);
1027         atomicParams.Operation        = mhw::mi::MHW_MI_ATOMIC_AND;
1028         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_ATOMIC)(cmdBuffer));
1029 
1030         // n3_lo = 0
1031         storeDataParams                  = {};
1032         storeDataParams.pOsResource      = m_atomicScratchBuf.resAtomicScratchBuffer;
1033         storeDataParams.dwResourceOffset = operand3Offset;
1034         storeDataParams.dwValue          = 0;
1035         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(cmdBuffer));
1036 
1037         // Make Flush DW call to make sure all previous work is done
1038         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_FLUSH_DW)(cmdBuffer));
1039 
1040         // GPR0_lo = n1_lo = 0
1041         registerMemParams                 = {};
1042         registerMemParams.presStoreBuffer = m_atomicScratchBuf.resAtomicScratchBuffer;
1043         registerMemParams.dwOffset        = operand1Offset;
1044         registerMemParams.dwRegister      = mmioRegisters->generalPurposeRegister0LoOffset;  // VCS_GPR0
1045         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer));
1046 
1047         // Reset GPR4_Lo
1048         registerMemParams                 = {};
1049         registerMemParams.presStoreBuffer = m_atomicScratchBuf.resAtomicScratchBuffer;
1050         registerMemParams.dwOffset        = zeroValueOffset;                                 //Offset 0, has value of 0.
1051         registerMemParams.dwRegister      = mmioRegisters->generalPurposeRegister4LoOffset;  // VCS_GPR4
1052         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer));
1053 
1054         // Make Flush DW call to make sure all previous work is done
1055         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_FLUSH_DW)(cmdBuffer));
1056 
1057         // step-2: n2_lo == n1_lo ? 0 : n2_lo
1058         // compare n1 vs n2. i.e. GRP0 vs. memory of operand2
1059         atomicParams                  = {};
1060         atomicParams.pOsResource      = m_atomicScratchBuf.resAtomicScratchBuffer;
1061         atomicParams.dwResourceOffset = operand2Offset;
1062         atomicParams.dwDataSize       = sizeof(uint32_t);
1063         atomicParams.Operation        = mhw::mi::MHW_MI_ATOMIC_CMP;
1064         atomicParams.bReturnData      = true;
1065         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_ATOMIC)(cmdBuffer));
1066 
1067         // n2_hi = 1
1068         storeDataParams                  = {};
1069         storeDataParams.pOsResource      = m_atomicScratchBuf.resAtomicScratchBuffer;
1070         storeDataParams.dwResourceOffset = operand2Offset + sizeof(uint32_t);
1071         storeDataParams.dwValue          = 1;
1072         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(cmdBuffer));
1073 
1074         // n3_hi = 1
1075         storeDataParams                  = {};
1076         storeDataParams.pOsResource      = m_atomicScratchBuf.resAtomicScratchBuffer;
1077         storeDataParams.dwResourceOffset = operand3Offset + sizeof(uint32_t);
1078         storeDataParams.dwValue          = 1;
1079         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(cmdBuffer));
1080 
1081         // VCS_GPR0_Lo = n3_lo = 0
1082         registerMemParams                 = {};
1083         registerMemParams.presStoreBuffer = m_atomicScratchBuf.resAtomicScratchBuffer;
1084         registerMemParams.dwOffset        = operand3Offset;
1085         registerMemParams.dwRegister      = mmioRegisters->generalPurposeRegister0LoOffset;  // VCS_GPR0_Lo
1086         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer));
1087 
1088         // GPR0_Hi = n2_hi = 1
1089         registerMemParams                 = {};
1090         registerMemParams.presStoreBuffer = m_atomicScratchBuf.resAtomicScratchBuffer;
1091         registerMemParams.dwOffset        = operand2Offset + sizeof(uint32_t);               // update 1
1092         registerMemParams.dwRegister      = mmioRegisters->generalPurposeRegister0HiOffset;  // VCS_GPR0_Hi
1093         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer));
1094 
1095         // Reset GPR4_Lo and GPR4_Hi
1096         registerMemParams                 = {};
1097         registerMemParams.presStoreBuffer = m_atomicScratchBuf.resAtomicScratchBuffer;
1098         registerMemParams.dwOffset        = zeroValueOffset;
1099         registerMemParams.dwRegister      = mmioRegisters->generalPurposeRegister4LoOffset;  // VCS_GPR4_Hi
1100         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer));
1101 
1102         registerMemParams                 = {};
1103         registerMemParams.presStoreBuffer = m_atomicScratchBuf.resAtomicScratchBuffer;
1104         registerMemParams.dwOffset        = zeroValueOffset;
1105         registerMemParams.dwRegister      = mmioRegisters->generalPurposeRegister4HiOffset;  // VCS_GPR4_Hi
1106         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer));
1107 
1108         // Make Flush DW call to make sure all previous work is done
1109         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_FLUSH_DW)(cmdBuffer));
1110 
1111         // step-3: n2 = (n2 == 0:1) ? 0:0 : n2      // uint64_t CMP
1112         // If n2==0 (Lo) and 1 (Hi), covert n2 to 0 (Lo)and 0 (Hi), else no change.
1113         // n2 == 0:1 means encoding completsion. the n2 memory will be updated with 0:0, otherwise, no change.
1114         atomicParams                  = {};
1115         atomicParams.pOsResource      = m_atomicScratchBuf.resAtomicScratchBuffer;
1116         atomicParams.dwResourceOffset = operand2Offset;
1117         atomicParams.dwDataSize       = sizeof(uint64_t);
1118         atomicParams.Operation        = mhw::mi::MHW_MI_ATOMIC_CMP;
1119         atomicParams.bReturnData      = true;
1120         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_ATOMIC)(cmdBuffer));
1121 
1122         // Make Flush DW call to make sure all previous work is done
1123         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_FLUSH_DW)(cmdBuffer));
1124 
1125         // VCS_GPR0_Lo = n3_hi = 1
1126         registerMemParams                 = {};
1127         registerMemParams.presStoreBuffer = m_atomicScratchBuf.resAtomicScratchBuffer;
1128         registerMemParams.dwOffset        = operand3Offset + sizeof(uint32_t);
1129         registerMemParams.dwRegister      = mmioRegisters->generalPurposeRegister0LoOffset;  // VCS_GPR0_Lo
1130         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer));
1131 
1132         // Make Flush DW call to make sure all previous work is done
1133         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_FLUSH_DW)(cmdBuffer));
1134 
1135         // step-4: n2_hi = n2_hi ^ VCS_GPR0_Lo = n2_hi ^ n3_hi
1136         atomicParams                  = {};
1137         atomicParams.pOsResource      = m_atomicScratchBuf.resAtomicScratchBuffer;
1138         atomicParams.dwResourceOffset = operand2Offset + sizeof(uint32_t);
1139         atomicParams.dwDataSize       = sizeof(uint32_t);
1140         atomicParams.Operation        = mhw::mi::MHW_MI_ATOMIC_XOR;
1141         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_ATOMIC)(cmdBuffer));
1142 
1143         // VCS_GPR0_Lo = n2_hi
1144         registerMemParams                 = {};
1145         registerMemParams.presStoreBuffer = m_atomicScratchBuf.resAtomicScratchBuffer;
1146         registerMemParams.dwOffset        = operand2Offset + sizeof(uint32_t);
1147         registerMemParams.dwRegister      = mmioRegisters->generalPurposeRegister0LoOffset;  // VCS_GPR0_Lo
1148         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer));
1149 
1150         // step-5: m_storeData = m_storeData + VCS_GPR0_Lo = m_storeData + n2_hi
1151         // if not completed n2_hi should be 0, then m_storeData = m_storeData + 0
1152         // if completed, n2_hi should be 1, then m_storeData = m_storeData + 1
1153         auto &miLoadRegMemParams           = m_miItf->MHW_GETPAR_F(MI_LOAD_REGISTER_MEM)();
1154         miLoadRegMemParams                 = {};
1155         miLoadRegMemParams.presStoreBuffer = osResourceInline;
1156         miLoadRegMemParams.dwOffset        = 0;
1157         miLoadRegMemParams.dwRegister      = mmioRegisters->generalPurposeRegister4LoOffset;
1158         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer));
1159 
1160         mhw::mi::MHW_MI_ALU_PARAMS aluParams[4] = { 0 };
1161 
1162         int aluCount = 0;
1163 
1164         //load1 srca, reg1
1165         aluParams[aluCount].AluOpcode = MHW_MI_ALU_LOAD;
1166         aluParams[aluCount].Operand1  = MHW_MI_ALU_SRCA;
1167         aluParams[aluCount].Operand2  = MHW_MI_ALU_GPREG0;
1168         ++aluCount;
1169         //load srcb, reg2
1170         aluParams[aluCount].AluOpcode = MHW_MI_ALU_LOAD;
1171         aluParams[aluCount].Operand1  = MHW_MI_ALU_SRCB;
1172         aluParams[aluCount].Operand2  = MHW_MI_ALU_GPREG4;
1173         ++aluCount;
1174         //add
1175         aluParams[aluCount].AluOpcode = MHW_MI_ALU_ADD;
1176         ++aluCount;
1177         //store reg1, accu
1178         aluParams[aluCount].AluOpcode = MHW_MI_ALU_STORE;
1179         aluParams[aluCount].Operand1  = MHW_MI_ALU_GPREG0;
1180         aluParams[aluCount].Operand2  = MHW_MI_ALU_ACCU;
1181         ++aluCount;
1182 
1183         auto &miMathParams          = m_miItf->MHW_GETPAR_F(MI_MATH)();
1184         miMathParams                = {};
1185         miMathParams.dwNumAluParams = aluCount;
1186         miMathParams.pAluPayload    = aluParams;
1187         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_MATH)(cmdBuffer));
1188 
1189         auto &miStoreRegMemParams           = m_miItf->MHW_GETPAR_F(MI_STORE_REGISTER_MEM)();
1190         miStoreRegMemParams                 = {};
1191         miStoreRegMemParams.presStoreBuffer = osResourceInline;
1192         miStoreRegMemParams.dwOffset        = 0;
1193         miStoreRegMemParams.dwRegister      = mmioRegisters->generalPurposeRegister0LoOffset;
1194         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(cmdBuffer));
1195 
1196         // Make Flush DW call to make sure all previous work is done
1197         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_FLUSH_DW)(cmdBuffer));
1198 
1199         return MOS_STATUS_SUCCESS;
1200     }
1201 
EnsureAllCommandsExecuted(MOS_COMMAND_BUFFER & cmdBuffer)1202     MOS_STATUS HevcVdencPkt::EnsureAllCommandsExecuted(MOS_COMMAND_BUFFER &cmdBuffer)
1203     {
1204         ENCODE_FUNC_CALL();
1205 
1206         // Send MI_FLUSH command
1207         auto &flushDwParams                         = m_miItf->MHW_GETPAR_F(MI_FLUSH_DW)();
1208         flushDwParams                               = {};
1209         flushDwParams.bVideoPipelineCacheInvalidate = true;
1210         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_FLUSH_DW)(&cmdBuffer));
1211 
1212         return MOS_STATUS_SUCCESS;
1213     }
1214 
InsertSeqStreamEnd(MOS_COMMAND_BUFFER & cmdBuffer)1215     MOS_STATUS HevcVdencPkt::InsertSeqStreamEnd(MOS_COMMAND_BUFFER &cmdBuffer)
1216     {
1217         ENCODE_FUNC_CALL();
1218 
1219         ENCODE_CHK_STATUS_RETURN(AddAllCmds_HCP_PAK_INSERT_OBJECT(&cmdBuffer));
1220 
1221         return MOS_STATUS_SUCCESS;
1222     }
1223 
AddPictureVdencCommands(MOS_COMMAND_BUFFER & cmdBuffer)1224     MOS_STATUS HevcVdencPkt::AddPictureVdencCommands(MOS_COMMAND_BUFFER & cmdBuffer)
1225     {
1226         ENCODE_FUNC_CALL();
1227 
1228         SETPAR_AND_ADDCMD(VDENC_PIPE_MODE_SELECT, m_vdencItf, &cmdBuffer);
1229         SETPAR_AND_ADDCMD(VDENC_SRC_SURFACE_STATE, m_vdencItf, &cmdBuffer);
1230         SETPAR_AND_ADDCMD(VDENC_REF_SURFACE_STATE, m_vdencItf, &cmdBuffer);
1231         SETPAR_AND_ADDCMD(VDENC_DS_REF_SURFACE_STATE, m_vdencItf, &cmdBuffer);
1232         SETPAR_AND_ADDCMD(VDENC_PIPE_BUF_ADDR_STATE, m_vdencItf, &cmdBuffer);
1233 
1234         return MOS_STATUS_SUCCESS;
1235     }
1236 
AddPictureHcpCommands(MOS_COMMAND_BUFFER & cmdBuffer)1237     MOS_STATUS HevcVdencPkt::AddPictureHcpCommands(
1238         MOS_COMMAND_BUFFER &cmdBuffer)
1239     {
1240         ENCODE_FUNC_CALL();
1241 
1242         ENCODE_CHK_STATUS_RETURN(AddHcpPipeModeSelect(cmdBuffer));
1243 
1244         ENCODE_CHK_STATUS_RETURN(AddAllCmds_HCP_SURFACE_STATE(&cmdBuffer));
1245 
1246         SETPAR_AND_ADDCMD(HCP_PIPE_BUF_ADDR_STATE, m_hcpItf, &cmdBuffer);
1247 
1248         SETPAR_AND_ADDCMD(HCP_IND_OBJ_BASE_ADDR_STATE, m_hcpItf, &cmdBuffer);
1249 
1250         ENCODE_CHK_STATUS_RETURN(AddAllCmds_HCP_FQM_STATE(&cmdBuffer));
1251         ENCODE_CHK_STATUS_RETURN(AddAllCmds_HCP_QM_STATE(&cmdBuffer));
1252 
1253         return MOS_STATUS_SUCCESS;
1254     }
1255 
AddHcpPipeModeSelect(MOS_COMMAND_BUFFER & cmdBuffer)1256     MOS_STATUS HevcVdencPkt::AddHcpPipeModeSelect(
1257         MOS_COMMAND_BUFFER &cmdBuffer)
1258     {
1259         ENCODE_FUNC_CALL();
1260 
1261         SETPAR_AND_ADDCMD(VDENC_CONTROL_STATE, m_vdencItf, &cmdBuffer);
1262 
1263         auto &vdControlStateParams          = m_miItf->MHW_GETPAR_F(VD_CONTROL_STATE)();
1264         vdControlStateParams                = {};
1265         vdControlStateParams.initialization = true;
1266         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(VD_CONTROL_STATE)(&cmdBuffer));
1267 
1268         // for Gen11+, we need to add MFX wait for both KIN and VRT before and after HCP Pipemode select...
1269         auto &mfxWaitParams                 = m_miItf->MHW_GETPAR_F(MFX_WAIT)();
1270         mfxWaitParams                       = {};
1271         mfxWaitParams.iStallVdboxPipeline   = true;
1272         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MFX_WAIT)(&cmdBuffer));
1273 
1274         SETPAR_AND_ADDCMD(HCP_PIPE_MODE_SELECT, m_hcpItf, &cmdBuffer);
1275 
1276         mfxWaitParams                       = {};
1277         mfxWaitParams.iStallVdboxPipeline   = true;
1278         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MFX_WAIT)(&cmdBuffer));
1279 
1280         return MOS_STATUS_SUCCESS;
1281     }
1282 
CalculatePictureStateCommandSize()1283     MOS_STATUS HevcVdencPkt::CalculatePictureStateCommandSize()
1284     {
1285         ENCODE_FUNC_CALL();
1286 
1287         uint32_t hcpCommandsSize  = 0;
1288         uint32_t hcpPatchListSize = 0;
1289         uint32_t cpCmdsize        = 0;
1290         uint32_t cpPatchListSize  = 0;
1291         uint32_t hucCommandsSize = 0;
1292         uint32_t hucPatchListSize = 0;
1293 
1294         MHW_VDBOX_STATE_CMDSIZE_PARAMS stateCmdSizeParams;
1295 
1296         hcpCommandsSize =
1297             m_vdencItf->MHW_GETSIZE_F(VD_PIPELINE_FLUSH)() +
1298             m_miItf->MHW_GETSIZE_F(MI_FLUSH_DW)() +
1299             m_hcpItf->MHW_GETSIZE_F(HCP_PIPE_MODE_SELECT)() +
1300             m_hcpItf->MHW_GETSIZE_F(HCP_SURFACE_STATE)() +
1301             m_hcpItf->MHW_GETSIZE_F(HCP_PIPE_BUF_ADDR_STATE)() +
1302             m_hcpItf->MHW_GETSIZE_F(HCP_IND_OBJ_BASE_ADDR_STATE)() +
1303             m_miItf->MHW_GETSIZE_F(MI_LOAD_REGISTER_REG)() * 8;
1304 
1305         hcpPatchListSize =
1306             PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::VD_PIPELINE_FLUSH_CMD) +
1307             PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::MI_FLUSH_DW_CMD) +
1308             PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::HCP_PIPE_MODE_SELECT_CMD) +
1309             PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::HCP_SURFACE_STATE_CMD) +
1310             PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::HCP_PIPE_BUF_ADDR_STATE_CMD) +
1311             PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::HCP_IND_OBJ_BASE_ADDR_STATE_CMD);
1312 
1313         // HCP_QM_STATE_CMD may be issued up to 20 times: 3x Colour Component plus 2x intra/inter plus 4x SizeID minus 4 for the 32x32 chroma components.
1314         // HCP_FQP_STATE_CMD may be issued up to 8 times: 4 scaling list per intra and inter.
1315         hcpCommandsSize +=
1316             2 * m_miItf->MHW_GETSIZE_F(VD_CONTROL_STATE)() +
1317             m_hcpItf->MHW_GETSIZE_F(HCP_SURFACE_STATE)() +  // encoder needs two surface state commands. One is for raw and another one is for recon surfaces.
1318             20 * m_hcpItf->MHW_GETSIZE_F(HCP_QM_STATE)() +
1319             8 * m_hcpItf->MHW_GETSIZE_F(HCP_FQM_STATE)() +
1320             m_hcpItf->MHW_GETSIZE_F(HCP_PIC_STATE)() +
1321             m_hcpItf->MHW_GETSIZE_F(HEVC_VP9_RDOQ_STATE)() +        // RDOQ
1322             2 * m_miItf->MHW_GETSIZE_F(MI_STORE_DATA_IMM)() +       // Slice level commands
1323             2 * m_miItf->MHW_GETSIZE_F(MI_FLUSH_DW)() +             // need for Status report, Mfc Status and
1324             10 * m_miItf->MHW_GETSIZE_F(MI_STORE_REGISTER_MEM)() +  // 8 for BRCStatistics and 2 for RC6 WAs
1325             m_miItf->MHW_GETSIZE_F(MI_LOAD_REGISTER_MEM)() +        // 1 for RC6 WA
1326             2 * m_hcpItf->MHW_GETSIZE_F(HCP_PAK_INSERT_OBJECT)() +  // Two PAK insert object commands are for headers before the slice header and the header for the end of stream
1327             4 * m_miItf->MHW_GETSIZE_F(MI_STORE_DATA_IMM)() +       // two (BRC+reference frame) for clean-up HW semaphore memory and another two for signal it
1328             17 * m_miItf->MHW_GETSIZE_F(MI_SEMAPHORE_WAIT)() +      // Use HW wait command for each reference and one wait for current semaphore object
1329             m_miItf->MHW_GETSIZE_F(MI_SEMAPHORE_WAIT)() +           // Use HW wait command for each BRC pass
1330             +m_miItf->MHW_GETSIZE_F(MI_SEMAPHORE_WAIT)()            // Use HW wait command for each VDBOX
1331             + 2 * m_miItf->MHW_GETSIZE_F(MI_STORE_DATA_IMM)()       // One is for reset and another one for set per VDBOX
1332             + 8 * m_miItf->MHW_GETSIZE_F(MI_COPY_MEM_MEM)()         // Need to copy SSE statistics/ Slice Size overflow into memory
1333             ;
1334 
1335         hcpPatchListSize +=
1336             20 * PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::HCP_QM_STATE_CMD) +
1337             8 * PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::HCP_FQM_STATE_CMD) +
1338             PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::HCP_PIC_STATE_CMD) +
1339             PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::MI_BATCH_BUFFER_START_CMD) +       // When BRC is on, HCP_PIC_STATE_CMD command is in the BB
1340             2 * PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::MI_STORE_DATA_IMM_CMD) +       // Slice level commands
1341             2 * PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::MI_FLUSH_DW_CMD) +             // need for Status report, Mfc Status and
1342             11 * PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::MI_STORE_REGISTER_MEM_CMD) +  // 8 for BRCStatistics and 3 for RC6 WAs
1343             22 * PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::MI_STORE_DATA_IMM_CMD)        // Use HW wait commands plus its memory clean-up and signal (4+ 16 + 1 + 1)
1344             + 8 * PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::MI_BATCH_BUFFER_START_CMD)   // At maximal, there are 8 batch buffers for 8 VDBOXes for VE. Each box has one BB.
1345             + PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::MI_FLUSH_DW_CMD)                 // Need one flush before copy command
1346             + PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::MFX_WAIT_CMD)                    // Need one wait after copy command
1347             + 3 * PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::MI_STORE_DATA_IMM_CMD)       // one wait commands and two for reset and set semaphore memory
1348             + 8 * PATCH_LIST_COMMAND(mhw::vdbox::hcp::Itf::MI_COPY_MEM_MEM_CMD)         // Need to copy SSE statistics/ Slice Size overflow into memory
1349             ;
1350 
1351         auto cpInterface = m_hwInterface->GetCpInterface();
1352         cpInterface->GetCpStateLevelCmdSize(cpCmdsize, cpPatchListSize);
1353 
1354         ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucStateCommandSize(
1355             m_basicFeature->m_mode, (uint32_t *)&hucCommandsSize, (uint32_t *)&hucPatchListSize, &stateCmdSizeParams));
1356 
1357         m_defaultPictureStatesSize    = hcpCommandsSize + hucCommandsSize + (uint32_t)cpCmdsize;
1358         m_defaultPicturePatchListSize = hcpPatchListSize + hucPatchListSize + (uint32_t)cpPatchListSize;
1359 
1360         return MOS_STATUS_SUCCESS;
1361     }
1362 
SendHwSliceEncodeCommand(const PCODEC_ENCODER_SLCDATA slcData,const uint32_t currSlcIdx,MOS_COMMAND_BUFFER & cmdBuffer)1363     MOS_STATUS HevcVdencPkt::SendHwSliceEncodeCommand(const PCODEC_ENCODER_SLCDATA slcData, const uint32_t currSlcIdx, MOS_COMMAND_BUFFER &cmdBuffer)
1364     {
1365         MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1366 
1367         ENCODE_FUNC_CALL();
1368 
1369         // VDENC does not use batch buffer for slice state
1370         // add HCP_REF_IDX command
1371         ENCODE_CHK_STATUS_RETURN(AddAllCmds_HCP_REF_IDX_STATE(&cmdBuffer));
1372 
1373         bool              vdencHucInUse    = false;
1374         PMHW_BATCH_BUFFER vdencBatchBuffer = nullptr;
1375 
1376         RUN_FEATURE_INTERFACE_RETURN(HEVCEncodeBRC, HevcFeatureIDs::hevcBrcFeature, SetVdencBatchBufferState, m_pipeline->m_currRecycledBufIdx, currSlcIdx, vdencBatchBuffer, vdencHucInUse);
1377 
1378         if (vdencHucInUse)
1379         {
1380             // 2nd level batch buffer
1381             PMHW_BATCH_BUFFER secondLevelBatchBufferUsed = vdencBatchBuffer;
1382             ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_BATCH_BUFFER_START(&cmdBuffer, secondLevelBatchBufferUsed)));
1383             HalOcaInterfaceNext::OnSubLevelBBStart(
1384                 cmdBuffer,
1385                 m_osInterface->pOsContext,
1386                 &secondLevelBatchBufferUsed->OsResource,
1387                 secondLevelBatchBufferUsed->dwOffset,
1388                 false,
1389                 m_basicFeature->m_vdencBatchBufferPerSlicePart2Start[currSlcIdx] - secondLevelBatchBufferUsed->dwOffset);
1390             ENCODE_CHK_STATUS_RETURN(AddAllCmds_HCP_PAK_INSERT_OBJECT_BRC(&cmdBuffer));
1391             secondLevelBatchBufferUsed->dwOffset = m_basicFeature->m_vdencBatchBufferPerSlicePart2Start[currSlcIdx];
1392             ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_BATCH_BUFFER_START(&cmdBuffer, secondLevelBatchBufferUsed)));
1393             HalOcaInterfaceNext::OnSubLevelBBStart(
1394                 cmdBuffer,
1395                 m_osInterface->pOsContext,
1396                 &secondLevelBatchBufferUsed->OsResource,
1397                 secondLevelBatchBufferUsed->dwOffset,
1398                 false,
1399                 m_basicFeature->m_vdencBatchBufferPerSlicePart2Size[currSlcIdx]);
1400         }
1401         else
1402         {
1403             // Weighted Prediction
1404             // This slice level command is issued, if the weighted_pred_flag or weighted_bipred_flag equals one.
1405             // If zero, then this command is not issued.
1406             ENCODE_CHK_STATUS_RETURN(AddAllCmds_HCP_WEIGHTOFFSET_STATE(&cmdBuffer));
1407 
1408             m_basicFeature->m_useDefaultRoundingForHcpSliceState = false;
1409             SETPAR_AND_ADDCMD(HCP_SLICE_STATE, m_hcpItf, &cmdBuffer);
1410 
1411             // add HCP_PAK_INSERT_OBJECTS command
1412             ENCODE_CHK_STATUS_RETURN(AddAllCmds_HCP_PAK_INSERT_OBJECT(&cmdBuffer));
1413 
1414             SETPAR_AND_ADDCMD(VDENC_WEIGHTSOFFSETS_STATE, m_vdencItf, &cmdBuffer);
1415         }
1416         SETPAR_AND_ADDCMD(VDENC_HEVC_VP9_TILE_SLICE_STATE, m_vdencItf, &cmdBuffer);
1417         SETPAR_AND_ADDCMD(VDENC_WALKER_STATE, m_vdencItf, &cmdBuffer);
1418         return eStatus;
1419     }
1420 
AddAllCmds_HCP_PAK_INSERT_OBJECT_BRC(PMOS_COMMAND_BUFFER cmdBuffer) const1421 MOS_STATUS HevcVdencPkt::AddAllCmds_HCP_PAK_INSERT_OBJECT_BRC(PMOS_COMMAND_BUFFER cmdBuffer) const
1422 {
1423     ENCODE_FUNC_CALL();
1424 
1425     ENCODE_CHK_NULL_RETURN(cmdBuffer);
1426 
1427     auto &params = m_hcpItf->MHW_GETPAR_F(HCP_PAK_INSERT_OBJECT)();
1428     params       = {};
1429 
1430     PCODECHAL_NAL_UNIT_PARAMS *ppNalUnitParams = (CODECHAL_NAL_UNIT_PARAMS **)m_nalUnitParams;
1431 
1432     auto brcFeature = dynamic_cast<HEVCEncodeBRC *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcBrcFeature));
1433     ENCODE_CHK_NULL_RETURN(brcFeature);
1434 
1435     PBSBuffer pBsBuffer = &(m_basicFeature->m_bsBuffer);
1436     uint32_t  bitSize   = 0;
1437     uint32_t  offSet    = 0;
1438 
1439     //insert AU, SPS, PSP headers before first slice header
1440     if (m_basicFeature->m_curNumSlices == 0)
1441     {
1442         uint32_t maxBytesInPakInsertObjCmd = ((2 << 11) - 1) * 4;  // 12 bits for Length field in PAK_INSERT_OBJ cmd
1443 
1444         for (auto i = 0; i < HEVC_MAX_NAL_UNIT_TYPE; i++)
1445         {
1446             uint32_t nalunitPosiSize   = ppNalUnitParams[i]->uiSize;
1447             uint32_t nalunitPosiOffset = ppNalUnitParams[i]->uiOffset;
1448 
1449             while (nalunitPosiSize > 0)
1450             {
1451                 bitSize = MOS_MIN(maxBytesInPakInsertObjCmd * 8, nalunitPosiSize * 8);
1452                 offSet  = nalunitPosiOffset;
1453 
1454                 params = {};
1455 
1456                 params.dwPadding                 = (MOS_ALIGN_CEIL((bitSize + 7) >> 3, sizeof(uint32_t))) / sizeof(uint32_t);
1457                 params.bEmulationByteBitsInsert  = ppNalUnitParams[i]->bInsertEmulationBytes;
1458                 params.uiSkipEmulationCheckCount = ppNalUnitParams[i]->uiSkipEmulationCheckCount;
1459                 params.dataBitsInLastDw          = bitSize % 32;
1460                 if (params.dataBitsInLastDw == 0)
1461                 {
1462                     params.dataBitsInLastDw = 32;
1463                 }
1464 
1465                 if (nalunitPosiSize > maxBytesInPakInsertObjCmd)
1466                 {
1467                     nalunitPosiSize -= maxBytesInPakInsertObjCmd;
1468                     nalunitPosiOffset += maxBytesInPakInsertObjCmd;
1469                 }
1470                 else
1471                 {
1472                     nalunitPosiSize = 0;
1473                 }
1474                 m_hcpItf->MHW_ADDCMD_F(HCP_PAK_INSERT_OBJECT)(cmdBuffer);
1475                 uint32_t byteSize = (bitSize + 7) >> 3;
1476                 if (byteSize)
1477                 {
1478                     MHW_MI_CHK_NULL(pBsBuffer);
1479                     MHW_MI_CHK_NULL(pBsBuffer->pBase);
1480                     uint8_t *data = (uint8_t *)(pBsBuffer->pBase + offSet);
1481                     MHW_MI_CHK_STATUS(Mhw_AddCommandCmdOrBB(m_osInterface, cmdBuffer, nullptr, data, byteSize));
1482                 }
1483             }
1484         }
1485     }
1486 
1487     return MOS_STATUS_SUCCESS;
1488 }
1489 
AddCondBBEndForLastPass(MOS_COMMAND_BUFFER & cmdBuffer)1490     MOS_STATUS HevcVdencPkt::AddCondBBEndForLastPass(MOS_COMMAND_BUFFER &cmdBuffer)
1491     {
1492         ENCODE_FUNC_CALL();
1493 
1494         if (m_pipeline->IsFirstPass() || m_pipeline->GetPassNum() == 1)
1495         {
1496             return MOS_STATUS_SUCCESS;
1497         }
1498 
1499         bool conditionalPass = true;
1500         RUN_FEATURE_INTERFACE_RETURN(VdencLplaAnalysis, HevcFeatureIDs::vdencLplaAnalysisFeature,
1501             SetConditionalPass, m_pipeline->IsLastPass(), conditionalPass);
1502 
1503         if (conditionalPass)
1504         {
1505             auto &miConditionalBatchBufferEndParams = m_miItf->MHW_GETPAR_F(MI_CONDITIONAL_BATCH_BUFFER_END)();
1506             miConditionalBatchBufferEndParams       = {};
1507 
1508             // VDENC uses HuC FW generated semaphore for conditional 2nd pass
1509             miConditionalBatchBufferEndParams.presSemaphoreBuffer =
1510                 m_basicFeature->m_recycleBuf->GetBuffer(VdencBrcPakMmioBuffer, 0);
1511 
1512             ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_CONDITIONAL_BATCH_BUFFER_END)(&cmdBuffer));
1513         }
1514 
1515         // where is m_encodeStatusBuf?
1516         auto mmioRegisters = m_hcpItf->GetMmioRegisters(m_vdboxIndex);
1517         MOS_RESOURCE *osResource = nullptr;
1518         uint32_t      offset = 0;
1519         m_statusReport->GetAddress(statusReportImageStatusCtrl, osResource, offset);
1520         //uint32_t baseOffset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2;  // encodeStatus is offset by 2 DWs in the resource
1521 
1522         // Write back the HCP image control register for RC6 may clean it out
1523         auto &registerMemParams           = m_miItf->MHW_GETPAR_F(MI_LOAD_REGISTER_MEM)();
1524         registerMemParams                 = {};
1525         registerMemParams.presStoreBuffer = osResource;
1526         registerMemParams.dwOffset        = offset;
1527         registerMemParams.dwRegister      = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
1528         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(&cmdBuffer));
1529 
1530         HevcVdencBrcBuffers *vdencBrcBuffers = nullptr;
1531         auto feature = dynamic_cast<HEVCEncodeBRC *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcBrcFeature));
1532         ENCODE_CHK_NULL_RETURN(feature);
1533         vdencBrcBuffers = feature->GetHevcVdencBrcBuffers();
1534         ENCODE_CHK_NULL_RETURN(vdencBrcBuffers);
1535 
1536         auto &miStoreRegMemParams           = m_miItf->MHW_GETPAR_F(MI_STORE_REGISTER_MEM)();
1537         miStoreRegMemParams                 = {};
1538         miStoreRegMemParams.presStoreBuffer = vdencBrcBuffers->resBrcPakStatisticBuffer[vdencBrcBuffers->currBrcPakStasIdxForWrite];
1539         miStoreRegMemParams.dwOffset        = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS);
1540         miStoreRegMemParams.dwRegister      = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
1541         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(&cmdBuffer));
1542 
1543         m_statusReport->GetAddress(statusReportImageStatusCtrlOfLastBRCPass, osResource, offset);
1544         miStoreRegMemParams                 = {};
1545         miStoreRegMemParams.presStoreBuffer = osResource;
1546         miStoreRegMemParams.dwOffset        = offset;
1547         miStoreRegMemParams.dwRegister      = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
1548         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(&cmdBuffer));
1549 
1550         return MOS_STATUS_SUCCESS;
1551     }
1552 
FreeResources()1553     MOS_STATUS HevcVdencPkt::FreeResources()
1554     {
1555         MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1556 
1557         ENCODE_FUNC_CALL();
1558 
1559         #if USE_CODECHAL_DEBUG_TOOL && _ENCODE_RESERVED
1560         CODECHAL_DEBUG_TOOL(
1561             CodechalDebugInterface *debugInterface = m_pipeline->GetDebugInterface();
1562             if (debugInterface && debugInterface->DumpIsEnabled(CodechalDbgAttr::attrDumpEncodePar)) {
1563                 m_hevcParDump->DumpParFile();
1564             })
1565         #endif
1566         for (auto j = 0; j < HevcBasicFeature::m_codecHalHevcNumPakSliceBatchBuffers; j++)
1567         {
1568             eStatus = Mhw_FreeBb(m_osInterface, &m_batchBufferForPakSlices[j], nullptr);
1569             ENCODE_ASSERT(eStatus == MOS_STATUS_SUCCESS);
1570         }
1571 
1572         return eStatus;
1573     }
1574 
Init()1575     MOS_STATUS HevcVdencPkt::Init()
1576     {
1577         ENCODE_FUNC_CALL();
1578         ENCODE_CHK_NULL_RETURN(m_statusReport);
1579 
1580         ENCODE_CHK_STATUS_RETURN(CmdPacket::Init());
1581         m_basicFeature = dynamic_cast<HevcBasicFeature *>(m_featureManager->GetFeature(HevcFeatureIDs::basicFeature));
1582         ENCODE_CHK_NULL_RETURN(m_basicFeature);
1583 
1584 #ifdef _MMC_SUPPORTED
1585         m_mmcState = m_pipeline->GetMmcState();
1586         ENCODE_CHK_NULL_RETURN(m_mmcState);
1587         m_basicFeature->m_mmcState = m_mmcState;
1588         m_basicFeature->m_ref.m_mmcState = m_mmcState;
1589 #endif
1590         m_allocator = m_pipeline->GetEncodeAllocator();
1591         ENCODE_CHK_STATUS_RETURN(AllocateResources());
1592 
1593         ENCODE_CHK_STATUS_RETURN(m_statusReport->RegistObserver(this));
1594 
1595         CalculatePictureStateCommandSize();
1596 
1597         uint32_t vdencPictureStatesSize = 0, vdencPicturePatchListSize = 0;
1598         GetVdencStateCommandsDataSize(vdencPictureStatesSize, vdencPicturePatchListSize);
1599         m_defaultPictureStatesSize += vdencPictureStatesSize;
1600         m_defaultPicturePatchListSize += vdencPicturePatchListSize;
1601 
1602         GetHxxPrimitiveCommandSize();
1603 
1604         m_usePatchList = m_osInterface->bUsesPatchList;
1605 
1606         m_packetUtilities = m_pipeline->GetPacketUtilities();
1607         ENCODE_CHK_NULL_RETURN(m_packetUtilities);
1608 
1609         return MOS_STATUS_SUCCESS;
1610     }
1611 
SetRowstoreCachingOffsets()1612     MOS_STATUS HevcVdencPkt::SetRowstoreCachingOffsets()
1613     {
1614         MHW_VDBOX_ROWSTORE_PARAMS rowStoreParams;
1615 
1616         rowStoreParams.Mode             = m_basicFeature->m_mode;
1617         rowStoreParams.dwPicWidth       = m_basicFeature->m_frameWidth;
1618         rowStoreParams.ucChromaFormat   = m_basicFeature->m_chromaFormat;
1619         rowStoreParams.ucBitDepthMinus8 = m_hevcSeqParams->bit_depth_luma_minus8;
1620         rowStoreParams.ucLCUSize        = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
1621         // VDEnc only support LCU64 for now
1622         ENCODE_ASSERT(rowStoreParams.ucLCUSize == m_basicFeature->m_maxLCUSize);
1623         ENCODE_CHK_STATUS_RETURN(m_hwInterface->SetRowstoreCachingOffsets(&rowStoreParams));
1624 
1625         if (m_vdencItf)
1626         {
1627             mhw::vdbox::vdenc::RowStorePar par = {};
1628 
1629             par.mode = mhw::vdbox::vdenc::RowStorePar::HEVC;
1630             par.bitDepth = mhw::vdbox::vdenc::RowStorePar::DEPTH_8;
1631             if (rowStoreParams.ucBitDepthMinus8 == 1 || rowStoreParams.ucBitDepthMinus8 == 2)
1632             {
1633                 par.bitDepth = mhw::vdbox::vdenc::RowStorePar::DEPTH_10;
1634             }
1635             else if (rowStoreParams.ucBitDepthMinus8 > 2)
1636             {
1637                 par.bitDepth = mhw::vdbox::vdenc::RowStorePar::DEPTH_12;
1638             }
1639             par.lcuSize = mhw ::vdbox::vdenc::RowStorePar::SIZE_OTHER;
1640             if (rowStoreParams.ucLCUSize == 32)
1641             {
1642                 par.lcuSize = mhw ::vdbox::vdenc::RowStorePar::SIZE_32;
1643             }
1644             else if (rowStoreParams.ucLCUSize == 64)
1645             {
1646                 par.lcuSize = mhw ::vdbox::vdenc::RowStorePar::SIZE_64;
1647             }
1648             par.frameWidth = rowStoreParams.dwPicWidth;
1649             switch (rowStoreParams.ucChromaFormat)
1650             {
1651             case HCP_CHROMA_FORMAT_MONOCHROME:
1652                 par.format = mhw ::vdbox::vdenc::RowStorePar::MONOCHROME;
1653                 break;
1654             case HCP_CHROMA_FORMAT_YUV420:
1655                 par.format = mhw ::vdbox::vdenc::RowStorePar::YUV420;
1656                 break;
1657             case HCP_CHROMA_FORMAT_YUV422:
1658                 par.format = mhw ::vdbox::vdenc::RowStorePar::YUV422;
1659                 break;
1660             case HCP_CHROMA_FORMAT_YUV444:
1661                 par.format = mhw ::vdbox::vdenc::RowStorePar::YUV444;
1662                 break;
1663             }
1664 
1665             ENCODE_CHK_STATUS_RETURN(m_vdencItf->SetRowstoreCachingOffsets(par));
1666         }
1667 
1668         hcp::HcpVdboxRowStorePar rowstoreParams = {};
1669         rowstoreParams.Mode                     = m_basicFeature->m_mode;
1670         rowstoreParams.dwPicWidth               = m_basicFeature->m_frameWidth;
1671         rowstoreParams.ucChromaFormat           = m_basicFeature->m_chromaFormat;
1672         rowstoreParams.ucBitDepthMinus8         = m_hevcSeqParams->bit_depth_luma_minus8;
1673         rowstoreParams.ucLCUSize                = 1 << (m_hevcSeqParams->log2_max_coding_block_size_minus3 + 3);
1674         // VDEnc only support LCU64 for now
1675         ENCODE_ASSERT(rowstoreParams.ucLCUSize == m_basicFeature->m_maxLCUSize);
1676         m_hcpItf->SetRowstoreCachingOffsets(rowstoreParams);
1677 
1678         return MOS_STATUS_SUCCESS;
1679     }
1680 
Destroy()1681     MOS_STATUS HevcVdencPkt::Destroy()
1682     {
1683         m_statusReport->UnregistObserver(this);
1684         return MOS_STATUS_SUCCESS;
1685     }
1686 
SetPakPassType()1687     void HevcVdencPkt::SetPakPassType()
1688     {
1689         ENCODE_FUNC_CALL();
1690 
1691         // default: VDEnc+PAK pass
1692         m_pakOnlyPass = false;
1693 
1694         return;
1695     }
1696 
1697     // Inline functions
ValidateVdboxIdx(const MHW_VDBOX_NODE_IND & vdboxIndex)1698     MOS_STATUS HevcVdencPkt::ValidateVdboxIdx(const MHW_VDBOX_NODE_IND &vdboxIndex)
1699     {
1700         ENCODE_FUNC_CALL();
1701 
1702         MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1703         if (vdboxIndex > m_hwInterface->GetMaxVdboxIndex())
1704         {
1705             ENCODE_ASSERTMESSAGE("ERROR - vdbox index exceed the maximum");
1706             eStatus = MOS_STATUS_INVALID_PARAMETER;
1707         }
1708 
1709         return eStatus;
1710     }
1711 
SetPerfTag()1712     void HevcVdencPkt::SetPerfTag()
1713     {
1714         ENCODE_FUNC_CALL();
1715 
1716         uint16_t callType = m_pipeline->IsFirstPass() ? CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE : CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE_SECOND_PASS;
1717         uint16_t picType  = m_basicFeature->m_pictureCodingType;
1718         if (m_basicFeature->m_pictureCodingType == B_TYPE && m_basicFeature->m_ref.IsLowDelay())
1719         {
1720             picType = 0;
1721         }
1722 
1723         PerfTagSetting perfTag;
1724         perfTag.Value             = 0;
1725         perfTag.Mode              = (uint16_t)m_basicFeature->m_mode & CODECHAL_ENCODE_MODE_BIT_MASK;
1726         perfTag.CallType          = callType;
1727         perfTag.PictureCodingType = picType;
1728         m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value);
1729         m_osInterface->pfnIncPerfBufferID(m_osInterface);
1730     }
1731 
SetSemaphoreMem(MOS_RESOURCE & semaphoreMem,uint32_t value,MOS_COMMAND_BUFFER & cmdBuffer)1732     MOS_STATUS HevcVdencPkt::SetSemaphoreMem(
1733         MOS_RESOURCE &      semaphoreMem,
1734         uint32_t            value,
1735         MOS_COMMAND_BUFFER &cmdBuffer)
1736     {
1737         ENCODE_FUNC_CALL();
1738 
1739         auto &storeDataParams            = m_miItf->MHW_GETPAR_F(MI_STORE_DATA_IMM)();
1740         storeDataParams                  = {};
1741         storeDataParams.pOsResource      = &semaphoreMem;
1742         storeDataParams.dwResourceOffset = 0;
1743         storeDataParams.dwValue          = value;
1744 
1745         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(&cmdBuffer));
1746 
1747         return MOS_STATUS_SUCCESS;
1748     }
1749 
SendPrologCmds(MOS_COMMAND_BUFFER & cmdBuffer)1750     MOS_STATUS HevcVdencPkt::SendPrologCmds(
1751         MOS_COMMAND_BUFFER &cmdBuffer)
1752     {
1753         MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1754 
1755         ENCODE_FUNC_CALL();
1756 
1757         auto packetUtilities = m_pipeline->GetPacketUtilities();
1758         ENCODE_CHK_NULL_RETURN(packetUtilities);
1759         if (m_basicFeature->m_setMarkerEnabled)
1760         {
1761             PMOS_RESOURCE presSetMarker = m_osInterface->pfnGetMarkerResource(m_osInterface);
1762             ENCODE_CHK_STATUS_RETURN(packetUtilities->SendMarkerCommand(&cmdBuffer, presSetMarker));
1763         }
1764 
1765 #ifdef _MMC_SUPPORTED
1766         ENCODE_CHK_NULL_RETURN(m_mmcState);
1767         ENCODE_CHK_STATUS_RETURN(m_mmcState->SendPrologCmd(&cmdBuffer, false));
1768 #endif
1769 
1770         MHW_GENERIC_PROLOG_PARAMS genericPrologParams;
1771         MOS_ZeroMemory(&genericPrologParams, sizeof(genericPrologParams));
1772         genericPrologParams.pOsInterface  = m_osInterface;
1773         genericPrologParams.pvMiInterface = nullptr;
1774         genericPrologParams.bMmcEnabled   = m_mmcState ? m_mmcState->IsMmcEnabled() : false;
1775         ENCODE_CHK_STATUS_RETURN(Mhw_SendGenericPrologCmdNext(&cmdBuffer, &genericPrologParams, m_miItf));
1776 
1777         // Send predication command
1778         if (m_basicFeature->m_predicationEnabled)
1779         {
1780             ENCODE_CHK_STATUS_RETURN(packetUtilities->SendPredicationCommand(&cmdBuffer));
1781         }
1782 
1783         return eStatus;
1784     }
1785 
AllocateBatchBufferForPakSlices(uint32_t numSlices,uint16_t numPakPasses)1786     MOS_STATUS HevcVdencPkt::AllocateBatchBufferForPakSlices(
1787         uint32_t numSlices,
1788         uint16_t numPakPasses)
1789     {
1790         MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1791 
1792         ENCODE_FUNC_CALL();
1793 
1794         MOS_ZeroMemory(
1795             &m_batchBufferForPakSlices[m_basicFeature->m_currPakSliceIdx],
1796             sizeof(MHW_BATCH_BUFFER));
1797 
1798         // Get the slice size
1799         uint32_t size = numPakPasses * numSlices * m_sliceStatesSize;
1800 
1801         m_batchBufferForPakSlices[m_basicFeature->m_currPakSliceIdx].bSecondLevel = true;
1802         ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb(
1803             m_osInterface,
1804             &m_batchBufferForPakSlices[m_basicFeature->m_currPakSliceIdx],
1805             nullptr,
1806             size));
1807 
1808         MOS_LOCK_PARAMS lockFlags;
1809         MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
1810         lockFlags.WriteOnly = 1;
1811         uint8_t *data       = (uint8_t *)m_allocator->LockResourceForWrite(&m_batchBufferForPakSlices[m_basicFeature->m_currPakSliceIdx].OsResource);
1812 
1813         if (data == nullptr)
1814         {
1815             ENCODE_ASSERTMESSAGE("Failed to lock batch buffer for PAK slices.");
1816             eStatus = MOS_STATUS_UNKNOWN;
1817             return eStatus;
1818         }
1819 
1820         m_allocator->UnLock(&m_batchBufferForPakSlices[m_basicFeature->m_currPakSliceIdx].OsResource);
1821 
1822         return eStatus;
1823     }
1824 
ReadExtStatistics(MOS_COMMAND_BUFFER & cmdBuffer)1825     MOS_STATUS HevcVdencPkt::ReadExtStatistics(MOS_COMMAND_BUFFER &cmdBuffer)
1826     {
1827         ENCODE_FUNC_CALL();
1828 
1829         PMOS_RESOURCE osResource = nullptr;
1830         uint32_t      offset     = 0;
1831 
1832         m_statusReport->GetAddress(statusReportSumSquareError, osResource, offset);
1833 
1834         for (auto i = 0; i < 3; i++)  // 64 bit SSE values for luma/ chroma channels need to be copied
1835         {
1836             auto &miCpyMemMemParams = m_miItf->MHW_GETPAR_F(MI_COPY_MEM_MEM)();
1837             miCpyMemMemParams       = {};
1838             MOS_RESOURCE *resHuCPakAggregatedFrameStatsBuffer = nullptr;
1839             RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetHucPakAggregatedFrameStatsBuffer, resHuCPakAggregatedFrameStatsBuffer);
1840             ENCODE_CHK_NULL_RETURN(resHuCPakAggregatedFrameStatsBuffer);
1841             miCpyMemMemParams.presSrc     = m_hevcPicParams->tiles_enabled_flag && (m_pipeline->GetPipeNum() > 1) ? resHuCPakAggregatedFrameStatsBuffer : m_basicFeature->m_recycleBuf->GetBuffer(FrameStatStreamOutBuffer, 0);
1842             miCpyMemMemParams.dwSrcOffset = (m_basicFeature->m_hevcPakStatsSSEOffset + i) * sizeof(uint32_t);  // SSE luma offset is located at DW32 in Frame statistics, followed by chroma
1843             miCpyMemMemParams.presDst     = osResource;
1844             miCpyMemMemParams.dwDstOffset = offset + i * sizeof(uint32_t);
1845             ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_COPY_MEM_MEM)(&cmdBuffer));
1846         }
1847         return MOS_STATUS_SUCCESS;
1848     }
1849 
AddForceWakeup(MOS_COMMAND_BUFFER & cmdBuffer)1850     MOS_STATUS HevcVdencPkt::AddForceWakeup(MOS_COMMAND_BUFFER &cmdBuffer)
1851     {
1852         ENCODE_FUNC_CALL();
1853 
1854         auto &forceWakeupParams                     = m_miItf->MHW_GETPAR_F(MI_FORCE_WAKEUP)();
1855         forceWakeupParams                           = {};
1856         forceWakeupParams.bMFXPowerWellControl      = true;
1857         forceWakeupParams.bMFXPowerWellControlMask  = true;
1858         forceWakeupParams.bHEVCPowerWellControl     = true;
1859         forceWakeupParams.bHEVCPowerWellControlMask = true;
1860 
1861         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_FORCE_WAKEUP)(&cmdBuffer));
1862 
1863         return MOS_STATUS_SUCCESS;
1864     }
1865 
SetBatchBufferForPakSlices()1866     MOS_STATUS HevcVdencPkt::SetBatchBufferForPakSlices()
1867     {
1868         MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1869 
1870         ENCODE_FUNC_CALL();
1871 
1872         if (m_hevcPicParams->tiles_enabled_flag)
1873         {
1874             return eStatus;
1875         }
1876 
1877         m_useBatchBufferForPakSlices         = m_pipeline->IsSingleTaskPhaseSupported() && m_pipeline->IsSingleTaskPhaseSupportedInPak();
1878         m_batchBufferForPakSlicesStartOffset = 0;
1879 
1880         if (m_useBatchBufferForPakSlices)
1881         {
1882             if (m_pipeline->IsFirstPass())
1883             {
1884                 // The same buffer is used for all slices for all passes
1885                 uint32_t batchBufferForPakSlicesSize =
1886                     m_pipeline->GetPassNum() * m_basicFeature->m_numSlices * m_sliceStatesSize;
1887 
1888                 ENCODE_ASSERT(batchBufferForPakSlicesSize);
1889 
1890                 if (batchBufferForPakSlicesSize >
1891                     (uint32_t)m_batchBufferForPakSlices[m_basicFeature->m_currPakSliceIdx].iSize)
1892                 {
1893                     if (m_batchBufferForPakSlices[m_basicFeature->m_currPakSliceIdx].iSize)
1894                     {
1895                         Mhw_FreeBb(m_osInterface, &m_batchBufferForPakSlices[m_basicFeature->m_currPakSliceIdx], nullptr);
1896                         m_batchBufferForPakSlices[m_basicFeature->m_currPakSliceIdx].iSize = 0;
1897                     }
1898 
1899                     ENCODE_CHK_STATUS_RETURN(AllocateBatchBufferForPakSlices(
1900                         m_basicFeature->m_numSlices,
1901                         m_pipeline->GetPassNum()));
1902                 }
1903             }
1904 
1905             ENCODE_CHK_STATUS_RETURN(Mhw_LockBb(
1906                 m_osInterface,
1907                 &m_batchBufferForPakSlices[m_basicFeature->m_currPakSliceIdx]));
1908 
1909             m_batchBufferForPakSlicesStartOffset =
1910                 m_pipeline->IsFirstPass() ? 0 : (uint32_t)m_batchBufferForPakSlices[m_basicFeature->m_currPakSliceIdx].iCurrent;
1911         }
1912 
1913         return eStatus;
1914     }
1915 
StartStatusReport(uint32_t srType,MOS_COMMAND_BUFFER * cmdBuffer)1916     MOS_STATUS HevcVdencPkt::StartStatusReport(
1917         uint32_t            srType,
1918         MOS_COMMAND_BUFFER *cmdBuffer)
1919     {
1920         ENCODE_FUNC_CALL();
1921         ENCODE_CHK_NULL_RETURN(cmdBuffer);
1922 
1923         ENCODE_CHK_STATUS_RETURN(MediaPacket::StartStatusReportNext(srType, cmdBuffer));
1924         m_encodecp->StartCpStatusReport(cmdBuffer);
1925 
1926         return MOS_STATUS_SUCCESS;
1927     }
1928 
EndStatusReport(uint32_t srType,MOS_COMMAND_BUFFER * cmdBuffer)1929     MOS_STATUS HevcVdencPkt::EndStatusReport(
1930         uint32_t            srType,
1931         MOS_COMMAND_BUFFER *cmdBuffer)
1932     {
1933         ENCODE_FUNC_CALL();
1934         ENCODE_CHK_NULL_RETURN(cmdBuffer);
1935 
1936         ENCODE_CHK_STATUS_RETURN(MediaPacket::EndStatusReportNext(srType, cmdBuffer));
1937 
1938         MediaPerfProfiler *perfProfiler = MediaPerfProfiler::Instance();
1939         ENCODE_CHK_NULL_RETURN(perfProfiler);
1940         ENCODE_CHK_STATUS_RETURN(perfProfiler->AddPerfCollectEndCmd(
1941             (void *)m_pipeline, m_osInterface, m_miItf, cmdBuffer));
1942 
1943         return MOS_STATUS_SUCCESS;
1944     }
1945 
ReadHcpStatus(MHW_VDBOX_NODE_IND vdboxIndex,MediaStatusReport * statusReport,MOS_COMMAND_BUFFER & cmdBuffer)1946     MOS_STATUS HevcVdencPkt::ReadHcpStatus(
1947         MHW_VDBOX_NODE_IND  vdboxIndex,
1948         MediaStatusReport * statusReport,
1949         MOS_COMMAND_BUFFER &cmdBuffer)
1950     {
1951         ENCODE_FUNC_CALL();
1952 
1953         MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1954 
1955         CODEC_HW_FUNCTION_ENTER;
1956 
1957         ENCODE_CHK_NULL_RETURN(statusReport);
1958         ENCODE_CHK_NULL_RETURN(m_hwInterface);
1959 
1960         MOS_RESOURCE *osResource;
1961         uint32_t      offset;
1962 
1963         EncodeStatusReadParams params;
1964         MOS_ZeroMemory(&params, sizeof(params));
1965 
1966         ENCODE_CHK_STATUS_RETURN(statusReport->GetAddress(encode::statusReportMfxBitstreamByteCountPerFrame, osResource, offset));
1967         params.resBitstreamByteCountPerFrame    = osResource;
1968         params.bitstreamByteCountPerFrameOffset = offset;
1969 
1970         ENCODE_CHK_STATUS_RETURN(statusReport->GetAddress(encode::statusReportMfxBitstreamSyntaxElementOnlyBitCount, osResource, offset));
1971         params.resBitstreamSyntaxElementOnlyBitCount    = osResource;
1972         params.bitstreamSyntaxElementOnlyBitCountOffset = offset;
1973 
1974         ENCODE_CHK_STATUS_RETURN(statusReport->GetAddress(encode::statusReportQPStatusCount, osResource, offset));
1975         params.resQpStatusCount    = osResource;
1976         params.qpStatusCountOffset = offset;
1977 
1978         ENCODE_CHK_STATUS_RETURN(statusReport->GetAddress(encode::statusReportImageStatusMask, osResource, offset));
1979         params.resImageStatusMask    = osResource;
1980         params.imageStatusMaskOffset = offset;
1981 
1982         ENCODE_CHK_STATUS_RETURN(statusReport->GetAddress(encode::statusReportImageStatusCtrl, osResource, offset));
1983         params.resImageStatusCtrl    = osResource;
1984         params.imageStatusCtrlOffset = offset;
1985 
1986         ENCODE_CHK_STATUS_RETURN(statusReport->GetAddress(encode::statusReportNumSlices, osResource, offset));
1987         params.resNumSlices    = osResource;
1988         params.numSlicesOffset = offset;
1989 
1990         ENCODE_CHK_STATUS_RETURN(m_hwInterface->ReadHcpStatus(vdboxIndex, params, &cmdBuffer));
1991 
1992         // Slice Size Conformance
1993         if (m_hevcSeqParams->SliceSizeControl)
1994         {
1995             RUN_FEATURE_INTERFACE_RETURN(HevcEncodeDss, HevcFeatureIDs::hevcVdencDssFeature, ReadHcpStatus, vdboxIndex, cmdBuffer);
1996         }
1997 
1998         auto brcFeature = dynamic_cast<HEVCEncodeBRC *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcBrcFeature));
1999         ENCODE_CHK_NULL_RETURN(brcFeature);
2000         bool vdencHucUsed  = brcFeature->IsVdencHucUsed();
2001         auto mmioRegisters = m_hcpItf->GetMmioRegisters(m_vdboxIndex);
2002         if (vdencHucUsed)
2003         {
2004             // Store PAK frameSize MMIO to PakInfo buffer
2005             auto &miStoreRegMemParams           = m_miItf->MHW_GETPAR_F(MI_STORE_REGISTER_MEM)();
2006             miStoreRegMemParams                 = {};
2007             miStoreRegMemParams.presStoreBuffer = m_basicFeature->m_recycleBuf->GetBuffer(PakInfo, 0);
2008             miStoreRegMemParams.dwOffset        = 0;
2009             miStoreRegMemParams.dwRegister      = mmioRegisters->hcpEncBitstreamBytecountFrameRegOffset;
2010             ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(&cmdBuffer));
2011         }
2012         ENCODE_CHK_STATUS_RETURN(m_hwInterface->ReadImageStatusForHcp(vdboxIndex, params, &cmdBuffer));
2013         return eStatus;
2014     }
2015 
ReadSliceSizeForSinglePipe(MOS_COMMAND_BUFFER & cmdBuffer)2016     MOS_STATUS HevcVdencPkt::ReadSliceSizeForSinglePipe(MOS_COMMAND_BUFFER &cmdBuffer)
2017     {
2018         MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2019 
2020         ENCODE_FUNC_CALL();
2021 
2022         // Report slice size to app only when dynamic slice is enabled
2023         if (!m_hevcSeqParams->SliceSizeControl)
2024         {
2025             return eStatus;
2026         }
2027         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeDss, HevcFeatureIDs::hevcVdencDssFeature, ReadSliceSizeForSinglePipe, m_pipeline, cmdBuffer);
2028 
2029         return eStatus;
2030     }
2031 
ReadSliceSize(MOS_COMMAND_BUFFER & cmdBuffer)2032     MOS_STATUS HevcVdencPkt::ReadSliceSize(MOS_COMMAND_BUFFER &cmdBuffer)
2033     {
2034         MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2035 
2036         ENCODE_FUNC_CALL();
2037 
2038         // Use FrameStats buffer if in single pipe mode.
2039         if (m_pipeline->GetPipeNum() == 1)
2040         {
2041             return ReadSliceSizeForSinglePipe(cmdBuffer);
2042         }
2043 
2044         // In multi-tile multi-pipe mode, use PAK integration kernel output
2045         // PAK integration kernel accumulates frame statistics across tiles, which should be used to setup slice size report
2046         // Report slice size to app only when dynamic scaling is enabled
2047         if (!m_hevcSeqParams->SliceSizeControl)
2048         {
2049             return eStatus;
2050         }
2051 
2052         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeDss, HevcFeatureIDs::hevcVdencDssFeature, ReadSliceSize, m_pipeline, cmdBuffer);
2053 
2054         return eStatus;
2055     }
2056 
Completed(void * mfxStatus,void * rcsStatus,void * statusReport)2057     MOS_STATUS HevcVdencPkt::Completed(void *mfxStatus, void *rcsStatus, void *statusReport)
2058     {
2059         ENCODE_FUNC_CALL();
2060 
2061         if (!m_enableVdencStatusReport)
2062         {
2063             return MOS_STATUS_SUCCESS;
2064         }
2065 
2066         ENCODE_CHK_NULL_RETURN(mfxStatus);
2067         ENCODE_CHK_NULL_RETURN(statusReport);
2068         ENCODE_CHK_NULL_RETURN(m_basicFeature);
2069 
2070         EncodeStatusMfx *       encodeStatusMfx  = (EncodeStatusMfx *)mfxStatus;
2071         EncodeStatusReportData *statusReportData = (EncodeStatusReportData *)statusReport;
2072         if (statusReportData->hwCtr)
2073         {
2074             m_encodecp->UpdateCpStatusReport(statusReport);
2075         }
2076 
2077         // The last pass of BRC may have a zero value of hcpCumulativeFrameDeltaQp
2078         if (encodeStatusMfx->imageStatusCtrl.hcpTotalPass && encodeStatusMfx->imageStatusCtrl.hcpCumulativeFrameDeltaQP == 0)
2079         {
2080             encodeStatusMfx->imageStatusCtrl.hcpCumulativeFrameDeltaQP = encodeStatusMfx->imageStatusCtrlOfLastBRCPass.hcpCumulativeFrameDeltaQP;
2081         }
2082         encodeStatusMfx->imageStatusCtrlOfLastBRCPass.hcpCumulativeFrameDeltaQP = 0;
2083 
2084         statusReportData->codecStatus   = CODECHAL_STATUS_SUCCESSFUL;
2085         statusReportData->bitstreamSize = encodeStatusMfx->mfcBitstreamByteCountPerFrame + encodeStatusMfx->headerBytesInserted;
2086 
2087         statusReportData->numberSlices      = 0;
2088         statusReportData->panicMode         = encodeStatusMfx->imageStatusCtrl.panic;
2089         statusReportData->averageQP         = 0;
2090         statusReportData->qpY               = 0;
2091         statusReportData->suggestedQPYDelta = encodeStatusMfx->imageStatusCtrl.hcpCumulativeFrameDeltaQP;
2092         statusReportData->numberPasses      = (unsigned char)encodeStatusMfx->imageStatusCtrl.hcpTotalPass + 1;  //initial pass is considered to be 0,hence +1 to report;
2093         ENCODE_VERBOSEMESSAGE("Exectued PAK Pass number: %d\n", encodeStatusMfx->numberPasses);
2094 
2095         if (m_basicFeature->m_frameWidth != 0 && m_basicFeature->m_frameHeight != 0)
2096         {
2097             ENCODE_CHK_NULL_RETURN(m_basicFeature->m_hevcSeqParams);
2098 
2099             uint32_t log2CBSize = 2;
2100 
2101             // Based on HW team:
2102             // The CumulativeQp from the PAK accumulated at TU level and normalized to TU4x4
2103             // qp(for TU 8x8) = qp*4
2104             // qp(for TU 16x16) = qp *16
2105             // qp(for TU 32x32) = qp*64
2106             // all these qp are accumulated for entire frame.
2107             // the HW will ceil the CumulativeQp number to max (24 bit)
2108 
2109             uint32_t log2McuSize = m_basicFeature->m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
2110 
2111             uint32_t numLumaPixels = ((m_basicFeature->m_hevcSeqParams->wFrameHeightInMinCbMinus1 + 1) << log2McuSize) *
2112                             ((m_basicFeature->m_hevcSeqParams->wFrameWidthInMinCbMinus1 + 1) << log2McuSize);
2113 
2114             statusReportData->qpY = statusReportData->averageQP = static_cast<uint8_t>(
2115                 static_cast<double>(encodeStatusMfx->qpStatusCount.hcpCumulativeQP)
2116                 / (numLumaPixels / 16) - (m_basicFeature->m_hevcSeqParams->bit_depth_luma_minus8 != 0) * 12);
2117         }
2118 
2119         // When tile replay is enabled with tile replay, need to report out the tile size and the bit stream is not continous
2120         if (m_pipeline->GetPipeNum() == 1)
2121         {
2122             //ENCODE_CHK_STATUS_RETURN(CodechalVdencHevcState::GetStatusReport(encodeStatus, encodeStatusReport));
2123             MOS_LOCK_PARAMS lockFlags;
2124             MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
2125             lockFlags.ReadOnly = 1;
2126 
2127             uint32_t *sliceSize = nullptr;
2128             // pSliceSize is set/ allocated only when dynamic slice is enabled. Cannot use SSC flag here, as it is an asynchronous call
2129             if (encodeStatusMfx->sliceReport.sliceSize)
2130             {
2131                 sliceSize = (uint32_t *)m_osInterface->pfnLockResource(m_osInterface, encodeStatusMfx->sliceReport.sliceSize, &lockFlags);
2132                 ENCODE_CHK_NULL_RETURN(sliceSize);
2133 
2134                 statusReportData->numberSlices           = encodeStatusMfx->sliceReport.numberSlices;
2135                 statusReportData->sizeOfSliceSizesBuffer = sizeof(uint16_t) * encodeStatusMfx->sliceReport.numberSlices;
2136                 statusReportData->sliceSizeOverflow      = (encodeStatusMfx->sliceReport.sliceSizeOverflow >> 16) & 1;
2137                 statusReportData->sliceSizes             = (uint16_t *)sliceSize;
2138 
2139                 uint16_t prevCumulativeSliceSize = 0;
2140                 // HW writes out a DW for each slice size. Copy in place the DW into 16bit fields expected by App
2141                 for (auto sliceCount = 0; sliceCount < encodeStatusMfx->sliceReport.numberSlices; sliceCount++)
2142                 {
2143                     // PAK output the sliceSize at 16DW intervals.
2144                     ENCODE_CHK_NULL_RETURN(&sliceSize[sliceCount * 16]);
2145                     uint32_t CurrAccumulatedSliceSize = sliceSize[sliceCount * 16];
2146 
2147                     //convert cummulative slice size to individual, first slice may have PPS/SPS,
2148                     statusReportData->sliceSizes[sliceCount] = CurrAccumulatedSliceSize - prevCumulativeSliceSize;
2149                     prevCumulativeSliceSize += statusReportData->sliceSizes[sliceCount];
2150                 }
2151                 m_osInterface->pfnUnlockResource(m_osInterface, encodeStatusMfx->sliceReport.sliceSize);
2152             }
2153         }
2154 
2155         ENCODE_CHK_STATUS_RETURN(ReportExtStatistics(*encodeStatusMfx, *statusReportData));
2156 
2157         CODECHAL_DEBUG_TOOL(
2158             ENCODE_CHK_STATUS_RETURN(DumpResources(encodeStatusMfx, statusReportData)););
2159 
2160         if (statusReportData->numberTilesInFrame > 1)
2161         {
2162             // When Tile feature enabled, Reset is not in vdenc packet
2163             return MOS_STATUS_SUCCESS;
2164         }
2165 
2166         m_basicFeature->Reset((CODEC_REF_LIST *)statusReportData->currRefList);
2167         return MOS_STATUS_SUCCESS;
2168     }
2169 
ReportExtStatistics(EncodeStatusMfx & encodeStatusMfx,EncodeStatusReportData & statusReportData)2170     MOS_STATUS HevcVdencPkt::ReportExtStatistics(
2171         EncodeStatusMfx        &encodeStatusMfx,
2172         EncodeStatusReportData &statusReportData)
2173     {
2174         ENCODE_FUNC_CALL();
2175 
2176         ENCODE_CHK_NULL_RETURN(m_basicFeature);
2177 
2178         uint32_t numLumaPixels = 0, numPixelsPerChromaChannel = 0;
2179 
2180         numLumaPixels = m_basicFeature->m_frameHeight * m_basicFeature->m_frameWidth;
2181         switch (m_basicFeature->m_hevcSeqParams->chroma_format_idc)
2182         {
2183         case HCP_CHROMA_FORMAT_MONOCHROME:
2184             numPixelsPerChromaChannel = 0;
2185             break;
2186         case HCP_CHROMA_FORMAT_YUV420:
2187             numPixelsPerChromaChannel = numLumaPixels / 4;
2188             break;
2189         case HCP_CHROMA_FORMAT_YUV422:
2190             numPixelsPerChromaChannel = numLumaPixels / 2;
2191             break;
2192         case HCP_CHROMA_FORMAT_YUV444:
2193             numPixelsPerChromaChannel = numLumaPixels;
2194             break;
2195         default:
2196             numPixelsPerChromaChannel = numLumaPixels / 2;
2197             break;
2198         }
2199 
2200         double squarePeakPixelValue = pow((1 << (m_basicFeature->m_hevcSeqParams->bit_depth_luma_minus8 + 8)) - 1, 2);
2201 
2202         for (auto i = 0; i < 3; i++)
2203         {
2204             uint32_t numPixels = i ? numPixelsPerChromaChannel : numLumaPixels;
2205 
2206             if (m_basicFeature->m_hevcSeqParams->bit_depth_luma_minus8 == 0)
2207             {
2208                 //8bit pixel data is represented in 10bit format in HW. so SSE should right shift by 4.
2209                 encodeStatusMfx.sumSquareError[i] >>= 4;
2210             }
2211             statusReportData.psnrX100[i] = (uint16_t)CodecHal_Clip3(0, 10000, (uint16_t)(encodeStatusMfx.sumSquareError[i] ? 1000 * log10(squarePeakPixelValue * numPixels / encodeStatusMfx.sumSquareError[i]) : -1));
2212 
2213             ENCODE_VERBOSEMESSAGE("psnrX100[%d]:%d.\n", i, statusReportData.psnrX100[i]);
2214         }
2215 
2216         return MOS_STATUS_SUCCESS;
2217     }
2218 
GetVdencStateCommandsDataSize(uint32_t & vdencPictureStatesSize,uint32_t & vdencPicturePatchListSize)2219     MOS_STATUS HevcVdencPkt::GetVdencStateCommandsDataSize(uint32_t &vdencPictureStatesSize, uint32_t &vdencPicturePatchListSize)
2220     {
2221         vdencPictureStatesSize =
2222             m_vdencItf->MHW_GETSIZE_F(VDENC_PIPE_MODE_SELECT)() +
2223             m_vdencItf->MHW_GETSIZE_F(VDENC_SRC_SURFACE_STATE)() +
2224             m_vdencItf->MHW_GETSIZE_F(VDENC_REF_SURFACE_STATE)() +
2225             m_vdencItf->MHW_GETSIZE_F(VDENC_DS_REF_SURFACE_STATE)() +
2226             m_vdencItf->MHW_GETSIZE_F(VDENC_PIPE_BUF_ADDR_STATE)() +
2227             m_vdencItf->MHW_GETSIZE_F(VDENC_WEIGHTSOFFSETS_STATE)() +
2228             m_vdencItf->MHW_GETSIZE_F(VDENC_WALKER_STATE)() +
2229             m_vdencItf->MHW_GETSIZE_F(VD_PIPELINE_FLUSH)() +
2230             m_miItf->MHW_GETSIZE_F(MI_LOAD_REGISTER_IMM)()*8 +
2231             m_miItf->MHW_GETSIZE_F(MI_FLUSH_DW)() +
2232             m_miItf->MHW_GETSIZE_F(MI_BATCH_BUFFER_START)() +
2233             m_hcpItf->MHW_GETSIZE_F(HEVC_VP9_RDOQ_STATE)() +
2234             m_miItf->MHW_GETSIZE_F(MI_BATCH_BUFFER_END)();
2235 
2236         vdencPicturePatchListSize = PATCH_LIST_COMMAND(mhw::vdbox::vdenc::Itf::VDENC_PIPE_BUF_ADDR_STATE_CMD);
2237 
2238         return MOS_STATUS_SUCCESS;
2239     }
2240 
GetHxxPrimitiveCommandSize()2241     MOS_STATUS HevcVdencPkt::GetHxxPrimitiveCommandSize()
2242     {
2243         uint32_t hcpCommandsSize  = 0;
2244         uint32_t hcpPatchListSize = 0;
2245         hcpCommandsSize =
2246             m_hcpItf->MHW_GETSIZE_F(HCP_REF_IDX_STATE)() * 2 +
2247             m_hcpItf->MHW_GETSIZE_F(HCP_WEIGHTOFFSET_STATE)() * 2 +
2248             m_hcpItf->MHW_GETSIZE_F(HCP_SLICE_STATE)() +
2249             m_hcpItf->MHW_GETSIZE_F(HCP_PAK_INSERT_OBJECT)() +
2250             m_miItf->MHW_GETSIZE_F(MI_BATCH_BUFFER_START)() * 2 +
2251             m_hcpItf->MHW_GETSIZE_F(HCP_TILE_CODING)();  // one slice cannot be with more than one tile
2252 
2253         hcpPatchListSize =
2254             mhw::vdbox::hcp::Itf::HCP_REF_IDX_STATE_CMD_NUMBER_OF_ADDRESSES * 2 +
2255             mhw::vdbox::hcp::Itf::HCP_WEIGHTOFFSET_STATE_CMD_NUMBER_OF_ADDRESSES * 2 +
2256             mhw::vdbox::hcp::Itf::HCP_SLICE_STATE_CMD_NUMBER_OF_ADDRESSES +
2257             mhw::vdbox::hcp::Itf::HCP_PAK_INSERT_OBJECT_CMD_NUMBER_OF_ADDRESSES +
2258             mhw::vdbox::hcp::Itf::MI_BATCH_BUFFER_START_CMD_NUMBER_OF_ADDRESSES * 2 +  // One is for the PAK command and another one is for the BB when BRC and single task mode are on
2259             mhw::vdbox::hcp::Itf::HCP_TILE_CODING_COMMAND_NUMBER_OF_ADDRESSES;         // HCP_TILE_CODING_STATE command
2260 
2261         uint32_t cpCmdsize = 0;
2262         uint32_t cpPatchListSize = 0;
2263         if (m_hwInterface->GetCpInterface())
2264         {
2265             m_hwInterface->GetCpInterface()->GetCpSliceLevelCmdSize(cpCmdsize, cpPatchListSize);
2266         }
2267 
2268         m_defaultSliceStatesSize = hcpCommandsSize + (uint32_t)cpCmdsize;
2269         m_defaultSlicePatchListSize = hcpPatchListSize + (uint32_t)cpPatchListSize;
2270 
2271         return MOS_STATUS_SUCCESS;
2272     }
2273 
CalculateCommandSize(uint32_t & commandBufferSize,uint32_t & requestedPatchListSize)2274     MOS_STATUS HevcVdencPkt::CalculateCommandSize(uint32_t &commandBufferSize, uint32_t &requestedPatchListSize)
2275     {
2276         m_pictureStatesSize    = m_defaultPictureStatesSize;
2277         m_picturePatchListSize = m_defaultPicturePatchListSize;
2278         m_sliceStatesSize      = m_defaultSliceStatesSize;
2279         m_slicePatchListSize   = m_defaultSlicePatchListSize;
2280 
2281         commandBufferSize      = CalculateCommandBufferSize();
2282         requestedPatchListSize = CalculatePatchListSize();
2283         return MOS_STATUS_SUCCESS;
2284     }
2285 
CalculateCommandBufferSize()2286     uint32_t HevcVdencPkt::CalculateCommandBufferSize()
2287     {
2288         ENCODE_FUNC_CALL();
2289         uint32_t commandBufferSize = 0;
2290 
2291         // To be refined later, differentiate BRC and CQP
2292         commandBufferSize =
2293             m_pictureStatesSize +
2294             (m_sliceStatesSize * m_basicFeature->m_numSlices);
2295 
2296         // 4K align since allocation is in chunks of 4K bytes.
2297         commandBufferSize = MOS_ALIGN_CEIL(commandBufferSize, CODECHAL_PAGE_SIZE);
2298 
2299         return commandBufferSize;
2300     }
2301 
CalculatePatchListSize()2302     uint32_t HevcVdencPkt::CalculatePatchListSize()
2303     {
2304         ENCODE_FUNC_CALL();
2305         uint32_t requestedPatchListSize = 0;
2306         if (m_usePatchList)
2307         {
2308             requestedPatchListSize =
2309                 m_picturePatchListSize +
2310                 (m_slicePatchListSize * m_basicFeature->m_numSlices);
2311 
2312             // Multi pipes are sharing one patchlist
2313             requestedPatchListSize *= m_pipeline->GetPipeNum();
2314         }
2315         return requestedPatchListSize;
2316     }
2317 
ReadBrcPakStatistics(PMOS_COMMAND_BUFFER cmdBuffer,EncodeReadBrcPakStatsParams * params)2318     MOS_STATUS HevcVdencPkt::ReadBrcPakStatistics(
2319         PMOS_COMMAND_BUFFER          cmdBuffer,
2320         EncodeReadBrcPakStatsParams *params)
2321     {
2322         MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2323 
2324         ENCODE_FUNC_CALL();
2325 
2326         ENCODE_CHK_NULL_RETURN(cmdBuffer);
2327         ENCODE_CHK_NULL_RETURN(params);
2328         ENCODE_CHK_NULL_RETURN(params->presBrcPakStatisticBuffer);
2329         ENCODE_CHK_NULL_RETURN(params->presStatusBuffer);
2330 
2331         ENCODE_CHK_STATUS_RETURN(ValidateVdboxIdx(m_vdboxIndex));
2332         auto mmioRegisters = m_hcpItf->GetMmioRegisters(m_vdboxIndex);
2333 
2334         auto &miStoreRegMemParams           = m_miItf->MHW_GETPAR_F(MI_STORE_REGISTER_MEM)();
2335         miStoreRegMemParams                 = {};
2336         miStoreRegMemParams.presStoreBuffer = params->presBrcPakStatisticBuffer;
2337         miStoreRegMemParams.dwOffset        = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_BITSTREAM_BYTECOUNT_FRAME);
2338         miStoreRegMemParams.dwRegister      = mmioRegisters->hcpEncBitstreamBytecountFrameRegOffset;
2339         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(cmdBuffer));
2340 
2341         miStoreRegMemParams                 = {};
2342         miStoreRegMemParams.presStoreBuffer = params->presBrcPakStatisticBuffer;
2343         miStoreRegMemParams.dwOffset        = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_BITSTREAM_BYTECOUNT_FRAME_NOHEADER);
2344         miStoreRegMemParams.dwRegister      = mmioRegisters->hcpEncBitstreamBytecountFrameNoHeaderRegOffset;
2345         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(cmdBuffer));
2346 
2347         miStoreRegMemParams                 = {};
2348         miStoreRegMemParams.presStoreBuffer = params->presBrcPakStatisticBuffer;
2349         miStoreRegMemParams.dwOffset        = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL);
2350         miStoreRegMemParams.dwRegister      = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
2351         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(cmdBuffer));
2352 
2353         auto &storeDataParams            = m_miItf->MHW_GETPAR_F(MI_STORE_DATA_IMM)();
2354         storeDataParams                  = {};
2355         storeDataParams.pOsResource      = params->presStatusBuffer;
2356         storeDataParams.dwResourceOffset = params->dwStatusBufNumPassesOffset;
2357         storeDataParams.dwValue          = params->ucPass;
2358         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(cmdBuffer));
2359 
2360         return eStatus;
2361     }
2362 
MHW_SETPAR_DECL_SRC(VDENC_CONTROL_STATE,HevcVdencPkt)2363     MHW_SETPAR_DECL_SRC(VDENC_CONTROL_STATE, HevcVdencPkt)
2364     {
2365         params.vdencInitialization = true;
2366 
2367         return MOS_STATUS_SUCCESS;
2368     }
2369 
MHW_SETPAR_DECL_SRC(VDENC_PIPE_MODE_SELECT,HevcVdencPkt)2370     MHW_SETPAR_DECL_SRC(VDENC_PIPE_MODE_SELECT, HevcVdencPkt)
2371     {
2372         //params.tlbPrefetch = true;
2373 
2374         params.pakObjCmdStreamOut = m_vdencPakObjCmdStreamOutForceEnabled? true : m_hevcPicParams->StatusReportEnable.fields.BlockStats;
2375 
2376         // needs to be enabled for 1st pass in multi-pass case
2377         // This bit is ignored if PAK only second pass is enabled.
2378         if ((m_pipeline->GetCurrentPass() == 0) && !m_pipeline->IsLastPass()
2379             || (m_basicFeature->m_422State && m_basicFeature->m_422State->GetFeature422Flag())
2380         )
2381         {
2382             params.pakObjCmdStreamOut = true;
2383         }
2384 
2385         if (!MEDIA_IS_WA(m_osInterface->pfnGetWaTable(m_osInterface), WaEnableOnlyASteppingFeatures))
2386         {
2387             params.VdencPipeModeSelectPar0 = 1;
2388         }
2389 
2390         MHW_VDBOX_HCP_MULTI_ENGINE_MODE multiEngineMode;
2391         if (m_pipeline->GetPipeNum() > 1)
2392         {
2393             // Running in the multiple VDBOX mode
2394             if (m_pipeline->IsFirstPipe())
2395             {
2396                 multiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_LEFT;
2397             }
2398             else if (m_pipeline->IsLastPipe())
2399             {
2400                 multiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_RIGHT;
2401             }
2402             else
2403             {
2404                 multiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_MIDDLE;
2405             }
2406         }
2407         else
2408         {
2409             multiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_FE_LEGACY;
2410         }
2411 
2412         // Enable RGB encoding
2413         params.rgbEncodingMode = false;
2414         params.scalabilityMode = !(multiEngineMode == MHW_VDBOX_HCP_MULTI_ENGINE_MODE_FE_LEGACY);
2415 
2416         auto waTable = m_osInterface->pfnGetWaTable(m_osInterface);
2417         ENCODE_CHK_NULL_RETURN(waTable);
2418 
2419         if (MEDIA_IS_WA(waTable, Wa_22011549751) &&
2420             !m_osInterface->bSimIsActive &&
2421             !m_basicFeature->m_hevcPicParams->pps_curr_pic_ref_enabled_flag)
2422         {
2423             params.hmeRegionPrefetch = m_basicFeature->m_hevcPicParams->CodingType != I_TYPE;
2424         }
2425 
2426         if (MEDIA_IS_WA(waTable, Wa_14012254246))
2427         {
2428             params.hmeRegionPrefetch        = 0;
2429             params.leftPrefetchAtWrapAround = 0;
2430         }
2431 
2432         return MOS_STATUS_SUCCESS;
2433     }
2434 
MHW_SETPAR_DECL_SRC(VDENC_PIPE_BUF_ADDR_STATE,HevcVdencPkt)2435     MHW_SETPAR_DECL_SRC(VDENC_PIPE_BUF_ADDR_STATE, HevcVdencPkt)
2436     {
2437         params.intraRowStoreScratchBuffer       = m_vdencIntraRowStoreScratch;
2438         params.tileRowStoreBuffer               = m_vdencTileRowStoreBuffer;
2439         params.cumulativeCuCountStreamOutBuffer = m_resCumulativeCuCountStreamoutBuffer;
2440 
2441         return MOS_STATUS_SUCCESS;
2442     }
2443 
MHW_SETPAR_DECL_SRC(VD_PIPELINE_FLUSH,HevcVdencPkt)2444     MHW_SETPAR_DECL_SRC(VD_PIPELINE_FLUSH, HevcVdencPkt)
2445     {
2446         switch (m_flushCmd)
2447         {
2448         case waitHevc:
2449             params.waitDoneHEVC           = true;
2450             params.flushHEVC              = true;
2451             params.waitDoneVDCmdMsgParser = true;
2452             break;
2453         case waitVdenc:
2454             params.waitDoneMFX            = true;
2455             params.waitDoneVDENC          = true;
2456             params.flushVDENC             = true;
2457             params.waitDoneVDCmdMsgParser = true;
2458             break;
2459         case waitHevcVdenc:
2460             params.waitDoneMFX            = true;
2461             params.waitDoneVDENC          = true;
2462             params.flushVDENC             = true;
2463             params.flushHEVC              = true;
2464             params.waitDoneVDCmdMsgParser = true;
2465             break;
2466         }
2467 
2468         return MOS_STATUS_SUCCESS;
2469     }
2470 
MHW_SETPAR_DECL_SRC(HCP_SURFACE_STATE,HevcVdencPkt)2471     MHW_SETPAR_DECL_SRC(HCP_SURFACE_STATE, HevcVdencPkt)
2472     {
2473         params.surfaceStateId = m_curHcpSurfStateId;
2474 
2475         return MOS_STATUS_SUCCESS;
2476     }
2477 
MHW_SETPAR_DECL_SRC(VDENC_HEVC_VP9_TILE_SLICE_STATE,HevcVdencPkt)2478     MHW_SETPAR_DECL_SRC(VDENC_HEVC_VP9_TILE_SLICE_STATE, HevcVdencPkt)
2479     {
2480         uint32_t dwNumberOfPipes = 0;
2481         switch (m_pipeline->GetPipeNum())
2482         {
2483         case 0:
2484         case 1:
2485             dwNumberOfPipes = VDENC_PIPE_SINGLE_PIPE;
2486             break;
2487         case 2:
2488             dwNumberOfPipes = VDENC_PIPE_TWO_PIPE;
2489             break;
2490         case 4:
2491             dwNumberOfPipes = VDENC_PIPE_FOUR_PIPE;
2492             break;
2493         default:
2494             dwNumberOfPipes = VDENC_PIPE_INVALID;
2495             ENCODE_ASSERT(false);
2496             break;
2497         }
2498 
2499         params.numPipe = dwNumberOfPipes;
2500 
2501         return MOS_STATUS_SUCCESS;
2502     }
2503 
PrepareHWMetaData(MOS_COMMAND_BUFFER * cmdBuffer)2504     MOS_STATUS HevcVdencPkt::PrepareHWMetaData(MOS_COMMAND_BUFFER *cmdBuffer)
2505     {
2506         ENCODE_FUNC_CALL();
2507         MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2508 
2509         ENCODE_CHK_NULL_RETURN(m_basicFeature);
2510         if (!m_basicFeature->m_resMetadataBuffer)
2511         {
2512             return MOS_STATUS_SUCCESS;
2513         }
2514 
2515         // Intra/Inter/Skip CU Cnt
2516         auto xCalAtomic = [&](PMOS_RESOURCE presDst, uint32_t dstOffset, PMOS_RESOURCE presSrc, uint32_t srcOffset, mhw::mi::MHW_COMMON_MI_ATOMIC_OPCODE opCode) {
2517             auto  mmioRegisters      = m_hwInterface->GetVdencInterfaceNext()->GetMmioRegisters(m_vdboxIndex);
2518             auto &miLoadRegMemParams = m_miItf->MHW_GETPAR_F(MI_LOAD_REGISTER_MEM)();
2519             auto &flushDwParams      = m_miItf->MHW_GETPAR_F(MI_FLUSH_DW)();
2520             auto &atomicParams       = m_miItf->MHW_GETPAR_F(MI_ATOMIC)();
2521 
2522             miLoadRegMemParams = {};
2523             flushDwParams      = {};
2524             atomicParams       = {};
2525 
2526             miLoadRegMemParams.presStoreBuffer = presSrc;
2527             miLoadRegMemParams.dwOffset        = srcOffset;
2528             miLoadRegMemParams.dwRegister      = mmioRegisters->generalPurposeRegister0LoOffset;
2529             ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer));
2530 
2531             ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_FLUSH_DW)(cmdBuffer));
2532 
2533             atomicParams.pOsResource      = presDst;
2534             atomicParams.dwResourceOffset = dstOffset;
2535             atomicParams.dwDataSize       = sizeof(uint32_t);
2536             atomicParams.Operation        = opCode;
2537             ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_ATOMIC)(cmdBuffer));
2538 
2539             return MOS_STATUS_SUCCESS;
2540         };
2541 
2542         MetaDataOffset resourceOffset = m_basicFeature->m_metaDataOffset;
2543         PMOS_RESOURCE  resLcuBaseAddressBuffer = m_basicFeature->m_recycleBuf->GetBuffer(LcuBaseAddressBuffer, 0);
2544         ENCODE_CHK_NULL_RETURN(resLcuBaseAddressBuffer);
2545 
2546         auto &storeDataParams            = m_miItf->MHW_GETPAR_F(MI_STORE_DATA_IMM)();
2547         storeDataParams                  = {};
2548         storeDataParams.pOsResource      = m_basicFeature->m_resMetadataBuffer;
2549         storeDataParams.dwResourceOffset = resourceOffset.dwEncodeErrorFlags;
2550         storeDataParams.dwValue          = 0;
2551         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(cmdBuffer));
2552 
2553         storeDataParams.dwResourceOffset = resourceOffset.dwWrittenSubregionsCount;
2554         storeDataParams.dwValue          = m_basicFeature->m_numSlices;
2555         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(cmdBuffer));
2556 
2557         auto &miCpyMemMemParams   = m_miItf->MHW_GETPAR_F(MI_COPY_MEM_MEM)();
2558         miCpyMemMemParams         = {};
2559         miCpyMemMemParams.presSrc = resLcuBaseAddressBuffer;
2560         miCpyMemMemParams.presDst = m_basicFeature->m_resMetadataBuffer;
2561 
2562         for (uint16_t slcCount = 0; slcCount < m_basicFeature->m_numSlices; slcCount++)
2563         {
2564             uint32_t subRegionStartOffset = resourceOffset.dwMetaDataSize + slcCount * resourceOffset.dwMetaDataSubRegionSize;
2565 
2566             storeDataParams.dwResourceOffset = subRegionStartOffset + resourceOffset.dwbStartOffset;
2567             storeDataParams.dwValue          = 0;
2568             ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(cmdBuffer));
2569 
2570             storeDataParams.dwResourceOffset = subRegionStartOffset + resourceOffset.dwbHeaderSize;
2571             storeDataParams.dwValue          = m_basicFeature->m_slcData[slcCount].BitSize;
2572             ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(cmdBuffer));
2573 
2574             miCpyMemMemParams.presSrc     = resLcuBaseAddressBuffer;
2575             miCpyMemMemParams.presDst     = m_basicFeature->m_resMetadataBuffer;
2576             miCpyMemMemParams.dwSrcOffset = slcCount * 16 * sizeof(uint32_t);  //slice size offset in resLcuBaseAddressBuffer is 16DW
2577             miCpyMemMemParams.dwDstOffset = subRegionStartOffset + resourceOffset.dwbSize;
2578             ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_COPY_MEM_MEM)(cmdBuffer));
2579             if (slcCount)
2580             {
2581                 ENCODE_CHK_STATUS_RETURN(xCalAtomic(
2582                     m_basicFeature->m_resMetadataBuffer,
2583                     subRegionStartOffset + resourceOffset.dwbSize,
2584                     resLcuBaseAddressBuffer,
2585                     (slcCount - 1) * 16 * sizeof(uint32_t),
2586                     mhw::mi::MHW_MI_ATOMIC_SUB));
2587             }
2588         }
2589 
2590         auto mmioRegisters                = m_hcpItf->GetMmioRegisters(m_vdboxIndex);
2591         auto &storeRegMemParams           = m_miItf->MHW_GETPAR_F(MI_STORE_REGISTER_MEM)();
2592         storeRegMemParams                 = {};
2593         storeRegMemParams.presStoreBuffer = m_basicFeature->m_resMetadataBuffer;
2594         storeRegMemParams.dwOffset        = resourceOffset.dwEncodedBitstreamWrittenBytesCount;
2595         storeRegMemParams.dwRegister      = mmioRegisters->hcpEncBitstreamBytecountFrameRegOffset;
2596         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(cmdBuffer));
2597 
2598         // Statistics
2599         // Average QP
2600         if (m_hevcSeqParams->RateControlMethod == RATECONTROL_CQP)
2601         {
2602             storeDataParams.dwResourceOffset = resourceOffset.dwEncodeStats + resourceOffset.dwAverageQP;
2603             storeDataParams.dwValue          = m_hevcPicParams->QpY + m_hevcSliceParams->slice_qp_delta;
2604             ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(cmdBuffer));
2605         }
2606         else
2607         {
2608             auto brcFeature = dynamic_cast<HEVCEncodeBRC *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcBrcFeature));
2609             ENCODE_CHK_NULL_RETURN(brcFeature);
2610 
2611             miCpyMemMemParams.presSrc     = brcFeature->GetHevcVdenc2ndLevelBatchBuffer(m_pipeline->m_currRecycledBufIdx);
2612             miCpyMemMemParams.dwSrcOffset = 0x6F * sizeof(uint32_t);
2613             miCpyMemMemParams.presDst     = m_basicFeature->m_resMetadataBuffer;
2614             miCpyMemMemParams.dwDstOffset = resourceOffset.dwEncodeStats + resourceOffset.dwAverageQP;
2615             ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_COPY_MEM_MEM)(cmdBuffer));
2616 
2617             auto &atomicParams             = m_miItf->MHW_GETPAR_F(MI_ATOMIC)();
2618             atomicParams                   = {};
2619             atomicParams.pOsResource       = m_basicFeature->m_resMetadataBuffer;
2620             atomicParams.dwResourceOffset  = resourceOffset.dwEncodeStats + resourceOffset.dwAverageQP;
2621             atomicParams.dwDataSize        = sizeof(uint32_t);
2622             atomicParams.Operation         = mhw::mi::MHW_MI_ATOMIC_AND;
2623             atomicParams.bInlineData       = true;
2624             atomicParams.dwOperand1Data[0] = 0xFF;
2625             ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_ATOMIC)(cmdBuffer));
2626         }
2627 
2628         PMOS_RESOURCE resFrameStatStreamOutBuffer = m_basicFeature->m_recycleBuf->GetBuffer(FrameStatStreamOutBuffer, 0);
2629         ENCODE_CHK_NULL_RETURN(resFrameStatStreamOutBuffer);
2630 
2631         // LCUSkipIn8x8Unit
2632         miCpyMemMemParams.presSrc     = resFrameStatStreamOutBuffer;
2633         miCpyMemMemParams.dwSrcOffset = 7 * sizeof(uint32_t);
2634         miCpyMemMemParams.presDst     = m_basicFeature->m_resMetadataBuffer;
2635         miCpyMemMemParams.dwDstOffset = resourceOffset.dwEncodeStats + resourceOffset.dwSkipCodingUnitsCount;
2636         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_COPY_MEM_MEM)(cmdBuffer));
2637         ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_basicFeature->m_resMetadataBuffer, resourceOffset.dwEncodeStats + resourceOffset.dwSkipCodingUnitsCount, resFrameStatStreamOutBuffer, 7 * sizeof(uint32_t), mhw::mi::MHW_MI_ATOMIC_ADD));
2638         ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_basicFeature->m_resMetadataBuffer, resourceOffset.dwEncodeStats + resourceOffset.dwSkipCodingUnitsCount, resFrameStatStreamOutBuffer, 7 * sizeof(uint32_t), mhw::mi::MHW_MI_ATOMIC_ADD));
2639         ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_basicFeature->m_resMetadataBuffer, resourceOffset.dwEncodeStats + resourceOffset.dwSkipCodingUnitsCount, resFrameStatStreamOutBuffer, 7 * sizeof(uint32_t), mhw::mi::MHW_MI_ATOMIC_ADD));
2640 
2641         // NumCU_IntraDC, NumCU_IntraPlanar, NumCU_IntraAngular
2642         miCpyMemMemParams.presSrc     = resFrameStatStreamOutBuffer;
2643         miCpyMemMemParams.dwSrcOffset = 20 * sizeof(uint32_t);
2644         miCpyMemMemParams.dwDstOffset = resourceOffset.dwEncodeStats + resourceOffset.dwIntraCodingUnitsCount;
2645         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_COPY_MEM_MEM)(cmdBuffer));
2646         ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_basicFeature->m_resMetadataBuffer, resourceOffset.dwEncodeStats + resourceOffset.dwIntraCodingUnitsCount, resFrameStatStreamOutBuffer, 21 * sizeof(uint32_t), mhw::mi::MHW_MI_ATOMIC_ADD));
2647         ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_basicFeature->m_resMetadataBuffer, resourceOffset.dwEncodeStats + resourceOffset.dwIntraCodingUnitsCount, resFrameStatStreamOutBuffer, 22 * sizeof(uint32_t), mhw::mi::MHW_MI_ATOMIC_ADD));
2648 
2649         //NumCU_Merge (LCUSkipIn8x8Unit), NumCU_MVdirL0, NumCU_MVdirL1, NumCU_MVdirBi
2650         miCpyMemMemParams.presSrc     = resFrameStatStreamOutBuffer;
2651         miCpyMemMemParams.dwSrcOffset = 27 * sizeof(uint32_t);
2652         miCpyMemMemParams.dwDstOffset = resourceOffset.dwEncodeStats + resourceOffset.dwInterCodingUnitsCount;
2653         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_COPY_MEM_MEM)(cmdBuffer));
2654         ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_basicFeature->m_resMetadataBuffer, resourceOffset.dwEncodeStats + resourceOffset.dwInterCodingUnitsCount, resFrameStatStreamOutBuffer, 28 * sizeof(uint32_t), mhw::mi::MHW_MI_ATOMIC_ADD));
2655         ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_basicFeature->m_resMetadataBuffer, resourceOffset.dwEncodeStats + resourceOffset.dwInterCodingUnitsCount, resFrameStatStreamOutBuffer, 29 * sizeof(uint32_t), mhw::mi::MHW_MI_ATOMIC_ADD));
2656         ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_basicFeature->m_resMetadataBuffer, resourceOffset.dwEncodeStats + resourceOffset.dwInterCodingUnitsCount, resFrameStatStreamOutBuffer, 30 * sizeof(uint32_t), mhw::mi::MHW_MI_ATOMIC_ADD));
2657         ENCODE_CHK_STATUS_RETURN(xCalAtomic(m_basicFeature->m_resMetadataBuffer, resourceOffset.dwEncodeStats + resourceOffset.dwInterCodingUnitsCount, m_basicFeature->m_resMetadataBuffer, resourceOffset.dwEncodeStats + resourceOffset.dwSkipCodingUnitsCount, mhw::mi::MHW_MI_ATOMIC_SUB));
2658 
2659         // Average MV_X/MV_Y, report (0,0) as temp solution, later may need kernel involved
2660         storeDataParams.dwResourceOffset = resourceOffset.dwEncodeStats + resourceOffset.dwAverageMotionEstimationXDirection;
2661         storeDataParams.dwValue          = 0;
2662         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(cmdBuffer));
2663 
2664         storeDataParams.dwResourceOffset = resourceOffset.dwEncodeStats + resourceOffset.dwAverageMotionEstimationYDirection;
2665         storeDataParams.dwValue          = 0;
2666         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(cmdBuffer));
2667 
2668         return eStatus;
2669     }
2670 
2671 #if USE_CODECHAL_DEBUG_TOOL
DumpInput()2672     MOS_STATUS HevcVdencPkt::DumpInput()
2673     {
2674         ENCODE_FUNC_CALL();
2675         ENCODE_CHK_NULL_RETURN(m_pipeline);
2676         ENCODE_CHK_NULL_RETURN(m_basicFeature);
2677 
2678         CodechalDebugInterface *debugInterface = m_pipeline->GetDebugInterface();
2679         ENCODE_CHK_NULL_RETURN(debugInterface);
2680 
2681         debugInterface->m_DumpInputNum         = m_basicFeature->m_frameNum - 1;
2682 
2683         ENCODE_CHK_NULL_RETURN(m_basicFeature->m_ref.GetCurrRefList());
2684         CODEC_REF_LIST currRefList = *((CODEC_REF_LIST *)m_basicFeature->m_ref.GetCurrRefList());
2685 
2686         ENCODE_CHK_STATUS_RETURN(debugInterface->DumpYUVSurface(
2687             &currRefList.sRefRawBuffer,
2688             CodechalDbgAttr::attrEncodeRawInputSurface,
2689             "SrcSurf"))
2690         return MOS_STATUS_SUCCESS;
2691     }
2692 
DumpResources(EncodeStatusMfx * encodeStatusMfx,EncodeStatusReportData * statusReportData)2693     MOS_STATUS HevcVdencPkt::DumpResources(
2694         EncodeStatusMfx *       encodeStatusMfx,
2695         EncodeStatusReportData *statusReportData)
2696     {
2697         ENCODE_FUNC_CALL();
2698         ENCODE_CHK_NULL_RETURN(encodeStatusMfx);
2699         ENCODE_CHK_NULL_RETURN(statusReportData);
2700         ENCODE_CHK_NULL_RETURN(m_pipeline);
2701         ENCODE_CHK_NULL_RETURN(m_statusReport);
2702         ENCODE_CHK_NULL_RETURN(m_basicFeature);
2703         ENCODE_CHK_NULL_RETURN(m_basicFeature->m_trackedBuf);
2704 
2705         CodechalDebugInterface *debugInterface = m_pipeline->GetStatusReportDebugInterface();
2706         ENCODE_CHK_NULL_RETURN(debugInterface);
2707 
2708         CODEC_REF_LIST currRefList = *((CODEC_REF_LIST *)statusReportData->currRefList);
2709         currRefList.RefPic         = statusReportData->currOriginalPic;
2710 
2711         debugInterface->m_currPic            = statusReportData->currOriginalPic;
2712         debugInterface->m_bufferDumpFrameNum = m_basicFeature->m_frameNum - 1;
2713         debugInterface->m_frameType          = encodeStatusMfx->pictureCodingType;
2714 
2715         ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBuffer(
2716             &currRefList.resBitstreamBuffer,
2717             CodechalDbgAttr::attrBitstream,
2718             "_PAK",
2719             statusReportData->bitstreamSize,
2720             0,
2721             CODECHAL_NUM_MEDIA_STATES));
2722 
2723         ENCODE_CHK_STATUS_RETURN(debugInterface->DumpData(
2724             statusReportData,
2725             sizeof(EncodeStatusReportData),
2726             CodechalDbgAttr::attrStatusReport,
2727             "EncodeStatusReport_Buffer"));
2728 
2729         PMOS_RESOURCE frameStatStreamOutBuffer = m_basicFeature->m_recycleBuf->GetBuffer(FrameStatStreamOutBuffer, 0);
2730         ENCODE_CHK_NULL_RETURN(frameStatStreamOutBuffer);
2731         ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBuffer(
2732             frameStatStreamOutBuffer,
2733             CodechalDbgAttr::attrFrameState,
2734             "FrameStatus",
2735             frameStatStreamOutBuffer->iSize,
2736             0,
2737             CODECHAL_NUM_MEDIA_STATES));
2738 
2739         MOS_SURFACE *ds4xSurface = m_basicFeature->m_trackedBuf->GetSurface(
2740             BufferType::ds4xSurface, currRefList.ucScalingIdx);
2741 
2742         if (ds4xSurface != nullptr)
2743         {
2744             ENCODE_CHK_STATUS_RETURN(debugInterface->DumpYUVSurface(
2745                 ds4xSurface,
2746                 CodechalDbgAttr::attrReconstructedSurface,
2747                 "4xScaledSurf"))
2748         }
2749 
2750         MOS_SURFACE *ds8xSurface = m_basicFeature->m_trackedBuf->GetSurface(
2751             BufferType::ds8xSurface, currRefList.ucScalingIdx);
2752 
2753         if (ds8xSurface != nullptr)
2754         {
2755             ENCODE_CHK_STATUS_RETURN(debugInterface->DumpYUVSurface(
2756                 ds8xSurface,
2757                 CodechalDbgAttr::attrReconstructedSurface,
2758                 "8xScaledSurf"))
2759         }
2760 
2761         MOS_RESOURCE *mbCodedBuffer = m_basicFeature->m_trackedBuf->GetBuffer(
2762             BufferType::mbCodedBuffer, currRefList.ucScalingIdx);
2763         if (mbCodedBuffer != nullptr)
2764         {
2765             ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBuffer(
2766                 mbCodedBuffer,
2767                 CodechalDbgAttr::attrVdencOutput,
2768                 "_MbCode",
2769                 m_basicFeature->m_mbCodeSize,
2770                 0,
2771                 CODECHAL_NUM_MEDIA_STATES));
2772         }
2773 
2774         // Slice Size Conformance
2775         if (m_hevcSeqParams->SliceSizeControl)
2776         {
2777             uint32_t dwSize = CODECHAL_HEVC_MAX_NUM_SLICES_LVL_6 * CODECHAL_CACHELINE_SIZE;
2778             if (!m_hevcPicParams->tiles_enabled_flag || m_pipeline->GetPipeNum() <= 1)
2779             {
2780                 // Slice Size StreamOut Surface
2781                 ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBuffer(
2782                     m_basicFeature->m_recycleBuf->GetBuffer(LcuBaseAddressBuffer, 0),
2783                     CodechalDbgAttr::attrVdencOutput,
2784                     "_SliceSize",
2785                     dwSize,
2786                     0,
2787                     CODECHAL_NUM_MEDIA_STATES));
2788             }
2789 
2790             dwSize          = MOS_ALIGN_CEIL(4, CODECHAL_CACHELINE_SIZE);
2791             auto dssFeature = dynamic_cast<HevcEncodeDss *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcVdencDssFeature));
2792             ENCODE_CHK_NULL_RETURN(dssFeature);
2793             PMOS_RESOURCE resSliceCountBuffer     = nullptr;
2794             PMOS_RESOURCE resVDEncModeTimerBuffer = nullptr;
2795             ENCODE_CHK_STATUS_RETURN(dssFeature->GetDssBuffer(resSliceCountBuffer, resVDEncModeTimerBuffer));
2796             // Slice Count buffer 1 DW = 4 Bytes
2797             ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBuffer(
2798                 resSliceCountBuffer,
2799                 CodechalDbgAttr::attrVdencOutput,
2800                 "_SliceCount",
2801                 dwSize,
2802                 0,
2803                 CODECHAL_NUM_MEDIA_STATES));
2804 
2805             // VDEncMode Timer buffer 1 DW = 4 Bytes
2806             ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBuffer(
2807                 resVDEncModeTimerBuffer,
2808                 CodechalDbgAttr::attrVdencOutput,
2809                 "_ModeTimer",
2810                 dwSize,
2811                 0,
2812                 CODECHAL_NUM_MEDIA_STATES));
2813         }
2814 
2815         auto          streamInBufferSize = (MOS_ALIGN_CEIL(m_basicFeature->m_frameWidth, 64) / 32) * (MOS_ALIGN_CEIL(m_basicFeature->m_frameHeight, 64) / 32) * CODECHAL_CACHELINE_SIZE;
2816         PMOS_RESOURCE streamInbuffer     = m_basicFeature->m_recycleBuf->GetBuffer(RecycleResId::StreamInBuffer, debugInterface->m_bufferDumpFrameNum);
2817         if (streamInbuffer)
2818         {
2819             ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBuffer(
2820                 streamInbuffer,
2821                 CodechalDbgAttr::attrStreamIn,
2822                 "_ROIStreamin",
2823                 streamInBufferSize,
2824                 0,
2825                 CODECHAL_NUM_MEDIA_STATES))
2826         }
2827 
2828         ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBltOutput(
2829             &currRefList.sRefReconBuffer,
2830             CodechalDbgAttr::attrDecodeBltOutput));
2831         ENCODE_CHK_STATUS_RETURN(debugInterface->DumpYUVSurface(
2832             &currRefList.sRefReconBuffer,
2833             CodechalDbgAttr::attrReconstructedSurface,
2834             "ReconSurf"))
2835 
2836         ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBltOutput(
2837             &currRefList.sRefRawBuffer,
2838             CodechalDbgAttr::attrDecodeBltOutput));
2839 
2840         return MOS_STATUS_SUCCESS;
2841     }
2842 
2843 #endif
2844 
MHW_SETPAR_DECL_SRC(HCP_PIPE_MODE_SELECT,HevcVdencPkt)2845     MHW_SETPAR_DECL_SRC(HCP_PIPE_MODE_SELECT, HevcVdencPkt)
2846     {
2847         params.codecStandardSelect = CodecHal_GetStandardFromMode(m_basicFeature->m_mode) - CODECHAL_HCP_BASE;
2848         params.bStreamOutEnabled   = true;
2849         params.bVdencEnabled       = true;
2850         params.codecSelect         = 1;
2851 
2852         if (m_pipeline->GetPipeNum() > 1)
2853         {
2854             // Running in the multiple VDBOX mode
2855             if (m_pipeline->IsFirstPipe())
2856             {
2857                 params.multiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_LEFT;
2858             }
2859             else if (m_pipeline->IsLastPipe())
2860             {
2861                 params.multiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_RIGHT;
2862             }
2863             else
2864             {
2865                 params.multiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_MIDDLE;
2866             }
2867             params.pipeWorkMode = MHW_VDBOX_HCP_PIPE_WORK_MODE_CODEC_BE;
2868         }
2869         else
2870         {
2871             params.multiEngineMode = MHW_VDBOX_HCP_MULTI_ENGINE_MODE_FE_LEGACY;
2872             params.pipeWorkMode    = MHW_VDBOX_HCP_PIPE_WORK_MODE_LEGACY;
2873         }
2874 
2875         if (m_hevcPicParams->tiles_enabled_flag)
2876         {
2877             RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, IsTileReplayEnabled, params.bTileBasedReplayMode);
2878         }
2879         else
2880         {
2881             params.bTileBasedReplayMode = 0;
2882         }
2883 
2884         auto cpInterface     = m_hwInterface->GetCpInterface();
2885         bool twoPassScalable = params.multiEngineMode != MHW_VDBOX_HCP_MULTI_ENGINE_MODE_FE_LEGACY && !params.bTileBasedReplayMode;
2886 
2887         ENCODE_CHK_NULL_RETURN(cpInterface);
2888         params.setProtectionSettings = [=](uint32_t *data) { return cpInterface->SetProtectionSettingsForHcpPipeModeSelect(data, twoPassScalable); };
2889 
2890         auto waTable = m_osInterface->pfnGetWaTable(m_osInterface);
2891         ENCODE_CHK_NULL_RETURN(waTable);
2892 
2893         if(MEDIA_IS_WA(waTable, Wa_14012254246))
2894         {
2895             MediaUserSetting::Value outValue;
2896             ReadUserSetting(
2897                 m_userSettingPtr,
2898                 outValue,
2899                 "DisableTlbPrefetch",
2900                 MediaUserSetting::Group::Sequence);
2901             params.prefetchDisable = outValue.Get<bool>();
2902         }
2903 
2904         return MOS_STATUS_SUCCESS;
2905     }
2906 
    //!
    //! \brief    Packet-level parameter setter for HCP_TILE_CODING
    //! \details  Sets params.numberOfActiveBePipes to the pipeline's pipe count;
    //!           no other field is touched here.
    //! \return   MOS_STATUS
    //!           Always MOS_STATUS_SUCCESS
    //!
    MHW_SETPAR_DECL_SRC(HCP_TILE_CODING, HevcVdencPkt)
    {
        ENCODE_FUNC_CALL();
        // The pipe count reported by the pipeline is forwarded unchanged.
        params.numberOfActiveBePipes = m_pipeline->GetPipeNum();
        return MOS_STATUS_SUCCESS;
    }
2913 
AddAllCmds_HCP_PAK_INSERT_OBJECT(PMOS_COMMAND_BUFFER cmdBuffer) const2914     MOS_STATUS HevcVdencPkt::AddAllCmds_HCP_PAK_INSERT_OBJECT(PMOS_COMMAND_BUFFER cmdBuffer) const
2915     {
2916         ENCODE_FUNC_CALL();
2917 
2918         ENCODE_CHK_NULL_RETURN(cmdBuffer);
2919 
2920         bool bLastPicInSeq    = m_basicFeature->m_lastPicInSeq;
2921         bool bLastPicInStream = m_basicFeature->m_lastPicInStream;
2922         auto &params = m_hcpItf->MHW_GETPAR_F(HCP_PAK_INSERT_OBJECT)();
2923         params       = {};
2924 
2925         if (bLastPicInSeq && bLastPicInStream)
2926         {
2927             params = {};
2928 
2929             uint32_t dwPadding[3];
2930 
2931             params.dwPadding                   = sizeof(dwPadding) / sizeof(dwPadding[0]);
2932             params.bHeaderLengthExcludeFrmSize = 0;
2933             params.bEndOfSlice                 = 1;
2934             params.bLastHeader                 = 1;
2935             params.bEmulationByteBitsInsert    = 0;
2936             params.uiSkipEmulationCheckCount   = 0;
2937             params.dataBitsInLastDw            = 16;
2938             params.databyteoffset              = 0;
2939             params.bIndirectPayloadEnable      = 0;
2940 
2941             m_hcpItf->MHW_ADDCMD_F(HCP_PAK_INSERT_OBJECT)(cmdBuffer);
2942 
2943             dwPadding[0] = (uint32_t)((1 << 16) | ((HEVC_NAL_UT_EOS << 1) << 24));
2944             dwPadding[1] = (1L | (1L << 24));
2945             dwPadding[2] = (HEVC_NAL_UT_EOB << 1) | (1L << 8);
2946             MHW_MI_CHK_STATUS(Mhw_AddCommandCmdOrBB(m_osInterface, cmdBuffer, nullptr, &dwPadding[0], sizeof(dwPadding)));
2947         }
2948         else if (bLastPicInSeq || bLastPicInStream)
2949         {
2950             params = {};
2951             uint32_t dwLastPicInSeqData[2], dwLastPicInStreamData[2];
2952 
2953             params.dwPadding                   = bLastPicInSeq * 2 + bLastPicInStream * 2;
2954             params.bHeaderLengthExcludeFrmSize = 0;
2955             params.bEndOfSlice                 = 1;
2956             params.bLastHeader                 = 1;
2957             params.bEmulationByteBitsInsert    = 0;
2958             params.uiSkipEmulationCheckCount   = 0;
2959             params.dataBitsInLastDw            = 8;
2960             params.databyteoffset              = 0;
2961             params.bIndirectPayloadEnable      = 0;
2962 
2963             m_hcpItf->MHW_ADDCMD_F(HCP_PAK_INSERT_OBJECT)(cmdBuffer);
2964 
2965             if (bLastPicInSeq)
2966             {
2967                 dwLastPicInSeqData[0] = (uint32_t)((1 << 16) | ((HEVC_NAL_UT_EOS << 1) << 24));
2968                 dwLastPicInSeqData[1] = 1;  // nuh_temporal_id_plus1
2969                 MHW_MI_CHK_STATUS(Mhw_AddCommandCmdOrBB(m_osInterface, cmdBuffer, nullptr, &dwLastPicInSeqData[0], sizeof(dwLastPicInSeqData)));
2970             }
2971 
2972             if (bLastPicInStream)
2973             {
2974                 dwLastPicInStreamData[0] = (uint32_t)((1 << 16) | ((HEVC_NAL_UT_EOB << 1) << 24));
2975                 dwLastPicInStreamData[1] = 1;  // nuh_temporal_id_plus1
2976                 MHW_MI_CHK_STATUS(Mhw_AddCommandCmdOrBB(m_osInterface, cmdBuffer, nullptr, &dwLastPicInStreamData[0], sizeof(dwLastPicInStreamData)));
2977             }
2978         }
2979         else
2980         {
2981             PCODECHAL_NAL_UNIT_PARAMS *ppNalUnitParams = (CODECHAL_NAL_UNIT_PARAMS **)m_nalUnitParams;
2982 
2983             auto brcFeature = dynamic_cast<HEVCEncodeBRC *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcBrcFeature));
2984             ENCODE_CHK_NULL_RETURN(brcFeature);
2985 
2986             PMHW_BATCH_BUFFER batchBuffer = brcFeature->GetVdenc2ndLevelBatchBuffer(m_pipeline->m_currRecycledBufIdx);
2987             PBSBuffer         pBsBuffer   = &(m_basicFeature->m_bsBuffer);
2988             uint32_t          bitSize     = 0;
2989             uint32_t          offSet      = 0;
2990 
2991             //insert AU, SPS, PSP headers before first slice header
2992             if (m_basicFeature->m_curNumSlices == 0)
2993             {
2994                 uint32_t maxBytesInPakInsertObjCmd = ((2 << 11) - 1) * 4;  // 12 bits for Length field in PAK_INSERT_OBJ cmd
2995 
2996                 for (auto i = 0; i < HEVC_MAX_NAL_UNIT_TYPE; i++)
2997                 {
2998                     uint32_t nalunitPosiSize   = ppNalUnitParams[i]->uiSize;
2999                     uint32_t nalunitPosiOffset = ppNalUnitParams[i]->uiOffset;
3000 
3001                     while (nalunitPosiSize > 0)
3002                     {
3003                         bitSize = MOS_MIN(maxBytesInPakInsertObjCmd * 8, nalunitPosiSize * 8);
3004                         offSet  = nalunitPosiOffset;
3005 
3006                         params = {};
3007 
3008                         params.dwPadding                 = (MOS_ALIGN_CEIL((bitSize + 7) >> 3, sizeof(uint32_t))) / sizeof(uint32_t);
3009                         params.bEmulationByteBitsInsert  = ppNalUnitParams[i]->bInsertEmulationBytes;
3010                         params.uiSkipEmulationCheckCount = ppNalUnitParams[i]->uiSkipEmulationCheckCount;
3011                         params.dataBitsInLastDw          = bitSize % 32;
3012                         if (params.dataBitsInLastDw == 0)
3013                         {
3014                             params.dataBitsInLastDw = 32;
3015                         }
3016 
3017                         if (nalunitPosiSize > maxBytesInPakInsertObjCmd)
3018                         {
3019                             nalunitPosiSize -= maxBytesInPakInsertObjCmd;
3020                             nalunitPosiOffset += maxBytesInPakInsertObjCmd;
3021                         }
3022                         else
3023                         {
3024                             nalunitPosiSize = 0;
3025                         }
3026                         m_hcpItf->MHW_ADDCMD_F(HCP_PAK_INSERT_OBJECT)(cmdBuffer);
3027                         uint32_t byteSize = (bitSize + 7) >> 3;
3028                         if (byteSize)
3029                         {
3030                             MHW_MI_CHK_NULL(pBsBuffer);
3031                             MHW_MI_CHK_NULL(pBsBuffer->pBase);
3032                             uint8_t *data = (uint8_t *)(pBsBuffer->pBase + offSet);
3033                             MHW_MI_CHK_STATUS(Mhw_AddCommandCmdOrBB(m_osInterface, cmdBuffer, batchBuffer, data, byteSize));
3034                         }
3035                     }
3036                 }
3037             }
3038 
3039             params = {};
3040             // Insert slice header
3041             params.bLastHeader              = true;
3042             params.bEmulationByteBitsInsert = true;
3043 
3044             // App does the slice header packing, set the skip count passed by the app
3045             PCODEC_ENCODER_SLCDATA slcData    = m_basicFeature->m_slcData;
3046             uint32_t               currSlcIdx = m_basicFeature->m_curNumSlices;
3047 
3048             params.uiSkipEmulationCheckCount = slcData[currSlcIdx].SkipEmulationByteCount;
3049             bitSize                          = slcData[currSlcIdx].BitSize;
3050             offSet                           = slcData[currSlcIdx].SliceOffset;
3051 
3052             if (m_hevcSeqParams->SliceSizeControl)
3053             {
3054                 params.bLastHeader                = false;
3055                 params.bEmulationByteBitsInsert   = false;
3056                 bitSize                           = m_hevcSliceParams->BitLengthSliceHeaderStartingPortion;
3057                 params.bResetBitstreamStartingPos = true;
3058                 params.dwPadding                  = (MOS_ALIGN_CEIL((bitSize + 7) >> 3, sizeof(uint32_t))) / sizeof(uint32_t);
3059                 params.dataBitsInLastDw           = bitSize % 32;
3060                 if (params.dataBitsInLastDw == 0)
3061                 {
3062                     params.dataBitsInLastDw = 32;
3063                 }
3064 
3065                 m_hcpItf->MHW_ADDCMD_F(HCP_PAK_INSERT_OBJECT)(cmdBuffer);
3066                 uint32_t byteSize = (bitSize + 7) >> 3;
3067                 if (byteSize)
3068                 {
3069                     MHW_MI_CHK_NULL(pBsBuffer);
3070                     MHW_MI_CHK_NULL(pBsBuffer->pBase);
3071                     uint8_t *data = (uint8_t *)(pBsBuffer->pBase + offSet);
3072                     MHW_MI_CHK_STATUS(Mhw_AddCommandCmdOrBB(m_osInterface, cmdBuffer, batchBuffer, data, byteSize));
3073                 }
3074 
3075                 // Send HCP_PAK_INSERT_OBJ command. For dynamic slice, we are skipping the beginning part of slice header.
3076                 params.bLastHeader = true;
3077                 bitSize            = bitSize - m_hevcSliceParams->BitLengthSliceHeaderStartingPortion;
3078                 offSet += ((m_hevcSliceParams->BitLengthSliceHeaderStartingPortion + 7) / 8);  // Skips the first 5 bytes which is Start Code + Nal Unit Header
3079                 params.dwPadding        = (MOS_ALIGN_CEIL((bitSize + 7) >> 3, sizeof(uint32_t))) / sizeof(uint32_t);
3080                 params.dataBitsInLastDw = bitSize % 32;
3081                 if (params.dataBitsInLastDw == 0)
3082                 {
3083                     params.dataBitsInLastDw = 32;
3084                 }
3085                 params.bResetBitstreamStartingPos = true;
3086                 m_hcpItf->MHW_ADDCMD_F(HCP_PAK_INSERT_OBJECT)(cmdBuffer);
3087                 byteSize = (bitSize + 7) >> 3;
3088                 if (byteSize)
3089                 {
3090                     MHW_MI_CHK_NULL(pBsBuffer);
3091                     MHW_MI_CHK_NULL(pBsBuffer->pBase);
3092                     uint8_t *data = (uint8_t *)(pBsBuffer->pBase + offSet);
3093                     MHW_MI_CHK_STATUS(Mhw_AddCommandCmdOrBB(m_osInterface, cmdBuffer, batchBuffer, data, byteSize));
3094                 }
3095             }
3096             else
3097             {
3098                 params.dwPadding        = (MOS_ALIGN_CEIL((bitSize + 7) >> 3, sizeof(uint32_t))) / sizeof(uint32_t);
3099                 params.dataBitsInLastDw = bitSize % 32;
3100                 if (params.dataBitsInLastDw == 0)
3101                 {
3102                     params.dataBitsInLastDw = 32;
3103                 }
3104                 m_hcpItf->MHW_ADDCMD_F(HCP_PAK_INSERT_OBJECT)(cmdBuffer);
3105                 uint32_t byteSize = (bitSize + 7) >> 3;
3106                 if (byteSize)
3107                 {
3108                     MHW_MI_CHK_NULL(pBsBuffer);
3109                     MHW_MI_CHK_NULL(pBsBuffer->pBase);
3110                     uint8_t *data = (uint8_t *)(pBsBuffer->pBase + offSet);
3111                     MHW_MI_CHK_STATUS(Mhw_AddCommandCmdOrBB(m_osInterface, cmdBuffer, batchBuffer, data, byteSize));
3112                 }
3113             }
3114         }
3115 
3116         return MOS_STATUS_SUCCESS;
3117     }
3118 
MHW_SETPAR_DECL_SRC(HCP_PIPE_BUF_ADDR_STATE,HevcVdencPkt)3119     MHW_SETPAR_DECL_SRC(HCP_PIPE_BUF_ADDR_STATE, HevcVdencPkt)
3120     {
3121         ENCODE_FUNC_CALL();
3122 
3123         params.Mode                 = m_basicFeature->m_mode;
3124         params.psPreDeblockSurface  = &m_basicFeature->m_reconSurface;
3125         params.psPostDeblockSurface = &m_basicFeature->m_reconSurface;
3126         params.psRawSurface         = m_basicFeature->m_rawSurfaceToPak;
3127 
3128         params.presMetadataLineBuffer       = m_resMetadataLineBuffer;
3129         params.presMetadataTileLineBuffer   = m_resMetadataTileLineBuffer;
3130         params.presMetadataTileColumnBuffer = m_resMetadataTileColumnBuffer;
3131 
3132         params.presCurMvTempBuffer           = m_basicFeature->m_resMvTemporalBuffer;
3133         params.dwLcuStreamOutOffset          = 0;
3134         params.presLcuILDBStreamOutBuffer    = m_resLCUIldbStreamOutBuffer;
3135         params.dwFrameStatStreamOutOffset    = 0;
3136         params.presSseSrcPixelRowStoreBuffer = m_resSSESrcPixelRowStoreBuffer;
3137         params.presPakCuLevelStreamoutBuffer = m_resPakcuLevelStreamOutData;
3138         //    Mos_ResourceIsNull(&m_resPakcuLevelStreamoutData.sResource) ? nullptr : &m_resPakcuLevelStreamoutData.sResource;
3139 
3140         params.bRawIs10Bit = m_basicFeature->m_is10Bit;
3141 
3142 #ifdef _MMC_SUPPORTED
3143         ENCODE_CHK_NULL_RETURN(m_mmcState);
3144         if (m_mmcState->IsMmcEnabled())
3145         {
3146             ENCODE_CHK_STATUS_RETURN(m_mmcState->GetSurfaceMmcState(&m_basicFeature->m_reconSurface, &params.PreDeblockSurfMmcState));
3147             ENCODE_CHK_STATUS_RETURN(m_mmcState->GetSurfaceMmcState(&m_basicFeature->m_rawSurface, &params.RawSurfMmcState));
3148         }
3149         else
3150         {
3151             params.PreDeblockSurfMmcState = MOS_MEMCOMP_DISABLED;
3152             params.RawSurfMmcState        = MOS_MEMCOMP_DISABLED;
3153         }
3154 
3155         CODECHAL_DEBUG_TOOL(
3156             m_basicFeature->m_reconSurface.MmcState = params.PreDeblockSurfMmcState;)
3157 #endif
3158 
3159         m_basicFeature->m_ref.MHW_SETPAR_F(HCP_PIPE_BUF_ADDR_STATE)(params);
3160 
3161         return MOS_STATUS_SUCCESS;
3162     }
3163 
MHW_SETPAR_DECL_SRC(HCP_IND_OBJ_BASE_ADDR_STATE,HevcVdencPkt)3164     MHW_SETPAR_DECL_SRC(HCP_IND_OBJ_BASE_ADDR_STATE, HevcVdencPkt)
3165     {
3166         ENCODE_FUNC_CALL();
3167 
3168         params.presMvObjectBuffer      = m_basicFeature->m_resMbCodeBuffer;
3169         params.dwMvObjectOffset        = m_mvOffset;
3170         params.dwMvObjectSize          = m_basicFeature->m_mbCodeSize - m_mvOffset;
3171         params.presPakBaseObjectBuffer = &m_basicFeature->m_resBitstreamBuffer;
3172         params.dwPakBaseObjectSize     = m_basicFeature->m_bitstreamSize;
3173 
3174         return MOS_STATUS_SUCCESS;
3175     }
3176 
MHW_SETPAR_DECL_SRC(HCP_SLICE_STATE,HevcVdencPkt)3177     MHW_SETPAR_DECL_SRC(HCP_SLICE_STATE, HevcVdencPkt)
3178     {
3179         ENCODE_FUNC_CALL();
3180 
3181         params.intrareffetchdisable = m_pakOnlyPass;
3182 
3183         return MOS_STATUS_SUCCESS;
3184     }
3185 
AddAllCmds_HCP_SURFACE_STATE(PMOS_COMMAND_BUFFER cmdBuffer) const3186     MOS_STATUS HevcVdencPkt::AddAllCmds_HCP_SURFACE_STATE(PMOS_COMMAND_BUFFER cmdBuffer) const
3187     {
3188         ENCODE_FUNC_CALL();
3189         ENCODE_CHK_NULL_RETURN(cmdBuffer);
3190 
3191         m_curHcpSurfStateId = CODECHAL_HCP_SRC_SURFACE_ID;
3192         SETPAR_AND_ADDCMD(HCP_SURFACE_STATE, m_hcpItf, cmdBuffer);
3193 
3194         m_curHcpSurfStateId = CODECHAL_HCP_DECODED_SURFACE_ID;
3195         SETPAR_AND_ADDCMD(HCP_SURFACE_STATE, m_hcpItf, cmdBuffer);
3196 
3197         m_curHcpSurfStateId = CODECHAL_HCP_REF_SURFACE_ID;
3198         SETPAR_AND_ADDCMD(HCP_SURFACE_STATE, m_hcpItf, cmdBuffer);
3199 
3200         return MOS_STATUS_SUCCESS;
3201     }
3202 
AddAllCmds_HCP_REF_IDX_STATE(PMOS_COMMAND_BUFFER cmdBuffer) const3203     MOS_STATUS HevcVdencPkt::AddAllCmds_HCP_REF_IDX_STATE(PMOS_COMMAND_BUFFER cmdBuffer) const
3204     {
3205         MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3206         ENCODE_FUNC_CALL();
3207         ENCODE_CHK_NULL_RETURN(cmdBuffer);
3208 
3209         auto &params = m_hcpItf->MHW_GETPAR_F(HCP_REF_IDX_STATE)();
3210         params       = {};
3211 
3212         uint32_t                          currSlcIdx    = m_basicFeature->m_curNumSlices;
3213         PCODEC_HEVC_ENCODE_PICTURE_PARAMS hevcPicParams = (CODEC_HEVC_ENCODE_PICTURE_PARAMS *)m_hevcPicParams;
3214         PCODEC_HEVC_ENCODE_SLICE_PARAMS   hevcSlcParams = (CODEC_HEVC_ENCODE_SLICE_PARAMS *)&m_hevcSliceParams[currSlcIdx];
3215 
3216         CODEC_PICTURE currPic                                     = {};
3217         CODEC_PICTURE refPicList[2][CODEC_MAX_NUM_REF_FRAME_HEVC] = {};
3218         void **       hevcRefList                                 = nullptr;
3219         int32_t       pocCurrPic                                  = 0;
3220         int8_t *      pRefIdxMapping                              = nullptr;
3221         int32_t       pocList[CODEC_MAX_NUM_REF_FRAME_HEVC]       = {};
3222 
3223         if (hevcSlcParams->slice_type != encodeHevcISlice)
3224         {
3225             currPic                                    = hevcPicParams->CurrReconstructedPic;
3226             params.ucList                              = LIST_0;
3227             params.numRefIdxLRefpiclistnumActiveMinus1 = hevcSlcParams->num_ref_idx_l0_active_minus1;
3228             eStatus                                    = MOS_SecureMemcpy(&refPicList, sizeof(refPicList), &hevcSlcParams->RefPicList, sizeof(hevcSlcParams->RefPicList));
3229             if (eStatus != MOS_STATUS_SUCCESS)
3230             {
3231                 ENCODE_ASSERTMESSAGE("Failed to copy memory.");
3232                 return eStatus;
3233             }
3234 
3235             hevcRefList = (void **)m_basicFeature->m_ref.GetRefList();
3236             pocCurrPic  = hevcPicParams->CurrPicOrderCnt;
3237             for (auto i = 0; i < CODEC_MAX_NUM_REF_FRAME_HEVC; i++)
3238             {
3239                 pocList[i] = hevcPicParams->RefFramePOCList[i];
3240             }
3241 
3242             pRefIdxMapping = m_basicFeature->m_ref.GetRefIdxMapping();
3243 
3244             MHW_ASSERT(currPic.FrameIdx != 0x7F);
3245 
3246             for (uint8_t i = 0; i <= params.numRefIdxLRefpiclistnumActiveMinus1; i++)
3247             {
3248                 uint8_t refFrameIDx = refPicList[params.ucList][i].FrameIdx;
3249                 if (refFrameIDx < CODEC_MAX_NUM_REF_FRAME_HEVC)
3250                 {
3251                     MHW_ASSERT(*(pRefIdxMapping + refFrameIDx) >= 0);
3252 
3253                     params.listEntryLxReferencePictureFrameIdRefaddr07[i] = *(pRefIdxMapping + refFrameIDx);
3254                     int32_t pocDiff                                       = pocCurrPic - pocList[refFrameIDx];
3255                     params.referencePictureTbValue[i]                     = (uint8_t)CodecHal_Clip3(-128, 127, pocDiff);
3256                     CODEC_REF_LIST **refList                              = (CODEC_REF_LIST **)hevcRefList;
3257                     params.longtermreference[i]                           = CodecHal_PictureIsLongTermRef(refList[currPic.FrameIdx]->RefList[refFrameIDx]);
3258                     params.bottomFieldFlag[i]                             = 1;
3259                 }
3260                 else
3261                 {
3262                     params.listEntryLxReferencePictureFrameIdRefaddr07[i] = 0;
3263                     params.referencePictureTbValue[i]                     = 0;
3264                     params.longtermreference[i]                           = false;
3265                     params.bottomFieldFlag[i]                             = 0;
3266                 }
3267             }
3268 
3269             for (uint8_t i = (uint8_t)(params.numRefIdxLRefpiclistnumActiveMinus1 + 1); i < 16; i++)
3270             {
3271                 params.listEntryLxReferencePictureFrameIdRefaddr07[i] = 0;
3272                 params.referencePictureTbValue[i]                     = 0;
3273                 params.longtermreference[i]                           = false;
3274                 params.bottomFieldFlag[i]                             = 0;
3275             }
3276 
3277             ENCODE_CHK_NULL_RETURN(m_featureManager);
3278             auto sccFeature = dynamic_cast<HevcVdencScc *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcVdencSccFeature));
3279             ENCODE_CHK_NULL_RETURN(sccFeature);
3280 
3281             MHW_CHK_STATUS_RETURN(sccFeature->MHW_SETPAR_F(HCP_REF_IDX_STATE)(params));
3282 
3283             m_hcpItf->MHW_ADDCMD_F(HCP_REF_IDX_STATE)(cmdBuffer);
3284 
3285             params = {};
3286 
3287             if (hevcSlcParams->slice_type == encodeHevcBSlice)
3288             {
3289                 params.ucList                              = LIST_1;
3290                 params.numRefIdxLRefpiclistnumActiveMinus1 = hevcSlcParams->num_ref_idx_l1_active_minus1;
3291                 for (uint8_t i = 0; i <= params.numRefIdxLRefpiclistnumActiveMinus1; i++)
3292                 {
3293                     uint8_t refFrameIDx = refPicList[params.ucList][i].FrameIdx;
3294                     if (refFrameIDx < CODEC_MAX_NUM_REF_FRAME_HEVC)
3295                     {
3296                         MHW_ASSERT(*(pRefIdxMapping + refFrameIDx) >= 0);
3297 
3298                         params.listEntryLxReferencePictureFrameIdRefaddr07[i] = *(pRefIdxMapping + refFrameIDx);
3299                         int32_t pocDiff                                       = pocCurrPic - pocList[refFrameIDx];
3300                         params.referencePictureTbValue[i]                     = (uint8_t)CodecHal_Clip3(-128, 127, pocDiff);
3301                         CODEC_REF_LIST **refList                              = (CODEC_REF_LIST **)hevcRefList;
3302                         params.longtermreference[i]                           = CodecHal_PictureIsLongTermRef(refList[currPic.FrameIdx]->RefList[refFrameIDx]);
3303                         params.bottomFieldFlag[i]                             = 1;
3304                     }
3305                     else
3306                     {
3307                         params.listEntryLxReferencePictureFrameIdRefaddr07[i] = 0;
3308                         params.referencePictureTbValue[i]                     = 0;
3309                         params.longtermreference[i]                           = false;
3310                         params.bottomFieldFlag[i]                             = 0;
3311                     }
3312                 }
3313 
3314                 for (uint8_t i = (uint8_t)(params.numRefIdxLRefpiclistnumActiveMinus1 + 1); i < 16; i++)
3315                 {
3316                     params.listEntryLxReferencePictureFrameIdRefaddr07[i] = 0;
3317                     params.referencePictureTbValue[i]                     = 0;
3318                     params.longtermreference[i]                           = false;
3319                     params.bottomFieldFlag[i]                             = 0;
3320                 }
3321                 m_hcpItf->MHW_ADDCMD_F(HCP_REF_IDX_STATE)(cmdBuffer);
3322             }
3323         }
3324 
3325         return MOS_STATUS_SUCCESS;
3326     }
3327 
AddAllCmds_HCP_FQM_STATE(PMOS_COMMAND_BUFFER cmdBuffer) const3328     MOS_STATUS HevcVdencPkt::AddAllCmds_HCP_FQM_STATE(PMOS_COMMAND_BUFFER cmdBuffer) const
3329     {
3330         ENCODE_FUNC_CALL();
3331         ENCODE_CHK_NULL_RETURN(cmdBuffer);
3332 
3333         MHW_MI_CHK_NULL(m_hevcIqMatrixParams);
3334 
3335         auto &params = m_hcpItf->MHW_GETPAR_F(HCP_FQM_STATE)();
3336         params       = {};
3337 
3338         auto      iqMatrix = (PMHW_VDBOX_HEVC_QM_PARAMS)m_hevcIqMatrixParams;
3339         uint16_t *fqMatrix = (uint16_t *)params.quantizermatrix;
3340 
3341         /* 4x4 */
3342         for (uint8_t i = 0; i < 32; i++)
3343         {
3344             params.quantizermatrix[i] = 0;
3345         }
3346         for (uint8_t intraInter = 0; intraInter <= 1; intraInter++)
3347         {
3348             params.intraInter     = intraInter;
3349             params.sizeid         = 0;
3350             params.colorComponent = 0;
3351 
3352             for (uint8_t i = 0; i < 16; i++)
3353             {
3354                 fqMatrix[i] =
3355                     GetReciprocalScalingValue(iqMatrix->List4x4[3 * intraInter][i]);
3356             }
3357 
3358             m_hcpItf->MHW_ADDCMD_F(HCP_FQM_STATE)(cmdBuffer);
3359         }
3360 
3361         /* 8x8, 16x16 and 32x32 */
3362         for (uint8_t i = 0; i < 32; i++)
3363         {
3364             params.quantizermatrix[i] = 0;
3365         }
3366         for (uint8_t intraInter = 0; intraInter <= 1; intraInter++)
3367         {
3368             params.intraInter     = intraInter;
3369             params.sizeid         = 1;
3370             params.colorComponent = 0;
3371 
3372             for (uint8_t i = 0; i < 64; i++)
3373             {
3374                 fqMatrix[i] =
3375                     GetReciprocalScalingValue(iqMatrix->List8x8[3 * intraInter][i]);
3376             }
3377 
3378             m_hcpItf->MHW_ADDCMD_F(HCP_FQM_STATE)(cmdBuffer);
3379         }
3380 
3381         /* 16x16 DC */
3382         for (uint8_t i = 0; i < 32; i++)
3383         {
3384             params.quantizermatrix[i] = 0;
3385         }
3386         for (uint8_t intraInter = 0; intraInter <= 1; intraInter++)
3387         {
3388             params.intraInter     = intraInter;
3389             params.sizeid         = 2;
3390             params.colorComponent = 0;
3391             params.fqmDcValue1Dc  = GetReciprocalScalingValue(iqMatrix->ListDC16x16[3 * intraInter]);
3392 
3393             for (uint8_t i = 0; i < 64; i++)
3394             {
3395                 fqMatrix[i] =
3396                     GetReciprocalScalingValue(iqMatrix->List16x16[3 * intraInter][i]);
3397             }
3398 
3399             m_hcpItf->MHW_ADDCMD_F(HCP_FQM_STATE)(cmdBuffer);
3400         }
3401 
3402         /* 32x32 DC */
3403         for (uint8_t i = 0; i < 32; i++)
3404         {
3405             params.quantizermatrix[i] = 0;
3406         }
3407         for (uint8_t intraInter = 0; intraInter <= 1; intraInter++)
3408         {
3409             params.intraInter     = intraInter;
3410             params.sizeid         = 3;
3411             params.colorComponent = 0;
3412             params.fqmDcValue1Dc  = GetReciprocalScalingValue(iqMatrix->ListDC32x32[intraInter]);
3413 
3414             for (uint8_t i = 0; i < 64; i++)
3415             {
3416                 fqMatrix[i] =
3417                     GetReciprocalScalingValue(iqMatrix->List32x32[intraInter][i]);
3418             }
3419 
3420             m_hcpItf->MHW_ADDCMD_F(HCP_FQM_STATE)(cmdBuffer);
3421         }
3422 
3423         return MOS_STATUS_SUCCESS;
3424     }
3425 
AddAllCmds_HCP_QM_STATE(PMOS_COMMAND_BUFFER cmdBuffer) const3426     MOS_STATUS HevcVdencPkt::AddAllCmds_HCP_QM_STATE(PMOS_COMMAND_BUFFER cmdBuffer) const
3427     {
3428         ENCODE_FUNC_CALL();
3429         ENCODE_CHK_NULL_RETURN(cmdBuffer);
3430 
3431         MHW_MI_CHK_NULL(m_hevcIqMatrixParams);
3432 
3433         auto &params = m_hcpItf->MHW_GETPAR_F(HCP_QM_STATE)();
3434         params       = {};
3435 
3436         auto     iqMatrix = (PMHW_VDBOX_HEVC_QM_PARAMS)m_hevcIqMatrixParams;
3437         uint8_t *qMatrix  = (uint8_t *)params.quantizermatrix;
3438 
3439         for (uint8_t sizeId = 0; sizeId < 4; sizeId++)  // 4x4, 8x8, 16x16, 32x32
3440         {
3441             for (uint8_t predType = 0; predType < 2; predType++)  // Intra, Inter
3442             {
3443                 for (uint8_t color = 0; color < 3; color++)  // Y, Cb, Cr
3444                 {
3445                     if ((sizeId == 3) && (color != 0))
3446                         break;
3447 
3448                     params.sizeid         = sizeId;
3449                     params.predictionType = predType;
3450                     params.colorComponent = color;
3451                     switch (sizeId)
3452                     {
3453                     case 0:
3454                     case 1:
3455                     default:
3456                         params.dcCoefficient = 0;
3457                         break;
3458                     case 2:
3459                         params.dcCoefficient = iqMatrix->ListDC16x16[3 * predType + color];
3460                         break;
3461                     case 3:
3462                         params.dcCoefficient = iqMatrix->ListDC32x32[predType];
3463                         break;
3464                     }
3465 
3466                     if (sizeId == 0)
3467                     {
3468                         for (uint8_t i = 0; i < 4; i++)
3469                         {
3470                             for (uint8_t ii = 0; ii < 4; ii++)
3471                             {
3472                                 qMatrix[4 * i + ii] = iqMatrix->List4x4[3 * predType + color][4 * i + ii];
3473                             }
3474                         }
3475                     }
3476                     else if (sizeId == 1)
3477                     {
3478                         for (uint8_t i = 0; i < 8; i++)
3479                         {
3480                             for (uint8_t ii = 0; ii < 8; ii++)
3481                             {
3482                                 qMatrix[8 * i + ii] = iqMatrix->List8x8[3 * predType + color][8 * i + ii];
3483                             }
3484                         }
3485                     }
3486                     else if (sizeId == 2)
3487                     {
3488                         for (uint8_t i = 0; i < 8; i++)
3489                         {
3490                             for (uint8_t ii = 0; ii < 8; ii++)
3491                             {
3492                                 qMatrix[8 * i + ii] = iqMatrix->List16x16[3 * predType + color][8 * i + ii];
3493                             }
3494                         }
3495                     }
3496                     else  // 32x32
3497                     {
3498                         for (uint8_t i = 0; i < 8; i++)
3499                         {
3500                             for (uint8_t ii = 0; ii < 8; ii++)
3501                             {
3502                                 qMatrix[8 * i + ii] = iqMatrix->List32x32[predType][8 * i + ii];
3503                             }
3504                         }
3505                     }
3506 
3507                     m_hcpItf->MHW_ADDCMD_F(HCP_QM_STATE)(cmdBuffer);
3508                 }
3509             }
3510         }
3511 
3512         return MOS_STATUS_SUCCESS;
3513     }
3514 
AddAllCmds_HCP_WEIGHTOFFSET_STATE(PMOS_COMMAND_BUFFER cmdBuffer) const3515     MOS_STATUS HevcVdencPkt::AddAllCmds_HCP_WEIGHTOFFSET_STATE(PMOS_COMMAND_BUFFER cmdBuffer) const
3516     {
3517         ENCODE_FUNC_CALL();
3518 
3519         auto wpFeature = dynamic_cast<HevcVdencWeightedPred *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcVdencWpFeature));
3520         ENCODE_CHK_NULL_RETURN(wpFeature);
3521         if (wpFeature->IsEnabled())
3522         {
3523             auto &params                                           = m_hcpItf->MHW_GETPAR_F(HCP_WEIGHTOFFSET_STATE)();
3524             params                                                 = {};
3525             CODEC_HEVC_ENCODE_SLICE_PARAMS *pEncodeHevcSliceParams = (CODEC_HEVC_ENCODE_SLICE_PARAMS *)&m_hevcSliceParams[m_basicFeature->m_curNumSlices];
3526             if (pEncodeHevcSliceParams->slice_type == encodeHevcPSlice ||
3527                 pEncodeHevcSliceParams->slice_type == encodeHevcBSlice)
3528             {
3529                 params.ucList = LIST_0;
3530                 MHW_CHK_STATUS_RETURN(wpFeature->MHW_SETPAR_F(HCP_WEIGHTOFFSET_STATE)(params));
3531                 m_hcpItf->MHW_ADDCMD_F(HCP_WEIGHTOFFSET_STATE)(cmdBuffer);
3532             }
3533 
3534             if (pEncodeHevcSliceParams->slice_type == encodeHevcBSlice)
3535             {
3536                 params.ucList = LIST_1;
3537                 MHW_CHK_STATUS_RETURN(wpFeature->MHW_SETPAR_F(HCP_WEIGHTOFFSET_STATE)(params));
3538                 m_hcpItf->MHW_ADDCMD_F(HCP_WEIGHTOFFSET_STATE)(cmdBuffer);
3539             }
3540         }
3541         return MOS_STATUS_SUCCESS;
3542     }
3543     }
3544 
3545