1 /*
2 * Copyright (c) 2018, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file     encode_pak_integrate_packet.cpp
24 //! \brief    Defines the interface for pak integrate packet
25 //!
26 #include "mos_defs.h"
27 #include "encode_pak_integrate_packet.h"
28 #include "mhw_vdbox.h"
29 #include "encode_hevc_brc.h"
30 #include "encode_status_report_defs.h"
31 #include "mos_os_cp_interface_specific.h"
32 
33 namespace encode {
Init()34     MOS_STATUS HevcPakIntegratePkt::Init()
35     {
36         ENCODE_FUNC_CALL();
37 
38         m_basicFeature = dynamic_cast<HevcBasicFeature *>(m_featureManager->GetFeature(HevcFeatureIDs::basicFeature));
39         ENCODE_CHK_NULL_RETURN(m_basicFeature);
40 
41         ENCODE_CHK_STATUS_RETURN(EncodeHucPkt::Init());
42 
43         ENCODE_CHK_NULL_RETURN(m_hwInterface);
44         m_osInterface  = m_hwInterface->GetOsInterface();
45         ENCODE_CHK_NULL_RETURN(m_osInterface);
46 
47         m_miItf = m_hwInterface->GetMiInterfaceNext();
48         ENCODE_CHK_NULL_RETURN(m_miItf);
49 
50         ENCODE_CHK_NULL_RETURN(m_pipeline);
51 #ifdef _MMC_SUPPORTED
52         m_mmcState = m_pipeline->GetMmcState();
53         ENCODE_CHK_NULL_RETURN(m_mmcState);
54 #endif
55 
56         return MOS_STATUS_SUCCESS;
57     }
58 
AllocateResources()59     MOS_STATUS HevcPakIntegratePkt::AllocateResources()
60     {
61         ENCODE_CHK_STATUS_RETURN(EncodeHucPkt::AllocateResources());
62 
63         // Only needed when tile & BRC is enabled, but the size is not changing at frame level
64         if (m_resHucPakStitchDmemBuffer[0][0] == nullptr)
65         {
66             uint8_t *data;
67             MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear;
68 
69             // Pak stitch DMEM
70             MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS));
71             allocParamsForBufferLinear.Type     = MOS_GFXRES_BUFFER;
72             allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR;
73             allocParamsForBufferLinear.Format   = Format_Buffer;
74             allocParamsForBufferLinear.dwBytes  = MOS_ALIGN_CEIL(sizeof(HucPakIntegrateDmem), CODECHAL_CACHELINE_SIZE);
75             allocParamsForBufferLinear.pBufName = "PAK Stitch Dmem Buffer";
76             allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ_WRITE_NOCACHE;
77             auto numOfPasses                    = CODECHAL_VDENC_BRC_NUM_OF_PASSES;
78 
79             for (auto k = 0; k < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; k++)
80             {
81                 for (auto i = 0; i < numOfPasses; i++)
82                 {
83                     m_resHucPakStitchDmemBuffer[k][i] = m_allocator->AllocateResource(allocParamsForBufferLinear, true);
84                 }
85             }
86 
87             if (m_basicFeature->m_enableTileStitchByHW)
88             {
89                 // HuC stitching data buffer
90                 allocParamsForBufferLinear.dwBytes  = MOS_ALIGN_CEIL(sizeof(HucCommandData), CODECHAL_PAGE_SIZE);
91                 allocParamsForBufferLinear.pBufName = "HEVC HuC Stitch Data Buffer";
92                 allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ_WRITE_CACHE;
93                 MOS_RESOURCE *allocatedBuffer       = nullptr;
94                 for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; ++i)
95                 {
96                     for (auto j = 0; j < CODECHAL_VDENC_BRC_NUM_OF_PASSES; ++j)
97                     {
98                         allocatedBuffer = m_allocator->AllocateResource(allocParamsForBufferLinear, true);
99                         ENCODE_CHK_NULL_RETURN(allocatedBuffer);
100                         m_resHucStitchDataBuffer[i][j] = *allocatedBuffer;
101                     }
102                 }
103 
104                 // Second level batch buffer for HuC stitching CMD
105                 MOS_ZeroMemory(&m_HucStitchCmdBatchBuffer, sizeof(m_HucStitchCmdBatchBuffer));
106                 m_HucStitchCmdBatchBuffer.bSecondLevel = true;
107                 ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb(
108                     m_osInterface,
109                     &m_HucStitchCmdBatchBuffer,
110                     nullptr,
111                     m_hwInterface->m_HucStitchCmdBatchBufferSize));
112             }
113         }
114 
115         return MOS_STATUS_SUCCESS;
116     }
117 
FreeResources()118     MOS_STATUS HevcPakIntegratePkt::FreeResources()
119     {
120         MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
121 
122         ENCODE_FUNC_CALL();
123 
124         eStatus = Mhw_FreeBb(m_osInterface, &m_HucStitchCmdBatchBuffer, nullptr);
125         ENCODE_ASSERT(eStatus == MOS_STATUS_SUCCESS);
126 
127         return eStatus;
128     }
129 
UpdateParameters()130     void HevcPakIntegratePkt::UpdateParameters()
131     {
132         ENCODE_FUNC_CALL();
133 
134         if (!m_pipeline->IsSingleTaskPhaseSupported())
135         {
136             m_osInterface->pfnResetPerfBufferID(m_osInterface);
137         }
138 
139         m_basicFeature->m_currPakSliceIdx = (m_basicFeature->m_currPakSliceIdx + 1) % m_basicFeature->m_codecHalHevcNumPakSliceBatchBuffers;
140     }
141 
Submit(MOS_COMMAND_BUFFER * commandBuffer,uint8_t packetPhase)142     MOS_STATUS HevcPakIntegratePkt::Submit(MOS_COMMAND_BUFFER *commandBuffer, uint8_t packetPhase)
143     {
144         ENCODE_FUNC_CALL();
145 
146         bool firstTaskInPhase = packetPhase & firstPacket;
147         bool requestProlog = !m_pipeline->IsSingleTaskPhaseSupported() || firstTaskInPhase;
148 
149         uint16_t perfTag = CODECHAL_ENCODE_PERFTAG_CALL_PAK_KERNEL;
150         SetPerfTag(perfTag, (uint16_t)m_basicFeature->m_mode, m_basicFeature->m_pictureCodingType);
151 
152         auto brcFeature = dynamic_cast<HEVCEncodeBRC *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcBrcFeature));
153         ENCODE_CHK_NULL_RETURN(brcFeature);
154 
155         ENCODE_CHK_STATUS_RETURN(AddCondBBEndForLastPass(*commandBuffer));
156 
157         m_vdencHucUsed = brcFeature->IsVdencHucUsed();
158 
159         bool isTileReplayEnabled = false;
160         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, FeatureIDs::encodeTile, IsTileReplayEnabled, isTileReplayEnabled);
161 
162         MediaPerfProfiler *perfProfiler = MediaPerfProfiler::Instance();
163         ENCODE_CHK_NULL_RETURN(perfProfiler);
164         ENCODE_CHK_STATUS_RETURN(perfProfiler->AddPerfCollectStartCmd(
165             (void *)m_pipeline, m_osInterface, m_miItf, commandBuffer));
166 
167         if (m_vdencHucUsed || (m_basicFeature->m_enableTileStitchByHW && (isTileReplayEnabled || m_pipeline->GetPipeNum() > 1)))
168         {
169             // Huc basic
170             ENCODE_CHK_STATUS_RETURN(Execute(commandBuffer, true, requestProlog));
171 
172             // Add huc status update to status buffer
173             PMOS_RESOURCE osResource = nullptr;
174             uint32_t offset = 0;
175             ENCODE_CHK_STATUS_RETURN(m_statusReport->GetAddress(statusReportHucStatusRegMask, osResource, offset));
176             ENCODE_CHK_NULL_RETURN(osResource);
177 
178             // Write HUC_STATUS mask
179             auto &storeDataParams            = m_miItf->MHW_GETPAR_F(MI_STORE_DATA_IMM)();
180             storeDataParams                  = {};
181             storeDataParams.pOsResource      = osResource;
182             storeDataParams.dwResourceOffset = offset;
183             storeDataParams.dwValue          = m_hwInterface->GetHucInterfaceNext()->GetHucStatusReEncodeMask();
184             ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(commandBuffer));
185 
186             // store HUC_STATUS register
187             osResource = nullptr;
188             offset     = 0;
189             ENCODE_CHK_STATUS_RETURN(m_statusReport->GetAddress(statusReportHucStatusReg, osResource, offset));
190             ENCODE_CHK_NULL_RETURN(osResource);
191             auto mmioRegisters             = m_hucItf->GetMmioRegisters(m_vdboxIndex);
192             auto &storeRegParams           = m_miItf->MHW_GETPAR_F(MI_STORE_REGISTER_MEM)();
193             storeDataParams                = {};
194             storeRegParams.presStoreBuffer = osResource;
195             storeRegParams.dwOffset        = offset;
196             storeRegParams.dwRegister      = mmioRegisters->hucStatusRegOffset;
197             ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(commandBuffer));
198         }
199 
200         // Use HW stitch commands only in the scalable mode
201         // For single pipe with tile replay, stitch also needed
202         if (m_basicFeature->m_enableTileStitchByHW && (isTileReplayEnabled || m_pipeline->GetPipeNum() > 1))
203         {
204             ENCODE_CHK_STATUS_RETURN(PerformHwStitch(commandBuffer));
205         }
206 
207         ENCODE_CHK_STATUS_RETURN(ReadSseStatistics(*commandBuffer));
208 
209         ENCODE_CHK_STATUS_RETURN(ReadSliceSize(*commandBuffer));
210 
211         ENCODE_CHK_STATUS_RETURN(EndStatusReport(statusReportMfx, commandBuffer));
212         if (false == m_pipeline->IsFrameTrackingEnabled())
213         {
214             ENCODE_CHK_STATUS_RETURN(UpdateStatusReportNext(statusReportGlobalCount, commandBuffer));
215         }
216         CODECHAL_DEBUG_TOOL(
217             if (m_mmcState) {
218                 m_mmcState->UpdateUserFeatureKey(&(m_basicFeature->m_reconSurface));
219             })
220         // Reset parameters for next PAK execution
221         if (false == m_pipeline->IsFrameTrackingEnabled())
222         {
223             UpdateParameters();
224         }
225 
226         CODECHAL_DEBUG_TOOL
227         (
228             ENCODE_CHK_STATUS_RETURN(DumpInput());
229         )
230 
231         return MOS_STATUS_SUCCESS;
232     }
233 
EndStatusReport(uint32_t srType,MOS_COMMAND_BUFFER * cmdBuffer)234     MOS_STATUS HevcPakIntegratePkt::EndStatusReport(
235         uint32_t            srType,
236         MOS_COMMAND_BUFFER *cmdBuffer)
237     {
238         ENCODE_FUNC_CALL();
239         ENCODE_CHK_NULL_RETURN(cmdBuffer);
240         auto brcFeature = dynamic_cast<HEVCEncodeBRC *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcBrcFeature));
241         ENCODE_CHK_NULL_RETURN(brcFeature);
242         if (m_pipeline->GetPipeNum() <= 1 && m_pipeline->IsSingleTaskPhaseSupported())
243         {
244             // single pipe mode can read the info from MMIO register. Otherwise,
245             // we have to use the tile size statistic buffer
246             ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(m_vdboxIndex, m_statusReport, *cmdBuffer));
247             // BRC PAK statistics different for each pass
248             if (brcFeature->IsBRCEnabled())
249             {
250                 uint8_t ucPass = (uint8_t)m_pipeline->GetCurrentPass();
251                 EncodeReadBrcPakStatsParams readBrcPakStatsParams;
252                 MOS_RESOURCE *osResource = nullptr;
253                 uint32_t      offset = 0;
254                 m_statusReport->GetAddress(statusReportNumberPasses, osResource, offset);
255                 RUN_FEATURE_INTERFACE_RETURN(HEVCEncodeBRC, HevcFeatureIDs::hevcBrcFeature, SetReadBrcPakStatsParams, ucPass, offset, osResource, readBrcPakStatsParams);
256                 ReadBrcPakStatistics(cmdBuffer, &readBrcPakStatsParams);
257             }
258         }
259         ENCODE_CHK_STATUS_RETURN(MediaPacket::EndStatusReportNext(srType, cmdBuffer));
260 
261         MediaPerfProfiler *perfProfiler = MediaPerfProfiler::Instance();
262         ENCODE_CHK_NULL_RETURN(perfProfiler);
263         ENCODE_CHK_STATUS_RETURN(perfProfiler->AddPerfCollectEndCmd(
264             (void *)m_pipeline, m_osInterface, m_miItf, cmdBuffer));
265 
266         return MOS_STATUS_SUCCESS;
267     }
268 
ReadHcpStatus(MHW_VDBOX_NODE_IND vdboxIndex,MediaStatusReport * statusReport,MOS_COMMAND_BUFFER & cmdBuffer)269     MOS_STATUS HevcPakIntegratePkt::ReadHcpStatus(
270         MHW_VDBOX_NODE_IND  vdboxIndex,
271         MediaStatusReport * statusReport,
272         MOS_COMMAND_BUFFER &cmdBuffer)
273     {
274         ENCODE_FUNC_CALL();
275 
276         MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
277 
278         CODEC_HW_FUNCTION_ENTER;
279 
280         ENCODE_CHK_NULL_RETURN(statusReport);
281         ENCODE_CHK_NULL_RETURN(m_hwInterface);
282 
283         MOS_RESOURCE *osResource = nullptr;
284         uint32_t      offset = 0;
285 
286         EncodeStatusReadParams params;
287         MOS_ZeroMemory(&params, sizeof(params));
288 
289         ENCODE_CHK_STATUS_RETURN(statusReport->GetAddress(encode::statusReportMfxBitstreamByteCountPerFrame, osResource, offset));
290         params.resBitstreamByteCountPerFrame    = osResource;
291         params.bitstreamByteCountPerFrameOffset = offset;
292 
293         ENCODE_CHK_STATUS_RETURN(statusReport->GetAddress(encode::statusReportMfxBitstreamSyntaxElementOnlyBitCount, osResource, offset));
294         params.resBitstreamSyntaxElementOnlyBitCount    = osResource;
295         params.bitstreamSyntaxElementOnlyBitCountOffset = offset;
296 
297         ENCODE_CHK_STATUS_RETURN(statusReport->GetAddress(encode::statusReportQPStatusCount, osResource, offset));
298         params.resQpStatusCount    = osResource;
299         params.qpStatusCountOffset = offset;
300 
301         ENCODE_CHK_STATUS_RETURN(statusReport->GetAddress(encode::statusReportImageStatusMask, osResource, offset));
302         params.resImageStatusMask    = osResource;
303         params.imageStatusMaskOffset = offset;
304 
305         ENCODE_CHK_STATUS_RETURN(statusReport->GetAddress(encode::statusReportImageStatusCtrl, osResource, offset));
306         params.resImageStatusCtrl    = osResource;
307         params.imageStatusCtrlOffset = offset;
308 
309         ENCODE_CHK_STATUS_RETURN(m_hwInterface->ReadHcpStatus(vdboxIndex, params, &cmdBuffer));
310 
311         // Slice Size Conformance
312         if (m_basicFeature->m_hevcSeqParams->SliceSizeControl)
313         {
314             RUN_FEATURE_INTERFACE_RETURN(HevcEncodeDss, HevcFeatureIDs::hevcVdencDssFeature, ReadHcpStatus, vdboxIndex, cmdBuffer);
315         }
316         auto brcFeature = dynamic_cast<HEVCEncodeBRC *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcBrcFeature));
317         ENCODE_CHK_NULL_RETURN(brcFeature);
318         bool vdencHucUsed  = brcFeature->IsVdencHucUsed();
319         if (vdencHucUsed)
320         {
321             // Store PAK frameSize MMIO to PakInfo buffer
322             auto &miStoreRegMemParams           = m_miItf->MHW_GETPAR_F(MI_STORE_REGISTER_MEM)();
323             miStoreRegMemParams                 = {};
324             miStoreRegMemParams.presStoreBuffer = m_basicFeature->m_recycleBuf->GetBuffer(PakInfo, 0);
325             miStoreRegMemParams.dwOffset        = 0;
326             auto mmioRegisters                  = m_hcpItf->GetMmioRegisters(m_vdboxIndex);
327             ENCODE_CHK_NULL_RETURN(mmioRegisters);
328             miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncBitstreamBytecountFrameRegOffset;
329 
330             ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(&cmdBuffer));
331         }
332         ENCODE_CHK_STATUS_RETURN(m_hwInterface->ReadImageStatusForHcp(vdboxIndex, params, &cmdBuffer));
333         return eStatus;
334     }
335 
ReadBrcPakStatistics(PMOS_COMMAND_BUFFER cmdBuffer,EncodeReadBrcPakStatsParams * params)336     MOS_STATUS HevcPakIntegratePkt::ReadBrcPakStatistics(
337         PMOS_COMMAND_BUFFER          cmdBuffer,
338         EncodeReadBrcPakStatsParams *params)
339     {
340         MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
341 
342         ENCODE_FUNC_CALL();
343 
344         ENCODE_CHK_NULL_RETURN(cmdBuffer);
345         ENCODE_CHK_NULL_RETURN(params);
346         ENCODE_CHK_NULL_RETURN(params->presBrcPakStatisticBuffer);
347         ENCODE_CHK_NULL_RETURN(params->presStatusBuffer);
348 
349         ENCODE_CHK_STATUS_RETURN(ValidateVdboxIdx(m_vdboxIndex));
350 
351         auto mmioRegisters = m_hcpItf->GetMmioRegisters(m_vdboxIndex);
352         ENCODE_CHK_NULL_RETURN(mmioRegisters);
353 
354         auto AddMiStoreRegisterMemCmd = [&](uint32_t offset, uint32_t hcpMmioRegister) {
355             auto &miStoreRegMemParams           = m_miItf->MHW_GETPAR_F(MI_STORE_REGISTER_MEM)();
356             miStoreRegMemParams                 = {};
357             miStoreRegMemParams.presStoreBuffer = params->presBrcPakStatisticBuffer;
358             miStoreRegMemParams.dwOffset        = offset;
359             miStoreRegMemParams.dwRegister      = hcpMmioRegister;
360             ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(cmdBuffer));
361             return MOS_STATUS_SUCCESS;
362         };
363 
364         ENCODE_CHK_STATUS_RETURN(AddMiStoreRegisterMemCmd(CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_BITSTREAM_BYTECOUNT_FRAME), mmioRegisters->hcpEncBitstreamBytecountFrameRegOffset));
365         ENCODE_CHK_STATUS_RETURN(AddMiStoreRegisterMemCmd(CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_BITSTREAM_BYTECOUNT_FRAME_NOHEADER), mmioRegisters->hcpEncBitstreamBytecountFrameNoHeaderRegOffset));
366         ENCODE_CHK_STATUS_RETURN(AddMiStoreRegisterMemCmd(CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL), mmioRegisters->hcpEncImageStatusCtrlRegOffset));
367 
368         auto &storeDataParams            = m_miItf->MHW_GETPAR_F(MI_STORE_DATA_IMM)();
369         storeDataParams                  = {};
370         storeDataParams.pOsResource      = params->presStatusBuffer;
371         storeDataParams.dwResourceOffset = params->dwStatusBufNumPassesOffset;
372         storeDataParams.dwValue          = params->ucPass;
373         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(cmdBuffer));
374 
375         return eStatus;
376     }
377 
378         // Inline functions
ValidateVdboxIdx(const MHW_VDBOX_NODE_IND & vdboxIndex)379     MOS_STATUS HevcPakIntegratePkt::ValidateVdboxIdx(const MHW_VDBOX_NODE_IND &vdboxIndex)
380     {
381         ENCODE_FUNC_CALL();
382 
383         MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
384         if (vdboxIndex > m_hwInterface->GetMaxVdboxIndex())
385         {
386             //ENCODE_ASSERTMESSAGE("ERROR - vdbox index exceed the maximum");
387             eStatus = MOS_STATUS_INVALID_PARAMETER;
388         }
389 
390         return eStatus;
391     }
392 
CalculateCommandSize(uint32_t & commandBufferSize,uint32_t & requestedPatchListSize)393     MOS_STATUS HevcPakIntegratePkt::CalculateCommandSize(uint32_t &commandBufferSize, uint32_t &requestedPatchListSize)
394     {
395         ENCODE_FUNC_CALL();
396 
397         uint32_t hucCommandsSize = 0;
398         uint32_t hucPatchListSize = 0;
399         MHW_VDBOX_STATE_CMDSIZE_PARAMS stateCmdSizeParams;
400 
401         stateCmdSizeParams.uNumStoreDataImm = 2;
402         stateCmdSizeParams.uNumStoreReg     = 4;
403         stateCmdSizeParams.uNumMfxWait      = 11;
404         stateCmdSizeParams.uNumMiCopy       = 5;
405         stateCmdSizeParams.uNumMiFlush      = 2;
406         stateCmdSizeParams.uNumVdPipelineFlush  = 1;
407         stateCmdSizeParams.bPerformHucStreamOut = true;
408         ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucStateCommandSize(
409             m_basicFeature->m_mode, (uint32_t*)&hucCommandsSize, (uint32_t*)&hucPatchListSize, &stateCmdSizeParams));
410 
411         bool isTileReplayEnabled = false;
412         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, FeatureIDs::encodeTile, IsTileReplayEnabled, isTileReplayEnabled);
413         if (m_basicFeature->m_enableTileStitchByHW && (isTileReplayEnabled || m_pipeline->GetPipeNum() > 1))
414         {
415             uint32_t maxSize = 0;
416             uint32_t patchListMaxSize = 0;
417             ENCODE_CHK_NULL_RETURN(m_hwInterface);
418             ENCODE_CHK_NULL_RETURN(m_hwInterface->GetCpInterface());
419             MhwCpInterface *cpInterface = m_hwInterface->GetCpInterface();
420             cpInterface->GetCpStateLevelCmdSize(maxSize, patchListMaxSize);
421             hucCommandsSize     += maxSize;
422             hucPatchListSize    += patchListMaxSize;
423         }
424 
425         commandBufferSize = hucCommandsSize;
426         requestedPatchListSize = m_osInterface->bUsesPatchList ? hucPatchListSize : 0;
427 
428         // reserve cmd size for hw stitch
429         commandBufferSize += m_hwStitchCmdSize;
430 
431         // 4K align since allocation is in chunks of 4K bytes.
432         commandBufferSize = MOS_ALIGN_CEIL(commandBufferSize, CODECHAL_PAGE_SIZE);
433 
434         return MOS_STATUS_SUCCESS;
435     }
436 
SetDmemBuffer() const437     MOS_STATUS HevcPakIntegratePkt::SetDmemBuffer() const
438     {
439         ENCODE_FUNC_CALL();
440 
441         MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
442 
443         int32_t currentPass = m_pipeline->GetCurrentPass();
444         if (currentPass < 0 || currentPass >= CODECHAL_VDENC_BRC_NUM_OF_PASSES)
445         {
446             eStatus = MOS_STATUS_INVALID_PARAMETER;
447             return eStatus;
448         }
449 
450         HucPakIntegrateDmem *hucPakStitchDmem =
451             (HucPakIntegrateDmem *)m_allocator->LockResourceForWrite(m_resHucPakStitchDmemBuffer[m_pipeline->m_currRecycledBufIdx][currentPass]);
452 
453         ENCODE_CHK_NULL_RETURN(hucPakStitchDmem);
454         MOS_ZeroMemory(hucPakStitchDmem, sizeof(HucPakIntegrateDmem));
455 
456         // Reset all the offsets to be shared in the huc dmem (6*5 DW's)
457         MOS_FillMemory(hucPakStitchDmem, 6 * (MAX_PAK_NUM + 1) * sizeof(uint32_t), 0xFF);
458 
459         uint16_t numTileColumns = 1;
460         uint16_t numTileRows    = 1;
461         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetTileRowColumns, numTileRows, numTileColumns);
462 
463         uint32_t numTiles = 1;
464         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetTileNum, numTiles);
465 
466         uint16_t numTilesPerPipe = (uint16_t)(numTiles / m_pipeline->GetPipeNum());
467 
468         auto feature = dynamic_cast<HEVCEncodeBRC *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcBrcFeature));
469         ENCODE_CHK_NULL_RETURN(feature);
470 
471         hucPakStitchDmem->TotalSizeInCommandBuffer = numTiles * CODECHAL_CACHELINE_SIZE;
472         // Last tile length may get modified by HuC. Obtain last Tile Record, Add an offset of 8bytes to skip address field in Tile Record
473         hucPakStitchDmem->OffsetInCommandBuffer = (numTiles - 1) * CODECHAL_CACHELINE_SIZE + 8;
474         hucPakStitchDmem->PicWidthInPixel       = (uint16_t)m_basicFeature->m_frameWidth;
475         hucPakStitchDmem->PicHeightInPixel      = (uint16_t)m_basicFeature->m_frameHeight;
476         hucPakStitchDmem->TotalNumberOfPAKs     = feature->IsBRCEnabled() ? m_pipeline->GetPipeNum() : 0;
477         hucPakStitchDmem->Codec                 = 2;  // 1: HEVC DP; 2: HEVC VDEnc; 3: VP9 VDEnc
478 
479         hucPakStitchDmem->MAXPass           = feature->IsBRCEnabled() ? CODECHAL_VDENC_BRC_NUM_OF_PASSES : 1;
480         hucPakStitchDmem->CurrentPass       = (uint8_t)currentPass + 1;  // Current BRC pass [1..MAXPass]
481         hucPakStitchDmem->MinCUSize         = m_basicFeature->m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3;
482         hucPakStitchDmem->CabacZeroWordFlag = true;
483         hucPakStitchDmem->bitdepth_luma     = m_basicFeature->m_hevcSeqParams->bit_depth_luma_minus8 + 8;    // default: 8
484         hucPakStitchDmem->bitdepth_chroma   = m_basicFeature->m_hevcSeqParams->bit_depth_chroma_minus8 + 8;  // default: 8
485         hucPakStitchDmem->ChromaFormatIdc   = m_basicFeature->m_hevcSeqParams->chroma_format_idc;
486 
487         uint32_t       lastTileIndex = numTiles - 1;
488         EncodeTileData tileData      = {};
489         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetTileByIndex, tileData, lastTileIndex);
490         hucPakStitchDmem->LastTileBS_StartInBytes = (tileData.bitstreamByteOffset * CODECHAL_CACHELINE_SIZE) & (CODECHAL_PAGE_SIZE - 1);
491 
492         hucPakStitchDmem->PIC_STATE_StartInBytes = (uint16_t)m_basicFeature->m_picStateCmdStartInBytes;
493 
494         HevcTileStatusInfo hevcTileStatsOffset  = {};
495         HevcTileStatusInfo hevcFrameStatsOffset = {};
496         HevcTileStatusInfo hevcStatsSize        = {};
497         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetTileStatusInfo, hevcTileStatsOffset, hevcFrameStatsOffset, hevcStatsSize);
498 
499         if (m_pipeline->GetPipeNum() > 1)
500         {
501             //Set the kernel output offsets
502             hucPakStitchDmem->HEVC_PAKSTAT_offset[0]   = feature->IsBRCEnabled() ? hevcFrameStatsOffset.hevcPakStatistics : 0xFFFFFFFF;
503             hucPakStitchDmem->HEVC_Streamout_offset[0] = feature->IsBRCEnabled() ? hevcFrameStatsOffset.hevcSliceStreamout : 0xFFFFFFFF;
504             hucPakStitchDmem->TileSizeRecord_offset[0] = hevcFrameStatsOffset.tileSizeRecord;
505             hucPakStitchDmem->VDENCSTAT_offset[0]      = feature->IsBRCEnabled() ? hevcFrameStatsOffset.vdencStatistics : 0xFFFFFFFF;
506 
507             // Calculate number of slices that execute on a single pipe
508             for (auto tileRow = 0; tileRow < numTileRows; tileRow++)
509             {
510                 for (auto tileCol = 0; tileCol < numTileColumns; tileCol++)
511                 {
512                     PCODEC_ENCODER_SLCDATA slcData = m_basicFeature->m_slcData;
513                     uint16_t               slcCount, idx, sliceNumInTile = 0;
514 
515                     idx = tileRow * numTileColumns + tileCol;
516                     for (slcCount = 0; slcCount < m_basicFeature->m_numSlices; slcCount++)
517                     {
518                         bool lastSliceInTile = false, sliceInTile = false;
519 
520                         EncodeTileData curTileData = {};
521                         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetTileByIndex, curTileData, idx);
522                         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, IsSliceInTile, slcCount, &curTileData, &sliceInTile, &lastSliceInTile);
523 
524                         if (!sliceInTile)
525                         {
526                             continue;
527                         }
528 
529                         sliceNumInTile++;
530                     }  // end of slice
531                     if (0 == sliceNumInTile)
532                     {
533                         // One tile must have at least one slice
534                         ENCODE_ASSERT(false);
535                         eStatus = MOS_STATUS_INVALID_PARAMETER;
536                         break;
537                     }
538                     // Set the number of slices per pipe in the Dmem structure
539                     hucPakStitchDmem->NumSlices[tileCol] += sliceNumInTile;
540                 }
541             }
542 
543             for (auto i = 0; i < m_pipeline->GetPipeNum(); i++)
544             {
545                 hucPakStitchDmem->NumTiles[i]  = numTilesPerPipe;
546                 hucPakStitchDmem->NumSlices[i] = numTilesPerPipe;  // Assuming 1 slice/ tile. To do: change this later.
547 
548                 // Statistics are dumped out at a tile level. Driver shares with kernel starting offset of each pipe statistic.
549                 // Offset is calculated by adding size of statistics/pipe to the offset in combined statistics region.
550                 hucPakStitchDmem->TileSizeRecord_offset[i + 1] = (i * numTilesPerPipe * hevcStatsSize.tileSizeRecord) + hevcTileStatsOffset.tileSizeRecord;
551                 hucPakStitchDmem->HEVC_PAKSTAT_offset[i + 1]   = (i * numTilesPerPipe * hevcStatsSize.hevcPakStatistics) + hevcTileStatsOffset.hevcPakStatistics;
552                 hucPakStitchDmem->VDENCSTAT_offset[i + 1]      = (i * numTilesPerPipe * hevcStatsSize.vdencStatistics) + hevcTileStatsOffset.vdencStatistics;
553                 hucPakStitchDmem->HEVC_Streamout_offset[i + 1] = (i * hucPakStitchDmem->NumSlices[i] * CODECHAL_CACHELINE_SIZE) + hevcTileStatsOffset.hevcSliceStreamout;
554             }
555         }
556         else
557         {
558             hucPakStitchDmem->NumTiles[0]       = (uint16_t)numTiles;
559             hucPakStitchDmem->TotalNumberOfPAKs = m_pipeline->GetPipeNum();
560 
561             // non-scalable mode, only VDEnc statistics need to be aggregated
562             hucPakStitchDmem->VDENCSTAT_offset[0] = hevcFrameStatsOffset.vdencStatistics;
563             hucPakStitchDmem->VDENCSTAT_offset[1] = hevcTileStatsOffset.vdencStatistics;
564         }
565 
566         bool isTileReplayEnabled = false;
567         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, FeatureIDs::encodeTile, IsTileReplayEnabled, isTileReplayEnabled);
568         if (m_basicFeature->m_enableTileStitchByHW && (isTileReplayEnabled || m_pipeline->GetPipeNum() > 1))
569         {
570             hucPakStitchDmem->StitchEnable        = true;
571             hucPakStitchDmem->StitchCommandOffset = 0;
572             hucPakStitchDmem->BBEndforStitch      = HUC_BATCH_BUFFER_END;
573         }
574 
575         m_allocator->UnLock(m_resHucPakStitchDmemBuffer[m_pipeline->m_currRecycledBufIdx][currentPass]);
576 
577         return eStatus;
578     }
579 
ReadSseStatistics(MOS_COMMAND_BUFFER & cmdBuffer)580     MOS_STATUS HevcPakIntegratePkt::ReadSseStatistics(MOS_COMMAND_BUFFER &cmdBuffer)
581     {
582         // implement SSE
583         ENCODE_FUNC_CALL();
584 
585         PMOS_RESOURCE osResource = nullptr;
586         uint32_t      offset     = 0;
587 
588         m_statusReport->GetAddress(statusReportSumSquareError, osResource, offset);
589 
590         for (auto i = 0; i < 3; i++)  // 64 bit SSE values for luma/ chroma channels need to be copied
591         {
592             auto &miCpyMemMemParams       = m_miItf->MHW_GETPAR_F(MI_COPY_MEM_MEM)();
593             miCpyMemMemParams             = {};
594             MOS_RESOURCE *resHuCPakAggregatedFrameStatsBuffer = nullptr;
595             RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetHucPakAggregatedFrameStatsBuffer, resHuCPakAggregatedFrameStatsBuffer);
596             ENCODE_CHK_NULL_RETURN(resHuCPakAggregatedFrameStatsBuffer);
597             bool tiles_enabled = false;
598             RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, IsEnabled, tiles_enabled);
599             miCpyMemMemParams.presSrc     = tiles_enabled && (m_pipeline->GetPipeNum() > 1) ? resHuCPakAggregatedFrameStatsBuffer : m_basicFeature->m_recycleBuf->GetBuffer(FrameStatStreamOutBuffer, 0);
600             miCpyMemMemParams.dwSrcOffset = (m_basicFeature->m_hevcPakStatsSSEOffset + i) * sizeof(uint32_t);  // SSE luma offset is located at DW32 in Frame statistics, followed by chroma
601             miCpyMemMemParams.presDst     = osResource;
602             miCpyMemMemParams.dwDstOffset = offset + i * sizeof(uint32_t);
603             ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_COPY_MEM_MEM)(&cmdBuffer));
604         }
605 
606         return MOS_STATUS_SUCCESS;
607     }
608 
ReadSliceSize(MOS_COMMAND_BUFFER & cmdBuffer)609     MOS_STATUS HevcPakIntegratePkt::ReadSliceSize(MOS_COMMAND_BUFFER &cmdBuffer)
610     {
611         MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
612 
613         ENCODE_FUNC_CALL();
614 
615         // Use FrameStats buffer if in single pipe mode.
616         if (m_pipeline->GetPipeNum() == 1)
617         {
618             return ReadSliceSizeForSinglePipe(cmdBuffer);
619         }
620 
621         // In multi-tile multi-pipe mode, use PAK integration kernel output
622         // PAK integration kernel accumulates frame statistics across tiles, which should be used to setup slice size report
623         // Report slice size to app only when dynamic scaling is enabled
624         if (!m_basicFeature->m_hevcSeqParams->SliceSizeControl)
625         {
626             return eStatus;
627         }
628 
629         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeDss, HevcFeatureIDs::hevcVdencDssFeature, ReadSliceSize, m_pipeline, cmdBuffer);
630         return eStatus;
631     }
632 
ReadSliceSizeForSinglePipe(MOS_COMMAND_BUFFER & cmdBuffer)633     MOS_STATUS HevcPakIntegratePkt::ReadSliceSizeForSinglePipe(MOS_COMMAND_BUFFER &cmdBuffer)
634     {
635         MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
636 
637         ENCODE_FUNC_CALL();
638 
639          // Report slice size to app only when dynamic slice is enabled
640         if (!m_basicFeature->m_hevcSeqParams->SliceSizeControl)
641         {
642             return eStatus;
643         }
644         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeDss, HevcFeatureIDs::hevcVdencDssFeature, ReadSliceSizeForSinglePipe, m_pipeline, cmdBuffer);
645 
646         return eStatus;
647     }
648 
SetupTilesStatusData(void * mfxStatus,void * statusReport)649     MOS_STATUS HevcPakIntegratePkt::SetupTilesStatusData(void *mfxStatus, void *statusReport)
650     {
651         ENCODE_FUNC_CALL();
652 
653         ENCODE_CHK_NULL_RETURN(mfxStatus);
654         ENCODE_CHK_NULL_RETURN(statusReport);
655         ENCODE_CHK_NULL_RETURN(m_basicFeature);
656 
657         EncodeStatusMfx *       encodeStatusMfx  = (EncodeStatusMfx *)mfxStatus;
658         EncodeStatusReportData *statusReportData = (EncodeStatusReportData *)statusReport;
659 
660         uint32_t statBufIdx     = statusReportData->currOriginalPic.FrameIdx;
661         const EncodeReportTileData *tileReportData = nullptr;
662         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetReportTileData, statBufIdx, tileReportData);
663         if(tileReportData == nullptr)
664          {
665             // When Tile feature is not enabled, not need following complete options
666             ENCODE_NORMALMESSAGE("Free tileReportData for frames, which include only one tile.");
667             return MOS_STATUS_SUCCESS;
668         }
669 
670         if (tileReportData[0].reportValid == false)
671         {
672             // Only multi-pipe contain tile report data. No tile report data needed for one-pipe.
673             return MOS_STATUS_SUCCESS;
674         }
675 
676         statusReportData->codecStatus                                           = CODECHAL_STATUS_SUCCESSFUL;
677         statusReportData->panicMode                                             = false;
678         statusReportData->averageQP                                             = 0;
679         statusReportData->qpY                                                   = 0;
680         statusReportData->suggestedQPYDelta                                     = 0;
681         statusReportData->numberPasses                                          = 1;
682         statusReportData->bitstreamSize                                         = 0;
683         statusReportData->numberSlices                                          = 0;
684         encodeStatusMfx->imageStatusCtrlOfLastBRCPass.hcpCumulativeFrameDeltaQP = 0;
685 
686         // Allocate the tile size report memory
687         statusReportData->sizeOfTileInfoBuffer = statusReportData->numberTilesInFrame * sizeof(CodechalTileInfo);
688         if (statusReportData->hevcTileinfo)
689         {
690             MOS_FreeMemory(statusReportData->hevcTileinfo);
691         }
692         statusReportData->hevcTileinfo = (CodechalTileInfo *)MOS_AllocAndZeroMemory(statusReportData->sizeOfTileInfoBuffer);
693         ENCODE_CHK_NULL_RETURN(statusReportData->hevcTileinfo);
694 
695         MOS_RESOURCE *tileSizeStatusBuffer = nullptr;
696         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetTileRecordBuffer, statBufIdx, tileSizeStatusBuffer);
697         ENCODE_CHK_NULL_RETURN(tileSizeStatusBuffer);
698 
699         MOS_LOCK_PARAMS lockFlags;
700         MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
701         PakHwTileSizeRecord *tileStatusReport =
702             (PakHwTileSizeRecord *)m_allocator->Lock(tileSizeStatusBuffer, &lockFlags);
703         ENCODE_CHK_NULL_RETURN(tileStatusReport);
704 
705         uint32_t *sliceSize = nullptr;
706 
707         // pSliceSize is set/ allocated only when dynamic slice is enabled. Cannot use SSC flag here, as it is an asynchronous call
708         if (encodeStatusMfx->sliceReport.sliceSize)
709         {
710             sliceSize = (uint32_t *)m_osInterface->pfnLockResource(m_osInterface, encodeStatusMfx->sliceReport.sliceSize, &lockFlags);
711             ENCODE_CHK_NULL_RETURN(sliceSize);
712         }
713         encodeStatusMfx->imageStatusCtrlOfLastBRCPass.hcpCumulativeFrameDeltaQP = 0;
714 
715         uint32_t totalCU    = 0;
716         uint32_t sliceCount = 0;
717         double   sumQp      = 0.0;
718         for (uint32_t i = 0; i < statusReportData->numberTilesInFrame; i++)
719         {
720             if (tileStatusReport[i].Length == 0)
721             {
722                 statusReportData->codecStatus = CODECHAL_STATUS_INCOMPLETE;
723                 return MOS_STATUS_SUCCESS;
724             }
725 
726             // Tile Replay currently shares same frame level status report as tile
727 
728             statusReportData->hevcTileinfo[i].TileSizeInBytes = tileStatusReport[i].Length;
729             // The offset only valid if there is no stream stitching
730             statusReportData->hevcTileinfo[i].TileBitStreamOffset = tileReportData[i].bitstreamByteOffset * CODECHAL_CACHELINE_SIZE;
731             statusReportData->hevcTileinfo[i].TileRowNum          = i / tileReportData[i].numTileColumns;
732             statusReportData->hevcTileinfo[i].TileColNum          = i % tileReportData[i].numTileColumns;
733             statusReportData->numTileReported                     = i + 1;
734             statusReportData->bitstreamSize += tileStatusReport[i].Length;
735             totalCU += (tileReportData[i].tileHeightInMinCbMinus1 + 1) * (tileReportData[i].tileWidthInMinCbMinus1 + 1);
736             sumQp += tileStatusReport[i].Hcp_Qp_Status_Count;
737 
738             //Add silce Size Control support in each tile
739             if (sliceSize)
740             {
741                 statusReportData->sliceSizes = (uint16_t *)sliceSize;
742                 statusReportData->numberSlices += (uint8_t)tileStatusReport[i].Hcp_Slice_Count_Tile;
743                 uint16_t prevCumulativeSliceSize = 0;
744                 // HW writes out a DW for each slice size. Copy in place the DW into 16bit fields expected by App
745                 for (uint32_t idx = 0; idx < tileStatusReport[i].Hcp_Slice_Count_Tile; idx++)
746                 {
747                     // PAK output the sliceSize at 16DW intervals.
748                     ENCODE_CHK_NULL_RETURN(&sliceSize[sliceCount * 16]);
749 
750                     //convert cummulative slice size to individual, first slice may have PPS/SPS,
751                     uint32_t CurrAccumulatedSliceSize           = sliceSize[sliceCount * 16];
752                     statusReportData->sliceSizes[sliceCount] = CurrAccumulatedSliceSize - prevCumulativeSliceSize;
753                     prevCumulativeSliceSize += statusReportData->sliceSizes[sliceCount];
754                     sliceCount++;
755                 }
756             }
757         }
758 
759         if (sliceSize)
760         {
761             statusReportData->sizeOfSliceSizesBuffer = sizeof(uint16_t) * statusReportData->numberSlices;
762             statusReportData->sliceSizeOverflow      = (encodeStatusMfx->sliceReport.sliceSizeOverflow >> 16) & 1;
763             m_osInterface->pfnUnlockResource(m_osInterface, encodeStatusMfx->sliceReport.sliceSize);
764         }
765 
766         if (statusReportData->bitstreamSize == 0 ||
767             statusReportData->bitstreamSize > m_basicFeature->m_bitstreamSize)
768         {
769             statusReportData->codecStatus   = CODECHAL_STATUS_ERROR;
770             statusReportData->bitstreamSize = 0;
771             return MOS_STATUS_INVALID_FILE_SIZE;
772         }
773 
774         if (totalCU != 0)
775         {
776             statusReportData->qpY = statusReportData->averageQP =
777                 (uint8_t)((sumQp / (double)totalCU) / 4.0);  // due to TU is 4x4 and there are 4 TUs in one CU
778         }
779         else
780         {
781             return MOS_STATUS_INVALID_PARAMETER;
782         }
783 
784         if (!m_basicFeature->m_enableTileStitchByHW && m_pipeline->GetPipeNum() > 1)
785         {
786             ENCODE_CHK_STATUS_RETURN(PerformSwStitch(tileReportData, tileStatusReport, statusReportData));
787         }
788 
789         if (tileStatusReport)
790         {
791             // clean-up the tile status report buffer
792             MOS_ZeroMemory(tileStatusReport, sizeof(tileStatusReport[0]) * statusReportData->numberTilesInFrame);
793             m_allocator->UnLock(tileSizeStatusBuffer);
794         }
795 
796         return MOS_STATUS_SUCCESS;
797     }
798 
Completed(void * mfxStatus,void * rcsStatus,void * statusReport)799     MOS_STATUS HevcPakIntegratePkt::Completed(void *mfxStatus, void *rcsStatus, void *statusReport)
800     {
801         ENCODE_FUNC_CALL();
802 
803         ENCODE_CHK_NULL_RETURN(mfxStatus);
804         ENCODE_CHK_NULL_RETURN(statusReport);
805         ENCODE_CHK_NULL_RETURN(m_basicFeature);
806 
807         EncodeStatusReportData *statusReportData = (EncodeStatusReportData *)statusReport;
808 
809         if (statusReportData->numberTilesInFrame == 1)
810         {
811             // When Tile feature is not enabled, not need following complete options
812             return MOS_STATUS_SUCCESS;
813         }
814 
815         ENCODE_CHK_STATUS_RETURN(EncodeHucPkt::Completed(mfxStatus, rcsStatus, statusReport));
816 
817         // Tile status data is only update and performed in multi-pipe mode
818         ENCODE_CHK_STATUS_RETURN(SetupTilesStatusData(mfxStatus, statusReport));
819 
820         m_basicFeature->Reset((CODEC_REF_LIST *)statusReportData->currRefList);
821         return MOS_STATUS_SUCCESS;
822     }
823 
PerformSwStitch(const EncodeReportTileData * tileReportData,PakHwTileSizeRecord * tileStatusReport,EncodeStatusReportData * statusReportData)824     MOS_STATUS HevcPakIntegratePkt::PerformSwStitch(
825         const EncodeReportTileData *tileReportData,
826         PakHwTileSizeRecord *       tileStatusReport,
827         EncodeStatusReportData *    statusReportData)
828     {
829         ENCODE_FUNC_CALL();
830 
831         ENCODE_CHK_NULL_RETURN(tileReportData);
832         ENCODE_CHK_NULL_RETURN(tileStatusReport);
833 
834         uint8_t *tempBsBuffer = nullptr, *bufPtr = nullptr;
835         tempBsBuffer = bufPtr = (uint8_t *)MOS_AllocAndZeroMemory(statusReportData->bitstreamSize);
836         ENCODE_CHK_NULL_RETURN(tempBsBuffer);
837 
838         PCODEC_REF_LIST currRefList = (PCODEC_REF_LIST)statusReportData->currRefList;
839 
840         MOS_LOCK_PARAMS lockFlags;
841         MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
842         lockFlags.ReadOnly = 1;
843         uint8_t *bitstream = (uint8_t *)m_allocator->Lock(
844             &currRefList->resBitstreamBuffer,
845             &lockFlags);
846         if (bitstream == nullptr)
847         {
848             MOS_FreeMemory(tempBsBuffer);
849             ENCODE_CHK_NULL_RETURN(nullptr);
850         }
851 
852         for (uint32_t i = 0; i < statusReportData->numberTilesInFrame; i++)
853         {
854             uint32_t offset = tileReportData[i].bitstreamByteOffset * CODECHAL_CACHELINE_SIZE;
855             uint32_t len    = tileStatusReport[i].Length;
856 
857             MOS_SecureMemcpy(bufPtr, len, &bitstream[offset], len);
858             bufPtr += len;
859         }
860 
861         MOS_SecureMemcpy(bitstream, statusReportData->bitstreamSize, tempBsBuffer, statusReportData->bitstreamSize);
862         MOS_ZeroMemory(&bitstream[statusReportData->bitstreamSize], m_basicFeature->m_bitstreamSize - statusReportData->bitstreamSize);
863 
864         if (bitstream)
865         {
866             m_allocator->UnLock(&currRefList->resBitstreamBuffer);
867         }
868 
869         MOS_FreeMemory(tempBsBuffer);
870 
871         return MOS_STATUS_SUCCESS;
872     }
873 
PerformHwStitch(PMOS_COMMAND_BUFFER cmdBuffer)874     MOS_STATUS HevcPakIntegratePkt::PerformHwStitch(
875         PMOS_COMMAND_BUFFER cmdBuffer)
876     {
877         ENCODE_FUNC_CALL();
878 
879         // 2nd level BB buffer for stitching cmd
880         // Current location to add cmds in 2nd level batch buffer
881         m_HucStitchCmdBatchBuffer.iCurrent = 0;
882         // Reset starting location (offset) executing 2nd level batch buffer for each frame & each pass
883         m_HucStitchCmdBatchBuffer.dwOffset = 0;
884         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_BATCH_BUFFER_START)(cmdBuffer, &m_HucStitchCmdBatchBuffer));
885         // This wait cmd is needed to make sure copy command is done as suggested by HW folk in encode cases
886         auto &mfxWaitParams               = m_miItf->MHW_GETPAR_F(MFX_WAIT)();
887         mfxWaitParams                     = {};
888         mfxWaitParams.iStallVdboxPipeline = m_osInterface->osCpInterface->IsCpEnabled() ? true : false;
889         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MFX_WAIT)(cmdBuffer));
890 
891         return MOS_STATUS_SUCCESS;
892     }
893 
ConfigStitchDataBuffer() const894     MOS_STATUS HevcPakIntegratePkt::ConfigStitchDataBuffer() const
895     {
896         ENCODE_FUNC_CALL();
897 
898         auto currPass = m_pipeline->GetCurrentPass();
899         HucCommandData *hucStitchDataBuf = (HucCommandData*)m_allocator->LockResourceForWrite(const_cast<MOS_RESOURCE*>(&m_resHucStitchDataBuffer[m_pipeline->m_currRecycledBufIdx][currPass]));
900         ENCODE_CHK_NULL_RETURN(hucStitchDataBuf);
901 
902         MOS_ZeroMemory(hucStitchDataBuf, sizeof(HucCommandData));
903         hucStitchDataBuf->TotalCommands          = 1;
904         hucStitchDataBuf->InputCOM[0].SizeOfData = 0xf;
905 
906         uint16_t numTileColumns = 1;
907         uint16_t numTileRows    = 1;
908         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetTileRowColumns, numTileRows, numTileColumns);
909 
910         HucInputCmd hucInputCmd;
911         MOS_ZeroMemory(&hucInputCmd, sizeof(HucInputCmd));
912 
913         ENCODE_CHK_NULL_RETURN(m_osInterface->osCpInterface);
914         hucInputCmd.SelectionForIndData = m_osInterface->osCpInterface->IsCpEnabled() ? 4 : 0;
915         hucInputCmd.CmdMode             = HUC_CMD_LIST_MODE;
916         hucInputCmd.LengthOfTable       = numTileRows * numTileColumns;
917         hucInputCmd.CopySize            = m_hwInterface->m_tileRecordSize;
918 
919         // Tile record always in m_tileRecordBuffer even in scalable node
920         uint32_t      statBufIdx = m_basicFeature->m_currOriginalPic.FrameIdx;
921         MOS_RESOURCE *presSrc    = nullptr;
922 
923         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetTileRecordBuffer, statBufIdx, presSrc);
924         ENCODE_CHK_NULL_RETURN(presSrc);
925 
926         ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource(
927             m_osInterface,
928             presSrc,
929             false,
930             false));
931 
932         ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource(
933             m_osInterface,
934             &m_basicFeature->m_resBitstreamBuffer,
935             true,
936             true));
937 
938         uint64_t srcAddr = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, presSrc);
939         uint64_t destrAddr = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, &m_basicFeature->m_resBitstreamBuffer);
940         hucInputCmd.SrcAddrBottom  = (uint32_t)(srcAddr & 0x00000000FFFFFFFF);
941         hucInputCmd.SrcAddrTop     = (uint32_t)((srcAddr & 0xFFFFFFFF00000000) >> 32);
942         hucInputCmd.DestAddrBottom = (uint32_t)(destrAddr & 0x00000000FFFFFFFF);
943         hucInputCmd.DestAddrTop    = (uint32_t)((destrAddr & 0xFFFFFFFF00000000) >> 32);
944 
945         MOS_SecureMemcpy(hucStitchDataBuf->InputCOM[0].data, sizeof(HucInputCmd), &hucInputCmd, sizeof(HucInputCmd));
946 
947         m_allocator->UnLock(const_cast<MOS_RESOURCE*>(&m_resHucStitchDataBuffer[m_pipeline->m_currRecycledBufIdx][currPass]));
948 
949         return MOS_STATUS_SUCCESS;
950     }
951 
    //!
    //! \brief    Set the HUC_IMEM_STATE parameters for the PAK integrate packet
    //! \details  Only the kernel descriptor is set, from m_vdboxHucPakIntKernelDescriptor.
    //! \return   MOS_STATUS
    //!           Always MOS_STATUS_SUCCESS
    //!
    MHW_SETPAR_DECL_SRC(HUC_IMEM_STATE, HevcPakIntegratePkt)
    {
        params.kernelDescriptor = m_vdboxHucPakIntKernelDescriptor;
        return MOS_STATUS_SUCCESS;
    }
957 
MHW_SETPAR_DECL_SRC(HUC_DMEM_STATE,HevcPakIntegratePkt)958     MHW_SETPAR_DECL_SRC(HUC_DMEM_STATE, HevcPakIntegratePkt)
959     {
960         params.function = PAK_INTEGRATE;
961 
962         ENCODE_CHK_STATUS_RETURN(SetDmemBuffer());
963 
964         int32_t currentPass  = m_pipeline->GetCurrentPass();
965         params.hucDataSource = m_resHucPakStitchDmemBuffer[m_pipeline->m_currRecycledBufIdx][currentPass];
966         params.dataLength    = MOS_ALIGN_CEIL(sizeof(HucPakIntegrateDmem), CODECHAL_CACHELINE_SIZE);
967         params.dmemOffset    = HUC_DMEM_OFFSET_RTOS_GEMS;
968 
969         return MOS_STATUS_SUCCESS;
970     }
971 
MHW_SETPAR_DECL_SRC(HUC_VIRTUAL_ADDR_STATE,HevcPakIntegratePkt)972     MHW_SETPAR_DECL_SRC(HUC_VIRTUAL_ADDR_STATE, HevcPakIntegratePkt)
973     {
974         params.function = PAK_INTEGRATE;
975 
976         uint32_t statBufIdx = 0;
977         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetStatisticsBufferIndex, statBufIdx);
978 
979         MOS_RESOURCE *resTileBasedStatisticsBuffer = nullptr;
980         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetTileBasedStatisticsBuffer, statBufIdx, resTileBasedStatisticsBuffer);
981         MOS_RESOURCE *resHuCPakAggregatedFrameStatsBuffer = nullptr;
982         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetHucPakAggregatedFrameStatsBuffer, resHuCPakAggregatedFrameStatsBuffer);
983         MOS_RESOURCE *resTileRecordBuffer = nullptr;
984         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetTileRecordBuffer, statBufIdx, resTileRecordBuffer);
985         uint32_t numTiles = 1;
986         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetTileNum, numTiles);
987         uint32_t       lastTileIndex = numTiles - 1;
988         EncodeTileData tileData      = {};
989         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetTileByIndex, tileData, lastTileIndex);
990 
991         auto brcFeature = dynamic_cast<HEVCEncodeBRC *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcBrcFeature));
992         ENCODE_CHK_NULL_RETURN(brcFeature);
993         auto vdenc2ndLevelBatchBuffer = brcFeature->GetVdenc2ndLevelBatchBuffer(m_pipeline->m_currRecycledBufIdx);
994 
995         // Add Virtual addr
996         params.regionParams[0].presRegion = resTileBasedStatisticsBuffer;                 // Region 0 Input - Tile based input statistics from PAK/ VDEnc
997         params.regionParams[0].dwOffset   = 0;
998         params.regionParams[1].presRegion = resHuCPakAggregatedFrameStatsBuffer;          // Region 1 Output - HuC Frame statistics output
999         params.regionParams[1].isWritable = true;
1000 
1001         params.regionParams[4].presRegion = &m_basicFeature->m_resBitstreamBuffer;        // Region 4 Input - Last Tile bitstream
1002         params.regionParams[4].dwOffset   = MOS_ALIGN_FLOOR(tileData.bitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
1003         params.regionParams[5].presRegion = &m_basicFeature->m_resBitstreamBuffer;        // Region 5 Output - HuC modifies the last tile bitstream before stitch
1004         params.regionParams[5].dwOffset   = MOS_ALIGN_FLOOR(tileData.bitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE);
1005         params.regionParams[5].isWritable = true;
1006         params.regionParams[6].presRegion =
1007             m_basicFeature->m_recycleBuf->GetBuffer(VdencBRCHistoryBuffer, m_basicFeature->m_frameNum); // Region 6 Output - History Buffer (Input/Output)
1008         params.regionParams[6].isWritable = true;
1009         params.regionParams[7].presRegion = &vdenc2ndLevelBatchBuffer->OsResource;         // Region 7 Input- HCP PIC state command
1010         MOS_RESOURCE *resBrcDataBuffer                 = nullptr;
1011         RUN_FEATURE_INTERFACE_RETURN(HEVCEncodeBRC, HevcFeatureIDs::hevcBrcFeature, GetBrcDataBuffer, resBrcDataBuffer);
1012         params.regionParams[9].presRegion = resBrcDataBuffer;                              // Region 9 Output - HuC outputs BRC data
1013         params.regionParams[9].isWritable = true;
1014 
1015         params.regionParams[15].presRegion = resTileRecordBuffer;
1016         params.regionParams[15].dwOffset = 0;
1017 
1018         bool isTileReplayEnabled = false;
1019         RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, FeatureIDs::encodeTile, IsTileReplayEnabled, isTileReplayEnabled);
1020         if (m_basicFeature->m_enableTileStitchByHW && (isTileReplayEnabled || m_pipeline->GetPipeNum() > 1))
1021         {
1022             ENCODE_CHK_STATUS_RETURN(ConfigStitchDataBuffer());
1023 
1024             uint32_t currentPass               = m_pipeline->GetCurrentPass();
1025             params.regionParams[8].presRegion  = const_cast<PMOS_RESOURCE>(&m_resHucStitchDataBuffer[m_pipeline->m_currRecycledBufIdx][currentPass]);  // Region 8 - data buffer read by HUC for stitching cmd generation
1026             params.regionParams[10].presRegion = const_cast<PMOS_RESOURCE>(&m_HucStitchCmdBatchBuffer.OsResource);  // Region 10 - SLB for stitching cmd output from Huc
1027             params.regionParams[10].isWritable = true;
1028         }
1029 
1030         return MOS_STATUS_SUCCESS;
1031     }
1032 
1033 #if USE_CODECHAL_DEBUG_TOOL
DumpInput()1034     MOS_STATUS HevcPakIntegratePkt::DumpInput()
1035     {
1036         ENCODE_FUNC_CALL();
1037         int32_t currentPass = m_pipeline->GetCurrentPass();
1038 
1039         CodechalDebugInterface *debugInterface = m_pipeline->GetDebugInterface();
1040         ENCODE_CHK_NULL_RETURN(debugInterface);
1041 
1042         ENCODE_CHK_STATUS_RETURN(debugInterface->DumpHucDmem(
1043             m_resHucPakStitchDmemBuffer[m_pipeline->m_currRecycledBufIdx][currentPass],
1044             m_vdencHucPakDmemBufferSize,
1045             currentPass,
1046             hucRegionDumpPakIntegrate));
1047 
1048         ENCODE_CHK_STATUS_RETURN(DumpRegion(0, "_TileBasedStatistic", true, hucRegionDumpPakIntegrate));
1049         ENCODE_CHK_STATUS_RETURN(DumpRegion(4, "_Bitstream", true, hucRegionDumpPakIntegrate));
1050         ENCODE_CHK_STATUS_RETURN(DumpRegion(7, "_HcpPicState", true, hucRegionDumpPakIntegrate));
1051         ENCODE_CHK_STATUS_RETURN(DumpRegion(15, "_TileRecord", true, hucRegionDumpPakIntegrate));
1052 
1053         return MOS_STATUS_SUCCESS;
1054     }
1055 
DumpOutput()1056     MOS_STATUS HevcPakIntegratePkt::DumpOutput()
1057     {
1058         ENCODE_FUNC_CALL();
1059 
1060         ENCODE_CHK_STATUS_RETURN(DumpRegion(1, "_HuCPakAggregatedFrameStats", false, hucRegionDumpPakIntegrate));
1061         ENCODE_CHK_STATUS_RETURN(DumpRegion(5, "_Bitstream", false, hucRegionDumpPakIntegrate));
1062         ENCODE_CHK_STATUS_RETURN(DumpRegion(6, "_BrcHistory", false, hucRegionDumpPakIntegrate));
1063         ENCODE_CHK_STATUS_RETURN(DumpRegion(9, "_OutputBrcData", false, hucRegionDumpPakIntegrate));
1064         ENCODE_CHK_STATUS_RETURN(DumpRegion(10, "_StitchCmd", false, hucRegionDumpPakIntegrate));
1065 
1066         return MOS_STATUS_SUCCESS;
1067     }
1068 #endif
1069 
AddCondBBEndForLastPass(MOS_COMMAND_BUFFER & cmdBuffer)1070      MOS_STATUS HevcPakIntegratePkt::AddCondBBEndForLastPass(MOS_COMMAND_BUFFER &cmdBuffer)
1071     {
1072         ENCODE_FUNC_CALL();
1073 
1074         if (m_pipeline->IsSingleTaskPhaseSupported() || m_pipeline->IsFirstPass() || m_pipeline->GetPassNum() == 1)
1075         {
1076             return MOS_STATUS_SUCCESS;
1077         }
1078 
1079         auto &miConditionalBatchBufferEndParams = m_miItf->MHW_GETPAR_F(MI_CONDITIONAL_BATCH_BUFFER_END)();
1080         miConditionalBatchBufferEndParams       = {};
1081 
1082         // VDENC uses HuC FW generated semaphore for conditional 2nd pass
1083         miConditionalBatchBufferEndParams.presSemaphoreBuffer =
1084             m_basicFeature->m_recycleBuf->GetBuffer(VdencBrcPakMmioBuffer, 0);
1085 
1086         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_CONDITIONAL_BATCH_BUFFER_END)(&cmdBuffer));
1087 
1088         auto          mmioRegisters = m_hcpItf->GetMmioRegisters(m_vdboxIndex);
1089         MOS_RESOURCE *osResource    = nullptr;
1090         uint32_t      offset        = 0;
1091         m_statusReport->GetAddress(statusReportImageStatusCtrl, osResource, offset);
1092         //uint32_t baseOffset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2;  // encodeStatus is offset by 2 DWs in the resource
1093 
1094         // Write back the HCP image control register for RC6 may clean it out
1095         auto &registerMemParams           = m_miItf->MHW_GETPAR_F(MI_LOAD_REGISTER_MEM)();
1096         registerMemParams                 = {};
1097         registerMemParams.presStoreBuffer = osResource;
1098         registerMemParams.dwOffset        = offset;
1099         registerMemParams.dwRegister      = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
1100         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(&cmdBuffer));
1101 
1102         HevcVdencBrcBuffers *vdencBrcBuffers = nullptr;
1103         auto                 feature         = dynamic_cast<HEVCEncodeBRC *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcBrcFeature));
1104         ENCODE_CHK_NULL_RETURN(feature);
1105         vdencBrcBuffers = feature->GetHevcVdencBrcBuffers();
1106         ENCODE_CHK_NULL_RETURN(vdencBrcBuffers);
1107 
1108         auto &miStoreRegMemParams           = m_miItf->MHW_GETPAR_F(MI_STORE_REGISTER_MEM)();
1109         miStoreRegMemParams                 = {};
1110         miStoreRegMemParams.presStoreBuffer = vdencBrcBuffers->resBrcPakStatisticBuffer[vdencBrcBuffers->currBrcPakStasIdxForWrite];
1111         miStoreRegMemParams.dwOffset        = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS);
1112         miStoreRegMemParams.dwRegister      = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
1113         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(&cmdBuffer));
1114 
1115         m_statusReport->GetAddress(statusReportImageStatusCtrlOfLastBRCPass, osResource, offset);
1116         miStoreRegMemParams                 = {};
1117         miStoreRegMemParams.presStoreBuffer = osResource;
1118         miStoreRegMemParams.dwOffset        = offset;
1119         miStoreRegMemParams.dwRegister      = mmioRegisters->hcpEncImageStatusCtrlRegOffset;
1120         ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(&cmdBuffer));
1121 
1122         return MOS_STATUS_SUCCESS;
1123     }
1124 
1125     }
1126