/*
* Copyright (c) 2018, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
//!
//! \file     encode_pak_integrate_packet.cpp
//! \brief    Defines the interface for pak integrate packet
//!
26 #include "mos_defs.h" 27 #include "encode_pak_integrate_packet.h" 28 #include "mhw_vdbox.h" 29 #include "encode_hevc_brc.h" 30 #include "encode_status_report_defs.h" 31 #include "mos_os_cp_interface_specific.h" 32 33 namespace encode { Init()34 MOS_STATUS HevcPakIntegratePkt::Init() 35 { 36 ENCODE_FUNC_CALL(); 37 38 m_basicFeature = dynamic_cast<HevcBasicFeature *>(m_featureManager->GetFeature(HevcFeatureIDs::basicFeature)); 39 ENCODE_CHK_NULL_RETURN(m_basicFeature); 40 41 ENCODE_CHK_STATUS_RETURN(EncodeHucPkt::Init()); 42 43 ENCODE_CHK_NULL_RETURN(m_hwInterface); 44 m_osInterface = m_hwInterface->GetOsInterface(); 45 ENCODE_CHK_NULL_RETURN(m_osInterface); 46 47 m_miItf = m_hwInterface->GetMiInterfaceNext(); 48 ENCODE_CHK_NULL_RETURN(m_miItf); 49 50 ENCODE_CHK_NULL_RETURN(m_pipeline); 51 #ifdef _MMC_SUPPORTED 52 m_mmcState = m_pipeline->GetMmcState(); 53 ENCODE_CHK_NULL_RETURN(m_mmcState); 54 #endif 55 56 return MOS_STATUS_SUCCESS; 57 } 58 AllocateResources()59 MOS_STATUS HevcPakIntegratePkt::AllocateResources() 60 { 61 ENCODE_CHK_STATUS_RETURN(EncodeHucPkt::AllocateResources()); 62 63 // Only needed when tile & BRC is enabled, but the size is not changing at frame level 64 if (m_resHucPakStitchDmemBuffer[0][0] == nullptr) 65 { 66 uint8_t *data; 67 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear; 68 69 // Pak stitch DMEM 70 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS)); 71 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER; 72 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR; 73 allocParamsForBufferLinear.Format = Format_Buffer; 74 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(sizeof(HucPakIntegrateDmem), CODECHAL_CACHELINE_SIZE); 75 allocParamsForBufferLinear.pBufName = "PAK Stitch Dmem Buffer"; 76 allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ_WRITE_NOCACHE; 77 auto numOfPasses = CODECHAL_VDENC_BRC_NUM_OF_PASSES; 78 79 for (auto k = 0; k < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; 
k++) 80 { 81 for (auto i = 0; i < numOfPasses; i++) 82 { 83 m_resHucPakStitchDmemBuffer[k][i] = m_allocator->AllocateResource(allocParamsForBufferLinear, true); 84 } 85 } 86 87 if (m_basicFeature->m_enableTileStitchByHW) 88 { 89 // HuC stitching data buffer 90 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(sizeof(HucCommandData), CODECHAL_PAGE_SIZE); 91 allocParamsForBufferLinear.pBufName = "HEVC HuC Stitch Data Buffer"; 92 allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ_WRITE_CACHE; 93 MOS_RESOURCE *allocatedBuffer = nullptr; 94 for (auto i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; ++i) 95 { 96 for (auto j = 0; j < CODECHAL_VDENC_BRC_NUM_OF_PASSES; ++j) 97 { 98 allocatedBuffer = m_allocator->AllocateResource(allocParamsForBufferLinear, true); 99 ENCODE_CHK_NULL_RETURN(allocatedBuffer); 100 m_resHucStitchDataBuffer[i][j] = *allocatedBuffer; 101 } 102 } 103 104 // Second level batch buffer for HuC stitching CMD 105 MOS_ZeroMemory(&m_HucStitchCmdBatchBuffer, sizeof(m_HucStitchCmdBatchBuffer)); 106 m_HucStitchCmdBatchBuffer.bSecondLevel = true; 107 ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb( 108 m_osInterface, 109 &m_HucStitchCmdBatchBuffer, 110 nullptr, 111 m_hwInterface->m_HucStitchCmdBatchBufferSize)); 112 } 113 } 114 115 return MOS_STATUS_SUCCESS; 116 } 117 FreeResources()118 MOS_STATUS HevcPakIntegratePkt::FreeResources() 119 { 120 MOS_STATUS eStatus = MOS_STATUS_SUCCESS; 121 122 ENCODE_FUNC_CALL(); 123 124 eStatus = Mhw_FreeBb(m_osInterface, &m_HucStitchCmdBatchBuffer, nullptr); 125 ENCODE_ASSERT(eStatus == MOS_STATUS_SUCCESS); 126 127 return eStatus; 128 } 129 UpdateParameters()130 void HevcPakIntegratePkt::UpdateParameters() 131 { 132 ENCODE_FUNC_CALL(); 133 134 if (!m_pipeline->IsSingleTaskPhaseSupported()) 135 { 136 m_osInterface->pfnResetPerfBufferID(m_osInterface); 137 } 138 139 m_basicFeature->m_currPakSliceIdx = (m_basicFeature->m_currPakSliceIdx + 1) % m_basicFeature->m_codecHalHevcNumPakSliceBatchBuffers; 140 
} 141 Submit(MOS_COMMAND_BUFFER * commandBuffer,uint8_t packetPhase)142 MOS_STATUS HevcPakIntegratePkt::Submit(MOS_COMMAND_BUFFER *commandBuffer, uint8_t packetPhase) 143 { 144 ENCODE_FUNC_CALL(); 145 146 bool firstTaskInPhase = packetPhase & firstPacket; 147 bool requestProlog = !m_pipeline->IsSingleTaskPhaseSupported() || firstTaskInPhase; 148 149 uint16_t perfTag = CODECHAL_ENCODE_PERFTAG_CALL_PAK_KERNEL; 150 SetPerfTag(perfTag, (uint16_t)m_basicFeature->m_mode, m_basicFeature->m_pictureCodingType); 151 152 auto brcFeature = dynamic_cast<HEVCEncodeBRC *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcBrcFeature)); 153 ENCODE_CHK_NULL_RETURN(brcFeature); 154 155 ENCODE_CHK_STATUS_RETURN(AddCondBBEndForLastPass(*commandBuffer)); 156 157 m_vdencHucUsed = brcFeature->IsVdencHucUsed(); 158 159 bool isTileReplayEnabled = false; 160 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, FeatureIDs::encodeTile, IsTileReplayEnabled, isTileReplayEnabled); 161 162 MediaPerfProfiler *perfProfiler = MediaPerfProfiler::Instance(); 163 ENCODE_CHK_NULL_RETURN(perfProfiler); 164 ENCODE_CHK_STATUS_RETURN(perfProfiler->AddPerfCollectStartCmd( 165 (void *)m_pipeline, m_osInterface, m_miItf, commandBuffer)); 166 167 if (m_vdencHucUsed || (m_basicFeature->m_enableTileStitchByHW && (isTileReplayEnabled || m_pipeline->GetPipeNum() > 1))) 168 { 169 // Huc basic 170 ENCODE_CHK_STATUS_RETURN(Execute(commandBuffer, true, requestProlog)); 171 172 // Add huc status update to status buffer 173 PMOS_RESOURCE osResource = nullptr; 174 uint32_t offset = 0; 175 ENCODE_CHK_STATUS_RETURN(m_statusReport->GetAddress(statusReportHucStatusRegMask, osResource, offset)); 176 ENCODE_CHK_NULL_RETURN(osResource); 177 178 // Write HUC_STATUS mask 179 auto &storeDataParams = m_miItf->MHW_GETPAR_F(MI_STORE_DATA_IMM)(); 180 storeDataParams = {}; 181 storeDataParams.pOsResource = osResource; 182 storeDataParams.dwResourceOffset = offset; 183 storeDataParams.dwValue = 
m_hwInterface->GetHucInterfaceNext()->GetHucStatusReEncodeMask(); 184 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(commandBuffer)); 185 186 // store HUC_STATUS register 187 osResource = nullptr; 188 offset = 0; 189 ENCODE_CHK_STATUS_RETURN(m_statusReport->GetAddress(statusReportHucStatusReg, osResource, offset)); 190 ENCODE_CHK_NULL_RETURN(osResource); 191 auto mmioRegisters = m_hucItf->GetMmioRegisters(m_vdboxIndex); 192 auto &storeRegParams = m_miItf->MHW_GETPAR_F(MI_STORE_REGISTER_MEM)(); 193 storeDataParams = {}; 194 storeRegParams.presStoreBuffer = osResource; 195 storeRegParams.dwOffset = offset; 196 storeRegParams.dwRegister = mmioRegisters->hucStatusRegOffset; 197 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(commandBuffer)); 198 } 199 200 // Use HW stitch commands only in the scalable mode 201 // For single pipe with tile replay, stitch also needed 202 if (m_basicFeature->m_enableTileStitchByHW && (isTileReplayEnabled || m_pipeline->GetPipeNum() > 1)) 203 { 204 ENCODE_CHK_STATUS_RETURN(PerformHwStitch(commandBuffer)); 205 } 206 207 ENCODE_CHK_STATUS_RETURN(ReadSseStatistics(*commandBuffer)); 208 209 ENCODE_CHK_STATUS_RETURN(ReadSliceSize(*commandBuffer)); 210 211 ENCODE_CHK_STATUS_RETURN(EndStatusReport(statusReportMfx, commandBuffer)); 212 if (false == m_pipeline->IsFrameTrackingEnabled()) 213 { 214 ENCODE_CHK_STATUS_RETURN(UpdateStatusReportNext(statusReportGlobalCount, commandBuffer)); 215 } 216 CODECHAL_DEBUG_TOOL( 217 if (m_mmcState) { 218 m_mmcState->UpdateUserFeatureKey(&(m_basicFeature->m_reconSurface)); 219 }) 220 // Reset parameters for next PAK execution 221 if (false == m_pipeline->IsFrameTrackingEnabled()) 222 { 223 UpdateParameters(); 224 } 225 226 CODECHAL_DEBUG_TOOL 227 ( 228 ENCODE_CHK_STATUS_RETURN(DumpInput()); 229 ) 230 231 return MOS_STATUS_SUCCESS; 232 } 233 EndStatusReport(uint32_t srType,MOS_COMMAND_BUFFER * cmdBuffer)234 MOS_STATUS HevcPakIntegratePkt::EndStatusReport( 235 uint32_t 
srType, 236 MOS_COMMAND_BUFFER *cmdBuffer) 237 { 238 ENCODE_FUNC_CALL(); 239 ENCODE_CHK_NULL_RETURN(cmdBuffer); 240 auto brcFeature = dynamic_cast<HEVCEncodeBRC *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcBrcFeature)); 241 ENCODE_CHK_NULL_RETURN(brcFeature); 242 if (m_pipeline->GetPipeNum() <= 1 && m_pipeline->IsSingleTaskPhaseSupported()) 243 { 244 // single pipe mode can read the info from MMIO register. Otherwise, 245 // we have to use the tile size statistic buffer 246 ENCODE_CHK_STATUS_RETURN(ReadHcpStatus(m_vdboxIndex, m_statusReport, *cmdBuffer)); 247 // BRC PAK statistics different for each pass 248 if (brcFeature->IsBRCEnabled()) 249 { 250 uint8_t ucPass = (uint8_t)m_pipeline->GetCurrentPass(); 251 EncodeReadBrcPakStatsParams readBrcPakStatsParams; 252 MOS_RESOURCE *osResource = nullptr; 253 uint32_t offset = 0; 254 m_statusReport->GetAddress(statusReportNumberPasses, osResource, offset); 255 RUN_FEATURE_INTERFACE_RETURN(HEVCEncodeBRC, HevcFeatureIDs::hevcBrcFeature, SetReadBrcPakStatsParams, ucPass, offset, osResource, readBrcPakStatsParams); 256 ReadBrcPakStatistics(cmdBuffer, &readBrcPakStatsParams); 257 } 258 } 259 ENCODE_CHK_STATUS_RETURN(MediaPacket::EndStatusReportNext(srType, cmdBuffer)); 260 261 MediaPerfProfiler *perfProfiler = MediaPerfProfiler::Instance(); 262 ENCODE_CHK_NULL_RETURN(perfProfiler); 263 ENCODE_CHK_STATUS_RETURN(perfProfiler->AddPerfCollectEndCmd( 264 (void *)m_pipeline, m_osInterface, m_miItf, cmdBuffer)); 265 266 return MOS_STATUS_SUCCESS; 267 } 268 ReadHcpStatus(MHW_VDBOX_NODE_IND vdboxIndex,MediaStatusReport * statusReport,MOS_COMMAND_BUFFER & cmdBuffer)269 MOS_STATUS HevcPakIntegratePkt::ReadHcpStatus( 270 MHW_VDBOX_NODE_IND vdboxIndex, 271 MediaStatusReport * statusReport, 272 MOS_COMMAND_BUFFER &cmdBuffer) 273 { 274 ENCODE_FUNC_CALL(); 275 276 MOS_STATUS eStatus = MOS_STATUS_SUCCESS; 277 278 CODEC_HW_FUNCTION_ENTER; 279 280 ENCODE_CHK_NULL_RETURN(statusReport); 281 ENCODE_CHK_NULL_RETURN(m_hwInterface); 282 283 
MOS_RESOURCE *osResource = nullptr; 284 uint32_t offset = 0; 285 286 EncodeStatusReadParams params; 287 MOS_ZeroMemory(¶ms, sizeof(params)); 288 289 ENCODE_CHK_STATUS_RETURN(statusReport->GetAddress(encode::statusReportMfxBitstreamByteCountPerFrame, osResource, offset)); 290 params.resBitstreamByteCountPerFrame = osResource; 291 params.bitstreamByteCountPerFrameOffset = offset; 292 293 ENCODE_CHK_STATUS_RETURN(statusReport->GetAddress(encode::statusReportMfxBitstreamSyntaxElementOnlyBitCount, osResource, offset)); 294 params.resBitstreamSyntaxElementOnlyBitCount = osResource; 295 params.bitstreamSyntaxElementOnlyBitCountOffset = offset; 296 297 ENCODE_CHK_STATUS_RETURN(statusReport->GetAddress(encode::statusReportQPStatusCount, osResource, offset)); 298 params.resQpStatusCount = osResource; 299 params.qpStatusCountOffset = offset; 300 301 ENCODE_CHK_STATUS_RETURN(statusReport->GetAddress(encode::statusReportImageStatusMask, osResource, offset)); 302 params.resImageStatusMask = osResource; 303 params.imageStatusMaskOffset = offset; 304 305 ENCODE_CHK_STATUS_RETURN(statusReport->GetAddress(encode::statusReportImageStatusCtrl, osResource, offset)); 306 params.resImageStatusCtrl = osResource; 307 params.imageStatusCtrlOffset = offset; 308 309 ENCODE_CHK_STATUS_RETURN(m_hwInterface->ReadHcpStatus(vdboxIndex, params, &cmdBuffer)); 310 311 // Slice Size Conformance 312 if (m_basicFeature->m_hevcSeqParams->SliceSizeControl) 313 { 314 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeDss, HevcFeatureIDs::hevcVdencDssFeature, ReadHcpStatus, vdboxIndex, cmdBuffer); 315 } 316 auto brcFeature = dynamic_cast<HEVCEncodeBRC *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcBrcFeature)); 317 ENCODE_CHK_NULL_RETURN(brcFeature); 318 bool vdencHucUsed = brcFeature->IsVdencHucUsed(); 319 if (vdencHucUsed) 320 { 321 // Store PAK frameSize MMIO to PakInfo buffer 322 auto &miStoreRegMemParams = m_miItf->MHW_GETPAR_F(MI_STORE_REGISTER_MEM)(); 323 miStoreRegMemParams = {}; 324 
miStoreRegMemParams.presStoreBuffer = m_basicFeature->m_recycleBuf->GetBuffer(PakInfo, 0); 325 miStoreRegMemParams.dwOffset = 0; 326 auto mmioRegisters = m_hcpItf->GetMmioRegisters(m_vdboxIndex); 327 ENCODE_CHK_NULL_RETURN(mmioRegisters); 328 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncBitstreamBytecountFrameRegOffset; 329 330 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(&cmdBuffer)); 331 } 332 ENCODE_CHK_STATUS_RETURN(m_hwInterface->ReadImageStatusForHcp(vdboxIndex, params, &cmdBuffer)); 333 return eStatus; 334 } 335 ReadBrcPakStatistics(PMOS_COMMAND_BUFFER cmdBuffer,EncodeReadBrcPakStatsParams * params)336 MOS_STATUS HevcPakIntegratePkt::ReadBrcPakStatistics( 337 PMOS_COMMAND_BUFFER cmdBuffer, 338 EncodeReadBrcPakStatsParams *params) 339 { 340 MOS_STATUS eStatus = MOS_STATUS_SUCCESS; 341 342 ENCODE_FUNC_CALL(); 343 344 ENCODE_CHK_NULL_RETURN(cmdBuffer); 345 ENCODE_CHK_NULL_RETURN(params); 346 ENCODE_CHK_NULL_RETURN(params->presBrcPakStatisticBuffer); 347 ENCODE_CHK_NULL_RETURN(params->presStatusBuffer); 348 349 ENCODE_CHK_STATUS_RETURN(ValidateVdboxIdx(m_vdboxIndex)); 350 351 auto mmioRegisters = m_hcpItf->GetMmioRegisters(m_vdboxIndex); 352 ENCODE_CHK_NULL_RETURN(mmioRegisters); 353 354 auto AddMiStoreRegisterMemCmd = [&](uint32_t offset, uint32_t hcpMmioRegister) { 355 auto &miStoreRegMemParams = m_miItf->MHW_GETPAR_F(MI_STORE_REGISTER_MEM)(); 356 miStoreRegMemParams = {}; 357 miStoreRegMemParams.presStoreBuffer = params->presBrcPakStatisticBuffer; 358 miStoreRegMemParams.dwOffset = offset; 359 miStoreRegMemParams.dwRegister = hcpMmioRegister; 360 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(cmdBuffer)); 361 return MOS_STATUS_SUCCESS; 362 }; 363 364 ENCODE_CHK_STATUS_RETURN(AddMiStoreRegisterMemCmd(CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_BITSTREAM_BYTECOUNT_FRAME), mmioRegisters->hcpEncBitstreamBytecountFrameRegOffset)); 365 
ENCODE_CHK_STATUS_RETURN(AddMiStoreRegisterMemCmd(CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_BITSTREAM_BYTECOUNT_FRAME_NOHEADER), mmioRegisters->hcpEncBitstreamBytecountFrameNoHeaderRegOffset)); 366 ENCODE_CHK_STATUS_RETURN(AddMiStoreRegisterMemCmd(CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL), mmioRegisters->hcpEncImageStatusCtrlRegOffset)); 367 368 auto &storeDataParams = m_miItf->MHW_GETPAR_F(MI_STORE_DATA_IMM)(); 369 storeDataParams = {}; 370 storeDataParams.pOsResource = params->presStatusBuffer; 371 storeDataParams.dwResourceOffset = params->dwStatusBufNumPassesOffset; 372 storeDataParams.dwValue = params->ucPass; 373 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(cmdBuffer)); 374 375 return eStatus; 376 } 377 378 // Inline functions ValidateVdboxIdx(const MHW_VDBOX_NODE_IND & vdboxIndex)379 MOS_STATUS HevcPakIntegratePkt::ValidateVdboxIdx(const MHW_VDBOX_NODE_IND &vdboxIndex) 380 { 381 ENCODE_FUNC_CALL(); 382 383 MOS_STATUS eStatus = MOS_STATUS_SUCCESS; 384 if (vdboxIndex > m_hwInterface->GetMaxVdboxIndex()) 385 { 386 //ENCODE_ASSERTMESSAGE("ERROR - vdbox index exceed the maximum"); 387 eStatus = MOS_STATUS_INVALID_PARAMETER; 388 } 389 390 return eStatus; 391 } 392 CalculateCommandSize(uint32_t & commandBufferSize,uint32_t & requestedPatchListSize)393 MOS_STATUS HevcPakIntegratePkt::CalculateCommandSize(uint32_t &commandBufferSize, uint32_t &requestedPatchListSize) 394 { 395 ENCODE_FUNC_CALL(); 396 397 uint32_t hucCommandsSize = 0; 398 uint32_t hucPatchListSize = 0; 399 MHW_VDBOX_STATE_CMDSIZE_PARAMS stateCmdSizeParams; 400 401 stateCmdSizeParams.uNumStoreDataImm = 2; 402 stateCmdSizeParams.uNumStoreReg = 4; 403 stateCmdSizeParams.uNumMfxWait = 11; 404 stateCmdSizeParams.uNumMiCopy = 5; 405 stateCmdSizeParams.uNumMiFlush = 2; 406 stateCmdSizeParams.uNumVdPipelineFlush = 1; 407 stateCmdSizeParams.bPerformHucStreamOut = true; 408 
ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucStateCommandSize( 409 m_basicFeature->m_mode, (uint32_t*)&hucCommandsSize, (uint32_t*)&hucPatchListSize, &stateCmdSizeParams)); 410 411 bool isTileReplayEnabled = false; 412 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, FeatureIDs::encodeTile, IsTileReplayEnabled, isTileReplayEnabled); 413 if (m_basicFeature->m_enableTileStitchByHW && (isTileReplayEnabled || m_pipeline->GetPipeNum() > 1)) 414 { 415 uint32_t maxSize = 0; 416 uint32_t patchListMaxSize = 0; 417 ENCODE_CHK_NULL_RETURN(m_hwInterface); 418 ENCODE_CHK_NULL_RETURN(m_hwInterface->GetCpInterface()); 419 MhwCpInterface *cpInterface = m_hwInterface->GetCpInterface(); 420 cpInterface->GetCpStateLevelCmdSize(maxSize, patchListMaxSize); 421 hucCommandsSize += maxSize; 422 hucPatchListSize += patchListMaxSize; 423 } 424 425 commandBufferSize = hucCommandsSize; 426 requestedPatchListSize = m_osInterface->bUsesPatchList ? hucPatchListSize : 0; 427 428 // reserve cmd size for hw stitch 429 commandBufferSize += m_hwStitchCmdSize; 430 431 // 4K align since allocation is in chunks of 4K bytes. 
432 commandBufferSize = MOS_ALIGN_CEIL(commandBufferSize, CODECHAL_PAGE_SIZE); 433 434 return MOS_STATUS_SUCCESS; 435 } 436 SetDmemBuffer() const437 MOS_STATUS HevcPakIntegratePkt::SetDmemBuffer() const 438 { 439 ENCODE_FUNC_CALL(); 440 441 MOS_STATUS eStatus = MOS_STATUS_SUCCESS; 442 443 int32_t currentPass = m_pipeline->GetCurrentPass(); 444 if (currentPass < 0 || currentPass >= CODECHAL_VDENC_BRC_NUM_OF_PASSES) 445 { 446 eStatus = MOS_STATUS_INVALID_PARAMETER; 447 return eStatus; 448 } 449 450 HucPakIntegrateDmem *hucPakStitchDmem = 451 (HucPakIntegrateDmem *)m_allocator->LockResourceForWrite(m_resHucPakStitchDmemBuffer[m_pipeline->m_currRecycledBufIdx][currentPass]); 452 453 ENCODE_CHK_NULL_RETURN(hucPakStitchDmem); 454 MOS_ZeroMemory(hucPakStitchDmem, sizeof(HucPakIntegrateDmem)); 455 456 // Reset all the offsets to be shared in the huc dmem (6*5 DW's) 457 MOS_FillMemory(hucPakStitchDmem, 6 * (MAX_PAK_NUM + 1) * sizeof(uint32_t), 0xFF); 458 459 uint16_t numTileColumns = 1; 460 uint16_t numTileRows = 1; 461 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetTileRowColumns, numTileRows, numTileColumns); 462 463 uint32_t numTiles = 1; 464 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetTileNum, numTiles); 465 466 uint16_t numTilesPerPipe = (uint16_t)(numTiles / m_pipeline->GetPipeNum()); 467 468 auto feature = dynamic_cast<HEVCEncodeBRC *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcBrcFeature)); 469 ENCODE_CHK_NULL_RETURN(feature); 470 471 hucPakStitchDmem->TotalSizeInCommandBuffer = numTiles * CODECHAL_CACHELINE_SIZE; 472 // Last tile length may get modified by HuC. 
Obtain last Tile Record, Add an offset of 8bytes to skip address field in Tile Record 473 hucPakStitchDmem->OffsetInCommandBuffer = (numTiles - 1) * CODECHAL_CACHELINE_SIZE + 8; 474 hucPakStitchDmem->PicWidthInPixel = (uint16_t)m_basicFeature->m_frameWidth; 475 hucPakStitchDmem->PicHeightInPixel = (uint16_t)m_basicFeature->m_frameHeight; 476 hucPakStitchDmem->TotalNumberOfPAKs = feature->IsBRCEnabled() ? m_pipeline->GetPipeNum() : 0; 477 hucPakStitchDmem->Codec = 2; // 1: HEVC DP; 2: HEVC VDEnc; 3: VP9 VDEnc 478 479 hucPakStitchDmem->MAXPass = feature->IsBRCEnabled() ? CODECHAL_VDENC_BRC_NUM_OF_PASSES : 1; 480 hucPakStitchDmem->CurrentPass = (uint8_t)currentPass + 1; // Current BRC pass [1..MAXPass] 481 hucPakStitchDmem->MinCUSize = m_basicFeature->m_hevcSeqParams->log2_min_coding_block_size_minus3 + 3; 482 hucPakStitchDmem->CabacZeroWordFlag = true; 483 hucPakStitchDmem->bitdepth_luma = m_basicFeature->m_hevcSeqParams->bit_depth_luma_minus8 + 8; // default: 8 484 hucPakStitchDmem->bitdepth_chroma = m_basicFeature->m_hevcSeqParams->bit_depth_chroma_minus8 + 8; // default: 8 485 hucPakStitchDmem->ChromaFormatIdc = m_basicFeature->m_hevcSeqParams->chroma_format_idc; 486 487 uint32_t lastTileIndex = numTiles - 1; 488 EncodeTileData tileData = {}; 489 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetTileByIndex, tileData, lastTileIndex); 490 hucPakStitchDmem->LastTileBS_StartInBytes = (tileData.bitstreamByteOffset * CODECHAL_CACHELINE_SIZE) & (CODECHAL_PAGE_SIZE - 1); 491 492 hucPakStitchDmem->PIC_STATE_StartInBytes = (uint16_t)m_basicFeature->m_picStateCmdStartInBytes; 493 494 HevcTileStatusInfo hevcTileStatsOffset = {}; 495 HevcTileStatusInfo hevcFrameStatsOffset = {}; 496 HevcTileStatusInfo hevcStatsSize = {}; 497 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetTileStatusInfo, hevcTileStatsOffset, hevcFrameStatsOffset, hevcStatsSize); 498 499 if (m_pipeline->GetPipeNum() > 1) 500 { 501 //Set the kernel output 
offsets 502 hucPakStitchDmem->HEVC_PAKSTAT_offset[0] = feature->IsBRCEnabled() ? hevcFrameStatsOffset.hevcPakStatistics : 0xFFFFFFFF; 503 hucPakStitchDmem->HEVC_Streamout_offset[0] = feature->IsBRCEnabled() ? hevcFrameStatsOffset.hevcSliceStreamout : 0xFFFFFFFF; 504 hucPakStitchDmem->TileSizeRecord_offset[0] = hevcFrameStatsOffset.tileSizeRecord; 505 hucPakStitchDmem->VDENCSTAT_offset[0] = feature->IsBRCEnabled() ? hevcFrameStatsOffset.vdencStatistics : 0xFFFFFFFF; 506 507 // Calculate number of slices that execute on a single pipe 508 for (auto tileRow = 0; tileRow < numTileRows; tileRow++) 509 { 510 for (auto tileCol = 0; tileCol < numTileColumns; tileCol++) 511 { 512 PCODEC_ENCODER_SLCDATA slcData = m_basicFeature->m_slcData; 513 uint16_t slcCount, idx, sliceNumInTile = 0; 514 515 idx = tileRow * numTileColumns + tileCol; 516 for (slcCount = 0; slcCount < m_basicFeature->m_numSlices; slcCount++) 517 { 518 bool lastSliceInTile = false, sliceInTile = false; 519 520 EncodeTileData curTileData = {}; 521 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetTileByIndex, curTileData, idx); 522 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, IsSliceInTile, slcCount, &curTileData, &sliceInTile, &lastSliceInTile); 523 524 if (!sliceInTile) 525 { 526 continue; 527 } 528 529 sliceNumInTile++; 530 } // end of slice 531 if (0 == sliceNumInTile) 532 { 533 // One tile must have at least one slice 534 ENCODE_ASSERT(false); 535 eStatus = MOS_STATUS_INVALID_PARAMETER; 536 break; 537 } 538 // Set the number of slices per pipe in the Dmem structure 539 hucPakStitchDmem->NumSlices[tileCol] += sliceNumInTile; 540 } 541 } 542 543 for (auto i = 0; i < m_pipeline->GetPipeNum(); i++) 544 { 545 hucPakStitchDmem->NumTiles[i] = numTilesPerPipe; 546 hucPakStitchDmem->NumSlices[i] = numTilesPerPipe; // Assuming 1 slice/ tile. To do: change this later. 547 548 // Statistics are dumped out at a tile level. 
Driver shares with kernel starting offset of each pipe statistic. 549 // Offset is calculated by adding size of statistics/pipe to the offset in combined statistics region. 550 hucPakStitchDmem->TileSizeRecord_offset[i + 1] = (i * numTilesPerPipe * hevcStatsSize.tileSizeRecord) + hevcTileStatsOffset.tileSizeRecord; 551 hucPakStitchDmem->HEVC_PAKSTAT_offset[i + 1] = (i * numTilesPerPipe * hevcStatsSize.hevcPakStatistics) + hevcTileStatsOffset.hevcPakStatistics; 552 hucPakStitchDmem->VDENCSTAT_offset[i + 1] = (i * numTilesPerPipe * hevcStatsSize.vdencStatistics) + hevcTileStatsOffset.vdencStatistics; 553 hucPakStitchDmem->HEVC_Streamout_offset[i + 1] = (i * hucPakStitchDmem->NumSlices[i] * CODECHAL_CACHELINE_SIZE) + hevcTileStatsOffset.hevcSliceStreamout; 554 } 555 } 556 else 557 { 558 hucPakStitchDmem->NumTiles[0] = (uint16_t)numTiles; 559 hucPakStitchDmem->TotalNumberOfPAKs = m_pipeline->GetPipeNum(); 560 561 // non-scalable mode, only VDEnc statistics need to be aggregated 562 hucPakStitchDmem->VDENCSTAT_offset[0] = hevcFrameStatsOffset.vdencStatistics; 563 hucPakStitchDmem->VDENCSTAT_offset[1] = hevcTileStatsOffset.vdencStatistics; 564 } 565 566 bool isTileReplayEnabled = false; 567 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, FeatureIDs::encodeTile, IsTileReplayEnabled, isTileReplayEnabled); 568 if (m_basicFeature->m_enableTileStitchByHW && (isTileReplayEnabled || m_pipeline->GetPipeNum() > 1)) 569 { 570 hucPakStitchDmem->StitchEnable = true; 571 hucPakStitchDmem->StitchCommandOffset = 0; 572 hucPakStitchDmem->BBEndforStitch = HUC_BATCH_BUFFER_END; 573 } 574 575 m_allocator->UnLock(m_resHucPakStitchDmemBuffer[m_pipeline->m_currRecycledBufIdx][currentPass]); 576 577 return eStatus; 578 } 579 ReadSseStatistics(MOS_COMMAND_BUFFER & cmdBuffer)580 MOS_STATUS HevcPakIntegratePkt::ReadSseStatistics(MOS_COMMAND_BUFFER &cmdBuffer) 581 { 582 // implement SSE 583 ENCODE_FUNC_CALL(); 584 585 PMOS_RESOURCE osResource = nullptr; 586 uint32_t offset = 0; 587 588 
m_statusReport->GetAddress(statusReportSumSquareError, osResource, offset); 589 590 for (auto i = 0; i < 3; i++) // 64 bit SSE values for luma/ chroma channels need to be copied 591 { 592 auto &miCpyMemMemParams = m_miItf->MHW_GETPAR_F(MI_COPY_MEM_MEM)(); 593 miCpyMemMemParams = {}; 594 MOS_RESOURCE *resHuCPakAggregatedFrameStatsBuffer = nullptr; 595 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetHucPakAggregatedFrameStatsBuffer, resHuCPakAggregatedFrameStatsBuffer); 596 ENCODE_CHK_NULL_RETURN(resHuCPakAggregatedFrameStatsBuffer); 597 bool tiles_enabled = false; 598 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, IsEnabled, tiles_enabled); 599 miCpyMemMemParams.presSrc = tiles_enabled && (m_pipeline->GetPipeNum() > 1) ? resHuCPakAggregatedFrameStatsBuffer : m_basicFeature->m_recycleBuf->GetBuffer(FrameStatStreamOutBuffer, 0); 600 miCpyMemMemParams.dwSrcOffset = (m_basicFeature->m_hevcPakStatsSSEOffset + i) * sizeof(uint32_t); // SSE luma offset is located at DW32 in Frame statistics, followed by chroma 601 miCpyMemMemParams.presDst = osResource; 602 miCpyMemMemParams.dwDstOffset = offset + i * sizeof(uint32_t); 603 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_COPY_MEM_MEM)(&cmdBuffer)); 604 } 605 606 return MOS_STATUS_SUCCESS; 607 } 608 ReadSliceSize(MOS_COMMAND_BUFFER & cmdBuffer)609 MOS_STATUS HevcPakIntegratePkt::ReadSliceSize(MOS_COMMAND_BUFFER &cmdBuffer) 610 { 611 MOS_STATUS eStatus = MOS_STATUS_SUCCESS; 612 613 ENCODE_FUNC_CALL(); 614 615 // Use FrameStats buffer if in single pipe mode. 
616 if (m_pipeline->GetPipeNum() == 1) 617 { 618 return ReadSliceSizeForSinglePipe(cmdBuffer); 619 } 620 621 // In multi-tile multi-pipe mode, use PAK integration kernel output 622 // PAK integration kernel accumulates frame statistics across tiles, which should be used to setup slice size report 623 // Report slice size to app only when dynamic scaling is enabled 624 if (!m_basicFeature->m_hevcSeqParams->SliceSizeControl) 625 { 626 return eStatus; 627 } 628 629 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeDss, HevcFeatureIDs::hevcVdencDssFeature, ReadSliceSize, m_pipeline, cmdBuffer); 630 return eStatus; 631 } 632 ReadSliceSizeForSinglePipe(MOS_COMMAND_BUFFER & cmdBuffer)633 MOS_STATUS HevcPakIntegratePkt::ReadSliceSizeForSinglePipe(MOS_COMMAND_BUFFER &cmdBuffer) 634 { 635 MOS_STATUS eStatus = MOS_STATUS_SUCCESS; 636 637 ENCODE_FUNC_CALL(); 638 639 // Report slice size to app only when dynamic slice is enabled 640 if (!m_basicFeature->m_hevcSeqParams->SliceSizeControl) 641 { 642 return eStatus; 643 } 644 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeDss, HevcFeatureIDs::hevcVdencDssFeature, ReadSliceSizeForSinglePipe, m_pipeline, cmdBuffer); 645 646 return eStatus; 647 } 648 SetupTilesStatusData(void * mfxStatus,void * statusReport)649 MOS_STATUS HevcPakIntegratePkt::SetupTilesStatusData(void *mfxStatus, void *statusReport) 650 { 651 ENCODE_FUNC_CALL(); 652 653 ENCODE_CHK_NULL_RETURN(mfxStatus); 654 ENCODE_CHK_NULL_RETURN(statusReport); 655 ENCODE_CHK_NULL_RETURN(m_basicFeature); 656 657 EncodeStatusMfx * encodeStatusMfx = (EncodeStatusMfx *)mfxStatus; 658 EncodeStatusReportData *statusReportData = (EncodeStatusReportData *)statusReport; 659 660 uint32_t statBufIdx = statusReportData->currOriginalPic.FrameIdx; 661 const EncodeReportTileData *tileReportData = nullptr; 662 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetReportTileData, statBufIdx, tileReportData); 663 if(tileReportData == nullptr) 664 { 665 // When Tile feature is not enabled, not 
need following complete options 666 ENCODE_NORMALMESSAGE("Free tileReportData for frames, which include only one tile."); 667 return MOS_STATUS_SUCCESS; 668 } 669 670 if (tileReportData[0].reportValid == false) 671 { 672 // Only multi-pipe contain tile report data. No tile report data needed for one-pipe. 673 return MOS_STATUS_SUCCESS; 674 } 675 676 statusReportData->codecStatus = CODECHAL_STATUS_SUCCESSFUL; 677 statusReportData->panicMode = false; 678 statusReportData->averageQP = 0; 679 statusReportData->qpY = 0; 680 statusReportData->suggestedQPYDelta = 0; 681 statusReportData->numberPasses = 1; 682 statusReportData->bitstreamSize = 0; 683 statusReportData->numberSlices = 0; 684 encodeStatusMfx->imageStatusCtrlOfLastBRCPass.hcpCumulativeFrameDeltaQP = 0; 685 686 // Allocate the tile size report memory 687 statusReportData->sizeOfTileInfoBuffer = statusReportData->numberTilesInFrame * sizeof(CodechalTileInfo); 688 if (statusReportData->hevcTileinfo) 689 { 690 MOS_FreeMemory(statusReportData->hevcTileinfo); 691 } 692 statusReportData->hevcTileinfo = (CodechalTileInfo *)MOS_AllocAndZeroMemory(statusReportData->sizeOfTileInfoBuffer); 693 ENCODE_CHK_NULL_RETURN(statusReportData->hevcTileinfo); 694 695 MOS_RESOURCE *tileSizeStatusBuffer = nullptr; 696 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetTileRecordBuffer, statBufIdx, tileSizeStatusBuffer); 697 ENCODE_CHK_NULL_RETURN(tileSizeStatusBuffer); 698 699 MOS_LOCK_PARAMS lockFlags; 700 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); 701 PakHwTileSizeRecord *tileStatusReport = 702 (PakHwTileSizeRecord *)m_allocator->Lock(tileSizeStatusBuffer, &lockFlags); 703 ENCODE_CHK_NULL_RETURN(tileStatusReport); 704 705 uint32_t *sliceSize = nullptr; 706 707 // pSliceSize is set/ allocated only when dynamic slice is enabled. 
Cannot use SSC flag here, as it is an asynchronous call 708 if (encodeStatusMfx->sliceReport.sliceSize) 709 { 710 sliceSize = (uint32_t *)m_osInterface->pfnLockResource(m_osInterface, encodeStatusMfx->sliceReport.sliceSize, &lockFlags); 711 ENCODE_CHK_NULL_RETURN(sliceSize); 712 } 713 encodeStatusMfx->imageStatusCtrlOfLastBRCPass.hcpCumulativeFrameDeltaQP = 0; 714 715 uint32_t totalCU = 0; 716 uint32_t sliceCount = 0; 717 double sumQp = 0.0; 718 for (uint32_t i = 0; i < statusReportData->numberTilesInFrame; i++) 719 { 720 if (tileStatusReport[i].Length == 0) 721 { 722 statusReportData->codecStatus = CODECHAL_STATUS_INCOMPLETE; 723 return MOS_STATUS_SUCCESS; 724 } 725 726 // Tile Replay currently shares same frame level status report as tile 727 728 statusReportData->hevcTileinfo[i].TileSizeInBytes = tileStatusReport[i].Length; 729 // The offset only valid if there is no stream stitching 730 statusReportData->hevcTileinfo[i].TileBitStreamOffset = tileReportData[i].bitstreamByteOffset * CODECHAL_CACHELINE_SIZE; 731 statusReportData->hevcTileinfo[i].TileRowNum = i / tileReportData[i].numTileColumns; 732 statusReportData->hevcTileinfo[i].TileColNum = i % tileReportData[i].numTileColumns; 733 statusReportData->numTileReported = i + 1; 734 statusReportData->bitstreamSize += tileStatusReport[i].Length; 735 totalCU += (tileReportData[i].tileHeightInMinCbMinus1 + 1) * (tileReportData[i].tileWidthInMinCbMinus1 + 1); 736 sumQp += tileStatusReport[i].Hcp_Qp_Status_Count; 737 738 //Add silce Size Control support in each tile 739 if (sliceSize) 740 { 741 statusReportData->sliceSizes = (uint16_t *)sliceSize; 742 statusReportData->numberSlices += (uint8_t)tileStatusReport[i].Hcp_Slice_Count_Tile; 743 uint16_t prevCumulativeSliceSize = 0; 744 // HW writes out a DW for each slice size. Copy in place the DW into 16bit fields expected by App 745 for (uint32_t idx = 0; idx < tileStatusReport[i].Hcp_Slice_Count_Tile; idx++) 746 { 747 // PAK output the sliceSize at 16DW intervals. 
748 ENCODE_CHK_NULL_RETURN(&sliceSize[sliceCount * 16]); 749 750 //convert cummulative slice size to individual, first slice may have PPS/SPS, 751 uint32_t CurrAccumulatedSliceSize = sliceSize[sliceCount * 16]; 752 statusReportData->sliceSizes[sliceCount] = CurrAccumulatedSliceSize - prevCumulativeSliceSize; 753 prevCumulativeSliceSize += statusReportData->sliceSizes[sliceCount]; 754 sliceCount++; 755 } 756 } 757 } 758 759 if (sliceSize) 760 { 761 statusReportData->sizeOfSliceSizesBuffer = sizeof(uint16_t) * statusReportData->numberSlices; 762 statusReportData->sliceSizeOverflow = (encodeStatusMfx->sliceReport.sliceSizeOverflow >> 16) & 1; 763 m_osInterface->pfnUnlockResource(m_osInterface, encodeStatusMfx->sliceReport.sliceSize); 764 } 765 766 if (statusReportData->bitstreamSize == 0 || 767 statusReportData->bitstreamSize > m_basicFeature->m_bitstreamSize) 768 { 769 statusReportData->codecStatus = CODECHAL_STATUS_ERROR; 770 statusReportData->bitstreamSize = 0; 771 return MOS_STATUS_INVALID_FILE_SIZE; 772 } 773 774 if (totalCU != 0) 775 { 776 statusReportData->qpY = statusReportData->averageQP = 777 (uint8_t)((sumQp / (double)totalCU) / 4.0); // due to TU is 4x4 and there are 4 TUs in one CU 778 } 779 else 780 { 781 return MOS_STATUS_INVALID_PARAMETER; 782 } 783 784 if (!m_basicFeature->m_enableTileStitchByHW && m_pipeline->GetPipeNum() > 1) 785 { 786 ENCODE_CHK_STATUS_RETURN(PerformSwStitch(tileReportData, tileStatusReport, statusReportData)); 787 } 788 789 if (tileStatusReport) 790 { 791 // clean-up the tile status report buffer 792 MOS_ZeroMemory(tileStatusReport, sizeof(tileStatusReport[0]) * statusReportData->numberTilesInFrame); 793 m_allocator->UnLock(tileSizeStatusBuffer); 794 } 795 796 return MOS_STATUS_SUCCESS; 797 } 798 Completed(void * mfxStatus,void * rcsStatus,void * statusReport)799 MOS_STATUS HevcPakIntegratePkt::Completed(void *mfxStatus, void *rcsStatus, void *statusReport) 800 { 801 ENCODE_FUNC_CALL(); 802 803 ENCODE_CHK_NULL_RETURN(mfxStatus); 
804 ENCODE_CHK_NULL_RETURN(statusReport); 805 ENCODE_CHK_NULL_RETURN(m_basicFeature); 806 807 EncodeStatusReportData *statusReportData = (EncodeStatusReportData *)statusReport; 808 809 if (statusReportData->numberTilesInFrame == 1) 810 { 811 // When Tile feature is not enabled, not need following complete options 812 return MOS_STATUS_SUCCESS; 813 } 814 815 ENCODE_CHK_STATUS_RETURN(EncodeHucPkt::Completed(mfxStatus, rcsStatus, statusReport)); 816 817 // Tile status data is only update and performed in multi-pipe mode 818 ENCODE_CHK_STATUS_RETURN(SetupTilesStatusData(mfxStatus, statusReport)); 819 820 m_basicFeature->Reset((CODEC_REF_LIST *)statusReportData->currRefList); 821 return MOS_STATUS_SUCCESS; 822 } 823 PerformSwStitch(const EncodeReportTileData * tileReportData,PakHwTileSizeRecord * tileStatusReport,EncodeStatusReportData * statusReportData)824 MOS_STATUS HevcPakIntegratePkt::PerformSwStitch( 825 const EncodeReportTileData *tileReportData, 826 PakHwTileSizeRecord * tileStatusReport, 827 EncodeStatusReportData * statusReportData) 828 { 829 ENCODE_FUNC_CALL(); 830 831 ENCODE_CHK_NULL_RETURN(tileReportData); 832 ENCODE_CHK_NULL_RETURN(tileStatusReport); 833 834 uint8_t *tempBsBuffer = nullptr, *bufPtr = nullptr; 835 tempBsBuffer = bufPtr = (uint8_t *)MOS_AllocAndZeroMemory(statusReportData->bitstreamSize); 836 ENCODE_CHK_NULL_RETURN(tempBsBuffer); 837 838 PCODEC_REF_LIST currRefList = (PCODEC_REF_LIST)statusReportData->currRefList; 839 840 MOS_LOCK_PARAMS lockFlags; 841 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); 842 lockFlags.ReadOnly = 1; 843 uint8_t *bitstream = (uint8_t *)m_allocator->Lock( 844 &currRefList->resBitstreamBuffer, 845 &lockFlags); 846 if (bitstream == nullptr) 847 { 848 MOS_FreeMemory(tempBsBuffer); 849 ENCODE_CHK_NULL_RETURN(nullptr); 850 } 851 852 for (uint32_t i = 0; i < statusReportData->numberTilesInFrame; i++) 853 { 854 uint32_t offset = tileReportData[i].bitstreamByteOffset * CODECHAL_CACHELINE_SIZE; 855 uint32_t len = 
tileStatusReport[i].Length; 856 857 MOS_SecureMemcpy(bufPtr, len, &bitstream[offset], len); 858 bufPtr += len; 859 } 860 861 MOS_SecureMemcpy(bitstream, statusReportData->bitstreamSize, tempBsBuffer, statusReportData->bitstreamSize); 862 MOS_ZeroMemory(&bitstream[statusReportData->bitstreamSize], m_basicFeature->m_bitstreamSize - statusReportData->bitstreamSize); 863 864 if (bitstream) 865 { 866 m_allocator->UnLock(&currRefList->resBitstreamBuffer); 867 } 868 869 MOS_FreeMemory(tempBsBuffer); 870 871 return MOS_STATUS_SUCCESS; 872 } 873 PerformHwStitch(PMOS_COMMAND_BUFFER cmdBuffer)874 MOS_STATUS HevcPakIntegratePkt::PerformHwStitch( 875 PMOS_COMMAND_BUFFER cmdBuffer) 876 { 877 ENCODE_FUNC_CALL(); 878 879 // 2nd level BB buffer for stitching cmd 880 // Current location to add cmds in 2nd level batch buffer 881 m_HucStitchCmdBatchBuffer.iCurrent = 0; 882 // Reset starting location (offset) executing 2nd level batch buffer for each frame & each pass 883 m_HucStitchCmdBatchBuffer.dwOffset = 0; 884 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_BATCH_BUFFER_START)(cmdBuffer, &m_HucStitchCmdBatchBuffer)); 885 // This wait cmd is needed to make sure copy command is done as suggested by HW folk in encode cases 886 auto &mfxWaitParams = m_miItf->MHW_GETPAR_F(MFX_WAIT)(); 887 mfxWaitParams = {}; 888 mfxWaitParams.iStallVdboxPipeline = m_osInterface->osCpInterface->IsCpEnabled() ? 
true : false; 889 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MFX_WAIT)(cmdBuffer)); 890 891 return MOS_STATUS_SUCCESS; 892 } 893 ConfigStitchDataBuffer() const894 MOS_STATUS HevcPakIntegratePkt::ConfigStitchDataBuffer() const 895 { 896 ENCODE_FUNC_CALL(); 897 898 auto currPass = m_pipeline->GetCurrentPass(); 899 HucCommandData *hucStitchDataBuf = (HucCommandData*)m_allocator->LockResourceForWrite(const_cast<MOS_RESOURCE*>(&m_resHucStitchDataBuffer[m_pipeline->m_currRecycledBufIdx][currPass])); 900 ENCODE_CHK_NULL_RETURN(hucStitchDataBuf); 901 902 MOS_ZeroMemory(hucStitchDataBuf, sizeof(HucCommandData)); 903 hucStitchDataBuf->TotalCommands = 1; 904 hucStitchDataBuf->InputCOM[0].SizeOfData = 0xf; 905 906 uint16_t numTileColumns = 1; 907 uint16_t numTileRows = 1; 908 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetTileRowColumns, numTileRows, numTileColumns); 909 910 HucInputCmd hucInputCmd; 911 MOS_ZeroMemory(&hucInputCmd, sizeof(HucInputCmd)); 912 913 ENCODE_CHK_NULL_RETURN(m_osInterface->osCpInterface); 914 hucInputCmd.SelectionForIndData = m_osInterface->osCpInterface->IsCpEnabled() ? 
4 : 0; 915 hucInputCmd.CmdMode = HUC_CMD_LIST_MODE; 916 hucInputCmd.LengthOfTable = numTileRows * numTileColumns; 917 hucInputCmd.CopySize = m_hwInterface->m_tileRecordSize; 918 919 // Tile record always in m_tileRecordBuffer even in scalable node 920 uint32_t statBufIdx = m_basicFeature->m_currOriginalPic.FrameIdx; 921 MOS_RESOURCE *presSrc = nullptr; 922 923 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetTileRecordBuffer, statBufIdx, presSrc); 924 ENCODE_CHK_NULL_RETURN(presSrc); 925 926 ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource( 927 m_osInterface, 928 presSrc, 929 false, 930 false)); 931 932 ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnRegisterResource( 933 m_osInterface, 934 &m_basicFeature->m_resBitstreamBuffer, 935 true, 936 true)); 937 938 uint64_t srcAddr = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, presSrc); 939 uint64_t destrAddr = m_osInterface->pfnGetResourceGfxAddress(m_osInterface, &m_basicFeature->m_resBitstreamBuffer); 940 hucInputCmd.SrcAddrBottom = (uint32_t)(srcAddr & 0x00000000FFFFFFFF); 941 hucInputCmd.SrcAddrTop = (uint32_t)((srcAddr & 0xFFFFFFFF00000000) >> 32); 942 hucInputCmd.DestAddrBottom = (uint32_t)(destrAddr & 0x00000000FFFFFFFF); 943 hucInputCmd.DestAddrTop = (uint32_t)((destrAddr & 0xFFFFFFFF00000000) >> 32); 944 945 MOS_SecureMemcpy(hucStitchDataBuf->InputCOM[0].data, sizeof(HucInputCmd), &hucInputCmd, sizeof(HucInputCmd)); 946 947 m_allocator->UnLock(const_cast<MOS_RESOURCE*>(&m_resHucStitchDataBuffer[m_pipeline->m_currRecycledBufIdx][currPass])); 948 949 return MOS_STATUS_SUCCESS; 950 } 951 MHW_SETPAR_DECL_SRC(HUC_IMEM_STATE,HevcPakIntegratePkt)952 MHW_SETPAR_DECL_SRC(HUC_IMEM_STATE, HevcPakIntegratePkt) 953 { 954 params.kernelDescriptor = m_vdboxHucPakIntKernelDescriptor; 955 return MOS_STATUS_SUCCESS; 956 } 957 MHW_SETPAR_DECL_SRC(HUC_DMEM_STATE,HevcPakIntegratePkt)958 MHW_SETPAR_DECL_SRC(HUC_DMEM_STATE, HevcPakIntegratePkt) 959 { 960 params.function = PAK_INTEGRATE; 961 
962 ENCODE_CHK_STATUS_RETURN(SetDmemBuffer()); 963 964 int32_t currentPass = m_pipeline->GetCurrentPass(); 965 params.hucDataSource = m_resHucPakStitchDmemBuffer[m_pipeline->m_currRecycledBufIdx][currentPass]; 966 params.dataLength = MOS_ALIGN_CEIL(sizeof(HucPakIntegrateDmem), CODECHAL_CACHELINE_SIZE); 967 params.dmemOffset = HUC_DMEM_OFFSET_RTOS_GEMS; 968 969 return MOS_STATUS_SUCCESS; 970 } 971 MHW_SETPAR_DECL_SRC(HUC_VIRTUAL_ADDR_STATE,HevcPakIntegratePkt)972 MHW_SETPAR_DECL_SRC(HUC_VIRTUAL_ADDR_STATE, HevcPakIntegratePkt) 973 { 974 params.function = PAK_INTEGRATE; 975 976 uint32_t statBufIdx = 0; 977 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetStatisticsBufferIndex, statBufIdx); 978 979 MOS_RESOURCE *resTileBasedStatisticsBuffer = nullptr; 980 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetTileBasedStatisticsBuffer, statBufIdx, resTileBasedStatisticsBuffer); 981 MOS_RESOURCE *resHuCPakAggregatedFrameStatsBuffer = nullptr; 982 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetHucPakAggregatedFrameStatsBuffer, resHuCPakAggregatedFrameStatsBuffer); 983 MOS_RESOURCE *resTileRecordBuffer = nullptr; 984 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetTileRecordBuffer, statBufIdx, resTileRecordBuffer); 985 uint32_t numTiles = 1; 986 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetTileNum, numTiles); 987 uint32_t lastTileIndex = numTiles - 1; 988 EncodeTileData tileData = {}; 989 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, HevcFeatureIDs::encodeTile, GetTileByIndex, tileData, lastTileIndex); 990 991 auto brcFeature = dynamic_cast<HEVCEncodeBRC *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcBrcFeature)); 992 ENCODE_CHK_NULL_RETURN(brcFeature); 993 auto vdenc2ndLevelBatchBuffer = brcFeature->GetVdenc2ndLevelBatchBuffer(m_pipeline->m_currRecycledBufIdx); 994 995 // Add Virtual addr 996 params.regionParams[0].presRegion = 
resTileBasedStatisticsBuffer; // Region 0 Input - Tile based input statistics from PAK/ VDEnc 997 params.regionParams[0].dwOffset = 0; 998 params.regionParams[1].presRegion = resHuCPakAggregatedFrameStatsBuffer; // Region 1 Output - HuC Frame statistics output 999 params.regionParams[1].isWritable = true; 1000 1001 params.regionParams[4].presRegion = &m_basicFeature->m_resBitstreamBuffer; // Region 4 Input - Last Tile bitstream 1002 params.regionParams[4].dwOffset = MOS_ALIGN_FLOOR(tileData.bitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE); 1003 params.regionParams[5].presRegion = &m_basicFeature->m_resBitstreamBuffer; // Region 5 Output - HuC modifies the last tile bitstream before stitch 1004 params.regionParams[5].dwOffset = MOS_ALIGN_FLOOR(tileData.bitstreamByteOffset * CODECHAL_CACHELINE_SIZE, CODECHAL_PAGE_SIZE); 1005 params.regionParams[5].isWritable = true; 1006 params.regionParams[6].presRegion = 1007 m_basicFeature->m_recycleBuf->GetBuffer(VdencBRCHistoryBuffer, m_basicFeature->m_frameNum); // Region 6 Output - History Buffer (Input/Output) 1008 params.regionParams[6].isWritable = true; 1009 params.regionParams[7].presRegion = &vdenc2ndLevelBatchBuffer->OsResource; // Region 7 Input- HCP PIC state command 1010 MOS_RESOURCE *resBrcDataBuffer = nullptr; 1011 RUN_FEATURE_INTERFACE_RETURN(HEVCEncodeBRC, HevcFeatureIDs::hevcBrcFeature, GetBrcDataBuffer, resBrcDataBuffer); 1012 params.regionParams[9].presRegion = resBrcDataBuffer; // Region 9 Output - HuC outputs BRC data 1013 params.regionParams[9].isWritable = true; 1014 1015 params.regionParams[15].presRegion = resTileRecordBuffer; 1016 params.regionParams[15].dwOffset = 0; 1017 1018 bool isTileReplayEnabled = false; 1019 RUN_FEATURE_INTERFACE_RETURN(HevcEncodeTile, FeatureIDs::encodeTile, IsTileReplayEnabled, isTileReplayEnabled); 1020 if (m_basicFeature->m_enableTileStitchByHW && (isTileReplayEnabled || m_pipeline->GetPipeNum() > 1)) 1021 { 1022 
ENCODE_CHK_STATUS_RETURN(ConfigStitchDataBuffer()); 1023 1024 uint32_t currentPass = m_pipeline->GetCurrentPass(); 1025 params.regionParams[8].presRegion = const_cast<PMOS_RESOURCE>(&m_resHucStitchDataBuffer[m_pipeline->m_currRecycledBufIdx][currentPass]); // Region 8 - data buffer read by HUC for stitching cmd generation 1026 params.regionParams[10].presRegion = const_cast<PMOS_RESOURCE>(&m_HucStitchCmdBatchBuffer.OsResource); // Region 10 - SLB for stitching cmd output from Huc 1027 params.regionParams[10].isWritable = true; 1028 } 1029 1030 return MOS_STATUS_SUCCESS; 1031 } 1032 1033 #if USE_CODECHAL_DEBUG_TOOL DumpInput()1034 MOS_STATUS HevcPakIntegratePkt::DumpInput() 1035 { 1036 ENCODE_FUNC_CALL(); 1037 int32_t currentPass = m_pipeline->GetCurrentPass(); 1038 1039 CodechalDebugInterface *debugInterface = m_pipeline->GetDebugInterface(); 1040 ENCODE_CHK_NULL_RETURN(debugInterface); 1041 1042 ENCODE_CHK_STATUS_RETURN(debugInterface->DumpHucDmem( 1043 m_resHucPakStitchDmemBuffer[m_pipeline->m_currRecycledBufIdx][currentPass], 1044 m_vdencHucPakDmemBufferSize, 1045 currentPass, 1046 hucRegionDumpPakIntegrate)); 1047 1048 ENCODE_CHK_STATUS_RETURN(DumpRegion(0, "_TileBasedStatistic", true, hucRegionDumpPakIntegrate)); 1049 ENCODE_CHK_STATUS_RETURN(DumpRegion(4, "_Bitstream", true, hucRegionDumpPakIntegrate)); 1050 ENCODE_CHK_STATUS_RETURN(DumpRegion(7, "_HcpPicState", true, hucRegionDumpPakIntegrate)); 1051 ENCODE_CHK_STATUS_RETURN(DumpRegion(15, "_TileRecord", true, hucRegionDumpPakIntegrate)); 1052 1053 return MOS_STATUS_SUCCESS; 1054 } 1055 DumpOutput()1056 MOS_STATUS HevcPakIntegratePkt::DumpOutput() 1057 { 1058 ENCODE_FUNC_CALL(); 1059 1060 ENCODE_CHK_STATUS_RETURN(DumpRegion(1, "_HuCPakAggregatedFrameStats", false, hucRegionDumpPakIntegrate)); 1061 ENCODE_CHK_STATUS_RETURN(DumpRegion(5, "_Bitstream", false, hucRegionDumpPakIntegrate)); 1062 ENCODE_CHK_STATUS_RETURN(DumpRegion(6, "_BrcHistory", false, hucRegionDumpPakIntegrate)); 1063 
ENCODE_CHK_STATUS_RETURN(DumpRegion(9, "_OutputBrcData", false, hucRegionDumpPakIntegrate)); 1064 ENCODE_CHK_STATUS_RETURN(DumpRegion(10, "_StitchCmd", false, hucRegionDumpPakIntegrate)); 1065 1066 return MOS_STATUS_SUCCESS; 1067 } 1068 #endif 1069 AddCondBBEndForLastPass(MOS_COMMAND_BUFFER & cmdBuffer)1070 MOS_STATUS HevcPakIntegratePkt::AddCondBBEndForLastPass(MOS_COMMAND_BUFFER &cmdBuffer) 1071 { 1072 ENCODE_FUNC_CALL(); 1073 1074 if (m_pipeline->IsSingleTaskPhaseSupported() || m_pipeline->IsFirstPass() || m_pipeline->GetPassNum() == 1) 1075 { 1076 return MOS_STATUS_SUCCESS; 1077 } 1078 1079 auto &miConditionalBatchBufferEndParams = m_miItf->MHW_GETPAR_F(MI_CONDITIONAL_BATCH_BUFFER_END)(); 1080 miConditionalBatchBufferEndParams = {}; 1081 1082 // VDENC uses HuC FW generated semaphore for conditional 2nd pass 1083 miConditionalBatchBufferEndParams.presSemaphoreBuffer = 1084 m_basicFeature->m_recycleBuf->GetBuffer(VdencBrcPakMmioBuffer, 0); 1085 1086 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_CONDITIONAL_BATCH_BUFFER_END)(&cmdBuffer)); 1087 1088 auto mmioRegisters = m_hcpItf->GetMmioRegisters(m_vdboxIndex); 1089 MOS_RESOURCE *osResource = nullptr; 1090 uint32_t offset = 0; 1091 m_statusReport->GetAddress(statusReportImageStatusCtrl, osResource, offset); 1092 //uint32_t baseOffset = (m_encodeStatusBuf.wCurrIndex * m_encodeStatusBuf.dwReportSize) + sizeof(uint32_t) * 2; // encodeStatus is offset by 2 DWs in the resource 1093 1094 // Write back the HCP image control register for RC6 may clean it out 1095 auto ®isterMemParams = m_miItf->MHW_GETPAR_F(MI_LOAD_REGISTER_MEM)(); 1096 registerMemParams = {}; 1097 registerMemParams.presStoreBuffer = osResource; 1098 registerMemParams.dwOffset = offset; 1099 registerMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset; 1100 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(&cmdBuffer)); 1101 1102 HevcVdencBrcBuffers *vdencBrcBuffers = nullptr; 1103 auto feature = 
dynamic_cast<HEVCEncodeBRC *>(m_featureManager->GetFeature(HevcFeatureIDs::hevcBrcFeature)); 1104 ENCODE_CHK_NULL_RETURN(feature); 1105 vdencBrcBuffers = feature->GetHevcVdencBrcBuffers(); 1106 ENCODE_CHK_NULL_RETURN(vdencBrcBuffers); 1107 1108 auto &miStoreRegMemParams = m_miItf->MHW_GETPAR_F(MI_STORE_REGISTER_MEM)(); 1109 miStoreRegMemParams = {}; 1110 miStoreRegMemParams.presStoreBuffer = vdencBrcBuffers->resBrcPakStatisticBuffer[vdencBrcBuffers->currBrcPakStasIdxForWrite]; 1111 miStoreRegMemParams.dwOffset = CODECHAL_OFFSETOF(CODECHAL_ENCODE_HEVC_PAK_STATS_BUFFER, HCP_IMAGE_STATUS_CONTROL_FOR_LAST_PASS); 1112 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset; 1113 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(&cmdBuffer)); 1114 1115 m_statusReport->GetAddress(statusReportImageStatusCtrlOfLastBRCPass, osResource, offset); 1116 miStoreRegMemParams = {}; 1117 miStoreRegMemParams.presStoreBuffer = osResource; 1118 miStoreRegMemParams.dwOffset = offset; 1119 miStoreRegMemParams.dwRegister = mmioRegisters->hcpEncImageStatusCtrlRegOffset; 1120 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(&cmdBuffer)); 1121 1122 return MOS_STATUS_SUCCESS; 1123 } 1124 1125 } 1126