1 /* 2 * Copyright (c) 2020-2023, Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included 12 * in all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 //! 23 //! \file encode_avc_vdenc_packet.cpp 24 //! \brief Defines the interface for avc encode vdenc packet 25 //! 26 27 #include "encode_avc_vdenc_packet.h" 28 #include "encode_avc_vdenc_weighted_prediction.h" 29 #include "encode_avc_vdenc_stream_in_feature.h" 30 #include "encode_avc_rounding.h" 31 #include "encode_avc_brc.h" 32 #include "encode_avc_vdenc_const_settings.h" 33 #include "media_avc_feature_defs.h" 34 #include "mos_solo_generic.h" 35 #include "encode_avc_header_packer.h" 36 #include "media_perf_profiler.h" 37 #include "mos_os_cp_interface_specific.h" 38 #include "hal_oca_interface_next.h" 39 40 #include "media_packet.h" 41 42 #define CODEC_AVC_MIN_BLOCK_HEIGHT 16 43 44 namespace encode { 45 AvcVdencPkt(MediaPipeline * pipeline,MediaTask * task,CodechalHwInterfaceNext * hwInterface)46 AvcVdencPkt::AvcVdencPkt( 47 MediaPipeline *pipeline, 48 MediaTask *task, 49 CodechalHwInterfaceNext *hwInterface) : 50 CmdPacket(task), 51 m_pipeline(dynamic_cast<AvcVdencPipeline *>(pipeline)), 52 m_hwInterface(dynamic_cast<CodechalHwInterfaceNext *>(hwInterface)) 53 { 54 ENCODE_CHK_NULL_NO_STATUS_RETURN(hwInterface); 55 ENCODE_CHK_NULL_NO_STATUS_RETURN(m_pipeline); 56 ENCODE_CHK_NULL_NO_STATUS_RETURN(m_hwInterface); 57 58 m_osInterface = hwInterface->GetOsInterface(); 59 m_statusReport = m_pipeline->GetStatusReportInstance(); 60 m_legacyFeatureManager = m_pipeline->GetFeatureManager(); 61 m_featureManager = m_pipeline->GetPacketLevelFeatureManager(AvcVdencPipeline::VdencPacket); 62 m_encodecp = m_pipeline->GetEncodeCp(); 63 m_vdencItf = std::static_pointer_cast<mhw::vdbox::vdenc::Itf>(m_hwInterface->GetVdencInterfaceNext()); 64 m_miItf = std::static_pointer_cast<mhw::mi::Itf>(m_hwInterface->GetMiInterfaceNext()); 65 m_mfxItf = std::static_pointer_cast<mhw::vdbox::mfx::Itf>(m_hwInterface->GetMfxInterfaceNext()); 66 } 67 ~AvcVdencPkt()68 AvcVdencPkt::~AvcVdencPkt() 69 { 70 FreeResources(); 71 } 72 FreeResources()73 MOS_STATUS AvcVdencPkt::FreeResources() 74 { 75 ENCODE_FUNC_CALL(); 76 77 if (m_vdencBrcImgStatAllocated) 78 { 79 for (uint8_t i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++) 80 { 81 ENCODE_CHK_STATUS_RETURN(Mhw_FreeBb(m_osInterface, &m_batchBufferForVdencImgStat[i], nullptr)); 82 } 83 } 84 85 return MOS_STATUS_SUCCESS; 86 } 87 Init()88 MOS_STATUS AvcVdencPkt::Init() 89 { 90 ENCODE_FUNC_CALL(); 91 ENCODE_CHK_NULL_RETURN(m_statusReport); 92 93 ENCODE_CHK_STATUS_RETURN(CmdPacket::Init()); 94 95 m_basicFeature = dynamic_cast<AvcBasicFeature *>(m_featureManager->GetFeature(FeatureIDs::basicFeature)); 96 ENCODE_CHK_NULL_RETURN(m_basicFeature); 97 98 #ifdef _MMC_SUPPORTED 99 m_mmcState = m_pipeline->GetMmcState(); 100 ENCODE_CHK_NULL_RETURN(m_mmcState); 101 m_basicFeature->m_mmcState = m_mmcState; 102 #endif 103 m_allocator = m_pipeline->GetEncodeAllocator(); 104 ENCODE_CHK_STATUS_RETURN(AllocateResources()); 105 106 ENCODE_CHK_STATUS_RETURN(m_statusReport->RegistObserver(this)); 107 108 m_usePatchList = m_osInterface->bUsesPatchList; 109 110 return MOS_STATUS_SUCCESS; 111 } 112 Prepare()113 MOS_STATUS AvcVdencPkt::Prepare() 114 { 115 ENCODE_FUNC_CALL(); 116 117 AvcVdencPipeline *pipeline = dynamic_cast<AvcVdencPipeline *>(m_pipeline); 118 ENCODE_CHK_NULL_RETURN(pipeline); 119 120 m_seqParam = m_basicFeature->m_seqParam; 121 m_picParam = m_basicFeature->m_picParam; 122 m_sliceParams = m_basicFeature->m_sliceParams; 123 124 ENCODE_CHK_STATUS_RETURN(ValidateVdboxIdx(m_vdboxIndex)); 125 ENCODE_CHK_STATUS_RETURN(SetRowstoreCachingOffsets()); 126 127 return MOS_STATUS_SUCCESS; 128 } 129 SetRowstoreCachingOffsets()130 MOS_STATUS AvcVdencPkt::SetRowstoreCachingOffsets() 131 { 132 ENCODE_CHK_NULL_RETURN(m_mfxItf); 133 // Get row store cache offset as all the needed information is got here 134 if (m_mfxItf->IsRowStoreCachingSupported()) 135 { 136 MHW_VDBOX_ROWSTORE_PARAMS rowstoreParams; 137 MOS_ZeroMemory(&rowstoreParams, sizeof(rowstoreParams)); 138 rowstoreParams.Mode = CODECHAL_ENCODE_MODE_AVC; 139 rowstoreParams.dwPicWidth = m_basicFeature->m_frameWidth; 140 rowstoreParams.bIsFrame = (m_seqParam->frame_mbs_only_flag == 1); 141 rowstoreParams.ucChromaFormat = m_basicFeature->m_chromaFormat; 142 ENCODE_CHK_STATUS_RETURN(m_hwInterface->SetRowstoreCachingOffsets(&rowstoreParams)); 143 144 if (m_vdencItf) 145 { 146 mhw::vdbox::vdenc::RowStorePar par = {}; 147 148 par.mode = mhw::vdbox::vdenc::RowStorePar::AVC; 149 par.isField = (m_seqParam->frame_mbs_only_flag != 1); 150 151 ENCODE_CHK_STATUS_RETURN(m_vdencItf->SetRowstoreCachingOffsets(par)); 152 } 153 if (m_mfxItf) 154 { 155 ENCODE_CHK_STATUS_RETURN(m_mfxItf->GetRowstoreCachingAddrs(&rowstoreParams)); 156 } 157 } 158 159 return MOS_STATUS_SUCCESS; 160 } 161 Destroy()162 MOS_STATUS AvcVdencPkt::Destroy() 163 { 164 ENCODE_FUNC_CALL(); 165 ENCODE_CHK_STATUS_RETURN(m_statusReport->UnregistObserver(this)); 166 167 return MOS_STATUS_SUCCESS; 168 } 169 Submit(MOS_COMMAND_BUFFER * commandBuffer,uint8_t packetPhase)170 MOS_STATUS AvcVdencPkt::Submit( 171 MOS_COMMAND_BUFFER* commandBuffer, 172 uint8_t packetPhase) 173 { 174 ENCODE_FUNC_CALL(); 175 176 MOS_COMMAND_BUFFER& cmdBuffer = *commandBuffer; 177 ENCODE_CHK_STATUS_RETURN(Mos_Solo_PreProcessEncode(m_osInterface, &m_basicFeature->m_resBitstreamBuffer, &m_basicFeature->m_reconSurface)); 178 179 // Ensure the input is ready to be read. 180 // Currently, mos RegisterResource has sync limitation for Raw resource. 181 // Temporaly, call Resource Wait to do the sync explicitly. 182 // TODO, Refine it when MOS refactor ready. 183 MOS_SYNC_PARAMS syncParams; 184 syncParams = g_cInitSyncParams; 185 syncParams.GpuContext = m_osInterface->pfnGetGpuContext(m_osInterface); 186 syncParams.presSyncResource = &m_basicFeature->m_rawSurface.OsResource; 187 syncParams.bReadOnly = true; 188 ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnResourceWait(m_osInterface, &syncParams)); 189 m_osInterface->pfnSetResourceSyncTag(m_osInterface, &syncParams); 190 191 ENCODE_CHK_STATUS_RETURN(PatchPictureLevelCommands(packetPhase, cmdBuffer)); 192 ENCODE_CHK_STATUS_RETURN(PatchSliceLevelCommands(cmdBuffer, packetPhase)); 193 194 ENCODE_CHK_STATUS_RETURN(Mos_Solo_PostProcessEncode(m_osInterface, &m_basicFeature->m_resBitstreamBuffer, &m_basicFeature->m_reconSurface)); 195 196 return MOS_STATUS_SUCCESS; 197 } 198 AllocateResources()199 MOS_STATUS AvcVdencPkt::AllocateResources() 200 { 201 ENCODE_FUNC_CALL(); 202 203 ENCODE_CHK_NULL_RETURN(m_allocator); 204 205 auto settings = static_cast<AvcVdencFeatureSettings *>(m_legacyFeatureManager->GetFeatureSettings()->GetConstSettings()); 206 ENCODE_CHK_NULL_RETURN(settings); 207 208 auto brcSettings = settings->brcSettings; 209 210 // initiate allocation parameters and lock flags 211 MOS_ALLOC_GFXRES_PARAMS allocParamsForBufferLinear; 212 MOS_ZeroMemory(&allocParamsForBufferLinear, sizeof(MOS_ALLOC_GFXRES_PARAMS)); 213 allocParamsForBufferLinear.Type = MOS_GFXRES_BUFFER; 214 allocParamsForBufferLinear.TileType = MOS_TILE_LINEAR; 215 allocParamsForBufferLinear.Format = Format_Buffer; 216 217 // PAK Slice Size Streamout Buffer 218 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(CODECHAL_ENCODE_SLICESIZE_BUF_SIZE, CODECHAL_PAGE_SIZE); 219 allocParamsForBufferLinear.pBufName = "PAK Slice Size Streamout Buffer"; 220 allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ_WRITE_CACHE; 221 ENCODE_CHK_STATUS_RETURN(m_basicFeature->m_recycleBuf->RegisterResource(PakSliceSizeStreamOutBuffer, allocParamsForBufferLinear)); 222 223 // VDENC Intra Row Store Scratch buffer 224 // 1 cacheline per MB 225 allocParamsForBufferLinear.dwBytes = m_basicFeature->m_picWidthInMb * CODECHAL_CACHELINE_SIZE; 226 allocParamsForBufferLinear.pBufName = "VDENC Intra Row Store Scratch Buffer"; 227 allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ; 228 m_vdencIntraRowStoreScratch = m_allocator->AllocateResource(allocParamsForBufferLinear, false); 229 230 // PAK Statistics buffer 231 allocParamsForBufferLinear.dwBytes = MOS_ALIGN_CEIL(brcSettings.vdencBrcPakStatsBufferSize, CODECHAL_PAGE_SIZE); 232 allocParamsForBufferLinear.pBufName = "VDENC BRC PAK Statistics Buffer"; 233 allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ_WRITE_CACHE; 234 ENCODE_CHK_STATUS_RETURN(m_basicFeature->m_recycleBuf->RegisterResource(BrcPakStatisticBuffer, allocParamsForBufferLinear, 1)); 235 236 // Here allocate the buffer for MB+FrameLevel PAK statistics. 237 MOS_ALLOC_GFXRES_PARAMS allocParamsForStatisticBufferFull = allocParamsForBufferLinear; 238 uint32_t size = brcSettings.vdencBrcPakStatsBufferSize + m_basicFeature->m_picWidthInMb * m_basicFeature->m_picHeightInMb * 64; 239 allocParamsForStatisticBufferFull.dwBytes = MOS_ALIGN_CEIL(size, CODECHAL_PAGE_SIZE); 240 allocParamsForStatisticBufferFull.pBufName = "VDENC BRC PAK Statistics Buffer Full"; 241 allocParamsForStatisticBufferFull.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ_WRITE_CACHE; 242 if (m_osInterface->osCpInterface == nullptr || !m_osInterface->osCpInterface->IsCpEnabled()) 243 { 244 allocParamsForStatisticBufferFull.dwMemType = MOS_MEMPOOL_SYSTEMMEMORY; 245 allocParamsForStatisticBufferFull.Flags.bCacheable = true; 246 } 247 ENCODE_CHK_STATUS_RETURN(m_basicFeature->m_recycleBuf->RegisterResource(BrcPakStatisticBufferFull, allocParamsForStatisticBufferFull)); 248 249 if (m_mfxItf->IsDeblockingFilterRowstoreCacheEnabled() == false) 250 { 251 // Deblocking Filter Row Store Scratch buffer 252 allocParamsForBufferLinear.dwBytes = m_basicFeature->m_picWidthInMb * 4 * CODECHAL_CACHELINE_SIZE; // 4 cachelines per MB 253 allocParamsForBufferLinear.pBufName = "Deblocking Filter Row Store Scratch Buffer"; 254 allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ_WRITE_CACHE; 255 m_resDeblockingFilterRowStoreScratchBuffer = m_allocator->AllocateResource(allocParamsForBufferLinear, false); 256 } 257 258 if (m_mfxItf->IsIntraRowstoreCacheEnabled() == false) 259 { 260 // Intra Row Store Scratch buffer 261 // 1 cacheline per MB 262 allocParamsForBufferLinear.dwBytes = m_basicFeature->m_picWidthInMb * CODECHAL_CACHELINE_SIZE; 263 allocParamsForBufferLinear.pBufName = "Intra Row Store Scratch Buffer"; 264 allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ_WRITE_CACHE; 265 m_intraRowStoreScratchBuffer = m_allocator->AllocateResource(allocParamsForBufferLinear, false); 266 } 267 268 if (m_mfxItf->IsBsdMpcRowstoreCacheEnabled() == false) 269 { 270 // MPC Row Store Scratch buffer 271 allocParamsForBufferLinear.dwBytes = m_basicFeature->m_picWidthInMb * 2 * 64; // 2 cachelines per MB 272 allocParamsForBufferLinear.pBufName = "MPC Row Store Scratch Buffer"; 273 allocParamsForBufferLinear.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ_WRITE_CACHE; 274 m_resMPCRowStoreScratchBuffer = m_allocator->AllocateResource(allocParamsForBufferLinear, false); 275 } 276 277 auto brcFeature = dynamic_cast<AvcEncodeBRC*>(m_featureManager->GetFeature(AvcFeatureIDs::avcBrcFeature)); 278 ENCODE_CHK_NULL_RETURN(brcFeature); 279 280 // ToDo: we always go to BRC disabld case because do not know RCM here. Same to legacy implementation 281 // VDENC uses second level batch buffer for image state cmds 282 if (!brcFeature->IsVdencBrcEnabled()) 283 { 284 // CQP mode needs a set of buffers for concurrency between SFD and VDEnc 285 for (uint8_t i = 0; i < CODECHAL_ENCODE_RECYCLED_BUFFER_NUM; i++) 286 { 287 MOS_ZeroMemory( 288 &m_batchBufferForVdencImgStat[i], 289 sizeof(m_batchBufferForVdencImgStat[i])); 290 m_batchBufferForVdencImgStat[i].bSecondLevel = true; 291 ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb( 292 m_osInterface, 293 &m_batchBufferForVdencImgStat[i], 294 nullptr, 295 m_hwInterface->m_vdencBrcImgStateBufferSize)); 296 } 297 m_vdencBrcImgStatAllocated = true; 298 } 299 300 return MOS_STATUS_SUCCESS; 301 } 302 PatchPictureLevelCommands(const uint8_t & packetPhase,MOS_COMMAND_BUFFER & cmdBuffer)303 MOS_STATUS AvcVdencPkt::PatchPictureLevelCommands(const uint8_t &packetPhase, MOS_COMMAND_BUFFER &cmdBuffer) 304 { 305 ENCODE_FUNC_CALL(); 306 ENCODE_CHK_NULL_RETURN(m_basicFeature); 307 ENCODE_CHK_NULL_RETURN(m_basicFeature->m_trackedBuf); 308 ENCODE_CHK_NULL_RETURN(m_seqParam); 309 310 // Set flag bIsMdfLoad in remote gaming scenario to boost GPU frequency for low latency 311 cmdBuffer.Attributes.bFrequencyBoost = (m_seqParam->ScenarioInfo == ESCENARIO_REMOTEGAMING); 312 313 ENCODE_CHK_STATUS_RETURN(m_miItf->SetWatchdogTimerThreshold(m_basicFeature->m_frameWidth, m_basicFeature->m_frameHeight, true)); 314 315 SetPerfTag(m_pipeline->IsFirstPass() ? CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE : CODECHAL_ENCODE_PERFTAG_CALL_PAK_ENGINE_SECOND_PASS, 316 (uint16_t)m_basicFeature->m_mode, 317 m_basicFeature->m_pictureCodingType); 318 319 auto brcFeature = dynamic_cast<AvcEncodeBRC*>(m_featureManager->GetFeature(AvcFeatureIDs::avcBrcFeature)); 320 ENCODE_CHK_NULL_RETURN(brcFeature); 321 322 if (!m_pipeline->IsSingleTaskPhaseSupported() || (m_pipeline->IsFirstPass() && !brcFeature->IsVdencBrcEnabled())) 323 { 324 SETPAR_AND_ADDCMD(MI_FORCE_WAKEUP, m_miItf, &cmdBuffer); 325 326 // Send command buffer header at the beginning (OS dependent) 327 ENCODE_CHK_STATUS_RETURN(SendPrologCmds(cmdBuffer)); 328 } 329 330 if (brcFeature->IsVdencBrcEnabled()) 331 { 332 #if _SW_BRC 333 if (!brcFeature->m_swBrc) 334 { 335 #endif 336 // Insert conditional batch buffer end for HuC valid IMEM loaded check 337 m_pResource = brcFeature->GetHucStatus2Buffer(); 338 SETPAR_AND_ADDCMD(MI_CONDITIONAL_BATCH_BUFFER_END, m_miItf, &cmdBuffer); 339 #if _SW_BRC 340 } 341 #endif 342 } 343 344 if (m_pipeline->GetCurrentPass()) 345 { 346 if ((Mos_Solo_Extension((MOS_CONTEXT_HANDLE)m_osInterface->pOsContext) || m_osInterface->bInlineCodecStatusUpdate) 347 && brcFeature->IsVdencBrcEnabled()) 348 { 349 // increment dwStoreData conditionaly 350 ENCODE_CHK_STATUS_RETURN(MediaPacket::UpdateStatusReportNext(statusReportGlobalCount, &cmdBuffer)); 351 } 352 353 // Insert conditional batch buffer end 354 // VDENC uses HuC BRC FW generated semaphore for conditional 2nd pass 355 m_pResource = 356 m_basicFeature->m_recycleBuf->GetBuffer(VdencBrcPakMmioBuffer, 0); 357 SETPAR_AND_ADDCMD(MI_CONDITIONAL_BATCH_BUFFER_END, m_miItf, &cmdBuffer); 358 } 359 360 if (m_pipeline->IsFirstPipe()) 361 { 362 ENCODE_CHK_STATUS_RETURN(StartStatusReport(statusReportMfx, &cmdBuffer)); 363 } 364 365 SETPAR_AND_ADDCMD(VDENC_CONTROL_STATE, m_vdencItf, &cmdBuffer); 366 367 ENCODE_CHK_STATUS_RETURN(AddPictureMfxCommands(cmdBuffer)); 368 369 ENCODE_CHK_STATUS_RETURN(AddPictureVdencCommands(cmdBuffer)); 370 371 PMHW_BATCH_BUFFER secondLevelBatchBufferUsed = nullptr; 372 373 // VDENC CQP case 374 if (!brcFeature->IsVdencBrcEnabled()) 375 { 376 // VDENC case uses multiple buffers for concurrency between SFD and VDENC 377 secondLevelBatchBufferUsed = &(m_batchBufferForVdencImgStat[m_pipeline->m_currRecycledBufIdx]); 378 379 // CQP case, driver programs the 2nd Level BB 380 MOS_STATUS status = Mhw_LockBb(m_osInterface, secondLevelBatchBufferUsed); 381 if (status != MOS_STATUS_SUCCESS) 382 { 383 ENCODE_NORMALMESSAGE("ERROR - Recycled buffer index exceed the maximum"); 384 SETPAR_AND_ADDCMD(MI_BATCH_BUFFER_END, m_miItf, &cmdBuffer); 385 return status; 386 } 387 388 SETPAR_AND_ADDCMD(MFX_AVC_IMG_STATE, m_mfxItf, nullptr, secondLevelBatchBufferUsed); 389 SETPAR_AND_ADDCMD(VDENC_CMD3, m_vdencItf, nullptr, secondLevelBatchBufferUsed); 390 SETPAR_AND_ADDCMD(VDENC_AVC_IMG_STATE, m_vdencItf, nullptr, secondLevelBatchBufferUsed); 391 392 ENCODE_CHK_STATUS_RETURN(m_miItf->AddMiBatchBufferEnd(nullptr, secondLevelBatchBufferUsed)); 393 394 CODECHAL_DEBUG_TOOL( 395 ENCODE_CHK_STATUS_RETURN(PopulatePakParam( 396 nullptr, 397 secondLevelBatchBufferUsed)); 398 399 ENCODE_CHK_STATUS_RETURN(PopulateEncParam( 400 0, 401 nullptr)); 402 403 ENCODE_CHK_STATUS_RETURN(DumpEncodeImgStats(nullptr)); 404 ) 405 406 ENCODE_CHK_STATUS_RETURN(Mhw_UnlockBb(m_osInterface, secondLevelBatchBufferUsed, true)); 407 } 408 else 409 { 410 secondLevelBatchBufferUsed = brcFeature->GetBatchBufferForVdencImgStat(); 411 // current location to add cmds in 2nd level batch buffer 412 secondLevelBatchBufferUsed->iCurrent = 0; 413 // reset starting location (offset) executing 2nd level batch buffer for each frame & each pass 414 secondLevelBatchBufferUsed->dwOffset = 0; 415 } 416 417 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_BATCH_BUFFER_START)(&cmdBuffer, secondLevelBatchBufferUsed)); 418 HalOcaInterfaceNext::OnSubLevelBBStart( 419 cmdBuffer, 420 m_osInterface->pOsContext, 421 &secondLevelBatchBufferUsed->OsResource, 422 secondLevelBatchBufferUsed->dwOffset, 423 false, 424 MOS_ALIGN_CEIL(m_hwInterface->m_vdencBrcImgStateBufferSize, CODECHAL_CACHELINE_SIZE)); 425 426 CODECHAL_DEBUG_TOOL 427 ( 428 CodechalDebugInterface *debugInterface = m_pipeline->GetDebugInterface(); 429 ENCODE_CHK_STATUS_RETURN(debugInterface->Dump2ndLvlBatch( 430 secondLevelBatchBufferUsed, 431 CODECHAL_MEDIA_STATE_ENC_NORMAL, 432 nullptr)); 433 ) 434 435 ENCODE_CHK_STATUS_RETURN(AddAllCmds_MFX_QM_STATE(&cmdBuffer)); 436 ENCODE_CHK_STATUS_RETURN(AddAllCmds_MFX_FQM_STATE(&cmdBuffer)); 437 438 if (m_basicFeature->m_pictureCodingType == B_TYPE) 439 { 440 SETPAR_AND_ADDCMD(MFX_AVC_DIRECTMODE_STATE, m_mfxItf, &cmdBuffer); 441 } 442 443 return MOS_STATUS_SUCCESS; 444 } 445 PatchSliceLevelCommands(MOS_COMMAND_BUFFER & cmdBuffer,uint8_t packetPhase)446 MOS_STATUS AvcVdencPkt::PatchSliceLevelCommands(MOS_COMMAND_BUFFER &cmdBuffer, uint8_t packetPhase) 447 { 448 ENCODE_FUNC_CALL(); 449 450 auto slcData = m_basicFeature->m_slcData; 451 452 // For use with the single task phase implementation 453 if (m_basicFeature->m_sliceStructCaps < CODECHAL_SLICE_STRUCT_ARBITRARYROWSLICE) 454 { 455 uint32_t numSlc = (m_basicFeature->m_frameFieldHeightInMb + m_basicFeature->m_sliceHeight - 1) / m_basicFeature->m_sliceHeight; 456 if (numSlc != m_basicFeature->m_numSlices) 457 { 458 return MOS_STATUS_INVALID_PARAMETER; 459 } 460 } 461 462 ENCODE_CHK_STATUS_RETURN(LockBatchBufferForPakSlices()); 463 464 CODECHAL_DEBUG_TOOL( 465 ENCODE_CHK_STATUS_RETURN(SetSliceStateCommonParams(m_basicFeature->sliceState))) 466 467 for (uint16_t slcCount = 0; slcCount < m_basicFeature->m_numSlices; slcCount++) 468 { 469 m_basicFeature->m_curNumSlices = slcCount; 470 if (m_pipeline->IsFirstPass()) 471 { 472 if (m_basicFeature->m_acceleratorHeaderPackingCaps) 473 { 474 slcData[slcCount].SliceOffset = m_basicFeature->m_bsBuffer.SliceOffset; 475 ENCODE_CHK_STATUS_RETURN(PackSliceHeader(slcCount)); 476 slcData[slcCount].BitSize = m_basicFeature->m_bsBuffer.BitSize; 477 } 478 if (m_basicFeature->m_sliceStructCaps != CODECHAL_SLICE_STRUCT_ARBITRARYMBSLICE) 479 { 480 slcData[slcCount].CmdOffset = slcCount * m_basicFeature->m_sliceHeight * m_basicFeature->m_picWidthInMb * 16 * 4; 481 } 482 else 483 { 484 slcData[slcCount].CmdOffset = m_sliceParams[slcCount].first_mb_in_slice * 16 * 4; 485 } 486 } 487 488 CODECHAL_DEBUG_TOOL( 489 ENCODE_CHK_STATUS_RETURN(SetSliceStateParams(m_basicFeature->sliceState, slcData, slcCount))) 490 491 ENCODE_CHK_STATUS_RETURN(SendSlice(&cmdBuffer)); 492 ENCODE_CHK_STATUS_RETURN(ReportSliceSizeMetaData(&cmdBuffer, slcCount)); 493 494 m_lastSlice = (slcCount == (m_basicFeature->m_numSlices) - 1); 495 SETPAR_AND_ADDCMD(VD_PIPELINE_FLUSH, m_vdencItf, &cmdBuffer); 496 497 // Do not send MI_FLUSH for last Super slice now 498 if (!m_lastSlice) 499 { 500 SETPAR_AND_ADDCMD(MI_FLUSH_DW, m_miItf, &cmdBuffer); 501 } 502 } 503 504 ENCODE_CHK_STATUS_RETURN(UnlockBatchBufferForPakSlices()); 505 506 // Insert end of sequence/stream if set 507 if (m_basicFeature->m_lastPicInStream || m_basicFeature->m_lastPicInSeq) 508 { 509 m_lastPic = true; 510 ENCODE_CHK_STATUS_RETURN(InsertSeqStreamEnd(cmdBuffer)); 511 m_lastPic = false; 512 } 513 514 ENCODE_CHK_STATUS_RETURN(EnsureAllCommandsExecuted(cmdBuffer)); 515 ENCODE_CHK_STATUS_RETURN(PrepareHWMetaData(&cmdBuffer)); 516 ENCODE_CHK_STATUS_RETURN(ReadMfcStatus(cmdBuffer)); 517 518 auto brcFeature = dynamic_cast<AvcEncodeBRC*>(m_featureManager->GetFeature(AvcFeatureIDs::avcBrcFeature)); 519 ENCODE_CHK_NULL_RETURN(brcFeature); 520 521 if (brcFeature->IsVdencBrcEnabled()) 522 { 523 ENCODE_CHK_STATUS_RETURN(StoreNumPasses(cmdBuffer)); 524 525 #if USE_CODECHAL_DEBUG_TOOL 526 uint32_t sizeInByte = 0; 527 bool isIframe = m_basicFeature->m_pictureCodingType == I_TYPE; 528 auto packetUtilities = m_pipeline->GetPacketUtilities(); 529 ENCODE_CHK_NULL_RETURN(packetUtilities); 530 EncodeStatusReadParams params; 531 MOS_ZeroMemory(¶ms, sizeof(params)); 532 RUN_FEATURE_INTERFACE_RETURN(AvcEncodeBRC, AvcFeatureIDs::avcBrcFeature, SetMfcStatusParams, params); 533 if (packetUtilities->GetFakeHeaderSettings(sizeInByte, isIframe)) 534 { 535 ENCODE_CHK_NULL_RETURN(m_basicFeature->m_recycleBuf); 536 ENCODE_CHK_STATUS_RETURN(packetUtilities->ModifyEncodedFrameSizeWithFakeHeaderSizeAVC( 537 &cmdBuffer, 538 sizeInByte, 539 params.resVdencBrcUpdateDmemBufferPtr, 540 0, 541 m_basicFeature->m_recycleBuf->GetBuffer(BrcPakStatisticBuffer, m_basicFeature->m_frameNum), 542 sizeof(uint32_t) * 4)); 543 } 544 #endif 545 } 546 547 if (m_picParam->StatusReportEnable.fields.FrameStats) 548 { 549 ENCODE_CHK_STATUS_RETURN(GetAvcVdencFrameLevelStatusExt(m_picParam->StatusReportFeedbackNumber, &cmdBuffer)); 550 } 551 552 ENCODE_CHK_STATUS_RETURN(EndStatusReport(statusReportMfx, &cmdBuffer)); 553 554 if (m_pipeline->IsLastPass() && m_pipeline->IsFirstPipe()) 555 { 556 // increment dwStoreData conditionaly 557 MediaPacket::UpdateStatusReportNext(statusReportGlobalCount, &cmdBuffer); 558 559 CODECHAL_DEBUG_TOOL(m_mmcState->UpdateUserFeatureKey(&(m_basicFeature->m_reconSurface))); 560 } 561 562 // Reset parameters for next PAK execution 563 if (m_pipeline->IsLastPass()) 564 { 565 UpdateParameters(); 566 } 567 568 return MOS_STATUS_SUCCESS; 569 } 570 ValidateVdboxIdx(const MHW_VDBOX_NODE_IND & vdboxIndex)571 MOS_STATUS AvcVdencPkt::ValidateVdboxIdx(const MHW_VDBOX_NODE_IND &vdboxIndex) 572 { 573 ENCODE_FUNC_CALL(); 574 575 MOS_STATUS eStatus = MOS_STATUS_SUCCESS; 576 if (vdboxIndex > m_mfxItf->GetMaxVdboxIndex()) 577 { 578 //ENCODE_ASSERTMESSAGE("ERROR - vdbox index exceed the maximum"); 579 eStatus = MOS_STATUS_INVALID_PARAMETER; 580 } 581 582 return eStatus; 583 } 584 PrepareHWMetaData(PMOS_COMMAND_BUFFER cmdBuffer)585 MOS_STATUS AvcVdencPkt::PrepareHWMetaData(PMOS_COMMAND_BUFFER cmdBuffer) 586 { 587 ENCODE_FUNC_CALL(); 588 589 PMOS_RESOURCE presMetadataBuffer = m_basicFeature->m_resMetadataBuffer; 590 MetaDataOffset resourceOffset = m_basicFeature->m_metaDataOffset; 591 if ((presMetadataBuffer == nullptr) || !m_pipeline->IsLastPass()) 592 { 593 return MOS_STATUS_SUCCESS; 594 } 595 596 m_pResource = presMetadataBuffer; 597 m_dwOffset = resourceOffset.dwEncodeErrorFlags; 598 m_dwValue = 0; 599 SETPAR_AND_ADDCMD(MI_STORE_DATA_IMM, m_miItf, cmdBuffer); 600 601 m_dwOffset = resourceOffset.dwWrittenSubregionsCount; 602 m_dwValue = m_basicFeature->m_numSlices; 603 SETPAR_AND_ADDCMD(MI_STORE_DATA_IMM, m_miItf, cmdBuffer); 604 605 ENCODE_CHK_COND_RETURN((m_vdboxIndex > m_mfxItf->GetMaxVdboxIndex()), "ERROR - vdbox index exceed the maximum"); 606 MmioRegistersMfx *mmioRegisters = SelectVdboxAndGetMmioRegister(m_vdboxIndex, cmdBuffer); 607 CODEC_HW_CHK_NULL_RETURN(mmioRegisters); 608 609 m_dwOffset = resourceOffset.dwEncodedBitstreamWrittenBytesCount; 610 m_dwValue = mmioRegisters->mfcBitstreamBytecountFrameRegOffset; 611 SETPAR_AND_ADDCMD(MI_STORE_REGISTER_MEM, m_miItf, cmdBuffer); 612 613 // Statistics 614 // Average QP 615 if (m_seqParam->RateControlMethod == RATECONTROL_CQP) 616 { 617 m_dwOffset = resourceOffset.dwEncodeStats + resourceOffset.dwAverageQP; 618 m_dwValue = m_picParam->QpY + m_sliceParams->slice_qp_delta; 619 SETPAR_AND_ADDCMD(MI_STORE_DATA_IMM, m_miItf, cmdBuffer); 620 } 621 else 622 { 623 ENCODE_NORMALMESSAGE("RC mode is temporarily not supported"); 624 } 625 626 // PAK frame status pointer 627 MOS_RESOURCE *pPakFrameStat = (m_basicFeature->m_perMBStreamOutEnable) ? 628 m_basicFeature->m_recycleBuf->GetBuffer(BrcPakStatisticBufferFull, m_basicFeature->m_frameNum) : 629 m_basicFeature->m_recycleBuf->GetBuffer(BrcPakStatisticBuffer, 0); 630 631 auto &miLoadRegImmParams = m_miItf->MHW_GETPAR_F(MI_LOAD_REGISTER_IMM)(); 632 auto &miLoadRegMemParams = m_miItf->MHW_GETPAR_F(MI_LOAD_REGISTER_MEM)(); 633 auto &miLoadRegRegParams = m_miItf->MHW_GETPAR_F(MI_LOAD_REGISTER_REG)(); 634 auto &flushDwParams = m_miItf->MHW_GETPAR_F(MI_FLUSH_DW)(); 635 636 // Intra/Inter/Skip statistics counted by number of MBs (not sub-blocks) 637 638 // Intra16x16 + Intra8x8 + Intra4x4 639 miLoadRegImmParams = {}; 640 miLoadRegImmParams.dwRegister = mmioRegisters->generalPurposeRegister4LoOffset; 641 miLoadRegImmParams.dwData = 0xFFFF0000; 642 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_IMM)(cmdBuffer)); 643 644 miLoadRegImmParams = {}; 645 miLoadRegImmParams.dwData = 0; 646 miLoadRegImmParams.dwRegister = mmioRegisters->generalPurposeRegister4HiOffset; 647 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_IMM)(cmdBuffer)); 648 649 // DW4 Intra16x16:Intra8x8 650 miLoadRegMemParams = {}; 651 miLoadRegMemParams.presStoreBuffer = pPakFrameStat; 652 miLoadRegMemParams.dwOffset = 4 * sizeof(uint32_t); 653 miLoadRegMemParams.dwRegister = mmioRegisters->generalPurposeRegister0LoOffset; 654 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer)); 655 656 miLoadRegImmParams = {}; 657 miLoadRegImmParams.dwRegister = mmioRegisters->generalPurposeRegister0HiOffset; 658 miLoadRegImmParams.dwData = 0; 659 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_IMM)(cmdBuffer)); 660 661 mhw::mi::MHW_MI_ALU_PARAMS aluParams[4 + 16 * 4]; 662 int aluCount; 663 664 auto Reg0OpReg4ToReg0 = [&](MHW_MI_ALU_OPCODE opCode) { 665 aluCount = 0; 666 // load SrcA, reg0 667 aluParams[aluCount].AluOpcode = MHW_MI_ALU_LOAD; 668 aluParams[aluCount].Operand1 = MHW_MI_ALU_SRCA; 669 aluParams[aluCount].Operand2 = MHW_MI_ALU_GPREG0; 670 ++aluCount; 671 // load SrcB, reg4 672 aluParams[aluCount].AluOpcode = MHW_MI_ALU_LOAD; 673 aluParams[aluCount].Operand1 = MHW_MI_ALU_SRCB; 674 aluParams[aluCount].Operand2 = MHW_MI_ALU_GPREG4; 675 ++aluCount; 676 // and SrcA, SrcB 677 aluParams[aluCount].AluOpcode = opCode; 678 ++aluCount; 679 680 // store reg0, accu 681 aluParams[aluCount].AluOpcode = MHW_MI_ALU_STORE; 682 aluParams[aluCount].Operand1 = MHW_MI_ALU_GPREG0; 683 aluParams[aluCount].Operand2 = MHW_MI_ALU_ACCU; 684 ++aluCount; 685 686 auto &miMathParams = m_miItf->MHW_GETPAR_F(MI_MATH)(); 687 miMathParams = {}; 688 miMathParams.dwNumAluParams = aluCount; 689 miMathParams.pAluPayload = aluParams; 690 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_MATH)(cmdBuffer)); 691 692 return MOS_STATUS_SUCCESS; 693 }; 694 695 ENCODE_CHK_STATUS_RETURN(Reg0OpReg4ToReg0(MHW_MI_ALU_AND)); // reg0 0:0:intra16x16:0 696 697 // DW5 Intra4x4:Inter16x16 698 miLoadRegMemParams = {}; 699 miLoadRegMemParams.presStoreBuffer = pPakFrameStat; 700 miLoadRegMemParams.dwOffset = 5 * sizeof(uint32_t); 701 miLoadRegMemParams.dwRegister = mmioRegisters->generalPurposeRegister4LoOffset; 702 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer)); 703 704 miLoadRegImmParams = {}; 705 miLoadRegImmParams.dwRegister = mmioRegisters->generalPurposeRegister4HiOffset; 706 miLoadRegImmParams.dwData = 0; 707 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_IMM)(cmdBuffer)); // reg4 0:0:intra4x4:inter16x16(garb) 708 709 auto AddHighShortsOfReg0Reg4ToReg0 = [&]() { 710 aluCount = 0; 711 // load SrcA, reg0 712 aluParams[aluCount].AluOpcode = MHW_MI_ALU_LOAD; 713 aluParams[aluCount].Operand1 = MHW_MI_ALU_SRCA; 714 aluParams[aluCount].Operand2 = MHW_MI_ALU_GPREG0; 715 ++aluCount; 716 // load SrcB, reg4 717 aluParams[aluCount].AluOpcode = MHW_MI_ALU_LOAD; 718 aluParams[aluCount].Operand1 = MHW_MI_ALU_SRCB; 719 aluParams[aluCount].Operand2 = MHW_MI_ALU_GPREG4; 720 ++aluCount; 721 // add SrcA, SrcB 722 aluParams[aluCount].AluOpcode = MHW_MI_ALU_ADD; 723 ++aluCount; 724 725 // ACCU keeps now 0:0:reg0+reg4:0 726 727 // 16bit shift left 728 for (int i = 0; i < 16; ++i) 729 { 730 // store reg0, accu 731 aluParams[aluCount].AluOpcode = MHW_MI_ALU_STORE; 732 aluParams[aluCount].Operand1 = MHW_MI_ALU_GPREG0; 733 aluParams[aluCount].Operand2 = MHW_MI_ALU_ACCU; 734 ++aluCount; 735 // load SrcA, accu 736 aluParams[aluCount].AluOpcode = MHW_MI_ALU_LOAD; 737 aluParams[aluCount].Operand1 = MHW_MI_ALU_SRCA; 738 aluParams[aluCount].Operand2 = MHW_MI_ALU_GPREG0; 739 ++aluCount; 740 // load SrcB, accu 741 aluParams[aluCount].AluOpcode = MHW_MI_ALU_LOAD; 742 aluParams[aluCount].Operand1 = MHW_MI_ALU_SRCB; 743 aluParams[aluCount].Operand2 = MHW_MI_ALU_GPREG0; 744 ++aluCount; 745 // add SrcA, SrcB 746 aluParams[aluCount].AluOpcode = MHW_MI_ALU_ADD; 747 ++aluCount; 748 } 749 750 // store reg0, accu 751 aluParams[aluCount].AluOpcode = MHW_MI_ALU_STORE; 752 aluParams[aluCount].Operand1 = MHW_MI_ALU_GPREG0; 753 aluParams[aluCount].Operand2 = MHW_MI_ALU_ACCU; 754 ++aluCount; 755 756 auto &miMathParams = m_miItf->MHW_GETPAR_F(MI_MATH)(); 757 miMathParams = {}; 758 miMathParams.dwNumAluParams = aluCount; 759 miMathParams.pAluPayload = aluParams; 760 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_MATH)(cmdBuffer)); 761 762 // move from reg0hi to reg0lo 763 miLoadRegRegParams = {}; 764 miLoadRegRegParams.dwSrcRegister = mmioRegisters->generalPurposeRegister0HiOffset; 765 miLoadRegRegParams.dwDstRegister = mmioRegisters->generalPurposeRegister0LoOffset; 766 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_REG)(cmdBuffer)); 767 768 miLoadRegImmParams = {}; 769 miLoadRegImmParams.dwData = 0; 770 miLoadRegImmParams.dwRegister = mmioRegisters->generalPurposeRegister0HiOffset; 771 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_IMM)(cmdBuffer)); 772 773 return MOS_STATUS_SUCCESS; 774 }; 775 776 ENCODE_CHK_STATUS_RETURN(AddHighShortsOfReg0Reg4ToReg0()); // reg0 0:0:(Intra4x4+Intra16x16).hi:(Intra4x4+Intra16x16).lo 777 778 // Temp store from reg0 to presMetadataBuffer 779 m_pResource = presMetadataBuffer; 780 m_dwOffset = resourceOffset.dwEncodeStats + resourceOffset.dwIntraCodingUnitsCount; 781 m_dwValue = mmioRegisters->generalPurposeRegister0LoOffset; 782 SETPAR_AND_ADDCMD(MI_STORE_REGISTER_MEM, m_miItf, cmdBuffer); 783 784 // DW4 Intra16x16:Intra8x8 785 miLoadRegMemParams = {}; 786 miLoadRegMemParams.presStoreBuffer = pPakFrameStat; 787 miLoadRegMemParams.dwOffset = 4 * sizeof(uint32_t); 788 miLoadRegMemParams.dwRegister = mmioRegisters->generalPurposeRegister0LoOffset; 789 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer)); 790 791 miLoadRegImmParams = {}; 792 miLoadRegImmParams.dwRegister = mmioRegisters->generalPurposeRegister4LoOffset; 793 miLoadRegImmParams.dwData = 0x0000FFFF; 794 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_IMM)(cmdBuffer)); 795 796 miLoadRegImmParams = {}; 797 miLoadRegImmParams.dwData = 0; 798 miLoadRegImmParams.dwRegister = mmioRegisters->generalPurposeRegister4HiOffset; 799 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_IMM)(cmdBuffer)); 800 801 ENCODE_CHK_STATUS_RETURN(Reg0OpReg4ToReg0(MHW_MI_ALU_AND)); // reg0 0:0:0:Intra8x8 802 803 flushDwParams = {}; 804 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_FLUSH_DW)(cmdBuffer)); 805 806 miLoadRegMemParams = {}; 807 miLoadRegMemParams.presStoreBuffer = presMetadataBuffer; 808 miLoadRegMemParams.dwOffset = resourceOffset.dwEncodeStats + resourceOffset.dwIntraCodingUnitsCount; 809 miLoadRegMemParams.dwRegister = mmioRegisters->generalPurposeRegister4LoOffset; 810 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer)); 811 812 ENCODE_CHK_STATUS_RETURN(Reg0OpReg4ToReg0(MHW_MI_ALU_ADD)); 813 814 // Store from reg0 to presMetadataBuffer 815 m_pResource = presMetadataBuffer; 816 m_dwOffset = resourceOffset.dwEncodeStats + resourceOffset.dwIntraCodingUnitsCount; 817 m_dwValue = mmioRegisters->generalPurposeRegister0LoOffset; 818 SETPAR_AND_ADDCMD(MI_STORE_REGISTER_MEM, m_miItf, cmdBuffer); 819 820 // Inter16x16 + Inter16x8 + Inter8x16 + Inter8x8 821 miLoadRegImmParams = {}; 822 miLoadRegImmParams.dwRegister = mmioRegisters->generalPurposeRegister4LoOffset; 823 miLoadRegImmParams.dwData = 0xFFFF0000; 824 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_IMM)(cmdBuffer)); 825 826 miLoadRegImmParams = {}; 827 miLoadRegImmParams.dwData = 0; 828 miLoadRegImmParams.dwRegister = mmioRegisters->generalPurposeRegister4HiOffset; 829 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_IMM)(cmdBuffer)); 830 831 // DW6 Inter16x8:Inter8x16 832 miLoadRegMemParams = {}; 833 miLoadRegMemParams.presStoreBuffer = pPakFrameStat; 834 miLoadRegMemParams.dwOffset = 6 * sizeof(uint32_t); 835 miLoadRegMemParams.dwRegister = mmioRegisters->generalPurposeRegister0LoOffset; 836 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer)); 837 838 miLoadRegImmParams = {}; 839 miLoadRegImmParams.dwRegister = mmioRegisters->generalPurposeRegister0HiOffset; 840 miLoadRegImmParams.dwData = 0; 841 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_IMM)(cmdBuffer)); 842 843 ENCODE_CHK_STATUS_RETURN(Reg0OpReg4ToReg0(MHW_MI_ALU_AND)); // reg0 0:0:inter16x8:0 844 845 // DW7 Inter8x8:InterSkip16x16 846 miLoadRegMemParams = {}; 847 miLoadRegMemParams.presStoreBuffer = pPakFrameStat; 848 miLoadRegMemParams.dwOffset = 7 * sizeof(uint32_t); 849 miLoadRegMemParams.dwRegister = mmioRegisters->generalPurposeRegister4LoOffset; 850 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer)); 851 852 miLoadRegImmParams = {}; 853 miLoadRegImmParams.dwRegister = mmioRegisters->generalPurposeRegister4HiOffset; 854 miLoadRegImmParams.dwData = 0; 855 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_IMM)(cmdBuffer)); // reg4 0:0:inter8x8:0 856 857 ENCODE_CHK_STATUS_RETURN(AddHighShortsOfReg0Reg4ToReg0()); // reg0 0:0:(Inter16x8+Inter8x8).hi:(Inter16x8+Inter8x8).lo; 858 859 // Temp store from reg0 to presMetadataBuffer 860 m_pResource = presMetadataBuffer; 861 m_dwOffset = resourceOffset.dwEncodeStats + resourceOffset.dwInterCodingUnitsCount; 862 m_dwValue = mmioRegisters->generalPurposeRegister0LoOffset; 863 SETPAR_AND_ADDCMD(MI_STORE_REGISTER_MEM, m_miItf, cmdBuffer); 864 865 // DW6 Inter16x8:Inter8x16 866 miLoadRegMemParams = {}; 867 miLoadRegMemParams.presStoreBuffer = pPakFrameStat; 868 miLoadRegMemParams.dwOffset = 6 * sizeof(uint32_t); 869 miLoadRegMemParams.dwRegister = mmioRegisters->generalPurposeRegister0HiOffset; 870 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer)); 871 872 // DW5 Intra4x4 : Inter16x16 873 miLoadRegMemParams = {}; 874 miLoadRegMemParams.presStoreBuffer = pPakFrameStat; 875 miLoadRegMemParams.dwOffset = 5 * sizeof(uint32_t); 876 miLoadRegMemParams.dwRegister = mmioRegisters->generalPurposeRegister0LoOffset; 877 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer)); 878 879 miLoadRegImmParams = {}; 880 miLoadRegImmParams.dwRegister = mmioRegisters->generalPurposeRegister4LoOffset; 881 miLoadRegImmParams.dwData = 0x0000FFFF; 882 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_IMM)(cmdBuffer)); 883 884 miLoadRegImmParams = {}; 885 miLoadRegImmParams.dwRegister = mmioRegisters->generalPurposeRegister4HiOffset; 886 miLoadRegImmParams.dwData = 0x0000FFFF; 887 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_IMM)(cmdBuffer)); 888 889 ENCODE_CHK_STATUS_RETURN(Reg0OpReg4ToReg0(MHW_MI_ALU_AND)); // reg0 0:Inter8x16:0:Inter16x16 890 891 // move from reg0hi to reg4lo 892 miLoadRegRegParams = {}; 893 miLoadRegRegParams.dwSrcRegister = mmioRegisters->generalPurposeRegister0HiOffset; 894 miLoadRegRegParams.dwDstRegister = mmioRegisters->generalPurposeRegister4LoOffset; 895 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_REG)(cmdBuffer)); 896 897 ENCODE_CHK_STATUS_RETURN(Reg0OpReg4ToReg0(MHW_MI_ALU_ADD)); // reg0 0:0:(Inter8x16+Inter16x16).hi::(Inter8x16+Inter16x16).hi 898 899 flushDwParams = {}; 900 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_FLUSH_DW)(cmdBuffer)); 901 902 miLoadRegMemParams = {}; 903 miLoadRegMemParams.presStoreBuffer = presMetadataBuffer; 904 miLoadRegMemParams.dwOffset = resourceOffset.dwEncodeStats + resourceOffset.dwInterCodingUnitsCount; 905 miLoadRegMemParams.dwRegister = mmioRegisters->generalPurposeRegister4LoOffset; 906 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer)); 907 908 ENCODE_CHK_STATUS_RETURN(Reg0OpReg4ToReg0(MHW_MI_ALU_ADD)); 909 910 // Store from reg0 to presMetadataBuffer 911 m_pResource = presMetadataBuffer; 912 m_dwOffset = resourceOffset.dwEncodeStats + resourceOffset.dwInterCodingUnitsCount; 913 m_dwValue = mmioRegisters->generalPurposeRegister0LoOffset; 914 SETPAR_AND_ADDCMD(MI_STORE_REGISTER_MEM, m_miItf, cmdBuffer); 915 916 // Inter skip 16x16 917 miLoadRegImmParams = {}; 918 miLoadRegImmParams.dwRegister = mmioRegisters->generalPurposeRegister4LoOffset; 919 miLoadRegImmParams.dwData = 0x0000FFFF; 920 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_IMM)(cmdBuffer)); 921 922 miLoadRegImmParams = {}; 923 miLoadRegImmParams.dwData = 0; 924 miLoadRegImmParams.dwRegister = mmioRegisters->generalPurposeRegister4HiOffset; 925 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_IMM)(cmdBuffer)); 926 927 // DW7 Inter8x8:InterSkip16x16 928 miLoadRegMemParams = {}; 929 miLoadRegMemParams.presStoreBuffer = pPakFrameStat; 930 miLoadRegMemParams.dwOffset = 7 * sizeof(uint32_t); 931 miLoadRegMemParams.dwRegister = mmioRegisters->generalPurposeRegister0LoOffset; 932 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_LOAD_REGISTER_MEM)(cmdBuffer)); 933 934 ENCODE_CHK_STATUS_RETURN(Reg0OpReg4ToReg0(MHW_MI_ALU_AND)); 935 936 // Store from reg0 to presMetadataBuffer 937 m_pResource = presMetadataBuffer; 938 m_dwOffset = resourceOffset.dwEncodeStats + resourceOffset.dwSkipCodingUnitsCount; 939 m_dwValue = mmioRegisters->generalPurposeRegister0LoOffset; 940 SETPAR_AND_ADDCMD(MI_STORE_REGISTER_MEM, m_miItf, cmdBuffer); 941 942 // Average MV_X/MV_Y, report (0,0) as temp solution, later may need kernel involved 943 m_dwOffset = resourceOffset.dwEncodeStats + resourceOffset.dwAverageMotionEstimationXDirection; 944 m_dwValue = 0; 945 SETPAR_AND_ADDCMD(MI_STORE_DATA_IMM, m_miItf, cmdBuffer); 946 947 m_dwOffset = resourceOffset.dwEncodeStats + resourceOffset.dwAverageMotionEstimationYDirection; 948 m_dwValue = 0; 949 SETPAR_AND_ADDCMD(MI_STORE_DATA_IMM, m_miItf, cmdBuffer); 950 951 return MOS_STATUS_SUCCESS; 952 } 953 ReportSliceSizeMetaData(PMOS_COMMAND_BUFFER cmdBuffer,uint32_t slcCount)954 MOS_STATUS AvcVdencPkt::ReportSliceSizeMetaData( 955 PMOS_COMMAND_BUFFER cmdBuffer, 956 uint32_t slcCount) 957 { 958 ENCODE_FUNC_CALL(); 959 960 PMOS_RESOURCE presMetadataBuffer = m_basicFeature->m_resMetadataBuffer; 961 MetaDataOffset resourceOffset = m_basicFeature->m_metaDataOffset; 962 if ((presMetadataBuffer == nullptr) || !m_pipeline->IsLastPass()) 963 { 964 return MOS_STATUS_SUCCESS; 965 } 966 967 uint32_t subRegionSartOffset = resourceOffset.dwMetaDataSize + slcCount * resourceOffset.dwMetaDataSubRegionSize; 968 969 SETPAR_AND_ADDCMD(MI_FLUSH_DW, m_miItf, cmdBuffer); 970 971 m_pResource = presMetadataBuffer; 972 m_dwOffset = subRegionSartOffset + resourceOffset.dwbStartOffset; 973 m_dwValue = 0; 974 SETPAR_AND_ADDCMD(MI_STORE_DATA_IMM, m_miItf, cmdBuffer); 975 976 m_dwOffset = subRegionSartOffset + resourceOffset.dwbHeaderSize; 977 m_dwValue = m_basicFeature->m_slcData[slcCount].BitSize; 978 SETPAR_AND_ADDCMD(MI_STORE_DATA_IMM, m_miItf, cmdBuffer); 979 980 MmioRegistersMfx *mmioRegisters = SelectVdboxAndGetMmioRegister(m_vdboxIndex, cmdBuffer); 981 CODEC_HW_CHK_NULL_RETURN(mmioRegisters); 982 ENCODE_CHK_COND_RETURN((m_vdboxIndex > m_mfxItf->GetMaxVdboxIndex()), "ERROR - vdbox index exceed the maximum"); 983 m_pResource = presMetadataBuffer; 984 m_dwOffset = subRegionSartOffset + resourceOffset.dwbSize; 985 m_dwValue = mmioRegisters->mfcBitstreamBytecountSliceRegOffset; 986 SETPAR_AND_ADDCMD(MI_STORE_REGISTER_MEM, m_miItf, cmdBuffer); 987 988 return MOS_STATUS_SUCCESS; 989 } 990 SetSliceStateCommonParams(MHW_VDBOX_AVC_SLICE_STATE & sliceState)991 MOS_STATUS AvcVdencPkt::SetSliceStateCommonParams(MHW_VDBOX_AVC_SLICE_STATE &sliceState) 992 { 993 ENCODE_FUNC_CALL(); 994 995 MOS_ZeroMemory(&sliceState, sizeof(sliceState)); 996 sliceState.presDataBuffer = m_basicFeature->m_resMbCodeBuffer; 997 sliceState.pAvcPicIdx = m_basicFeature->m_ref->GetPicIndex(); 998 sliceState.pEncodeAvcSeqParams = m_seqParam; 999 sliceState.pEncodeAvcPicParams = m_picParam; 1000 sliceState.pBsBuffer = &(m_basicFeature->m_bsBuffer); 1001 sliceState.ppNalUnitParams = m_basicFeature->m_nalUnitParams; 1002 sliceState.bBrcEnabled = false; 1003 sliceState.bVdencInUse = true; 1004 sliceState.oneOnOneMapping = false; 1005 sliceState.bFirstPass = m_pipeline->IsFirstPass(); 1006 sliceState.bLastPass = m_pipeline->IsLastPass(); 1007 // Disable Panic mode when min/max QP control is on. kernel may disable it, but disable in driver also. 1008 sliceState.bRCPanicEnable = m_basicFeature->m_panicEnable && (!m_basicFeature->m_minMaxQpControlEnabled); 1009 sliceState.wFrameFieldHeightInMB = m_basicFeature->m_frameFieldHeightInMb; 1010 // App handles tail insertion for VDEnc dynamic slice in non-cp case 1011 sliceState.bVdencNoTailInsertion = m_basicFeature->m_vdencNoTailInsertion; 1012 sliceState.bAcceleratorHeaderPackingCaps = m_basicFeature->m_acceleratorHeaderPackingCaps; 1013 1014 uint32_t batchBufferForPakSlicesStartOffset = (uint32_t)m_batchBufferForPakSlices[m_pipeline->m_currRecycledBufIdx].iCurrent; 1015 if (m_useBatchBufferForPakSlices) 1016 { 1017 sliceState.pBatchBufferForPakSlices = &m_batchBufferForPakSlices[m_pipeline->m_currRecycledBufIdx]; 1018 sliceState.bSingleTaskPhaseSupported = true; 1019 sliceState.dwBatchBufferForPakSlicesStartOffset = batchBufferForPakSlicesStartOffset; 1020 } 1021 1022 return MOS_STATUS_SUCCESS; 1023 } 1024 SetSliceStateParams(MHW_VDBOX_AVC_SLICE_STATE & sliceState,PCODEC_ENCODER_SLCDATA slcData,uint16_t slcCount)1025 MOS_STATUS AvcVdencPkt::SetSliceStateParams( 1026 MHW_VDBOX_AVC_SLICE_STATE &sliceState, 1027 PCODEC_ENCODER_SLCDATA slcData, 1028 uint16_t slcCount) 1029 { 1030 ENCODE_FUNC_CALL(); 1031 1032 sliceState.pEncodeAvcSliceParams = &m_sliceParams[slcCount]; 1033 sliceState.dwDataBufferOffset = slcData[slcCount].CmdOffset; 1034 sliceState.dwOffset = slcData[slcCount].SliceOffset; 1035 sliceState.dwLength = slcData[slcCount].BitSize; 1036 sliceState.uiSkipEmulationCheckCount = slcData[slcCount].SkipEmulationByteCount; 1037 sliceState.dwSliceIndex = (uint32_t)slcCount; 1038 sliceState.bInsertBeforeSliceHeaders = (slcCount == 0); 1039 1040 RUN_FEATURE_INTERFACE_RETURN(AvcEncodeRounding, AvcFeatureIDs::avcRoundingFeature, SetRoundingParams, sliceState); 1041 1042 return MOS_STATUS_SUCCESS; 1043 } 1044 PackSliceHeader(uint16_t slcCount)1045 MOS_STATUS AvcVdencPkt::PackSliceHeader(uint16_t slcCount) 1046 { 1047 ENCODE_FUNC_CALL(); 1048 1049 CODECHAL_ENCODE_AVC_PACK_SLC_HEADER_PARAMS packSlcHeaderParams; 1050 packSlcHeaderParams.pBsBuffer = &(m_basicFeature->m_bsBuffer); 1051 packSlcHeaderParams.pPicParams = m_picParam; 1052 packSlcHeaderParams.pSeqParams = m_seqParam; 1053 packSlcHeaderParams.ppRefList = m_basicFeature->m_ref->GetRefList(); 1054 packSlcHeaderParams.CurrPic = m_basicFeature->m_currOriginalPic; 1055 packSlcHeaderParams.CurrReconPic = m_basicFeature->m_currReconstructedPic; 1056 packSlcHeaderParams.UserFlags = m_basicFeature->m_userFlags; 1057 packSlcHeaderParams.NalUnitType = m_basicFeature->m_nalUnitType; 1058 packSlcHeaderParams.wPictureCodingType = m_basicFeature->m_pictureCodingType; 1059 packSlcHeaderParams.bVdencEnabled = true; 1060 packSlcHeaderParams.pAvcSliceParams = &m_sliceParams[slcCount]; 1061 1062 ENCODE_CHK_STATUS_RETURN(AvcEncodeHeaderPacker::PackSliceHeader(&packSlcHeaderParams)); 1063 1064 return MOS_STATUS_SUCCESS; 1065 } 1066 InsertSeqStreamEnd(MOS_COMMAND_BUFFER & cmdBuffer)1067 MOS_STATUS AvcVdencPkt::InsertSeqStreamEnd(MOS_COMMAND_BUFFER &cmdBuffer) 1068 { 1069 ENCODE_FUNC_CALL(); 1070 1071 ENCODE_CHK_STATUS_RETURN(AddAllCmds_MFX_PAK_INSERT_OBJECT(&cmdBuffer)); 1072 1073 return MOS_STATUS_SUCCESS; 1074 } 1075 Completed(void * mfxStatus,void * rcsStatus,void * statusReport)1076 MOS_STATUS AvcVdencPkt::Completed(void *mfxStatus, void *rcsStatus, void *statusReport) 1077 { 1078 ENCODE_FUNC_CALL(); 1079 1080 ENCODE_CHK_NULL_RETURN(mfxStatus); 1081 ENCODE_CHK_NULL_RETURN(statusReport); 1082 ENCODE_CHK_NULL_RETURN(m_basicFeature); 1083 1084 EncodeStatusMfx * encodeStatusMfx = (EncodeStatusMfx *)mfxStatus; 1085 EncodeStatusReportData *statusReportData = (EncodeStatusReportData *)statusReport; 1086 1087 if (statusReportData->hwCtr) 1088 { 1089 m_encodecp->UpdateCpStatusReport(statusReport); 1090 } 1091 1092 statusReportData->codecStatus = CODECHAL_STATUS_SUCCESSFUL; 1093 statusReportData->numberPasses = (uint8_t)encodeStatusMfx->numberPasses; 1094 ENCODE_VERBOSEMESSAGE("statusReportData->numberPasses: %d\n", statusReportData->numberPasses); 1095 1096 ENCODE_CHK_STATUS_RETURN(ReportExtStatistics(*encodeStatusMfx, *statusReportData)); 1097 1098 CODECHAL_DEBUG_TOOL( 1099 ENCODE_CHK_STATUS_RETURN(DumpResources(encodeStatusMfx, statusReportData));); 1100 1101 m_basicFeature->Reset((CODEC_REF_LIST *)statusReportData->currRefList); 1102 1103 return MOS_STATUS_SUCCESS; 1104 } 1105 StartStatusReport(uint32_t srType,MOS_COMMAND_BUFFER * cmdBuffer)1106 MOS_STATUS AvcVdencPkt::StartStatusReport( 1107 uint32_t srType, 1108 MOS_COMMAND_BUFFER *cmdBuffer) 1109 { 1110 ENCODE_FUNC_CALL(); 1111 ENCODE_CHK_NULL_RETURN(cmdBuffer); 1112 1113 ENCODE_CHK_STATUS_RETURN(MediaPacket::StartStatusReportNext(srType, cmdBuffer)); 1114 m_encodecp->StartCpStatusReport(cmdBuffer); 1115 1116 MediaPerfProfiler *perfProfiler = MediaPerfProfiler::Instance(); 1117 ENCODE_CHK_NULL_RETURN(perfProfiler); 1118 ENCODE_CHK_STATUS_RETURN(perfProfiler->AddPerfCollectStartCmd( 1119 (void *)m_pipeline, m_osInterface, m_miItf, cmdBuffer)); 1120 1121 return MOS_STATUS_SUCCESS; 1122 } 1123 CalculateCommandSize(uint32_t & commandBufferSize,uint32_t & requestedPatchListSize)1124 MOS_STATUS AvcVdencPkt::CalculateCommandSize(uint32_t &commandBufferSize, uint32_t &requestedPatchListSize) 1125 { 1126 ENCODE_CHK_STATUS_RETURN(CalculateMfxCommandsSize()); 1127 ENCODE_CHK_STATUS_RETURN(CalculateVdencCommandsSize()); 1128 commandBufferSize = CalculateCommandBufferSize(); 1129 requestedPatchListSize = CalculatePatchListSize(); 1130 return MOS_STATUS_SUCCESS; 1131 } 1132 CalculateCommandBufferSize()1133 uint32_t AvcVdencPkt::CalculateCommandBufferSize() 1134 { 1135 ENCODE_FUNC_CALL(); 1136 uint32_t commandBufferSize = 0; 1137 1138 commandBufferSize = 1139 m_pictureStatesSize + 1140 m_basicFeature->m_extraPictureStatesSize + 1141 (m_sliceStatesSize * m_basicFeature->m_numSlices); 1142 1143 if (m_pipeline->IsSingleTaskPhaseSupported()) 1144 { 1145 commandBufferSize *= m_pipeline->GetPassNum(); 1146 } 1147 1148 // 4K align since allocation is in chunks of 4K bytes. 1149 commandBufferSize = MOS_ALIGN_CEIL(commandBufferSize, CODECHAL_PAGE_SIZE); 1150 1151 return commandBufferSize; 1152 } 1153 CalculatePatchListSize()1154 uint32_t AvcVdencPkt::CalculatePatchListSize() 1155 { 1156 ENCODE_FUNC_CALL(); 1157 uint32_t requestedPatchListSize = 0; 1158 if (m_usePatchList) 1159 { 1160 requestedPatchListSize = 1161 m_picturePatchListSize + 1162 (m_slicePatchListSize * m_basicFeature->m_numSlices); 1163 1164 if (m_pipeline->IsSingleTaskPhaseSupported()) 1165 { 1166 requestedPatchListSize *= m_pipeline->GetPassNum(); 1167 } 1168 } 1169 return requestedPatchListSize; 1170 } 1171 CalculateVdencCommandsSize()1172 MOS_STATUS AvcVdencPkt::CalculateVdencCommandsSize() 1173 { 1174 ENCODE_FUNC_CALL(); 1175 1176 MHW_VDBOX_STATE_CMDSIZE_PARAMS stateCmdSizeParams; 1177 1178 uint32_t hucCommandsSize = 0; 1179 uint32_t hucPatchListSize = 0; 1180 ENCODE_CHK_STATUS_RETURN(m_hwInterface->GetHucStateCommandSize( 1181 CODECHAL_ENCODE_MODE_AVC, (uint32_t *)&hucCommandsSize, (uint32_t *)&hucPatchListSize, &stateCmdSizeParams)); 1182 m_pictureStatesSize += hucCommandsSize; 1183 m_picturePatchListSize += hucPatchListSize; 1184 1185 // Picture Level Commands 1186 uint32_t vdencPictureStatesSize = 0; 1187 uint32_t vdencPicturePatchListSize = 0; 1188 vdencPictureStatesSize = 1189 m_vdencItf->MHW_GETSIZE_F(VDENC_PIPE_MODE_SELECT)() + 1190 m_vdencItf->MHW_GETSIZE_F(VDENC_SRC_SURFACE_STATE)() + 1191 m_vdencItf->MHW_GETSIZE_F(VDENC_REF_SURFACE_STATE)() + 1192 m_vdencItf->MHW_GETSIZE_F(VDENC_DS_REF_SURFACE_STATE)() + 1193 m_vdencItf->MHW_GETSIZE_F(VDENC_PIPE_BUF_ADDR_STATE)() + 1194 m_vdencItf->MHW_GETSIZE_F(VDENC_AVC_IMG_STATE)() + 1195 m_vdencItf->MHW_GETSIZE_F(VDENC_WALKER_STATE)() + 1196 m_vdencItf->MHW_GETSIZE_F(VD_PIPELINE_FLUSH)() + 1197 m_vdencItf->MHW_GETSIZE_F(VDENC_CMD3)() + 1198 m_vdencItf->MHW_GETSIZE_F(VDENC_AVC_SLICE_STATE)(); 1199 vdencPicturePatchListSize = mhw::vdbox::vdenc::Itf::VDENC_PIPE_BUF_ADDR_STATE_CMD_NUMBER_OF_ADDRESSES; 1200 1201 m_pictureStatesSize += vdencPictureStatesSize; 1202 m_picturePatchListSize += vdencPicturePatchListSize; 1203 1204 // Slice Level Commands 1205 uint32_t vdencSliceStatesSize = 0; 1206 uint32_t vdencSlicePatchListSize = 0; 1207 vdencSliceStatesSize = 1208 m_vdencItf->MHW_GETSIZE_F(VDENC_WEIGHTSOFFSETS_STATE)() + 1209 m_vdencItf->MHW_GETSIZE_F(VDENC_AVC_SLICE_STATE)() + 1210 m_vdencItf->MHW_GETSIZE_F(VDENC_WALKER_STATE)() + 1211 m_vdencItf->MHW_GETSIZE_F(VD_PIPELINE_FLUSH)(); 1212 vdencSlicePatchListSize = mhw::vdbox::vdenc::Itf::VDENC_PIPE_BUF_ADDR_STATE_CMD_NUMBER_OF_ADDRESSES; 1213 1214 m_sliceStatesSize += vdencSliceStatesSize; 1215 m_slicePatchListSize += vdencSlicePatchListSize; 1216 1217 #if USE_CODECHAL_DEBUG_TOOL 1218 // for ModifyEncodedFrameSizeWithFakeHeaderSize 1219 // total sum is 368 (108*2 + 152) 1220 auto packetUtilities = m_pipeline->GetPacketUtilities(); 1221 ENCODE_CHK_NULL_RETURN(packetUtilities); 1222 uint32_t sizeInByte = 0; 1223 bool isIframe = m_basicFeature->m_pictureCodingType == I_TYPE; 1224 if (packetUtilities->GetFakeHeaderSettings(sizeInByte, isIframe)) 1225 { 1226 m_pictureStatesSize += 1227 // 2x AddBufferWithIMMValue to change frame size 1228 ( 1229 m_miItf->MHW_GETSIZE_F(MI_FLUSH_DW)() + 1230 m_miItf->MHW_GETSIZE_F(MI_LOAD_REGISTER_MEM)() + 1231 m_miItf->MHW_GETSIZE_F(MI_LOAD_REGISTER_MEM)() * 3 + 1232 m_miItf->MHW_GETSIZE_F(MI_MATH)() + sizeof(mhw::mi::MHW_MI_ALU_PARAMS) * 4 + 1233 m_miItf->MHW_GETSIZE_F(MI_STORE_REGISTER_MEM)()) * 1234 2 + 1235 // SetBufferWithIMMValueU16 to change header size 1236 (m_miItf->MHW_GETSIZE_F(MI_FLUSH_DW)() + 1237 m_miItf->MHW_GETSIZE_F(MI_LOAD_REGISTER_MEM)() + 1238 m_miItf->MHW_GETSIZE_F(MI_LOAD_REGISTER_MEM)() * 5 + 1239 2 * (m_miItf->MHW_GETSIZE_F(MI_MATH)() + sizeof(mhw::mi::MHW_MI_ALU_PARAMS) * 4) + 1240 m_miItf->MHW_GETSIZE_F(MI_STORE_REGISTER_MEM)()); 1241 } 1242 #endif 1243 1244 return MOS_STATUS_SUCCESS; 1245 } 1246 CalculateMfxCommandsSize()1247 MOS_STATUS AvcVdencPkt::CalculateMfxCommandsSize() 1248 { 1249 ENCODE_FUNC_CALL(); 1250 1251 // PAK Slice Level Commands 1252 ENCODE_CHK_STATUS_RETURN(GetMfxPrimitiveCommandsDataSize(&m_pakSliceSize, &m_pakSlicePatchListSize, false)) 1253 1254 // Picture Level Commands 1255 ENCODE_CHK_STATUS_RETURN(GetMfxStateCommandsDataSize(&m_pictureStatesSize, &m_picturePatchListSize, false)) 1256 1257 // Slice Level Commands 1258 ENCODE_CHK_STATUS_RETURN(GetMfxPrimitiveCommandsDataSize(&m_sliceStatesSize, &m_slicePatchListSize, false)) 1259 1260 return MOS_STATUS_SUCCESS; 1261 } 1262 GetMfxPrimitiveCommandsDataSize(uint32_t * commandsSize,uint32_t * patchListSize,bool isModeSpecific)1263 MOS_STATUS AvcVdencPkt::GetMfxPrimitiveCommandsDataSize( 1264 uint32_t *commandsSize, 1265 uint32_t *patchListSize, 1266 bool isModeSpecific) 1267 { 1268 ENCODE_FUNC_CALL() 1269 uint32_t cpCmdsize = 0; 1270 uint32_t cpPatchListSize = 0; 1271 1272 if (m_mfxItf && m_miItf) 1273 { 1274 uint32_t maxSize = 0, patchListMaxSize = 0; 1275 // 1 PAK_INSERT_OBJECT inserted for every end of frame/stream with 1 DW payload 1276 maxSize = m_mfxItf->MHW_GETSIZE_F(MFX_PAK_INSERT_OBJECT)() + sizeof(uint32_t); 1277 patchListMaxSize = PATCH_LIST_COMMAND(mhw::vdbox::mfx::Itf::MFC_AVC_PAK_INSERT_OBJECT_CMD); 1278 1279 maxSize += 1280 (2 * m_mfxItf->MHW_GETSIZE_F(MFX_AVC_REF_IDX_STATE)()) + 1281 (2 * m_mfxItf->MHW_GETSIZE_F(MFX_AVC_WEIGHTOFFSET_STATE)()) + 1282 m_mfxItf->MHW_GETSIZE_F(MFX_AVC_SLICE_STATE)() + 1283 MHW_VDBOX_PAK_SLICE_HEADER_OVERFLOW_SIZE + // slice header payload 1284 (2 * m_mfxItf->MHW_GETSIZE_F(MFX_PAK_INSERT_OBJECT)()) + 1285 m_miItf->MHW_GETSIZE_F(MI_BATCH_BUFFER_START)() + 1286 m_miItf->MHW_GETSIZE_F(MI_FLUSH_DW)(); 1287 1288 patchListMaxSize += 1289 (2 * PATCH_LIST_COMMAND(mhw::vdbox::mfx::Itf::MFX_AVC_REF_IDX_STATE_CMD)) + 1290 (2 * PATCH_LIST_COMMAND(mhw::vdbox::mfx::Itf::MFX_AVC_WEIGHTOFFSET_STATE_CMD)) + 1291 PATCH_LIST_COMMAND(mhw::vdbox::mfx::Itf::MFX_AVC_SLICE_STATE_CMD) + 1292 (2 * PATCH_LIST_COMMAND(mhw::vdbox::mfx::Itf::MFC_AVC_PAK_INSERT_OBJECT_CMD)) + 1293 PATCH_LIST_COMMAND(mhw::vdbox::mfx::Itf::MI_BATCH_BUFFER_START_CMD) + 1294 PATCH_LIST_COMMAND(mhw::vdbox::huc::Itf::MI_FLUSH_DW_CMD); 1295 1296 *commandsSize = maxSize; 1297 *patchListSize = patchListMaxSize; 1298 1299 m_hwInterface->GetCpInterface()->GetCpSliceLevelCmdSize(cpCmdsize, cpPatchListSize); 1300 } 1301 1302 *commandsSize += (uint32_t)cpCmdsize; 1303 *patchListSize += (uint32_t)cpPatchListSize; 1304 return MOS_STATUS_SUCCESS; 1305 } 1306 GetMfxStateCommandsDataSize(uint32_t * commandsSize,uint32_t * patchListSize,bool isShortFormat)1307 MOS_STATUS AvcVdencPkt::GetMfxStateCommandsDataSize( 1308 uint32_t *commandsSize, 1309 uint32_t *patchListSize, 1310 bool isShortFormat) 1311 { 1312 ENCODE_FUNC_CALL() 1313 ENCODE_CHK_NULL_RETURN(commandsSize); 1314 ENCODE_CHK_NULL_RETURN(patchListSize); 1315 1316 uint32_t cpCmdsize = 0; 1317 uint32_t cpPatchListSize = 0; 1318 1319 if (m_mfxItf && m_miItf) 1320 { 1321 uint32_t maxSize = 1322 m_miItf->MHW_GETSIZE_F(MI_FLUSH_DW)() + 1323 m_mfxItf->MHW_GETSIZE_F(MFX_PIPE_MODE_SELECT)() + 1324 m_mfxItf->MHW_GETSIZE_F(MFX_SURFACE_STATE)() + 1325 m_mfxItf->MHW_GETSIZE_F(MFX_PIPE_BUF_ADDR_STATE)() + 1326 m_mfxItf->MHW_GETSIZE_F(MFX_IND_OBJ_BASE_ADDR_STATE)() + 1327 2 * m_miItf->MHW_GETSIZE_F(MI_STORE_DATA_IMM)() + 1328 2 * m_miItf->MHW_GETSIZE_F(MI_STORE_REGISTER_MEM)() + 1329 8 * m_miItf->MHW_GETSIZE_F(MI_LOAD_REGISTER_REG)(); 1330 1331 uint32_t patchListMaxSize = 1332 PATCH_LIST_COMMAND(mhw::vdbox::huc::Itf::MI_FLUSH_DW_CMD) + 1333 PATCH_LIST_COMMAND(mhw::vdbox::mfx::Itf::MFX_PIPE_MODE_SELECT_CMD) + 1334 PATCH_LIST_COMMAND(mhw::vdbox::mfx::Itf::MFX_SURFACE_STATE_CMD) + 1335 PATCH_LIST_COMMAND(mhw::vdbox::mfx::Itf::MFX_PIPE_BUF_ADDR_STATE_CMD) + 1336 PATCH_LIST_COMMAND(mhw::vdbox::mfx::Itf::MFX_IND_OBJ_BASE_ADDR_STATE_CMD) + 1337 (2 * PATCH_LIST_COMMAND(mhw::vdbox::huc::Itf::MI_STORE_DATA_IMM_CMD)) + 1338 (2 * PATCH_LIST_COMMAND(mhw::vdbox::huc::Itf::MI_STORE_REGISTER_MEM_CMD)); 1339 1340 maxSize += 1341 m_mfxItf->MHW_GETSIZE_F(MFX_BSP_BUF_BASE_ADDR_STATE)() + 1342 m_mfxItf->MHW_GETSIZE_F(MFD_AVC_PICID_STATE)() + 1343 m_mfxItf->MHW_GETSIZE_F(MFX_AVC_DIRECTMODE_STATE)() + 1344 m_mfxItf->MHW_GETSIZE_F(MFX_AVC_IMG_STATE)() + 1345 m_mfxItf->MHW_GETSIZE_F(MFX_QM_STATE)() * 4; // QM_State sent 4 times 1346 1347 patchListMaxSize += 1348 PATCH_LIST_COMMAND(mhw::vdbox::mfx::Itf::MFX_BSP_BUF_BASE_ADDR_STATE_CMD) + 1349 PATCH_LIST_COMMAND(mhw::vdbox::mfx::Itf::MFD_AVC_PICID_STATE_CMD) + 1350 PATCH_LIST_COMMAND(mhw::vdbox::mfx::Itf::MFX_AVC_DIRECTMODE_STATE_CMD) + 1351 PATCH_LIST_COMMAND(mhw::vdbox::mfx::Itf::MFX_AVC_IMG_STATE_CMD) + 1352 PATCH_LIST_COMMAND(mhw::vdbox::mfx::Itf::MFX_QM_STATE_CMD) * 4; 1353 1354 maxSize += 1355 m_miItf->MHW_GETSIZE_F(MI_CONDITIONAL_BATCH_BUFFER_END)() + 1356 m_miItf->MHW_GETSIZE_F(MI_FLUSH_DW)() * 3 + // 3 extra MI_FLUSH_DWs for encode 1357 m_mfxItf->MHW_GETSIZE_F(MFX_FQM_STATE)() * 4 + // FQM_State sent 4 times 1358 m_miItf->MHW_GETSIZE_F(MI_STORE_REGISTER_MEM)() * 8 + // 5 extra register queries for encode, 3 extra slice level commands for BrcPakStatistics 1359 m_miItf->MHW_GETSIZE_F(MI_STORE_DATA_IMM)() * 3 + // slice level commands for StatusReport, BrcPakStatistics 1360 MHW_VDBOX_PAK_BITSTREAM_OVERFLOW_SIZE + // accounting for the max DW payload for PAK_INSERT_OBJECT, for frame header payload 1361 m_mfxItf->MHW_GETSIZE_F(MFX_PAK_INSERT_OBJECT)() * 4; // for inserting AU, SPS, PSP, SEI headers before first slice header 1362 1363 patchListMaxSize += 1364 PATCH_LIST_COMMAND(mhw::vdbox::huc::Itf::MI_CONDITIONAL_BATCH_BUFFER_END_CMD) + 1365 PATCH_LIST_COMMAND(mhw::vdbox::huc::Itf::MI_FLUSH_DW_CMD) * 3 + // 3 extra MI_FLUSH_DWs for encode 1366 PATCH_LIST_COMMAND(mhw::vdbox::mfx::Itf::MFX_FQM_STATE_CMD) * 4 + // FQM_State sent 4 times 1367 PATCH_LIST_COMMAND(mhw::vdbox::huc::Itf::MI_STORE_REGISTER_MEM_CMD) * 8 + // 5 extra register queries for encode, 3 extra slice level commands for BrcPakStatistics 1368 PATCH_LIST_COMMAND(mhw::vdbox::huc::Itf::MI_STORE_DATA_IMM_CMD) * 3; // slice level commands for StatusReport, BrcPakStatistics 1369 PATCH_LIST_COMMAND(mhw::vdbox::mfx::Itf::MFC_AVC_PAK_INSERT_OBJECT_CMD) * 4; // for inserting AU, SPS, PSP, SEI headers before first slice header 1370 1371 *commandsSize = maxSize; 1372 *patchListSize = patchListMaxSize; 1373 1374 m_hwInterface->GetCpInterface()->GetCpStateLevelCmdSize(cpCmdsize, cpPatchListSize); 1375 } 1376 1377 *commandsSize += (uint32_t)cpCmdsize; 1378 *patchListSize += (uint32_t)cpPatchListSize; 1379 1380 return MOS_STATUS_SUCCESS; 1381 } 1382 SelectVdboxAndGetMmioRegister(MHW_VDBOX_NODE_IND index,PMOS_COMMAND_BUFFER pCmdBuffer)1383 MmioRegistersMfx * AvcVdencPkt::SelectVdboxAndGetMmioRegister( 1384 MHW_VDBOX_NODE_IND index, 1385 PMOS_COMMAND_BUFFER pCmdBuffer) 1386 { 1387 if (m_hwInterface->m_getVdboxNodeByUMD) 1388 { 1389 pCmdBuffer->iVdboxNodeIndex = m_osInterface->pfnGetVdboxNodeId(m_osInterface, pCmdBuffer); 1390 switch (pCmdBuffer->iVdboxNodeIndex) 1391 { 1392 case MOS_VDBOX_NODE_1: 1393 index = MHW_VDBOX_NODE_1; 1394 break; 1395 case MOS_VDBOX_NODE_2: 1396 index = MHW_VDBOX_NODE_2; 1397 break; 1398 case MOS_VDBOX_NODE_INVALID: 1399 // That's a legal case meaning that we were not assigned with per-bb index because 1400 // balancing algorithm can't work (forcedly diabled or miss kernel support). 1401 // If that's the case we just proceed with the further static context assignment. 1402 break; 1403 default: 1404 // That's the case when MHW and MOS enumerations mismatch. We again proceed with the 1405 // best effort (static context assignment, but provide debug note). 1406 MHW_ASSERTMESSAGE("MOS and MHW VDBOX enumerations mismatch! Adjust HW description!"); 1407 break; 1408 } 1409 } 1410 1411 if (m_vdencItf) 1412 { 1413 return m_vdencItf->GetMmioRegisters(index); 1414 } 1415 else 1416 { 1417 MHW_ASSERTMESSAGE("Get vdenc interface failed!"); 1418 return nullptr; 1419 } 1420 } 1421 SetPerfTag(uint16_t type,uint16_t mode,uint16_t picCodingType)1422 void AvcVdencPkt::SetPerfTag(uint16_t type, uint16_t mode, uint16_t picCodingType) 1423 { 1424 ENCODE_FUNC_CALL(); 1425 1426 PerfTagSetting perfTag; 1427 perfTag.Value = 0; 1428 perfTag.Mode = mode & CODECHAL_ENCODE_MODE_BIT_MASK; 1429 perfTag.CallType = type; 1430 perfTag.PictureCodingType = picCodingType > 3 ? 0 : picCodingType; 1431 m_osInterface->pfnSetPerfTag(m_osInterface, perfTag.Value); 1432 m_osInterface->pfnIncPerfBufferID(m_osInterface); 1433 } 1434 SendPrologCmds(MOS_COMMAND_BUFFER & cmdBuffer)1435 MOS_STATUS AvcVdencPkt::SendPrologCmds( 1436 MOS_COMMAND_BUFFER &cmdBuffer) 1437 { 1438 ENCODE_FUNC_CALL(); 1439 1440 auto packetUtilities = m_pipeline->GetPacketUtilities(); 1441 ENCODE_CHK_NULL_RETURN(packetUtilities); 1442 if (m_basicFeature->m_setMarkerEnabled) 1443 { 1444 PMOS_RESOURCE presSetMarker = m_osInterface->pfnGetMarkerResource(m_osInterface); 1445 ENCODE_CHK_STATUS_RETURN(packetUtilities->SendMarkerCommand(&cmdBuffer, presSetMarker)); 1446 } 1447 1448 #ifdef _MMC_SUPPORTED 1449 ENCODE_CHK_NULL_RETURN(m_mmcState); 1450 ENCODE_CHK_STATUS_RETURN(m_mmcState->SendPrologCmd(&cmdBuffer, false)); 1451 #endif 1452 1453 MHW_GENERIC_PROLOG_PARAMS genericPrologParams; 1454 MOS_ZeroMemory(&genericPrologParams, sizeof(genericPrologParams)); 1455 genericPrologParams.pOsInterface = m_osInterface; 1456 genericPrologParams.pvMiInterface = nullptr; 1457 genericPrologParams.bMmcEnabled = m_mmcState ? m_mmcState->IsMmcEnabled() : false; 1458 ENCODE_CHK_STATUS_RETURN(Mhw_SendGenericPrologCmdNext(&cmdBuffer, &genericPrologParams, m_miItf)); 1459 1460 // Send predication command 1461 if (m_basicFeature->m_predicationEnabled) 1462 { 1463 ENCODE_CHK_STATUS_RETURN(packetUtilities->SendPredicationCommand(&cmdBuffer)); 1464 } 1465 1466 return MOS_STATUS_SUCCESS; 1467 } 1468 AllocateBatchBufferForPakSlices(uint32_t numSlices,uint16_t numPakPasses,uint8_t currRecycledBufIdx)1469 MOS_STATUS AvcVdencPkt::AllocateBatchBufferForPakSlices( 1470 uint32_t numSlices, 1471 uint16_t numPakPasses, 1472 uint8_t currRecycledBufIdx) 1473 { 1474 ENCODE_FUNC_CALL(); 1475 1476 MOS_ZeroMemory( 1477 &m_batchBufferForPakSlices[currRecycledBufIdx], 1478 sizeof(MHW_BATCH_BUFFER)); 1479 1480 // Get the slice size 1481 uint32_t size = numPakPasses * numSlices * m_pakSliceSize; 1482 1483 m_batchBufferForPakSlices[currRecycledBufIdx].bSecondLevel = true; 1484 ENCODE_CHK_STATUS_RETURN(Mhw_AllocateBb( 1485 m_osInterface, 1486 &m_batchBufferForPakSlices[currRecycledBufIdx], 1487 nullptr, 1488 size)); 1489 1490 MOS_LOCK_PARAMS lockFlags; 1491 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); 1492 lockFlags.WriteOnly = 1; 1493 uint8_t *data = (uint8_t *)m_osInterface->pfnLockResource( 1494 m_osInterface, 1495 &m_batchBufferForPakSlices[currRecycledBufIdx].OsResource, 1496 &lockFlags); 1497 1498 ENCODE_CHK_NULL_RETURN(data); 1499 1500 MOS_ZeroMemory(data, size); 1501 ENCODE_CHK_STATUS_RETURN(m_osInterface->pfnUnlockResource( 1502 m_osInterface, 1503 &m_batchBufferForPakSlices[currRecycledBufIdx].OsResource)); 1504 1505 return MOS_STATUS_SUCCESS; 1506 } 1507 ReleaseBatchBufferForPakSlices(uint8_t currRecycledBufIdx)1508 MOS_STATUS AvcVdencPkt::ReleaseBatchBufferForPakSlices( 1509 uint8_t currRecycledBufIdx) 1510 { 1511 ENCODE_FUNC_CALL(); 1512 1513 if (m_batchBufferForPakSlices[currRecycledBufIdx].iSize) 1514 { 1515 ENCODE_CHK_STATUS_RETURN(Mhw_FreeBb(m_osInterface, &m_batchBufferForPakSlices[currRecycledBufIdx], nullptr)); 1516 } 1517 1518 return MOS_STATUS_SUCCESS; 1519 } 1520 AddPictureMfxCommands(MOS_COMMAND_BUFFER & cmdBuffer)1521 MOS_STATUS AvcVdencPkt::AddPictureMfxCommands( 1522 MOS_COMMAND_BUFFER & cmdBuffer) 1523 { 1524 ENCODE_FUNC_CALL(); 1525 1526 //for gen 12, we need to add MFX wait for both KIN and VRT before and after MFX Pipemode select... 1527 SETPAR_AND_ADDCMD(MFX_WAIT, m_miItf, &cmdBuffer); 1528 SETPAR_AND_ADDCMD(MFX_PIPE_MODE_SELECT, m_mfxItf, &cmdBuffer); 1529 SETPAR_AND_ADDCMD(MFX_WAIT, m_miItf, &cmdBuffer); 1530 1531 ENCODE_CHK_STATUS_RETURN(AddAllCmds_MFX_SURFACE_STATE(&cmdBuffer)); 1532 1533 SETPAR_AND_ADDCMD(MFX_PIPE_BUF_ADDR_STATE, m_mfxItf, &cmdBuffer); 1534 SETPAR_AND_ADDCMD(MFX_IND_OBJ_BASE_ADDR_STATE, m_mfxItf, &cmdBuffer); 1535 SETPAR_AND_ADDCMD(MFX_BSP_BUF_BASE_ADDR_STATE, m_mfxItf, &cmdBuffer); 1536 1537 return MOS_STATUS_SUCCESS; 1538 } 1539 AddPictureVdencCommands(MOS_COMMAND_BUFFER & cmdBuffer)1540 MOS_STATUS AvcVdencPkt::AddPictureVdencCommands(MOS_COMMAND_BUFFER & cmdBuffer) 1541 { 1542 ENCODE_FUNC_CALL(); 1543 1544 SETPAR_AND_ADDCMD(VDENC_PIPE_MODE_SELECT, m_vdencItf, &cmdBuffer); 1545 SETPAR_AND_ADDCMD(VDENC_SRC_SURFACE_STATE, m_vdencItf, &cmdBuffer); 1546 SETPAR_AND_ADDCMD(VDENC_REF_SURFACE_STATE, m_vdencItf, &cmdBuffer); 1547 SETPAR_AND_ADDCMD(VDENC_DS_REF_SURFACE_STATE, m_vdencItf, &cmdBuffer); 1548 SETPAR_AND_ADDCMD(VDENC_PIPE_BUF_ADDR_STATE, m_vdencItf, &cmdBuffer); 1549 1550 return MOS_STATUS_SUCCESS; 1551 } 1552 SendSlice(PMOS_COMMAND_BUFFER cmdBuffer)1553 MOS_STATUS AvcVdencPkt::SendSlice(PMOS_COMMAND_BUFFER cmdBuffer) 1554 { 1555 ENCODE_FUNC_CALL(); 1556 1557 ENCODE_CHK_NULL_RETURN(cmdBuffer); 1558 1559 ENCODE_CHK_STATUS_RETURN(AddAllCmds_MFX_AVC_REF_IDX_STATE(cmdBuffer)); 1560 1561 ENCODE_CHK_STATUS_RETURN(AddAllCmds_MFX_AVC_WEIGHTOFFSET_STATE(cmdBuffer)); 1562 1563 auto brcFeature = dynamic_cast<AvcEncodeBRC *>(m_featureManager->GetFeature(AvcFeatureIDs::avcBrcFeature)); 1564 ENCODE_CHK_NULL_RETURN(brcFeature); 1565 1566 if (!brcFeature->IsVdencBrcEnabled()) 1567 { 1568 SETPAR_AND_ADDCMD(MFX_AVC_SLICE_STATE, m_mfxItf, cmdBuffer); 1569 SETPAR_AND_ADDCMD(VDENC_AVC_SLICE_STATE, m_vdencItf, cmdBuffer); 1570 } 1571 else 1572 { 1573 PMHW_BATCH_BUFFER secondLevelBatchBuffer = brcFeature->GetBatchBufferForVdencImgStat(); 1574 // current location to add cmds in 2nd level batch buffer 1575 secondLevelBatchBuffer->iCurrent = 0; 1576 // reset starting location (offset) executing 2nd level batch buffer for each frame & each pass 1577 // base part of 2nd lvl BB must be aligned for CODECHAL_CACHELINE_SIZE 1578 secondLevelBatchBuffer->dwOffset = MOS_ALIGN_CEIL(m_hwInterface->m_vdencBrcImgStateBufferSize, CODECHAL_CACHELINE_SIZE) + 1579 m_basicFeature->m_curNumSlices * brcFeature->GetVdencOneSliceStateSize(); 1580 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_BATCH_BUFFER_START)(cmdBuffer, secondLevelBatchBuffer)); 1581 HalOcaInterfaceNext::OnSubLevelBBStart( 1582 *cmdBuffer, 1583 m_osInterface->pOsContext, 1584 &secondLevelBatchBuffer->OsResource, 1585 secondLevelBatchBuffer->dwOffset, 1586 false, 1587 brcFeature->GetVdencOneSliceStateSize()); 1588 } 1589 1590 ENCODE_CHK_STATUS_RETURN(AddAllCmds_MFX_PAK_INSERT_OBJECT(cmdBuffer)); 1591 1592 SETPAR_AND_ADDCMD(VDENC_WEIGHTSOFFSETS_STATE, m_vdencItf, cmdBuffer); 1593 SETPAR_AND_ADDCMD(VDENC_WALKER_STATE, m_vdencItf, cmdBuffer); 1594 1595 return MOS_STATUS_SUCCESS; 1596 } 1597 EndStatusReport(uint32_t srType,MOS_COMMAND_BUFFER * cmdBuffer)1598 MOS_STATUS AvcVdencPkt::EndStatusReport( 1599 uint32_t srType, 1600 MOS_COMMAND_BUFFER *cmdBuffer) 1601 { 1602 ENCODE_FUNC_CALL(); 1603 ENCODE_CHK_NULL_RETURN(cmdBuffer); 1604 1605 ENCODE_CHK_STATUS_RETURN(MediaPacket::EndStatusReportNext(srType, cmdBuffer)); 1606 1607 MediaPerfProfiler *perfProfiler = MediaPerfProfiler::Instance(); 1608 ENCODE_CHK_NULL_RETURN(perfProfiler); 1609 ENCODE_CHK_STATUS_RETURN(perfProfiler->AddPerfCollectEndCmd( 1610 (void *)m_pipeline, m_osInterface, m_miItf, cmdBuffer)); 1611 1612 return MOS_STATUS_SUCCESS; 1613 } 1614 UpdateParameters()1615 void AvcVdencPkt::UpdateParameters() 1616 { 1617 ENCODE_FUNC_CALL(); 1618 1619 if (!m_pipeline->IsSingleTaskPhaseSupported()) 1620 { 1621 m_osInterface->pfnResetPerfBufferID(m_osInterface); 1622 } 1623 1624 m_basicFeature->m_newPpsHeader = 0; 1625 m_basicFeature->m_newSeqHeader = 0; 1626 } 1627 EnsureAllCommandsExecuted(MOS_COMMAND_BUFFER & cmdBuffer)1628 MOS_STATUS AvcVdencPkt::EnsureAllCommandsExecuted(MOS_COMMAND_BUFFER &cmdBuffer) 1629 { 1630 ENCODE_FUNC_CALL(); 1631 1632 // Send MI_FLUSH command 1633 auto &flushDwParams = m_miItf->MHW_GETPAR_F(MI_FLUSH_DW)(); 1634 flushDwParams = {}; 1635 flushDwParams.bVideoPipelineCacheInvalidate = true; 1636 ENCODE_CHK_STATUS_RETURN(m_miItf->MHW_ADDCMD_F(MI_FLUSH_DW)(&cmdBuffer)); 1637 1638 return MOS_STATUS_SUCCESS; 1639 } 1640 ReadMfcStatus(MOS_COMMAND_BUFFER & cmdBuffer)1641 MOS_STATUS AvcVdencPkt::ReadMfcStatus(MOS_COMMAND_BUFFER &cmdBuffer) 1642 { 1643 ENCODE_FUNC_CALL(); 1644 1645 ENCODE_CHK_NULL_RETURN(m_hwInterface); 1646 1647 MOS_RESOURCE *osResource = nullptr; 1648 uint32_t offset = 0; 1649 1650 EncodeStatusReadParams params; 1651 MOS_ZeroMemory(¶ms, sizeof(params)); 1652 1653 ENCODE_CHK_STATUS_RETURN(m_statusReport->GetAddress(encode::statusReportMfxBitstreamByteCountPerFrame, osResource, offset)); 1654 params.resBitstreamByteCountPerFrame = osResource; 1655 params.bitstreamByteCountPerFrameOffset = offset; 1656 1657 ENCODE_CHK_STATUS_RETURN(m_statusReport->GetAddress(encode::statusReportMfxBitstreamSyntaxElementOnlyBitCount, osResource, offset)); 1658 params.resBitstreamSyntaxElementOnlyBitCount = osResource; 1659 params.bitstreamSyntaxElementOnlyBitCountOffset = offset; 1660 1661 ENCODE_CHK_STATUS_RETURN(m_statusReport->GetAddress(encode::statusReportQPStatusCount, osResource, offset)); 1662 params.resQpStatusCount = osResource; 1663 params.qpStatusCountOffset = offset; 1664 1665 ENCODE_CHK_STATUS_RETURN(m_statusReport->GetAddress(encode::statusReportImageStatusMask, osResource, offset)); 1666 params.resImageStatusMask = osResource; 1667 params.imageStatusMaskOffset = offset; 1668 1669 ENCODE_CHK_STATUS_RETURN(m_statusReport->GetAddress(encode::statusReportImageStatusCtrl, osResource, offset)); 1670 params.resImageStatusCtrl = osResource; 1671 params.imageStatusCtrlOffset = offset; 1672 1673 ENCODE_CHK_STATUS_RETURN(m_statusReport->GetAddress(encode::statusReportNumSlices, osResource, offset)); 1674 params.resNumSlices = osResource; 1675 params.numSlicesOffset = offset; 1676 1677 RUN_FEATURE_INTERFACE_RETURN(AvcEncodeBRC, AvcFeatureIDs::avcBrcFeature, SetMfcStatusParams, params); 1678 1679 ENCODE_CHK_COND_RETURN((m_vdboxIndex > m_mfxItf->GetMaxVdboxIndex()), "ERROR - vdbox index exceed the maximum"); 1680 1681 SETPAR_AND_ADDCMD(MI_FLUSH_DW, m_miItf, &cmdBuffer); 1682 1683 MmioRegistersMfx *mmioRegisters = SelectVdboxAndGetMmioRegister(m_vdboxIndex, &cmdBuffer); 1684 CODEC_HW_CHK_NULL_RETURN(mmioRegisters); 1685 m_pResource = params.resBitstreamByteCountPerFrame; 1686 m_dwOffset = params.bitstreamByteCountPerFrameOffset; 1687 m_dwValue = mmioRegisters->mfcBitstreamBytecountFrameRegOffset; 1688 SETPAR_AND_ADDCMD(MI_STORE_REGISTER_MEM, m_miItf, &cmdBuffer); 1689 1690 m_pResource = params.resBitstreamSyntaxElementOnlyBitCount; 1691 m_dwOffset = params.bitstreamSyntaxElementOnlyBitCountOffset; 1692 m_dwValue = mmioRegisters->mfcBitstreamSeBitcountFrameRegOffset; 1693 SETPAR_AND_ADDCMD(MI_STORE_REGISTER_MEM, m_miItf, &cmdBuffer); 1694 1695 m_pResource = params.resQpStatusCount; 1696 m_dwOffset = params.qpStatusCountOffset; 1697 m_dwValue = mmioRegisters->mfcQPStatusCountOffset; 1698 SETPAR_AND_ADDCMD(MI_STORE_REGISTER_MEM, m_miItf, &cmdBuffer); 1699 1700 if (mmioRegisters->mfcAvcNumSlicesRegOffset > 0) 1701 { 1702 m_pResource = params.resNumSlices; 1703 m_dwOffset = params.numSlicesOffset; 1704 m_dwValue = mmioRegisters->mfcAvcNumSlicesRegOffset; 1705 SETPAR_AND_ADDCMD(MI_STORE_REGISTER_MEM, m_miItf, &cmdBuffer); 1706 } 1707 1708 if (params.vdencBrcEnabled) 1709 { 1710 // Store PAK FrameSize MMIO to DMEM for HuC next BRC pass of current frame and first pass of next frame. 1711 for (int i = 0; i < 2; i++) 1712 { 1713 if (params.resVdencBrcUpdateDmemBufferPtr[i]) 1714 { 1715 m_pResource = params.resVdencBrcUpdateDmemBufferPtr[i]; 1716 m_dwOffset = 5 * sizeof(uint32_t); 1717 m_dwValue = mmioRegisters->mfcBitstreamBytecountFrameRegOffset; 1718 SETPAR_AND_ADDCMD(MI_STORE_REGISTER_MEM, m_miItf, &cmdBuffer); 1719 1720 if (params.vdencBrcNumOfSliceOffset) 1721 { 1722 m_pResource = params.resVdencBrcUpdateDmemBufferPtr[i]; 1723 m_dwOffset = params.vdencBrcNumOfSliceOffset; 1724 m_dwValue = mmioRegisters->mfcAvcNumSlicesRegOffset; 1725 SETPAR_AND_ADDCMD(MI_STORE_REGISTER_MEM, m_miItf, &cmdBuffer); 1726 } 1727 } 1728 } 1729 } 1730 1731 ENCODE_CHK_STATUS_RETURN(ReadImageStatus(params, &cmdBuffer)) 1732 1733 return MOS_STATUS_SUCCESS; 1734 } 1735 ReadImageStatus(const EncodeStatusReadParams & params,PMOS_COMMAND_BUFFER cmdBuffer)1736 MOS_STATUS AvcVdencPkt::ReadImageStatus( 1737 const EncodeStatusReadParams& params, 1738 PMOS_COMMAND_BUFFER cmdBuffer) 1739 { 1740 MOS_STATUS eStatus = MOS_STATUS_SUCCESS; 1741 1742 CODEC_HW_FUNCTION_ENTER; 1743 1744 CODEC_HW_CHK_NULL_RETURN(cmdBuffer); 1745 1746 CODEC_HW_CHK_COND_RETURN((m_vdboxIndex > m_mfxItf->GetMaxVdboxIndex()), "ERROR - vdbox index exceed the maximum"); 1747 1748 MmioRegistersMfx *mmioRegisters = SelectVdboxAndGetMmioRegister(m_vdboxIndex, cmdBuffer); 1749 CODEC_HW_CHK_NULL_RETURN(mmioRegisters); 1750 m_pResource = params.resImageStatusMask; 1751 m_dwOffset = params.imageStatusMaskOffset; 1752 m_dwValue = mmioRegisters->mfcImageStatusMaskRegOffset; 1753 SETPAR_AND_ADDCMD(MI_STORE_REGISTER_MEM, m_miItf, cmdBuffer); 1754 1755 m_pResource = params.resImageStatusCtrl; 1756 m_dwOffset = params.imageStatusCtrlOffset; 1757 m_dwValue = mmioRegisters->mfcImageStatusCtrlRegOffset; 1758 SETPAR_AND_ADDCMD(MI_STORE_REGISTER_MEM, m_miItf, cmdBuffer); 1759 1760 // VDEnc dynamic slice overflow semaphore, DW0 is SW programmed mask(MFX_IMAGE_MASK does not support), DW1 is MFX_IMAGE_STATUS_CONTROL 1761 if (params.vdencBrcEnabled) 1762 { 1763 MHW_VDBOX_PIPE_MODE_SELECT_PARAMS pipeModeSelectParams; 1764 1765 // Added for VDEnc slice overflow bit in MFC_IMAGE_STATUS_CONTROL 1766 // The bit is connected on the non-AVC encoder side of MMIO register. 1767 // Need a dummy MFX_PIPE_MODE_SELECT to decoder and read this register. 1768 if (params.waReadVDEncOverflowStatus) 1769 { 1770 auto &mfxPipeModeSelectParams = m_mfxItf->MHW_GETPAR_F(MFX_PIPE_MODE_SELECT)(); 1771 mfxPipeModeSelectParams.Mode = CODECHAL_DECODE_MODE_AVCVLD; 1772 SETPAR_AND_ADDCMD(MFX_PIPE_MODE_SELECT, m_mfxItf, cmdBuffer); 1773 } 1774 1775 // Store MFC_IMAGE_STATUS_CONTROL MMIO to DMEM for HuC next BRC pass of current frame and first pass of next frame. 1776 for (int i = 0; i < 2; i++) 1777 { 1778 if (params.resVdencBrcUpdateDmemBufferPtr[i]) 1779 { 1780 m_pResource = params.resVdencBrcUpdateDmemBufferPtr[i]; 1781 m_dwOffset = 7 * sizeof(uint32_t); // offset of SliceSizeViolation in HUC_BRC_UPDATE_DMEM 1782 m_dwValue = mmioRegisters->mfcImageStatusCtrlRegOffset; 1783 SETPAR_AND_ADDCMD(MI_STORE_REGISTER_MEM, m_miItf, cmdBuffer); 1784 } 1785 } 1786 1787 // Restore MFX_PIPE_MODE_SELECT to encode mode 1788 if (params.waReadVDEncOverflowStatus) 1789 { 1790 auto &mfxPipeModeSelectParams = m_mfxItf->MHW_GETPAR_F(MFX_PIPE_MODE_SELECT)(); 1791 mfxPipeModeSelectParams.Mode = params.mode; 1792 mfxPipeModeSelectParams.vdencMode = 1; 1793 SETPAR_AND_ADDCMD(MFX_PIPE_MODE_SELECT, m_mfxItf, cmdBuffer); 1794 } 1795 } 1796 1797 SETPAR_AND_ADDCMD(MI_FLUSH_DW, m_miItf, cmdBuffer); 1798 1799 return eStatus; 1800 } 1801 StoreNumPasses(MOS_COMMAND_BUFFER & cmdBuffer)1802 MOS_STATUS AvcVdencPkt::StoreNumPasses(MOS_COMMAND_BUFFER &cmdBuffer) 1803 { 1804 ENCODE_FUNC_CALL(); 1805 1806 MOS_RESOURCE * osResource = nullptr; 1807 uint32_t offset = 0; 1808 ENCODE_CHK_STATUS_RETURN(m_statusReport->GetAddress(statusReportNumberPasses, osResource, offset)); 1809 1810 m_pResource = osResource; 1811 m_dwOffset = offset; 1812 m_dwValue = m_pipeline->GetCurrentPass() + 1; 1813 SETPAR_AND_ADDCMD(MI_STORE_DATA_IMM, m_miItf, &cmdBuffer); 1814 1815 return MOS_STATUS_SUCCESS; 1816 } 1817 LockBatchBufferForPakSlices()1818 MOS_STATUS AvcVdencPkt::LockBatchBufferForPakSlices() 1819 { 1820 ENCODE_FUNC_CALL(); 1821 1822 m_useBatchBufferForPakSlices = false; 1823 if (m_pipeline->IsSingleTaskPhaseSupported() && m_pipeline->IsSingleTaskPhaseSupportedInPak()) 1824 { 1825 if (m_pipeline->IsFirstPass()) 1826 { 1827 // The same buffer is used for all slices for all passes. 1828 uint32_t batchBufferForPakSlicesSize = m_pipeline->GetPassNum() * m_basicFeature->m_numSlices * m_pakSliceSize; 1829 if (batchBufferForPakSlicesSize > 1830 (uint32_t)m_batchBufferForPakSlices[m_pipeline->m_currRecycledBufIdx].iSize) 1831 { 1832 if (m_batchBufferForPakSlices[m_pipeline->m_currRecycledBufIdx].iSize) 1833 { 1834 ENCODE_CHK_STATUS_RETURN(ReleaseBatchBufferForPakSlices(m_pipeline->m_currRecycledBufIdx)); 1835 } 1836 1837 ENCODE_CHK_STATUS_RETURN(AllocateBatchBufferForPakSlices( 1838 m_basicFeature->m_numSlices, 1839 m_pipeline->GetPassNum(), 1840 m_pipeline->m_currRecycledBufIdx)); 1841 } 1842 } 1843 ENCODE_CHK_STATUS_RETURN(Mhw_LockBb( 1844 m_osInterface, 1845 &m_batchBufferForPakSlices[m_pipeline->m_currRecycledBufIdx])); 1846 m_useBatchBufferForPakSlices = true; 1847 } 1848 1849 return MOS_STATUS_SUCCESS; 1850 } 1851 UnlockBatchBufferForPakSlices()1852 MOS_STATUS AvcVdencPkt::UnlockBatchBufferForPakSlices() 1853 { 1854 ENCODE_FUNC_CALL(); 1855 1856 if (m_useBatchBufferForPakSlices) 1857 { 1858 ENCODE_CHK_STATUS_RETURN(Mhw_UnlockBb( 1859 m_osInterface, 1860 &m_batchBufferForPakSlices[m_pipeline->m_currRecycledBufIdx], 1861 m_pipeline->IsLastPass())); 1862 } 1863 1864 return MOS_STATUS_SUCCESS; 1865 } 1866 MHW_SETPAR_DECL_SRC(VDENC_PIPE_BUF_ADDR_STATE,AvcVdencPkt)1867 MHW_SETPAR_DECL_SRC(VDENC_PIPE_BUF_ADDR_STATE, AvcVdencPkt) 1868 { 1869 params.intraRowStoreScratchBuffer = m_vdencIntraRowStoreScratch; 1870 params.mfdIntraRowStoreScratchBuffer = m_intraRowStoreScratchBuffer; 1871 params.numActiveRefL0 = m_sliceParams->num_ref_idx_l0_active_minus1 + 1; 1872 params.numActiveRefL1 = m_sliceParams->num_ref_idx_l1_active_minus1 + 1; 1873 1874 ENCODE_CHK_STATUS_RETURN(m_basicFeature->m_ref->MHW_SETPAR_F(VDENC_PIPE_BUF_ADDR_STATE)(params)); 1875 1876 auto settings = static_cast<AvcVdencFeatureSettings *>(m_legacyFeatureManager->GetFeatureSettings()->GetConstSettings()); 1877 ENCODE_CHK_NULL_RETURN(settings); 1878 1879 // PerfMode; replace all 4x Ds refs with the 1st L0 ref 1880 if (m_vdencItf->IsPerfModeSupported() && 1881 settings->perfModeEnabled[m_seqParam->TargetUsage] && 1882 params.numActiveRefL0 == 1) 1883 { 1884 params.numActiveRefL0 = 2; 1885 params.refs[1] = nullptr; 1886 params.refsDsStage1[1] = params.refsDsStage1[0]; 1887 } 1888 1889 return MOS_STATUS_SUCCESS; 1890 } 1891 MHW_SETPAR_DECL_SRC(VD_PIPELINE_FLUSH,AvcVdencPkt)1892 MHW_SETPAR_DECL_SRC(VD_PIPELINE_FLUSH, AvcVdencPkt) 1893 { 1894 // MfxPipeDone should be set for all super slices except the last super slice and should not be set for tail insertion. 1895 params.waitDoneMFX = m_lastSlice ? 1896 ((m_basicFeature->m_lastPicInStream || m_basicFeature->m_lastPicInSeq) ? false : true) : true; 1897 params.waitDoneVDENC = true; 1898 params.flushVDENC = true; 1899 params.waitDoneVDCmdMsgParser = true; 1900 1901 return MOS_STATUS_SUCCESS; 1902 } 1903 AddAllCmds_MFX_QM_STATE(PMOS_COMMAND_BUFFER cmdBuffer) const1904 MOS_STATUS AvcVdencPkt::AddAllCmds_MFX_QM_STATE(PMOS_COMMAND_BUFFER cmdBuffer) const 1905 { 1906 ENCODE_FUNC_CALL(); 1907 ENCODE_CHK_NULL_RETURN(cmdBuffer); 1908 1909 MHW_MI_CHK_NULL(m_basicFeature->m_iqWeightScaleLists); 1910 1911 auto ¶ms = m_mfxItf->MHW_GETPAR_F(MFX_QM_STATE)(); 1912 params = {}; 1913 1914 auto iqMatrix = (PMHW_VDBOX_AVC_QM_PARAMS)m_basicFeature->m_iqWeightScaleLists; 1915 uint8_t *qMatrix = (uint8_t *)params.quantizermatrix; 1916 1917 for (uint8_t i = 0; i < 16; i++) 1918 { 1919 params.quantizermatrix[i] = 0; 1920 } 1921 1922 params.qmType = avcQmIntra4x4; 1923 for (auto i = 0; i < 3; i++) 1924 { 1925 for (auto ii = 0; ii < 16; ii++) 1926 { 1927 qMatrix[i * 16 + ii] = iqMatrix->List4x4[i][ii]; 1928 } 1929 } 1930 m_mfxItf->MHW_ADDCMD_F(MFX_QM_STATE)(cmdBuffer); 1931 1932 params.qmType = avcQmInter4x4; 1933 for (auto i = 3; i < 6; i++) 1934 { 1935 for (auto ii = 0; ii < 16; ii++) 1936 { 1937 qMatrix[(i - 3) * 16 + ii] = iqMatrix->List4x4[i][ii]; 1938 } 1939 } 1940 m_mfxItf->MHW_ADDCMD_F(MFX_QM_STATE)(cmdBuffer); 1941 1942 params.qmType = avcQmIntra8x8; 1943 for (auto ii = 0; ii < 64; ii++) 1944 { 1945 qMatrix[ii] = iqMatrix->List8x8[0][ii]; 1946 } 1947 m_mfxItf->MHW_ADDCMD_F(MFX_QM_STATE)(cmdBuffer); 1948 1949 params.qmType = avcQmInter8x8; 1950 for (auto ii = 0; ii < 64; ii++) 1951 { 1952 qMatrix[ii] = iqMatrix->List8x8[1][ii]; 1953 } 1954 m_mfxItf->MHW_ADDCMD_F(MFX_QM_STATE)(cmdBuffer); 1955 1956 return MOS_STATUS_SUCCESS; 1957 } 1958 AddAllCmds_MFX_FQM_STATE(PMOS_COMMAND_BUFFER cmdBuffer) const1959 MOS_STATUS AvcVdencPkt::AddAllCmds_MFX_FQM_STATE(PMOS_COMMAND_BUFFER cmdBuffer) const 1960 { 1961 ENCODE_FUNC_CALL(); 1962 ENCODE_CHK_NULL_RETURN(cmdBuffer); 1963 1964 auto settings = static_cast<AvcVdencFeatureSettings *>(m_legacyFeatureManager->GetFeatureSettings()->GetConstSettings()); 1965 ENCODE_CHK_NULL_RETURN(settings); 1966 1967 MHW_MI_CHK_NULL(m_basicFeature->m_iqWeightScaleLists); 1968 1969 auto ¶ms = m_mfxItf->MHW_GETPAR_F(MFX_FQM_STATE)(); 1970 params = {}; 1971 1972 auto iqMatrix = (PMHW_VDBOX_AVC_QM_PARAMS)m_basicFeature->m_iqWeightScaleLists; 1973 uint16_t *fqMatrix = (uint16_t*)params.quantizermatrix; 1974 1975 for (uint8_t i = 0; i < 32; i++) 1976 { 1977 params.quantizermatrix[i] = 0; 1978 } 1979 1980 params.qmType = avcQmIntra4x4; 1981 for (auto i = 0; i < 3; i++) 1982 { 1983 for (auto ii = 0; ii < 16; ii++) 1984 { 1985 fqMatrix[i * 16 + ii] = GetReciprocalScalingValue(iqMatrix->List4x4[i][settings->columnScan4x4[ii]]); 1986 } 1987 } 1988 m_mfxItf->MHW_ADDCMD_F(MFX_FQM_STATE)(cmdBuffer); 1989 1990 params.qmType = avcQmInter4x4; 1991 for (auto i = 0; i < 3; i++) 1992 { 1993 for (auto ii = 0; ii < 16; ii++) 1994 { 1995 fqMatrix[i * 16 + ii] = GetReciprocalScalingValue(iqMatrix->List4x4[i + 3][settings->columnScan4x4[ii]]); 1996 } 1997 } 1998 m_mfxItf->MHW_ADDCMD_F(MFX_FQM_STATE)(cmdBuffer); 1999 2000 params.qmType = avcQmIntra8x8; 2001 for (auto i = 0; i < 64; i++) 2002 { 2003 fqMatrix[i] = GetReciprocalScalingValue(iqMatrix->List8x8[0][settings->columnScan8x8[i]]); 2004 } 2005 m_mfxItf->MHW_ADDCMD_F(MFX_FQM_STATE)(cmdBuffer); 2006 2007 params.qmType = avcQmInter8x8; 2008 for (auto i = 0; i < 64; i++) 2009 { 2010 fqMatrix[i] = GetReciprocalScalingValue(iqMatrix->List8x8[1][settings->columnScan8x8[i]]); 2011 } 2012 m_mfxItf->MHW_ADDCMD_F(MFX_FQM_STATE)(cmdBuffer); 2013 2014 return MOS_STATUS_SUCCESS; 2015 } 2016 fill_pad_with_value(PMOS_SURFACE psSurface,uint32_t real_height,uint32_t aligned_height) const2017 void AvcVdencPkt::fill_pad_with_value(PMOS_SURFACE psSurface, uint32_t real_height, uint32_t aligned_height) const 2018 { 2019 ENCODE_CHK_NULL_NO_STATUS_RETURN(psSurface); 2020 2021 // unaligned surfaces only 2022 if (aligned_height <= real_height || aligned_height > psSurface->dwHeight) 2023 { 2024 return; 2025 } 2026 2027 if (psSurface->OsResource.TileType == MOS_TILE_INVALID) 2028 { 2029 return; 2030 } 2031 2032 if (psSurface->Format == Format_NV12 || psSurface->Format == Format_P010) 2033 { 2034 uint32_t pitch = psSurface->dwPitch; 2035 uint32_t UVPlaneOffset = psSurface->UPlaneOffset.iSurfaceOffset; 2036 uint32_t YPlaneOffset = psSurface->dwOffset; 2037 uint32_t pad_rows = aligned_height - real_height; 2038 uint32_t y_plane_size = pitch * real_height; 2039 uint32_t uv_plane_size = pitch * real_height / 2; 2040 2041 MOS_LOCK_PARAMS lockFlags; 2042 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS)); 2043 lockFlags.WriteOnly = 1; 2044 2045 // padding for the linear format buffer. 2046 if (psSurface->OsResource.TileType == MOS_TILE_LINEAR) 2047 { 2048 #include "media_packet.h" 2049 uint8_t *src_data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &(psSurface->OsResource), &lockFlags); 2050 2051 if (!src_data) 2052 return; 2053 2054 uint8_t *src_data_y = src_data + YPlaneOffset; 2055 uint8_t *src_data_y_end = src_data_y + y_plane_size; 2056 for (uint32_t i = 0; i < pad_rows; i++) 2057 { 2058 MOS_SecureMemcpy(src_data_y_end + i * pitch, pitch, src_data_y_end - pitch, pitch); 2059 } 2060 2061 uint8_t *src_data_uv = src_data + UVPlaneOffset; 2062 uint8_t *src_data_uv_end = src_data_uv + uv_plane_size; 2063 for (uint32_t i = 0; i < pad_rows / 2; i++) 2064 { 2065 MOS_SecureMemcpy(src_data_uv_end + i * pitch, pitch, src_data_uv_end - pitch, pitch); 2066 } 2067 2068 m_osInterface->pfnUnlockResource(m_osInterface, &(psSurface->OsResource)); 2069 } 2070 else 2071 { 2072 // we don't copy out the whole tiled buffer to linear and padding on the tiled buffer directly. 2073 lockFlags.TiledAsTiled = 1; 2074 2075 uint8_t *src_data = (uint8_t *)m_osInterface->pfnLockResource(m_osInterface, &(psSurface->OsResource), &lockFlags); 2076 if (!src_data) 2077 return; 2078 2079 uint8_t *padding_data = (uint8_t *)MOS_AllocMemory(pitch * pad_rows); 2080 2081 // Copy last Y row data to linear padding data. 2082 GMM_RES_COPY_BLT gmmResCopyBlt = {0}; 2083 gmmResCopyBlt.Gpu.pData = src_data; 2084 gmmResCopyBlt.Gpu.OffsetX = 0; 2085 gmmResCopyBlt.Gpu.OffsetY = (YPlaneOffset + y_plane_size - pitch) / pitch; 2086 gmmResCopyBlt.Sys.pData = padding_data; 2087 gmmResCopyBlt.Sys.RowPitch = pitch; 2088 gmmResCopyBlt.Sys.BufferSize = pitch * pad_rows; 2089 gmmResCopyBlt.Sys.SlicePitch = pitch; 2090 gmmResCopyBlt.Blt.Slices = 1; 2091 gmmResCopyBlt.Blt.Upload = false; 2092 gmmResCopyBlt.Blt.Width = psSurface->dwWidth; 2093 gmmResCopyBlt.Blt.Height = 1; 2094 psSurface->OsResource.pGmmResInfo->CpuBlt(&gmmResCopyBlt); 2095 // Fill the remain padding lines with last Y row data. 2096 for (uint32_t i = 1; i < pad_rows; i++) 2097 { 2098 MOS_SecureMemcpy(padding_data + i * pitch, pitch, padding_data, pitch); 2099 } 2100 // Filling the padding for Y. 2101 gmmResCopyBlt.Gpu.pData = src_data; 2102 gmmResCopyBlt.Gpu.OffsetX = 0; 2103 gmmResCopyBlt.Gpu.OffsetY = (YPlaneOffset + y_plane_size) / pitch; 2104 gmmResCopyBlt.Sys.pData = padding_data; 2105 gmmResCopyBlt.Sys.RowPitch = pitch; 2106 gmmResCopyBlt.Sys.BufferSize = pitch * pad_rows; 2107 gmmResCopyBlt.Sys.SlicePitch = pitch; 2108 gmmResCopyBlt.Blt.Slices = 1; 2109 gmmResCopyBlt.Blt.Upload = true; 2110 gmmResCopyBlt.Blt.Width = psSurface->dwWidth; 2111 gmmResCopyBlt.Blt.Height = pad_rows; 2112 psSurface->OsResource.pGmmResInfo->CpuBlt(&gmmResCopyBlt); 2113 2114 // Copy last UV row data to linear padding data. 2115 gmmResCopyBlt.Gpu.pData = src_data; 2116 gmmResCopyBlt.Gpu.OffsetX = 0; 2117 gmmResCopyBlt.Gpu.OffsetY = (UVPlaneOffset + uv_plane_size - pitch) / pitch; 2118 gmmResCopyBlt.Sys.pData = padding_data; 2119 gmmResCopyBlt.Sys.RowPitch = pitch; 2120 gmmResCopyBlt.Sys.BufferSize = pitch * pad_rows / 2; 2121 gmmResCopyBlt.Sys.SlicePitch = pitch; 2122 gmmResCopyBlt.Blt.Slices = 1; 2123 gmmResCopyBlt.Blt.Upload = false; 2124 gmmResCopyBlt.Blt.Width = psSurface->dwWidth; 2125 gmmResCopyBlt.Blt.Height = 1; 2126 psSurface->OsResource.pGmmResInfo->CpuBlt(&gmmResCopyBlt); 2127 // Fill the remain padding lines with last UV row data. 2128 for (uint32_t i = 1; i < pad_rows / 2; i++) 2129 { 2130 MOS_SecureMemcpy(padding_data + i * pitch, pitch, padding_data, pitch); 2131 } 2132 // Filling the padding for UV. 2133 gmmResCopyBlt.Gpu.pData = src_data; 2134 gmmResCopyBlt.Gpu.OffsetX = 0; 2135 gmmResCopyBlt.Gpu.OffsetY = (UVPlaneOffset + uv_plane_size) / pitch; 2136 gmmResCopyBlt.Sys.pData = padding_data; 2137 gmmResCopyBlt.Sys.RowPitch = pitch; 2138 gmmResCopyBlt.Sys.BufferSize = pitch * pad_rows / 2; 2139 gmmResCopyBlt.Sys.SlicePitch = pitch; 2140 gmmResCopyBlt.Blt.Slices = 1; 2141 gmmResCopyBlt.Blt.Upload = true; 2142 gmmResCopyBlt.Blt.Width = psSurface->dwWidth; 2143 gmmResCopyBlt.Blt.Height = pad_rows / 2; 2144 psSurface->OsResource.pGmmResInfo->CpuBlt(&gmmResCopyBlt); 2145 2146 MOS_FreeMemory(padding_data); 2147 padding_data = nullptr; 2148 m_osInterface->pfnUnlockResource(m_osInterface, &(psSurface->OsResource)); 2149 } 2150 } 2151 } 2152 AddAllCmds_MFX_SURFACE_STATE(PMOS_COMMAND_BUFFER cmdBuffer) const2153 MOS_STATUS AvcVdencPkt::AddAllCmds_MFX_SURFACE_STATE(PMOS_COMMAND_BUFFER cmdBuffer) const 2154 { 2155 ENCODE_FUNC_CALL(); 2156 ENCODE_CHK_NULL_RETURN(cmdBuffer); 2157 2158 m_curMfxSurfStateId = CODECHAL_MFX_REF_SURFACE_ID; 2159 SETPAR_AND_ADDCMD(MFX_SURFACE_STATE, m_mfxItf, cmdBuffer); 2160 2161 m_curMfxSurfStateId = CODECHAL_MFX_SRC_SURFACE_ID; 2162 SETPAR_AND_ADDCMD(MFX_SURFACE_STATE, m_mfxItf, cmdBuffer); 2163 2164 m_curMfxSurfStateId = CODECHAL_MFX_DSRECON_SURFACE_ID; 2165 SETPAR_AND_ADDCMD(MFX_SURFACE_STATE, m_mfxItf, cmdBuffer); 2166 2167 //add fill_pad_with_value function 2168 auto waTable = m_osInterface == nullptr ? nullptr : m_osInterface->pfnGetWaTable(m_osInterface); 2169 if (waTable) 2170 { 2171 if (MEDIA_IS_WA(waTable, Wa_AvcUnalignedHeight)) 2172 { 2173 if (m_basicFeature->m_frame_cropping_flag) 2174 { 2175 uint32_t crop_unit_y = 2 * (2 - m_basicFeature->m_frame_mbs_only_flag); 2176 uint32_t real_height = m_basicFeature->m_oriFrameHeight - (m_basicFeature->m_frame_crop_bottom_offset * crop_unit_y); 2177 uint32_t aligned_height = MOS_ALIGN_CEIL(real_height, CODEC_AVC_MIN_BLOCK_HEIGHT); 2178 2179 fill_pad_with_value(m_basicFeature->m_rawSurfaceToPak, real_height, aligned_height); 2180 } 2181 } 2182 } 2183 2184 return MOS_STATUS_SUCCESS; 2185 } 2186 AddAllCmds_MFX_PAK_INSERT_OBJECT(PMOS_COMMAND_BUFFER cmdBuffer) const2187 MOS_STATUS AvcVdencPkt::AddAllCmds_MFX_PAK_INSERT_OBJECT(PMOS_COMMAND_BUFFER cmdBuffer) const 2188 { 2189 ENCODE_FUNC_CALL(); 2190 ENCODE_CHK_NULL_RETURN(cmdBuffer); 2191 2192 bool bLastPicInSeq = m_basicFeature->m_lastPicInSeq; 2193 bool bLastPicInStream = m_basicFeature->m_lastPicInStream; 2194 auto ¶ms = m_mfxItf->MHW_GETPAR_F(MFX_PAK_INSERT_OBJECT)(); 2195 params = {}; 2196 2197 if (m_lastPic && (bLastPicInSeq || bLastPicInStream)) // used by AVC, MPEG2 2198 { 2199 params.dwPadding = bLastPicInSeq + bLastPicInStream; 2200 params.bitstreamstartresetResetbitstreamstartingpos = false; 2201 params.endofsliceflagLastdstdatainsertcommandflag = true; 2202 params.lastheaderflagLastsrcheaderdatainsertcommandflag = true; 2203 params.emulationflagEmulationbytebitsinsertenable = false; 2204 params.skipemulbytecntSkipEmulationByteCount = 0; 2205 params.databitsinlastdwSrcdataendingbitinclusion50 = 32; 2206 params.sliceHeaderIndicator = false; 2207 params.headerlengthexcludefrmsize = true; 2208 2209 m_mfxItf->MHW_ADDCMD_F(MFX_PAK_INSERT_OBJECT)(cmdBuffer); 2210 2211 if (bLastPicInSeq) // only used by AVC, not used by MPEG2 2212 { 2213 uint32_t lastPicInSeqData = (uint32_t)((1 << 16) | CODECHAL_ENCODE_AVC_NAL_UT_EOSEQ << 24); 2214 MHW_MI_CHK_STATUS(Mhw_AddCommandCmdOrBB(m_osInterface, cmdBuffer, nullptr, &lastPicInSeqData, sizeof(lastPicInSeqData))); 2215 } 2216 2217 if (bLastPicInStream) // used by AVC, MPEG2 2218 { 2219 uint32_t lastPicInStreamData = (uint32_t)((1 << 16) | CODECHAL_ENCODE_AVC_NAL_UT_EOSTREAM << 24); 2220 MHW_MI_CHK_STATUS(Mhw_AddCommandCmdOrBB(m_osInterface, cmdBuffer, nullptr, &lastPicInStreamData, sizeof(lastPicInStreamData))); 2221 } 2222 } 2223 else // used by AVC, MPEG2, JPEG 2224 { 2225 bool insertZeroByteWA = false; 2226 2227 MEDIA_WA_TABLE *waTable = m_basicFeature->GetWaTable(); 2228 ENCODE_CHK_NULL_RETURN(waTable); 2229 2230 //insert AU, SPS, PSP headers before first slice header 2231 if (m_basicFeature->m_curNumSlices == 0) 2232 { 2233 uint32_t maxBytesInPakInsertObjCmd = ((2 << 11) - 1) * 4; // 12 bits for DwordLength field in PAK_INSERT_OBJ cmd 2234 2235 uint8_t *dataBase = (uint8_t *)(m_basicFeature->m_bsBuffer.pBase); 2236 uint32_t startCode = ((*dataBase) << 24) + ((*(dataBase + 1)) << 16) + ((*(dataBase + 2)) << 8) + (*(dataBase + 3)); 2237 // Only apply the WaSuperSliceHeaderPacking for the cases with 00 00 00 01 start code 2238 if (startCode == 0x00000001) 2239 { 2240 insertZeroByteWA = true; 2241 } 2242 2243 for (auto i = 0; i < CODECHAL_ENCODE_AVC_MAX_NAL_TYPE; i++) 2244 { 2245 if (m_basicFeature->m_nalUnitParams[i]->bInsertEmulationBytes) 2246 { 2247 ENCODE_VERBOSEMESSAGE("The emulation prevention bytes are not inserted by the app and are requested to be inserted by HW."); 2248 } 2249 2250 uint32_t nalunitPosiSize = m_basicFeature->m_nalUnitParams[i]->uiSize; 2251 uint32_t nalunitPosiOffset = m_basicFeature->m_nalUnitParams[i]->uiOffset; 2252 while (nalunitPosiSize > 0) 2253 { 2254 uint32_t dwBitSize = MOS_MIN(maxBytesInPakInsertObjCmd * 8, nalunitPosiSize * 8); 2255 uint32_t byteSize = (dwBitSize + 7) >> 3; 2256 uint32_t dataBitsInLastDw = dwBitSize % 32; 2257 2258 if (dataBitsInLastDw == 0) 2259 { 2260 dataBitsInLastDw = 32; 2261 } 2262 2263 params = {}; 2264 params.dwPadding = ((byteSize + 3) >> 2); 2265 params.bitstreamstartresetResetbitstreamstartingpos = false; 2266 params.endofsliceflagLastdstdatainsertcommandflag = false; 2267 params.lastheaderflagLastsrcheaderdatainsertcommandflag = false; 2268 params.emulationflagEmulationbytebitsinsertenable = m_basicFeature->m_nalUnitParams[i]->bInsertEmulationBytes; 2269 params.skipemulbytecntSkipEmulationByteCount = m_basicFeature->m_nalUnitParams[i]->uiSkipEmulationCheckCount; 2270 params.databitsinlastdwSrcdataendingbitinclusion50 = dataBitsInLastDw; 2271 params.sliceHeaderIndicator = false; 2272 params.headerlengthexcludefrmsize = params.emulationflagEmulationbytebitsinsertenable ? false : true; // Cannot be set to true if emulation byte bit insertion is enabled 2273 2274 m_mfxItf->MHW_ADDCMD_F(MFX_PAK_INSERT_OBJECT)(cmdBuffer); 2275 2276 // Add actual data 2277 uint8_t* data = (uint8_t*)(m_basicFeature->m_bsBuffer.pBase + nalunitPosiOffset); 2278 MHW_MI_CHK_STATUS(Mhw_AddCommandCmdOrBB(m_osInterface, cmdBuffer, nullptr, data, byteSize)); 2279 2280 if (nalunitPosiSize > maxBytesInPakInsertObjCmd) 2281 { 2282 nalunitPosiSize -= maxBytesInPakInsertObjCmd; 2283 nalunitPosiOffset += maxBytesInPakInsertObjCmd; 2284 } 2285 else 2286 { 2287 nalunitPosiSize = 0; 2288 } 2289 2290 insertZeroByteWA = false; 2291 } 2292 } 2293 } 2294 2295 uint8_t *dataBase = (uint8_t *)(m_basicFeature->m_bsBuffer.pBase + m_basicFeature->m_slcData[m_basicFeature->m_curNumSlices].SliceOffset); 2296 uint32_t startCode = ((*dataBase) << 24) + ((*(dataBase + 1)) << 16) + ((*(dataBase + 2)) << 8) + (*(dataBase + 3)); 2297 if (startCode == 0x00000001) 2298 { 2299 insertZeroByteWA = true; 2300 } 2301 2302 // Insert 0x00 for super slice case when PPS/AUD is not inserted 2303 if (insertZeroByteWA) 2304 { 2305 uint32_t byteSize = 1; 2306 2307 params = {}; 2308 params.dwPadding = ((byteSize + 3) >> 2); 2309 params.bitstreamstartresetResetbitstreamstartingpos = false; 2310 params.endofsliceflagLastdstdatainsertcommandflag = false; 2311 params.lastheaderflagLastsrcheaderdatainsertcommandflag = false; 2312 params.emulationflagEmulationbytebitsinsertenable = false; 2313 params.skipemulbytecntSkipEmulationByteCount = 0; 2314 params.databitsinlastdwSrcdataendingbitinclusion50 = 8; 2315 params.sliceHeaderIndicator = false; 2316 params.headerlengthexcludefrmsize = false; 2317 2318 m_mfxItf->MHW_ADDCMD_F(MFX_PAK_INSERT_OBJECT)(cmdBuffer); 2319 2320 // Add actual data 2321 uint8_t* data = (uint8_t*)(m_basicFeature->m_bsBuffer.pBase + m_basicFeature->m_slcData[m_basicFeature->m_curNumSlices].SliceOffset); 2322 MHW_MI_CHK_STATUS(Mhw_AddCommandCmdOrBB(m_osInterface, cmdBuffer, nullptr, data, byteSize)); 2323 } 2324 2325 // Insert slice header 2326 uint32_t uiSkipEmulationCheckCount = 0; 2327 if (m_basicFeature->m_acceleratorHeaderPackingCaps) 2328 { 2329 // If driver does slice header packing set the skip count to 4 2330 uiSkipEmulationCheckCount = 4; 2331 } 2332 else 2333 { 2334 // App does the slice header packing, set the skip count passed by the app 2335 uiSkipEmulationCheckCount = m_basicFeature->m_slcData[m_basicFeature->m_curNumSlices].SkipEmulationByteCount; 2336 } 2337 2338 // Remove one byte of 00 for super slice case when PPS/AUD is not inserted, so that HW could patch slice header correctly 2339 uint32_t dwBitSize = 0, dwOffset = 0; 2340 if (insertZeroByteWA) 2341 { 2342 dwBitSize = m_basicFeature->m_slcData[m_basicFeature->m_curNumSlices].BitSize - 8; 2343 dwOffset = m_basicFeature->m_slcData[m_basicFeature->m_curNumSlices].SliceOffset + 1; 2344 } 2345 else 2346 { 2347 dwBitSize = m_basicFeature->m_slcData[m_basicFeature->m_curNumSlices].BitSize; 2348 dwOffset = m_basicFeature->m_slcData[m_basicFeature->m_curNumSlices].SliceOffset; 2349 } 2350 2351 uint32_t byteSize = (dwBitSize + 7) >> 3; 2352 uint32_t dataBitsInLastDw = dwBitSize % 32; 2353 2354 if (dataBitsInLastDw == 0) 2355 { 2356 dataBitsInLastDw = 32; 2357 } 2358 2359 params = {}; 2360 params.dwPadding = ((byteSize + 3) >> 2); 2361 params.bitstreamstartresetResetbitstreamstartingpos = false; 2362 params.endofsliceflagLastdstdatainsertcommandflag = false; 2363 params.lastheaderflagLastsrcheaderdatainsertcommandflag = true; 2364 params.emulationflagEmulationbytebitsinsertenable = true; 2365 params.skipemulbytecntSkipEmulationByteCount = uiSkipEmulationCheckCount; 2366 params.databitsinlastdwSrcdataendingbitinclusion50 = dataBitsInLastDw; 2367 params.sliceHeaderIndicator = true; 2368 params.headerlengthexcludefrmsize = false; 2369 2370 m_mfxItf->MHW_ADDCMD_F(MFX_PAK_INSERT_OBJECT)(cmdBuffer); 2371 2372 // Add actual data 2373 uint8_t* data = (uint8_t*)(m_basicFeature->m_bsBuffer.pBase + dwOffset); 2374 MHW_MI_CHK_STATUS(Mhw_AddCommandCmdOrBB(m_osInterface, cmdBuffer, nullptr, data, byteSize)); 2375 } 2376 2377 return MOS_STATUS_SUCCESS; 2378 } 2379 AddAllCmds_MFX_AVC_REF_IDX_STATE(PMOS_COMMAND_BUFFER cmdBuffer) const2380 MOS_STATUS AvcVdencPkt::AddAllCmds_MFX_AVC_REF_IDX_STATE(PMOS_COMMAND_BUFFER cmdBuffer) const 2381 { 2382 ENCODE_FUNC_CALL(); 2383 ENCODE_CHK_NULL_RETURN(cmdBuffer); 2384 2385 PCODEC_AVC_ENCODE_SLICE_PARAMS slcParams = &m_sliceParams[m_basicFeature->m_curNumSlices]; 2386 2387 auto ¶ms = m_mfxItf->MHW_GETPAR_F(MFX_AVC_REF_IDX_STATE)(); 2388 params = {}; 2389 2390 if (Slice_Type[slcParams->slice_type] == SLICE_P || 2391 Slice_Type[slcParams->slice_type] == SLICE_B) 2392 { 2393 params.uiList = LIST_0; 2394 ENCODE_CHK_STATUS_RETURN(m_basicFeature->MHW_SETPAR_F(MFX_AVC_REF_IDX_STATE)(params)); 2395 m_mfxItf->MHW_ADDCMD_F(MFX_AVC_REF_IDX_STATE)(cmdBuffer); 2396 } 2397 2398 if (Slice_Type[slcParams->slice_type] == SLICE_B) 2399 { 2400 params.uiList = LIST_1; 2401 ENCODE_CHK_STATUS_RETURN(m_basicFeature->MHW_SETPAR_F(MFX_AVC_REF_IDX_STATE)(params)); 2402 m_mfxItf->MHW_ADDCMD_F(MFX_AVC_REF_IDX_STATE)(cmdBuffer); 2403 } 2404 2405 return MOS_STATUS_SUCCESS; 2406 } 2407 AddAllCmds_MFX_AVC_WEIGHTOFFSET_STATE(PMOS_COMMAND_BUFFER cmdBuffer) const2408 MOS_STATUS AvcVdencPkt::AddAllCmds_MFX_AVC_WEIGHTOFFSET_STATE(PMOS_COMMAND_BUFFER cmdBuffer) const 2409 { 2410 ENCODE_FUNC_CALL(); 2411 ENCODE_CHK_NULL_RETURN(cmdBuffer); 2412 2413 PCODEC_AVC_ENCODE_SLICE_PARAMS slcParams = &m_sliceParams[m_basicFeature->m_curNumSlices]; 2414 2415 auto ¶ms = m_mfxItf->MHW_GETPAR_F(MFX_AVC_WEIGHTOFFSET_STATE)(); 2416 params = {}; 2417 2418 auto wpFeature = dynamic_cast<AvcVdencWeightedPred *>(m_featureManager->GetFeature(AvcFeatureIDs::avcVdencWpFeature)); 2419 ENCODE_CHK_NULL_RETURN(wpFeature); 2420 2421 if ((Slice_Type[slcParams->slice_type] == SLICE_P) && 2422 (m_picParam->weighted_pred_flag == EXPLICIT_WEIGHTED_INTER_PRED_MODE) || 2423 (Slice_Type[slcParams->slice_type] == SLICE_B) && 2424 (m_picParam->weighted_bipred_idc == EXPLICIT_WEIGHTED_INTER_PRED_MODE)) 2425 { 2426 params.uiList = LIST_0; 2427 ENCODE_CHK_STATUS_RETURN(wpFeature->MHW_SETPAR_F(MFX_AVC_WEIGHTOFFSET_STATE)(params)); 2428 m_mfxItf->MHW_ADDCMD_F(MFX_AVC_WEIGHTOFFSET_STATE)(cmdBuffer); 2429 } 2430 2431 if ((Slice_Type[slcParams->slice_type] == SLICE_B) && 2432 (m_picParam->weighted_bipred_idc == EXPLICIT_WEIGHTED_INTER_PRED_MODE)) 2433 { 2434 params.uiList = LIST_1; 2435 ENCODE_CHK_STATUS_RETURN(wpFeature->MHW_SETPAR_F(MFX_AVC_WEIGHTOFFSET_STATE)(params)); 2436 m_mfxItf->MHW_ADDCMD_F(MFX_AVC_WEIGHTOFFSET_STATE)(cmdBuffer); 2437 } 2438 2439 return MOS_STATUS_SUCCESS; 2440 } 2441 MHW_SETPAR_DECL_SRC(MFX_SURFACE_STATE,AvcVdencPkt)2442 MHW_SETPAR_DECL_SRC(MFX_SURFACE_STATE, AvcVdencPkt) 2443 { 2444 params.surfaceId = m_curMfxSurfStateId; 2445 2446 return MOS_STATUS_SUCCESS; 2447 } 2448 MHW_SETPAR_DECL_SRC(MFX_PIPE_BUF_ADDR_STATE,AvcVdencPkt)2449 MHW_SETPAR_DECL_SRC(MFX_PIPE_BUF_ADDR_STATE, AvcVdencPkt) 2450 { 2451 params.presMfdDeblockingFilterRowStoreScratchBuffer = m_resDeblockingFilterRowStoreScratchBuffer; 2452 params.presMfdIntraRowStoreScratchBuffer = m_intraRowStoreScratchBuffer; 2453 2454 if (m_basicFeature->m_perMBStreamOutEnable) 2455 { 2456 // Using frame and PerMB level buffer to get PerMB StreamOut PAK Statistic. 2457 params.presStreamOutBuffer = m_basicFeature->m_recycleBuf->GetBuffer(BrcPakStatisticBufferFull, m_basicFeature->m_frameNum); 2458 } 2459 else 2460 { 2461 params.presStreamOutBuffer = m_basicFeature->m_recycleBuf->GetBuffer(BrcPakStatisticBuffer, 0); 2462 } 2463 2464 return MOS_STATUS_SUCCESS; 2465 } 2466 MHW_SETPAR_DECL_SRC(MFX_BSP_BUF_BASE_ADDR_STATE,AvcVdencPkt)2467 MHW_SETPAR_DECL_SRC(MFX_BSP_BUF_BASE_ADDR_STATE, AvcVdencPkt) 2468 { 2469 params.presBsdMpcRowStoreScratchBuffer = m_resMPCRowStoreScratchBuffer; 2470 2471 return MOS_STATUS_SUCCESS; 2472 } 2473 MHW_SETPAR_DECL_SRC(MFX_AVC_IMG_STATE,AvcVdencPkt)2474 MHW_SETPAR_DECL_SRC(MFX_AVC_IMG_STATE, AvcVdencPkt) 2475 { 2476 auto brcFeature = dynamic_cast<AvcEncodeBRC*>(m_featureManager->GetFeature(AvcFeatureIDs::avcBrcFeature)); 2477 ENCODE_CHK_NULL_RETURN(brcFeature); 2478 2479 bool bIPCMPass = m_pipeline->GetCurrentPass() && m_pipeline->IsLastPass() && (!brcFeature->IsVdencBrcEnabled()); 2480 params.mbstatenabled = bIPCMPass ? true : false; // Disable for the first pass 2481 2482 return MOS_STATUS_SUCCESS; 2483 } 2484 MHW_SETPAR_DECL_SRC(MI_CONDITIONAL_BATCH_BUFFER_END,AvcVdencPkt)2485 MHW_SETPAR_DECL_SRC(MI_CONDITIONAL_BATCH_BUFFER_END, AvcVdencPkt) 2486 { 2487 params.presSemaphoreBuffer = m_pResource; 2488 2489 return MOS_STATUS_SUCCESS; 2490 } 2491 MHW_SETPAR_DECL_SRC(MI_STORE_REGISTER_MEM,AvcVdencPkt)2492 MHW_SETPAR_DECL_SRC(MI_STORE_REGISTER_MEM, AvcVdencPkt) 2493 { 2494 params.presStoreBuffer = m_pResource; 2495 params.dwOffset = m_dwOffset; 2496 params.dwRegister = m_dwValue; 2497 2498 return MOS_STATUS_SUCCESS; 2499 } 2500 MHW_SETPAR_DECL_SRC(MI_STORE_DATA_IMM,AvcVdencPkt)2501 MHW_SETPAR_DECL_SRC(MI_STORE_DATA_IMM, AvcVdencPkt) 2502 { 2503 params.pOsResource = m_pResource; 2504 params.dwResourceOffset = m_dwOffset; 2505 params.dwValue = m_dwValue; 2506 2507 return MOS_STATUS_SUCCESS; 2508 } 2509 2510 #if USE_CODECHAL_DEBUG_TOOL DumpResources(EncodeStatusMfx * encodeStatusMfx,EncodeStatusReportData * statusReportData)2511 MOS_STATUS AvcVdencPkt::DumpResources( 2512 EncodeStatusMfx * encodeStatusMfx, 2513 EncodeStatusReportData *statusReportData) 2514 { 2515 ENCODE_FUNC_CALL(); 2516 ENCODE_CHK_NULL_RETURN(encodeStatusMfx); 2517 ENCODE_CHK_NULL_RETURN(statusReportData); 2518 ENCODE_CHK_NULL_RETURN(m_pipeline); 2519 ENCODE_CHK_NULL_RETURN(m_statusReport); 2520 ENCODE_CHK_NULL_RETURN(m_basicFeature); 2521 ENCODE_CHK_NULL_RETURN(m_basicFeature->m_trackedBuf); 2522 2523 CodechalDebugInterface *debugInterface = m_pipeline->GetStatusReportDebugInterface(); 2524 ENCODE_CHK_NULL_RETURN(debugInterface); 2525 2526 CODEC_REF_LIST currRefList = *((CODEC_REF_LIST *)statusReportData->currRefList); 2527 currRefList.RefPic = statusReportData->currOriginalPic; 2528 2529 debugInterface->m_currPic = statusReportData->currOriginalPic; 2530 debugInterface->m_bufferDumpFrameNum = m_statusReport->GetReportedCount() + 1; // ToDo: for debug purpose 2531 debugInterface->m_frameType = encodeStatusMfx->pictureCodingType; 2532 2533 auto settings = static_cast<AvcVdencFeatureSettings *>(m_legacyFeatureManager->GetFeatureSettings()->GetConstSettings()); 2534 ENCODE_CHK_NULL_RETURN(settings); 2535 auto brcSettings = settings->brcSettings; 2536 2537 ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBuffer( 2538 &currRefList.resBitstreamBuffer, 2539 CodechalDbgAttr::attrBitstream, 2540 "_PAK", 2541 statusReportData->bitstreamSize, 2542 0, 2543 CODECHAL_NUM_MEDIA_STATES)); 2544 2545 ENCODE_CHK_STATUS_RETURN(debugInterface->DumpData( 2546 statusReportData, 2547 sizeof(EncodeStatusReportData), 2548 CodechalDbgAttr::attrStatusReport, 2549 "EncodeStatusReport_Buffer")); 2550 2551 // BRC non-native ROI dump as HuC_region8[in], HuC_region9[in] and HuC_region10[out] 2552 auto brcFeature = dynamic_cast<AvcEncodeBRC*>(m_featureManager->GetFeature(AvcFeatureIDs::avcBrcFeature)); 2553 auto streamInFeature = dynamic_cast<AvcVdencStreamInFeature*>(m_featureManager->GetFeature(AvcFeatureIDs::avcVdencStreamInFeature)); 2554 bool isVdencBrcEnabled = brcFeature && brcFeature->IsVdencBrcEnabled(); 2555 if (streamInFeature && (!isVdencBrcEnabled || m_basicFeature->m_picParam->bNativeROI)) 2556 { 2557 ENCODE_CHK_STATUS_RETURN(streamInFeature->Dump(debugInterface, m_basicFeature->m_mbQpDataEnabled ? "_MBQP" : "_ROI")); 2558 } 2559 2560 MOS_SURFACE *ds4xSurface = m_basicFeature->m_trackedBuf->GetSurface( 2561 BufferType::ds4xSurface, currRefList.ucScalingIdx); 2562 if (ds4xSurface != nullptr) 2563 { 2564 ENCODE_CHK_STATUS_RETURN(debugInterface->DumpYUVSurface( 2565 ds4xSurface, 2566 CodechalDbgAttr::attrReconstructedSurface, 2567 "4XScaling")) 2568 } 2569 2570 if (MEDIA_IS_SKU(m_hwInterface->GetSkuTable(), FtrFlatPhysCCS)) 2571 { 2572 ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBltOutput( 2573 &currRefList.sRefReconBuffer, 2574 CodechalDbgAttr::attrDecodeBltOutput)); 2575 } 2576 ENCODE_CHK_STATUS_RETURN(debugInterface->DumpYUVSurface( 2577 &currRefList.sRefReconBuffer, 2578 CodechalDbgAttr::attrReconstructedSurface, 2579 "ReconSurf")) 2580 2581 if (MEDIA_IS_SKU(m_hwInterface->GetSkuTable(), FtrFlatPhysCCS)) 2582 { 2583 ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBltOutput( 2584 &currRefList.sRefRawBuffer, 2585 CodechalDbgAttr::attrDecodeBltOutput)); 2586 } 2587 ENCODE_CHK_STATUS_RETURN(debugInterface->DumpYUVSurface( 2588 &currRefList.sRefRawBuffer, 2589 CodechalDbgAttr::attrEncodeRawInputSurface, 2590 "SrcSurf")) 2591 2592 if (currRefList.bUsedAsRef) { 2593 auto curColBuf= (currRefList.bIsIntra) ? m_basicFeature->m_colocatedMVBufferForIFrames : m_basicFeature->m_trackedBuf->GetBuffer(BufferType::mvTemporalBuffer, currRefList.ucScalingIdx); 2594 ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBuffer( 2595 curColBuf, 2596 CodechalDbgAttr::attrMvData, 2597 "_CoLocated_Out", 2598 m_basicFeature->m_colocatedMVBufferSize)); 2599 } 2600 2601 // Slice size Buffer dump 2602 ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBuffer( 2603 m_basicFeature->m_recycleBuf->GetBuffer(PakSliceSizeStreamOutBuffer, m_statusReport->GetReportedCount()), 2604 CodechalDbgAttr::attrSliceSizeStreamout, 2605 "_SliceSizeStreamOut", 2606 CODECHAL_ENCODE_SLICESIZE_BUF_SIZE)); 2607 2608 // here add the dump buffer for PAK statistics 2609 ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBuffer( 2610 m_basicFeature->m_recycleBuf->GetBuffer(BrcPakStatisticBufferFull, m_statusReport->GetReportedCount()), 2611 CodechalDbgAttr::attrPakOutput, 2612 "MB and FrameLevel PAK staistics vdenc", 2613 brcSettings.vdencBrcPakStatsBufferSize + m_basicFeature->m_picWidthInMb * m_basicFeature->m_picHeightInMb * 64)); 2614 2615 return MOS_STATUS_SUCCESS; 2616 } 2617 SearchNALHeader(PMHW_VDBOX_AVC_SLICE_STATE sliceState,uint32_t startCode)2618 bool SearchNALHeader( 2619 PMHW_VDBOX_AVC_SLICE_STATE sliceState, 2620 uint32_t startCode) 2621 { 2622 ENCODE_FUNC_CALL(); 2623 2624 for (auto i = 0; i < CODECHAL_ENCODE_AVC_MAX_NAL_TYPE; i++) 2625 { 2626 if (sliceState->ppNalUnitParams[i]->uiSize > 0) 2627 { 2628 uint32_t offset = 0; 2629 uint8_t *dataBase = (uint8_t *)(sliceState->pBsBuffer->pBase + sliceState->ppNalUnitParams[i]->uiOffset); 2630 2631 while (offset < sliceState->ppNalUnitParams[i]->uiSize - 3) 2632 { 2633 uint8_t *dataBuf = dataBase + offset; 2634 2635 if (dataBuf[0] == 0 && dataBuf[1] == 0 && dataBuf[2] == 1 && (dataBuf[3] + 0x100) == startCode) 2636 return true; 2637 2638 offset++; 2639 } 2640 } 2641 } 2642 2643 return false; 2644 } 2645 DumpEncodeImgStats(PMOS_COMMAND_BUFFER cmdbuffer)2646 MOS_STATUS AvcVdencPkt::DumpEncodeImgStats( 2647 PMOS_COMMAND_BUFFER cmdbuffer) 2648 { 2649 ENCODE_FUNC_CALL(); 2650 2651 CodechalDebugInterface *debugInterface = m_pipeline->GetDebugInterface(); 2652 ENCODE_CHK_NULL_RETURN(debugInterface); 2653 2654 if (!debugInterface->DumpIsEnabled(CodechalDbgAttr::attrImageState)) 2655 { 2656 return MOS_STATUS_SUCCESS; 2657 } 2658 2659 std::string SurfName = "Pak_VDEnc_Pass[" + std::to_string(static_cast<uint32_t>(m_pipeline->GetCurrentPass())) + "]"; 2660 2661 auto brcFeature = dynamic_cast<AvcEncodeBRC *>(m_featureManager->GetFeature(AvcFeatureIDs::avcBrcFeature)); 2662 ENCODE_CHK_NULL_RETURN(brcFeature); 2663 2664 // MFX_AVC_IMG_STATE 2665 if (!brcFeature->IsVdencBrcEnabled()) 2666 { 2667 ENCODE_CHK_STATUS_RETURN(debugInterface->DumpBuffer( 2668 &m_batchBufferForVdencImgStat[m_pipeline->m_currRecycledBufIdx].OsResource, 2669 CodechalDbgAttr::attrImageState, 2670 SurfName.c_str(), 2671 m_hwInterface->m_vdencBrcImgStateBufferSize, 2672 0, 2673 CODECHAL_NUM_MEDIA_STATES)); 2674 } 2675 return MOS_STATUS_SUCCESS; 2676 } 2677 2678 #endif 2679 2680 } 2681