1 /* 2 * Copyright (c) 2020-2022, Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included 12 * in all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 #ifndef __VP_RENDER_KERNEL_OBJ_H__ 23 #define __VP_RENDER_KERNEL_OBJ_H__ 24 25 #include "vp_pipeline_common.h" 26 #include "sw_filter.h" 27 #include "media_render_cmd_packet.h" 28 #include "vp_platform_interface.h" 29 #include <vector> 30 #include <map> 31 #include <set> 32 33 class RenderCmdPacket; 34 35 namespace vp { 36 37 //! 38 //! \brief Secure Block Copy kernel inline data size 39 //! 40 #define SECURE_BLOCK_COPY_KERNEL_INLINE_SIZE (1 * sizeof(uint32_t)) 41 //! 42 //! \brief Secure Block Copy kernel width 43 //! 44 #define SECURE_BLOCK_COPY_KERNEL_SURF_WIDTH 64 45 46 //! 47 //! \brief Secure Block Copy kernel block height 48 //! 49 #define SECURE_BLOCK_COPY_KERNEL_BLOCK_HEIGHT 24 50 51 #define KERNEL_BINARY_PADDING_SIZE CM_KERNEL_BINARY_PADDING_SIZE 52 53 typedef struct _KERNEL_SURFACE_STATE_PARAM 54 { 55 struct { 56 bool updatedSurfaceParams; // true if update format/width/height/pitch to renderSurface.OsSurface. 57 MOS_FORMAT format; // MOS_FORMAT for processing surfaces 58 uint32_t width; 59 uint32_t height; 60 uint32_t pitch; 61 62 uint32_t surface_offset; // Offset to the origin of the surface, in bytes. 63 MOS_TILE_TYPE tileType; 64 bool bufferResource; 65 bool bindedKernel; // true if bind index is hardcoded by bindIndex. 66 bool updatedRenderSurfaces; // true if renderSurfaceParams be used. 67 RENDERHAL_SURFACE_STATE_PARAMS renderSurfaceParams; // default can be skip. for future usages, if surface configed by kernel, use it directlly 68 } surfaceOverwriteParams; 69 70 bool isOutput; // true for render target 71 PRENDERHAL_SURFACE_STATE_ENTRY *surfaceEntries; 72 uint32_t *sizeOfSurfaceEntries; 73 uint32_t iCapcityOfSurfaceEntry = 0; 74 bool isBindlessSurface = false; 75 } KERNEL_SURFACE_STATE_PARAM; 76 77 typedef struct _KERNEL_TUNING_PARAMS 78 { 79 uint32_t euThreadSchedulingMode; 80 } KERNEL_TUNING_PARAMS, *PKERNEL_TUNING_PARAMS; 81 82 using KERNEL_CONFIGS = std::map<VpKernelID, void *>; // Only for legacy/non-cm kernels 83 using KERNEL_ARGS = std::vector<KRN_ARG>; 84 using KERNEL_SAMPLER_STATE_GROUP = std::map<SamplerIndex, MHW_SAMPLER_STATE_PARAM>; 85 using KERNEL_SAMPLER_STATES = std::vector<MHW_SAMPLER_STATE_PARAM>; 86 using KERNEL_SAMPLER_INDEX = std::vector<SamplerIndex>; 87 using KERNEL_SURFACE_CONFIG = std::map<SurfaceType, KERNEL_SURFACE_STATE_PARAM>; 88 using KERNEL_SURFACE_BINDING_INDEX = std::map<SurfaceType, std::set<uint32_t>>; 89 using KERNEL_STATELESS_BUFF_CONFIG = std::map<SurfaceType, uint64_t>; 90 using KERNEL_BINDELESS_SURFACE = std::map<SurfaceType, std::set<uint32_t>>; 91 using KERNEL_BINDELESS_SAMPLER = std::map<uint32_t, uint32_t>; 92 93 typedef struct _KERNEL_PARAMS 94 { 95 VpKernelID kernelId; 96 KERNEL_ARGS kernelArgs; 97 KERNEL_THREAD_SPACE kernelThreadSpace; 98 bool syncFlag; 99 bool flushL1; 100 KERNEL_TUNING_PARAMS kernelTuningParams; 101 KERNEL_ARG_INDEX_SURFACE_MAP kernelStatefulSurfaces; 102 } KERNEL_PARAMS; 103 104 struct MEDIA_OBJECT_KA2_INLINE_DATA 105 { 106 // DWORD 0 - GRF R7.0 107 union 108 { 109 // All 110 struct 111 { 112 uint32_t DestinationBlockHorizontalOrigin : 16; 113 uint32_t DestinationBlockVerticalOrigin : 16; 114 }; 115 116 // Secure Block Copy 117 struct 118 { 119 uint32_t BlockHeight : 16; 120 uint32_t BufferOffset : 16; 121 }; 122 123 // FMD Summation 124 struct 125 { 126 uint32_t StartRowOffset; 127 }; 128 129 uint32_t Value; 130 } DW00; 131 132 // DWORD 1 - GRF R7.1 133 union 134 { 135 // Composite 136 struct 137 { 138 uint32_t HorizontalBlockCompositeMaskLayer0 : 16; 139 uint32_t VerticalBlockCompositeMaskLayer0 : 16; 140 }; 141 142 // FMD Summation 143 struct 144 { 145 uint32_t TotalRows; 146 }; 147 148 uint32_t Value; 149 } DW01; 150 151 // DWORD 2 - GRF R7.2 152 union 153 { 154 // Composite 155 struct 156 { 157 uint32_t HorizontalBlockCompositeMaskLayer1 : 16; 158 uint32_t VerticalBlockCompositeMaskLayer1 : 16; 159 }; 160 161 // FMD Summation 162 struct 163 { 164 uint32_t StartColumnOffset; 165 }; 166 167 uint32_t Value; 168 } DW02; 169 170 // DWORD 3 - GRF R7.3 171 union 172 { 173 // Composite 174 struct 175 { 176 uint32_t HorizontalBlockCompositeMaskLayer2 : 16; 177 uint32_t VerticalBlockCompositeMaskLayer2 : 16; 178 }; 179 180 // FMD Summation 181 struct 182 { 183 uint32_t TotalColumns; 184 }; 185 186 uint32_t Value; 187 } DW03; 188 189 // DWORD 4 - GRF R7.4 190 union 191 { 192 // Sampler Load 193 struct 194 { 195 float VideoXScalingStep; 196 }; 197 198 uint32_t Value; 199 } DW04; 200 201 // DWORD 5 - GRF R7.5 202 union 203 { 204 // NLAS 205 struct 206 { 207 float VideoStepDelta; 208 }; 209 210 uint32_t Value; 211 } DW05; 212 213 // DWORD 6 - GRF R7.6 214 union 215 { 216 // AVScaling 217 struct 218 { 219 uint32_t VerticalBlockNumber : 17; 220 uint32_t AreaOfInterest : 1; 221 uint32_t : 14; 222 }; 223 224 uint32_t Value; 225 } DW06; 226 227 // DWORD 7 - GRF R7.7 228 union 229 { 230 // AVScaling 231 struct 232 { 233 uint32_t GroupIDNumber; 234 }; 235 236 uint32_t Value; 237 } DW07; 238 239 // DWORD 8 - GRF R8.0 240 union 241 { 242 // Composite 243 struct 244 { 245 uint32_t HorizontalBlockCompositeMaskLayer3 : 16; 246 uint32_t VerticalBlockCompositeMaskLayer3 : 16; 247 }; 248 249 uint32_t Value; 250 } DW08; 251 252 // DWORD 9 - GRF R8.1 253 union 254 { 255 // Composite 256 struct 257 { 258 uint32_t HorizontalBlockCompositeMaskLayer4 : 16; 259 uint32_t VerticalBlockCompositeMaskLayer4 : 16; 260 }; 261 262 uint32_t Value; 263 } DW09; 264 265 // DWORD 10 - GRF R8.2 266 union 267 { 268 // Composite 269 struct 270 { 271 uint32_t HorizontalBlockCompositeMaskLayer5 : 16; 272 uint32_t VerticalBlockCompositeMaskLayer5 : 16; 273 }; 274 275 uint32_t Value; 276 } DW10; 277 278 // DWORD 11 - GRF R8.3 279 union 280 { 281 // Composite 282 struct 283 { 284 uint32_t HorizontalBlockCompositeMaskLayer6 : 16; 285 uint32_t VerticalBlockCompositeMaskLayer6 : 16; 286 }; 287 288 uint32_t Value; 289 } DW11; 290 291 // DWORD 12 - GRF R8.4 292 union 293 { 294 // Composite 295 struct 296 { 297 uint32_t HorizontalBlockCompositeMaskLayer7 : 16; 298 uint32_t VerticalBlockCompositeMaskLayer7 : 16; 299 }; 300 301 uint32_t Value; 302 } DW12; 303 304 // DWORD 13 - GRF R8.5 305 union 306 { 307 struct 308 { 309 uint32_t Reserved; 310 }; 311 312 uint32_t Value; 313 } DW13; 314 315 // DWORD 14 - GRF R8.6 316 union 317 { 318 struct 319 { 320 uint32_t Reserved; 321 }; 322 323 uint32_t Value; 324 } DW14; 325 326 // DWORD 15 - GRF R8.7 327 union 328 { 329 struct 330 { 331 uint32_t Reserved; 332 }; 333 334 uint32_t Value; 335 } DW15; 336 }; 337 338 class VpRenderKernelObj 339 { 340 public: 341 VpRenderKernelObj(PVP_MHWINTERFACE hwInterface, PVpAllocator allocator); 342 VpRenderKernelObj(PVP_MHWINTERFACE hwInterface, VpKernelID kernelID, uint32_t kernelIndex, std::string kernelName = "", PVpAllocator allocator = nullptr); 343 virtual ~VpRenderKernelObj(); 344 345 // For Adv kernel 346 // Kernel Specific, which will inplenment be each kernel 347 // GetCurbeState should be called after UpdateCurbeBindingIndex for all processed surfaces being called 348 virtual MOS_STATUS Init(VpRenderKernel& kernel); 349 GetCurbeState(void * & curbe,uint32_t & curbeLength,uint32_t & curbeLengthAligned,RENDERHAL_KERNEL_PARAM kernelParam,uint32_t dwBlockAlign)350 MOS_STATUS GetCurbeState(void *&curbe, uint32_t &curbeLength, uint32_t &curbeLengthAligned, RENDERHAL_KERNEL_PARAM kernelParam, uint32_t dwBlockAlign) 351 { 352 VP_PUBLIC_CHK_STATUS_RETURN(GetCurbeState(curbe, curbeLength)); 353 VP_PUBLIC_CHK_STATUS_RETURN(GetAlignedLength(curbeLength, curbeLengthAligned, kernelParam, dwBlockAlign)); 354 return MOS_STATUS_SUCCESS; 355 } 356 357 virtual uint32_t GetInlineDataSize() = 0; 358 359 virtual uint32_t GetKernelIndex(); 360 GetKernelId()361 VpKernelID GetKernelId() 362 { 363 return m_kernelId; 364 } 365 GetKernelType()366 DelayLoadedKernelType GetKernelType() 367 { 368 return m_kernelType; 369 } 370 IsKernelCached()371 virtual bool IsKernelCached() 372 { 373 return false; 374 } 375 GetCachedEntryForKernelLoad()376 virtual Kdll_CacheEntry *GetCachedEntryForKernelLoad() 377 { 378 return nullptr; 379 } 380 381 virtual MOS_STATUS GetWalkerSetting(KERNEL_WALKER_PARAMS& walkerParam, KERNEL_PACKET_RENDER_DATA &renderData); 382 383 virtual MOS_STATUS SetKernelConfigs( 384 KERNEL_PARAMS& kernelParams, 385 VP_SURFACE_GROUP& surfaces, 386 KERNEL_SAMPLER_STATE_GROUP& samplerStateGroup, 387 KERNEL_CONFIGS& kernelConfigs, 388 VP_PACKET_SHARED_CONTEXT* sharedContext); 389 GetScoreboardParams(PMHW_VFE_SCOREBOARD & scoreboardParams)390 virtual MOS_STATUS GetScoreboardParams(PMHW_VFE_SCOREBOARD &scoreboardParams) 391 { 392 scoreboardParams = nullptr; 393 return MOS_STATUS_SUCCESS; 394 } 395 DumpSurfaces()396 virtual void DumpSurfaces() 397 { 398 return; 399 } 400 401 virtual void DumpSurface(VP_SURFACE *pSurface,PCCHAR fileName); 402 403 // Kernel Common configs GetKernelSettings(RENDERHAL_KERNEL_PARAM & settsings)404 virtual MOS_STATUS GetKernelSettings(RENDERHAL_KERNEL_PARAM &settsings) 405 { 406 if (IsAdvKernel()) 407 { 408 // For adv kernel, no need for kernel param. 409 return MOS_STATUS_SUCCESS; 410 } 411 if (m_hwInterface && m_hwInterface->m_vpPlatformInterface) 412 { 413 VP_PUBLIC_CHK_STATUS_RETURN(m_hwInterface->m_vpPlatformInterface->GetKernelParam(m_kernelId, settsings)); 414 return MOS_STATUS_SUCCESS; 415 } 416 else 417 { 418 return MOS_STATUS_INVALID_HANDLE; 419 } 420 return MOS_STATUS_SUCCESS; 421 } 422 423 virtual MOS_STATUS GetKernelEntry(Kdll_CacheEntry &entry); 424 FreeCurbe(void * & curbe)425 virtual MOS_STATUS FreeCurbe(void*& curbe) 426 { 427 VP_FUNC_CALL(); 428 429 MOS_SafeFreeMemory(curbe); 430 return MOS_STATUS_SUCCESS; 431 } 432 433 virtual uint32_t GetKernelBinaryID(); 434 GetKernelBinary()435 void* GetKernelBinary() 436 { 437 return m_kernelBinary; 438 } 439 GetKernelSurfaceConfig()440 KERNEL_SURFACE_CONFIG& GetKernelSurfaceConfig() 441 { 442 return m_surfaceState; 443 } 444 GetKernelName()445 std::string& GetKernelName() 446 { 447 return m_kernelName; 448 } 449 UpdateCurbeBindingIndex(SurfaceType surface,uint32_t index)450 MOS_STATUS UpdateCurbeBindingIndex(SurfaceType surface, uint32_t index) 451 { 452 // Surface Type is specified during one submission 453 auto it = m_surfaceBindingIndex.find(surface); 454 if (it != m_surfaceBindingIndex.end()) 455 { 456 it->second.insert(index); 457 } 458 else 459 { 460 std::set<uint32_t> bindingMap; 461 bindingMap.insert(index); 462 m_surfaceBindingIndex.insert(std::make_pair(surface, bindingMap)); 463 } 464 465 return MOS_STATUS_SUCCESS; 466 } 467 GetSurfaceBindingIndex(SurfaceType surface)468 std::set<uint32_t>& GetSurfaceBindingIndex(SurfaceType surface) 469 { 470 auto it = m_surfaceBindingIndex.find(surface); 471 472 if (it == m_surfaceBindingIndex.end()) 473 { 474 VP_RENDER_ASSERTMESSAGE("No surface index created for current surface"); 475 std::set<uint32_t> bindingMap; 476 it = m_surfaceBindingIndex.insert(std::make_pair(surface, bindingMap)).first; 477 } 478 return it->second; 479 } 480 481 MOS_STATUS InitKernel(void* binary, uint32_t size, KERNEL_CONFIGS& kernelConfigs, 482 VP_SURFACE_GROUP& surfacesGroup, VP_RENDER_CACHE_CNTL& surfMemCacheCtl); 483 IsAdvKernel()484 bool IsAdvKernel() 485 { 486 return m_isAdvKernel; 487 } 488 UseIndependentSamplerGroup()489 bool UseIndependentSamplerGroup() 490 { 491 return m_useIndependentSamplerGroup; 492 } 493 494 virtual MOS_STATUS SetSamplerStates(KERNEL_SAMPLER_STATE_GROUP& samplerStateGroup); 495 UpdateCompParams()496 virtual MOS_STATUS UpdateCompParams() 497 { 498 return MOS_STATUS_SUCCESS; 499 } 500 SetCacheCntl(PVP_RENDER_CACHE_CNTL)501 virtual MOS_STATUS SetCacheCntl(PVP_RENDER_CACHE_CNTL) 502 { 503 return MOS_STATUS_SUCCESS; 504 } 505 SetPerfTag()506 virtual MOS_STATUS SetPerfTag() 507 { 508 return MOS_STATUS_SUCCESS; 509 } 510 InitRenderHalSurface(SurfaceType type,VP_SURFACE * surf,PRENDERHAL_SURFACE renderHalSurface)511 virtual MOS_STATUS InitRenderHalSurface( 512 SurfaceType type, 513 VP_SURFACE *surf, 514 PRENDERHAL_SURFACE renderHalSurface) 515 { 516 return MOS_STATUS_UNIMPLEMENTED; 517 } 518 519 virtual void OcaDumpKernelInfo(MOS_COMMAND_BUFFER &cmdBuffer, MOS_CONTEXT &mosContext); 520 GetEuThreadSchedulingMode()521 virtual uint32_t GetEuThreadSchedulingMode() 522 { 523 // hw default mode 524 return 0; 525 } 526 527 virtual MOS_STATUS InitRenderHalSurfaceCMF(MOS_SURFACE* src, PRENDERHAL_SURFACE renderHalSurface); 528 529 virtual MOS_STATUS SetInlineDataParameter(KRN_ARG args, RENDERHAL_INTERFACE *renderhal); 530 UpdateBindlessSurfaceResource(SurfaceType surf,std::set<uint32_t> surfStateOffset)531 virtual MOS_STATUS UpdateBindlessSurfaceResource(SurfaceType surf, std::set<uint32_t> surfStateOffset) 532 { 533 if (surf != SurfaceTypeInvalid) 534 { 535 m_bindlessSurfaceArray.insert(std::make_pair(surf, surfStateOffset)); 536 } 537 538 return MOS_STATUS_SUCCESS; 539 } 540 GetBindlessSamplers()541 virtual std::map<uint32_t, uint32_t>& GetBindlessSamplers() 542 { 543 return m_bindlessSamperArray; 544 } 545 InitBindlessResources()546 virtual MOS_STATUS InitBindlessResources() 547 { 548 m_bindlessSurfaceArray.clear(); 549 m_bindlessSamperArray.clear(); 550 return MOS_STATUS_SUCCESS; 551 } 552 553 protected: 554 555 virtual MOS_STATUS SetWalkerSetting(KERNEL_THREAD_SPACE &threadSpace, bool bSyncFlag, bool flushL1 = false); 556 557 virtual MOS_STATUS SetKernelArgs(KERNEL_ARGS &kernelArgs, VP_PACKET_SHARED_CONTEXT *sharedContext); 558 559 virtual MOS_STATUS SetKernelStatefulSurfaces(KERNEL_ARG_INDEX_SURFACE_MAP &statefulSurfaces); 560 561 virtual MOS_STATUS SetupSurfaceState() = 0; 562 563 virtual MOS_STATUS SetKernelConfigs(KERNEL_CONFIGS& kernelConfigs); 564 565 virtual MOS_STATUS SetProcessSurfaceGroup(VP_SURFACE_GROUP &surfaces); 566 567 virtual MOS_STATUS CpPrepareResources(); 568 569 virtual MOS_STATUS SetupStatelessBuffer(); 570 571 virtual MOS_STATUS SetupStatelessBufferResource(SurfaceType surf); 572 573 virtual MOS_STATUS GetCurbeState(void *&curbe, uint32_t &curbeLength) = 0; 574 GetAlignedLength(uint32_t & curbeLength,uint32_t & curbeLengthAligned,RENDERHAL_KERNEL_PARAM kernelParam,uint32_t dwBlockAlign)575 virtual MOS_STATUS GetAlignedLength(uint32_t &curbeLength, uint32_t &curbeLengthAligned, RENDERHAL_KERNEL_PARAM kernelParam, uint32_t dwBlockAlign) 576 { 577 curbeLengthAligned = MOS_ALIGN_CEIL(curbeLength, dwBlockAlign); 578 return MOS_STATUS_SUCCESS; 579 } 580 581 virtual MOS_STATUS SetTuningFlag(PKERNEL_TUNING_PARAMS tuningParams); 582 583 protected: 584 585 VP_SURFACE_GROUP *m_surfaceGroup = nullptr; // input surface process surface groups 586 PVP_MHWINTERFACE m_hwInterface = nullptr; 587 KERNEL_SURFACE_CONFIG m_surfaceState; // surfaces processed pool where the surface state will generated here, if KERNEL_SURFACE_STATE_PARAM 588 KERNEL_SURFACE_BINDING_INDEX m_surfaceBindingIndex; // store the binding index for processed surface 589 PVpAllocator m_allocator = nullptr; 590 MediaUserSettingSharedPtr m_userSettingPtr = nullptr; // usersettingInstance 591 KERNEL_STATELESS_BUFF_CONFIG m_statelessArray; 592 KERNEL_BINDELESS_SURFACE m_bindlessSurfaceArray; 593 KERNEL_BINDELESS_SAMPLER m_bindlessSamperArray; 594 // kernel attribute 595 std::string m_kernelName = ""; 596 void * m_kernelBinary = nullptr; 597 uint32_t m_kernelBinaryID = 0; 598 uint32_t m_kernelSize = 0; 599 VpKernelID m_kernelId = kernelCombinedFc; 600 DelayLoadedKernelType m_kernelType = KernelNone; 601 KernelIndex m_kernelIndex = 0; // index of current kernel in KERNEL_PARAMS_LIST 602 603 PKERNEL_TUNING_PARAMS m_kernelTuningParams = nullptr; 604 605 bool m_isAdvKernel = false; // true mean multi kernel can be submitted in one workload. 606 bool m_useIndependentSamplerGroup = false; //true means multi kernels has their own stand alone sampler states group. only can be true when m_isAdvKernel is true. 607 608 std::shared_ptr<mhw::vebox::Itf> m_veboxItf = nullptr; 609 std ::vector<MHW_INLINE_DATA_PARAMS> m_inlineDataParams = {}; 610 611 MEDIA_CLASS_DEFINE_END(vp__VpRenderKernelObj) 612 }; 613 } 614 #endif // __VP_RENDER_KERNEL_OBJ_H__ 615