/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
//!
//! \file      cm_kernel_rt.h
//! \brief     Contains CmKernelRT declarations.
//!
26 27 #ifndef MEDIADRIVER_AGNOSTIC_COMMON_CM_CMKERNELRT_H_ 28 #define MEDIADRIVER_AGNOSTIC_COMMON_CM_CMKERNELRT_H_ 29 30 #include "cm_kernel.h" 31 #include "cm_hal.h" 32 #include "cm_log.h" 33 34 enum SURFACE_KIND 35 { 36 DATA_PORT_SURF, 37 SAMPLER_SURF, 38 }; 39 40 struct SURFACE_ARRAY_ARG 41 { 42 uint16_t argKindForArray; // record each arg kind in array, used for surface array 43 uint32_t addressModeForArray; // record each arg address control mode for media sampler in surface array 44 }; 45 46 struct CM_ARG 47 { 48 uint16_t unitKind; // value is of type CM_ARG_KIND 49 uint16_t unitKindOrig; // used to restore unitKind when reset 50 51 uint16_t index; 52 SURFACE_KIND surfaceKind; 53 54 uint32_t unitCount; // 1 for for per kernel arg ; thread # for per thread arg 55 56 uint16_t unitSize; // size of arg in byte 57 uint16_t unitSizeOrig; // used to restore unitSize when reset 58 59 uint16_t unitOffsetInPayload; // offset relative to R0 in payload 60 uint16_t unitOffsetInPayloadOrig; // used to restore unitOffsetInPayload in adding move instruction for CURBE 61 bool isDirty; // used to indicate if its value be changed 62 bool isSet; // used to indicate if this argument is set correctly 63 uint32_t nCustomValue; // CM defined value for special argument kind 64 65 uint32_t aliasIndex; // CmSurface2D alias index 66 bool aliasCreated; // whether or not alias was created for this argument 67 68 bool isNull; // used to indicate if this is a null surface 69 70 uint32_t unitVmeArraySize; // number of Vme surfaces in surface array 71 72 // pointer to the arg values. 
the size is unitCount * unitSize 73 union 74 { 75 uint8_t *value; 76 int32_t *intValue; 77 uint32_t *uintValue; 78 float *floatValue; 79 }; 80 81 uint16_t *surfIndex; 82 SURFACE_ARRAY_ARG *surfArrayArg; // record each arg kind and address control mode for media sampler in surface array 83 bool isStatelessBuffer; CM_ARGCM_ARG84 CM_ARG() 85 { 86 unitKind = 0; 87 unitCount = 0; 88 unitSize = 0; 89 unitOffsetInPayload = 0; 90 value = nullptr; 91 isDirty = false; 92 isNull = false; 93 unitVmeArraySize = 0; 94 surfIndex = nullptr; 95 aliasIndex = 0; 96 unitOffsetInPayloadOrig = 0; 97 isSet = false; 98 index = 0; 99 unitKindOrig = 0; 100 nCustomValue = 0; 101 surfaceKind = DATA_PORT_SURF; 102 unitSizeOrig = 0; 103 surfArrayArg = nullptr; 104 aliasCreated = false; 105 isStatelessBuffer = false; 106 } 107 }; 108 109 enum CM_KERNEL_INTERNAL_ARG_TYPE 110 { 111 CM_KERNEL_INTERNEL_ARG_PERKERNEL = 0, 112 CM_KERNEL_INTERNEL_ARG_PERTHREAD = 1 113 }; 114 115 struct CM_KERNEL_INFO; 116 class CmExecutionAdv; 117 118 namespace CMRT_UMD 119 { 120 class CmDeviceRT; 121 class CmKernelData; 122 class CmThreadSpaceRT; 123 class CmSurfaceVme; 124 class CmSurface; 125 class CmSurfaceManager; 126 class CmProgramRT; 127 class CmDynamicArray; 128 129 class CmMovInstConstructor 130 { 131 public: CmMovInstConstructor()132 CmMovInstConstructor() {} ~CmMovInstConstructor()133 virtual ~CmMovInstConstructor() {} 134 SetInstDistanceConfig(uint32_t size,uint32_t renderGen)135 virtual CM_RETURN_CODE SetInstDistanceConfig(uint32_t size, uint32_t renderGen) 136 { 137 // not implemented in currect platforms 138 // reserved for future platforms 139 return CM_NOT_IMPLEMENTED; 140 } 141 142 virtual uint32_t ConstructObjMovs(uint32_t dstOffset, 143 uint32_t srcOffset, 144 uint32_t size, 145 CmDynamicArray &movInsts, 146 uint32_t index, 147 bool isBdw, 148 bool isHwDebug); 149 150 }; 151 152 //*----------------------------------------------------------------------------- 153 //! 
CM Kernel 154 //*----------------------------------------------------------------------------- 155 class CmKernelRT: public CmKernel 156 { 157 public: 158 static int32_t Create(CmDeviceRT *device, 159 CmProgramRT *program, 160 const char *kernelName, 161 uint32_t kernelIndex, 162 uint32_t kernelSeqNum, 163 CmKernelRT *&kernel, 164 const char *options); 165 166 static int32_t Destroy(CmKernelRT *&kernel, CmProgramRT *&program); 167 168 int32_t GetThreadCount(uint32_t &count); 169 170 CM_RT_API int32_t SetThreadCount(uint32_t count); 171 172 CM_RT_API int32_t SetKernelArg(uint32_t index, 173 size_t size, 174 const void *value); 175 176 CM_RT_API virtual int32_t SetKernelArgPointer(uint32_t index, 177 size_t size, 178 const void *value); 179 180 CM_RT_API int32_t SetThreadArg(uint32_t threadId, 181 uint32_t index, 182 size_t size, 183 const void *value); 184 185 CM_RT_API int32_t SetStaticBuffer(uint32_t index, const void *value); 186 187 CM_RT_API int32_t SetSurfaceBTI(SurfaceIndex *surface, uint32_t bti); 188 189 CM_RT_API int32_t AssociateThreadSpace(CmThreadSpace *&threadSpace); 190 191 CM_RT_API int32_t AssociateThreadGroupSpace(CmThreadGroupSpace *&threadGroupSpace); 192 193 CM_RT_API int32_t SetSamplerBTI(SamplerIndex *sampler, uint32_t nIndex); 194 195 CM_RT_API int32_t DeAssociateThreadSpace(CmThreadSpace *&threadSpace); 196 197 CM_RT_API int32_t DeAssociateThreadGroupSpace(CmThreadGroupSpace *&threadGroupSpace); 198 199 CM_RT_API int32_t QuerySpillSize(uint32_t &spillMemorySize); 200 201 CMRT_UMD_API int32_t GetBinary(std::vector<char> &binary); 202 203 CMRT_UMD_API int32_t ReplaceBinary(std::vector<char> &binary); 204 205 CMRT_UMD_API int32_t ResetBinary(); 206 207 int32_t GetArgs(CM_ARG *&arg); 208 209 int32_t GetArgCount(uint32_t &argCount); 210 211 int32_t GetCurbeEnable(bool &b); 212 213 int32_t SetCurbeEnable(bool b); 214 215 int32_t GetSizeInCurbe(uint32_t &size); 216 217 uint32_t GetAlignedCurbeSize(uint32_t value); 218 219 virtual int32_t 
GetCmDevice(CmDeviceRT *&); 220 221 virtual int32_t GetCmProgram(CmProgramRT *&); 222 223 int32_t GetSizeInPayload(uint32_t &size); 224 225 int32_t CreateKernelData(CmKernelData *&kernelData, 226 uint32_t &kernelDataSize, 227 const CmThreadSpaceRT *threadSpace); 228 229 int32_t CreateKernelData(CmKernelData *&kernelData, 230 uint32_t &kernelDataSize, 231 const CmThreadGroupSpace *threadGroupSpace); 232 233 virtual char *GetName(); 234 235 int32_t SetIndexInTask(uint32_t index); 236 237 uint32_t GetIndexInTask(); 238 239 int32_t SetAssociatedToTSFlag(bool b); 240 241 bool IsThreadArgExisted(); 242 243 virtual uint32_t GetKernelIndex(); 244 GetThreadSpace(CmThreadSpaceRT * & threadSpace)245 int32_t GetThreadSpace(CmThreadSpaceRT *&threadSpace) 246 { 247 threadSpace = m_threadSpace; 248 return CM_SUCCESS; 249 } 250 GetThreadGroupSpace(CmThreadGroupSpace * & threadGroupSpace)251 int32_t GetThreadGroupSpace(CmThreadGroupSpace *&threadGroupSpace) 252 { 253 threadGroupSpace = m_threadGroupSpace; 254 return CM_SUCCESS; 255 } 256 SetAdjustedYCoord(uint32_t value)257 int32_t SetAdjustedYCoord(uint32_t value) 258 { 259 m_adjustScoreboardY = value; 260 return CM_SUCCESS; 261 } 262 GetAdjustedYCoord()263 int32_t GetAdjustedYCoord() { return m_adjustScoreboardY; } 264 265 uint32_t GetSLMSize(); 266 267 uint32_t GetSpillMemUsed(); 268 269 int32_t Acquire(); 270 271 int32_t SafeRelease(); 272 273 int32_t CollectKernelSurface(); 274 275 int32_t GetKernelSurfaces(bool *&surfArray); 276 277 int32_t ResetKernelSurfaces(); 278 279 int32_t CalculateKernelSurfacesNum(uint32_t &kernelSurfaceNum, 280 uint32_t &neededBTEntryNum); 281 282 uint32_t GetKernelGenxBinarySize(); 283 284 int32_t ReleaseKernelData(CmKernelData *&kernelData); 285 286 int32_t AcquireKernelData(CmKernelData *&kernelData); 287 288 virtual int32_t CloneKernel(CmKernelRT*& kernelOut, uint32_t id); 289 290 void SetAsClonedKernel(uint32_t cloneKernelID); 291 292 bool GetCloneKernelID(uint32_t &cloneKernelID); 293 294 void 
SetHasClones(); 295 GetMaxSurfaceIndexAllocated()296 uint32_t GetMaxSurfaceIndexAllocated() 297 { return m_maxSurfaceIndexAllocated; } 298 299 int UpdateSamplerHeap(CmKernelData *kernelData); 300 301 #if CM_LOG_ON 302 std::string Log(); 303 304 CM_HAL_STATE* GetHalState(); 305 #endif 306 307 void SurfaceDump(uint32_t kernelNumber, int32_t taskId); 308 309 protected: 310 friend CmExecutionAdv; 311 CmKernelRT(CmDeviceRT *device, 312 CmProgramRT *program, 313 uint32_t kernelIndex, 314 uint32_t kernelSeqNum); 315 316 virtual ~CmKernelRT(); 317 318 int32_t SetArgsInternal(CM_KERNEL_INTERNAL_ARG_TYPE nArgType, 319 uint32_t index, 320 size_t size, 321 const void *value, 322 uint32_t nThreadID = 0); 323 324 virtual int32_t Initialize(const char *kernelName, const char *options); 325 326 int32_t DestroyArgs(); 327 328 int32_t Reset(); 329 330 int32_t IsKernelDataReusable(CmThreadSpaceRT *threadSpace); 331 332 int32_t CreateKernelArgDataGroup(uint8_t *&data, uint32_t value); 333 334 int32_t CreateMovInstructions(uint32_t &movInstNum, 335 uint8_t *&codeDst, 336 CM_ARG *tempArgs, 337 uint32_t numArgs); 338 339 int32_t CalcKernelDataSize(uint32_t movInstNum, 340 uint32_t numArgs, 341 uint32_t argSize, 342 uint32_t &totalKernelDataSize); 343 344 int32_t GetArgCountPlusSurfArray(uint32_t &argSize, uint32_t &argCountPlus); 345 346 int32_t CreateKernelDataInternal(CmKernelData *&kernelData, 347 uint32_t &kernelDataSize, 348 const CmThreadSpaceRT *threadSpace); 349 350 int32_t CreateKernelDataInternal(CmKernelData *&kernelData, 351 uint32_t &kernelDataSize, 352 const CmThreadGroupSpace *threadGroupSpace); 353 354 int32_t UpdateKernelData(CmKernelData *kernelData, 355 const CmThreadSpaceRT *threadSpace); 356 357 int32_t UpdateKernelData(CmKernelData *kernelData, 358 const CmThreadGroupSpace *threadGroupSpace); 359 360 int32_t CreateThreadArgData(PCM_HAL_KERNEL_ARG_PARAM kernelArg, 361 uint32_t threadArgIndex, 362 CmThreadSpaceRT *threadSpace, 363 CM_ARG *cmArgs); 364 365 int32_t 
UpdateLastKernelData(CmKernelData *&kernelData); 366 367 int32_t CreateKernelIndirectData( 368 PCM_HAL_INDIRECT_DATA_PARAM halIndirectData); 369 370 int32_t CreateThreadSpaceParam( 371 PCM_HAL_KERNEL_THREADSPACE_PARAM kernelThreadSpaceParam, 372 CmThreadSpaceRT *threadSpace); 373 374 int32_t CreateTempArgs(uint32_t numArgs, CM_ARG *&tempArgs); 375 376 int32_t SortThreadSpace(CmThreadSpaceRT *threadSpace); 377 378 int32_t CleanArgDirtyFlag(); 379 380 bool IsBatchBufferReusable(CmThreadSpaceRT *taskThreadSpace); 381 382 bool IsPrologueDirty(); 383 384 void DumpKernelData(CmKernelData *kernelData); 385 386 int32_t 387 UpdateKernelDataGlobalSurfaceInfo(PCM_HAL_KERNEL_PARAM halKernelParam); 388 389 CM_ARG_KIND SurfTypeToArgKind(CM_ENUM_CLASS_TYPE surfType); 390 391 int32_t AcquireKernelProgram(); 392 393 int32_t SetArgsVme(CM_KERNEL_INTERNAL_ARG_TYPE nArgType, 394 uint32_t argIndex, 395 const void *value, 396 uint32_t nThreadID); 397 398 int32_t SetArgsSingleVme(CmSurfaceVme *vmeSurface, 399 uint8_t *vmeArgValueArray, 400 uint16_t *cmSufacesArray); 401 402 int32_t GetVmeSurfaceIndex(uint32_t *vmeIndexArray, 403 uint32_t *vmeCmIndexArray, 404 uint32_t index, 405 uint32_t *outputValue); 406 407 CmSurface *GetSurfaceFromSurfaceArray(SurfaceIndex *value, 408 uint32_t indexSurfaceArray); 409 410 void ArgLog(std::ostringstream &oss, 411 uint32_t index, 412 CM_ARG arg); 413 414 int32_t CreateKernelImplicitArgDataGroup(uint8_t *&data, uint32_t size); 415 416 int32_t SearchAvailableIndirectSurfInfoTableEntry(uint16_t kind, 417 uint32_t surfaceIndex, 418 uint32_t bti); 419 420 int32_t SetSurfBTINumForIndirectData(CM_SURFACE_FORMAT format, 421 CM_ENUM_CLASS_TYPE surfaceType); 422 423 int32_t SetArgsInternalSurfArray(int32_t offset, 424 uint32_t kernelArgIndex, 425 int32_t surfCount, 426 CmSurface *currentSurface, 427 uint32_t currentSurfIndex, 428 SurfaceIndex *value, 429 uint32_t surfValue[], 430 uint16_t origSurfIndex[]); 431 432 #if USE_EXTENSION_CODE 433 int 
InitForGTPin(CmDeviceRT *device, 434 CmProgramRT *program, 435 CmKernelRT *kernel); 436 437 int32_t 438 UpdateKernelDataGTPinSurfaceInfo(PCM_HAL_KERNEL_PARAM halKernelParam); 439 #endif 440 441 CmDeviceRT *m_device; 442 CmSurfaceManager *m_surfaceMgr; 443 CmProgramRT *m_program; 444 char *m_options; 445 char *m_binary; 446 char *m_binaryOrig; 447 uint32_t m_binarySize; 448 uint32_t m_binarySizeOrig; 449 450 uint32_t m_threadCount; 451 uint32_t m_lastThreadCount; 452 uint32_t m_sizeInCurbe; //data size in CURBE 453 uint32_t m_sizeInPayload; //data size of inline data in media object or media walker commands 454 uint32_t m_argCount; 455 456 CM_ARG *m_args; 457 SurfaceIndex *m_globalSurfaces[CM_GLOBAL_SURFACE_NUMBER]; 458 uint32_t m_globalCmIndex[CM_GLOBAL_SURFACE_NUMBER]; 459 CM_KERNEL_INFO *m_kernelInfo; 460 uint32_t m_kernelIndexInProgram; 461 462 bool m_curbeEnabled; 463 bool m_nonstallingScoreboardEnabled; 464 465 uint64_t m_id; // high 32bit is kernel id (highest 16 bits used for kernel binary re-use 466 // in GSH), low 32bit is kernel data id 467 468 uint32_t m_dirty; 469 CmKernelData *m_lastKernelData; 470 uint32_t m_lastKernelDataSize; 471 472 uint32_t m_indexInTask; 473 bool m_threadSpaceAssociated; // Indicates if this kernel is associated the task threadspace 474 // (scoreboard) 475 476 bool m_perThreadArgExists; // Indicates if this kernel has thread arg. 
477 bool m_perKernelArgExists; // Indicates if the user call SetKernelArg() to set per-kernel arg 478 479 CmThreadSpaceRT *m_threadSpace; // Pointer to the kernel threadspace 480 uint32_t m_adjustScoreboardY; // value to adjust Y coordinate read from r0.1 used for 481 // EnqueueWithHints 482 uint32_t m_lastAdjustScoreboardY; 483 484 bool m_blCreatingGPUCopyKernel; // Indicate if this is a predefined GPUCopy kernel 485 bool m_blhwDebugEnable; // Indicate if the hw debug enabled 486 487 uint16_t m_usKernelPayloadDataSize; // Size of kernel indirect data (in byte) 488 uint8_t *m_kernelPayloadData; // Pointer to the kernel indirect data memory 489 490 uint16_t m_usKernelPayloadSurfaceCount; //the surface count in kernel indirect data 491 492 // the surface index list included in kernel indirect data 493 SurfaceIndex* 494 m_pKernelPayloadSurfaceArray[CM_MAX_STATIC_SURFACE_STATES_PER_BT]; 495 496 CM_INDIRECT_SURFACE_INFO 497 m_IndirectSurfaceInfoArray[CM_MAX_STATIC_SURFACE_STATES_PER_BT]; // information used by driver 498 499 uint32_t m_samplerBtiCount; 500 CM_SAMPLER_BTI_ENTRY m_samplerBtiEntry[CM_MAX_SAMPLER_TABLE_SIZE]; 501 502 uint32_t m_refcount; // reference count for kernel 503 504 CM_HAL_MAX_VALUES *m_halMaxValues; 505 CM_HAL_MAX_VALUES_EX *m_halMaxValuesEx; 506 bool *m_surfaceArray; 507 508 uint32_t m_kernelIndex; // Kernel index in kernel array 509 510 CmThreadGroupSpace *m_threadGroupSpace; //should be exclusive with m_threadSpace 511 512 uint32_t m_vmeSurfaceCount; // to record how many VME surface are using in this kernel 513 uint32_t m_maxSurfaceIndexAllocated; // to record the largest surface index used in the pool, 514 // static or reserved surfaces are not included 515 // the var should be inited in CollectKernelSurface 516 517 uint32_t m_barrierMode; // to record barrier mode for this kernel 518 519 bool m_isClonedKernel; 520 uint32_t m_cloneKernelID; 521 bool m_hasClones; 522 CM_STATE_BUFFER_TYPE m_stateBufferBounded; 523 524 CmMovInstConstructor 
*m_movInstConstructor; 525 526 friend class CmThreadSpaceRT; 527 528 private: 529 CmKernelRT(const CmKernelRT &other); 530 CmKernelRT &operator=(const CmKernelRT &other); 531 }; 532 }; //namespace 533 534 #endif // #ifndef MEDIADRIVER_AGNOSTIC_COMMON_CM_CMKERNELRT_H_ 535