/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
//!
//! \file      cm_queue_rt.h
//! \brief     Contains CmQueueRT declarations.
//!

#ifndef MEDIADRIVER_AGNOSTIC_COMMON_CM_CMQUEUERT_H_
#define MEDIADRIVER_AGNOSTIC_COMMON_CM_CMQUEUERT_H_

#include "cm_queue.h"

#include <queue>

#include "cm_array.h"
#include "cm_csync.h"
#include "cm_hal.h"
#include "cm_log.h"

namespace CMRT_UMD
{
class CmDeviceRT;
class CmKernel;
class CmKernelRT;
class CmTaskInternal;
class CmEventRT;
class CmThreadSpaceRT;
class CmThreadGroupSpace;
class CmVebox;
class CmBuffer;
class CmSurface2D;
class CmSurface2DRT;

struct CM_GPUCOPY_KERNEL
{
    CmKernel *kernel;
    CM_GPUCOPY_KERNEL_ID kernelID;
    bool locked;
};

class ThreadSafeQueue
{
public:
    bool Push(CmTaskInternal *element)
    {
        mCriticalSection.Acquire();
        mQueue.push(element);
        mCriticalSection.Release();
        return true;
    }

    // Returns the oldest element, or nullptr if the queue is empty.
    CmTaskInternal *Pop()
    {
        CmTaskInternal *element = nullptr;
        mCriticalSection.Acquire();
        if (mQueue.empty())
        {
            CM_ASSERT(0);
        }
        else
        {
            element = mQueue.front();
            mQueue.pop();
        }
        mCriticalSection.Release();
        return element;
    }

    // Note: Top(), IsEmpty() and GetCount() read the queue without acquiring
    // mCriticalSection, so callers must provide their own synchronization if
    // the queue can be modified concurrently.
    CmTaskInternal *Top()
    {
        CmTaskInternal *element = nullptr;
        if (mQueue.empty())
        {
            CM_ASSERT(0);
        }
        else
        {
            element = mQueue.front();
        }
        return element;
    }

    bool IsEmpty() { return mQueue.empty(); }

    int GetCount() { return static_cast<int>(mQueue.size()); }

private:
    std::queue<CmTaskInternal*> mQueue;
    CSync mCriticalSection;
};
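//!
//! \brief   Illustrative only: a minimal producer/consumer sketch for ThreadSafeQueue.
//! \details This is not part of the driver; names such as "pending" and "producerTask"
//!          are hypothetical. Push() and Pop() serialize access through
//!          mCriticalSection, so they can be called from different threads, e.g. the
//!          enqueue path and the flush path.
//! \code
//!     ThreadSafeQueue pending;
//!     // Producer thread: hand a task over to the consumer.
//!     pending.Push(producerTask);
//!     // Consumer thread: drain tasks; Pop() returns nullptr if the queue is empty.
//!     while (!pending.IsEmpty())
//!     {
//!         CmTaskInternal *task = pending.Pop();
//!         if (task == nullptr)
//!         {
//!             break;
//!         }
//!         // ... process task ...
//!     }
//! \endcode
//!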
//!
//! \brief Class CmQueueRT definitions.
//!
class CmQueueRT: public CmQueue
{
public:
    static int32_t Create(CmDeviceRT *device,
                          CmQueueRT *&queue,
                          CM_QUEUE_CREATE_OPTION queueCreateOption);

    static int32_t Destroy(CmQueueRT *&queue);

    CM_RT_API int32_t Enqueue(CmTask *task,
                              CmEvent *&event,
                              const CmThreadSpace *threadSpace = nullptr);

    CM_RT_API int32_t DestroyEvent(CmEvent *&event);

    CM_RT_API int32_t
    EnqueueWithGroup(CmTask *task,
                     CmEvent *&event,
                     const CmThreadGroupSpace *threadGroupSpace = nullptr);

    CM_RT_API int32_t EnqueueVebox(CmVebox *vebox, CmEvent *&event);

    CM_RT_API int32_t EnqueueWithHints(CmTask *task,
                                       CmEvent *&event,
                                       uint32_t hints = 0);

    CM_RT_API int32_t EnqueueCopyCPUToGPU(CmSurface2D *surface,
                                          const unsigned char *sysMem,
                                          CmEvent *&event);

    CM_RT_API int32_t EnqueueCopyGPUToCPU(CmSurface2D *surface,
                                          unsigned char *sysMem,
                                          CmEvent *&event);

    CM_RT_API int32_t EnqueueInitSurface2D(CmSurface2D *surf2D,
                                           const uint32_t initValue,
                                           CmEvent *&event);

    CM_RT_API int32_t EnqueueCopyGPUToGPU(CmSurface2D *outputSurface,
                                          CmSurface2D *inputSurface,
                                          uint32_t option,
                                          CmEvent *&event);

    CM_RT_API int32_t EnqueueCopyCPUToCPU(unsigned char *dstSysMem,
                                          unsigned char *srcSysMem,
                                          uint32_t size,
                                          uint32_t option,
                                          CmEvent *&event);

    CM_RT_API int32_t EnqueueCopyCPUToGPUFullStride(CmSurface2D *surface,
                                                    const unsigned char *sysMem,
                                                    const uint32_t widthStride,
                                                    const uint32_t heightStride,
                                                    const uint32_t option,
                                                    CmEvent *&event);

    CM_RT_API int32_t EnqueueCopyGPUToCPUFullStride(CmSurface2D *surface,
                                                    unsigned char *sysMem,
                                                    const uint32_t widthStride,
                                                    const uint32_t heightStride,
                                                    const uint32_t option,
                                                    CmEvent *&event);

    CM_RT_API int32_t EnqueueFast(CmTask *task,
                                  CmEvent *&event,
                                  const CmThreadSpace *threadSpace = nullptr);

    CM_RT_API int32_t DestroyEventFast(CmEvent *&event);

    CM_RT_API int32_t EnqueueWithGroupFast(CmTask *task,
                                           CmEvent *&event,
                                           const CmThreadGroupSpace *threadGroupSpace = nullptr);
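    //!
    //! \brief   Illustrative only: a minimal sketch of the typical enqueue flow.
    //! \details Not part of this header's API surface; it assumes the public CM runtime
    //!          entry points (CmDevice::CreateQueue, CmQueue::Enqueue,
    //!          CmEvent::WaitForTaskFinished) and hypothetical "device", "task" and
    //!          "threadSpace" objects created elsewhere.
    //! \code
    //!     CmQueue *queue = nullptr;
    //!     device->CreateQueue(queue);                // returns a CmQueueRT internally
    //!     CmEvent *event = nullptr;
    //!     queue->Enqueue(task, event, threadSpace);  // asynchronous submission
    //!     event->WaitForTaskFinished();              // block until the GPU finishes
    //!     queue->DestroyEvent(event);                // release the event when done
    //! \endcode
    //!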
    int32_t EnqueueCopyInternal_1Plane(CmSurface2DRT *surface,
                                       unsigned char *sysMem,
                                       CM_SURFACE_FORMAT format,
                                       const uint32_t widthInPixel,
                                       const uint32_t widthStride,
                                       const uint32_t heightInRow,
                                       const uint32_t heightStride,
                                       const uint32_t sizePerPixel,
                                       CM_GPUCOPY_DIRECTION direction,
                                       const uint32_t option,
                                       CmEvent *&event);

    int32_t EnqueueCopyInternal_2Planes(CmSurface2DRT *surface,
                                        unsigned char *sysMem,
                                        CM_SURFACE_FORMAT format,
                                        const uint32_t widthInPixel,
                                        const uint32_t widthStride,
                                        const uint32_t heightInRow,
                                        const uint32_t heightStride,
                                        const uint32_t sizePerPixel,
                                        CM_GPUCOPY_DIRECTION direction,
                                        const uint32_t option,
                                        CmEvent *&event);

    virtual int32_t EnqueueCopyInternal(CmSurface2DRT *surface,
                                        unsigned char *sysMem,
                                        const uint32_t widthStride,
                                        const uint32_t heightStride,
                                        CM_GPUCOPY_DIRECTION direction,
                                        const uint32_t option,
                                        CmEvent *&event);

    int32_t EnqueueUnalignedCopyInternal(CmSurface2DRT *surface,
                                         unsigned char *sysMem,
                                         const uint32_t widthStride,
                                         const uint32_t heightStride,
                                         CM_GPUCOPY_DIRECTION direction);

    int32_t FlushTaskWithoutSync(bool flushBlocked = false);

    int32_t GetTaskCount(uint32_t &numTasks);

    virtual int32_t TouchFlushedTasks();

    int32_t GetTaskHasThreadArg(CmKernelRT *kernelArray[],
                                uint32_t numKernels,
                                bool &threadArgExists);

    virtual int32_t CleanQueue();

    virtual CM_QUEUE_CREATE_OPTION &GetQueueOption();

    int32_t GetOSSyncEventHandle(void *&hOSSyncEvent);

    uint32_t GetFastTrackerIndex() { return m_fastTrackerIndex; }

    uint32_t StreamIndex() const { return m_streamIndex; }

    GPU_CONTEXT_HANDLE GpuContextHandle() { return m_gpuContextHandle; }

    virtual int32_t EnqueueBufferCopy(CmBuffer *buffer,
                                      size_t offset,
                                      const unsigned char *sysMem,
                                      uint64_t sysMemSize,
                                      CM_GPUCOPY_DIRECTION dir,
                                      CmEvent *wait_event,
                                      CmEvent *&event,
                                      uint32_t option);

protected:
    CmQueueRT(CmDeviceRT *device, CM_QUEUE_CREATE_OPTION queueCreateOption);

    ~CmQueueRT();

    int32_t Initialize();

    int32_t
    Enqueue_RT(CmKernelRT *kernelArray[],
               const uint32_t kernelCount,
               const uint32_t totalThreadCount,
               CmEventRT *&event,
               const CmThreadSpaceRT *threadSpace = nullptr,
               const uint64_t syncBitmap = 0,
               PCM_POWER_OPTION powerOption = nullptr,
               const uint64_t conditionalEndBitmap = 0,
               PCM_HAL_CONDITIONAL_BB_END_INFO conditionalEndInfo = nullptr,
               CM_TASK_CONFIG *taskConfig = nullptr);

    int32_t Enqueue_RT(CmKernelRT *kernelArray[],
                       const uint32_t kernelCount,
                       const uint32_t totalThreadCount,
                       CmEventRT *&event,
                       const CmThreadGroupSpace *threadGroupSpace = nullptr,
                       const uint64_t syncBitmap = 0,
                       PCM_POWER_OPTION powerOption = nullptr,
                       const uint64_t conditionalEndBitmap = 0,
                       PCM_HAL_CONDITIONAL_BB_END_INFO conditionalEndInfo = nullptr,
                       CM_TASK_CONFIG *taskConfig = nullptr,
                       const CM_EXECUTION_CONFIG *krnExecCfg = nullptr);

    int32_t Enqueue_RT(CmKernelRT *kernelArray[],
                       CmEventRT *&event,
                       uint32_t numTaskGenerated,
                       bool isLastTask,
                       uint32_t hints = 0,
                       PCM_POWER_OPTION powerOption = nullptr);

    int32_t QueryFlushedTasks();

    // Sub-functions for flushing the different task types.
    int32_t FlushGeneralTask(CmTaskInternal *task);

    int32_t FlushGroupTask(CmTaskInternal *task);

    int32_t FlushVeboxTask(CmTaskInternal *task);

    int32_t FlushEnqueueWithHintsTask(CmTaskInternal *task);

    void PopTaskFromFlushedQueue();

    int32_t CreateEvent(CmTaskInternal *task,
                        bool isVisible,
                        int32_t &taskDriverId,
                        CmEventRT *&event);

    int32_t AddGPUCopyKernel(CM_GPUCOPY_KERNEL *&kernelParam);

    int32_t GetGPUCopyKrnID(uint32_t widthInByte,
                            uint32_t height,
                            CM_SURFACE_FORMAT format,
                            CM_GPUCOPY_DIRECTION copyDirection,
                            CM_GPUCOPY_KERNEL_ID &kernelID);

    int32_t AllocateGPUCopyKernel(uint32_t widthInByte,
                                  uint32_t height,
                                  CM_SURFACE_FORMAT format,
                                  CM_GPUCOPY_DIRECTION copyDirection,
                                  CmKernel *&kernel);

    int32_t CreateGPUCopyKernel(uint32_t widthInByte,
                                uint32_t height,
                                CM_SURFACE_FORMAT format,
                                CM_GPUCOPY_DIRECTION copyDirection,
                                CM_GPUCOPY_KERNEL *&gpuCopyKernelParam);

    int32_t SearchGPUCopyKernel(uint32_t widthInByte,
                                uint32_t height,
                                CM_SURFACE_FORMAT format,
                                CM_GPUCOPY_DIRECTION copyDirection,
                                CM_GPUCOPY_KERNEL *&kernelParam);

    int32_t RegisterSyncEvent();
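    //!
    //! \brief   Illustrative only: a rough sketch of the assumed task life cycle.
    //! \details The real flow lives in the implementation file; this comment merely
    //!          connects the declarations above with the two task queues declared
    //!          below, and "taskInternal" is a hypothetical name.
    //! \code
    //!     // Enqueue path (assumed): wrap the user task, queue it, try to submit.
    //!     m_enqueuedTasks.Push(taskInternal);
    //!     FlushTaskWithoutSync();   // submits pending tasks to CM HAL and moves them
    //!                               // from m_enqueuedTasks to m_flushedTasks
    //!
    //!     // Completion path (assumed): retire tasks whose status is finished.
    //!     QueryFlushedTasks();      // pops finished tasks from m_flushedTasks
    //! \endcode
    //!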
    CmDeviceRT *m_device;
    ThreadSafeQueue m_enqueuedTasks;
    ThreadSafeQueue m_flushedTasks;

    CmDynamicArray m_eventArray;
    CSync m_criticalSectionEvent;        // Protects m_eventArray
    CSync m_criticalSectionHalExecute;   // Protects execution in CM HAL, i.e. HalCm_Execute
    CSync m_criticalSectionFlushedTask;  // Protects QueryFlushedTasks
    CSync m_criticalSectionTaskInternal;

    uint32_t m_eventCount;
    uint64_t m_CPUperformanceFrequency;

    CmDynamicArray m_copyKernelParamArray;
    uint32_t m_copyKernelParamArrayCount;

    CSync m_criticalSectionGPUCopyKrn;

    CM_HAL_MAX_VALUES *m_halMaxValues;
    CM_QUEUE_CREATE_OPTION m_queueOption;

    bool m_usingVirtualEngine;
    MOS_VIRTUALENGINE_HINT_PARAMS m_mosVeHintParams;

    void *m_osSyncEvent;  // KMD notification

    uint32_t m_trackerIndex;
    uint32_t m_fastTrackerIndex;

private:
    static const uint32_t INVALID_SYNC_BUFFER_HANDLE = 0xDEADBEEF;

    //--------------------------------------------------------------------------------
    // Creates a GPU context for this object.
    //--------------------------------------------------------------------------------
    MOS_STATUS CreateGpuContext(CM_HAL_STATE *halState,
                                MOS_GPU_CONTEXT gpuContextName,
                                MOS_GPU_NODE gpuNode,
                                MOS_GPUCTX_CREATOPTIONS *createOptions);

    //--------------------------------------------------------------------------------
    // Destroys the compute GPU context.
    //--------------------------------------------------------------------------------
    MOS_STATUS DestroyComputeGpuContext();

    //--------------------------------------------------------------------------------
    // Calls the CM HAL API to submit a group task to the command buffer.
    //--------------------------------------------------------------------------------
    MOS_STATUS ExecuteGroupTask(CM_HAL_STATE *halState,
                                CM_HAL_EXEC_TASK_GROUP_PARAM *taskParam,
                                MOS_GPU_CONTEXT gpuContextName);

    //--------------------------------------------------------------------------------
    // Calls the CM HAL API to submit a general task to the command buffer.
    //--------------------------------------------------------------------------------
    MOS_STATUS ExecuteGeneralTask(CM_HAL_STATE *halState,
                                  CM_HAL_EXEC_TASK_PARAM *taskParam,
                                  MOS_GPU_CONTEXT gpuContextName);

    //--------------------------------------------------------------------------------
    // Creates a buffer to synchronize all tasks in this queue.
    // It's useful only on certain operating systems.
    //--------------------------------------------------------------------------------
    MOS_STATUS CreateSyncBuffer(CM_HAL_STATE *halState);

    //--------------------------------------------------------------------------------
    // Selects the sync buffer in this queue so CM HAL can add it to the command buffer.
    // It's useful only on certain operating systems.
    //--------------------------------------------------------------------------------
    MOS_STATUS SelectSyncBuffer(CM_HAL_STATE *halState);

    //--------------------------------------------------------------------------------
    // Releases the sync buffer in this queue if it has been created.
    //--------------------------------------------------------------------------------
    MOS_STATUS ReleaseSyncBuffer(CM_HAL_STATE *halState);
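    //--------------------------------------------------------------------------------
    // Illustrative only: the assumed call order for the sync-buffer helpers above,
    // written as a sketch rather than taken from the implementation. On operating
    // systems that need it, the handle declared further below would be created once,
    // selected before each submission, and released on teardown.
    //
    //     CreateSyncBuffer(halState);    // once, e.g. during Initialize()
    //     SelectSyncBuffer(halState);    // before each command-buffer submission
    //     ReleaseSyncBuffer(halState);   // once, e.g. in the destructor
    //--------------------------------------------------------------------------------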
#if CM_LOG_ON
    CM_HAL_STATE* GetHalState();
#endif  // #if CM_LOG_ON

    uint32_t m_streamIndex;

    GPU_CONTEXT_HANDLE m_gpuContextHandle;

    // Handle of the buffer resource used to synchronize tasks in this queue.
    uint32_t m_syncBufferHandle;

    CmQueueRT(const CmQueueRT& other);
    CmQueueRT& operator=(const CmQueueRT& other);
};
}  // namespace CMRT_UMD

#endif  // #ifndef MEDIADRIVER_AGNOSTIC_COMMON_CM_CMQUEUERT_H_