1 /*
2 * Copyright (c) 2021-2023, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file media_perf_profiler.cpp
24 //! \brief Defines data structures and interfaces for media performance profiler.
25 //! \details
26 //!
27
28 #include <stddef.h>
29 #include "media_perf_profiler.h"
30 #include "media_skuwa_specific.h"
31 #include "mhw_itf.h"
32 #include "mhw_mi.h"
33 #include "mhw_mi_cmdpar.h"
34 #include "mhw_mi_itf.h"
35 #include "mos_resource_defs.h"
36 #include "mos_util_debug.h"
37 #include "mos_utilities.h"
38 #include "mos_utilities_common.h"
39
//! Event type stored in NodeHeader::eventType by Initialize().
#define UMD_PERF_LOG            8
//! Name length constant (kept for this translation unit).
#define NAME_LEN                60
//! Local string buffer size constant (kept for this translation unit).
#define LOCAL_STRING_SIZE       64
//! Byte offset of MEMBER inside TYPE; MEMBER may be an indexed array element.
#define OFFSET_OF(TYPE, MEMBER) (reinterpret_cast<size_t>(&(static_cast<TYPE *>(nullptr))->MEMBER))
44
//! Values written to NodeHeader::perfMode by Initialize().
enum _UMD_PERF_MODE
{
    UMD_PERF_MODE_TIMING_ONLY      = 0,  //!< Chosen when none of the eight configured registers is set
    UMD_PERF_MODE_WITH_MEMORY_INFO = 4   //!< Chosen when at least one configured register is non-zero
};
using UMD_PERF_MODE = _UMD_PERF_MODE;
50
//! One record of the perf store buffer. Records follow the NodeHeader back to back
//! (see BASE_OF_NODE), and the GPU commands built in this file address individual
//! members through OFFSET_OF, so the member order and sizes must not change.
#pragma pack(push, 8)
struct PerfEntry
{
    uint32_t nodeIndex;               //!< Perf node index
    uint32_t processId;               //!< Process id (stored only in multi-process mode)
    uint32_t instanceId;              //!< Instance id
    uint32_t engineTag;               //!< Engine tag (PerfGPUNode of the submitting GPU context)
    uint32_t perfTag;                 //!< Performance tag reported by the OS interface
    uint32_t timeStampBase;           //!< HW timestamp base (stored when non-zero)
    uint32_t beginRegisterValue[8];   //!< Configured register values sampled at the start
    uint32_t endRegisterValue[8];     //!< Configured register values sampled at the end
    uint32_t beginCpuTime[2];         //!< CPU time at the start, as two 32-bit halves
    uint32_t bitstreamSize;           //!< Frame level: bitstream size
    uint32_t SSEY;                    //!< Frame level: SSE of Y
    uint32_t SSEU;                    //!< Frame level: SSE of U
    uint32_t SSEV;                    //!< Frame level: SSE of V
    union
    {
        uint32_t DWMeanSsimLayer1_YU; //!< Whole dword, target of the quality metric copy
        struct
        {
            uint32_t MeanSsimLayer1_Y : 12;  // [11:0]
            uint32_t DW3_Res_15_12    : 4;   // [15:12]
            uint32_t MeanSsimLayer1_U : 12;  // [27:16]
            uint32_t DW3_Res_31_18    : 4;   // [31:28]
        };
    };
    union
    {
        uint32_t DWMeanSsimLayer1_V;  //!< Whole dword, target of the quality metric copy
        struct
        {
            uint32_t MeanSsimLayer1_V     : 12;  // [11:0]
            uint32_t DW4_Res_15_12        : 4;   // [15:12]
            uint32_t MeanSsimLayer1Part_Y : 12;  // [27:16]
            uint32_t DW4_Res_31_18        : 4;   // [31:28]
        };
    };
    uint32_t reserved[8];             //!< Reserved
    uint64_t beginTimeClockValue;     //!< Begin GPU timestamp
    uint64_t endTimeClockValue;       //!< End GPU timestamp
};
#pragma pack(pop)
95
//! 32-bit header placed at the very start of the perf store buffer.
//! Initialize() zeroes the buffer and then fills eventType, genPlatform,
//! genPlatform_ext and perfMode; the remaining fields are left at zero by this file.
struct NodeHeader
{
    uint32_t osPlatform      : 3;   //!< Not written by this file
    uint32_t genPlatform     : 3;   //!< Low three bits of (mapped platform id - 8)
    uint32_t eventType       : 4;   //!< Set to UMD_PERF_LOG
    uint32_t perfMode        : 3;   //!< One of UMD_PERF_MODE
    uint32_t genAndroid      : 4;   //!< Not written by this file
    uint32_t genPlatform_ext : 2;   //!< Bits [4:3] of (mapped platform id - 8)
    uint32_t reserved        : 13;  //!< Pads the header to 32 bits
};
106
//! Byte offset, from the start of the perf store buffer, of the PerfEntry with the
//! given index (entries follow the NodeHeader back to back). The argument is
//! parenthesized so that expressions such as `index + 1` scale correctly.
#define BASE_OF_NODE(perfDataIndex) (sizeof(NodeHeader) + (sizeof(PerfEntry) * (perfDataIndex)))
108
// The checking macros below are wrapped in do { } while (0) so that each expands to
// exactly one statement: `if (c) CHK_xxx(y); else ...` then parses as intended.
// Every use must be terminated with a semicolon.

//! Evaluates _stmt once and returns its status from the enclosing function on failure.
#define CHK_STATUS_RETURN(_stmt)                                                          \
do                                                                                        \
{                                                                                         \
    MOS_STATUS stmtStatus = (MOS_STATUS)(_stmt);                                          \
    if (stmtStatus != MOS_STATUS_SUCCESS)                                                 \
    {                                                                                     \
        return stmtStatus;                                                                \
    }                                                                                     \
} while (0)

//! Returns MOS_STATUS_NULL_POINTER from the enclosing function when _ptr is null.
#define CHK_NULL_RETURN(_ptr)                                                             \
do                                                                                        \
{                                                                                         \
    if ((_ptr) == nullptr)                                                                \
    {                                                                                     \
        return MOS_STATUS_NULL_POINTER;                                                   \
    }                                                                                     \
} while (0)

//! Returns from the enclosing void function when _ptr is null.
#define CHK_NULL_NO_STATUS_RETURN(_ptr)                                                   \
do                                                                                        \
{                                                                                         \
    if ((_ptr) == nullptr)                                                                \
    {                                                                                     \
        return;                                                                           \
    }                                                                                     \
} while (0)

//! Like CHK_STATUS_RETURN, but releases m_mutex before returning the failure.
//! Only usable where m_mutex is in scope and currently held.
#define CHK_STATUS_UNLOCK_MUTEX_RETURN(_stmt)                                             \
do                                                                                        \
{                                                                                         \
    MOS_STATUS stmtStatus = (MOS_STATUS)(_stmt);                                          \
    if (stmtStatus != MOS_STATUS_SUCCESS)                                                 \
    {                                                                                     \
        MosUtilities::MosUnlockMutex(m_mutex);                                            \
        return stmtStatus;                                                                \
    }                                                                                     \
} while (0)

//! Like CHK_NULL_RETURN, but releases m_mutex before returning.
//! Only usable where m_mutex is in scope and currently held.
#define CHK_NULL_UNLOCK_MUTEX_RETURN(_ptr)                                                \
do                                                                                        \
{                                                                                         \
    if ((_ptr) == nullptr)                                                                \
    {                                                                                     \
        MosUtilities::MosUnlockMutex(m_mutex);                                            \
        return MOS_STATUS_NULL_POINTER;                                                   \
    }                                                                                     \
} while (0)
152
MediaPerfProfiler()153 MediaPerfProfiler::MediaPerfProfiler()
154 {
155 m_perfStoreBufferMap.clear();
156 m_perfDataIndexMap.clear();
157 m_refMap.clear();
158 m_initializedMap.clear();
159
160 m_profilerEnabled = 0;
161
162 m_mutex = MosUtilities::MosCreateMutex();
163
164 if (m_mutex)
165 {
166 // m_mutex is destroyed after MemNinja report, this will cause fake memory leak,
167 // the following 2 lines is to circumvent Memninja counter validation and log parser
168 MosUtilities::MosAtomicDecrement(MosUtilities::m_mosMemAllocCounter);
169 MOS_MEMNINJA_FREE_MESSAGE(m_mutex, __FUNCTION__, __FILE__, __LINE__);
170 PRINT_DESTROY_MEMORY(MT_MOS_DESTROY_MEMORY, MT_NORMAL, MT_MEMORY_PTR, (int64_t)(m_mutex), __FUNCTION__, __FILE__, __LINE__);
171 }
172 else
173 {
174 MOS_OS_ASSERTMESSAGE("Create Mutex failed!");
175 }
176 }
177
~MediaPerfProfiler()178 MediaPerfProfiler::~MediaPerfProfiler()
179 {
180 if (m_mutex != nullptr)
181 {
182 MosUtilities::MosDestroyMutex(m_mutex);
183 m_mutex = nullptr;
184 }
185 }
186
Instance()187 MediaPerfProfiler* MediaPerfProfiler::Instance()
188 {
189 static MediaPerfProfiler instance;
190 if (!instance.m_mutex && instance.m_profilerEnabled)
191 {
192 MOS_OS_ASSERTMESSAGE("Create MediaPerfProfiler failed!");
193 return nullptr;
194 }
195 else
196 {
197 return &instance;
198 }
199 }
200
Destroy(MediaPerfProfiler * profiler,void * context,MOS_INTERFACE * osInterface)201 void MediaPerfProfiler::Destroy(MediaPerfProfiler* profiler, void* context, MOS_INTERFACE *osInterface)
202 {
203 PERF_UTILITY_PRINT;
204
205 CHK_NULL_NO_STATUS_RETURN(profiler);
206 CHK_NULL_NO_STATUS_RETURN(osInterface);
207
208 if (profiler->m_profilerEnabled == 0 || profiler->m_mutex == nullptr)
209 {
210 return;
211 }
212
213 PMOS_CONTEXT pOsContext = osInterface->pOsContext;
214 CHK_NULL_NO_STATUS_RETURN(pOsContext);
215 MosUtilities::MosLockMutex(profiler->m_mutex);
216 if (profiler->m_refMap[pOsContext] > 0)
217 {
218 profiler->m_refMap[pOsContext]--;
219 }
220 osInterface->pfnWaitAllCmdCompletion(osInterface);
221
222 profiler->m_contextIndexMap.erase(context);
223
224 if (profiler->m_refMap[pOsContext] == 0)
225 {
226 if (profiler->m_initializedMap[pOsContext] == true)
227 {
228 if(profiler->m_enableProfilerDump)
229 {
230 profiler->SavePerfData(osInterface);
231 }
232
233 osInterface->pfnFreeResource(
234 osInterface,
235 profiler->m_perfStoreBufferMap[pOsContext]);
236
237 MOS_FreeMemAndSetNull(profiler->m_perfStoreBufferMap[pOsContext]);
238
239 profiler->m_perfStoreBufferMap.erase(pOsContext);
240 profiler->m_initializedMap.erase(pOsContext);
241 profiler->m_refMap.erase(pOsContext);
242 profiler->m_perfDataIndexMap.erase(pOsContext);
243 }
244
245 MosUtilities::MosUnlockMutex(profiler->m_mutex);
246 }
247 else
248 {
249 MosUtilities::MosUnlockMutex(profiler->m_mutex);
250 }
251 }
252
Initialize(void * context,MOS_INTERFACE * osInterface)253 MOS_STATUS MediaPerfProfiler::Initialize(void* context, MOS_INTERFACE *osInterface)
254 {
255 MOS_STATUS status = MOS_STATUS_SUCCESS;
256 CHK_NULL_RETURN(osInterface);
257 CHK_NULL_RETURN(m_mutex);
258
259 PMOS_CONTEXT pOsContext = osInterface->pOsContext;
260 CHK_NULL_RETURN(pOsContext);
261 MediaUserSettingSharedPtr userSettingPtr = osInterface->pfnGetUserSettingInstance(osInterface);
262 // Check whether profiler is enabled
263 ReadUserSetting(
264 userSettingPtr,
265 m_profilerEnabled,
266 __MEDIA_USER_FEATURE_VALUE_PERF_PROFILER_ENABLE,
267 MediaUserSetting::Group::Device);
268
269 if (m_profilerEnabled == 0 || m_mutex == nullptr)
270 {
271 return MOS_STATUS_SUCCESS;
272 }
273
274 MosUtilities::MosLockMutex(m_mutex);
275
276 m_contextIndexMap[context] = 0;
277
278 if (m_initializedMap[pOsContext] == true)
279 {
280 MosUtilities::MosUnlockMutex(m_mutex);
281 return status;
282 }
283
284 m_refMap[pOsContext]++;
285
286 m_enableProfilerDump = MosUtilities::MosIsProfilerDumpEnabled();
287
288 // Read output file name
289 status = ReadUserSetting(
290 userSettingPtr,
291 m_outputFileName,
292 __MEDIA_USER_FEATURE_VALUE_PERF_PROFILER_OUTPUT_FILE_NAME,
293 MediaUserSetting::Group::Device);
294 if (status != MOS_STATUS_SUCCESS)
295 {
296 MosUtilities::MosUnlockMutex(m_mutex);
297 return status;
298 }
299
300 // Read buffer size
301 ReadUserSetting(
302 userSettingPtr,
303 m_bufferSize,
304 __MEDIA_USER_FEATURE_VALUE_PERF_PROFILER_BUFFER_SIZE_KEY,
305 MediaUserSetting::Group::Device);
306
307 m_timerBase = osInterface->pfnGetTsFrequency(osInterface);
308
309 // Read multi processes support
310 ReadUserSetting(
311 userSettingPtr,
312 m_multiprocess,
313 __MEDIA_USER_FEATURE_VALUE_PERF_PROFILER_ENABLE_MUL_PROC,
314 MediaUserSetting::Group::Device);
315
316 // Read multi header support
317 ReadUserSetting(
318 userSettingPtr,
319 m_mergeheader,
320 __MEDIA_USER_FEATURE_VALUE_PERF_PROFILER_ENABLE_MER_HEADER,
321 MediaUserSetting::Group::Device);
322
323 // Read memory information register address
324 int8_t regIndex = 0;
325 for (regIndex = 0; regIndex < 8; regIndex++)
326 {
327 ReadUserSetting(
328 userSettingPtr,
329 m_registers[regIndex],
330 m_registersKey[regIndex],
331 MediaUserSetting::Group::Device);
332 }
333
334 // Read multi processes single binary flag
335 ReadUserSetting(
336 userSettingPtr,
337 m_multiprocesssinglebin,
338 __MEDIA_USER_FEATURE_VALUE_PERF_PROFILER_MUL_PROC_SINGLE_BIN,
339 MediaUserSetting::Group::Device);
340
341 PMOS_RESOURCE pPerfStoreBuffer = (PMOS_RESOURCE)MOS_AllocAndZeroMemory(sizeof(MOS_RESOURCE));
342 m_perfStoreBufferMap[pOsContext] = pPerfStoreBuffer;
343 // Allocate the buffer which store the performance data
344 MOS_ALLOC_GFXRES_PARAMS allocParams;
345 MOS_ZeroMemory(&allocParams, sizeof(MOS_ALLOC_GFXRES_PARAMS));
346 allocParams.Type = MOS_GFXRES_BUFFER;
347 allocParams.TileType = MOS_TILE_LINEAR;
348 allocParams.Format = Format_Buffer;
349 allocParams.dwBytes = m_bufferSize;
350 allocParams.pBufName = "PerfStoreBuffer";
351
352 status = osInterface->pfnAllocateResource(
353 osInterface,
354 &allocParams,
355 pPerfStoreBuffer);
356
357 CHK_STATUS_UNLOCK_MUTEX_RETURN(status);
358
359 CHK_STATUS_UNLOCK_MUTEX_RETURN(
360 osInterface->pfnSkipResourceSync(pPerfStoreBuffer));
361
362 PLATFORM platform = { IGFX_UNKNOWN };
363 osInterface->pfnGetPlatform(osInterface, &platform);
364
365 MOS_LOCK_PARAMS lockFlags;
366 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
367 lockFlags.WriteOnly = 1;
368
369 NodeHeader* header = (NodeHeader*)osInterface->pfnLockResource(
370 osInterface,
371 pPerfStoreBuffer,
372 &lockFlags);
373
374 CHK_NULL_UNLOCK_MUTEX_RETURN(header);
375
376 // Append the header info
377 MOS_ZeroMemory(header, m_bufferSize);
378 header->eventType = UMD_PERF_LOG;
379
380 uint32_t mappedPlatFormId = PlatFormIdMap(platform);
381 header->genPlatform = (mappedPlatFormId - 8) & 0x7;
382 header->genPlatform_ext = ((mappedPlatFormId - 8) >> 3) & 0x3;
383
384 if (IsPerfModeWidthMemInfo(m_registers))
385 {
386 header->perfMode = UMD_PERF_MODE_WITH_MEMORY_INFO;
387 }
388 else
389 {
390 header->perfMode = UMD_PERF_MODE_TIMING_ONLY;
391 }
392
393 osInterface->pfnUnlockResource(
394 osInterface,
395 pPerfStoreBuffer);
396
397 m_initializedMap[pOsContext] = true;
398
399 MosUtilities::MosUnlockMutex(m_mutex);
400
401 return MOS_STATUS_SUCCESS;
402 }
403
StoreData(std::shared_ptr<mhw::mi::Itf> miItf,PMOS_COMMAND_BUFFER cmdBuffer,MOS_CONTEXT_HANDLE pOsContext,uint32_t offset,uint32_t value)404 MOS_STATUS MediaPerfProfiler::StoreData(
405 std::shared_ptr<mhw::mi::Itf> miItf,
406 PMOS_COMMAND_BUFFER cmdBuffer,
407 MOS_CONTEXT_HANDLE pOsContext,
408 uint32_t offset,
409 uint32_t value)
410 {
411 CHK_NULL_RETURN(miItf);
412
413 auto& storeDataParams = miItf->MHW_GETPAR_F(MI_STORE_DATA_IMM)();
414 storeDataParams = {};
415 storeDataParams.pOsResource = m_perfStoreBufferMap[(PMOS_CONTEXT)pOsContext];
416 storeDataParams.dwResourceOffset = offset;
417 storeDataParams.dwValue = value;
418 CHK_STATUS_RETURN(miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(cmdBuffer));
419
420 return MOS_STATUS_SUCCESS;
421 }
422
StoreRegister(MOS_INTERFACE * osInterface,std::shared_ptr<mhw::mi::Itf> miItf,PMOS_COMMAND_BUFFER cmdBuffer,uint32_t offset,uint32_t reg)423 MOS_STATUS MediaPerfProfiler::StoreRegister(
424 MOS_INTERFACE *osInterface,
425 std::shared_ptr<mhw::mi::Itf> miItf,
426 PMOS_COMMAND_BUFFER cmdBuffer,
427 uint32_t offset,
428 uint32_t reg)
429 {
430 CHK_NULL_RETURN(osInterface);
431 CHK_NULL_RETURN(miItf);
432
433 auto& storeRegMemParams = miItf->MHW_GETPAR_F(MI_STORE_REGISTER_MEM)();
434 storeRegMemParams = {};
435 storeRegMemParams.presStoreBuffer = m_perfStoreBufferMap[osInterface->pOsContext];
436 storeRegMemParams.dwOffset = offset;
437 storeRegMemParams.dwRegister = reg;
438
439 MEDIA_FEATURE_TABLE* skuTable = osInterface->pfnGetSkuTable(osInterface);
440 if(skuTable && MEDIA_IS_SKU(skuTable, FtrMemoryRemapSupport))
441 {
442 storeRegMemParams.dwOption = CCS_HW_FRONT_END_MMIO_REMAP;
443 }
444
445 CHK_STATUS_RETURN(miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(cmdBuffer));
446
447 return MOS_STATUS_SUCCESS;
448 }
449
StoreTSByPipeCtrl(std::shared_ptr<mhw::mi::Itf> miItf,PMOS_COMMAND_BUFFER cmdBuffer,MOS_CONTEXT_HANDLE pOsContext,uint32_t offset)450 MOS_STATUS MediaPerfProfiler::StoreTSByPipeCtrl(
451 std::shared_ptr<mhw::mi::Itf> miItf,
452 PMOS_COMMAND_BUFFER cmdBuffer,
453 MOS_CONTEXT_HANDLE pOsContext,
454 uint32_t offset)
455 {
456 CHK_NULL_RETURN(miItf);
457
458 auto& PipeControlParams = miItf->MHW_GETPAR_F(PIPE_CONTROL)();
459 PipeControlParams = {};
460 PipeControlParams.dwResourceOffset = offset;
461 PipeControlParams.dwPostSyncOp = MHW_FLUSH_WRITE_TIMESTAMP_REG;
462 PipeControlParams.dwFlushMode = MHW_FLUSH_READ_CACHE;
463 PipeControlParams.presDest = m_perfStoreBufferMap[(PMOS_CONTEXT)pOsContext];
464
465 CHK_STATUS_RETURN(miItf->MHW_ADDCMD_F(PIPE_CONTROL)(cmdBuffer));
466
467 return MOS_STATUS_SUCCESS;
468 }
469
StoreTSByMiFlush(std::shared_ptr<mhw::mi::Itf> miItf,PMOS_COMMAND_BUFFER cmdBuffer,MOS_CONTEXT_HANDLE pOsContext,uint32_t offset)470 MOS_STATUS MediaPerfProfiler::StoreTSByMiFlush(
471 std::shared_ptr<mhw::mi::Itf> miItf,
472 PMOS_COMMAND_BUFFER cmdBuffer,
473 MOS_CONTEXT_HANDLE pOsContext,
474 uint32_t offset)
475 {
476 CHK_NULL_RETURN(miItf);
477
478 auto& FlushDwParams = miItf->MHW_GETPAR_F(MI_FLUSH_DW)();
479 FlushDwParams = {};
480 FlushDwParams.postSyncOperation = MHW_FLUSH_WRITE_TIMESTAMP_REG;
481 FlushDwParams.dwResourceOffset = offset;
482 FlushDwParams.pOsResource = m_perfStoreBufferMap[(PMOS_CONTEXT)pOsContext];
483
484 CHK_STATUS_RETURN(miItf->MHW_ADDCMD_F(MI_FLUSH_DW)(cmdBuffer));
485
486 return MOS_STATUS_SUCCESS;
487 }
488
AddPerfCollectStartCmd(void * context,MOS_INTERFACE * osInterface,std::shared_ptr<mhw::mi::Itf> miItf,MOS_COMMAND_BUFFER * cmdBuffer)489 MOS_STATUS MediaPerfProfiler::AddPerfCollectStartCmd(
490 void *context,
491 MOS_INTERFACE *osInterface,
492 std::shared_ptr<mhw::mi::Itf> miItf,
493 MOS_COMMAND_BUFFER *cmdBuffer)
494 {
495 MOS_STATUS status = MOS_STATUS_SUCCESS;
496
497 CHK_NULL_RETURN(osInterface);
498 CHK_NULL_RETURN(miItf);
499 CHK_NULL_RETURN(cmdBuffer);
500 CHK_NULL_RETURN(m_mutex);
501
502 PMOS_CONTEXT pOsContext = osInterface->pOsContext;
503 CHK_NULL_RETURN(pOsContext);
504
505 if (m_profilerEnabled == 0 || m_initializedMap[pOsContext] == false)
506 {
507 return status;
508 }
509
510 uint32_t perfDataIndex = 0;
511
512 MosUtilities::MosLockMutex(m_mutex);
513
514 perfDataIndex = m_perfDataIndexMap[pOsContext];
515 m_perfDataIndexMap[pOsContext]++;
516 m_contextIndexMap[context] = perfDataIndex;
517
518 MosUtilities::MosUnlockMutex(m_mutex);
519
520 bool rcsEngineUsed = false;
521 MOS_GPU_CONTEXT gpuContext;
522
523 gpuContext = osInterface->pfnGetGpuContext(osInterface);
524 rcsEngineUsed = MOS_RCS_ENGINE_USED(gpuContext);
525
526 if (m_multiprocess)
527 {
528 CHK_STATUS_RETURN(StoreData(
529 miItf,
530 cmdBuffer,
531 pOsContext,
532 BASE_OF_NODE(perfDataIndex) + OFFSET_OF(PerfEntry, processId),
533 MosUtilities::MosGetPid()));
534 }
535
536 CHK_STATUS_RETURN(StoreData(
537 miItf,
538 cmdBuffer,
539 pOsContext,
540 BASE_OF_NODE(perfDataIndex) + OFFSET_OF(PerfEntry, perfTag),
541 osInterface->pfnGetPerfTag(osInterface)));
542
543 CHK_STATUS_RETURN(StoreData(
544 miItf,
545 cmdBuffer,
546 pOsContext,
547 BASE_OF_NODE(perfDataIndex) + OFFSET_OF(PerfEntry, engineTag),
548 GpuContextToGpuNode(gpuContext)));
549
550 if (m_timerBase != 0)
551 {
552 CHK_STATUS_RETURN(StoreData(
553 miItf,
554 cmdBuffer,
555 pOsContext,
556 BASE_OF_NODE(perfDataIndex) + OFFSET_OF(PerfEntry, timeStampBase),
557 m_timerBase));
558 }
559
560 int8_t regIndex = 0;
561 for (regIndex = 0; regIndex < 8; regIndex++)
562 {
563 if (m_registers[regIndex] != 0)
564 {
565 CHK_STATUS_RETURN(StoreRegister(
566 osInterface,
567 miItf,
568 cmdBuffer,
569 BASE_OF_NODE(perfDataIndex) + OFFSET_OF(PerfEntry, beginRegisterValue[regIndex]),
570 m_registers[regIndex]));
571 }
572 }
573
574 uint64_t beginCPUTimestamp = MosUtilities::MosGetCurTime();
575 uint32_t timeStamp[2];
576 MOS_SecureMemcpy(timeStamp, 2*sizeof(uint32_t), &beginCPUTimestamp, 2*sizeof(uint32_t));
577
578 for (int i = 0; i < 2; i++)
579 {
580 CHK_STATUS_RETURN(StoreData(
581 miItf,
582 cmdBuffer,
583 pOsContext,
584 BASE_OF_NODE(perfDataIndex) + OFFSET_OF(PerfEntry, beginCpuTime[i]),
585 timeStamp[i]));
586 }
587
588 // The address of timestamp must be 8 bytes aligned.
589 uint32_t offset = BASE_OF_NODE(perfDataIndex) + OFFSET_OF(PerfEntry, beginTimeClockValue);
590 offset = MOS_ALIGN_CEIL(offset, 8);
591
592 if (rcsEngineUsed)
593 {
594 CHK_STATUS_RETURN(StoreTSByPipeCtrl(
595 miItf,
596 cmdBuffer,
597 pOsContext,
598 offset));
599 }
600 else
601 {
602 CHK_STATUS_RETURN(StoreTSByMiFlush(
603 miItf,
604 cmdBuffer,
605 pOsContext,
606 offset));
607 }
608
609 return status;
610 }
611
AddPerfCollectEndCmd(void * context,MOS_INTERFACE * osInterface,std::shared_ptr<mhw::mi::Itf> miItf,MOS_COMMAND_BUFFER * cmdBuffer)612 MOS_STATUS MediaPerfProfiler::AddPerfCollectEndCmd(
613 void *context,
614 MOS_INTERFACE *osInterface,
615 std::shared_ptr<mhw::mi::Itf> miItf,
616 MOS_COMMAND_BUFFER *cmdBuffer)
617 {
618 MOS_STATUS status = MOS_STATUS_SUCCESS;
619
620 CHK_NULL_RETURN(osInterface);
621 CHK_NULL_RETURN(miItf);
622 CHK_NULL_RETURN(cmdBuffer);
623
624 PMOS_CONTEXT pOsContext = osInterface->pOsContext;
625 CHK_NULL_RETURN(pOsContext);
626
627 if (m_profilerEnabled == 0 || m_initializedMap[pOsContext] == false)
628 {
629 return status;
630 }
631
632 MOS_GPU_CONTEXT gpuContext;
633 bool rcsEngineUsed = false;
634 uint32_t perfDataIndex = 0;
635
636 gpuContext = osInterface->pfnGetGpuContext(osInterface);
637 rcsEngineUsed = MOS_RCS_ENGINE_USED(gpuContext);
638
639 perfDataIndex = m_contextIndexMap[context];
640
641 int8_t regIndex = 0;
642 for (regIndex = 0; regIndex < 8; regIndex++)
643 {
644 if (m_registers[regIndex] != 0)
645 {
646 CHK_STATUS_RETURN(StoreRegister(
647 osInterface,
648 miItf,
649 cmdBuffer,
650 BASE_OF_NODE(perfDataIndex) + OFFSET_OF(PerfEntry, endRegisterValue[regIndex]),
651 m_registers[regIndex]));
652 }
653 }
654
655 // The address of timestamp must be 8 bytes aligned.
656 uint32_t offset = BASE_OF_NODE(perfDataIndex) + OFFSET_OF(PerfEntry, endTimeClockValue);
657 offset = MOS_ALIGN_CEIL(offset, 8);
658
659 if (rcsEngineUsed)
660 {
661 CHK_STATUS_RETURN(StoreTSByPipeCtrl(
662 miItf,
663 cmdBuffer,
664 pOsContext,
665 offset));
666 }
667 else
668 {
669 CHK_STATUS_RETURN(StoreTSByMiFlush(
670 miItf,
671 cmdBuffer,
672 pOsContext,
673 offset));
674 }
675
676 return status;
677 }
678
AddStoreBitstreamSizeCmd(void * context,MOS_INTERFACE * osInterface,std::shared_ptr<mhw::mi::Itf> & miItf,MOS_COMMAND_BUFFER * cmdBuffer,uint32_t reg)679 MOS_STATUS MediaPerfProfiler::AddStoreBitstreamSizeCmd(
680 void *context,
681 MOS_INTERFACE *osInterface,
682 std::shared_ptr<mhw::mi::Itf>& miItf,
683 MOS_COMMAND_BUFFER *cmdBuffer,
684 uint32_t reg)
685 {
686 MOS_STATUS status = MOS_STATUS_SUCCESS;
687
688 if (m_profilerEnabled == 0)
689 {
690 return status;
691 }
692
693 CHK_NULL_RETURN(context);
694 CHK_NULL_RETURN(osInterface);
695 CHK_NULL_RETURN(miItf);
696 CHK_NULL_RETURN(cmdBuffer);
697
698 PMOS_CONTEXT pOsContext = osInterface->pOsContext;
699 CHK_NULL_RETURN(pOsContext);
700
701 uint32_t perfDataIndex = m_contextIndexMap[context];
702
703 CHK_STATUS_RETURN(StoreRegister(
704 osInterface,
705 miItf,
706 cmdBuffer,
707 BASE_OF_NODE(perfDataIndex) + OFFSET_OF(PerfEntry, bitstreamSize),
708 reg));
709
710 return status;
711 }
712
CopyMemData(std::shared_ptr<mhw::mi::Itf> & miItf,PMOS_COMMAND_BUFFER cmdBuffer,MOS_CONTEXT_HANDLE pOsContext,PMOS_RESOURCE presSrc,uint32_t dwSrcOffset,uint32_t dwDstOffset)713 MOS_STATUS MediaPerfProfiler::CopyMemData(
714 std::shared_ptr<mhw::mi::Itf>& miItf,
715 PMOS_COMMAND_BUFFER cmdBuffer,
716 MOS_CONTEXT_HANDLE pOsContext,
717 PMOS_RESOURCE presSrc,
718 uint32_t dwSrcOffset,
719 uint32_t dwDstOffset)
720 {
721 CHK_NULL_RETURN(miItf);
722
723 auto &miCpyMemMemParams = miItf->MHW_GETPAR_F(MI_COPY_MEM_MEM)();
724 miCpyMemMemParams = {};
725
726 miCpyMemMemParams.presSrc = presSrc;
727 miCpyMemMemParams.dwSrcOffset = dwSrcOffset;
728 miCpyMemMemParams.presDst = m_perfStoreBufferMap[(PMOS_CONTEXT)pOsContext];
729 miCpyMemMemParams.dwDstOffset = dwDstOffset;
730 CHK_STATUS_RETURN(miItf->MHW_ADDCMD_F(MI_COPY_MEM_MEM)(cmdBuffer));
731
732 return MOS_STATUS_SUCCESS;
733 }
734
AddCopyQualityMetricCmd(void * context,MOS_INTERFACE * osInterface,std::shared_ptr<mhw::mi::Itf> & miItf,MOS_COMMAND_BUFFER * cmdBuffer,UMD_QUALITY_METRIC_ITEM item,PMOS_RESOURCE presSrc,uint32_t dwSrcOffset)735 MOS_STATUS MediaPerfProfiler::AddCopyQualityMetricCmd(
736 void *context,
737 MOS_INTERFACE *osInterface,
738 std::shared_ptr<mhw::mi::Itf>& miItf,
739 MOS_COMMAND_BUFFER *cmdBuffer,
740 UMD_QUALITY_METRIC_ITEM item,
741 PMOS_RESOURCE presSrc,
742 uint32_t dwSrcOffset)
743 {
744 MOS_STATUS status = MOS_STATUS_SUCCESS;
745 uint32_t offset = 0;
746
747 if (m_profilerEnabled == 0)
748 {
749 return status;
750 }
751
752 CHK_NULL_RETURN(context);
753 CHK_NULL_RETURN(osInterface);
754 CHK_NULL_RETURN(miItf);
755 CHK_NULL_RETURN(cmdBuffer);
756 CHK_NULL_RETURN(presSrc);
757
758 PMOS_CONTEXT pOsContext = osInterface->pOsContext;
759 CHK_NULL_RETURN(pOsContext);
760
761 uint32_t perfDataIndex = m_contextIndexMap[context];
762
763 switch (item)
764 {
765 case UMD_QUALITY_ITEM_SSEY:
766 offset = BASE_OF_NODE(perfDataIndex) + OFFSET_OF(PerfEntry, SSEY);
767 break;
768 case UMD_QUALITY_ITEM_SSEU:
769 offset = BASE_OF_NODE(perfDataIndex) + OFFSET_OF(PerfEntry, SSEU);
770 break;
771 case UMD_QUALITY_ITEM_SSEV:
772 offset = BASE_OF_NODE(perfDataIndex) + OFFSET_OF(PerfEntry, SSEV);
773 break;
774 case UMD_QUALITY_ITEM_MEAN_SSIM_YU:
775 offset = BASE_OF_NODE(perfDataIndex) + OFFSET_OF(PerfEntry, DWMeanSsimLayer1_YU);
776 break;
777 case UMD_QUALITY_ITEM_MEAN_SSIM_V:
778 offset = BASE_OF_NODE(perfDataIndex) + OFFSET_OF(PerfEntry, DWMeanSsimLayer1_V);
779 break;
780 default:
781 status = MOS_STATUS_INVALID_PARAMETER;
782 break;
783 }
784
785 if (status == MOS_STATUS_SUCCESS)
786 {
787 CHK_STATUS_RETURN(CopyMemData(miItf, cmdBuffer, pOsContext, presSrc, dwSrcOffset, offset));
788 }
789
790 return status;
791 }
792
SavePerfData(MOS_INTERFACE * osInterface)793 MOS_STATUS MediaPerfProfiler::SavePerfData(MOS_INTERFACE *osInterface)
794 {
795 MOS_STATUS status = MOS_STATUS_SUCCESS;
796
797 CHK_NULL_RETURN(osInterface);
798
799 PMOS_CONTEXT pOsContext = osInterface->pOsContext;
800 CHK_NULL_RETURN(pOsContext);
801
802 if (m_multiprocesssinglebin)
803 {
804 uint32_t cnt = 0;
805 MOS_LOCK_PARAMS LockFlagsNoOverWrite = {};
806
807 MOS_ZeroMemory(&LockFlagsNoOverWrite, sizeof(MOS_LOCK_PARAMS));
808 LockFlagsNoOverWrite.WriteOnly = 1;
809 LockFlagsNoOverWrite.NoOverWrite = 1;
810
811 if (m_perfDataCombined == nullptr)
812 {
813 m_perfDataCombinedSize = 96;
814
815 for (auto iter = m_perfDataIndexMap.begin(); iter != m_perfDataIndexMap.end(); ++iter)
816 {
817 if (iter->second > 0)
818 {
819 m_perfDataCombinedSize += BASE_OF_NODE(m_perfDataIndexMap[iter->first]) + 4;
820 cnt += 1;
821 }
822 }
823
824 if (cnt == 0)
825 {
826 return status;
827 }
828
829 m_perfDataCombined = (uint32_t *)MOS_AllocAndZeroMemory(m_perfDataCombinedSize);
830 CHK_NULL_RETURN(m_perfDataCombined);
831
832 m_perfDataCombined[0] = 0x8086;
833 m_perfDataCombined[2] = m_perfDataCombinedSize - (cnt * 4);
834 m_perfDataCombined[3] = cnt;
835
836 m_perfDataCombinedOffset = 96 + (cnt * 4);
837 }
838
839 if (m_perfDataIndexMap[pOsContext] > 0)
840 {
841 uint8_t* pData = (uint8_t*)osInterface->pfnLockResource(
842 osInterface,
843 m_perfStoreBufferMap[pOsContext],
844 &LockFlagsNoOverWrite);
845
846 CHK_NULL_RETURN(pData);
847 MOS_SecureMemcpy(((uint8_t *)m_perfDataCombined) + m_perfDataCombinedOffset, BASE_OF_NODE(m_perfDataIndexMap[pOsContext]), pData, BASE_OF_NODE(m_perfDataIndexMap[pOsContext]));
848
849 osInterface->pfnUnlockResource(
850 osInterface,
851 m_perfStoreBufferMap[pOsContext]);
852
853 m_perfDataCombinedOffset += BASE_OF_NODE(m_perfDataIndexMap[pOsContext]);
854 m_perfDataCombined[24 + m_perfDataCombinedIndex] = BASE_OF_NODE(m_perfDataIndexMap[pOsContext]);
855 m_perfDataCombinedIndex ++;
856
857 if (m_perfDataCombinedOffset == m_perfDataCombinedSize)
858 {
859 MosUtilities::MosWriteFileFromPtr(m_outputFileName.c_str(), m_perfDataCombined, m_perfDataCombinedSize);
860 MOS_SafeFreeMemory(m_perfDataCombined);
861 m_perfDataCombined = nullptr;
862 m_perfDataCombinedIndex = 0;
863 m_perfDataCombinedOffset = 0;
864 m_perfDataCombinedSize = 0;
865 }
866 }
867
868 return status;
869 }
870 else if (m_perfDataIndexMap[pOsContext] > 0)
871 {
872 MOS_LOCK_PARAMS LockFlagsNoOverWrite;
873 MOS_ZeroMemory(&LockFlagsNoOverWrite, sizeof(MOS_LOCK_PARAMS));
874
875 LockFlagsNoOverWrite.WriteOnly = 1;
876 LockFlagsNoOverWrite.NoOverWrite = 1;
877
878 uint8_t* pData = (uint8_t*)osInterface->pfnLockResource(
879 osInterface,
880 m_perfStoreBufferMap[pOsContext],
881 &LockFlagsNoOverWrite);
882
883 CHK_NULL_RETURN(pData);
884
885 if (m_multiprocess)
886 {
887 int32_t pid = MosUtilities::MosGetPid();
888 tm localtime = { 0 };
889 MosUtilities::MosGetLocalTime(&localtime);
890 char outputFileName[MOS_MAX_PATH_LENGTH + 1];
891
892 MOS_SecureStringPrint(outputFileName, MOS_MAX_PATH_LENGTH + 1, MOS_MAX_PATH_LENGTH + 1, "%s-pid%d-context%p-%04d%02d%02d%02d%02d%02d.bin",
893 m_outputFileName.c_str(), pid, pOsContext, localtime.tm_year + 1900, localtime.tm_mon + 1, localtime.tm_mday, localtime.tm_hour, localtime.tm_min, localtime.tm_sec);
894
895 MosUtilities::MosWriteFileFromPtr(outputFileName, pData, BASE_OF_NODE(m_perfDataIndexMap[pOsContext]));
896 }
897 else if (m_mergeheader)
898 {
899 NodeHeader *header = reinterpret_cast<NodeHeader *>(pData);
900 char outputFileName[MOS_MAX_PATH_LENGTH + 1];
901 MOS_SecureStringPrint(outputFileName, MOS_MAX_PATH_LENGTH + 1, MOS_MAX_PATH_LENGTH + 1, "%s-header%u.bin", m_outputFileName.c_str(), *reinterpret_cast<uint32_t*>(header));
902 HANDLE hFile = nullptr;
903 if (MosUtilities::MosCreateFile(&hFile, outputFileName, 0) != MOS_STATUS_SUCCESS)
904 {
905 MosUtilities::MosWriteFileFromPtr(outputFileName, pData, BASE_OF_NODE(m_perfDataIndexMap[pOsContext]));
906 }
907 else
908 {
909 MosUtilities::MosCloseHandle(hFile);
910 MosUtilities::MosAppendFileFromPtr(outputFileName, pData + sizeof(NodeHeader), BASE_OF_NODE(m_perfDataIndexMap[pOsContext]) - sizeof(NodeHeader));
911 }
912 }
913 else
914 {
915 MosUtilities::MosWriteFileFromPtr(m_outputFileName.c_str(), pData, BASE_OF_NODE(m_perfDataIndexMap[pOsContext]));
916 }
917
918 osInterface->pfnUnlockResource(
919 osInterface,
920 m_perfStoreBufferMap[pOsContext]);
921 }
922
923 return status;
924 }
925
GpuContextToGpuNode(MOS_GPU_CONTEXT context)926 PerfGPUNode MediaPerfProfiler::GpuContextToGpuNode(MOS_GPU_CONTEXT context)
927 {
928 PerfGPUNode node = PERF_GPU_NODE_UNKNOW;
929
930 switch (context)
931 {
932 case MOS_GPU_CONTEXT_RENDER:
933 case MOS_GPU_CONTEXT_RENDER2:
934 case MOS_GPU_CONTEXT_RENDER3:
935 case MOS_GPU_CONTEXT_RENDER4:
936 case MOS_GPU_OVERLAY_CONTEXT:
937 case MOS_GPU_CONTEXT_RENDER_RA:
938 node = PERF_GPU_NODE_3D;
939 break;
940 case MOS_GPU_CONTEXT_COMPUTE:
941 case MOS_GPU_CONTEXT_CM_COMPUTE:
942 case MOS_GPU_CONTEXT_COMPUTE_RA:
943 node = PERF_GPU_NODE_3D;
944 break;
945 case MOS_GPU_CONTEXT_VIDEO:
946 case MOS_GPU_CONTEXT_VIDEO2:
947 case MOS_GPU_CONTEXT_VIDEO3:
948 case MOS_GPU_CONTEXT_VIDEO4:
949 case MOS_GPU_CONTEXT_VIDEO5:
950 case MOS_GPU_CONTEXT_VIDEO6:
951 case MOS_GPU_CONTEXT_VIDEO7:
952 node = PERF_GPU_NODE_VIDEO;
953 break;
954 case MOS_GPU_CONTEXT_VDBOX2_VIDEO:
955 case MOS_GPU_CONTEXT_VDBOX2_VIDEO2:
956 case MOS_GPU_CONTEXT_VDBOX2_VIDEO3:
957 node = PERF_GPU_NODE_VIDEO2;
958 break;
959 case MOS_GPU_CONTEXT_VEBOX:
960 case MOS_GPU_CONTEXT_VEBOX2:
961 node = PERF_GPU_NODE_VE;
962 break;
963 case MOS_GPU_CONTEXT_BLT:
964 node = PERF_GPU_NODE_BLT;
965 break;
966 case MOS_GPU_CONTEXT_TEE:
967 node = PERF_GPU_NODE_TEE;
968 break;
969 default:
970 node = PERF_GPU_NODE_UNKNOW;
971 break;
972 }
973
974 return node;
975 }
976
PlatFormIdMap(PLATFORM platform)977 uint32_t MediaPerfProfiler::PlatFormIdMap(PLATFORM platform)
978 {
979 uint32_t perfPlatFormId = 0;
980
981 if (GFX_GET_CURRENT_RENDERCORE(platform) > IGFX_GEN12LP_CORE)
982 {
983 perfPlatFormId = ((((uint32_t)(GFX_GET_CURRENT_RENDERCORE(platform)) >> 8) - 0xc) << 2) + (GFX_GET_CURRENT_RENDERCORE(platform) & 0x3) + (uint32_t)(IGFX_GEN12LP_CORE);
984 }
985 else
986 {
987 perfPlatFormId = (uint32_t)(GFX_GET_CURRENT_RENDERCORE(platform));
988 }
989
990 return perfPlatFormId;
991 }
992
IsPerfModeWidthMemInfo(uint32_t * regs)993 bool MediaPerfProfiler::IsPerfModeWidthMemInfo(uint32_t *regs)
994 {
995 int8_t index = 0;
996 bool ret = false;
997
998 for (index = 0; index < 8; index++)
999 {
1000 if (regs[index] != 0)
1001 {
1002 ret = true;
1003 break;
1004 }
1005 }
1006
1007 return ret;
1008 }
1009