1 /*
2 * Copyright (c) 2021-2023, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file     media_perf_profiler.cpp
24 //! \brief    Defines data structures and interfaces for media performance profiler.
25 //! \details
26 //!
27 
28 #include <stddef.h>
29 #include "media_perf_profiler.h"
30 #include "media_skuwa_specific.h"
31 #include "mhw_itf.h"
32 #include "mhw_mi.h"
33 #include "mhw_mi_cmdpar.h"
34 #include "mhw_mi_itf.h"
35 #include "mos_resource_defs.h"
36 #include "mos_util_debug.h"
37 #include "mos_utilities.h"
38 #include "mos_utilities_common.h"
39 
//! Event type written to NodeHeader::eventType when the perf store buffer is set up
#define UMD_PERF_LOG 8
//! Length constants; not referenced in the visible part of this file
#define NAME_LEN 60
#define LOCAL_STRING_SIZE 64
//! Byte offset of MEMBER inside TYPE. MEMBER may carry an array subscript,
//! including one that is only known at run time (e.g. beginRegisterValue[regIndex]).
#define OFFSET_OF(TYPE, MEMBER) (reinterpret_cast<size_t>(&(static_cast<TYPE *>(nullptr))->MEMBER))
44 
//! Values stored in NodeHeader::perfMode. Initialize() picks WITH_MEMORY_INFO
//! when IsPerfModeWidthMemInfo(m_registers) is true and TIMING_ONLY otherwise.
enum _UMD_PERF_MODE
{
    UMD_PERF_MODE_TIMING_ONLY      = 0,
    UMD_PERF_MODE_WITH_MEMORY_INFO = 4
};
typedef enum _UMD_PERF_MODE UMD_PERF_MODE;
50 
//! One perf node of the perf store buffer. The command-building functions in this
//! file address individual members through OFFSET_OF(PerfEntry, ...), so member
//! order, widths and packing define the layout of the buffer and must not change.
#pragma pack(push, 8)
struct PerfEntry
{
    uint32_t nodeIndex;              //!< Perf node index
    uint32_t processId;              //!< Process Id
    uint32_t instanceId;             //!< Instance Id
    uint32_t engineTag;              //!< Engine tag
    uint32_t perfTag;                //!< Performance tag
    uint32_t timeStampBase;          //!< HW timestamp base
    uint32_t beginRegisterValue[8];  //!< Begin register value
    uint32_t endRegisterValue[8];    //!< End register value
    uint32_t beginCpuTime[2];        //!< Begin CPU Time Stamp
    uint32_t bitstreamSize;          //!< frame level: bitstreamSize
    uint32_t SSEY;                   //!< frame level: SSEY
    uint32_t SSEU;                   //!< frame level: SSEU
    uint32_t SSEV;                   //!< frame level: SSEV

    //! Mean SSIM for Y and U, addressable as one DWORD or as bit-fields
    union
    {
        uint32_t DWMeanSsimLayer1_YU;
        struct
        {
            uint32_t MeanSsimLayer1_Y : 12;  // [11:0]
            uint32_t DW3_Res_15_12 : 4;      // [15:12]
            uint32_t MeanSsimLayer1_U : 12;  // [27:16]
            uint32_t DW3_Res_31_18 : 4;      // [31:28]
        };
    };

    //! Mean SSIM for V (plus a partial Y value), addressable as one DWORD or as bit-fields
    union
    {
        uint32_t DWMeanSsimLayer1_V;
        struct
        {
            uint32_t MeanSsimLayer1_V : 12;      // [11:0]
            uint32_t DW4_Res_15_12 : 4;          // [15:12]
            uint32_t MeanSsimLayer1Part_Y : 12;  // [27:16]
            uint32_t DW4_Res_31_18 : 4;          // [31:28]
        };
    };

    uint32_t reserved[8];            //!< Reserved[8]
    uint64_t beginTimeClockValue;    //!< Begin timestamp
    uint64_t endTimeClockValue;      //!< End timestamp
};
#pragma pack(pop)
95 
//! Header placed at the start of the perf store buffer, ahead of the PerfEntry
//! nodes (see BASE_OF_NODE). The bit-field widths add up to 32 bits.
struct NodeHeader
{
    uint32_t osPlatform      : 3,
             genPlatform     : 3,   //!< Low 3 bits of (mapped platform id - 8)
             eventType       : 4,   //!< Set to UMD_PERF_LOG by Initialize()
             perfMode        : 3,   //!< One of the UMD_PERF_MODE values
             genAndroid      : 4,
             genPlatform_ext : 2,   //!< Next 2 bits of (mapped platform id - 8)
             reserved        : 13;
};
106 
//! Byte offset, from the start of the perf store buffer, of the PerfEntry with the
//! given index: one NodeHeader followed by perfDataIndex PerfEntry records.
//! The argument is parenthesized so that expressions such as (index + 1) expand correctly.
#define BASE_OF_NODE(perfDataIndex) (sizeof(NodeHeader) + (sizeof(PerfEntry) * (perfDataIndex)))
108 
//! Evaluates _stmt once; if the result is not MOS_STATUS_SUCCESS, returns it from
//! the enclosing function. Only usable in functions that return MOS_STATUS.
#define CHK_STATUS_RETURN(_stmt)                              \
{                                                             \
    const MOS_STATUS stmtStatus = (MOS_STATUS)(_stmt);        \
    if (MOS_STATUS_SUCCESS != stmtStatus)                     \
        return stmtStatus;                                    \
}
117 
//! Returns MOS_STATUS_NULL_POINTER from the enclosing function when _ptr compares
//! equal to nullptr.
#define CHK_NULL_RETURN(_ptr)                                 \
{                                                             \
    if ((_ptr) == nullptr)                                    \
        return MOS_STATUS_NULL_POINTER;                       \
}
125 
//! Leaves the enclosing void function when _ptr compares equal to nullptr.
#define CHK_NULL_NO_STATUS_RETURN(_ptr)                       \
{                                                             \
    if ((_ptr) == nullptr)                                    \
        return;                                               \
}
133 
//! Same as CHK_STATUS_RETURN, but unlocks m_mutex before returning the failure.
//! Expects a name m_mutex in scope at the point of use.
#define CHK_STATUS_UNLOCK_MUTEX_RETURN(_stmt)                 \
{                                                             \
    const MOS_STATUS stmtStatus = (MOS_STATUS)(_stmt);        \
    if (MOS_STATUS_SUCCESS != stmtStatus)                     \
    {                                                         \
        MosUtilities::MosUnlockMutex(m_mutex);                \
        return stmtStatus;                                    \
    }                                                         \
}
143 
//! Same as CHK_NULL_RETURN, but unlocks m_mutex before returning
//! MOS_STATUS_NULL_POINTER. Expects a name m_mutex in scope at the point of use.
#define CHK_NULL_UNLOCK_MUTEX_RETURN(_ptr)                    \
{                                                             \
    if ((_ptr) == nullptr)                                    \
    {                                                         \
        MosUtilities::MosUnlockMutex(m_mutex);                \
        return MOS_STATUS_NULL_POINTER;                       \
    }                                                         \
}
152 
MediaPerfProfiler()153 MediaPerfProfiler::MediaPerfProfiler()
154 {
155     m_perfStoreBufferMap.clear();
156     m_perfDataIndexMap.clear();
157     m_refMap.clear();
158     m_initializedMap.clear();
159 
160     m_profilerEnabled = 0;
161 
162     m_mutex = MosUtilities::MosCreateMutex();
163 
164     if (m_mutex)
165     {
166         // m_mutex is destroyed after MemNinja report, this will cause fake memory leak,
167         // the following 2 lines is to circumvent Memninja counter validation and log parser
168         MosUtilities::MosAtomicDecrement(MosUtilities::m_mosMemAllocCounter);
169         MOS_MEMNINJA_FREE_MESSAGE(m_mutex, __FUNCTION__, __FILE__, __LINE__);
170         PRINT_DESTROY_MEMORY(MT_MOS_DESTROY_MEMORY, MT_NORMAL, MT_MEMORY_PTR, (int64_t)(m_mutex), __FUNCTION__, __FILE__, __LINE__);
171     }
172     else
173     {
174         MOS_OS_ASSERTMESSAGE("Create Mutex failed!");
175     }
176 }
177 
~MediaPerfProfiler()178 MediaPerfProfiler::~MediaPerfProfiler()
179 {
180     if (m_mutex != nullptr)
181     {
182         MosUtilities::MosDestroyMutex(m_mutex);
183         m_mutex = nullptr;
184     }
185 }
186 
Instance()187 MediaPerfProfiler* MediaPerfProfiler::Instance()
188 {
189     static MediaPerfProfiler instance;
190     if (!instance.m_mutex && instance.m_profilerEnabled)
191     {
192         MOS_OS_ASSERTMESSAGE("Create MediaPerfProfiler failed!");
193         return nullptr;
194     }
195     else
196     {
197         return &instance;
198     }
199 }
200 
Destroy(MediaPerfProfiler * profiler,void * context,MOS_INTERFACE * osInterface)201 void MediaPerfProfiler::Destroy(MediaPerfProfiler* profiler, void* context, MOS_INTERFACE *osInterface)
202 {
203     PERF_UTILITY_PRINT;
204 
205     CHK_NULL_NO_STATUS_RETURN(profiler);
206     CHK_NULL_NO_STATUS_RETURN(osInterface);
207 
208     if (profiler->m_profilerEnabled == 0 || profiler->m_mutex == nullptr)
209     {
210         return;
211     }
212 
213     PMOS_CONTEXT pOsContext = osInterface->pOsContext;
214     CHK_NULL_NO_STATUS_RETURN(pOsContext);
215     MosUtilities::MosLockMutex(profiler->m_mutex);
216     if (profiler->m_refMap[pOsContext] > 0)
217     {
218         profiler->m_refMap[pOsContext]--;
219     }
220     osInterface->pfnWaitAllCmdCompletion(osInterface);
221 
222     profiler->m_contextIndexMap.erase(context);
223 
224     if (profiler->m_refMap[pOsContext] == 0)
225     {
226         if (profiler->m_initializedMap[pOsContext] == true)
227         {
228             if(profiler->m_enableProfilerDump)
229             {
230                 profiler->SavePerfData(osInterface);
231             }
232 
233             osInterface->pfnFreeResource(
234                 osInterface,
235                 profiler->m_perfStoreBufferMap[pOsContext]);
236 
237             MOS_FreeMemAndSetNull(profiler->m_perfStoreBufferMap[pOsContext]);
238 
239             profiler->m_perfStoreBufferMap.erase(pOsContext);
240             profiler->m_initializedMap.erase(pOsContext);
241             profiler->m_refMap.erase(pOsContext);
242             profiler->m_perfDataIndexMap.erase(pOsContext);
243         }
244 
245         MosUtilities::MosUnlockMutex(profiler->m_mutex);
246     }
247     else
248     {
249         MosUtilities::MosUnlockMutex(profiler->m_mutex);
250     }
251 }
252 
Initialize(void * context,MOS_INTERFACE * osInterface)253 MOS_STATUS MediaPerfProfiler::Initialize(void* context, MOS_INTERFACE *osInterface)
254 {
255     MOS_STATUS status = MOS_STATUS_SUCCESS;
256     CHK_NULL_RETURN(osInterface);
257     CHK_NULL_RETURN(m_mutex);
258 
259     PMOS_CONTEXT pOsContext = osInterface->pOsContext;
260     CHK_NULL_RETURN(pOsContext);
261     MediaUserSettingSharedPtr userSettingPtr = osInterface->pfnGetUserSettingInstance(osInterface);
262     // Check whether profiler is enabled
263     ReadUserSetting(
264         userSettingPtr,
265         m_profilerEnabled,
266         __MEDIA_USER_FEATURE_VALUE_PERF_PROFILER_ENABLE,
267         MediaUserSetting::Group::Device);
268 
269     if (m_profilerEnabled == 0 || m_mutex == nullptr)
270     {
271         return MOS_STATUS_SUCCESS;
272     }
273 
274     MosUtilities::MosLockMutex(m_mutex);
275 
276     m_contextIndexMap[context] = 0;
277 
278     if (m_initializedMap[pOsContext] == true)
279     {
280         MosUtilities::MosUnlockMutex(m_mutex);
281         return status;
282     }
283 
284     m_refMap[pOsContext]++;
285 
286     m_enableProfilerDump = MosUtilities::MosIsProfilerDumpEnabled();
287 
288     // Read output file name
289     status = ReadUserSetting(
290                 userSettingPtr,
291                 m_outputFileName,
292                 __MEDIA_USER_FEATURE_VALUE_PERF_PROFILER_OUTPUT_FILE_NAME,
293                 MediaUserSetting::Group::Device);
294     if (status != MOS_STATUS_SUCCESS)
295     {
296         MosUtilities::MosUnlockMutex(m_mutex);
297         return status;
298     }
299 
300     // Read buffer size
301     ReadUserSetting(
302             userSettingPtr,
303             m_bufferSize,
304             __MEDIA_USER_FEATURE_VALUE_PERF_PROFILER_BUFFER_SIZE_KEY,
305             MediaUserSetting::Group::Device);
306 
307     m_timerBase = osInterface->pfnGetTsFrequency(osInterface);
308 
309     // Read multi processes support
310     ReadUserSetting(
311         userSettingPtr,
312         m_multiprocess,
313         __MEDIA_USER_FEATURE_VALUE_PERF_PROFILER_ENABLE_MUL_PROC,
314         MediaUserSetting::Group::Device);
315 
316     // Read multi header support
317     ReadUserSetting(
318         userSettingPtr,
319         m_mergeheader,
320         __MEDIA_USER_FEATURE_VALUE_PERF_PROFILER_ENABLE_MER_HEADER,
321         MediaUserSetting::Group::Device);
322 
323     // Read memory information register address
324     int8_t regIndex = 0;
325     for (regIndex = 0; regIndex < 8; regIndex++)
326     {
327         ReadUserSetting(
328             userSettingPtr,
329             m_registers[regIndex],
330             m_registersKey[regIndex],
331             MediaUserSetting::Group::Device);
332     }
333 
334     // Read multi processes single binary flag
335     ReadUserSetting(
336         userSettingPtr,
337         m_multiprocesssinglebin,
338         __MEDIA_USER_FEATURE_VALUE_PERF_PROFILER_MUL_PROC_SINGLE_BIN,
339         MediaUserSetting::Group::Device);
340 
341     PMOS_RESOURCE  pPerfStoreBuffer = (PMOS_RESOURCE)MOS_AllocAndZeroMemory(sizeof(MOS_RESOURCE));
342     m_perfStoreBufferMap[pOsContext] = pPerfStoreBuffer;
343     // Allocate the buffer which store the performance data
344     MOS_ALLOC_GFXRES_PARAMS allocParams;
345     MOS_ZeroMemory(&allocParams, sizeof(MOS_ALLOC_GFXRES_PARAMS));
346     allocParams.Type        = MOS_GFXRES_BUFFER;
347     allocParams.TileType    = MOS_TILE_LINEAR;
348     allocParams.Format      = Format_Buffer;
349     allocParams.dwBytes     = m_bufferSize;
350     allocParams.pBufName    = "PerfStoreBuffer";
351 
352     status = osInterface->pfnAllocateResource(
353                                         osInterface,
354                                         &allocParams,
355                                         pPerfStoreBuffer);
356 
357     CHK_STATUS_UNLOCK_MUTEX_RETURN(status);
358 
359     CHK_STATUS_UNLOCK_MUTEX_RETURN(
360         osInterface->pfnSkipResourceSync(pPerfStoreBuffer));
361 
362     PLATFORM platform = { IGFX_UNKNOWN };
363     osInterface->pfnGetPlatform(osInterface, &platform);
364 
365     MOS_LOCK_PARAMS lockFlags;
366     MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
367     lockFlags.WriteOnly   = 1;
368 
369     NodeHeader* header = (NodeHeader*)osInterface->pfnLockResource(
370             osInterface,
371             pPerfStoreBuffer,
372             &lockFlags);
373 
374     CHK_NULL_UNLOCK_MUTEX_RETURN(header);
375 
376     // Append the header info
377     MOS_ZeroMemory(header, m_bufferSize);
378     header->eventType   = UMD_PERF_LOG;
379 
380     uint32_t mappedPlatFormId = PlatFormIdMap(platform);
381     header->genPlatform = (mappedPlatFormId - 8) & 0x7;
382     header->genPlatform_ext = ((mappedPlatFormId - 8) >> 3) & 0x3;
383 
384     if (IsPerfModeWidthMemInfo(m_registers))
385     {
386         header->perfMode    = UMD_PERF_MODE_WITH_MEMORY_INFO;
387     }
388     else
389     {
390         header->perfMode    = UMD_PERF_MODE_TIMING_ONLY;
391     }
392 
393     osInterface->pfnUnlockResource(
394             osInterface,
395             pPerfStoreBuffer);
396 
397     m_initializedMap[pOsContext] = true;
398 
399     MosUtilities::MosUnlockMutex(m_mutex);
400 
401     return MOS_STATUS_SUCCESS;
402 }
403 
StoreData(std::shared_ptr<mhw::mi::Itf> miItf,PMOS_COMMAND_BUFFER cmdBuffer,MOS_CONTEXT_HANDLE pOsContext,uint32_t offset,uint32_t value)404 MOS_STATUS MediaPerfProfiler::StoreData(
405     std::shared_ptr<mhw::mi::Itf> miItf,
406     PMOS_COMMAND_BUFFER           cmdBuffer,
407     MOS_CONTEXT_HANDLE            pOsContext,
408     uint32_t                      offset,
409     uint32_t                      value)
410 {
411     CHK_NULL_RETURN(miItf);
412 
413     auto& storeDataParams            = miItf->MHW_GETPAR_F(MI_STORE_DATA_IMM)();
414     storeDataParams                  = {};
415     storeDataParams.pOsResource      = m_perfStoreBufferMap[(PMOS_CONTEXT)pOsContext];
416     storeDataParams.dwResourceOffset = offset;
417     storeDataParams.dwValue          = value;
418     CHK_STATUS_RETURN(miItf->MHW_ADDCMD_F(MI_STORE_DATA_IMM)(cmdBuffer));
419 
420     return MOS_STATUS_SUCCESS;
421 }
422 
StoreRegister(MOS_INTERFACE * osInterface,std::shared_ptr<mhw::mi::Itf> miItf,PMOS_COMMAND_BUFFER cmdBuffer,uint32_t offset,uint32_t reg)423 MOS_STATUS MediaPerfProfiler::StoreRegister(
424     MOS_INTERFACE                 *osInterface,
425     std::shared_ptr<mhw::mi::Itf> miItf,
426     PMOS_COMMAND_BUFFER           cmdBuffer,
427     uint32_t                      offset,
428     uint32_t                      reg)
429 {
430     CHK_NULL_RETURN(osInterface);
431     CHK_NULL_RETURN(miItf);
432 
433     auto& storeRegMemParams           = miItf->MHW_GETPAR_F(MI_STORE_REGISTER_MEM)();
434     storeRegMemParams                 = {};
435     storeRegMemParams.presStoreBuffer = m_perfStoreBufferMap[osInterface->pOsContext];
436     storeRegMemParams.dwOffset        = offset;
437     storeRegMemParams.dwRegister      = reg;
438 
439     MEDIA_FEATURE_TABLE* skuTable = osInterface->pfnGetSkuTable(osInterface);
440     if(skuTable && MEDIA_IS_SKU(skuTable, FtrMemoryRemapSupport))
441     {
442         storeRegMemParams.dwOption = CCS_HW_FRONT_END_MMIO_REMAP;
443     }
444 
445     CHK_STATUS_RETURN(miItf->MHW_ADDCMD_F(MI_STORE_REGISTER_MEM)(cmdBuffer));
446 
447     return MOS_STATUS_SUCCESS;
448 }
449 
StoreTSByPipeCtrl(std::shared_ptr<mhw::mi::Itf> miItf,PMOS_COMMAND_BUFFER cmdBuffer,MOS_CONTEXT_HANDLE pOsContext,uint32_t offset)450 MOS_STATUS MediaPerfProfiler::StoreTSByPipeCtrl(
451     std::shared_ptr<mhw::mi::Itf> miItf,
452     PMOS_COMMAND_BUFFER           cmdBuffer,
453     MOS_CONTEXT_HANDLE            pOsContext,
454     uint32_t                      offset)
455 {
456     CHK_NULL_RETURN(miItf);
457 
458     auto& PipeControlParams            = miItf->MHW_GETPAR_F(PIPE_CONTROL)();
459     PipeControlParams                  = {};
460     PipeControlParams.dwResourceOffset = offset;
461     PipeControlParams.dwPostSyncOp     = MHW_FLUSH_WRITE_TIMESTAMP_REG;
462     PipeControlParams.dwFlushMode      = MHW_FLUSH_READ_CACHE;
463     PipeControlParams.presDest         = m_perfStoreBufferMap[(PMOS_CONTEXT)pOsContext];
464 
465     CHK_STATUS_RETURN(miItf->MHW_ADDCMD_F(PIPE_CONTROL)(cmdBuffer));
466 
467     return MOS_STATUS_SUCCESS;
468 }
469 
StoreTSByMiFlush(std::shared_ptr<mhw::mi::Itf> miItf,PMOS_COMMAND_BUFFER cmdBuffer,MOS_CONTEXT_HANDLE pOsContext,uint32_t offset)470 MOS_STATUS MediaPerfProfiler::StoreTSByMiFlush(
471     std::shared_ptr<mhw::mi::Itf> miItf,
472     PMOS_COMMAND_BUFFER           cmdBuffer,
473     MOS_CONTEXT_HANDLE            pOsContext,
474     uint32_t                      offset)
475 {
476     CHK_NULL_RETURN(miItf);
477 
478     auto& FlushDwParams             = miItf->MHW_GETPAR_F(MI_FLUSH_DW)();
479     FlushDwParams                   = {};
480     FlushDwParams.postSyncOperation = MHW_FLUSH_WRITE_TIMESTAMP_REG;
481     FlushDwParams.dwResourceOffset  = offset;
482     FlushDwParams.pOsResource       = m_perfStoreBufferMap[(PMOS_CONTEXT)pOsContext];
483 
484     CHK_STATUS_RETURN(miItf->MHW_ADDCMD_F(MI_FLUSH_DW)(cmdBuffer));
485 
486     return MOS_STATUS_SUCCESS;
487 }
488 
AddPerfCollectStartCmd(void * context,MOS_INTERFACE * osInterface,std::shared_ptr<mhw::mi::Itf> miItf,MOS_COMMAND_BUFFER * cmdBuffer)489 MOS_STATUS MediaPerfProfiler::AddPerfCollectStartCmd(
490     void                          *context,
491     MOS_INTERFACE                 *osInterface,
492     std::shared_ptr<mhw::mi::Itf> miItf,
493     MOS_COMMAND_BUFFER            *cmdBuffer)
494 {
495     MOS_STATUS status = MOS_STATUS_SUCCESS;
496 
497     CHK_NULL_RETURN(osInterface);
498     CHK_NULL_RETURN(miItf);
499     CHK_NULL_RETURN(cmdBuffer);
500     CHK_NULL_RETURN(m_mutex);
501 
502     PMOS_CONTEXT pOsContext = osInterface->pOsContext;
503     CHK_NULL_RETURN(pOsContext);
504 
505     if (m_profilerEnabled == 0 || m_initializedMap[pOsContext] == false)
506     {
507         return status;
508     }
509 
510     uint32_t perfDataIndex = 0;
511 
512     MosUtilities::MosLockMutex(m_mutex);
513 
514     perfDataIndex = m_perfDataIndexMap[pOsContext];
515     m_perfDataIndexMap[pOsContext]++;
516     m_contextIndexMap[context] = perfDataIndex;
517 
518     MosUtilities::MosUnlockMutex(m_mutex);
519 
520     bool             rcsEngineUsed = false;
521     MOS_GPU_CONTEXT  gpuContext;
522 
523     gpuContext     = osInterface->pfnGetGpuContext(osInterface);
524     rcsEngineUsed = MOS_RCS_ENGINE_USED(gpuContext);
525 
526     if (m_multiprocess)
527     {
528         CHK_STATUS_RETURN(StoreData(
529             miItf,
530             cmdBuffer,
531             pOsContext,
532             BASE_OF_NODE(perfDataIndex) + OFFSET_OF(PerfEntry, processId),
533             MosUtilities::MosGetPid()));
534     }
535 
536     CHK_STATUS_RETURN(StoreData(
537         miItf,
538         cmdBuffer,
539         pOsContext,
540         BASE_OF_NODE(perfDataIndex) + OFFSET_OF(PerfEntry, perfTag),
541         osInterface->pfnGetPerfTag(osInterface)));
542 
543     CHK_STATUS_RETURN(StoreData(
544         miItf,
545         cmdBuffer,
546         pOsContext,
547         BASE_OF_NODE(perfDataIndex) + OFFSET_OF(PerfEntry, engineTag),
548         GpuContextToGpuNode(gpuContext)));
549 
550     if (m_timerBase != 0)
551     {
552         CHK_STATUS_RETURN(StoreData(
553             miItf,
554             cmdBuffer,
555             pOsContext,
556             BASE_OF_NODE(perfDataIndex) + OFFSET_OF(PerfEntry, timeStampBase),
557             m_timerBase));
558     }
559 
560     int8_t regIndex = 0;
561     for (regIndex = 0; regIndex < 8; regIndex++)
562     {
563         if (m_registers[regIndex] != 0)
564         {
565             CHK_STATUS_RETURN(StoreRegister(
566                 osInterface,
567                 miItf,
568                 cmdBuffer,
569                 BASE_OF_NODE(perfDataIndex) + OFFSET_OF(PerfEntry, beginRegisterValue[regIndex]),
570                 m_registers[regIndex]));
571         }
572     }
573 
574     uint64_t beginCPUTimestamp = MosUtilities::MosGetCurTime();
575     uint32_t timeStamp[2];
576     MOS_SecureMemcpy(timeStamp, 2*sizeof(uint32_t), &beginCPUTimestamp, 2*sizeof(uint32_t));
577 
578     for (int i = 0; i < 2; i++)
579     {
580         CHK_STATUS_RETURN(StoreData(
581             miItf,
582             cmdBuffer,
583             pOsContext,
584             BASE_OF_NODE(perfDataIndex) + OFFSET_OF(PerfEntry, beginCpuTime[i]),
585             timeStamp[i]));
586     }
587 
588     // The address of timestamp must be 8 bytes aligned.
589     uint32_t offset = BASE_OF_NODE(perfDataIndex) + OFFSET_OF(PerfEntry, beginTimeClockValue);
590     offset = MOS_ALIGN_CEIL(offset, 8);
591 
592     if (rcsEngineUsed)
593     {
594         CHK_STATUS_RETURN(StoreTSByPipeCtrl(
595             miItf,
596             cmdBuffer,
597             pOsContext,
598             offset));
599     }
600     else
601     {
602         CHK_STATUS_RETURN(StoreTSByMiFlush(
603             miItf,
604             cmdBuffer,
605             pOsContext,
606             offset));
607     }
608 
609     return status;
610 }
611 
AddPerfCollectEndCmd(void * context,MOS_INTERFACE * osInterface,std::shared_ptr<mhw::mi::Itf> miItf,MOS_COMMAND_BUFFER * cmdBuffer)612 MOS_STATUS MediaPerfProfiler::AddPerfCollectEndCmd(
613     void                          *context,
614     MOS_INTERFACE                 *osInterface,
615     std::shared_ptr<mhw::mi::Itf> miItf,
616     MOS_COMMAND_BUFFER            *cmdBuffer)
617 {
618     MOS_STATUS       status        = MOS_STATUS_SUCCESS;
619 
620     CHK_NULL_RETURN(osInterface);
621     CHK_NULL_RETURN(miItf);
622     CHK_NULL_RETURN(cmdBuffer);
623 
624     PMOS_CONTEXT pOsContext = osInterface->pOsContext;
625     CHK_NULL_RETURN(pOsContext);
626 
627     if (m_profilerEnabled == 0 || m_initializedMap[pOsContext] == false)
628     {
629         return status;
630     }
631 
632     MOS_GPU_CONTEXT  gpuContext;
633     bool             rcsEngineUsed = false;
634     uint32_t         perfDataIndex = 0;
635 
636     gpuContext     = osInterface->pfnGetGpuContext(osInterface);
637     rcsEngineUsed = MOS_RCS_ENGINE_USED(gpuContext);
638 
639     perfDataIndex = m_contextIndexMap[context];
640 
641     int8_t regIndex = 0;
642     for (regIndex = 0; regIndex < 8; regIndex++)
643     {
644         if (m_registers[regIndex] != 0)
645         {
646             CHK_STATUS_RETURN(StoreRegister(
647                 osInterface,
648                 miItf,
649                 cmdBuffer,
650                 BASE_OF_NODE(perfDataIndex) + OFFSET_OF(PerfEntry, endRegisterValue[regIndex]),
651                 m_registers[regIndex]));
652         }
653     }
654 
655     // The address of timestamp must be 8 bytes aligned.
656     uint32_t offset = BASE_OF_NODE(perfDataIndex) + OFFSET_OF(PerfEntry, endTimeClockValue);
657     offset = MOS_ALIGN_CEIL(offset, 8);
658 
659     if (rcsEngineUsed)
660     {
661         CHK_STATUS_RETURN(StoreTSByPipeCtrl(
662             miItf,
663             cmdBuffer,
664             pOsContext,
665             offset));
666     }
667     else
668     {
669         CHK_STATUS_RETURN(StoreTSByMiFlush(
670             miItf,
671             cmdBuffer,
672             pOsContext,
673             offset));
674     }
675 
676     return status;
677 }
678 
AddStoreBitstreamSizeCmd(void * context,MOS_INTERFACE * osInterface,std::shared_ptr<mhw::mi::Itf> & miItf,MOS_COMMAND_BUFFER * cmdBuffer,uint32_t reg)679 MOS_STATUS MediaPerfProfiler::AddStoreBitstreamSizeCmd(
680     void                           *context,
681     MOS_INTERFACE                  *osInterface,
682     std::shared_ptr<mhw::mi::Itf>& miItf,
683     MOS_COMMAND_BUFFER             *cmdBuffer,
684     uint32_t                       reg)
685 {
686     MOS_STATUS status = MOS_STATUS_SUCCESS;
687 
688     if (m_profilerEnabled == 0)
689     {
690         return status;
691     }
692 
693     CHK_NULL_RETURN(context);
694     CHK_NULL_RETURN(osInterface);
695     CHK_NULL_RETURN(miItf);
696     CHK_NULL_RETURN(cmdBuffer);
697 
698     PMOS_CONTEXT pOsContext = osInterface->pOsContext;
699     CHK_NULL_RETURN(pOsContext);
700 
701     uint32_t perfDataIndex = m_contextIndexMap[context];
702 
703     CHK_STATUS_RETURN(StoreRegister(
704         osInterface,
705         miItf,
706         cmdBuffer,
707         BASE_OF_NODE(perfDataIndex) + OFFSET_OF(PerfEntry, bitstreamSize),
708         reg));
709 
710     return status;
711 }
712 
CopyMemData(std::shared_ptr<mhw::mi::Itf> & miItf,PMOS_COMMAND_BUFFER cmdBuffer,MOS_CONTEXT_HANDLE pOsContext,PMOS_RESOURCE presSrc,uint32_t dwSrcOffset,uint32_t dwDstOffset)713 MOS_STATUS MediaPerfProfiler::CopyMemData(
714     std::shared_ptr<mhw::mi::Itf>& miItf,
715     PMOS_COMMAND_BUFFER            cmdBuffer,
716     MOS_CONTEXT_HANDLE             pOsContext,
717     PMOS_RESOURCE                  presSrc,
718     uint32_t                       dwSrcOffset,
719     uint32_t                       dwDstOffset)
720 {
721     CHK_NULL_RETURN(miItf);
722 
723     auto &miCpyMemMemParams = miItf->MHW_GETPAR_F(MI_COPY_MEM_MEM)();
724     miCpyMemMemParams       = {};
725 
726     miCpyMemMemParams.presSrc     = presSrc;
727     miCpyMemMemParams.dwSrcOffset = dwSrcOffset;
728     miCpyMemMemParams.presDst     = m_perfStoreBufferMap[(PMOS_CONTEXT)pOsContext];
729     miCpyMemMemParams.dwDstOffset = dwDstOffset;
730     CHK_STATUS_RETURN(miItf->MHW_ADDCMD_F(MI_COPY_MEM_MEM)(cmdBuffer));
731 
732     return MOS_STATUS_SUCCESS;
733 }
734 
AddCopyQualityMetricCmd(void * context,MOS_INTERFACE * osInterface,std::shared_ptr<mhw::mi::Itf> & miItf,MOS_COMMAND_BUFFER * cmdBuffer,UMD_QUALITY_METRIC_ITEM item,PMOS_RESOURCE presSrc,uint32_t dwSrcOffset)735 MOS_STATUS MediaPerfProfiler::AddCopyQualityMetricCmd(
736     void                           *context,
737     MOS_INTERFACE                  *osInterface,
738     std::shared_ptr<mhw::mi::Itf>& miItf,
739     MOS_COMMAND_BUFFER             *cmdBuffer,
740     UMD_QUALITY_METRIC_ITEM        item,
741     PMOS_RESOURCE                  presSrc,
742     uint32_t                       dwSrcOffset)
743 {
744     MOS_STATUS status = MOS_STATUS_SUCCESS;
745     uint32_t   offset = 0;
746 
747     if (m_profilerEnabled == 0)
748     {
749         return status;
750     }
751 
752     CHK_NULL_RETURN(context);
753     CHK_NULL_RETURN(osInterface);
754     CHK_NULL_RETURN(miItf);
755     CHK_NULL_RETURN(cmdBuffer);
756     CHK_NULL_RETURN(presSrc);
757 
758     PMOS_CONTEXT pOsContext = osInterface->pOsContext;
759     CHK_NULL_RETURN(pOsContext);
760 
761     uint32_t perfDataIndex = m_contextIndexMap[context];
762 
763     switch (item)
764     {
765         case UMD_QUALITY_ITEM_SSEY:
766             offset = BASE_OF_NODE(perfDataIndex) + OFFSET_OF(PerfEntry, SSEY);
767             break;
768         case UMD_QUALITY_ITEM_SSEU:
769             offset = BASE_OF_NODE(perfDataIndex) + OFFSET_OF(PerfEntry, SSEU);
770             break;
771         case UMD_QUALITY_ITEM_SSEV:
772             offset = BASE_OF_NODE(perfDataIndex) + OFFSET_OF(PerfEntry, SSEV);
773             break;
774         case UMD_QUALITY_ITEM_MEAN_SSIM_YU:
775             offset = BASE_OF_NODE(perfDataIndex) + OFFSET_OF(PerfEntry, DWMeanSsimLayer1_YU);
776             break;
777         case UMD_QUALITY_ITEM_MEAN_SSIM_V:
778             offset = BASE_OF_NODE(perfDataIndex) + OFFSET_OF(PerfEntry, DWMeanSsimLayer1_V);
779             break;
780         default:
781             status = MOS_STATUS_INVALID_PARAMETER;
782             break;
783     }
784 
785     if (status == MOS_STATUS_SUCCESS)
786     {
787         CHK_STATUS_RETURN(CopyMemData(miItf, cmdBuffer, pOsContext, presSrc, dwSrcOffset, offset));
788     }
789 
790     return status;
791 }
792 
SavePerfData(MOS_INTERFACE * osInterface)793 MOS_STATUS MediaPerfProfiler::SavePerfData(MOS_INTERFACE *osInterface)
794 {
795     MOS_STATUS status = MOS_STATUS_SUCCESS;
796 
797     CHK_NULL_RETURN(osInterface);
798 
799     PMOS_CONTEXT pOsContext = osInterface->pOsContext;
800     CHK_NULL_RETURN(pOsContext);
801 
802     if (m_multiprocesssinglebin)
803     {
804         uint32_t        cnt                     = 0;
805         MOS_LOCK_PARAMS LockFlagsNoOverWrite    = {};
806 
807         MOS_ZeroMemory(&LockFlagsNoOverWrite, sizeof(MOS_LOCK_PARAMS));
808         LockFlagsNoOverWrite.WriteOnly = 1;
809         LockFlagsNoOverWrite.NoOverWrite = 1;
810 
811         if (m_perfDataCombined == nullptr)
812         {
813             m_perfDataCombinedSize  = 96;
814 
815             for (auto iter = m_perfDataIndexMap.begin(); iter != m_perfDataIndexMap.end(); ++iter)
816             {
817                 if (iter->second > 0)
818                 {
819                     m_perfDataCombinedSize += BASE_OF_NODE(m_perfDataIndexMap[iter->first]) + 4;
820                     cnt += 1;
821                 }
822             }
823 
824             if (cnt == 0)
825             {
826                 return status;
827             }
828 
829             m_perfDataCombined = (uint32_t *)MOS_AllocAndZeroMemory(m_perfDataCombinedSize);
830             CHK_NULL_RETURN(m_perfDataCombined);
831 
832             m_perfDataCombined[0] = 0x8086;
833             m_perfDataCombined[2] = m_perfDataCombinedSize - (cnt * 4);
834             m_perfDataCombined[3] = cnt;
835 
836             m_perfDataCombinedOffset = 96 + (cnt * 4);
837         }
838 
839         if (m_perfDataIndexMap[pOsContext] > 0)
840         {
841             uint8_t* pData = (uint8_t*)osInterface->pfnLockResource(
842                 osInterface,
843                 m_perfStoreBufferMap[pOsContext],
844                 &LockFlagsNoOverWrite);
845 
846             CHK_NULL_RETURN(pData);
847             MOS_SecureMemcpy(((uint8_t *)m_perfDataCombined) + m_perfDataCombinedOffset, BASE_OF_NODE(m_perfDataIndexMap[pOsContext]), pData, BASE_OF_NODE(m_perfDataIndexMap[pOsContext]));
848 
849             osInterface->pfnUnlockResource(
850                 osInterface,
851                 m_perfStoreBufferMap[pOsContext]);
852 
853             m_perfDataCombinedOffset += BASE_OF_NODE(m_perfDataIndexMap[pOsContext]);
854             m_perfDataCombined[24 + m_perfDataCombinedIndex] = BASE_OF_NODE(m_perfDataIndexMap[pOsContext]);
855             m_perfDataCombinedIndex ++;
856 
857             if (m_perfDataCombinedOffset == m_perfDataCombinedSize)
858             {
859                 MosUtilities::MosWriteFileFromPtr(m_outputFileName.c_str(), m_perfDataCombined, m_perfDataCombinedSize);
860                 MOS_SafeFreeMemory(m_perfDataCombined);
861                 m_perfDataCombined = nullptr;
862                 m_perfDataCombinedIndex = 0;
863                 m_perfDataCombinedOffset = 0;
864                 m_perfDataCombinedSize = 0;
865             }
866         }
867 
868         return status;
869     }
870     else if (m_perfDataIndexMap[pOsContext] > 0)
871     {
872         MOS_LOCK_PARAMS     LockFlagsNoOverWrite;
873         MOS_ZeroMemory(&LockFlagsNoOverWrite, sizeof(MOS_LOCK_PARAMS));
874 
875         LockFlagsNoOverWrite.WriteOnly = 1;
876         LockFlagsNoOverWrite.NoOverWrite = 1;
877 
878         uint8_t* pData = (uint8_t*)osInterface->pfnLockResource(
879             osInterface,
880             m_perfStoreBufferMap[pOsContext],
881             &LockFlagsNoOverWrite);
882 
883         CHK_NULL_RETURN(pData);
884 
885         if (m_multiprocess)
886         {
887             int32_t pid = MosUtilities::MosGetPid();
888             tm      localtime = { 0 };
889             MosUtilities::MosGetLocalTime(&localtime);
890             char outputFileName[MOS_MAX_PATH_LENGTH + 1];
891 
892             MOS_SecureStringPrint(outputFileName, MOS_MAX_PATH_LENGTH + 1, MOS_MAX_PATH_LENGTH + 1, "%s-pid%d-context%p-%04d%02d%02d%02d%02d%02d.bin",
893                 m_outputFileName.c_str(), pid, pOsContext, localtime.tm_year + 1900, localtime.tm_mon + 1, localtime.tm_mday, localtime.tm_hour, localtime.tm_min, localtime.tm_sec);
894 
895             MosUtilities::MosWriteFileFromPtr(outputFileName, pData, BASE_OF_NODE(m_perfDataIndexMap[pOsContext]));
896         }
897         else if (m_mergeheader)
898         {
899             NodeHeader *header = reinterpret_cast<NodeHeader *>(pData);
900             char outputFileName[MOS_MAX_PATH_LENGTH + 1];
901             MOS_SecureStringPrint(outputFileName, MOS_MAX_PATH_LENGTH + 1, MOS_MAX_PATH_LENGTH + 1, "%s-header%u.bin", m_outputFileName.c_str(), *reinterpret_cast<uint32_t*>(header));
902             HANDLE hFile = nullptr;
903             if (MosUtilities::MosCreateFile(&hFile, outputFileName, 0) != MOS_STATUS_SUCCESS)
904             {
905                 MosUtilities::MosWriteFileFromPtr(outputFileName, pData, BASE_OF_NODE(m_perfDataIndexMap[pOsContext]));
906             }
907             else
908             {
909                 MosUtilities::MosCloseHandle(hFile);
910                 MosUtilities::MosAppendFileFromPtr(outputFileName, pData + sizeof(NodeHeader), BASE_OF_NODE(m_perfDataIndexMap[pOsContext]) - sizeof(NodeHeader));
911             }
912         }
913         else
914         {
915             MosUtilities::MosWriteFileFromPtr(m_outputFileName.c_str(), pData, BASE_OF_NODE(m_perfDataIndexMap[pOsContext]));
916         }
917 
918         osInterface->pfnUnlockResource(
919             osInterface,
920             m_perfStoreBufferMap[pOsContext]);
921     }
922 
923     return status;
924 }
925 
GpuContextToGpuNode(MOS_GPU_CONTEXT context)926 PerfGPUNode MediaPerfProfiler::GpuContextToGpuNode(MOS_GPU_CONTEXT context)
927 {
928     PerfGPUNode node = PERF_GPU_NODE_UNKNOW;
929 
930     switch (context)
931     {
932         case MOS_GPU_CONTEXT_RENDER:
933         case MOS_GPU_CONTEXT_RENDER2:
934         case MOS_GPU_CONTEXT_RENDER3:
935         case MOS_GPU_CONTEXT_RENDER4:
936         case MOS_GPU_OVERLAY_CONTEXT:
937         case MOS_GPU_CONTEXT_RENDER_RA:
938             node = PERF_GPU_NODE_3D;
939             break;
940         case MOS_GPU_CONTEXT_COMPUTE:
941         case MOS_GPU_CONTEXT_CM_COMPUTE:
942         case MOS_GPU_CONTEXT_COMPUTE_RA:
943             node = PERF_GPU_NODE_3D;
944             break;
945         case MOS_GPU_CONTEXT_VIDEO:
946         case MOS_GPU_CONTEXT_VIDEO2:
947         case MOS_GPU_CONTEXT_VIDEO3:
948         case MOS_GPU_CONTEXT_VIDEO4:
949         case MOS_GPU_CONTEXT_VIDEO5:
950         case MOS_GPU_CONTEXT_VIDEO6:
951         case MOS_GPU_CONTEXT_VIDEO7:
952             node = PERF_GPU_NODE_VIDEO;
953             break;
954         case MOS_GPU_CONTEXT_VDBOX2_VIDEO:
955         case MOS_GPU_CONTEXT_VDBOX2_VIDEO2:
956         case MOS_GPU_CONTEXT_VDBOX2_VIDEO3:
957             node = PERF_GPU_NODE_VIDEO2;
958             break;
959         case MOS_GPU_CONTEXT_VEBOX:
960         case MOS_GPU_CONTEXT_VEBOX2:
961             node = PERF_GPU_NODE_VE;
962             break;
963         case MOS_GPU_CONTEXT_BLT:
964             node = PERF_GPU_NODE_BLT;
965             break;
966         case MOS_GPU_CONTEXT_TEE:
967             node = PERF_GPU_NODE_TEE;
968             break;
969         default:
970             node = PERF_GPU_NODE_UNKNOW;
971             break;
972     }
973 
974     return node;
975 }
976 
PlatFormIdMap(PLATFORM platform)977 uint32_t MediaPerfProfiler::PlatFormIdMap(PLATFORM platform)
978 {
979     uint32_t perfPlatFormId = 0;
980 
981     if (GFX_GET_CURRENT_RENDERCORE(platform) > IGFX_GEN12LP_CORE)
982     {
983         perfPlatFormId = ((((uint32_t)(GFX_GET_CURRENT_RENDERCORE(platform)) >> 8) - 0xc) << 2) + (GFX_GET_CURRENT_RENDERCORE(platform) & 0x3) + (uint32_t)(IGFX_GEN12LP_CORE);
984     }
985     else
986     {
987         perfPlatFormId = (uint32_t)(GFX_GET_CURRENT_RENDERCORE(platform));
988     }
989 
990     return perfPlatFormId;
991 }
992 
IsPerfModeWidthMemInfo(uint32_t * regs)993 bool MediaPerfProfiler::IsPerfModeWidthMemInfo(uint32_t *regs)
994 {
995     int8_t index = 0;
996     bool   ret   = false;
997 
998     for (index = 0; index < 8; index++)
999     {
1000         if (regs[index] != 0)
1001         {
1002             ret = true;
1003             break;
1004         }
1005     }
1006 
1007     return ret;
1008 }
1009