1 /*
2 * Copyright (c) 2020-2022, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 #ifndef __VP_RENDER_KERNEL_OBJ_H__
23 #define __VP_RENDER_KERNEL_OBJ_H__
24 
#include "vp_pipeline_common.h"
#include "sw_filter.h"
#include "media_render_cmd_packet.h"
#include "vp_platform_interface.h"
#include <map>
#include <set>
#include <utility>
#include <vector>
32 
33 class RenderCmdPacket;
34 
35 namespace vp {
36 
//!
//! \brief Secure Block Copy kernel inline data size, in bytes (one uint32_t)
//!
#define SECURE_BLOCK_COPY_KERNEL_INLINE_SIZE    (1 * sizeof(uint32_t))
//!
//! \brief Secure Block Copy kernel width
//!
#define SECURE_BLOCK_COPY_KERNEL_SURF_WIDTH     64

//!
//! \brief Secure Block Copy kernel block height
//!
#define SECURE_BLOCK_COPY_KERNEL_BLOCK_HEIGHT   24

//!
//! \brief Kernel binary padding size; alias of CM_KERNEL_BINARY_PADDING_SIZE
//!
#define KERNEL_BINARY_PADDING_SIZE CM_KERNEL_BINARY_PADDING_SIZE
52 
53 typedef struct _KERNEL_SURFACE_STATE_PARAM
54 {
55     struct {
56         bool                           updatedSurfaceParams; // true if update format/width/height/pitch to renderSurface.OsSurface.
57         MOS_FORMAT                     format;               // MOS_FORMAT for processing surfaces
58         uint32_t                       width;
59         uint32_t                       height;
60         uint32_t                       pitch;
61 
62         uint32_t                       surface_offset;     // Offset to the origin of the surface, in bytes.
63         MOS_TILE_TYPE                  tileType;
64         bool                           bufferResource;
65         bool                           bindedKernel;        // true if bind index is hardcoded by bindIndex.
66         bool                           updatedRenderSurfaces; // true if renderSurfaceParams be used.
67         RENDERHAL_SURFACE_STATE_PARAMS renderSurfaceParams;  // default can be skip. for future usages, if surface configed by kernel, use it directlly
68     } surfaceOverwriteParams;
69 
70     bool                                isOutput;        // true for render target
71     PRENDERHAL_SURFACE_STATE_ENTRY      *surfaceEntries;
72     uint32_t                            *sizeOfSurfaceEntries;
73     uint32_t                            iCapcityOfSurfaceEntry = 0;
74     bool                                isBindlessSurface = false;
75 } KERNEL_SURFACE_STATE_PARAM;
76 
//!
//! \brief Kernel tuning parameters
//!
struct _KERNEL_TUNING_PARAMS
{
    uint32_t euThreadSchedulingMode;    // EU thread scheduling mode requested for the kernel
};
using KERNEL_TUNING_PARAMS  = _KERNEL_TUNING_PARAMS;
using PKERNEL_TUNING_PARAMS = _KERNEL_TUNING_PARAMS *;
81 
// Container aliases shared by the kernel objects and the render packet.
using KERNEL_CONFIGS = std::map<VpKernelID, void *>; // Only for legacy/non-cm kernels
using KERNEL_ARGS = std::vector<KRN_ARG>;                                            // list of kernel arguments
using KERNEL_SAMPLER_STATE_GROUP = std::map<SamplerIndex, MHW_SAMPLER_STATE_PARAM>;  // sampler state params keyed by sampler index
using KERNEL_SAMPLER_STATES = std::vector<MHW_SAMPLER_STATE_PARAM>;
using KERNEL_SAMPLER_INDEX = std::vector<SamplerIndex>;
using KERNEL_SURFACE_CONFIG = std::map<SurfaceType, KERNEL_SURFACE_STATE_PARAM>;     // surface state params keyed by surface type
using KERNEL_SURFACE_BINDING_INDEX = std::map<SurfaceType, std::set<uint32_t>>;      // binding indexes recorded per surface type
using KERNEL_STATELESS_BUFF_CONFIG = std::map<SurfaceType, uint64_t>;                // NOTE(review): uint64_t is presumably the buffer address — confirm
using KERNEL_BINDELESS_SURFACE = std::map<SurfaceType, std::set<uint32_t>>;          // bindless surface state offsets per surface type
using KERNEL_BINDELESS_SAMPLER = std::map<uint32_t, uint32_t>;                       // NOTE(review): key/value meaning not visible here — confirm
92 
93 typedef struct _KERNEL_PARAMS
94 {
95     VpKernelID                   kernelId;
96     KERNEL_ARGS                  kernelArgs;
97     KERNEL_THREAD_SPACE          kernelThreadSpace;
98     bool                         syncFlag;
99     bool                         flushL1;
100     KERNEL_TUNING_PARAMS         kernelTuningParams;
101     KERNEL_ARG_INDEX_SURFACE_MAP kernelStatefulSurfaces;
102 } KERNEL_PARAMS;
103 
//!
//! \brief Inline data layout made of 16 DWORDs (DW00 - DW15).
//!        Per the per-DWORD comments below, DW00 - DW07 correspond to GRF R7.0 - R7.7
//!        and DW08 - DW15 to GRF R8.0 - R8.7. Each DWORD is a union: one or more
//!        use-case specific views (anonymous structs) overlaid with a raw 32-bit
//!        Value. Member order and bit-field widths define the layout and must not
//!        be changed.
//!
struct MEDIA_OBJECT_KA2_INLINE_DATA
{
    // DWORD 0 - GRF R7.0
    union
    {
        // All
        struct
        {
            uint32_t       DestinationBlockHorizontalOrigin : 16;
            uint32_t       DestinationBlockVerticalOrigin : 16;
        };

        // Secure Block Copy
        struct
        {
            uint32_t       BlockHeight : 16;
            uint32_t       BufferOffset : 16;
        };

        // FMD Summation
        struct
        {
            uint32_t       StartRowOffset;
        };

        uint32_t       Value;   // raw view of the whole DWORD
    } DW00;

    // DWORD 1 - GRF R7.1
    union
    {
        // Composite
        struct
        {
            uint32_t       HorizontalBlockCompositeMaskLayer0 : 16;
            uint32_t       VerticalBlockCompositeMaskLayer0 : 16;
        };

        // FMD Summation
        struct
        {
            uint32_t       TotalRows;
        };

        uint32_t       Value;
    } DW01;

    // DWORD 2 - GRF R7.2
    union
    {
        // Composite
        struct
        {
            uint32_t       HorizontalBlockCompositeMaskLayer1 : 16;
            uint32_t       VerticalBlockCompositeMaskLayer1 : 16;
        };

        // FMD Summation
        struct
        {
            uint32_t       StartColumnOffset;
        };

        uint32_t       Value;
    } DW02;

    // DWORD 3 - GRF R7.3
    union
    {
        // Composite
        struct
        {
            uint32_t       HorizontalBlockCompositeMaskLayer2 : 16;
            uint32_t       VerticalBlockCompositeMaskLayer2 : 16;
        };

        // FMD Summation
        struct
        {
            uint32_t       TotalColumns;
        };

        uint32_t       Value;
    } DW03;

    // DWORD 4 - GRF R7.4
    union
    {
        // Sampler Load
        struct
        {
            float       VideoXScalingStep;
        };

        uint32_t       Value;
    } DW04;

    // DWORD 5 - GRF R7.5
    union
    {
        // NLAS
        struct
        {
            float       VideoStepDelta;
        };

        uint32_t       Value;
    } DW05;

    // DWORD 6 - GRF R7.6
    union
    {
        // AVScaling
        struct
        {
            uint32_t       VerticalBlockNumber : 17;
            uint32_t       AreaOfInterest : 1;
            uint32_t : 14;  // unnamed padding: 17 + 1 + 14 = 32 bits
        };

        uint32_t       Value;
    } DW06;

    // DWORD 7 - GRF R7.7
    union
    {
        // AVScaling
        struct
        {
            uint32_t       GroupIDNumber;
        };

        uint32_t       Value;
    } DW07;

    // DWORD 8 - GRF R8.0
    union
    {
        // Composite
        struct
        {
            uint32_t       HorizontalBlockCompositeMaskLayer3 : 16;
            uint32_t       VerticalBlockCompositeMaskLayer3 : 16;
        };

        uint32_t       Value;
    } DW08;

    // DWORD 9 - GRF R8.1
    union
    {
        // Composite
        struct
        {
            uint32_t       HorizontalBlockCompositeMaskLayer4 : 16;
            uint32_t       VerticalBlockCompositeMaskLayer4 : 16;
        };

        uint32_t       Value;
    } DW09;

    // DWORD 10 - GRF R8.2
    union
    {
        // Composite
        struct
        {
            uint32_t       HorizontalBlockCompositeMaskLayer5 : 16;
            uint32_t       VerticalBlockCompositeMaskLayer5 : 16;
        };

        uint32_t       Value;
    } DW10;

    // DWORD 11 - GRF R8.3
    union
    {
        // Composite
        struct
        {
            uint32_t       HorizontalBlockCompositeMaskLayer6 : 16;
            uint32_t       VerticalBlockCompositeMaskLayer6 : 16;
        };

        uint32_t       Value;
    } DW11;

    // DWORD 12 - GRF R8.4
    union
    {
        // Composite
        struct
        {
            uint32_t       HorizontalBlockCompositeMaskLayer7 : 16;
            uint32_t       VerticalBlockCompositeMaskLayer7 : 16;
        };

        uint32_t       Value;
    } DW12;

    // DWORD 13 - GRF R8.5
    union
    {
        struct
        {
            uint32_t       Reserved;
        };

        uint32_t       Value;
    } DW13;

    // DWORD 14 - GRF R8.6
    union
    {
        struct
        {
            uint32_t       Reserved;
        };

        uint32_t       Value;
    } DW14;

    // DWORD 15 - GRF R8.7
    union
    {
        struct
        {
            uint32_t       Reserved;
        };

        uint32_t       Value;
    } DW15;
};
337 
338 class VpRenderKernelObj
339 {
340 public:
341     VpRenderKernelObj(PVP_MHWINTERFACE hwInterface, PVpAllocator allocator);
342     VpRenderKernelObj(PVP_MHWINTERFACE hwInterface, VpKernelID kernelID, uint32_t kernelIndex, std::string kernelName = "", PVpAllocator allocator = nullptr);
343     virtual ~VpRenderKernelObj();
344 
345     // For Adv kernel
346     // Kernel Specific, which will inplenment be each kernel
347     // GetCurbeState should be called after UpdateCurbeBindingIndex for all processed surfaces being called
348     virtual MOS_STATUS Init(VpRenderKernel& kernel);
349 
GetCurbeState(void * & curbe,uint32_t & curbeLength,uint32_t & curbeLengthAligned,RENDERHAL_KERNEL_PARAM kernelParam,uint32_t dwBlockAlign)350     MOS_STATUS GetCurbeState(void *&curbe, uint32_t &curbeLength, uint32_t &curbeLengthAligned, RENDERHAL_KERNEL_PARAM kernelParam, uint32_t dwBlockAlign)
351     {
352         VP_PUBLIC_CHK_STATUS_RETURN(GetCurbeState(curbe, curbeLength));
353         VP_PUBLIC_CHK_STATUS_RETURN(GetAlignedLength(curbeLength, curbeLengthAligned, kernelParam, dwBlockAlign));
354         return MOS_STATUS_SUCCESS;
355     }
356 
357     virtual uint32_t GetInlineDataSize() = 0;
358 
359     virtual uint32_t GetKernelIndex();
360 
GetKernelId()361     VpKernelID GetKernelId()
362     {
363         return m_kernelId;
364     }
365 
GetKernelType()366     DelayLoadedKernelType GetKernelType()
367     {
368         return m_kernelType;
369     }
370 
IsKernelCached()371     virtual bool IsKernelCached()
372     {
373         return false;
374     }
375 
GetCachedEntryForKernelLoad()376     virtual Kdll_CacheEntry *GetCachedEntryForKernelLoad()
377     {
378         return nullptr;
379     }
380 
381     virtual MOS_STATUS GetWalkerSetting(KERNEL_WALKER_PARAMS& walkerParam, KERNEL_PACKET_RENDER_DATA &renderData);
382 
383     virtual MOS_STATUS SetKernelConfigs(
384         KERNEL_PARAMS& kernelParams,
385         VP_SURFACE_GROUP& surfaces,
386         KERNEL_SAMPLER_STATE_GROUP& samplerStateGroup,
387         KERNEL_CONFIGS& kernelConfigs,
388         VP_PACKET_SHARED_CONTEXT* sharedContext);
389 
GetScoreboardParams(PMHW_VFE_SCOREBOARD & scoreboardParams)390     virtual MOS_STATUS GetScoreboardParams(PMHW_VFE_SCOREBOARD &scoreboardParams)
391     {
392         scoreboardParams = nullptr;
393         return MOS_STATUS_SUCCESS;
394     }
395 
DumpSurfaces()396     virtual void DumpSurfaces()
397     {
398         return;
399     }
400 
401     virtual void DumpSurface(VP_SURFACE *pSurface,PCCHAR fileName);
402 
403     // Kernel Common configs
GetKernelSettings(RENDERHAL_KERNEL_PARAM & settsings)404     virtual MOS_STATUS GetKernelSettings(RENDERHAL_KERNEL_PARAM &settsings)
405     {
406         if (IsAdvKernel())
407         {
408             // For adv kernel, no need for kernel param.
409             return MOS_STATUS_SUCCESS;
410         }
411         if (m_hwInterface && m_hwInterface->m_vpPlatformInterface)
412         {
413             VP_PUBLIC_CHK_STATUS_RETURN(m_hwInterface->m_vpPlatformInterface->GetKernelParam(m_kernelId, settsings));
414             return MOS_STATUS_SUCCESS;
415         }
416         else
417         {
418             return MOS_STATUS_INVALID_HANDLE;
419         }
420         return MOS_STATUS_SUCCESS;
421     }
422 
423     virtual MOS_STATUS GetKernelEntry(Kdll_CacheEntry &entry);
424 
FreeCurbe(void * & curbe)425     virtual MOS_STATUS FreeCurbe(void*& curbe)
426     {
427         VP_FUNC_CALL();
428 
429         MOS_SafeFreeMemory(curbe);
430         return MOS_STATUS_SUCCESS;
431     }
432 
433     virtual uint32_t GetKernelBinaryID();
434 
GetKernelBinary()435     void* GetKernelBinary()
436     {
437         return m_kernelBinary;
438     }
439 
GetKernelSurfaceConfig()440     KERNEL_SURFACE_CONFIG& GetKernelSurfaceConfig()
441     {
442         return m_surfaceState;
443     }
444 
GetKernelName()445     std::string& GetKernelName()
446     {
447         return m_kernelName;
448     }
449 
UpdateCurbeBindingIndex(SurfaceType surface,uint32_t index)450     MOS_STATUS UpdateCurbeBindingIndex(SurfaceType surface, uint32_t index)
451     {
452         // Surface Type is specified during one submission
453         auto it = m_surfaceBindingIndex.find(surface);
454         if (it != m_surfaceBindingIndex.end())
455         {
456             it->second.insert(index);
457         }
458         else
459         {
460             std::set<uint32_t> bindingMap;
461             bindingMap.insert(index);
462             m_surfaceBindingIndex.insert(std::make_pair(surface, bindingMap));
463         }
464 
465         return MOS_STATUS_SUCCESS;
466     }
467 
GetSurfaceBindingIndex(SurfaceType surface)468     std::set<uint32_t>& GetSurfaceBindingIndex(SurfaceType surface)
469     {
470         auto it = m_surfaceBindingIndex.find(surface);
471 
472         if (it == m_surfaceBindingIndex.end())
473         {
474             VP_RENDER_ASSERTMESSAGE("No surface index created for current surface");
475             std::set<uint32_t> bindingMap;
476             it = m_surfaceBindingIndex.insert(std::make_pair(surface, bindingMap)).first;
477         }
478         return it->second;
479     }
480 
481     MOS_STATUS InitKernel(void* binary, uint32_t size, KERNEL_CONFIGS& kernelConfigs,
482                         VP_SURFACE_GROUP& surfacesGroup, VP_RENDER_CACHE_CNTL& surfMemCacheCtl);
483 
IsAdvKernel()484     bool IsAdvKernel()
485     {
486         return m_isAdvKernel;
487     }
488 
UseIndependentSamplerGroup()489     bool UseIndependentSamplerGroup()
490     {
491         return m_useIndependentSamplerGroup;
492     }
493 
494     virtual MOS_STATUS SetSamplerStates(KERNEL_SAMPLER_STATE_GROUP& samplerStateGroup);
495 
UpdateCompParams()496     virtual MOS_STATUS UpdateCompParams()
497     {
498         return MOS_STATUS_SUCCESS;
499     }
500 
SetCacheCntl(PVP_RENDER_CACHE_CNTL)501     virtual MOS_STATUS SetCacheCntl(PVP_RENDER_CACHE_CNTL)
502     {
503         return MOS_STATUS_SUCCESS;
504     }
505 
SetPerfTag()506     virtual MOS_STATUS SetPerfTag()
507     {
508         return MOS_STATUS_SUCCESS;
509     }
510 
InitRenderHalSurface(SurfaceType type,VP_SURFACE * surf,PRENDERHAL_SURFACE renderHalSurface)511     virtual MOS_STATUS InitRenderHalSurface(
512         SurfaceType             type,
513         VP_SURFACE              *surf,
514         PRENDERHAL_SURFACE      renderHalSurface)
515     {
516         return MOS_STATUS_UNIMPLEMENTED;
517     }
518 
519     virtual void OcaDumpKernelInfo(MOS_COMMAND_BUFFER &cmdBuffer, MOS_CONTEXT &mosContext);
520 
GetEuThreadSchedulingMode()521     virtual uint32_t GetEuThreadSchedulingMode()
522     {
523         // hw default mode
524         return 0;
525     }
526 
527     virtual MOS_STATUS InitRenderHalSurfaceCMF(MOS_SURFACE* src, PRENDERHAL_SURFACE renderHalSurface);
528 
529     virtual MOS_STATUS SetInlineDataParameter(KRN_ARG args, RENDERHAL_INTERFACE *renderhal);
530 
UpdateBindlessSurfaceResource(SurfaceType surf,std::set<uint32_t> surfStateOffset)531     virtual MOS_STATUS UpdateBindlessSurfaceResource(SurfaceType surf, std::set<uint32_t> surfStateOffset)
532     {
533         if (surf != SurfaceTypeInvalid)
534         {
535             m_bindlessSurfaceArray.insert(std::make_pair(surf, surfStateOffset));
536         }
537 
538         return MOS_STATUS_SUCCESS;
539     }
540 
GetBindlessSamplers()541     virtual std::map<uint32_t, uint32_t>& GetBindlessSamplers()
542     {
543         return m_bindlessSamperArray;
544     }
545 
InitBindlessResources()546     virtual MOS_STATUS InitBindlessResources()
547     {
548         m_bindlessSurfaceArray.clear();
549         m_bindlessSamperArray.clear();
550         return MOS_STATUS_SUCCESS;
551     }
552 
553 protected:
554 
555     virtual MOS_STATUS SetWalkerSetting(KERNEL_THREAD_SPACE &threadSpace, bool bSyncFlag, bool flushL1 = false);
556 
557     virtual MOS_STATUS SetKernelArgs(KERNEL_ARGS &kernelArgs, VP_PACKET_SHARED_CONTEXT *sharedContext);
558 
559     virtual MOS_STATUS SetKernelStatefulSurfaces(KERNEL_ARG_INDEX_SURFACE_MAP &statefulSurfaces);
560 
561     virtual MOS_STATUS SetupSurfaceState() = 0;
562 
563     virtual MOS_STATUS SetKernelConfigs(KERNEL_CONFIGS& kernelConfigs);
564 
565     virtual MOS_STATUS SetProcessSurfaceGroup(VP_SURFACE_GROUP &surfaces);
566 
567     virtual MOS_STATUS CpPrepareResources();
568 
569     virtual MOS_STATUS SetupStatelessBuffer();
570 
571     virtual MOS_STATUS SetupStatelessBufferResource(SurfaceType surf);
572 
573     virtual MOS_STATUS GetCurbeState(void *&curbe, uint32_t &curbeLength) = 0;
574 
GetAlignedLength(uint32_t & curbeLength,uint32_t & curbeLengthAligned,RENDERHAL_KERNEL_PARAM kernelParam,uint32_t dwBlockAlign)575     virtual MOS_STATUS GetAlignedLength(uint32_t &curbeLength, uint32_t &curbeLengthAligned, RENDERHAL_KERNEL_PARAM kernelParam, uint32_t dwBlockAlign)
576     {
577         curbeLengthAligned = MOS_ALIGN_CEIL(curbeLength, dwBlockAlign);
578         return MOS_STATUS_SUCCESS;
579     }
580 
581     virtual MOS_STATUS SetTuningFlag(PKERNEL_TUNING_PARAMS tuningParams);
582 
583 protected:
584 
585     VP_SURFACE_GROUP                                        *m_surfaceGroup = nullptr;  // input surface process surface groups
586     PVP_MHWINTERFACE                                        m_hwInterface = nullptr;
587     KERNEL_SURFACE_CONFIG                                   m_surfaceState;             // surfaces processed pool where the surface state will generated here, if KERNEL_SURFACE_STATE_PARAM
588     KERNEL_SURFACE_BINDING_INDEX                            m_surfaceBindingIndex;      // store the binding index for processed surface
589     PVpAllocator                                            m_allocator = nullptr;
590     MediaUserSettingSharedPtr                               m_userSettingPtr = nullptr;  // usersettingInstance
591     KERNEL_STATELESS_BUFF_CONFIG                            m_statelessArray;
592     KERNEL_BINDELESS_SURFACE                                m_bindlessSurfaceArray;
593     KERNEL_BINDELESS_SAMPLER                                m_bindlessSamperArray;
594     // kernel attribute
595     std::string                                             m_kernelName = "";
596     void *                                                  m_kernelBinary = nullptr;
597     uint32_t                                                m_kernelBinaryID = 0;
598     uint32_t                                                m_kernelSize = 0;
599     VpKernelID                                              m_kernelId = kernelCombinedFc;
600     DelayLoadedKernelType                                   m_kernelType     = KernelNone;
601     KernelIndex                                             m_kernelIndex = 0;          // index of current kernel in KERNEL_PARAMS_LIST
602 
603     PKERNEL_TUNING_PARAMS                                   m_kernelTuningParams = nullptr;
604 
605     bool                                                    m_isAdvKernel = false;      // true mean multi kernel can be submitted in one workload.
606     bool                                                    m_useIndependentSamplerGroup = false; //true means multi kernels has their own stand alone sampler states group. only can be true when m_isAdvKernel is true.
607 
608     std::shared_ptr<mhw::vebox::Itf>                        m_veboxItf = nullptr;
609     std ::vector<MHW_INLINE_DATA_PARAMS>                    m_inlineDataParams = {};
610 
611 MEDIA_CLASS_DEFINE_END(vp__VpRenderKernelObj)
612 };
613 }
614 #endif // __VP_RENDER_KERNEL_OBJ_H__
615