1 /*
2 * Copyright (c) 2022, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
//!
//! \file       media_render_copy.cpp
//! \brief      Render copy implementation file
//! \details    Render copy implementation file
//!
27 
28 #include "media_render_copy.h"
29 #include "hal_kerneldll.h"
30 #include "media_copy.h"
31 #include "media_interfaces_mhw.h"
32 #include "mhw_cp_interface.h"
33 #include "mhw_state_heap.h"
34 #include "mos_defs_specific.h"
35 #include "mos_os_hw.h"
36 #include "mos_utilities.h"
37 #include "vphal_render_common.h"
38 
RenderCopyState(PMOS_INTERFACE osInterface,MhwInterfaces * mhwInterfaces)39 RenderCopyState::RenderCopyState(PMOS_INTERFACE osInterface, MhwInterfaces *mhwInterfaces) :
40     m_osInterface(osInterface),
41     m_mhwInterfaces(mhwInterfaces)
42 {
43     if (nullptr == osInterface)
44     {
45         MCPY_ASSERTMESSAGE("osInterface is nullptr");
46         return;
47     }
48     m_renderInterface = mhwInterfaces->m_renderInterface;
49     m_RenderData.pKernelParam = (PRENDERHAL_KERNEL_PARAM)g_rendercopy_KernelParam;
50     Mos_SetVirtualEngineSupported(osInterface, true);
51     osInterface->pfnVirtualEngineSupported(osInterface, true, false);
52 
53     MOS_NULL_RENDERING_FLAGS        NullRenderingFlags;
54     NullRenderingFlags = osInterface->pfnGetNullHWRenderFlags(osInterface);
55 
56     m_bNullHwRenderCopy =
57      NullRenderingFlags.VPComp ||
58      NullRenderingFlags.VPGobal;
59 }
60 
~RenderCopyState()61 RenderCopyState:: ~RenderCopyState()
62 {
63     if (m_renderHal != nullptr)
64     {
65        MOS_STATUS eStatus = m_renderHal->pfnDestroy(m_renderHal);
66        if (eStatus != MOS_STATUS_SUCCESS)
67        {
68            MCPY_ASSERTMESSAGE("Failed to destroy RenderHal, eStatus:%d.\n", eStatus);
69        }
70        MOS_FreeMemAndSetNull(m_renderHal);
71     }
72 
73     if (m_cpInterface != nullptr)
74     {
75         if (m_osInterface)
76         {
77             m_osInterface->pfnDeleteMhwCpInterface(m_cpInterface);
78             m_cpInterface = nullptr;
79         }
80         else
81         {
82             MCPY_ASSERTMESSAGE("Failed to destroy vpInterface.");
83         }
84     }
85 
86     // Destroy Kernel DLL objects (cache, hash table, states)
87     if (m_pKernelDllState)
88     {
89        KernelDll_ReleaseStates(m_pKernelDllState);
90        m_pKernelBin = nullptr;
91     }
92 }
93 
Initialize()94 MOS_STATUS RenderCopyState::Initialize()
95 {
96     RENDERHAL_SETTINGS_LEGACY RenderHalSettings;
97 
98     MCPY_CHK_NULL_RETURN(m_osInterface);
99 
100     m_renderHal = (PRENDERHAL_INTERFACE_LEGACY)MOS_AllocAndZeroMemory(sizeof(RENDERHAL_INTERFACE_LEGACY));
101     MCPY_CHK_NULL_RETURN(m_renderHal);
102     MCPY_CHK_STATUS_RETURN(RenderHal_InitInterface_Legacy(
103         m_renderHal,
104         &m_cpInterface,
105         m_osInterface));
106 
107     // Allocate and initialize HW states
108     RenderHalSettings.iMediaStates = 32;
109     MCPY_CHK_STATUS_RETURN(m_renderHal->pfnInitialize(m_renderHal, &RenderHalSettings));
110 
111     m_renderHal->sseuTable              = VpDefaultSSEUTable;
112     m_renderHal->forceDisablePreemption = true;
113 
114     return MOS_STATUS_SUCCESS;
115 }
116 
GetBytesPerPixelPerPlane(MOS_FORMAT Format)117 int32_t RenderCopyState::GetBytesPerPixelPerPlane(
118     MOS_FORMAT        Format)
119 {
120     int32_t     iBytePerPixelPerPlane = 0;
121 
122     switch(Format)
123     {
124     case Format_NV12:
125     case Format_RGBP:
126         iBytePerPixelPerPlane = 1;
127         break;
128     case Format_YUY2:
129     case Format_P010:
130     case Format_P016:
131         iBytePerPixelPerPlane = 2;
132         break;
133     case Format_Y210:
134     case Format_Y216:
135     case Format_Y410:
136     case Format_AYUV:
137     case Format_A8R8G8B8:
138         iBytePerPixelPerPlane = 4;
139         break;
140     case Format_Y416:
141         iBytePerPixelPerPlane = 8;
142         break;
143     default:
144         MCPY_ASSERTMESSAGE("can't support formats %d for render copy", Format);
145         break;
146     }
147 
148     return iBytePerPixelPerPlane;
149 }
150 
SubmitCMD()151 MOS_STATUS RenderCopyState::SubmitCMD( )
152 {
153     return MOS_STATUS_SUCCESS;
154 }
155 
GetCurentKernelID()156 MOS_STATUS RenderCopyState::GetCurentKernelID( )
157 {
158     int32_t iBytePerPixelPerPlane = GetBytesPerPixelPerPlane(m_Source.Format);
159 
160     if ((iBytePerPixelPerPlane < 1) || (iBytePerPixelPerPlane > 8))
161     {
162         MCPY_ASSERTMESSAGE("GetCurentKernelID wrong pixel size.");
163         return MOS_STATUS_INVALID_PARAMETER;
164     }
165 
166     // This scheme is temporary
167     // for a !MOS_TILE_LINEAR plane surface, it is 2D surface.
168     // for a MOS_TILE_LINEAR plane surface, if (dwWidth * bytes_per_pixel < dwPitch) it is 2D surface
169     // if (dwWidth * bytes_per_pixel == dwPitch) it is a 1D surface.
170 
171     if ((m_Source.Format == Format_NV12) || (m_Source.Format == Format_P010) || (m_Source.Format == Format_P016))
172     {
173         if (m_Source.TileType == MOS_TILE_LINEAR && m_Target.TileType != MOS_TILE_LINEAR)
174         {
175             m_currKernelId = KERNEL_CopyKernel_1D_to_2D_NV12;
176         }
177         else if (m_Source.TileType != MOS_TILE_LINEAR && m_Target.TileType == MOS_TILE_LINEAR)
178         {
179             m_currKernelId = KERNEL_CopyKernel_2D_to_1D_NV12;
180         }
181         else if (m_Source.TileType != MOS_TILE_LINEAR && m_Target.TileType != MOS_TILE_LINEAR)
182         {
183             m_currKernelId = KERNEL_CopyKernel_2D_to_2D_NV12;
184         }
185         else
186         {
187              m_currKernelId = KERNEL_CopyKernel_MAX;
188              MCPY_ASSERTMESSAGE("Can't find right kernel to support, pls help to check input parameters.");
189              return MOS_STATUS_INVALID_PARAMETER;
190         }
191     }
192     else if (m_Source.Format == Format_RGBP)
193     {
194         if (m_Source.TileType == MOS_TILE_LINEAR && m_Target.TileType != MOS_TILE_LINEAR)
195         {
196             m_currKernelId = KERNEL_CopyKernel_1D_to_2D_Planar;
197         }
198         else if (m_Source.TileType != MOS_TILE_LINEAR && m_Target.TileType == MOS_TILE_LINEAR)
199         {
200             m_currKernelId = KERNEL_CopyKernel_2D_to_1D_Planar;
201         }
202         else if (m_Source.TileType != MOS_TILE_LINEAR && m_Target.TileType != MOS_TILE_LINEAR)
203         {
204             m_currKernelId = KERNEL_CopyKernel_2D_to_2D_Planar;
205         }
206         else
207         {
208             m_currKernelId = KERNEL_CopyKernel_MAX;
209             MCPY_ASSERTMESSAGE("kernel can't support it.");
210             return MOS_STATUS_INVALID_PARAMETER;
211         }
212     }
213     else if ((m_Source.Format == Format_YUY2) || (m_Source.Format == Format_Y210) || (m_Source.Format == Format_Y216)
214               || (m_Source.Format == Format_AYUV) || (m_Source.Format == Format_Y410) || (m_Source.Format == Format_Y416)
215               || (m_Source.Format == Format_A8R8G8B8))
216     {
217         if (m_Source.TileType == MOS_TILE_LINEAR && m_Target.TileType != MOS_TILE_LINEAR)
218         {
219             m_currKernelId = KERNEL_CopyKernel_1D_to_2D_Packed;
220         }
221         else if (m_Source.TileType != MOS_TILE_LINEAR && m_Target.TileType == MOS_TILE_LINEAR)
222         {
223             m_currKernelId = KERNEL_CopyKernel_2D_to_1D_Packed;
224         }
225         else if (m_Source.TileType != MOS_TILE_LINEAR && m_Target.TileType != MOS_TILE_LINEAR)
226         {
227             m_currKernelId = KERNEL_CopyKernel_2D_to_2D_Packed;
228         }
229         else
230         {
231              m_currKernelId = KERNEL_CopyKernel_MAX;
232              MCPY_ASSERTMESSAGE("kernel can't support it.");
233              return MOS_STATUS_INVALID_PARAMETER;
234         }
235 
236     }
237     MCPY_NORMALMESSAGE("Used Render copy and currentKernel id = %d.", m_currKernelId);
238     return MOS_STATUS_SUCCESS;
239 }
240 
241 //!
242 //! \brief    setup surface states
243 //! \details  Setup surface states for fast 1toN
244 //! \return   MOS_STATUS
245 //!           Return MOS_STATUS_SUCCESS if successful, otherwise failed
246 //!
SetupSurfaceStates()247 MOS_STATUS RenderCopyState::SetupSurfaceStates()
248 {
249     RENDERHAL_SURFACE_STATE_PARAMS  SurfaceParams;
250     MOS_STATUS                      eStatus = MOS_STATUS_SUCCESS;
251     uint32_t                        index;
252     uint32_t                        width  = 0;
253     MOS_FORMAT                      format = Format_NV12;
254     int32_t                         iBTEntry;
255     PRENDERHAL_INTERFACE            pRenderHal  = m_renderHal;
256     PMEDIACOPY_RENDER_DATA          pRenderData = &m_RenderData;
257     RENDERHAL_SURFACE               RenderHalSource = {};  // source for mhw
258     RENDERHAL_SURFACE               RenderHalTarget = {};  // target for mhw
259     MCPY_CHK_NULL_RETURN(pRenderHal);
260     MCPY_CHK_NULL_RETURN(pRenderData);
261     // Source surface
262     MOS_ZeroMemory(&SurfaceParams, sizeof(SurfaceParams));
263 
264     pRenderData->SurfMemObjCtl.SourceSurfMemObjCtl =
265          pRenderHal->pOsInterface->pfnCachePolicyGetMemoryObject(
266          MOS_MP_RESOURCE_USAGE_SurfaceState_RCS,
267          pRenderHal->pOsInterface->pfnGetGmmClientContext(pRenderHal->pOsInterface)).DwordValue;
268 
269     pRenderData->SurfMemObjCtl.TargetSurfMemObjCtl = pRenderData->SurfMemObjCtl.SourceSurfMemObjCtl;
270 
271     SurfaceParams.bAVS              = false;
272     SurfaceParams.Boundary          = RENDERHAL_SS_BOUNDARY_SRCRECT;
273     SurfaceParams.isOutput     = false;
274     SurfaceParams.MemObjCtl         = pRenderData->SurfMemObjCtl.SourceSurfMemObjCtl;
275 
276     SurfaceParams.Type              = RENDERHAL_SURFACE_TYPE_G10;
277     SurfaceParams.bWidthInDword_Y   = false;
278     SurfaceParams.bWidthInDword_UV  = false;
279     SurfaceParams.bWidth16Align     = false;
280 
281     if (Format_NV12 == m_Target.Format)
282     {
283         m_Target.SurfType = SURF_OUT_RENDERTARGET;
284         m_Source.SurfType = SURF_OUT_RENDERTARGET;
285     }
286 
287     if (m_currKernelId == KERNEL_CopyKernel_1D_to_2D_NV12
288         || m_currKernelId == KERNEL_CopyKernel_1D_to_2D_Planar
289         || m_currKernelId == KERNEL_CopyKernel_1D_to_2D_Packed)
290     {
291         format = m_Source.Format;
292         width = m_Source.dwWidth;
293         m_Source.Format = Format_RAW;
294 
295         if ((format == Format_NV12) || (format == Format_P010) || (format == Format_P016))
296         {
297            m_Source.dwWidth = (m_Source.dwHeight * m_Source.dwPitch) * 3 / 2;
298         }
299         else if (format == Format_RGBP)
300         {
301            m_Source.dwWidth = (m_Source.dwHeight * m_Source.dwPitch) * 3;
302         }
303         else if ((format == Format_YUY2) || (format == Format_Y210) || (format == Format_Y216)
304                  || (format == Format_AYUV) || (format == Format_Y410) || (format == Format_Y416)
305                  || (format == Format_A8R8G8B8))
306         {
307            m_Source.dwWidth = m_Source.dwHeight * m_Source.dwPitch;
308         }
309 
310         m_Source.dwWidth = MOS_ALIGN_CEIL(m_Source.dwWidth, 128);
311         //1D surfaces
312         VPHAL_RENDER_CHK_STATUS(VpHal_CommonSetBufferSurfaceForHwAccess(
313              pRenderHal,
314              &m_Source,
315              &RenderHalSource,
316              &SurfaceParams,
317              pRenderData->iBindingTable,
318              RENDERCOPY_SRC_INDEX,
319              false));
320         m_Source.Format = format;
321         m_Source.dwWidth = width;
322     }
323     else {
324         //2D surfaces
325         VPHAL_RENDER_CHK_STATUS(VpHal_CommonSetSurfaceForHwAccess(
326             pRenderHal,
327             &m_Source,
328             &RenderHalSource,
329             &SurfaceParams,
330             pRenderData->iBindingTable,
331             RENDERCOPY_SRC_INDEX,
332             false));
333     }
334 
335     // Target surface
336     SurfaceParams.MemObjCtl         = pRenderData->SurfMemObjCtl.TargetSurfMemObjCtl;
337     SurfaceParams.Type              = pRenderHal->SurfaceTypeDefault;
338     SurfaceParams.isOutput     = true;
339     SurfaceParams.bAVS              = false;
340     SurfaceParams.Boundary          = RENDERHAL_SS_BOUNDARY_DSTRECT;
341 
342     if (m_currKernelId == KERNEL_CopyKernel_2D_to_1D_NV12
343         || m_currKernelId == KERNEL_CopyKernel_2D_to_1D_Planar
344         || m_currKernelId == KERNEL_CopyKernel_2D_to_1D_Packed)
345     {
346         format = m_Target.Format;
347         width = m_Target.dwWidth;
348         m_Target.Format = Format_RAW;
349 
350           if ((format == Format_NV12) || (format == Format_P010) || (format == Format_P016))
351           {
352              m_Target.dwWidth = (m_Target.dwHeight * m_Target.dwPitch) * 3 / 2;
353           }
354           else if (format == Format_RGBP)
355           {
356              m_Target.dwWidth = (m_Target.dwHeight * m_Target.dwPitch) * 3;
357           }
358           else if ((format == Format_YUY2) || (format == Format_Y210) || (format == Format_Y216)
359                    || (format == Format_AYUV) || (format == Format_Y410) || (format == Format_Y416)
360                    || (format == Format_A8R8G8B8))
361           {
362              m_Target.dwWidth = m_Target.dwHeight * m_Target.dwPitch;
363           }
364 
365         m_Target.dwWidth = MOS_ALIGN_CEIL(m_Target.dwWidth, 128);
366 
367         //1D surface.
368         VPHAL_RENDER_CHK_STATUS(VpHal_CommonSetBufferSurfaceForHwAccess(
369             pRenderHal,
370             &m_Target,
371             &RenderHalTarget,
372             &SurfaceParams,
373             pRenderData->iBindingTable,
374             RENDERCOPY_DST_INDEX,
375             true));
376         m_Target.Format = format;
377         m_Target.dwWidth = width;
378     }
379     else
380     {
381         //2D surface.
382         VPHAL_RENDER_CHK_STATUS(VpHal_CommonSetSurfaceForHwAccess(
383             pRenderHal,
384             &m_Target,
385             &RenderHalTarget,
386             &SurfaceParams,
387             pRenderData->iBindingTable,
388             RENDERCOPY_DST_INDEX,
389             true));
390 
391     }
392 finish:
393     return eStatus;
394 }
395 
396 
LoadStaticData(int32_t * piCurbeOffset)397 MOS_STATUS RenderCopyState::LoadStaticData(
398     int32_t*                        piCurbeOffset)
399 {
400     DP_RENDERCOPY_NV12_STATIC_DATA          WalkerNV12Static;
401     DP_RENDERCOPY_RGBP_STATIC_DATA          WalkerPlanarStatic;
402     DP_RENDERCOPY_PACKED_STATIC_DATA        WalkerSinglePlaneStatic;
403 
404     MOS_STATUS                              eStatus = MOS_STATUS_SUCCESS;
405     int32_t                                 iCurbeLength = 0;
406     int32_t                                 iBytePerPixelPerPlane = GetBytesPerPixelPerPlane(m_Target.Format);
407     PRENDERHAL_INTERFACE                    pRenderHal  = m_renderHal;
408     PMEDIACOPY_RENDER_DATA                  pRenderData = &m_RenderData;
409 
410     MCPY_CHK_NULL_RETURN(pRenderHal);
411     MCPY_CHK_NULL_RETURN(pRenderData);
412     if ((iBytePerPixelPerPlane < 1) || (iBytePerPixelPerPlane > 8))
413     {
414         MCPY_ASSERTMESSAGE("LoadStaticData wrong pixel size.");
415         return MOS_STATUS_INVALID_PARAMETER;
416     }
417 
418     int32_t srcResourceOffset = (int32_t)(m_Source.OsResource.pGmmResInfo->GetPlanarXOffset(GMM_NO_PLANE));
419     int32_t dstResourceOffset = (int32_t)(m_Target.OsResource.pGmmResInfo->GetPlanarXOffset(GMM_NO_PLANE));
420 
421     if (srcResourceOffset)
422     {
423         m_Source.dwOffset -= srcResourceOffset;
424     }
425 
426     if (dstResourceOffset)
427     {
428         m_Target.dwOffset -= dstResourceOffset;
429     }
430 
431     if ((m_Target.Format == Format_NV12) || ((m_Target.Format == Format_P010) || (m_Target.Format == Format_P016)))
432     {
433         // Set relevant static data
434         MOS_ZeroMemory(&WalkerNV12Static, sizeof(DP_RENDERCOPY_NV12_STATIC_DATA));
435 
436         WalkerNV12Static.DW0.Inputsurfaceindex = RENDERCOPY_SRC_INDEX;
437         WalkerNV12Static.DW1.Outputsurfaceindex = RENDERCOPY_DST_INDEX;
438 
439         if (m_currKernelId == KERNEL_CopyKernel_1D_to_2D_NV12)
440         {
441             WalkerNV12Static.DW2.Widthdword = (m_Source.dwWidth * iBytePerPixelPerPlane + 3) / 4;
442             WalkerNV12Static.DW3.Height = m_Source.dwHeight;
443             WalkerNV12Static.DW4.ShiftLeftOffsetInBytes = m_Source.dwOffset;
444             WalkerNV12Static.DW5.Widthstride = m_Source.dwPitch;
445             WalkerNV12Static.DW6.Heightstride = m_Source.dwHeight;
446         }
447         else
448         {
449             WalkerNV12Static.DW2.Widthdword = (m_Source.dwWidth < m_Target.dwWidth) ? m_Source.dwWidth : m_Target.dwWidth;
450             WalkerNV12Static.DW2.Widthdword = (WalkerNV12Static.DW2.Widthdword * iBytePerPixelPerPlane + 3) / 4;
451             WalkerNV12Static.DW3.Height = (m_Source.dwHeight < m_Target.dwHeight) ? m_Source.dwHeight:m_Target.dwHeight;
452             WalkerNV12Static.DW4.ShiftLeftOffsetInBytes = m_Target.dwOffset;
453             WalkerNV12Static.DW5.Widthstride = m_Target.dwPitch;
454             WalkerNV12Static.DW6.Heightstride = m_Target.dwHeight;
455         }
456 
457         iCurbeLength = sizeof(DP_RENDERCOPY_NV12_STATIC_DATA);
458         MCPY_NORMALMESSAGE("Load Curbe data: DW0.Inputsurfaceindex = %d, DW1.Outputsurfaceindex = %d, DW2.WidthDWord= %d, DW3.Height= %d,"
459             "DW4.ShiftLeftOffsetInBytes= %d,DW5.Widthstride = %d, DW6.Heightstride = % d.",
460             WalkerNV12Static.DW0.Inputsurfaceindex,
461             WalkerNV12Static.DW1.Outputsurfaceindex,
462             WalkerNV12Static.DW2.Widthdword,
463             WalkerNV12Static.DW3.Height,
464             WalkerNV12Static.DW4.ShiftLeftOffsetInBytes,
465             WalkerNV12Static.DW5.Widthstride,
466             WalkerNV12Static.DW6.Heightstride);
467         *piCurbeOffset = pRenderHal->pfnLoadCurbeData(
468          pRenderHal,
469          pRenderData->pMediaState,
470          &WalkerNV12Static,
471          iCurbeLength);
472     }
473     else if (m_Target.Format == Format_RGBP)
474     {
475         // Set relevant static data
476         MOS_ZeroMemory(&WalkerPlanarStatic, sizeof(DP_RENDERCOPY_RGBP_STATIC_DATA));
477 
478         if (m_currKernelId == KERNEL_CopyKernel_2D_to_1D_Planar)
479         {
480             WalkerPlanarStatic.DW0.InputsurfaceRindex = RENDERCOPY_SRC_INDEX + 2;
481             WalkerPlanarStatic.DW1.InputsurfaceGindex = RENDERCOPY_SRC_INDEX;
482             WalkerPlanarStatic.DW2.InputsurfaceBindex = RENDERCOPY_SRC_INDEX + 1;
483         }
484         else
485         {
486             WalkerPlanarStatic.DW0.InputsurfaceRindex = RENDERCOPY_SRC_INDEX;
487             WalkerPlanarStatic.DW1.InputsurfaceGindex = RENDERCOPY_SRC_INDEX + 1;
488             WalkerPlanarStatic.DW2.InputsurfaceBindex = RENDERCOPY_SRC_INDEX + 2;
489         }
490 
491         if (m_currKernelId == KERNEL_CopyKernel_1D_to_2D_Planar)
492         {
493             WalkerPlanarStatic.DW3.OutputsurfaceRindex = RENDERCOPY_DST_INDEX + 2;
494             WalkerPlanarStatic.DW4.OutputsurfaceGindex = RENDERCOPY_DST_INDEX;
495             WalkerPlanarStatic.DW5.OutputsurfaceBindex = RENDERCOPY_DST_INDEX + 1;
496         }
497         else
498         {
499             WalkerPlanarStatic.DW3.OutputsurfaceRindex = RENDERCOPY_DST_INDEX;
500             WalkerPlanarStatic.DW4.OutputsurfaceGindex = RENDERCOPY_DST_INDEX + 1;
501             WalkerPlanarStatic.DW5.OutputsurfaceBindex = RENDERCOPY_DST_INDEX + 2;
502         }
503 
504         if (m_currKernelId == KERNEL_CopyKernel_1D_to_2D_Planar)
505         {
506             WalkerPlanarStatic.DW6.Widthdword = m_Source.dwPitch / 4;
507             WalkerPlanarStatic.DW7.Height = m_Source.dwHeight;
508             WalkerPlanarStatic.DW8.ShiftLeftOffsetInBytes = m_Source.dwOffset;
509         }
510         else
511         {
512             WalkerPlanarStatic.DW6.Widthdword = m_Target.dwPitch / 4;
513             WalkerPlanarStatic.DW7.Height = m_Target.dwHeight;
514             WalkerPlanarStatic.DW8.ShiftLeftOffsetInBytes = m_Target.dwOffset;
515         }
516 
517         WalkerPlanarStatic.DW9.WidthdwordNoPadding = (m_Source.dwWidth < m_Target.dwWidth) ? m_Source.dwWidth : m_Target.dwWidth;
518         WalkerPlanarStatic.DW9.WidthdwordNoPadding = (WalkerPlanarStatic.DW9.WidthdwordNoPadding * iBytePerPixelPerPlane + 3) / 4;
519         WalkerPlanarStatic.DW10.Dst2DStartX = 0;
520         WalkerPlanarStatic.DW11.Dst2DStartY = 0;
521 
522         iCurbeLength = sizeof(DP_RENDERCOPY_RGBP_STATIC_DATA);
523         MCPY_NORMALMESSAGE("Load Curbe data: DW0.InputsurfaceRindex = %d, DW1.InputsurfaceGindex = %d, DW2.InputsurfaceBindex= %d, DW3.Height= %d,"
524             "DW4.OutputsurfaceGindex = %d, DW5.OutputsurfaceBindex = %d, DW6.Widthdword = %d, DW7.Height = %d, DW8.ShiftLeftOffsetInByte= %d,"
525             "DW9.WidthdwordNoPadding = %d, WalkerPlanarStatic.DW10.Dst2DStartX = %d, WalkerPlanarStatic.DW11.Dst2DStartY = %d.",
526             WalkerPlanarStatic.DW0.InputsurfaceRindex,
527             WalkerPlanarStatic.DW1.InputsurfaceGindex,
528             WalkerPlanarStatic.DW2.InputsurfaceBindex,
529             WalkerPlanarStatic.DW3.OutputsurfaceRindex,
530             WalkerPlanarStatic.DW4.OutputsurfaceGindex,
531             WalkerPlanarStatic.DW5.OutputsurfaceBindex,
532             WalkerPlanarStatic.DW6.Widthdword,
533             WalkerPlanarStatic.DW7.Height,
534             WalkerPlanarStatic.DW8.ShiftLeftOffsetInBytes,
535             WalkerPlanarStatic.DW9.WidthdwordNoPadding,
536             WalkerPlanarStatic.DW10.Dst2DStartX,
537             WalkerPlanarStatic.DW11.Dst2DStartY);
538 
539         *piCurbeOffset = pRenderHal->pfnLoadCurbeData(
540                                          pRenderHal,
541                                          pRenderData->pMediaState,
542                                          &WalkerPlanarStatic,
543                                          iCurbeLength);
544     }
545     else if ((m_Target.Format == Format_YUY2) || (m_Target.Format == Format_Y210) || (m_Target.Format == Format_Y216)
546               || (m_Target.Format == Format_AYUV) || (m_Target.Format == Format_Y410) || (m_Target.Format == Format_Y416)
547               || (m_Target.Format == Format_A8R8G8B8))
548     {
549         // Set relevant static data
550         MOS_ZeroMemory(&WalkerSinglePlaneStatic, sizeof(WalkerSinglePlaneStatic));
551 
552         WalkerSinglePlaneStatic.DW0.InputSurfaceIndex = RENDERCOPY_SRC_INDEX;
553         WalkerSinglePlaneStatic.DW1.OutputSurfaceIndex = RENDERCOPY_DST_INDEX;
554         if (m_currKernelId == KERNEL_CopyKernel_1D_to_2D_Packed)
555         {
556             WalkerSinglePlaneStatic.DW2.WidthDWord = m_Source.dwPitch / 4;
557             WalkerSinglePlaneStatic.DW3.Height = m_Source.dwHeight;
558             WalkerSinglePlaneStatic.DW4.ShiftLeftOffsetInBytes = m_Source.dwOffset;
559         }
560         else
561         {
562             WalkerSinglePlaneStatic.DW2.WidthDWord = m_Target.dwPitch / 4;
563             WalkerSinglePlaneStatic.DW3.Height = m_Target.dwHeight;
564             WalkerSinglePlaneStatic.DW4.ShiftLeftOffsetInBytes = m_Target.dwOffset;
565         }
566 
567         if ((m_currKernelId == KERNEL_CopyKernel_1D_to_2D_Packed) || (m_currKernelId == KERNEL_CopyKernel_2D_to_1D_Packed))
568         {
569             WalkerSinglePlaneStatic.DW5.ThreadHeight = (m_Source.dwHeight < m_Target.dwHeight) ? m_Source.dwHeight : m_Target.dwHeight;
570             WalkerSinglePlaneStatic.DW5.ThreadHeight = (WalkerSinglePlaneStatic.DW5.ThreadHeight + 32 - 1) / 32;
571         }
572         else if (m_currKernelId == KERNEL_CopyKernel_2D_to_2D_Packed)
573         {
574             WalkerSinglePlaneStatic.DW5.ThreadHeight = (m_Source.dwHeight + 8 - 1) / 8;
575         }
576         else
577         {
578             MCPY_ASSERTMESSAGE("LoadStaticData wrong kernel file.");
579             return MOS_STATUS_INVALID_PARAMETER;
580         }
581 
582         WalkerSinglePlaneStatic.DW6.WidthdwordNoPadding = (m_Source.dwWidth < m_Target.dwWidth) ? m_Source.dwWidth : m_Target.dwWidth;
583         WalkerSinglePlaneStatic.DW6.WidthdwordNoPadding = (WalkerSinglePlaneStatic.DW6.WidthdwordNoPadding * iBytePerPixelPerPlane + 3) / 4;
584         WalkerSinglePlaneStatic.DW7.Dst2DStartX = 0;
585         WalkerSinglePlaneStatic.DW8.Dst2DStartY = 0;
586 
587         iCurbeLength = sizeof(DP_RENDERCOPY_PACKED_STATIC_DATA);
588         MCPY_NORMALMESSAGE("Load Curbe data: DW0.InputSurfaceIndex = %d, DW1.OutputSurfaceIndex = %d, DW2.WidthDWord= %d, DW3.Height= %d,"
589             "DW4.ShiftLeftOffsetInBytes= %d,DW5.ThreadHeight = %d, DW6.WidthdwordNoPadding = %d, DW7.Dst2DStartX = %d, DW8.Dst2DStartY = %d.",
590             WalkerSinglePlaneStatic.DW0.InputSurfaceIndex,
591             WalkerSinglePlaneStatic.DW1.OutputSurfaceIndex,
592             WalkerSinglePlaneStatic.DW2.WidthDWord,
593             WalkerSinglePlaneStatic.DW3.Height,
594             WalkerSinglePlaneStatic.DW4.ShiftLeftOffsetInBytes,
595             WalkerSinglePlaneStatic.DW5.ThreadHeight,
596             WalkerSinglePlaneStatic.DW6.WidthdwordNoPadding,
597             WalkerSinglePlaneStatic.DW7.Dst2DStartX,
598             WalkerSinglePlaneStatic.DW8.Dst2DStartY);
599 
600         *piCurbeOffset = pRenderHal->pfnLoadCurbeData(
601          pRenderHal,
602          pRenderData->pMediaState,
603          &WalkerSinglePlaneStatic,
604          iCurbeLength);
605     }
606     else
607     {
608          MCPY_ASSERTMESSAGE("can't support Target format %d", m_Target.Format);
609     }
610 
611     if (*piCurbeOffset < 0)
612     {
613         return MOS_STATUS_UNKNOWN;
614     }
615 
616     pRenderData->iCurbeLength = iCurbeLength;
617 
618     return eStatus;
619 }
620 
621  //!
622  //! \brief    Render copy omputer walker setup
623  //! \details  Computer walker setup for render copy
624  //! \param    PMHW_WALKER_PARAMS pWalkerParams
625  //!           [in/out] Pointer to Walker params
626  //! \return   MOS_STATUS
627  //!           Return MOS_STATUS_SUCCESS if successful, otherwise failed
628  //!
RenderCopyComputerWalker(PMHW_GPGPU_WALKER_PARAMS pWalkerParams)629  MOS_STATUS RenderCopyState::RenderCopyComputerWalker(
630  PMHW_GPGPU_WALKER_PARAMS    pWalkerParams)
631 {
632     MOS_STATUS                              eStatus = MOS_STATUS_SUCCESS;
633     PMEDIACOPY_RENDER_DATA                  pRenderData = &m_RenderData;
634     RECT                                    AlignedRect;
635     int32_t                                 iBytePerPixelPerPlane = GetBytesPerPixelPerPlane(m_Target.Format);
636     uint32_t                                WalkerWidthBlockSize = 128;
637     uint32_t                                WalkerHeightBlockSize = 8;
638 
639     MCPY_CHK_NULL_RETURN(pRenderData);
640 
641     if ((iBytePerPixelPerPlane < 1) || (iBytePerPixelPerPlane > 8))
642     {
643         MCPY_ASSERTMESSAGE("RenderCopyComputerWalker wrong pixel size.");
644         return MOS_STATUS_INVALID_PARAMETER;
645     }
646 
647     if ((m_Target.Format == Format_YUY2) || (m_Target.Format == Format_Y210) || (m_Target.Format == Format_Y216)
648         || (m_Target.Format == Format_AYUV) || (m_Target.Format == Format_Y410) || (m_Target.Format == Format_Y416)
649         || (m_Target.Format == Format_A8R8G8B8))
650     {
651         if ((m_currKernelId == KERNEL_CopyKernel_1D_to_2D_Packed) || (m_currKernelId == KERNEL_CopyKernel_2D_to_1D_Packed))
652         {
653             WalkerHeightBlockSize = 32;
654         }
655         else if (m_currKernelId == KERNEL_CopyKernel_2D_to_2D_Packed)
656         {
657             WalkerHeightBlockSize = 8;
658         }
659         else
660         {
661             MCPY_ASSERTMESSAGE("RenderCopyComputerWalker wrong kernel file.");
662             return MOS_STATUS_INVALID_PARAMETER;
663         }
664     }
665     else
666     {
667         WalkerHeightBlockSize = 8;
668     }
669 
670     // Set walker cmd params - Rasterscan
671     MOS_ZeroMemory(pWalkerParams, sizeof(*pWalkerParams));
672 
673 
674     AlignedRect.left   = 0;
675     AlignedRect.top    = 0;
676     AlignedRect.right  = (m_Source.dwPitch < m_Target.dwPitch) ? m_Source.dwPitch : m_Target.dwPitch;
677     AlignedRect.bottom = (m_Source.dwHeight < m_Target.dwHeight) ? m_Source.dwHeight : m_Target.dwHeight;
678     // Calculate aligned output area in order to determine the total # blocks
679    // to process in case of non-16x16 aligned target.
680     AlignedRect.right += WalkerWidthBlockSize - 1;
681     AlignedRect.bottom += WalkerHeightBlockSize - 1;
682     AlignedRect.left -= AlignedRect.left % WalkerWidthBlockSize;
683     AlignedRect.top -= AlignedRect.top % WalkerHeightBlockSize;
684     AlignedRect.right -= AlignedRect.right % WalkerWidthBlockSize;
685     AlignedRect.bottom -= AlignedRect.bottom % WalkerHeightBlockSize;
686 
687     pWalkerParams->InterfaceDescriptorOffset = pRenderData->iMediaID;
688 
689     pWalkerParams->GroupStartingX = (AlignedRect.left / WalkerWidthBlockSize);
690     pWalkerParams->GroupStartingY = (AlignedRect.top / WalkerHeightBlockSize);
691 
692     // Set number of blocks
693     pRenderData->iBlocksX =
694         ((AlignedRect.right - AlignedRect.left) + WalkerWidthBlockSize - 1) / WalkerWidthBlockSize;
695     pRenderData->iBlocksY =
696         ((AlignedRect.bottom - AlignedRect.top) + WalkerHeightBlockSize -1)/ WalkerHeightBlockSize;
697 
698     // Set number of blocks, block size is WalkerWidthBlockSize x WalkerHeightBlockSize.
699     pWalkerParams->GroupWidth = pRenderData->iBlocksX;
700     pWalkerParams->GroupHeight = pRenderData->iBlocksY; // hight/WalkerWidthBlockSize
701 
702     pWalkerParams->ThreadWidth = 1;
703     pWalkerParams->ThreadHeight = 1;
704     pWalkerParams->ThreadDepth = 1;
705     pWalkerParams->IndirectDataStartAddress = pRenderData->iCurbeOffset;
706     // Indirect Data Length is a multiple of 64 bytes (size of L3 cacheline). Bits [5:0] are zero.
707     pWalkerParams->IndirectDataLength = MOS_ALIGN_CEIL(pRenderData->iCurbeLength, 1 << MHW_COMPUTE_INDIRECT_SHIFT);
708     pWalkerParams->BindingTableID = pRenderData->iBindingTable;
709     MCPY_NORMALMESSAGE("this = %p, WidthBlockSize %d, HeightBlockSize %d, Widththreads %d, Heightthreads%d",
710         this, WalkerWidthBlockSize, WalkerHeightBlockSize, pWalkerParams->GroupWidth, pWalkerParams->GroupHeight);
711 
712     return eStatus;
713 }
714