1 /*
2 * Copyright (c) 2022, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file media_render_copy.cpp
24 //! \brief render copy implement file
25 //! \details render copy implement file
26 //!
27
28 #include "media_render_copy.h"
29 #include "hal_kerneldll.h"
30 #include "media_copy.h"
31 #include "media_interfaces_mhw.h"
32 #include "mhw_cp_interface.h"
33 #include "mhw_state_heap.h"
34 #include "mos_defs_specific.h"
35 #include "mos_os_hw.h"
36 #include "mos_utilities.h"
37 #include "vphal_render_common.h"
38
RenderCopyState(PMOS_INTERFACE osInterface,MhwInterfaces * mhwInterfaces)39 RenderCopyState::RenderCopyState(PMOS_INTERFACE osInterface, MhwInterfaces *mhwInterfaces) :
40 m_osInterface(osInterface),
41 m_mhwInterfaces(mhwInterfaces)
42 {
43 if (nullptr == osInterface)
44 {
45 MCPY_ASSERTMESSAGE("osInterface is nullptr");
46 return;
47 }
48 m_renderInterface = mhwInterfaces->m_renderInterface;
49 m_RenderData.pKernelParam = (PRENDERHAL_KERNEL_PARAM)g_rendercopy_KernelParam;
50 Mos_SetVirtualEngineSupported(osInterface, true);
51 osInterface->pfnVirtualEngineSupported(osInterface, true, false);
52
53 MOS_NULL_RENDERING_FLAGS NullRenderingFlags;
54 NullRenderingFlags = osInterface->pfnGetNullHWRenderFlags(osInterface);
55
56 m_bNullHwRenderCopy =
57 NullRenderingFlags.VPComp ||
58 NullRenderingFlags.VPGobal;
59 }
60
~RenderCopyState()61 RenderCopyState:: ~RenderCopyState()
62 {
63 if (m_renderHal != nullptr)
64 {
65 MOS_STATUS eStatus = m_renderHal->pfnDestroy(m_renderHal);
66 if (eStatus != MOS_STATUS_SUCCESS)
67 {
68 MCPY_ASSERTMESSAGE("Failed to destroy RenderHal, eStatus:%d.\n", eStatus);
69 }
70 MOS_FreeMemAndSetNull(m_renderHal);
71 }
72
73 if (m_cpInterface != nullptr)
74 {
75 if (m_osInterface)
76 {
77 m_osInterface->pfnDeleteMhwCpInterface(m_cpInterface);
78 m_cpInterface = nullptr;
79 }
80 else
81 {
82 MCPY_ASSERTMESSAGE("Failed to destroy vpInterface.");
83 }
84 }
85
86 // Destroy Kernel DLL objects (cache, hash table, states)
87 if (m_pKernelDllState)
88 {
89 KernelDll_ReleaseStates(m_pKernelDllState);
90 m_pKernelBin = nullptr;
91 }
92 }
93
Initialize()94 MOS_STATUS RenderCopyState::Initialize()
95 {
96 RENDERHAL_SETTINGS_LEGACY RenderHalSettings;
97
98 MCPY_CHK_NULL_RETURN(m_osInterface);
99
100 m_renderHal = (PRENDERHAL_INTERFACE_LEGACY)MOS_AllocAndZeroMemory(sizeof(RENDERHAL_INTERFACE_LEGACY));
101 MCPY_CHK_NULL_RETURN(m_renderHal);
102 MCPY_CHK_STATUS_RETURN(RenderHal_InitInterface_Legacy(
103 m_renderHal,
104 &m_cpInterface,
105 m_osInterface));
106
107 // Allocate and initialize HW states
108 RenderHalSettings.iMediaStates = 32;
109 MCPY_CHK_STATUS_RETURN(m_renderHal->pfnInitialize(m_renderHal, &RenderHalSettings));
110
111 m_renderHal->sseuTable = VpDefaultSSEUTable;
112 m_renderHal->forceDisablePreemption = true;
113
114 return MOS_STATUS_SUCCESS;
115 }
116
GetBytesPerPixelPerPlane(MOS_FORMAT Format)117 int32_t RenderCopyState::GetBytesPerPixelPerPlane(
118 MOS_FORMAT Format)
119 {
120 int32_t iBytePerPixelPerPlane = 0;
121
122 switch(Format)
123 {
124 case Format_NV12:
125 case Format_RGBP:
126 iBytePerPixelPerPlane = 1;
127 break;
128 case Format_YUY2:
129 case Format_P010:
130 case Format_P016:
131 iBytePerPixelPerPlane = 2;
132 break;
133 case Format_Y210:
134 case Format_Y216:
135 case Format_Y410:
136 case Format_AYUV:
137 case Format_A8R8G8B8:
138 iBytePerPixelPerPlane = 4;
139 break;
140 case Format_Y416:
141 iBytePerPixelPerPlane = 8;
142 break;
143 default:
144 MCPY_ASSERTMESSAGE("can't support formats %d for render copy", Format);
145 break;
146 }
147
148 return iBytePerPixelPerPlane;
149 }
150
SubmitCMD()151 MOS_STATUS RenderCopyState::SubmitCMD( )
152 {
153 return MOS_STATUS_SUCCESS;
154 }
155
GetCurentKernelID()156 MOS_STATUS RenderCopyState::GetCurentKernelID( )
157 {
158 int32_t iBytePerPixelPerPlane = GetBytesPerPixelPerPlane(m_Source.Format);
159
160 if ((iBytePerPixelPerPlane < 1) || (iBytePerPixelPerPlane > 8))
161 {
162 MCPY_ASSERTMESSAGE("GetCurentKernelID wrong pixel size.");
163 return MOS_STATUS_INVALID_PARAMETER;
164 }
165
166 // This scheme is temporary
167 // for a !MOS_TILE_LINEAR plane surface, it is 2D surface.
168 // for a MOS_TILE_LINEAR plane surface, if (dwWidth * bytes_per_pixel < dwPitch) it is 2D surface
169 // if (dwWidth * bytes_per_pixel == dwPitch) it is a 1D surface.
170
171 if ((m_Source.Format == Format_NV12) || (m_Source.Format == Format_P010) || (m_Source.Format == Format_P016))
172 {
173 if (m_Source.TileType == MOS_TILE_LINEAR && m_Target.TileType != MOS_TILE_LINEAR)
174 {
175 m_currKernelId = KERNEL_CopyKernel_1D_to_2D_NV12;
176 }
177 else if (m_Source.TileType != MOS_TILE_LINEAR && m_Target.TileType == MOS_TILE_LINEAR)
178 {
179 m_currKernelId = KERNEL_CopyKernel_2D_to_1D_NV12;
180 }
181 else if (m_Source.TileType != MOS_TILE_LINEAR && m_Target.TileType != MOS_TILE_LINEAR)
182 {
183 m_currKernelId = KERNEL_CopyKernel_2D_to_2D_NV12;
184 }
185 else
186 {
187 m_currKernelId = KERNEL_CopyKernel_MAX;
188 MCPY_ASSERTMESSAGE("Can't find right kernel to support, pls help to check input parameters.");
189 return MOS_STATUS_INVALID_PARAMETER;
190 }
191 }
192 else if (m_Source.Format == Format_RGBP)
193 {
194 if (m_Source.TileType == MOS_TILE_LINEAR && m_Target.TileType != MOS_TILE_LINEAR)
195 {
196 m_currKernelId = KERNEL_CopyKernel_1D_to_2D_Planar;
197 }
198 else if (m_Source.TileType != MOS_TILE_LINEAR && m_Target.TileType == MOS_TILE_LINEAR)
199 {
200 m_currKernelId = KERNEL_CopyKernel_2D_to_1D_Planar;
201 }
202 else if (m_Source.TileType != MOS_TILE_LINEAR && m_Target.TileType != MOS_TILE_LINEAR)
203 {
204 m_currKernelId = KERNEL_CopyKernel_2D_to_2D_Planar;
205 }
206 else
207 {
208 m_currKernelId = KERNEL_CopyKernel_MAX;
209 MCPY_ASSERTMESSAGE("kernel can't support it.");
210 return MOS_STATUS_INVALID_PARAMETER;
211 }
212 }
213 else if ((m_Source.Format == Format_YUY2) || (m_Source.Format == Format_Y210) || (m_Source.Format == Format_Y216)
214 || (m_Source.Format == Format_AYUV) || (m_Source.Format == Format_Y410) || (m_Source.Format == Format_Y416)
215 || (m_Source.Format == Format_A8R8G8B8))
216 {
217 if (m_Source.TileType == MOS_TILE_LINEAR && m_Target.TileType != MOS_TILE_LINEAR)
218 {
219 m_currKernelId = KERNEL_CopyKernel_1D_to_2D_Packed;
220 }
221 else if (m_Source.TileType != MOS_TILE_LINEAR && m_Target.TileType == MOS_TILE_LINEAR)
222 {
223 m_currKernelId = KERNEL_CopyKernel_2D_to_1D_Packed;
224 }
225 else if (m_Source.TileType != MOS_TILE_LINEAR && m_Target.TileType != MOS_TILE_LINEAR)
226 {
227 m_currKernelId = KERNEL_CopyKernel_2D_to_2D_Packed;
228 }
229 else
230 {
231 m_currKernelId = KERNEL_CopyKernel_MAX;
232 MCPY_ASSERTMESSAGE("kernel can't support it.");
233 return MOS_STATUS_INVALID_PARAMETER;
234 }
235
236 }
237 MCPY_NORMALMESSAGE("Used Render copy and currentKernel id = %d.", m_currKernelId);
238 return MOS_STATUS_SUCCESS;
239 }
240
241 //!
242 //! \brief setup surface states
243 //! \details Setup surface states for fast 1toN
244 //! \return MOS_STATUS
245 //! Return MOS_STATUS_SUCCESS if successful, otherwise failed
246 //!
SetupSurfaceStates()247 MOS_STATUS RenderCopyState::SetupSurfaceStates()
248 {
249 RENDERHAL_SURFACE_STATE_PARAMS SurfaceParams;
250 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
251 uint32_t index;
252 uint32_t width = 0;
253 MOS_FORMAT format = Format_NV12;
254 int32_t iBTEntry;
255 PRENDERHAL_INTERFACE pRenderHal = m_renderHal;
256 PMEDIACOPY_RENDER_DATA pRenderData = &m_RenderData;
257 RENDERHAL_SURFACE RenderHalSource = {}; // source for mhw
258 RENDERHAL_SURFACE RenderHalTarget = {}; // target for mhw
259 MCPY_CHK_NULL_RETURN(pRenderHal);
260 MCPY_CHK_NULL_RETURN(pRenderData);
261 // Source surface
262 MOS_ZeroMemory(&SurfaceParams, sizeof(SurfaceParams));
263
264 pRenderData->SurfMemObjCtl.SourceSurfMemObjCtl =
265 pRenderHal->pOsInterface->pfnCachePolicyGetMemoryObject(
266 MOS_MP_RESOURCE_USAGE_SurfaceState_RCS,
267 pRenderHal->pOsInterface->pfnGetGmmClientContext(pRenderHal->pOsInterface)).DwordValue;
268
269 pRenderData->SurfMemObjCtl.TargetSurfMemObjCtl = pRenderData->SurfMemObjCtl.SourceSurfMemObjCtl;
270
271 SurfaceParams.bAVS = false;
272 SurfaceParams.Boundary = RENDERHAL_SS_BOUNDARY_SRCRECT;
273 SurfaceParams.isOutput = false;
274 SurfaceParams.MemObjCtl = pRenderData->SurfMemObjCtl.SourceSurfMemObjCtl;
275
276 SurfaceParams.Type = RENDERHAL_SURFACE_TYPE_G10;
277 SurfaceParams.bWidthInDword_Y = false;
278 SurfaceParams.bWidthInDword_UV = false;
279 SurfaceParams.bWidth16Align = false;
280
281 if (Format_NV12 == m_Target.Format)
282 {
283 m_Target.SurfType = SURF_OUT_RENDERTARGET;
284 m_Source.SurfType = SURF_OUT_RENDERTARGET;
285 }
286
287 if (m_currKernelId == KERNEL_CopyKernel_1D_to_2D_NV12
288 || m_currKernelId == KERNEL_CopyKernel_1D_to_2D_Planar
289 || m_currKernelId == KERNEL_CopyKernel_1D_to_2D_Packed)
290 {
291 format = m_Source.Format;
292 width = m_Source.dwWidth;
293 m_Source.Format = Format_RAW;
294
295 if ((format == Format_NV12) || (format == Format_P010) || (format == Format_P016))
296 {
297 m_Source.dwWidth = (m_Source.dwHeight * m_Source.dwPitch) * 3 / 2;
298 }
299 else if (format == Format_RGBP)
300 {
301 m_Source.dwWidth = (m_Source.dwHeight * m_Source.dwPitch) * 3;
302 }
303 else if ((format == Format_YUY2) || (format == Format_Y210) || (format == Format_Y216)
304 || (format == Format_AYUV) || (format == Format_Y410) || (format == Format_Y416)
305 || (format == Format_A8R8G8B8))
306 {
307 m_Source.dwWidth = m_Source.dwHeight * m_Source.dwPitch;
308 }
309
310 m_Source.dwWidth = MOS_ALIGN_CEIL(m_Source.dwWidth, 128);
311 //1D surfaces
312 VPHAL_RENDER_CHK_STATUS(VpHal_CommonSetBufferSurfaceForHwAccess(
313 pRenderHal,
314 &m_Source,
315 &RenderHalSource,
316 &SurfaceParams,
317 pRenderData->iBindingTable,
318 RENDERCOPY_SRC_INDEX,
319 false));
320 m_Source.Format = format;
321 m_Source.dwWidth = width;
322 }
323 else {
324 //2D surfaces
325 VPHAL_RENDER_CHK_STATUS(VpHal_CommonSetSurfaceForHwAccess(
326 pRenderHal,
327 &m_Source,
328 &RenderHalSource,
329 &SurfaceParams,
330 pRenderData->iBindingTable,
331 RENDERCOPY_SRC_INDEX,
332 false));
333 }
334
335 // Target surface
336 SurfaceParams.MemObjCtl = pRenderData->SurfMemObjCtl.TargetSurfMemObjCtl;
337 SurfaceParams.Type = pRenderHal->SurfaceTypeDefault;
338 SurfaceParams.isOutput = true;
339 SurfaceParams.bAVS = false;
340 SurfaceParams.Boundary = RENDERHAL_SS_BOUNDARY_DSTRECT;
341
342 if (m_currKernelId == KERNEL_CopyKernel_2D_to_1D_NV12
343 || m_currKernelId == KERNEL_CopyKernel_2D_to_1D_Planar
344 || m_currKernelId == KERNEL_CopyKernel_2D_to_1D_Packed)
345 {
346 format = m_Target.Format;
347 width = m_Target.dwWidth;
348 m_Target.Format = Format_RAW;
349
350 if ((format == Format_NV12) || (format == Format_P010) || (format == Format_P016))
351 {
352 m_Target.dwWidth = (m_Target.dwHeight * m_Target.dwPitch) * 3 / 2;
353 }
354 else if (format == Format_RGBP)
355 {
356 m_Target.dwWidth = (m_Target.dwHeight * m_Target.dwPitch) * 3;
357 }
358 else if ((format == Format_YUY2) || (format == Format_Y210) || (format == Format_Y216)
359 || (format == Format_AYUV) || (format == Format_Y410) || (format == Format_Y416)
360 || (format == Format_A8R8G8B8))
361 {
362 m_Target.dwWidth = m_Target.dwHeight * m_Target.dwPitch;
363 }
364
365 m_Target.dwWidth = MOS_ALIGN_CEIL(m_Target.dwWidth, 128);
366
367 //1D surface.
368 VPHAL_RENDER_CHK_STATUS(VpHal_CommonSetBufferSurfaceForHwAccess(
369 pRenderHal,
370 &m_Target,
371 &RenderHalTarget,
372 &SurfaceParams,
373 pRenderData->iBindingTable,
374 RENDERCOPY_DST_INDEX,
375 true));
376 m_Target.Format = format;
377 m_Target.dwWidth = width;
378 }
379 else
380 {
381 //2D surface.
382 VPHAL_RENDER_CHK_STATUS(VpHal_CommonSetSurfaceForHwAccess(
383 pRenderHal,
384 &m_Target,
385 &RenderHalTarget,
386 &SurfaceParams,
387 pRenderData->iBindingTable,
388 RENDERCOPY_DST_INDEX,
389 true));
390
391 }
392 finish:
393 return eStatus;
394 }
395
396
LoadStaticData(int32_t * piCurbeOffset)397 MOS_STATUS RenderCopyState::LoadStaticData(
398 int32_t* piCurbeOffset)
399 {
400 DP_RENDERCOPY_NV12_STATIC_DATA WalkerNV12Static;
401 DP_RENDERCOPY_RGBP_STATIC_DATA WalkerPlanarStatic;
402 DP_RENDERCOPY_PACKED_STATIC_DATA WalkerSinglePlaneStatic;
403
404 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
405 int32_t iCurbeLength = 0;
406 int32_t iBytePerPixelPerPlane = GetBytesPerPixelPerPlane(m_Target.Format);
407 PRENDERHAL_INTERFACE pRenderHal = m_renderHal;
408 PMEDIACOPY_RENDER_DATA pRenderData = &m_RenderData;
409
410 MCPY_CHK_NULL_RETURN(pRenderHal);
411 MCPY_CHK_NULL_RETURN(pRenderData);
412 if ((iBytePerPixelPerPlane < 1) || (iBytePerPixelPerPlane > 8))
413 {
414 MCPY_ASSERTMESSAGE("LoadStaticData wrong pixel size.");
415 return MOS_STATUS_INVALID_PARAMETER;
416 }
417
418 int32_t srcResourceOffset = (int32_t)(m_Source.OsResource.pGmmResInfo->GetPlanarXOffset(GMM_NO_PLANE));
419 int32_t dstResourceOffset = (int32_t)(m_Target.OsResource.pGmmResInfo->GetPlanarXOffset(GMM_NO_PLANE));
420
421 if (srcResourceOffset)
422 {
423 m_Source.dwOffset -= srcResourceOffset;
424 }
425
426 if (dstResourceOffset)
427 {
428 m_Target.dwOffset -= dstResourceOffset;
429 }
430
431 if ((m_Target.Format == Format_NV12) || ((m_Target.Format == Format_P010) || (m_Target.Format == Format_P016)))
432 {
433 // Set relevant static data
434 MOS_ZeroMemory(&WalkerNV12Static, sizeof(DP_RENDERCOPY_NV12_STATIC_DATA));
435
436 WalkerNV12Static.DW0.Inputsurfaceindex = RENDERCOPY_SRC_INDEX;
437 WalkerNV12Static.DW1.Outputsurfaceindex = RENDERCOPY_DST_INDEX;
438
439 if (m_currKernelId == KERNEL_CopyKernel_1D_to_2D_NV12)
440 {
441 WalkerNV12Static.DW2.Widthdword = (m_Source.dwWidth * iBytePerPixelPerPlane + 3) / 4;
442 WalkerNV12Static.DW3.Height = m_Source.dwHeight;
443 WalkerNV12Static.DW4.ShiftLeftOffsetInBytes = m_Source.dwOffset;
444 WalkerNV12Static.DW5.Widthstride = m_Source.dwPitch;
445 WalkerNV12Static.DW6.Heightstride = m_Source.dwHeight;
446 }
447 else
448 {
449 WalkerNV12Static.DW2.Widthdword = (m_Source.dwWidth < m_Target.dwWidth) ? m_Source.dwWidth : m_Target.dwWidth;
450 WalkerNV12Static.DW2.Widthdword = (WalkerNV12Static.DW2.Widthdword * iBytePerPixelPerPlane + 3) / 4;
451 WalkerNV12Static.DW3.Height = (m_Source.dwHeight < m_Target.dwHeight) ? m_Source.dwHeight:m_Target.dwHeight;
452 WalkerNV12Static.DW4.ShiftLeftOffsetInBytes = m_Target.dwOffset;
453 WalkerNV12Static.DW5.Widthstride = m_Target.dwPitch;
454 WalkerNV12Static.DW6.Heightstride = m_Target.dwHeight;
455 }
456
457 iCurbeLength = sizeof(DP_RENDERCOPY_NV12_STATIC_DATA);
458 MCPY_NORMALMESSAGE("Load Curbe data: DW0.Inputsurfaceindex = %d, DW1.Outputsurfaceindex = %d, DW2.WidthDWord= %d, DW3.Height= %d,"
459 "DW4.ShiftLeftOffsetInBytes= %d,DW5.Widthstride = %d, DW6.Heightstride = % d.",
460 WalkerNV12Static.DW0.Inputsurfaceindex,
461 WalkerNV12Static.DW1.Outputsurfaceindex,
462 WalkerNV12Static.DW2.Widthdword,
463 WalkerNV12Static.DW3.Height,
464 WalkerNV12Static.DW4.ShiftLeftOffsetInBytes,
465 WalkerNV12Static.DW5.Widthstride,
466 WalkerNV12Static.DW6.Heightstride);
467 *piCurbeOffset = pRenderHal->pfnLoadCurbeData(
468 pRenderHal,
469 pRenderData->pMediaState,
470 &WalkerNV12Static,
471 iCurbeLength);
472 }
473 else if (m_Target.Format == Format_RGBP)
474 {
475 // Set relevant static data
476 MOS_ZeroMemory(&WalkerPlanarStatic, sizeof(DP_RENDERCOPY_RGBP_STATIC_DATA));
477
478 if (m_currKernelId == KERNEL_CopyKernel_2D_to_1D_Planar)
479 {
480 WalkerPlanarStatic.DW0.InputsurfaceRindex = RENDERCOPY_SRC_INDEX + 2;
481 WalkerPlanarStatic.DW1.InputsurfaceGindex = RENDERCOPY_SRC_INDEX;
482 WalkerPlanarStatic.DW2.InputsurfaceBindex = RENDERCOPY_SRC_INDEX + 1;
483 }
484 else
485 {
486 WalkerPlanarStatic.DW0.InputsurfaceRindex = RENDERCOPY_SRC_INDEX;
487 WalkerPlanarStatic.DW1.InputsurfaceGindex = RENDERCOPY_SRC_INDEX + 1;
488 WalkerPlanarStatic.DW2.InputsurfaceBindex = RENDERCOPY_SRC_INDEX + 2;
489 }
490
491 if (m_currKernelId == KERNEL_CopyKernel_1D_to_2D_Planar)
492 {
493 WalkerPlanarStatic.DW3.OutputsurfaceRindex = RENDERCOPY_DST_INDEX + 2;
494 WalkerPlanarStatic.DW4.OutputsurfaceGindex = RENDERCOPY_DST_INDEX;
495 WalkerPlanarStatic.DW5.OutputsurfaceBindex = RENDERCOPY_DST_INDEX + 1;
496 }
497 else
498 {
499 WalkerPlanarStatic.DW3.OutputsurfaceRindex = RENDERCOPY_DST_INDEX;
500 WalkerPlanarStatic.DW4.OutputsurfaceGindex = RENDERCOPY_DST_INDEX + 1;
501 WalkerPlanarStatic.DW5.OutputsurfaceBindex = RENDERCOPY_DST_INDEX + 2;
502 }
503
504 if (m_currKernelId == KERNEL_CopyKernel_1D_to_2D_Planar)
505 {
506 WalkerPlanarStatic.DW6.Widthdword = m_Source.dwPitch / 4;
507 WalkerPlanarStatic.DW7.Height = m_Source.dwHeight;
508 WalkerPlanarStatic.DW8.ShiftLeftOffsetInBytes = m_Source.dwOffset;
509 }
510 else
511 {
512 WalkerPlanarStatic.DW6.Widthdword = m_Target.dwPitch / 4;
513 WalkerPlanarStatic.DW7.Height = m_Target.dwHeight;
514 WalkerPlanarStatic.DW8.ShiftLeftOffsetInBytes = m_Target.dwOffset;
515 }
516
517 WalkerPlanarStatic.DW9.WidthdwordNoPadding = (m_Source.dwWidth < m_Target.dwWidth) ? m_Source.dwWidth : m_Target.dwWidth;
518 WalkerPlanarStatic.DW9.WidthdwordNoPadding = (WalkerPlanarStatic.DW9.WidthdwordNoPadding * iBytePerPixelPerPlane + 3) / 4;
519 WalkerPlanarStatic.DW10.Dst2DStartX = 0;
520 WalkerPlanarStatic.DW11.Dst2DStartY = 0;
521
522 iCurbeLength = sizeof(DP_RENDERCOPY_RGBP_STATIC_DATA);
523 MCPY_NORMALMESSAGE("Load Curbe data: DW0.InputsurfaceRindex = %d, DW1.InputsurfaceGindex = %d, DW2.InputsurfaceBindex= %d, DW3.Height= %d,"
524 "DW4.OutputsurfaceGindex = %d, DW5.OutputsurfaceBindex = %d, DW6.Widthdword = %d, DW7.Height = %d, DW8.ShiftLeftOffsetInByte= %d,"
525 "DW9.WidthdwordNoPadding = %d, WalkerPlanarStatic.DW10.Dst2DStartX = %d, WalkerPlanarStatic.DW11.Dst2DStartY = %d.",
526 WalkerPlanarStatic.DW0.InputsurfaceRindex,
527 WalkerPlanarStatic.DW1.InputsurfaceGindex,
528 WalkerPlanarStatic.DW2.InputsurfaceBindex,
529 WalkerPlanarStatic.DW3.OutputsurfaceRindex,
530 WalkerPlanarStatic.DW4.OutputsurfaceGindex,
531 WalkerPlanarStatic.DW5.OutputsurfaceBindex,
532 WalkerPlanarStatic.DW6.Widthdword,
533 WalkerPlanarStatic.DW7.Height,
534 WalkerPlanarStatic.DW8.ShiftLeftOffsetInBytes,
535 WalkerPlanarStatic.DW9.WidthdwordNoPadding,
536 WalkerPlanarStatic.DW10.Dst2DStartX,
537 WalkerPlanarStatic.DW11.Dst2DStartY);
538
539 *piCurbeOffset = pRenderHal->pfnLoadCurbeData(
540 pRenderHal,
541 pRenderData->pMediaState,
542 &WalkerPlanarStatic,
543 iCurbeLength);
544 }
545 else if ((m_Target.Format == Format_YUY2) || (m_Target.Format == Format_Y210) || (m_Target.Format == Format_Y216)
546 || (m_Target.Format == Format_AYUV) || (m_Target.Format == Format_Y410) || (m_Target.Format == Format_Y416)
547 || (m_Target.Format == Format_A8R8G8B8))
548 {
549 // Set relevant static data
550 MOS_ZeroMemory(&WalkerSinglePlaneStatic, sizeof(WalkerSinglePlaneStatic));
551
552 WalkerSinglePlaneStatic.DW0.InputSurfaceIndex = RENDERCOPY_SRC_INDEX;
553 WalkerSinglePlaneStatic.DW1.OutputSurfaceIndex = RENDERCOPY_DST_INDEX;
554 if (m_currKernelId == KERNEL_CopyKernel_1D_to_2D_Packed)
555 {
556 WalkerSinglePlaneStatic.DW2.WidthDWord = m_Source.dwPitch / 4;
557 WalkerSinglePlaneStatic.DW3.Height = m_Source.dwHeight;
558 WalkerSinglePlaneStatic.DW4.ShiftLeftOffsetInBytes = m_Source.dwOffset;
559 }
560 else
561 {
562 WalkerSinglePlaneStatic.DW2.WidthDWord = m_Target.dwPitch / 4;
563 WalkerSinglePlaneStatic.DW3.Height = m_Target.dwHeight;
564 WalkerSinglePlaneStatic.DW4.ShiftLeftOffsetInBytes = m_Target.dwOffset;
565 }
566
567 if ((m_currKernelId == KERNEL_CopyKernel_1D_to_2D_Packed) || (m_currKernelId == KERNEL_CopyKernel_2D_to_1D_Packed))
568 {
569 WalkerSinglePlaneStatic.DW5.ThreadHeight = (m_Source.dwHeight < m_Target.dwHeight) ? m_Source.dwHeight : m_Target.dwHeight;
570 WalkerSinglePlaneStatic.DW5.ThreadHeight = (WalkerSinglePlaneStatic.DW5.ThreadHeight + 32 - 1) / 32;
571 }
572 else if (m_currKernelId == KERNEL_CopyKernel_2D_to_2D_Packed)
573 {
574 WalkerSinglePlaneStatic.DW5.ThreadHeight = (m_Source.dwHeight + 8 - 1) / 8;
575 }
576 else
577 {
578 MCPY_ASSERTMESSAGE("LoadStaticData wrong kernel file.");
579 return MOS_STATUS_INVALID_PARAMETER;
580 }
581
582 WalkerSinglePlaneStatic.DW6.WidthdwordNoPadding = (m_Source.dwWidth < m_Target.dwWidth) ? m_Source.dwWidth : m_Target.dwWidth;
583 WalkerSinglePlaneStatic.DW6.WidthdwordNoPadding = (WalkerSinglePlaneStatic.DW6.WidthdwordNoPadding * iBytePerPixelPerPlane + 3) / 4;
584 WalkerSinglePlaneStatic.DW7.Dst2DStartX = 0;
585 WalkerSinglePlaneStatic.DW8.Dst2DStartY = 0;
586
587 iCurbeLength = sizeof(DP_RENDERCOPY_PACKED_STATIC_DATA);
588 MCPY_NORMALMESSAGE("Load Curbe data: DW0.InputSurfaceIndex = %d, DW1.OutputSurfaceIndex = %d, DW2.WidthDWord= %d, DW3.Height= %d,"
589 "DW4.ShiftLeftOffsetInBytes= %d,DW5.ThreadHeight = %d, DW6.WidthdwordNoPadding = %d, DW7.Dst2DStartX = %d, DW8.Dst2DStartY = %d.",
590 WalkerSinglePlaneStatic.DW0.InputSurfaceIndex,
591 WalkerSinglePlaneStatic.DW1.OutputSurfaceIndex,
592 WalkerSinglePlaneStatic.DW2.WidthDWord,
593 WalkerSinglePlaneStatic.DW3.Height,
594 WalkerSinglePlaneStatic.DW4.ShiftLeftOffsetInBytes,
595 WalkerSinglePlaneStatic.DW5.ThreadHeight,
596 WalkerSinglePlaneStatic.DW6.WidthdwordNoPadding,
597 WalkerSinglePlaneStatic.DW7.Dst2DStartX,
598 WalkerSinglePlaneStatic.DW8.Dst2DStartY);
599
600 *piCurbeOffset = pRenderHal->pfnLoadCurbeData(
601 pRenderHal,
602 pRenderData->pMediaState,
603 &WalkerSinglePlaneStatic,
604 iCurbeLength);
605 }
606 else
607 {
608 MCPY_ASSERTMESSAGE("can't support Target format %d", m_Target.Format);
609 }
610
611 if (*piCurbeOffset < 0)
612 {
613 return MOS_STATUS_UNKNOWN;
614 }
615
616 pRenderData->iCurbeLength = iCurbeLength;
617
618 return eStatus;
619 }
620
621 //!
622 //! \brief Render copy omputer walker setup
623 //! \details Computer walker setup for render copy
624 //! \param PMHW_WALKER_PARAMS pWalkerParams
625 //! [in/out] Pointer to Walker params
626 //! \return MOS_STATUS
627 //! Return MOS_STATUS_SUCCESS if successful, otherwise failed
628 //!
RenderCopyComputerWalker(PMHW_GPGPU_WALKER_PARAMS pWalkerParams)629 MOS_STATUS RenderCopyState::RenderCopyComputerWalker(
630 PMHW_GPGPU_WALKER_PARAMS pWalkerParams)
631 {
632 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
633 PMEDIACOPY_RENDER_DATA pRenderData = &m_RenderData;
634 RECT AlignedRect;
635 int32_t iBytePerPixelPerPlane = GetBytesPerPixelPerPlane(m_Target.Format);
636 uint32_t WalkerWidthBlockSize = 128;
637 uint32_t WalkerHeightBlockSize = 8;
638
639 MCPY_CHK_NULL_RETURN(pRenderData);
640
641 if ((iBytePerPixelPerPlane < 1) || (iBytePerPixelPerPlane > 8))
642 {
643 MCPY_ASSERTMESSAGE("RenderCopyComputerWalker wrong pixel size.");
644 return MOS_STATUS_INVALID_PARAMETER;
645 }
646
647 if ((m_Target.Format == Format_YUY2) || (m_Target.Format == Format_Y210) || (m_Target.Format == Format_Y216)
648 || (m_Target.Format == Format_AYUV) || (m_Target.Format == Format_Y410) || (m_Target.Format == Format_Y416)
649 || (m_Target.Format == Format_A8R8G8B8))
650 {
651 if ((m_currKernelId == KERNEL_CopyKernel_1D_to_2D_Packed) || (m_currKernelId == KERNEL_CopyKernel_2D_to_1D_Packed))
652 {
653 WalkerHeightBlockSize = 32;
654 }
655 else if (m_currKernelId == KERNEL_CopyKernel_2D_to_2D_Packed)
656 {
657 WalkerHeightBlockSize = 8;
658 }
659 else
660 {
661 MCPY_ASSERTMESSAGE("RenderCopyComputerWalker wrong kernel file.");
662 return MOS_STATUS_INVALID_PARAMETER;
663 }
664 }
665 else
666 {
667 WalkerHeightBlockSize = 8;
668 }
669
670 // Set walker cmd params - Rasterscan
671 MOS_ZeroMemory(pWalkerParams, sizeof(*pWalkerParams));
672
673
674 AlignedRect.left = 0;
675 AlignedRect.top = 0;
676 AlignedRect.right = (m_Source.dwPitch < m_Target.dwPitch) ? m_Source.dwPitch : m_Target.dwPitch;
677 AlignedRect.bottom = (m_Source.dwHeight < m_Target.dwHeight) ? m_Source.dwHeight : m_Target.dwHeight;
678 // Calculate aligned output area in order to determine the total # blocks
679 // to process in case of non-16x16 aligned target.
680 AlignedRect.right += WalkerWidthBlockSize - 1;
681 AlignedRect.bottom += WalkerHeightBlockSize - 1;
682 AlignedRect.left -= AlignedRect.left % WalkerWidthBlockSize;
683 AlignedRect.top -= AlignedRect.top % WalkerHeightBlockSize;
684 AlignedRect.right -= AlignedRect.right % WalkerWidthBlockSize;
685 AlignedRect.bottom -= AlignedRect.bottom % WalkerHeightBlockSize;
686
687 pWalkerParams->InterfaceDescriptorOffset = pRenderData->iMediaID;
688
689 pWalkerParams->GroupStartingX = (AlignedRect.left / WalkerWidthBlockSize);
690 pWalkerParams->GroupStartingY = (AlignedRect.top / WalkerHeightBlockSize);
691
692 // Set number of blocks
693 pRenderData->iBlocksX =
694 ((AlignedRect.right - AlignedRect.left) + WalkerWidthBlockSize - 1) / WalkerWidthBlockSize;
695 pRenderData->iBlocksY =
696 ((AlignedRect.bottom - AlignedRect.top) + WalkerHeightBlockSize -1)/ WalkerHeightBlockSize;
697
698 // Set number of blocks, block size is WalkerWidthBlockSize x WalkerHeightBlockSize.
699 pWalkerParams->GroupWidth = pRenderData->iBlocksX;
700 pWalkerParams->GroupHeight = pRenderData->iBlocksY; // hight/WalkerWidthBlockSize
701
702 pWalkerParams->ThreadWidth = 1;
703 pWalkerParams->ThreadHeight = 1;
704 pWalkerParams->ThreadDepth = 1;
705 pWalkerParams->IndirectDataStartAddress = pRenderData->iCurbeOffset;
706 // Indirect Data Length is a multiple of 64 bytes (size of L3 cacheline). Bits [5:0] are zero.
707 pWalkerParams->IndirectDataLength = MOS_ALIGN_CEIL(pRenderData->iCurbeLength, 1 << MHW_COMPUTE_INDIRECT_SHIFT);
708 pWalkerParams->BindingTableID = pRenderData->iBindingTable;
709 MCPY_NORMALMESSAGE("this = %p, WidthBlockSize %d, HeightBlockSize %d, Widththreads %d, Heightthreads%d",
710 this, WalkerWidthBlockSize, WalkerHeightBlockSize, pWalkerParams->GroupWidth, pWalkerParams->GroupHeight);
711
712 return eStatus;
713 }
714