xref: /aosp_15_r20/external/intel-media-driver/media_driver/agnostic/common/codec/hal/codechal_memdecomp.cpp (revision ba62d9d3abf0e404f2022b4cd7a85e107f48596f)
1 /*
2 * Copyright (c) 2013-2017, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file     codechal_memdecomp.cpp
24 //! \brief    This module sets up a kernel for media memory decompression.
25 
26 #include "codechal_memdecomp.h"
27 #include "codeckrnheader.h"
28 #include "mos_os_cp_interface_specific.h"
29 
//!
//! \class MediaObjectCopyCurbe
//! \brief Media object memory decompress copy kernel curbe.
//!        Note: Curbe data DW0-6 must be defined at the beginning of the class.
//!
//! \details Each DWord is exposed both through a named field and through a raw
//!          \c value alias occupying the same 32 bits. The constructor clears
//!          the first m_byteSize bytes of the object, and
//!          MediaMemDecompState::SetMediaObjectCopyCurbe() uploads sizeof(object)
//!          bytes, so the two sizes must stay identical (enforced by the
//!          static_assert that follows the class).
//!
class MediaObjectCopyCurbe
{
public:
    // DW 0: index of source surface 0
    union
    {
        struct
        {
            uint32_t srcSurface0Index;
        };
        struct
        {
            uint32_t value;
        };
    } m_dw0;

    // DW 1: index of source surface 1
    union
    {
        struct
        {
            uint32_t srcSurface1Index;
        };
        struct
        {
            uint32_t value;
        };
    } m_dw1;

    // DW 2: index of source surface 2
    union
    {
        struct
        {
            uint32_t srcSurface2Index;
        };
        struct
        {
            uint32_t value;
        };
    } m_dw2;

    // DW 3: index of destination surface 0
    union
    {
        struct
        {
            uint32_t dstSurface0Index;
        };
        struct
        {
            uint32_t value;
        };
    } m_dw3;

    // DW 4: index of destination surface 1
    union
    {
        struct
        {
            uint32_t dstSurface1Index;
        };
        struct
        {
            uint32_t value;
        };
    } m_dw4;

    // DW 5: index of destination surface 2
    union
    {
        struct
        {
            uint32_t dstSurface2Index;
        };
        struct
        {
            uint32_t value;
        };
    } m_dw5;

    // DW 6: surface width
    union
    {
        struct
        {
            uint32_t surfaceWidth;
        };
        struct
        {
            uint32_t value;
        };
    } m_dw6;

    //!
    //! \brief    Constructor
    //!
    MediaObjectCopyCurbe();

    //!
    //! \brief    Destructor
    //!
    ~MediaObjectCopyCurbe() {}

    static const size_t m_byteSize = 28; //!< Byte size of curbe data DW0-6.
};

// The constructor zeroes m_byteSize bytes while the curbe upload copies
// sizeof(MediaObjectCopyCurbe) bytes; a mismatch would either leave part of the
// payload uninitialized or write past the object, so fail the build instead.
static_assert(sizeof(MediaObjectCopyCurbe) == MediaObjectCopyCurbe::m_byteSize,
    "MediaObjectCopyCurbe::m_byteSize must match the size of curbe data DW0-6");
141 
MediaObjectCopyCurbe()142 MediaObjectCopyCurbe::MediaObjectCopyCurbe()
143 {
144     MOS_ZeroMemory(this, m_byteSize);
145 }
146 
~MediaMemDecompState()147 MediaMemDecompState::~MediaMemDecompState()
148 {
149     MHW_FUNCTION_ENTER;
150 
151     if (m_cpInterface)
152     {
153         if (m_osInterface)
154         {
155             m_osInterface->pfnDeleteMhwCpInterface(m_cpInterface);
156             m_cpInterface = nullptr;
157         }
158         else
159         {
160             MHW_ASSERTMESSAGE("Failed to destroy cpInterface.");
161         }
162     }
163 
164     if (m_cmdBufIdGlobal)
165     {
166         if (m_osInterface)
167         {
168             m_osInterface->pfnUnlockResource(m_osInterface, &m_resCmdBufIdGlobal);
169             m_osInterface->pfnFreeResource(m_osInterface, &m_resCmdBufIdGlobal);
170             m_cmdBufIdGlobal = nullptr;
171         }
172         else
173         {
174             MHW_ASSERTMESSAGE("Failed to destroy command buffer global Id.");
175         }
176     }
177 
178     if (m_miInterface)
179     {
180         MOS_Delete(m_miInterface);
181         m_miInterface = nullptr;
182     }
183 
184     if (m_renderInterface)
185     {
186         MOS_Delete(m_renderInterface);
187         m_renderInterface = nullptr;
188     }
189 
190     if (m_osInterface)
191     {
192         m_osInterface->pfnDestroy(m_osInterface, false);
193         MOS_FreeMemory(m_osInterface);
194         m_osInterface = nullptr;
195     }
196 }
197 
MediaMemDecompState()198 MediaMemDecompState::MediaMemDecompState() :
199     MediaMemDecompBaseState(),
200     m_currCmdBufId(0)
201 {
202     MHW_FUNCTION_ENTER;
203     m_stateHeapSettings.m_ishBehavior = HeapManager::Behavior::clientControlled;
204     m_stateHeapSettings.m_dshBehavior = HeapManager::Behavior::destructiveExtend;
205     m_stateHeapSettings.m_keepDshLocked = true;
206     m_stateHeapSettings.dwDshIncrement = 2 * MOS_PAGE_SIZE;
207 
208     MOS_ZeroMemory(&m_renderContext, sizeof(m_renderContext));
209     MOS_ZeroMemory(&m_krnUniId, sizeof(m_krnUniId));
210     MOS_ZeroMemory(&m_kernelSize, sizeof(m_kernelSize));
211     MOS_ZeroMemory(&m_resCmdBufIdGlobal, sizeof(m_resCmdBufIdGlobal));
212 
213     for (uint8_t idx = decompKernelStatePa; idx < decompKernelStateMax; idx++)
214     {
215         m_kernelBinary[idx] = nullptr;
216         m_kernelStates[idx] = MHW_KERNEL_STATE();
217     }
218 
219      m_krnUniId[decompKernelStatePa] = IDR_CODEC_ALLPACopy;
220      m_krnUniId[decompKernelStatePl2] = IDR_CODEC_ALLPL2Copy;
221 
222 }
223 
GetKernelBinaryAndSize(uint8_t * kernelBase,uint32_t krnUniId,uint8_t ** kernelBinary,uint32_t * kernelSize)224 MOS_STATUS MediaMemDecompState::GetKernelBinaryAndSize(
225     uint8_t  *kernelBase,
226     uint32_t krnUniId,
227     uint8_t  **kernelBinary,
228     uint32_t *kernelSize)
229 {
230     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
231 
232     MHW_CHK_NULL_RETURN(kernelBase);
233     MHW_CHK_NULL_RETURN(kernelBinary);
234     MHW_CHK_NULL_RETURN(kernelSize);
235 
236     if (krnUniId >= IDR_CODEC_TOTAL_NUM_KERNELS)
237     {
238         eStatus = MOS_STATUS_INVALID_PARAMETER;
239         return eStatus;
240     }
241 
242     uint32_t *kernelOffsetTable = (uint32_t*)kernelBase;
243     uint8_t  *base              = (uint8_t*)(kernelOffsetTable + IDR_CODEC_TOTAL_NUM_KERNELS + 1);
244 
245     *kernelSize =
246         kernelOffsetTable[krnUniId + 1] -
247         kernelOffsetTable[krnUniId];
248     *kernelBinary =
249         ((*kernelSize) > 0) ? (base + kernelOffsetTable[krnUniId]) : nullptr;
250 
251     return eStatus;
252 }
253 
InitKernelState(uint32_t kernelStateIdx)254 MOS_STATUS MediaMemDecompState::InitKernelState(
255     uint32_t                 kernelStateIdx)
256 {
257     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
258 
259     MHW_FUNCTION_ENTER;
260 
261     if (kernelStateIdx >= decompKernelStateMax)
262     {
263         eStatus = MOS_STATUS_INVALID_PARAMETER;
264         return eStatus;
265     }
266 
267     uint8_t **kernelBase  = &m_kernelBinary[kernelStateIdx];
268     uint32_t *kernelSize = &m_kernelSize[kernelStateIdx];
269 
270     MHW_CHK_STATUS_RETURN(GetKernelBinaryAndSize(
271         m_kernelBase,
272         m_krnUniId[kernelStateIdx],
273         kernelBase,
274         kernelSize));
275 
276     m_stateHeapSettings.dwIshSize +=
277         MOS_ALIGN_CEIL(*kernelSize, (1 << MHW_KERNEL_OFFSET_SHIFT));
278     m_stateHeapSettings.dwDshSize += MHW_CACHELINE_SIZE* m_numMemDecompSyncTags;
279     m_stateHeapSettings.dwNumSyncTags += m_numMemDecompSyncTags;
280 
281     return eStatus;
282 }
283 
MemoryDecompress(PMOS_RESOURCE targetResource)284 MOS_STATUS MediaMemDecompState::MemoryDecompress(
285     PMOS_RESOURCE targetResource)
286 {
287     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
288 
289     MHW_FUNCTION_ENTER;
290 
291     MHW_CHK_NULL_RETURN(targetResource);
292 
293     MOS_SURFACE targetSurface;
294     MOS_ZeroMemory(&targetSurface, sizeof(MOS_SURFACE));
295     targetSurface.Format     = Format_Invalid;
296     targetSurface.OsResource = *targetResource;
297     MHW_CHK_STATUS_RETURN(GetResourceInfo(&targetSurface));
298 
299     //Set context before proceeding
300     auto gpuContext = m_osInterface->CurrentGpuContextOrdinal;
301     m_osInterface->pfnSetGpuContext(m_osInterface, m_renderContext);
302     m_osInterface->pfnResetOsStates(m_osInterface);
303 
304     DecompKernelStateIdx kernelStateIdx;
305     bool                 useUVPlane;
306     if ((targetSurface.Format == Format_YUY2) || (targetSurface.Format == Format_UYVY))
307     {
308         kernelStateIdx = decompKernelStatePa;
309         useUVPlane     = false;
310     }
311     else if ((targetSurface.Format == Format_NV12) || (targetSurface.Format == Format_P010))
312     {
313         kernelStateIdx = decompKernelStatePl2;
314         useUVPlane     = true;
315     }
316     else
317     {
318         eStatus = MOS_STATUS_INVALID_PARAMETER;
319         return eStatus;
320     }
321 
322     auto kernelState = &m_kernelStates[kernelStateIdx];
323     kernelState->m_currTrackerId = m_currCmdBufId;
324 
325     // preprocess in cp first
326     m_osInterface->osCpInterface->PrepareResources((void **)&targetResource, 1, nullptr, 0);
327 
328     if (kernelStateIdx == decompKernelStatePl2)
329     {
330         if (m_osInterface->osCpInterface->IsSMEnabled())
331         {
332             uint32_t *kernelBase = nullptr;
333             uint32_t  kernelSize = 0;
334             MHW_CHK_STATUS_RETURN(m_osInterface->osCpInterface->GetTK(
335                 &kernelBase,
336                 &kernelSize,
337                 nullptr));
338             if (nullptr == kernelBase || 0 == kernelSize)
339             {
340                 MHW_ASSERT("Could not get TK kernels for MMC!");
341                 eStatus = MOS_STATUS_INVALID_PARAMETER;
342                 return eStatus;
343             }
344 
345             kernelState->KernelParams.pBinary = (uint8_t *)kernelBase;
346         }
347         else
348         {
349             kernelState->KernelParams.pBinary = m_kernelBinary[kernelStateIdx];
350         }
351         MHW_CHK_STATUS_RETURN(kernelState->m_ishRegion.AddData(
352             kernelState->KernelParams.pBinary,
353             0,
354             kernelState->KernelParams.iSize));
355     }
356 
357     MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
358         m_stateHeapInterface,
359         kernelState->KernelParams.iBTCount));
360 
361     uint32_t dshSize = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData() +
362         MOS_ALIGN_CEIL(kernelState->KernelParams.iCurbeLength,
363         m_stateHeapInterface->pStateHeapInterface->GetCurbeAlignment());
364 
365     eStatus = m_stateHeapInterface->pfnAssignSpaceInStateHeap(
366         m_stateHeapInterface,
367         MHW_DSH_TYPE,
368         kernelState,
369         dshSize,
370         false,
371         true);
372 
373     if (eStatus == MOS_STATUS_CLIENT_AR_NO_SPACE)
374     {
375         MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnAssignSpaceInStateHeap(
376             m_stateHeapInterface,
377             MHW_DSH_TYPE,
378             kernelState,
379             dshSize,
380             false,
381             true));
382     }
383     else if (eStatus != MOS_STATUS_SUCCESS)
384     {
385         return eStatus;
386     }
387 
388     MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnAssignSpaceInStateHeap(
389         m_stateHeapInterface,
390         MHW_SSH_TYPE,
391         kernelState,
392         kernelState->dwSshSize,
393         false,
394         false));
395 
396     MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
397     MOS_ZeroMemory(&idParams, sizeof(idParams));
398     idParams.pKernelState = kernelState;
399     MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
400         m_stateHeapInterface,
401         1,
402         &idParams));
403 
404     MHW_CHK_STATUS_RETURN(SetMediaObjectCopyCurbe(kernelStateIdx));
405 
406     MOS_COMMAND_BUFFER cmdBuffer;
407     // Send HW commands (including SSH)
408     MHW_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
409 
410     MHW_GENERIC_PROLOG_PARAMS genericPrologParams;
411     MOS_ZeroMemory(&genericPrologParams, sizeof(genericPrologParams));
412     genericPrologParams.pOsInterface        = m_osInterface;
413     genericPrologParams.pvMiInterface       = m_miInterface;
414     genericPrologParams.bMmcEnabled         = true;
415     MHW_CHK_STATUS_RETURN(Mhw_SendGenericPrologCmd(&cmdBuffer, &genericPrologParams));
416 
417     MHW_CHK_NULL_RETURN(m_renderInterface);
418     if (m_renderInterface->GetL3CacheConfig()->bL3CachingEnabled)
419     {
420         MHW_CHK_STATUS_RETURN(m_renderInterface->SetL3Cache(&cmdBuffer));
421     }
422 
423     MHW_CHK_STATUS_RETURN(m_renderInterface->EnablePreemption(&cmdBuffer));
424 
425     MHW_CHK_STATUS_RETURN(m_renderInterface->AddPipelineSelectCmd(&cmdBuffer, false));
426 
427     MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
428         m_stateHeapInterface,
429         kernelState));
430 
431     MHW_RCS_SURFACE_PARAMS surfaceParams;
432     MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
433     surfaceParams.dwNumPlanes = useUVPlane ? 2 : 1;  // Y+UV : Y
434     surfaceParams.psSurface   = &targetSurface;
435     // Y Plane
436     surfaceParams.dwBindingTableOffset[MHW_Y_PLANE] = copySurfaceSrcY;
437 
438     if (surfaceParams.psSurface->Format == Format_YUY2)
439     {
440         surfaceParams.ForceSurfaceFormat[MHW_Y_PLANE] = MHW_GFX3DSTATE_SURFACEFORMAT_YCRCB_NORMAL;
441     }
442     else if (surfaceParams.psSurface->Format == Format_UYVY)
443     {
444         surfaceParams.ForceSurfaceFormat[MHW_Y_PLANE] = MHW_GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPY;
445     }
446     else if (surfaceParams.psSurface->Format == Format_P010)
447     {
448         surfaceParams.ForceSurfaceFormat[MHW_Y_PLANE] = MHW_GFX3DSTATE_SURFACEFORMAT_R16_UNORM;
449     }
450     else  //NV12
451     {
452         surfaceParams.ForceSurfaceFormat[MHW_Y_PLANE] = MHW_GFX3DSTATE_SURFACEFORMAT_R8_UNORM;
453     }
454 
455     uint32_t widthInBytes = GetSurfaceWidthInBytes(surfaceParams.psSurface);
456     surfaceParams.dwWidthToUse[MHW_Y_PLANE] = MHW_WIDTH_IN_DW(widthInBytes);
457 
458     // UV Plane
459     if (useUVPlane)
460     {
461         surfaceParams.dwBindingTableOffset[MHW_U_PLANE] = copySurfaceSrcU;
462         if (surfaceParams.psSurface->Format == Format_P010)
463         {
464             surfaceParams.ForceSurfaceFormat[MHW_U_PLANE] = MHW_GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPUVY;
465         }
466         else  //NV12
467         {
468             surfaceParams.ForceSurfaceFormat[MHW_U_PLANE] = MHW_GFX3DSTATE_SURFACEFORMAT_R16_UINT;
469         }
470         surfaceParams.dwBaseAddrOffset[MHW_U_PLANE] =
471             targetSurface.dwPitch *
472             MOS_ALIGN_FLOOR(targetSurface.UPlaneOffset.iYOffset, MOS_YTILE_H_ALIGNMENT);
473         surfaceParams.dwWidthToUse[MHW_U_PLANE]  = MHW_WIDTH_IN_DW(widthInBytes);
474         surfaceParams.dwHeightToUse[MHW_U_PLANE] = surfaceParams.psSurface->dwHeight / 2;
475         surfaceParams.dwYOffset[MHW_U_PLANE] =
476             (targetSurface.UPlaneOffset.iYOffset % MOS_YTILE_H_ALIGNMENT);
477     }
478     m_osInterface->pfnGetMemoryCompressionMode(
479         m_osInterface, &targetSurface.OsResource, (PMOS_MEMCOMP_STATE)&surfaceParams.psSurface->CompressionMode);
480     MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetSurfaceState(
481         m_stateHeapInterface,
482         kernelState,
483         &cmdBuffer,
484         1,
485         &surfaceParams));
486 
487     //In place decompression: src shares the same surface with dst.
488     surfaceParams.bIsWritable                       = true;
489     surfaceParams.dwBindingTableOffset[MHW_Y_PLANE] = copySurfaceDstY;
490     if (useUVPlane)
491     {
492         surfaceParams.dwBindingTableOffset[MHW_U_PLANE] = copySurfaceDstU;
493     }
494     MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetSurfaceState(
495         m_stateHeapInterface,
496         kernelState,
497         &cmdBuffer,
498         1,
499         &surfaceParams));
500 
501     MHW_STATE_BASE_ADDR_PARAMS stateBaseAddrParams;
502     MOS_ZeroMemory(&stateBaseAddrParams, sizeof(stateBaseAddrParams));
503     MOS_RESOURCE *dsh = nullptr, *ish = nullptr;
504     MHW_CHK_NULL_RETURN(dsh = kernelState->m_dshRegion.GetResource());
505     MHW_CHK_NULL_RETURN(ish = kernelState->m_ishRegion.GetResource());
506     stateBaseAddrParams.presDynamicState = dsh;
507     stateBaseAddrParams.dwDynamicStateSize = kernelState->m_dshRegion.GetHeapSize();
508     stateBaseAddrParams.presInstructionBuffer = ish;
509     stateBaseAddrParams.dwInstructionBufferSize = kernelState->m_ishRegion.GetHeapSize();
510     MHW_CHK_STATUS_RETURN(m_renderInterface->AddStateBaseAddrCmd(
511         &cmdBuffer,
512         &stateBaseAddrParams));
513 
514     MHW_VFE_PARAMS vfeParams = {};
515     vfeParams.pKernelState = kernelState;
516     auto waTable          = m_osInterface->pfnGetWaTable(m_osInterface);
517 
518     vfeParams.eVfeSliceDisable = MHW_VFE_SLICE_ALL;
519 
520     MHW_CHK_STATUS_RETURN(m_renderInterface->AddMediaVfeCmd(
521         &cmdBuffer,
522         &vfeParams));
523 
524     MHW_CURBE_LOAD_PARAMS curbeLoadParams;
525     MOS_ZeroMemory(&curbeLoadParams, sizeof(curbeLoadParams));
526     curbeLoadParams.pKernelState = kernelState;
527     MHW_CHK_STATUS_RETURN(m_renderInterface->AddMediaCurbeLoadCmd(
528         &cmdBuffer,
529         &curbeLoadParams));
530 
531     MHW_ID_LOAD_PARAMS idLoadParams;
532     MOS_ZeroMemory(&idLoadParams, sizeof(idLoadParams));
533     idLoadParams.pKernelState = kernelState;
534     idLoadParams.dwNumKernelsLoaded = 1;
535     MHW_CHK_STATUS_RETURN(m_renderInterface->AddMediaIDLoadCmd(
536         &cmdBuffer,
537         &idLoadParams));
538 
539     uint32_t resolutionX;
540     if (kernelStateIdx == decompKernelStatePa)  // Format_YUY2, Format_UYVY
541     {
542         resolutionX = MOS_ROUNDUP_DIVIDE(targetSurface.dwWidth * 2, 32);
543     }
544     else  // DecompKernelStatePl2: Format_NV12, Format_P010
545     {
546         if (targetSurface.Format == Format_P010)  // Format_P010
547         {
548             resolutionX = MOS_ROUNDUP_DIVIDE(targetSurface.dwWidth * 2, 32);
549         }
550         else  // Format_NV12
551         {
552             resolutionX = MOS_ROUNDUP_DIVIDE(targetSurface.dwWidth, 32);
553         }
554     }
555     uint32_t resolutionY = MOS_ROUNDUP_DIVIDE(targetSurface.dwHeight, 16);
556 
557     MHW_WALKER_PARAMS walkerParams;
558     MOS_ZeroMemory(&walkerParams, sizeof(walkerParams));
559     walkerParams.WalkerMode               = MHW_WALKER_MODE_SINGLE;
560     walkerParams.BlockResolution.x        = resolutionX;
561     walkerParams.BlockResolution.y        = resolutionY;
562     walkerParams.GlobalResolution.x       = resolutionX;
563     walkerParams.GlobalResolution.y       = resolutionY;
564     walkerParams.GlobalOutlerLoopStride.x = resolutionX;
565     walkerParams.GlobalOutlerLoopStride.y = 0;
566     walkerParams.GlobalInnerLoopUnit.x    = 0;
567     walkerParams.GlobalInnerLoopUnit.y    = resolutionY;
568     walkerParams.dwLocalLoopExecCount     = 0xFFFF;  //MAX VALUE
569     walkerParams.dwGlobalLoopExecCount    = 0xFFFF;  //MAX VALUE
570 
571     // No dependency
572     walkerParams.ScoreboardMask = 0;
573     // Raster scan walking pattern
574     walkerParams.LocalOutLoopStride.x = 0;
575     walkerParams.LocalOutLoopStride.y = 1;
576     walkerParams.LocalInnerLoopUnit.x = 1;
577     walkerParams.LocalInnerLoopUnit.y = 0;
578     walkerParams.LocalEnd.x           = resolutionX - 1;
579     walkerParams.LocalEnd.y           = 0;
580 
581     MHW_CHK_STATUS_RETURN(m_renderInterface->AddMediaObjectWalkerCmd(
582         &cmdBuffer,
583         &walkerParams));
584 
585     // Check if destination surface needs to be synchronized, before command buffer submission
586     MOS_SYNC_PARAMS    syncParams;
587     MOS_ZeroMemory(&syncParams, sizeof(syncParams));
588     syncParams.uiSemaphoreCount         = 1;
589     syncParams.GpuContext               = m_renderContext;
590     syncParams.presSyncResource         = &targetSurface.OsResource;
591     syncParams.bReadOnly                = false;
592     syncParams.bDisableDecodeSyncLock   = m_disableDecodeSyncLock;
593     syncParams.bDisableLockForTranscode = m_disableLockForTranscode;
594 
595     MHW_CHK_STATUS_RETURN(m_osInterface->pfnPerformOverlaySync(m_osInterface, &syncParams));
596     MHW_CHK_STATUS_RETURN(m_osInterface->pfnResourceWait(m_osInterface, &syncParams));
597 
598     // Update the resource tag (s/w tag) for On-Demand Sync
599     m_osInterface->pfnSetResourceSyncTag(m_osInterface, &syncParams);
600 
601     // Update the tag in GPU Sync eStatus buffer (H/W Tag) to match the current S/W tag
602     if (m_osInterface->bTagResourceSync)
603     {
604         MHW_PIPE_CONTROL_PARAMS pipeControlParams;
605         MOS_ZeroMemory(&pipeControlParams, sizeof(pipeControlParams));
606 
607         pipeControlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE;
608         MHW_CHK_STATUS_RETURN(m_miInterface->AddPipeControl(
609             &cmdBuffer,
610             nullptr,
611             &pipeControlParams));
612 
613         MHW_CHK_STATUS_RETURN(WriteSyncTagToResourceCmd(&cmdBuffer));
614     }
615 
616     MHW_MI_STORE_DATA_PARAMS        miStoreDataParams;
617     MOS_ZeroMemory(&miStoreDataParams, sizeof(miStoreDataParams));
618     miStoreDataParams.pOsResource = &m_resCmdBufIdGlobal;
619     miStoreDataParams.dwValue = m_currCmdBufId;
620     MHW_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
621         &cmdBuffer,
622         &miStoreDataParams));
623 
624     MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
625         m_stateHeapInterface,
626         kernelState));
627     MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
628         m_stateHeapInterface));
629 
630     // Add PipeControl to invalidate ISP and MediaState to avoid PageFault issue
631     // This code is temporal and it will be moved to batch buffer end in short
632     PLATFORM platform;
633     m_osInterface->pfnGetPlatform(m_osInterface, &platform);
634     if (GFX_IS_GEN_9_OR_LATER(platform))
635     {
636         MHW_PIPE_CONTROL_PARAMS pipeControlParams;
637 
638         MOS_ZeroMemory(&pipeControlParams, sizeof(pipeControlParams));
639         pipeControlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE;
640         pipeControlParams.bGenericMediaStateClear = true;
641         pipeControlParams.bIndirectStatePointersDisable = true;
642         pipeControlParams.bDisableCSStall = false;
643         MHW_CHK_STATUS_RETURN(m_miInterface->AddPipeControl(&cmdBuffer, NULL, &pipeControlParams));
644 
645         if (MEDIA_IS_WA(m_osInterface->pfnGetWaTable(m_osInterface), WaSendDummyVFEafterPipelineSelect))
646         {
647             MHW_VFE_PARAMS vfeStateParams = {};
648             vfeStateParams.dwNumberofURBEntries = 1;
649             MHW_CHK_STATUS_RETURN(m_renderInterface->AddMediaVfeCmd(&cmdBuffer, &vfeStateParams));
650         }
651     }
652 
653     MHW_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(
654         &cmdBuffer,
655         nullptr));
656 
657     m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
658 
659     MHW_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(
660         m_osInterface,
661         &cmdBuffer,
662         m_renderContextUsesNullHw));
663 
664     // Update the compression mode
665     MHW_CHK_STATUS_RETURN(m_osInterface->pfnSetMemoryCompressionMode(
666         m_osInterface,
667         targetResource,
668         MOS_MEMCOMP_DISABLED));
669     MHW_CHK_STATUS_RETURN(m_osInterface->pfnSetMemoryCompressionHint(
670         m_osInterface,
671         targetResource,
672         false));
673 
674     //Update CmdBufId...
675     m_currCmdBufId++;
676     if (m_currCmdBufId == MemoryBlock::m_invalidTrackerId)
677     {
678         m_currCmdBufId++;
679     }
680 
681     // Send the signal to indicate decode completion, in case On-Demand Sync is not present
682     MHW_CHK_STATUS_RETURN(m_osInterface->pfnResourceSignal(m_osInterface, &syncParams));
683 
684     if (gpuContext != m_renderContext)
685     {
686         m_osInterface->pfnSetGpuContext(m_osInterface, gpuContext);
687     }
688 
689     return eStatus;
690 }
691 
GetResourceInfo(PMOS_SURFACE surface)692 MOS_STATUS MediaMemDecompState::GetResourceInfo(
693     PMOS_SURFACE   surface)
694 {
695     MOS_STATUS  eStatus = MOS_STATUS_SUCCESS;
696 
697     MHW_CHK_NULL_RETURN(m_osInterface);
698     MHW_CHK_NULL_RETURN(surface);
699 
700     MOS_SURFACE details;
701     MOS_ZeroMemory(&details, sizeof(details));
702     details.Format = Format_Invalid;
703 
704     MHW_CHK_STATUS_RETURN(m_osInterface->pfnGetResourceInfo(
705         m_osInterface,
706         &surface->OsResource,
707         &details));
708 
709     surface->Format                      = details.Format;
710     surface->dwWidth                     = details.dwWidth;
711     surface->dwHeight                    = details.dwHeight;
712     surface->dwPitch                     = details.dwPitch;
713     surface->dwDepth                     = details.dwDepth;
714     surface->bArraySpacing               = details.bArraySpacing;
715     surface->TileType                    = details.TileType;
716     surface->TileModeGMM                 = details.TileModeGMM;
717     surface->bGMMTileEnabled             = details.bGMMTileEnabled;
718     surface->dwOffset                    = details.RenderOffset.YUV.Y.BaseOffset;
719     surface->UPlaneOffset.iSurfaceOffset = details.RenderOffset.YUV.U.BaseOffset;
720     surface->UPlaneOffset.iXOffset       = details.RenderOffset.YUV.U.XOffset;
721     surface->UPlaneOffset.iYOffset =
722         (surface->UPlaneOffset.iSurfaceOffset - surface->dwOffset) / surface->dwPitch +
723         details.RenderOffset.YUV.U.YOffset;
724     surface->VPlaneOffset.iSurfaceOffset = details.RenderOffset.YUV.V.BaseOffset;
725     surface->VPlaneOffset.iXOffset       = details.RenderOffset.YUV.V.XOffset;
726     surface->VPlaneOffset.iYOffset =
727         (surface->VPlaneOffset.iSurfaceOffset - surface->dwOffset) / surface->dwPitch +
728         details.RenderOffset.YUV.V.YOffset;
729     surface->bCompressible   = details.bCompressible;
730     surface->bIsCompressed   = details.bIsCompressed;
731     surface->CompressionMode = details.CompressionMode;
732 
733     return eStatus;
734 }
735 
GetSurfaceWidthInBytes(PMOS_SURFACE surface)736 uint32_t MediaMemDecompState::GetSurfaceWidthInBytes(PMOS_SURFACE surface)
737 {
738     uint32_t widthInBytes;
739 
740     switch (surface->Format)
741     {
742     case Format_IMC1:
743     case Format_IMC3:
744     case Format_IMC2:
745     case Format_IMC4:
746     case Format_NV12:
747     case Format_YV12:
748     case Format_I420:
749     case Format_IYUV:
750     case Format_400P:
751     case Format_411P:
752     case Format_422H:
753     case Format_422V:
754     case Format_444P:
755     case Format_RGBP:
756     case Format_BGRP:
757         widthInBytes = surface->dwWidth;
758         break;
759     case Format_YUY2:
760     case Format_YUYV:
761     case Format_YVYU:
762     case Format_UYVY:
763     case Format_VYUY:
764     case Format_P010:
765         widthInBytes = surface->dwWidth << 1;
766         break;
767     case Format_A8R8G8B8:
768     case Format_X8R8G8B8:
769     case Format_A8B8G8R8:
770         widthInBytes = surface->dwWidth << 2;
771         break;
772     default:
773         widthInBytes = surface->dwWidth;
774         break;
775     }
776 
777     return widthInBytes;
778 }
779 
WriteSyncTagToResourceCmd(PMOS_COMMAND_BUFFER cmdBuffer)780 MOS_STATUS MediaMemDecompState::WriteSyncTagToResourceCmd(
781     PMOS_COMMAND_BUFFER   cmdBuffer)
782 {
783     MOS_STATUS               eStatus = MOS_STATUS_SUCCESS;
784 
785     MHW_FUNCTION_ENTER;
786 
787     PMOS_RESOURCE globalGpuContextSyncTagBuffer = nullptr;
788     MHW_CHK_STATUS_RETURN(m_osInterface->pfnGetGpuStatusBufferResource(
789         m_osInterface,
790         globalGpuContextSyncTagBuffer));
791     MHW_CHK_NULL_RETURN(globalGpuContextSyncTagBuffer);
792 
793     uint32_t offset = m_osInterface->pfnGetGpuStatusTagOffset(
794         m_osInterface,
795         m_osInterface->CurrentGpuContextOrdinal);
796     uint32_t value  = m_osInterface->pfnGetGpuStatusTag(
797         m_osInterface,
798         m_osInterface->CurrentGpuContextOrdinal);
799 
800     MHW_MI_STORE_DATA_PARAMS params;
801     params.pOsResource      = globalGpuContextSyncTagBuffer;
802     params.dwResourceOffset = offset;
803     params.dwValue          = value;
804 
805     MHW_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, &params));
806 
807     // Increment GPU Context Tag for next use
808     m_osInterface->pfnIncrementGpuStatusTag(m_osInterface, m_osInterface->CurrentGpuContextOrdinal);
809 
810     return eStatus;
811 }
812 
SetMediaObjectCopyCurbe(DecompKernelStateIdx kernelStateIdx)813 MOS_STATUS MediaMemDecompState::SetMediaObjectCopyCurbe(
814     DecompKernelStateIdx kernelStateIdx)
815 {
816     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
817 
818     MHW_FUNCTION_ENTER;
819 
820     if ((kernelStateIdx >= decompKernelStateMax))
821     {
822         eStatus = MOS_STATUS_INVALID_PARAMETER;
823         return eStatus;
824     }
825 
826     MediaObjectCopyCurbe cmd;
827 
828     cmd.m_dw0.srcSurface0Index = copySurfaceSrcY;
829     cmd.m_dw3.dstSurface0Index = copySurfaceDstY;
830 
831     if (kernelStateIdx == decompKernelStatePl2)
832     {
833         cmd.m_dw1.srcSurface1Index = copySurfaceSrcU;
834         cmd.m_dw4.dstSurface1Index = copySurfaceDstU;
835     }
836 
837     MHW_CHK_STATUS_RETURN(m_kernelStates[kernelStateIdx].m_dshRegion.AddData(
838         &cmd,
839         m_kernelStates[kernelStateIdx].dwCurbeOffset,
840         sizeof(cmd)));
841 
842     return eStatus;
843 }
844 
SetKernelStateParams()845 MOS_STATUS MediaMemDecompState::SetKernelStateParams()
846 {
847     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
848 
849     MHW_FUNCTION_ENTER;
850 
851     MHW_CHK_NULL_RETURN(m_renderInterface->GetHwCaps());
852 
853     for (uint32_t krnIdx = 0; krnIdx < decompKernelStateMax; krnIdx++)
854     {
855         auto kernelState = &m_kernelStates[krnIdx];
856         kernelState->KernelParams.pBinary = m_kernelBinary[krnIdx];
857         kernelState->KernelParams.iSize   = m_kernelSize[krnIdx];
858         kernelState->KernelParams.iBTCount     = copySurfaceNum;
859         kernelState->KernelParams.iThreadCount = m_renderInterface->GetHwCaps()->dwMaxThreads;
860         kernelState->KernelParams.iCurbeLength = MOS_ALIGN_CEIL(
861             MediaObjectCopyCurbe::m_byteSize,
862             m_stateHeapInterface->pStateHeapInterface->GetCurbeAlignment());
863         kernelState->KernelParams.iBlockWidth  = 32;
864         kernelState->KernelParams.iBlockHeight = 16;
865         kernelState->KernelParams.iIdCount     = 1;
866 
867         kernelState->dwCurbeOffset =
868             m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
869 
870         MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
871             m_stateHeapInterface,
872             kernelState->KernelParams.iBTCount,
873             &kernelState->dwSshSize,
874             &kernelState->dwBindingTableSize));
875 
876         kernelState->dwKernelBinaryOffset = 0;
877 
878         eStatus = m_stateHeapInterface->pfnAssignSpaceInStateHeap(
879             m_stateHeapInterface,
880             MHW_ISH_TYPE,
881             kernelState,
882             kernelState->KernelParams.iSize,
883             true,
884             false);
885 
886         if (eStatus == MOS_STATUS_CLIENT_AR_NO_SPACE)
887         {
888             MHW_ASSERTMESSAGE("CodecHal does not handle this case");
889             return eStatus;
890         }
891         else if (eStatus != MOS_STATUS_SUCCESS)
892         {
893             return eStatus;
894         }
895 
896         MHW_CHK_STATUS_RETURN(kernelState->m_ishRegion.AddData(
897             kernelState->KernelParams.pBinary,
898             0,
899             kernelState->KernelParams.iSize));
900     }
901 
902     return eStatus;
903 }
904 
Initialize(PMOS_INTERFACE osInterface,MhwCpInterface * cpInterface,MhwMiInterface * miInterface,MhwRenderInterface * renderInterface)905 MOS_STATUS MediaMemDecompState::Initialize(
906     PMOS_INTERFACE                  osInterface,
907     MhwCpInterface                  *cpInterface,
908     MhwMiInterface                  *miInterface,
909     MhwRenderInterface              *renderInterface)
910 {
911     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
912     MediaUserSettingSharedPtr   userSettingPtr = nullptr;
913     MOS_USER_FEATURE_VALUE_DATA userFeatureData = {};
914 
915     MHW_FUNCTION_ENTER;
916 
917     MHW_CHK_NULL_RETURN(osInterface);
918     MHW_CHK_NULL_RETURN(cpInterface);
919     MHW_CHK_NULL_RETURN(miInterface);
920     MHW_CHK_NULL_RETURN(renderInterface);
921 
922     m_osInterface = osInterface;
923     m_cpInterface = cpInterface;
924     m_miInterface = miInterface;
925     m_renderInterface = renderInterface;
926     userSettingPtr = osInterface->pfnGetUserSettingInstance(osInterface);
927 
928     for (uint8_t kernelIdx = decompKernelStatePa; kernelIdx < decompKernelStateMax; kernelIdx++)
929     {
930         MHW_CHK_STATUS_RETURN(InitKernelState(kernelIdx));
931     }
932 
933     if (m_stateHeapSettings.dwIshSize > 0 &&
934         m_stateHeapSettings.dwDshSize > 0 &&
935         m_stateHeapSettings.dwNumSyncTags > 0)
936     {
937         MHW_CHK_STATUS_RETURN(m_renderInterface->AllocateHeaps(
938             m_stateHeapSettings));
939     }
940 
941     m_stateHeapInterface = m_renderInterface->m_stateHeapInterface;
942     MHW_CHK_NULL_RETURN(m_stateHeapInterface);
943 
944     if (m_osInterface->pfnIsGpuContextValid(m_osInterface, MOS_GPU_CONTEXT_RENDER) == MOS_STATUS_SUCCESS)
945     {
946         m_renderContext = MOS_GPU_CONTEXT_RENDER;
947     }
948     else
949     {
950         MOS_GPUCTX_CREATOPTIONS createOption;
951         MHW_CHK_STATUS_RETURN(m_osInterface->pfnCreateGpuContext(
952             m_osInterface,
953             MOS_GPU_CONTEXT_RENDER,
954             MOS_GPU_NODE_3D,
955             &createOption));
956 
957         m_renderContext = MOS_GPU_CONTEXT_RENDER;
958     }
959 
960     MOS_NULL_RENDERING_FLAGS nullHWAccelerationEnable;
961     nullHWAccelerationEnable.Value = 0;
962     m_disableDecodeSyncLock        = false;
963 #if (_DEBUG || _RELEASE_INTERNAL)
964     ReadUserSettingForDebug(
965         userSettingPtr,
966         nullHWAccelerationEnable.Value,
967         __MEDIA_USER_FEATURE_VALUE_NULL_HW_ACCELERATION_ENABLE,
968         MediaUserSetting::Group::Device);
969 
970     MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
971     MOS_UserFeature_ReadValue_ID(
972         nullptr,
973         __MEDIA_USER_FEATURE_VALUE_DECODE_LOCK_DISABLE_ID,
974         &userFeatureData,
975         m_osInterface->pOsContext);
976     m_disableDecodeSyncLock = userFeatureData.u32Data ? true : false;
977 #endif  // _DEBUG || _RELEASE_INTERNAL
978 
979     m_disableLockForTranscode =
980         MEDIA_IS_WA(m_osInterface->pfnGetWaTable(m_osInterface), WaDisableLockForTranscodePerf);
981 
982     MHW_CHK_STATUS_RETURN(SetKernelStateParams());
983 
984     m_renderContextUsesNullHw =
985         ((m_renderContext == MOS_GPU_CONTEXT_RENDER) ? nullHWAccelerationEnable.CtxRender : nullHWAccelerationEnable.CtxRender2) ||
986         nullHWAccelerationEnable.Mmc;
987 
988     MOS_ALLOC_GFXRES_PARAMS allocParams;
989     MOS_ZeroMemory(&allocParams, sizeof(allocParams));
990     allocParams.Type = MOS_GFXRES_BUFFER;
991     allocParams.TileType = MOS_TILE_LINEAR;
992     allocParams.Format = Format_Buffer;
993     allocParams.dwBytes = MHW_CACHELINE_SIZE;
994     allocParams.pBufName = "CmdBufIdGlobal";
995     MHW_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
996         m_osInterface,
997         &allocParams,
998         &m_resCmdBufIdGlobal));
999     m_currCmdBufId = MemoryBlock::m_invalidTrackerId + 1;
1000 
1001     MOS_LOCK_PARAMS lockParams;
1002     MOS_ZeroMemory(&lockParams, sizeof(lockParams));
1003     lockParams.WriteOnly = 1;
1004     m_cmdBufIdGlobal = (uint32_t *)m_osInterface->pfnLockResource(
1005         m_osInterface,
1006         &m_resCmdBufIdGlobal,
1007         &lockParams);
1008     MHW_CHK_NULL_RETURN(m_cmdBufIdGlobal);
1009     MOS_ZeroMemory(m_cmdBufIdGlobal, allocParams.dwBytes);
1010 
1011     MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetCmdBufStatusPtr(
1012         m_stateHeapInterface,
1013         m_cmdBufIdGlobal));
1014 
1015     return eStatus;
1016 }
1017