1 /*
2 * Copyright (c) 2013-2017, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file codechal_memdecomp.cpp
24 //! \brief This module sets up a kernel for media memory decompression.
25
26 #include "codechal_memdecomp.h"
27 #include "codeckrnheader.h"
28 #include "mos_os_cp_interface_specific.h"
29
//!
//! \class    MediaObjectCopyCurbe
//! \brief    Media object memory decompress copy kernel curbe.
//! \details  Curbe data DW0-6 must be defined at the beginning of the class:
//!           the constructor zeroes the first m_byteSize bytes of the object
//!           and the object is uploaded to the dynamic state heap as raw bytes.
//!
class MediaObjectCopyCurbe
{
public:
    // DW 0: index of source surface 0 (set to copySurfaceSrcY by SetMediaObjectCopyCurbe)
    union
    {
        struct
        {
            uint32_t srcSurface0Index;
        };
        struct
        {
            uint32_t value;
        };
    } m_dw0;

    // DW 1: index of source surface 1 (set to copySurfaceSrcU for the PL2 kernel)
    union
    {
        struct
        {
            uint32_t srcSurface1Index;
        };
        struct
        {
            uint32_t value;
        };
    } m_dw1;

    // DW 2: index of source surface 2 (left at zero by the code in this file)
    union
    {
        struct
        {
            uint32_t srcSurface2Index;
        };
        struct
        {
            uint32_t value;
        };
    } m_dw2;

    // DW 3: index of destination surface 0 (set to copySurfaceDstY)
    union
    {
        struct
        {
            uint32_t dstSurface0Index;
        };
        struct
        {
            uint32_t value;
        };
    } m_dw3;

    // DW 4: index of destination surface 1 (set to copySurfaceDstU for the PL2 kernel)
    union
    {
        struct
        {
            uint32_t dstSurface1Index;
        };
        struct
        {
            uint32_t value;
        };
    } m_dw4;

    // DW 5: index of destination surface 2 (left at zero by the code in this file)
    union
    {
        struct
        {
            uint32_t dstSurface2Index;
        };
        struct
        {
            uint32_t value;
        };
    } m_dw5;

    // DW 6: surface width (left at zero by the code in this file)
    union
    {
        struct
        {
            uint32_t surfaceWidth;
        };
        struct
        {
            uint32_t value;
        };
    } m_dw6;

    //!
    //! \brief    Constructor
    //!
    MediaObjectCopyCurbe();

    //!
    //! \brief    Destructor
    //!
    ~MediaObjectCopyCurbe() {}

    static const size_t m_byteSize = 28;  //!< Byte size of curbe data DW0-6.
};

// The constructor clears m_byteSize bytes of the object and the curbe is copied
// to the state heap using sizeof(); both are only correct while the object is
// exactly DW0-6 with no padding or extra non-static members.
static_assert(sizeof(MediaObjectCopyCurbe) == MediaObjectCopyCurbe::m_byteSize,
    "MediaObjectCopyCurbe layout must match m_byteSize (DW0-6)");
141
MediaObjectCopyCurbe()142 MediaObjectCopyCurbe::MediaObjectCopyCurbe()
143 {
144 MOS_ZeroMemory(this, m_byteSize);
145 }
146
~MediaMemDecompState()147 MediaMemDecompState::~MediaMemDecompState()
148 {
149 MHW_FUNCTION_ENTER;
150
151 if (m_cpInterface)
152 {
153 if (m_osInterface)
154 {
155 m_osInterface->pfnDeleteMhwCpInterface(m_cpInterface);
156 m_cpInterface = nullptr;
157 }
158 else
159 {
160 MHW_ASSERTMESSAGE("Failed to destroy cpInterface.");
161 }
162 }
163
164 if (m_cmdBufIdGlobal)
165 {
166 if (m_osInterface)
167 {
168 m_osInterface->pfnUnlockResource(m_osInterface, &m_resCmdBufIdGlobal);
169 m_osInterface->pfnFreeResource(m_osInterface, &m_resCmdBufIdGlobal);
170 m_cmdBufIdGlobal = nullptr;
171 }
172 else
173 {
174 MHW_ASSERTMESSAGE("Failed to destroy command buffer global Id.");
175 }
176 }
177
178 if (m_miInterface)
179 {
180 MOS_Delete(m_miInterface);
181 m_miInterface = nullptr;
182 }
183
184 if (m_renderInterface)
185 {
186 MOS_Delete(m_renderInterface);
187 m_renderInterface = nullptr;
188 }
189
190 if (m_osInterface)
191 {
192 m_osInterface->pfnDestroy(m_osInterface, false);
193 MOS_FreeMemory(m_osInterface);
194 m_osInterface = nullptr;
195 }
196 }
197
MediaMemDecompState()198 MediaMemDecompState::MediaMemDecompState() :
199 MediaMemDecompBaseState(),
200 m_currCmdBufId(0)
201 {
202 MHW_FUNCTION_ENTER;
203 m_stateHeapSettings.m_ishBehavior = HeapManager::Behavior::clientControlled;
204 m_stateHeapSettings.m_dshBehavior = HeapManager::Behavior::destructiveExtend;
205 m_stateHeapSettings.m_keepDshLocked = true;
206 m_stateHeapSettings.dwDshIncrement = 2 * MOS_PAGE_SIZE;
207
208 MOS_ZeroMemory(&m_renderContext, sizeof(m_renderContext));
209 MOS_ZeroMemory(&m_krnUniId, sizeof(m_krnUniId));
210 MOS_ZeroMemory(&m_kernelSize, sizeof(m_kernelSize));
211 MOS_ZeroMemory(&m_resCmdBufIdGlobal, sizeof(m_resCmdBufIdGlobal));
212
213 for (uint8_t idx = decompKernelStatePa; idx < decompKernelStateMax; idx++)
214 {
215 m_kernelBinary[idx] = nullptr;
216 m_kernelStates[idx] = MHW_KERNEL_STATE();
217 }
218
219 m_krnUniId[decompKernelStatePa] = IDR_CODEC_ALLPACopy;
220 m_krnUniId[decompKernelStatePl2] = IDR_CODEC_ALLPL2Copy;
221
222 }
223
GetKernelBinaryAndSize(uint8_t * kernelBase,uint32_t krnUniId,uint8_t ** kernelBinary,uint32_t * kernelSize)224 MOS_STATUS MediaMemDecompState::GetKernelBinaryAndSize(
225 uint8_t *kernelBase,
226 uint32_t krnUniId,
227 uint8_t **kernelBinary,
228 uint32_t *kernelSize)
229 {
230 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
231
232 MHW_CHK_NULL_RETURN(kernelBase);
233 MHW_CHK_NULL_RETURN(kernelBinary);
234 MHW_CHK_NULL_RETURN(kernelSize);
235
236 if (krnUniId >= IDR_CODEC_TOTAL_NUM_KERNELS)
237 {
238 eStatus = MOS_STATUS_INVALID_PARAMETER;
239 return eStatus;
240 }
241
242 uint32_t *kernelOffsetTable = (uint32_t*)kernelBase;
243 uint8_t *base = (uint8_t*)(kernelOffsetTable + IDR_CODEC_TOTAL_NUM_KERNELS + 1);
244
245 *kernelSize =
246 kernelOffsetTable[krnUniId + 1] -
247 kernelOffsetTable[krnUniId];
248 *kernelBinary =
249 ((*kernelSize) > 0) ? (base + kernelOffsetTable[krnUniId]) : nullptr;
250
251 return eStatus;
252 }
253
InitKernelState(uint32_t kernelStateIdx)254 MOS_STATUS MediaMemDecompState::InitKernelState(
255 uint32_t kernelStateIdx)
256 {
257 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
258
259 MHW_FUNCTION_ENTER;
260
261 if (kernelStateIdx >= decompKernelStateMax)
262 {
263 eStatus = MOS_STATUS_INVALID_PARAMETER;
264 return eStatus;
265 }
266
267 uint8_t **kernelBase = &m_kernelBinary[kernelStateIdx];
268 uint32_t *kernelSize = &m_kernelSize[kernelStateIdx];
269
270 MHW_CHK_STATUS_RETURN(GetKernelBinaryAndSize(
271 m_kernelBase,
272 m_krnUniId[kernelStateIdx],
273 kernelBase,
274 kernelSize));
275
276 m_stateHeapSettings.dwIshSize +=
277 MOS_ALIGN_CEIL(*kernelSize, (1 << MHW_KERNEL_OFFSET_SHIFT));
278 m_stateHeapSettings.dwDshSize += MHW_CACHELINE_SIZE* m_numMemDecompSyncTags;
279 m_stateHeapSettings.dwNumSyncTags += m_numMemDecompSyncTags;
280
281 return eStatus;
282 }
283
MemoryDecompress(PMOS_RESOURCE targetResource)284 MOS_STATUS MediaMemDecompState::MemoryDecompress(
285 PMOS_RESOURCE targetResource)
286 {
287 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
288
289 MHW_FUNCTION_ENTER;
290
291 MHW_CHK_NULL_RETURN(targetResource);
292
293 MOS_SURFACE targetSurface;
294 MOS_ZeroMemory(&targetSurface, sizeof(MOS_SURFACE));
295 targetSurface.Format = Format_Invalid;
296 targetSurface.OsResource = *targetResource;
297 MHW_CHK_STATUS_RETURN(GetResourceInfo(&targetSurface));
298
299 //Set context before proceeding
300 auto gpuContext = m_osInterface->CurrentGpuContextOrdinal;
301 m_osInterface->pfnSetGpuContext(m_osInterface, m_renderContext);
302 m_osInterface->pfnResetOsStates(m_osInterface);
303
304 DecompKernelStateIdx kernelStateIdx;
305 bool useUVPlane;
306 if ((targetSurface.Format == Format_YUY2) || (targetSurface.Format == Format_UYVY))
307 {
308 kernelStateIdx = decompKernelStatePa;
309 useUVPlane = false;
310 }
311 else if ((targetSurface.Format == Format_NV12) || (targetSurface.Format == Format_P010))
312 {
313 kernelStateIdx = decompKernelStatePl2;
314 useUVPlane = true;
315 }
316 else
317 {
318 eStatus = MOS_STATUS_INVALID_PARAMETER;
319 return eStatus;
320 }
321
322 auto kernelState = &m_kernelStates[kernelStateIdx];
323 kernelState->m_currTrackerId = m_currCmdBufId;
324
325 // preprocess in cp first
326 m_osInterface->osCpInterface->PrepareResources((void **)&targetResource, 1, nullptr, 0);
327
328 if (kernelStateIdx == decompKernelStatePl2)
329 {
330 if (m_osInterface->osCpInterface->IsSMEnabled())
331 {
332 uint32_t *kernelBase = nullptr;
333 uint32_t kernelSize = 0;
334 MHW_CHK_STATUS_RETURN(m_osInterface->osCpInterface->GetTK(
335 &kernelBase,
336 &kernelSize,
337 nullptr));
338 if (nullptr == kernelBase || 0 == kernelSize)
339 {
340 MHW_ASSERT("Could not get TK kernels for MMC!");
341 eStatus = MOS_STATUS_INVALID_PARAMETER;
342 return eStatus;
343 }
344
345 kernelState->KernelParams.pBinary = (uint8_t *)kernelBase;
346 }
347 else
348 {
349 kernelState->KernelParams.pBinary = m_kernelBinary[kernelStateIdx];
350 }
351 MHW_CHK_STATUS_RETURN(kernelState->m_ishRegion.AddData(
352 kernelState->KernelParams.pBinary,
353 0,
354 kernelState->KernelParams.iSize));
355 }
356
357 MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnRequestSshSpaceForCmdBuf(
358 m_stateHeapInterface,
359 kernelState->KernelParams.iBTCount));
360
361 uint32_t dshSize = m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData() +
362 MOS_ALIGN_CEIL(kernelState->KernelParams.iCurbeLength,
363 m_stateHeapInterface->pStateHeapInterface->GetCurbeAlignment());
364
365 eStatus = m_stateHeapInterface->pfnAssignSpaceInStateHeap(
366 m_stateHeapInterface,
367 MHW_DSH_TYPE,
368 kernelState,
369 dshSize,
370 false,
371 true);
372
373 if (eStatus == MOS_STATUS_CLIENT_AR_NO_SPACE)
374 {
375 MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnAssignSpaceInStateHeap(
376 m_stateHeapInterface,
377 MHW_DSH_TYPE,
378 kernelState,
379 dshSize,
380 false,
381 true));
382 }
383 else if (eStatus != MOS_STATUS_SUCCESS)
384 {
385 return eStatus;
386 }
387
388 MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnAssignSpaceInStateHeap(
389 m_stateHeapInterface,
390 MHW_SSH_TYPE,
391 kernelState,
392 kernelState->dwSshSize,
393 false,
394 false));
395
396 MHW_INTERFACE_DESCRIPTOR_PARAMS idParams;
397 MOS_ZeroMemory(&idParams, sizeof(idParams));
398 idParams.pKernelState = kernelState;
399 MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetInterfaceDescriptor(
400 m_stateHeapInterface,
401 1,
402 &idParams));
403
404 MHW_CHK_STATUS_RETURN(SetMediaObjectCopyCurbe(kernelStateIdx));
405
406 MOS_COMMAND_BUFFER cmdBuffer;
407 // Send HW commands (including SSH)
408 MHW_CHK_STATUS_RETURN(m_osInterface->pfnGetCommandBuffer(m_osInterface, &cmdBuffer, 0));
409
410 MHW_GENERIC_PROLOG_PARAMS genericPrologParams;
411 MOS_ZeroMemory(&genericPrologParams, sizeof(genericPrologParams));
412 genericPrologParams.pOsInterface = m_osInterface;
413 genericPrologParams.pvMiInterface = m_miInterface;
414 genericPrologParams.bMmcEnabled = true;
415 MHW_CHK_STATUS_RETURN(Mhw_SendGenericPrologCmd(&cmdBuffer, &genericPrologParams));
416
417 MHW_CHK_NULL_RETURN(m_renderInterface);
418 if (m_renderInterface->GetL3CacheConfig()->bL3CachingEnabled)
419 {
420 MHW_CHK_STATUS_RETURN(m_renderInterface->SetL3Cache(&cmdBuffer));
421 }
422
423 MHW_CHK_STATUS_RETURN(m_renderInterface->EnablePreemption(&cmdBuffer));
424
425 MHW_CHK_STATUS_RETURN(m_renderInterface->AddPipelineSelectCmd(&cmdBuffer, false));
426
427 MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetBindingTable(
428 m_stateHeapInterface,
429 kernelState));
430
431 MHW_RCS_SURFACE_PARAMS surfaceParams;
432 MOS_ZeroMemory(&surfaceParams, sizeof(surfaceParams));
433 surfaceParams.dwNumPlanes = useUVPlane ? 2 : 1; // Y+UV : Y
434 surfaceParams.psSurface = &targetSurface;
435 // Y Plane
436 surfaceParams.dwBindingTableOffset[MHW_Y_PLANE] = copySurfaceSrcY;
437
438 if (surfaceParams.psSurface->Format == Format_YUY2)
439 {
440 surfaceParams.ForceSurfaceFormat[MHW_Y_PLANE] = MHW_GFX3DSTATE_SURFACEFORMAT_YCRCB_NORMAL;
441 }
442 else if (surfaceParams.psSurface->Format == Format_UYVY)
443 {
444 surfaceParams.ForceSurfaceFormat[MHW_Y_PLANE] = MHW_GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPY;
445 }
446 else if (surfaceParams.psSurface->Format == Format_P010)
447 {
448 surfaceParams.ForceSurfaceFormat[MHW_Y_PLANE] = MHW_GFX3DSTATE_SURFACEFORMAT_R16_UNORM;
449 }
450 else //NV12
451 {
452 surfaceParams.ForceSurfaceFormat[MHW_Y_PLANE] = MHW_GFX3DSTATE_SURFACEFORMAT_R8_UNORM;
453 }
454
455 uint32_t widthInBytes = GetSurfaceWidthInBytes(surfaceParams.psSurface);
456 surfaceParams.dwWidthToUse[MHW_Y_PLANE] = MHW_WIDTH_IN_DW(widthInBytes);
457
458 // UV Plane
459 if (useUVPlane)
460 {
461 surfaceParams.dwBindingTableOffset[MHW_U_PLANE] = copySurfaceSrcU;
462 if (surfaceParams.psSurface->Format == Format_P010)
463 {
464 surfaceParams.ForceSurfaceFormat[MHW_U_PLANE] = MHW_GFX3DSTATE_SURFACEFORMAT_YCRCB_SWAPUVY;
465 }
466 else //NV12
467 {
468 surfaceParams.ForceSurfaceFormat[MHW_U_PLANE] = MHW_GFX3DSTATE_SURFACEFORMAT_R16_UINT;
469 }
470 surfaceParams.dwBaseAddrOffset[MHW_U_PLANE] =
471 targetSurface.dwPitch *
472 MOS_ALIGN_FLOOR(targetSurface.UPlaneOffset.iYOffset, MOS_YTILE_H_ALIGNMENT);
473 surfaceParams.dwWidthToUse[MHW_U_PLANE] = MHW_WIDTH_IN_DW(widthInBytes);
474 surfaceParams.dwHeightToUse[MHW_U_PLANE] = surfaceParams.psSurface->dwHeight / 2;
475 surfaceParams.dwYOffset[MHW_U_PLANE] =
476 (targetSurface.UPlaneOffset.iYOffset % MOS_YTILE_H_ALIGNMENT);
477 }
478 m_osInterface->pfnGetMemoryCompressionMode(
479 m_osInterface, &targetSurface.OsResource, (PMOS_MEMCOMP_STATE)&surfaceParams.psSurface->CompressionMode);
480 MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetSurfaceState(
481 m_stateHeapInterface,
482 kernelState,
483 &cmdBuffer,
484 1,
485 &surfaceParams));
486
487 //In place decompression: src shares the same surface with dst.
488 surfaceParams.bIsWritable = true;
489 surfaceParams.dwBindingTableOffset[MHW_Y_PLANE] = copySurfaceDstY;
490 if (useUVPlane)
491 {
492 surfaceParams.dwBindingTableOffset[MHW_U_PLANE] = copySurfaceDstU;
493 }
494 MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetSurfaceState(
495 m_stateHeapInterface,
496 kernelState,
497 &cmdBuffer,
498 1,
499 &surfaceParams));
500
501 MHW_STATE_BASE_ADDR_PARAMS stateBaseAddrParams;
502 MOS_ZeroMemory(&stateBaseAddrParams, sizeof(stateBaseAddrParams));
503 MOS_RESOURCE *dsh = nullptr, *ish = nullptr;
504 MHW_CHK_NULL_RETURN(dsh = kernelState->m_dshRegion.GetResource());
505 MHW_CHK_NULL_RETURN(ish = kernelState->m_ishRegion.GetResource());
506 stateBaseAddrParams.presDynamicState = dsh;
507 stateBaseAddrParams.dwDynamicStateSize = kernelState->m_dshRegion.GetHeapSize();
508 stateBaseAddrParams.presInstructionBuffer = ish;
509 stateBaseAddrParams.dwInstructionBufferSize = kernelState->m_ishRegion.GetHeapSize();
510 MHW_CHK_STATUS_RETURN(m_renderInterface->AddStateBaseAddrCmd(
511 &cmdBuffer,
512 &stateBaseAddrParams));
513
514 MHW_VFE_PARAMS vfeParams = {};
515 vfeParams.pKernelState = kernelState;
516 auto waTable = m_osInterface->pfnGetWaTable(m_osInterface);
517
518 vfeParams.eVfeSliceDisable = MHW_VFE_SLICE_ALL;
519
520 MHW_CHK_STATUS_RETURN(m_renderInterface->AddMediaVfeCmd(
521 &cmdBuffer,
522 &vfeParams));
523
524 MHW_CURBE_LOAD_PARAMS curbeLoadParams;
525 MOS_ZeroMemory(&curbeLoadParams, sizeof(curbeLoadParams));
526 curbeLoadParams.pKernelState = kernelState;
527 MHW_CHK_STATUS_RETURN(m_renderInterface->AddMediaCurbeLoadCmd(
528 &cmdBuffer,
529 &curbeLoadParams));
530
531 MHW_ID_LOAD_PARAMS idLoadParams;
532 MOS_ZeroMemory(&idLoadParams, sizeof(idLoadParams));
533 idLoadParams.pKernelState = kernelState;
534 idLoadParams.dwNumKernelsLoaded = 1;
535 MHW_CHK_STATUS_RETURN(m_renderInterface->AddMediaIDLoadCmd(
536 &cmdBuffer,
537 &idLoadParams));
538
539 uint32_t resolutionX;
540 if (kernelStateIdx == decompKernelStatePa) // Format_YUY2, Format_UYVY
541 {
542 resolutionX = MOS_ROUNDUP_DIVIDE(targetSurface.dwWidth * 2, 32);
543 }
544 else // DecompKernelStatePl2: Format_NV12, Format_P010
545 {
546 if (targetSurface.Format == Format_P010) // Format_P010
547 {
548 resolutionX = MOS_ROUNDUP_DIVIDE(targetSurface.dwWidth * 2, 32);
549 }
550 else // Format_NV12
551 {
552 resolutionX = MOS_ROUNDUP_DIVIDE(targetSurface.dwWidth, 32);
553 }
554 }
555 uint32_t resolutionY = MOS_ROUNDUP_DIVIDE(targetSurface.dwHeight, 16);
556
557 MHW_WALKER_PARAMS walkerParams;
558 MOS_ZeroMemory(&walkerParams, sizeof(walkerParams));
559 walkerParams.WalkerMode = MHW_WALKER_MODE_SINGLE;
560 walkerParams.BlockResolution.x = resolutionX;
561 walkerParams.BlockResolution.y = resolutionY;
562 walkerParams.GlobalResolution.x = resolutionX;
563 walkerParams.GlobalResolution.y = resolutionY;
564 walkerParams.GlobalOutlerLoopStride.x = resolutionX;
565 walkerParams.GlobalOutlerLoopStride.y = 0;
566 walkerParams.GlobalInnerLoopUnit.x = 0;
567 walkerParams.GlobalInnerLoopUnit.y = resolutionY;
568 walkerParams.dwLocalLoopExecCount = 0xFFFF; //MAX VALUE
569 walkerParams.dwGlobalLoopExecCount = 0xFFFF; //MAX VALUE
570
571 // No dependency
572 walkerParams.ScoreboardMask = 0;
573 // Raster scan walking pattern
574 walkerParams.LocalOutLoopStride.x = 0;
575 walkerParams.LocalOutLoopStride.y = 1;
576 walkerParams.LocalInnerLoopUnit.x = 1;
577 walkerParams.LocalInnerLoopUnit.y = 0;
578 walkerParams.LocalEnd.x = resolutionX - 1;
579 walkerParams.LocalEnd.y = 0;
580
581 MHW_CHK_STATUS_RETURN(m_renderInterface->AddMediaObjectWalkerCmd(
582 &cmdBuffer,
583 &walkerParams));
584
585 // Check if destination surface needs to be synchronized, before command buffer submission
586 MOS_SYNC_PARAMS syncParams;
587 MOS_ZeroMemory(&syncParams, sizeof(syncParams));
588 syncParams.uiSemaphoreCount = 1;
589 syncParams.GpuContext = m_renderContext;
590 syncParams.presSyncResource = &targetSurface.OsResource;
591 syncParams.bReadOnly = false;
592 syncParams.bDisableDecodeSyncLock = m_disableDecodeSyncLock;
593 syncParams.bDisableLockForTranscode = m_disableLockForTranscode;
594
595 MHW_CHK_STATUS_RETURN(m_osInterface->pfnPerformOverlaySync(m_osInterface, &syncParams));
596 MHW_CHK_STATUS_RETURN(m_osInterface->pfnResourceWait(m_osInterface, &syncParams));
597
598 // Update the resource tag (s/w tag) for On-Demand Sync
599 m_osInterface->pfnSetResourceSyncTag(m_osInterface, &syncParams);
600
601 // Update the tag in GPU Sync eStatus buffer (H/W Tag) to match the current S/W tag
602 if (m_osInterface->bTagResourceSync)
603 {
604 MHW_PIPE_CONTROL_PARAMS pipeControlParams;
605 MOS_ZeroMemory(&pipeControlParams, sizeof(pipeControlParams));
606
607 pipeControlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE;
608 MHW_CHK_STATUS_RETURN(m_miInterface->AddPipeControl(
609 &cmdBuffer,
610 nullptr,
611 &pipeControlParams));
612
613 MHW_CHK_STATUS_RETURN(WriteSyncTagToResourceCmd(&cmdBuffer));
614 }
615
616 MHW_MI_STORE_DATA_PARAMS miStoreDataParams;
617 MOS_ZeroMemory(&miStoreDataParams, sizeof(miStoreDataParams));
618 miStoreDataParams.pOsResource = &m_resCmdBufIdGlobal;
619 miStoreDataParams.dwValue = m_currCmdBufId;
620 MHW_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(
621 &cmdBuffer,
622 &miStoreDataParams));
623
624 MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSubmitBlocks(
625 m_stateHeapInterface,
626 kernelState));
627 MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnUpdateGlobalCmdBufId(
628 m_stateHeapInterface));
629
630 // Add PipeControl to invalidate ISP and MediaState to avoid PageFault issue
631 // This code is temporal and it will be moved to batch buffer end in short
632 PLATFORM platform;
633 m_osInterface->pfnGetPlatform(m_osInterface, &platform);
634 if (GFX_IS_GEN_9_OR_LATER(platform))
635 {
636 MHW_PIPE_CONTROL_PARAMS pipeControlParams;
637
638 MOS_ZeroMemory(&pipeControlParams, sizeof(pipeControlParams));
639 pipeControlParams.dwFlushMode = MHW_FLUSH_WRITE_CACHE;
640 pipeControlParams.bGenericMediaStateClear = true;
641 pipeControlParams.bIndirectStatePointersDisable = true;
642 pipeControlParams.bDisableCSStall = false;
643 MHW_CHK_STATUS_RETURN(m_miInterface->AddPipeControl(&cmdBuffer, NULL, &pipeControlParams));
644
645 if (MEDIA_IS_WA(m_osInterface->pfnGetWaTable(m_osInterface), WaSendDummyVFEafterPipelineSelect))
646 {
647 MHW_VFE_PARAMS vfeStateParams = {};
648 vfeStateParams.dwNumberofURBEntries = 1;
649 MHW_CHK_STATUS_RETURN(m_renderInterface->AddMediaVfeCmd(&cmdBuffer, &vfeStateParams));
650 }
651 }
652
653 MHW_CHK_STATUS_RETURN(m_miInterface->AddMiBatchBufferEnd(
654 &cmdBuffer,
655 nullptr));
656
657 m_osInterface->pfnReturnCommandBuffer(m_osInterface, &cmdBuffer, 0);
658
659 MHW_CHK_STATUS_RETURN(m_osInterface->pfnSubmitCommandBuffer(
660 m_osInterface,
661 &cmdBuffer,
662 m_renderContextUsesNullHw));
663
664 // Update the compression mode
665 MHW_CHK_STATUS_RETURN(m_osInterface->pfnSetMemoryCompressionMode(
666 m_osInterface,
667 targetResource,
668 MOS_MEMCOMP_DISABLED));
669 MHW_CHK_STATUS_RETURN(m_osInterface->pfnSetMemoryCompressionHint(
670 m_osInterface,
671 targetResource,
672 false));
673
674 //Update CmdBufId...
675 m_currCmdBufId++;
676 if (m_currCmdBufId == MemoryBlock::m_invalidTrackerId)
677 {
678 m_currCmdBufId++;
679 }
680
681 // Send the signal to indicate decode completion, in case On-Demand Sync is not present
682 MHW_CHK_STATUS_RETURN(m_osInterface->pfnResourceSignal(m_osInterface, &syncParams));
683
684 if (gpuContext != m_renderContext)
685 {
686 m_osInterface->pfnSetGpuContext(m_osInterface, gpuContext);
687 }
688
689 return eStatus;
690 }
691
GetResourceInfo(PMOS_SURFACE surface)692 MOS_STATUS MediaMemDecompState::GetResourceInfo(
693 PMOS_SURFACE surface)
694 {
695 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
696
697 MHW_CHK_NULL_RETURN(m_osInterface);
698 MHW_CHK_NULL_RETURN(surface);
699
700 MOS_SURFACE details;
701 MOS_ZeroMemory(&details, sizeof(details));
702 details.Format = Format_Invalid;
703
704 MHW_CHK_STATUS_RETURN(m_osInterface->pfnGetResourceInfo(
705 m_osInterface,
706 &surface->OsResource,
707 &details));
708
709 surface->Format = details.Format;
710 surface->dwWidth = details.dwWidth;
711 surface->dwHeight = details.dwHeight;
712 surface->dwPitch = details.dwPitch;
713 surface->dwDepth = details.dwDepth;
714 surface->bArraySpacing = details.bArraySpacing;
715 surface->TileType = details.TileType;
716 surface->TileModeGMM = details.TileModeGMM;
717 surface->bGMMTileEnabled = details.bGMMTileEnabled;
718 surface->dwOffset = details.RenderOffset.YUV.Y.BaseOffset;
719 surface->UPlaneOffset.iSurfaceOffset = details.RenderOffset.YUV.U.BaseOffset;
720 surface->UPlaneOffset.iXOffset = details.RenderOffset.YUV.U.XOffset;
721 surface->UPlaneOffset.iYOffset =
722 (surface->UPlaneOffset.iSurfaceOffset - surface->dwOffset) / surface->dwPitch +
723 details.RenderOffset.YUV.U.YOffset;
724 surface->VPlaneOffset.iSurfaceOffset = details.RenderOffset.YUV.V.BaseOffset;
725 surface->VPlaneOffset.iXOffset = details.RenderOffset.YUV.V.XOffset;
726 surface->VPlaneOffset.iYOffset =
727 (surface->VPlaneOffset.iSurfaceOffset - surface->dwOffset) / surface->dwPitch +
728 details.RenderOffset.YUV.V.YOffset;
729 surface->bCompressible = details.bCompressible;
730 surface->bIsCompressed = details.bIsCompressed;
731 surface->CompressionMode = details.CompressionMode;
732
733 return eStatus;
734 }
735
GetSurfaceWidthInBytes(PMOS_SURFACE surface)736 uint32_t MediaMemDecompState::GetSurfaceWidthInBytes(PMOS_SURFACE surface)
737 {
738 uint32_t widthInBytes;
739
740 switch (surface->Format)
741 {
742 case Format_IMC1:
743 case Format_IMC3:
744 case Format_IMC2:
745 case Format_IMC4:
746 case Format_NV12:
747 case Format_YV12:
748 case Format_I420:
749 case Format_IYUV:
750 case Format_400P:
751 case Format_411P:
752 case Format_422H:
753 case Format_422V:
754 case Format_444P:
755 case Format_RGBP:
756 case Format_BGRP:
757 widthInBytes = surface->dwWidth;
758 break;
759 case Format_YUY2:
760 case Format_YUYV:
761 case Format_YVYU:
762 case Format_UYVY:
763 case Format_VYUY:
764 case Format_P010:
765 widthInBytes = surface->dwWidth << 1;
766 break;
767 case Format_A8R8G8B8:
768 case Format_X8R8G8B8:
769 case Format_A8B8G8R8:
770 widthInBytes = surface->dwWidth << 2;
771 break;
772 default:
773 widthInBytes = surface->dwWidth;
774 break;
775 }
776
777 return widthInBytes;
778 }
779
WriteSyncTagToResourceCmd(PMOS_COMMAND_BUFFER cmdBuffer)780 MOS_STATUS MediaMemDecompState::WriteSyncTagToResourceCmd(
781 PMOS_COMMAND_BUFFER cmdBuffer)
782 {
783 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
784
785 MHW_FUNCTION_ENTER;
786
787 PMOS_RESOURCE globalGpuContextSyncTagBuffer = nullptr;
788 MHW_CHK_STATUS_RETURN(m_osInterface->pfnGetGpuStatusBufferResource(
789 m_osInterface,
790 globalGpuContextSyncTagBuffer));
791 MHW_CHK_NULL_RETURN(globalGpuContextSyncTagBuffer);
792
793 uint32_t offset = m_osInterface->pfnGetGpuStatusTagOffset(
794 m_osInterface,
795 m_osInterface->CurrentGpuContextOrdinal);
796 uint32_t value = m_osInterface->pfnGetGpuStatusTag(
797 m_osInterface,
798 m_osInterface->CurrentGpuContextOrdinal);
799
800 MHW_MI_STORE_DATA_PARAMS params;
801 params.pOsResource = globalGpuContextSyncTagBuffer;
802 params.dwResourceOffset = offset;
803 params.dwValue = value;
804
805 MHW_CHK_STATUS_RETURN(m_miInterface->AddMiStoreDataImmCmd(cmdBuffer, ¶ms));
806
807 // Increment GPU Context Tag for next use
808 m_osInterface->pfnIncrementGpuStatusTag(m_osInterface, m_osInterface->CurrentGpuContextOrdinal);
809
810 return eStatus;
811 }
812
SetMediaObjectCopyCurbe(DecompKernelStateIdx kernelStateIdx)813 MOS_STATUS MediaMemDecompState::SetMediaObjectCopyCurbe(
814 DecompKernelStateIdx kernelStateIdx)
815 {
816 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
817
818 MHW_FUNCTION_ENTER;
819
820 if ((kernelStateIdx >= decompKernelStateMax))
821 {
822 eStatus = MOS_STATUS_INVALID_PARAMETER;
823 return eStatus;
824 }
825
826 MediaObjectCopyCurbe cmd;
827
828 cmd.m_dw0.srcSurface0Index = copySurfaceSrcY;
829 cmd.m_dw3.dstSurface0Index = copySurfaceDstY;
830
831 if (kernelStateIdx == decompKernelStatePl2)
832 {
833 cmd.m_dw1.srcSurface1Index = copySurfaceSrcU;
834 cmd.m_dw4.dstSurface1Index = copySurfaceDstU;
835 }
836
837 MHW_CHK_STATUS_RETURN(m_kernelStates[kernelStateIdx].m_dshRegion.AddData(
838 &cmd,
839 m_kernelStates[kernelStateIdx].dwCurbeOffset,
840 sizeof(cmd)));
841
842 return eStatus;
843 }
844
SetKernelStateParams()845 MOS_STATUS MediaMemDecompState::SetKernelStateParams()
846 {
847 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
848
849 MHW_FUNCTION_ENTER;
850
851 MHW_CHK_NULL_RETURN(m_renderInterface->GetHwCaps());
852
853 for (uint32_t krnIdx = 0; krnIdx < decompKernelStateMax; krnIdx++)
854 {
855 auto kernelState = &m_kernelStates[krnIdx];
856 kernelState->KernelParams.pBinary = m_kernelBinary[krnIdx];
857 kernelState->KernelParams.iSize = m_kernelSize[krnIdx];
858 kernelState->KernelParams.iBTCount = copySurfaceNum;
859 kernelState->KernelParams.iThreadCount = m_renderInterface->GetHwCaps()->dwMaxThreads;
860 kernelState->KernelParams.iCurbeLength = MOS_ALIGN_CEIL(
861 MediaObjectCopyCurbe::m_byteSize,
862 m_stateHeapInterface->pStateHeapInterface->GetCurbeAlignment());
863 kernelState->KernelParams.iBlockWidth = 32;
864 kernelState->KernelParams.iBlockHeight = 16;
865 kernelState->KernelParams.iIdCount = 1;
866
867 kernelState->dwCurbeOffset =
868 m_stateHeapInterface->pStateHeapInterface->GetSizeofCmdInterfaceDescriptorData();
869
870 MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnCalculateSshAndBtSizesRequested(
871 m_stateHeapInterface,
872 kernelState->KernelParams.iBTCount,
873 &kernelState->dwSshSize,
874 &kernelState->dwBindingTableSize));
875
876 kernelState->dwKernelBinaryOffset = 0;
877
878 eStatus = m_stateHeapInterface->pfnAssignSpaceInStateHeap(
879 m_stateHeapInterface,
880 MHW_ISH_TYPE,
881 kernelState,
882 kernelState->KernelParams.iSize,
883 true,
884 false);
885
886 if (eStatus == MOS_STATUS_CLIENT_AR_NO_SPACE)
887 {
888 MHW_ASSERTMESSAGE("CodecHal does not handle this case");
889 return eStatus;
890 }
891 else if (eStatus != MOS_STATUS_SUCCESS)
892 {
893 return eStatus;
894 }
895
896 MHW_CHK_STATUS_RETURN(kernelState->m_ishRegion.AddData(
897 kernelState->KernelParams.pBinary,
898 0,
899 kernelState->KernelParams.iSize));
900 }
901
902 return eStatus;
903 }
904
Initialize(PMOS_INTERFACE osInterface,MhwCpInterface * cpInterface,MhwMiInterface * miInterface,MhwRenderInterface * renderInterface)905 MOS_STATUS MediaMemDecompState::Initialize(
906 PMOS_INTERFACE osInterface,
907 MhwCpInterface *cpInterface,
908 MhwMiInterface *miInterface,
909 MhwRenderInterface *renderInterface)
910 {
911 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
912 MediaUserSettingSharedPtr userSettingPtr = nullptr;
913 MOS_USER_FEATURE_VALUE_DATA userFeatureData = {};
914
915 MHW_FUNCTION_ENTER;
916
917 MHW_CHK_NULL_RETURN(osInterface);
918 MHW_CHK_NULL_RETURN(cpInterface);
919 MHW_CHK_NULL_RETURN(miInterface);
920 MHW_CHK_NULL_RETURN(renderInterface);
921
922 m_osInterface = osInterface;
923 m_cpInterface = cpInterface;
924 m_miInterface = miInterface;
925 m_renderInterface = renderInterface;
926 userSettingPtr = osInterface->pfnGetUserSettingInstance(osInterface);
927
928 for (uint8_t kernelIdx = decompKernelStatePa; kernelIdx < decompKernelStateMax; kernelIdx++)
929 {
930 MHW_CHK_STATUS_RETURN(InitKernelState(kernelIdx));
931 }
932
933 if (m_stateHeapSettings.dwIshSize > 0 &&
934 m_stateHeapSettings.dwDshSize > 0 &&
935 m_stateHeapSettings.dwNumSyncTags > 0)
936 {
937 MHW_CHK_STATUS_RETURN(m_renderInterface->AllocateHeaps(
938 m_stateHeapSettings));
939 }
940
941 m_stateHeapInterface = m_renderInterface->m_stateHeapInterface;
942 MHW_CHK_NULL_RETURN(m_stateHeapInterface);
943
944 if (m_osInterface->pfnIsGpuContextValid(m_osInterface, MOS_GPU_CONTEXT_RENDER) == MOS_STATUS_SUCCESS)
945 {
946 m_renderContext = MOS_GPU_CONTEXT_RENDER;
947 }
948 else
949 {
950 MOS_GPUCTX_CREATOPTIONS createOption;
951 MHW_CHK_STATUS_RETURN(m_osInterface->pfnCreateGpuContext(
952 m_osInterface,
953 MOS_GPU_CONTEXT_RENDER,
954 MOS_GPU_NODE_3D,
955 &createOption));
956
957 m_renderContext = MOS_GPU_CONTEXT_RENDER;
958 }
959
960 MOS_NULL_RENDERING_FLAGS nullHWAccelerationEnable;
961 nullHWAccelerationEnable.Value = 0;
962 m_disableDecodeSyncLock = false;
963 #if (_DEBUG || _RELEASE_INTERNAL)
964 ReadUserSettingForDebug(
965 userSettingPtr,
966 nullHWAccelerationEnable.Value,
967 __MEDIA_USER_FEATURE_VALUE_NULL_HW_ACCELERATION_ENABLE,
968 MediaUserSetting::Group::Device);
969
970 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
971 MOS_UserFeature_ReadValue_ID(
972 nullptr,
973 __MEDIA_USER_FEATURE_VALUE_DECODE_LOCK_DISABLE_ID,
974 &userFeatureData,
975 m_osInterface->pOsContext);
976 m_disableDecodeSyncLock = userFeatureData.u32Data ? true : false;
977 #endif // _DEBUG || _RELEASE_INTERNAL
978
979 m_disableLockForTranscode =
980 MEDIA_IS_WA(m_osInterface->pfnGetWaTable(m_osInterface), WaDisableLockForTranscodePerf);
981
982 MHW_CHK_STATUS_RETURN(SetKernelStateParams());
983
984 m_renderContextUsesNullHw =
985 ((m_renderContext == MOS_GPU_CONTEXT_RENDER) ? nullHWAccelerationEnable.CtxRender : nullHWAccelerationEnable.CtxRender2) ||
986 nullHWAccelerationEnable.Mmc;
987
988 MOS_ALLOC_GFXRES_PARAMS allocParams;
989 MOS_ZeroMemory(&allocParams, sizeof(allocParams));
990 allocParams.Type = MOS_GFXRES_BUFFER;
991 allocParams.TileType = MOS_TILE_LINEAR;
992 allocParams.Format = Format_Buffer;
993 allocParams.dwBytes = MHW_CACHELINE_SIZE;
994 allocParams.pBufName = "CmdBufIdGlobal";
995 MHW_CHK_STATUS_RETURN(m_osInterface->pfnAllocateResource(
996 m_osInterface,
997 &allocParams,
998 &m_resCmdBufIdGlobal));
999 m_currCmdBufId = MemoryBlock::m_invalidTrackerId + 1;
1000
1001 MOS_LOCK_PARAMS lockParams;
1002 MOS_ZeroMemory(&lockParams, sizeof(lockParams));
1003 lockParams.WriteOnly = 1;
1004 m_cmdBufIdGlobal = (uint32_t *)m_osInterface->pfnLockResource(
1005 m_osInterface,
1006 &m_resCmdBufIdGlobal,
1007 &lockParams);
1008 MHW_CHK_NULL_RETURN(m_cmdBufIdGlobal);
1009 MOS_ZeroMemory(m_cmdBufIdGlobal, allocParams.dwBytes);
1010
1011 MHW_CHK_STATUS_RETURN(m_stateHeapInterface->pfnSetCmdBufStatusPtr(
1012 m_stateHeapInterface,
1013 m_cmdBufIdGlobal));
1014
1015 return eStatus;
1016 }
1017