1 /*
2 * Copyright (c) 2020-2022, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file encode_vp9_segmentation.cpp
24 //! \brief Defines the common interface for vp9 encode segmentation features
25 //!
26
27 #include "encode_vp9_segmentation.h"
28 #include "encode_vp9_vdenc_const_settings.h"
29 #include "media_vp9_packet_defs.h"
30 #include "mhw_utilities.h"
31 #include "encode_vp9_tile.h"
32 #include "media_vp9_feature_defs.h"
33
34 namespace encode
35 {
Vp9Segmentation(MediaFeatureManager * featureManager,EncodeAllocator * allocator,CodechalHwInterfaceNext * hwInterface,void * constSettings)36 Vp9Segmentation::Vp9Segmentation(
37 MediaFeatureManager *featureManager,
38 EncodeAllocator * allocator,
39 CodechalHwInterfaceNext *hwInterface,
40 void * constSettings) : MediaFeature(constSettings), m_allocator(allocator)
41 {
42 ENCODE_FUNC_CALL();
43 ENCODE_CHK_NULL_NO_STATUS_RETURN(featureManager);
44
45 m_featureManager = featureManager;
46
47 m_basicFeature = dynamic_cast<Vp9BasicFeature *>(featureManager->GetFeature(FeatureIDs::basicFeature));
48 ENCODE_CHK_NULL_NO_STATUS_RETURN(m_basicFeature);
49
50 ENCODE_CHK_NULL_NO_STATUS_RETURN(hwInterface);
51 m_hwInterface = hwInterface;
52 }
53
~Vp9Segmentation()54 Vp9Segmentation::~Vp9Segmentation()
55 {
56 FreeResources();
57 }
58
Init(void * settings)59 MOS_STATUS Vp9Segmentation::Init(void *settings)
60 {
61 ENCODE_FUNC_CALL();
62 MEDIA_CHK_NULL_RETURN(settings);
63
64 CodechalSetting *codecSettings = (CodechalSetting *)settings;
65
66 ENCODE_CHK_STATUS_RETURN(AllocateResources());
67
68 return MOS_STATUS_SUCCESS;
69 }
70
Update(void * params)71 MOS_STATUS Vp9Segmentation::Update(void *params)
72 {
73 ENCODE_FUNC_CALL();
74 ENCODE_CHK_NULL_RETURN(params);
75 auto allocator = m_basicFeature->GetAllocator();
76 ENCODE_CHK_NULL_RETURN(allocator);
77
78 EncoderParams *encodeParams = (EncoderParams *)params;
79
80 auto vp9SeqParams = static_cast<PCODEC_VP9_ENCODE_SEQUENCE_PARAMS>(encodeParams->pSeqParams);
81 ENCODE_CHK_NULL_RETURN(vp9SeqParams);
82 auto vp9PicParams = static_cast<PCODEC_VP9_ENCODE_PIC_PARAMS>(encodeParams->pPicParams);
83 ENCODE_CHK_NULL_RETURN(vp9PicParams);
84 m_vp9SegmentParams = static_cast<PCODEC_VP9_ENCODE_SEGMENT_PARAMS>(encodeParams->pSegmentParams);
85 ENCODE_CHK_NULL_RETURN(m_vp9SegmentParams);
86
87 m_enabled = vp9PicParams->PicFlags.fields.segmentation_enabled;
88
89 m_segmentMapProvided = encodeParams->bSegmentMapProvided && vp9PicParams->PicFlags.fields.segmentation_enabled;
90 // In MBBRC case, without a SegMap provided by the app, we need to set SegMapUpdate ON
91 // as the SegMap is generated by HuC and it can be different for every frame
92 if (vp9PicParams->PicFlags.fields.segmentation_enabled && !encodeParams->bSegmentMapProvided)
93 {
94 vp9PicParams->PicFlags.fields.segmentation_update_map = 1;
95 }
96
97 m_mbBrcEnabled = false;
98 vp9SeqParams->SeqFlags.fields.MBBRC = MBBRC_DISABLED;
99
100 // We do not support segmentation w/o seg map in CQP case, only support segmentation w/ seg map in CQP
101 // BRC/ACQP supports segmentation both w/ and w/o seg map
102 if (vp9PicParams->PicFlags.fields.segmentation_enabled && !encodeParams->bSegmentMapProvided && vp9SeqParams->RateControlMethod == RATECONTROL_CQP)
103 {
104 return MOS_STATUS_INVALID_PARAMETER;
105 }
106 // Need to index properly when more than one temporal layer is present
107 ENCODE_ASSERT(vp9SeqParams->FrameRate[0].uiDenominator > 0);
108 if (vp9SeqParams->FrameRate[0].uiDenominator == 0)
109 {
110 return MOS_STATUS_INVALID_PARAMETER;
111 }
112 uint32_t frameRate = vp9SeqParams->FrameRate[0].uiNumerator / vp9SeqParams->FrameRate[0].uiDenominator;
113
114 if (!m_mbBrcEnabled)
115 {
116 m_mbStatsEnabled = false;
117 if (m_segmentMapProvided)
118 {
119 m_mbSegmentMapSurface = *(encodeParams->psMbSegmentMapSurface);
120 ENCODE_CHK_STATUS_RETURN(allocator->GetSurfaceInfo(&(m_mbSegmentMapSurface)));
121 }
122 }
123 else
124 {
125 //Kernel C model fixed Qindex delta's when MBBRC is enabled
126 int16_t segmentQIndexDelta[CODEC_VP9_MAX_SEGMENTS] = {0, -8, -6, -4, -2, 2, 4, 6};
127
128 for (uint8_t i = 0; i < CODEC_VP9_MAX_SEGMENTS; i++)
129 {
130 m_vp9SegmentParams->SegData[i].SegmentFlags.value = 0;
131 m_vp9SegmentParams->SegData[i].SegmentLFLevelDelta = 0;
132 m_vp9SegmentParams->SegData[i].SegmentQIndexDelta = segmentQIndexDelta[i];
133 }
134 m_mbStatsEnabled = true;
135 }
136
137 if (m_basicFeature->m_newSeq)
138 {
139 ENCODE_CHK_STATUS_RETURN(SetConstSettings());
140 ENCODE_CHK_STATUS_RETURN(SetSequenceStructs());
141 }
142
143 ENCODE_CHK_STATUS_RETURN(SetupSegmentationStreamIn());
144
145 return MOS_STATUS_SUCCESS;
146 }
147
SetDmemForUpdate(void * params)148 MOS_STATUS Vp9Segmentation::SetDmemForUpdate(void *params)
149 {
150 ENCODE_FUNC_CALL();
151 ENCODE_CHK_NULL_RETURN(params);
152
153 auto dmem = (HucBrcUpdateDmem *)params;
154 ENCODE_CHK_NULL_RETURN(dmem);
155 auto vp9PicParams = static_cast<PCODEC_VP9_ENCODE_PIC_PARAMS>(m_basicFeature->m_vp9PicParams);
156 ENCODE_CHK_NULL_RETURN(vp9PicParams);
157
158 // If app gives segment map, we honor the QP deltas provided, if not, and segmentation is enabled,
159 // BRC generates the QP deltas and patches them into the segment states
160 dmem->UPD_SegMapGenerating_U8 = vp9PicParams->PicFlags.fields.segmentation_enabled && !m_segmentMapProvided;
161
162 return MOS_STATUS_SUCCESS;
163 }
164
SetDmemForHucProb(void * params)165 MOS_STATUS Vp9Segmentation::SetDmemForHucProb(void *params)
166 {
167 ENCODE_FUNC_CALL();
168 ENCODE_CHK_NULL_RETURN(params);
169
170 auto dmem = (HucProbDmem *)params;
171 ENCODE_CHK_NULL_RETURN(dmem);
172 auto vp9PicParams = static_cast<PCODEC_VP9_ENCODE_PIC_PARAMS>(m_basicFeature->m_vp9PicParams);
173 ENCODE_CHK_NULL_RETURN(vp9PicParams);
174
175 dmem->FrameCtrl.SegOn = vp9PicParams->PicFlags.fields.segmentation_enabled;
176 dmem->FrameCtrl.SegMapUpdate = vp9PicParams->PicFlags.fields.segmentation_update_map;
177 dmem->FrameCtrl.SegUpdateData = vp9PicParams->PicFlags.fields.seg_update_data;
178
179 dmem->StreamInSegEnable = (uint8_t)m_segmentMapProvided;
180 dmem->StreamInEnable = (uint8_t)m_segmentMapProvided; // Currently unused, if used may || with HME enabled
181
182 return MOS_STATUS_SUCCESS;
183 }
184
FreeResources()185 MOS_STATUS Vp9Segmentation::FreeResources()
186 {
187 ENCODE_FUNC_CALL();
188
189 if (m_mapBuffer)
190 {
191 MOS_FreeMemory(m_mapBuffer);
192 }
193
194 return MOS_STATUS_SUCCESS;
195 }
196
SetSequenceStructs()197 MOS_STATUS Vp9Segmentation::SetSequenceStructs()
198 {
199 ENCODE_FUNC_CALL();
200
201 auto vp9SeqParams = static_cast<PCODEC_VP9_ENCODE_SEQUENCE_PARAMS>(m_basicFeature->m_vp9SeqParams);
202 ENCODE_CHK_NULL_RETURN(vp9SeqParams);
203
204 if ((vp9SeqParams->SeqFlags.fields.MBBRC == MBBRC_ENABLED) || (vp9SeqParams->SeqFlags.fields.MBBRC == MBBRC_ENABLED_TU_DEPENDENCY))
205 {
206 if (!m_segmentMapAllocated)
207 {
208 ENCODE_CHK_STATUS_RETURN(AllocateMbBrcSegMapSurface());
209 }
210 }
211 else
212 {
213 //Allocated Driver MbBrc Segment map resource needs to be deallocated when MBBRC is disabled. The reason being
214 //same segmnet map surface (sMbSegmentMapSurface) will be used in the driver referencing both the Application passed
215 //as well as Driver allocated resource for segmentmap depending on mbbrc disabled or enabled.
216 if (!Mos_ResourceIsNull(&m_mbSegmentMapSurface.OsResource) && m_segmentMapAllocated)
217 {
218 m_allocator->DestroyResource(&m_mbSegmentMapSurface.OsResource);
219 }
220
221 m_segmentMapAllocated = false;
222 }
223
224
225 return MOS_STATUS_SUCCESS;
226 }
227
AllocateMbBrcSegMapSurface()228 MOS_STATUS Vp9Segmentation::AllocateMbBrcSegMapSurface()
229 {
230 ENCODE_FUNC_CALL();
231
232 MOS_RESOURCE *allocatedBuffer = nullptr;
233
234 // MBBRC segment map surface needs to be allocated when mbbrc is enabled as segment map will not be
235 // passed from APP when MBBRC is enabled
236 uint32_t picWidthInMb = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_basicFeature->m_maxPicWidth);
237 uint32_t picHeightInMb = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_basicFeature->m_maxPicHeight);
238
239 m_mbSegmentMapSurface.TileType = MOS_TILE_LINEAR;
240 m_mbSegmentMapSurface.bArraySpacing = true;
241 m_mbSegmentMapSurface.Format = Format_Buffer_2D;
242 m_mbSegmentMapSurface.dwWidth = MOS_ALIGN_CEIL(picWidthInMb, 4);
243 m_mbSegmentMapSurface.dwHeight = picHeightInMb;
244 m_mbSegmentMapSurface.dwPitch = MOS_ALIGN_CEIL(picWidthInMb, 64);
245
246 MOS_ALLOC_GFXRES_PARAMS allocParamsForBuffer2D;
247 MOS_ZeroMemory(&allocParamsForBuffer2D, sizeof(MOS_ALLOC_GFXRES_PARAMS));
248
249 allocParamsForBuffer2D.Type = MOS_GFXRES_2D;
250 allocParamsForBuffer2D.TileType = MOS_TILE_LINEAR;
251 allocParamsForBuffer2D.Format = Format_Buffer_2D;
252 allocParamsForBuffer2D.dwWidth = m_mbSegmentMapSurface.dwPitch;
253 allocParamsForBuffer2D.dwHeight = picHeightInMb;
254 allocParamsForBuffer2D.pBufName = "MBBRC driver Segment Map Surface";
255 allocParamsForBuffer2D.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ_WRITE_NOCACHE;
256
257 uint32_t size = allocParamsForBuffer2D.dwWidth * allocParamsForBuffer2D.dwHeight;
258
259 allocatedBuffer = m_allocator->AllocateResource(allocParamsForBuffer2D, true);
260 ENCODE_CHK_NULL_RETURN(allocatedBuffer);
261 m_mbSegmentMapSurface.OsResource = *allocatedBuffer;
262
263 m_segmentMapAllocated = true;
264
265 return MOS_STATUS_SUCCESS;
266 }
267
SetupSegmentationStreamIn()268 MOS_STATUS Vp9Segmentation::SetupSegmentationStreamIn()
269 {
270 ENCODE_FUNC_CALL();
271
272 if (!m_segmentMapProvided && !m_basicFeature->m_hmeEnabled)
273 {
274 // If we're not going to use the streamin surface leave now
275 return MOS_STATUS_SUCCESS;
276 }
277
278 ENCODE_CHK_NULL_RETURN(m_hwInterface);
279 PMOS_INTERFACE osInterface = m_hwInterface->GetOsInterface();
280 ENCODE_CHK_NULL_RETURN(osInterface);
281 auto basicFeature = static_cast<Vp9BasicFeature *>(m_basicFeature);
282 ENCODE_CHK_NULL_RETURN(basicFeature);
283 ENCODE_CHK_NULL_RETURN(basicFeature->m_recycleBuf);
284
285 auto vp9PicParams = static_cast<PCODEC_VP9_ENCODE_PIC_PARAMS>(m_basicFeature->m_vp9PicParams);
286 ENCODE_CHK_NULL_RETURN(vp9PicParams);
287 auto vp9SeqParams = static_cast<PCODEC_VP9_ENCODE_SEQUENCE_PARAMS>(m_basicFeature->m_vp9SeqParams);
288 ENCODE_CHK_NULL_RETURN(vp9SeqParams);
289
290 auto vdencStreamInBuffer = m_basicFeature->m_recycleBuf->GetBuffer(RecycleResId::StreamInBuffer, m_basicFeature->m_currRecycledBufIdx);
291 ENCODE_CHK_NULL_RETURN(vdencStreamInBuffer);
292 auto streamIn = static_cast<Vp9VdencStreamInState *>(m_allocator->LockResourceForWrite(vdencStreamInBuffer));
293 ENCODE_CHK_NULL_RETURN(streamIn);
294
295 // Align to cache line size is OK since streamin state is padded to cacheline size
296 // - HW uses cacheline size to read, not command size
297 uint32_t blockWidth = MOS_ALIGN_CEIL(m_basicFeature->m_frameWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32;
298 uint32_t blockHeight = MOS_ALIGN_CEIL(m_basicFeature->m_frameHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT) / 32;
299 uint32_t streamInSize = blockHeight * blockWidth * CODECHAL_CACHELINE_SIZE;
300 MOS_ZeroMemory(streamIn, streamInSize);
301
302 // If segment map isn't provided then we unlock surface and exit function here.
303 // Reason why check isn't done before function call is to take advantage of the fact that
304 // we need the surface locked here if seg map is provided and we want it 0'd either way.
305 // This saves us from doing 2 locks on this buffer per frame
306 if (!m_segmentMapProvided)
307 {
308 ENCODE_CHK_STATUS_RETURN(m_allocator->UnLock(vdencStreamInBuffer));
309 return MOS_STATUS_SUCCESS;
310 }
311
312 auto data = static_cast<uint8_t *>(m_allocator->LockResourceForRead(&m_mbSegmentMapSurface.OsResource));
313 ENCODE_CHK_NULL_RETURN(data);
314
315 bool tileEnabled = false;
316 RUN_FEATURE_INTERFACE_RETURN(Vp9EncodeTile, Vp9FeatureIDs::encodeTile, IsEnabled, tileEnabled);
317
318 if (tileEnabled)
319 {
320 uint32_t numTileColumns = (1 << vp9PicParams->log2_tile_columns);
321 uint32_t numTileRows = (1 << vp9PicParams->log2_tile_rows);
322 uint32_t numTiles = numTileColumns * numTileRows;
323 uint32_t tileX = 0;
324 uint32_t tileY = 0;
325
326 uint32_t currTileStartX64Aligned = 0;
327 uint32_t currTileStartY64Aligned = 0; //Set tile Y coordinate 0
328 uint32_t blocksRasterized = 0; //Count of rasterized blocks for this frame
329
330 for (uint32_t tileIdx = 0; tileIdx < numTiles; tileIdx++)
331 {
332 tileX = tileIdx % numTileColumns; //Current tile column position
333 tileY = tileIdx / numTileColumns; //Current tile row position
334
335 currTileStartX64Aligned = ((tileX * basicFeature->m_picWidthInSb) >> vp9PicParams->log2_tile_columns) * CODEC_VP9_SUPER_BLOCK_WIDTH;
336 currTileStartY64Aligned = ((tileY * basicFeature->m_picHeightInSb) >> vp9PicParams->log2_tile_rows) * CODEC_VP9_SUPER_BLOCK_HEIGHT;
337
338 uint32_t tileWidth64Aligned = (((tileX == (numTileColumns - 1)) ? basicFeature->m_picWidthInSb : (((tileX + 1) * basicFeature->m_picWidthInSb) >> vp9PicParams->log2_tile_columns)) *
339 CODEC_VP9_SUPER_BLOCK_WIDTH) -
340 currTileStartX64Aligned;
341
342 uint32_t tileHeight64Aligned = (((tileY == (numTileRows - 1)) ? basicFeature->m_picHeightInSb : (((tileY + 1) * basicFeature->m_picHeightInSb) >> vp9PicParams->log2_tile_rows)) *
343 CODEC_VP9_SUPER_BLOCK_HEIGHT) -
344 currTileStartY64Aligned;
345
346 // Last tile col raw width and raw height - not necessarily 64 aligned,
347 // use this length to duplicate values from segmap for empty padding blocks in last tiles
348 uint32_t lastTileColWidth = (tileX == (numTileColumns - 1)) ? (basicFeature->m_frameWidth - currTileStartX64Aligned) : tileWidth64Aligned;
349 uint32_t lastTileRowHeight = (tileY == (numTileRows - 1)) ? (basicFeature->m_frameHeight - currTileStartY64Aligned) : tileHeight64Aligned;
350
351 uint32_t tileWidth = (tileX == (numTileColumns - 1)) ? lastTileColWidth : tileWidth64Aligned;
352 uint32_t tileHeight = (tileY == (numTileRows - 1)) ? lastTileRowHeight : tileHeight64Aligned;
353
354 // Recreate the mapbuffer and remap it if, for this frame, tile height and width have changed from previous tile
355 // which was processed from this frame or previous,
356 // or if map buffer is created for previous frame and tile map has changed from previous frame (numtilerows and cols)
357 EncodeTileData tileData = {};
358 RUN_FEATURE_INTERFACE_RETURN(Vp9EncodeTile, Vp9FeatureIDs::encodeTile, GetTileByIndex, tileData, tileIdx);
359
360 if (!m_mapBuffer ||
361 tileWidth != m_segStreamInWidth ||
362 tileHeight != m_segStreamInHeight ||
363 numTileColumns != tileData.numOfTileColumnsInFrame ||
364 numTiles != tileData.numOfTilesInFrame)
365 {
366 ENCODE_CHK_STATUS_RETURN(InitZigZagToRasterLUTPerTile(
367 tileWidth,
368 tileHeight,
369 currTileStartX64Aligned,
370 currTileStartY64Aligned,
371 blocksRasterized));
372 }
373 tileData.numOfTileColumnsInFrame = numTileColumns;
374 tileData.numOfTilesInFrame = numTiles;
375 }
376 }
377
378 uint32_t dwPitch = m_mbSegmentMapSurface.dwPitch;
379 if (osInterface->pfnGetResType(&m_mbSegmentMapSurface.OsResource) == MOS_GFXRES_BUFFER)
380 {
381 // Application can send 1D or 2D buffer, based on that change the pitch to correctly access the map buffer.
382 // Driver reads the seg ids from the buffer for each 16x16 block. Reads 4 values for each 32x32 block
383 dwPitch = MOS_ALIGN_CEIL(basicFeature->m_frameWidth, CODECHAL_MACROBLOCK_WIDTH) / CODECHAL_MACROBLOCK_WIDTH;
384 }
385
386 auto tuSettings = static_cast<Vp9VdencFeatureSettings *>(m_constSettings);
387 ENCODE_CHK_NULL_RETURN(tuSettings);
388
389 if (false == TargetUsage::isValid(vp9SeqParams->TargetUsage))
390 {
391 ENCODE_CHK_STATUS_RETURN(m_allocator->UnLock(&m_mbSegmentMapSurface.OsResource));
392 ENCODE_CHK_STATUS_RETURN(m_allocator->UnLock(vdencStreamInBuffer));
393 return MOS_STATUS_INVALID_PARAMETER;
394 }
395
396 // Set seg ID's of streamin states
397 for (uint32_t i = 0; i < blockHeight * blockWidth; ++i)
398 {
399 uint32_t addrOffset = CalculateBufferOffset(
400 m_mapBuffer[i],
401 basicFeature->m_frameWidth,
402 vp9PicParams->PicFlags.fields.seg_id_block_size,
403 dwPitch);
404
405 uint32_t segId = *(data + addrOffset);
406
407 streamIn[i].DW7.SegidEnable = 1;
408 streamIn[i].DW7.Segid32X32016X1603Vp9Only = segId | (segId << 4) | (segId << 8) | (segId << 12);
409
410 // TU functions copied from there
411 streamIn[i].DW0.Maxtusize = 3;
412
413 streamIn[i].DW0.Maxcusize = 3;
414 // For InterFrames we change the CUsize to 32x32 if we have sub 32 blocks with different segids in superblock
415 if ((i % 4) == 3 && basicFeature->m_pictureCodingType == P_TYPE)
416 {
417 if (!(streamIn[i - 3].DW7.Segid32X32016X1603Vp9Only == streamIn[i - 2].DW7.Segid32X32016X1603Vp9Only &&
418 streamIn[i - 2].DW7.Segid32X32016X1603Vp9Only == streamIn[i - 1].DW7.Segid32X32016X1603Vp9Only &&
419 streamIn[i - 1].DW7.Segid32X32016X1603Vp9Only == streamIn[i].DW7.Segid32X32016X1603Vp9Only))
420 {
421 streamIn[i - 3].DW0.Maxcusize =
422 streamIn[i - 2].DW0.Maxcusize =
423 streamIn[i - 1].DW0.Maxcusize =
424 streamIn[i].DW0.Maxcusize = 2;
425 }
426 }
427
428 streamIn[i].DW0.Numimepredictors = tuSettings->NumImePredictors[vp9SeqParams->TargetUsage];
429 streamIn[i].DW6.Nummergecandidatecu8X8 = tuSettings->NumMergeCandidateCu8x8[vp9SeqParams->TargetUsage];
430 streamIn[i].DW6.Nummergecandidatecu16X16 = tuSettings->NumMergeCandidateCu16x16[vp9SeqParams->TargetUsage];
431 streamIn[i].DW6.Nummergecandidatecu32X32 = tuSettings->NumMergeCandidateCu32x32[vp9SeqParams->TargetUsage];
432 streamIn[i].DW6.Nummergecandidatecu64X64 = tuSettings->NumMergeCandidateCu64x64[vp9SeqParams->TargetUsage];
433
434 }
435
436 ENCODE_CHK_STATUS_RETURN(m_allocator->UnLock(&m_mbSegmentMapSurface.OsResource));
437
438 ENCODE_CHK_STATUS_RETURN(m_allocator->UnLock(vdencStreamInBuffer));
439
440 return MOS_STATUS_SUCCESS;
441 }
442
InitZigZagToRasterLUTPerTile(uint32_t tileWidth,uint32_t tileHeight,uint32_t currTileStartXInFrame,uint32_t currTileStartYInFrame,uint32_t & blocksRasterized)443 MOS_STATUS Vp9Segmentation::InitZigZagToRasterLUTPerTile(
444 uint32_t tileWidth,
445 uint32_t tileHeight,
446 uint32_t currTileStartXInFrame,
447 uint32_t currTileStartYInFrame,
448 uint32_t &blocksRasterized)
449 {
450 ENCODE_FUNC_CALL();
451
452 auto basicFeature = static_cast<Vp9BasicFeature *>(m_basicFeature);
453 ENCODE_CHK_NULL_RETURN(basicFeature);
454
455 // Allocate space for zig-zag to raster LUT used for vdenc streamin (1 int32_t for every 32x32 block (pic 64 aligned)).
456 // We only do this when the 1st tile of new frame is being processed and keep it the same unless tile resolutions changed.
457 // We keep this map around until sequence is finished, it's deleted at device destruction
458 if (currTileStartXInFrame == 0 && currTileStartYInFrame == 0)
459 {
460 // Free previous if it exists - it may exist if this isn't first seg streamin frame, but it's a new tile with different res
461 if (m_mapBuffer)
462 {
463 MOS_FreeMemory(m_mapBuffer);
464 }
465 // Allocate one integer space for each 32*32 block in the whole frame to hold the segmentation index
466 m_mapBuffer = (uint32_t *)MOS_AllocAndZeroMemory(
467 (MOS_ALIGN_CEIL(basicFeature->m_frameWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32) *
468 (MOS_ALIGN_CEIL(basicFeature->m_frameHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT) / 32) *
469 sizeof(int32_t)); // Framewidth and height are 64 aligned already
470 }
471 ENCODE_CHK_NULL_RETURN(m_mapBuffer);
472
473 uint32_t align64Width32 = MOS_ALIGN_CEIL(tileWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32;
474 uint32_t align64Height32 = MOS_ALIGN_CEIL(tileHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT) / 32;
475 uint32_t *mapBufferZigZagPerTile = (uint32_t *)MOS_AllocAndZeroMemory(align64Width32 * align64Height32 * sizeof(uint32_t));
476 ENCODE_CHK_NULL_RETURN(mapBufferZigZagPerTile);
477
478 m_segStreamInWidth = basicFeature->m_frameWidth;
479 m_segStreamInHeight = basicFeature->m_frameHeight;
480
481 uint32_t count32 = 0; // Number of 32 by 32 blocks that will be processed here
482 for (uint32_t curr32YInTile = 0; curr32YInTile < align64Height32; curr32YInTile++)
483 {
484 for (uint32_t curr32XInTile = 0; curr32XInTile < align64Width32; curr32XInTile++)
485 {
486 mapBufferZigZagPerTile[count32++] = GetSegmentBlockIndexInFrame(
487 basicFeature->m_frameWidth,
488 curr32XInTile,
489 curr32YInTile,
490 currTileStartXInFrame,
491 currTileStartYInFrame);
492 }
493 }
494
495 // mapBufferZigZagPerTile ---> m_mapBuffer
496 // | a b c d ... ---> | a b W X c d Y Z ....
497 // | W X Y Z ...
498 uint32_t num32blocks = align64Width32 * align64Height32;
499 uint32_t tileOffsetIndex = blocksRasterized;
500 for (uint32_t i = 0, rasterCount = 0; i < num32blocks; i += (align64Width32 * 2))
501 {
502 for (uint32_t j = i; j < i + (align64Width32 * 2); j += 4)
503 {
504 m_mapBuffer[j + tileOffsetIndex] = mapBufferZigZagPerTile[rasterCount++];
505 m_mapBuffer[j + tileOffsetIndex + 1] = mapBufferZigZagPerTile[rasterCount++];
506 }
507 for (uint32_t j = i + 2; j < i + (align64Width32 * 2); j += 4)
508 {
509 m_mapBuffer[j + tileOffsetIndex] = mapBufferZigZagPerTile[rasterCount++];
510 m_mapBuffer[j + tileOffsetIndex + 1] = mapBufferZigZagPerTile[rasterCount++];
511 }
512 }
513 // Free per tile map buffer as it has been rasterized and copied into the mapbuffer
514 if (mapBufferZigZagPerTile)
515 {
516 MOS_FreeMemory(mapBufferZigZagPerTile);
517 }
518
519 // Zig-zag pattern filled to SB aligned (CEIL), if unaligned then we base seg ID address on previous row/column (data replication)
520 uint32_t width32 = CODECHAL_GET_WIDTH_IN_BLOCKS(tileWidth, 32);
521 if (width32 != align64Width32) // replicate last column
522 {
523 for (auto i = (align64Width32 * 2) - 1 - 2; i < num32blocks; i += (align64Width32 * 2))
524 {
525 m_mapBuffer[i + tileOffsetIndex] = m_mapBuffer[i + tileOffsetIndex - 1];
526 m_mapBuffer[i + tileOffsetIndex + 2] = m_mapBuffer[i + tileOffsetIndex + 1];
527 }
528 }
529
530 uint32_t height32 = CODECHAL_GET_HEIGHT_IN_BLOCKS(tileHeight, 32);
531 if (height32 != align64Height32) // replicate last row
532 {
533 for (auto i = num32blocks - (align64Width32 * 2) + 2; i < num32blocks; i += 4)
534 {
535 m_mapBuffer[i + tileOffsetIndex] = m_mapBuffer[i + tileOffsetIndex - 2];
536 m_mapBuffer[i + tileOffsetIndex + 1] = m_mapBuffer[i + tileOffsetIndex + 1 - 2];
537 }
538 }
539 // Index offset to be added to the buffer for the next tile depending on how many
540 // blocks were rasterized already in this tile
541 blocksRasterized += count32;
542
543 return MOS_STATUS_SUCCESS;
544 }
545
GetSegmentBlockIndexInFrame(uint32_t frameWidth,uint32_t curr32XInTile,uint32_t curr32YInTile,uint32_t currTileStartX64aligned,uint32_t currTileStartY64aligned)546 uint32_t Vp9Segmentation::GetSegmentBlockIndexInFrame(
547 uint32_t frameWidth,
548 uint32_t curr32XInTile,
549 uint32_t curr32YInTile,
550 uint32_t currTileStartX64aligned,
551 uint32_t currTileStartY64aligned)
552 {
553 ENCODE_FUNC_CALL();
554 uint32_t frameWidthIn32 = MOS_ALIGN_CEIL(frameWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32;
555 uint32_t curr32XInFrame = currTileStartX64aligned / 32 + curr32XInTile;
556 uint32_t curr32YInFrame = currTileStartY64aligned / 32 + curr32YInTile;
557 uint32_t curr32BlockInFrame = curr32YInFrame * frameWidthIn32 + curr32XInFrame;
558 return curr32BlockInFrame;
559 }
560
CalculateBufferOffset(uint32_t idx,uint32_t width,uint32_t blockSize,uint32_t bufferPitch)561 uint32_t Vp9Segmentation::CalculateBufferOffset(uint32_t idx, uint32_t width, uint32_t blockSize, uint32_t bufferPitch)
562 {
563 uint32_t y = idx / (MOS_ALIGN_CEIL(width, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32);
564 uint32_t x = idx % (MOS_ALIGN_CEIL(width, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32);
565
566 switch (blockSize)
567 {
568 case 0: // 16x16
569 x *= 2;
570 y *= 2;
571 break;
572 case 1: // 32x32 (no multiplier since streamin chunks are for 32x32)
573 break;
574 case 2: // 64x64
575 x /= 2;
576 y /= 2;
577 break;
578 case 3: // 8x8
579 x *= 4;
580 y *= 4;
581 break;
582 }
583
584 uint32_t addr = y * bufferPitch;
585 addr += x;
586
587 return addr;
588 }
589
SetSegmentId(uint8_t segmentId)590 MOS_STATUS Vp9Segmentation::SetSegmentId(uint8_t segmentId)
591 {
592 m_segmentId = segmentId;
593
594 return MOS_STATUS_SUCCESS;
595 }
596
//!
//! \brief    Fills the HCP_VP9_SEGMENT_STATE parameters for the selected segment
//! \details  Uses the segment chosen through SetSegmentId(). The segment parameters,
//!           the basic feature and the segment id are validated before use.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, MOS_STATUS_INVALID_PARAMETER when the
//!           selected segment id is out of range, else the failure reported by a
//!           null check
//!
MHW_SETPAR_DECL_SRC(HCP_VP9_SEGMENT_STATE, Vp9Segmentation)
{
    ENCODE_FUNC_CALL();

    ENCODE_CHK_NULL_RETURN(m_basicFeature);
    ENCODE_CHK_NULL_RETURN(m_vp9SegmentParams);

    // SetSegmentId() accepts any value, so guard the SegData access here.
    if (m_segmentId >= CODEC_VP9_MAX_SEGMENTS)
    {
        return MOS_STATUS_INVALID_PARAMETER;
    }

    const auto &vp9SegData = m_vp9SegmentParams->SegData[m_segmentId];

    params.segmentId = m_segmentId;

    params.segmentSkipped          = vp9SegData.SegmentFlags.fields.SegmentSkipped;
    params.segmentReference        = vp9SegData.SegmentFlags.fields.SegmentReference;
    params.segmentReferenceEnabled = vp9SegData.SegmentFlags.fields.SegmentReferenceEnabled;

    // Deltas are handed over in sign-magnitude form (second argument as passed by the original code: 7 and 9).
    params.segmentLfLevelDeltaEncodeModeOnly = m_basicFeature->Convert2SignMagnitude(vp9SegData.SegmentLFLevelDelta, 7);
    params.segmentQindexDeltaEncodeModeOnly  = m_basicFeature->Convert2SignMagnitude(vp9SegData.SegmentQIndexDelta, 9);

    return MOS_STATUS_SUCCESS;
}
614
//!
//! \brief    Fills the segmentation related VDENC_CMD2 parameters
//! \details  Enables VDEnc stream-in when a segment map is provided or 16x ME is
//!           enabled, and programs the eight per-segment QP values (DW24/DW25):
//!           base luma AC q-index plus the per-segment delta when segmentation is
//!           enabled, otherwise base luma AC q-index plus the luma DC delta.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, else fail reason
//!
MHW_SETPAR_DECL_SRC(VDENC_CMD2, Vp9Segmentation)
{
    ENCODE_FUNC_CALL();

    MHW_MI_CHK_NULL(m_basicFeature->m_vp9PicParams);
    auto picParams = m_basicFeature->m_vp9PicParams;

    const bool    useSegmentDeltas = picParams->PicFlags.fields.segmentation_enabled;
    const uint8_t baseQIndex       = picParams->LumaACQIndex;
    const uint8_t dcQIndexDelta    = picParams->LumaDCQIndexDelta;

    MHW_MI_CHK_NULL(m_vp9SegmentParams);
    CODEC_VP9_ENCODE_SEG_PARAMS *segData = m_vp9SegmentParams->SegData;

    params.vdencStreamIn = m_segmentMapProvided || m_basicFeature->m_16xMeEnabled;

    // QpForSeg0..3 live in DW24, QpForSeg4..7 in DW25.
    const uint32_t segmentQpCount = 8;
    if (useSegmentDeltas)
    {
        for (uint32_t seg = 0; seg < segmentQpCount; ++seg)
        {
            params.qpForSegs[seg] = baseQIndex + segData[seg].SegmentQIndexDelta;
        }
    }
    else  // Segmentation disabled: every segment gets the same frame level value
    {
        for (uint32_t seg = 0; seg < segmentQpCount; ++seg)
        {
            params.qpForSegs[seg] = baseQIndex + dcQIndexDelta;
        }
    }

    return MOS_STATUS_SUCCESS;
}
671
672 } // namespace encode
673