1 /*
2 * Copyright (c) 2020-2022, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file     encode_vp9_segmentation.cpp
24 //! \brief    Defines the common interface for vp9 encode segmentation features
25 //!
26 
27 #include "encode_vp9_segmentation.h"
28 #include "encode_vp9_vdenc_const_settings.h"
29 #include "media_vp9_packet_defs.h"
30 #include "mhw_utilities.h"
31 #include "encode_vp9_tile.h"
32 #include "media_vp9_feature_defs.h"
33 
34 namespace encode
35 {
Vp9Segmentation(MediaFeatureManager * featureManager,EncodeAllocator * allocator,CodechalHwInterfaceNext * hwInterface,void * constSettings)36 Vp9Segmentation::Vp9Segmentation(
37     MediaFeatureManager *featureManager,
38     EncodeAllocator *    allocator,
39     CodechalHwInterfaceNext *hwInterface,
40     void *               constSettings) : MediaFeature(constSettings), m_allocator(allocator)
41 {
42     ENCODE_FUNC_CALL();
43     ENCODE_CHK_NULL_NO_STATUS_RETURN(featureManager);
44 
45     m_featureManager = featureManager;
46 
47     m_basicFeature = dynamic_cast<Vp9BasicFeature *>(featureManager->GetFeature(FeatureIDs::basicFeature));
48     ENCODE_CHK_NULL_NO_STATUS_RETURN(m_basicFeature);
49 
50     ENCODE_CHK_NULL_NO_STATUS_RETURN(hwInterface);
51     m_hwInterface = hwInterface;
52 }
53 
~Vp9Segmentation()54 Vp9Segmentation::~Vp9Segmentation()
55 {
56     FreeResources();
57 }
58 
Init(void * settings)59 MOS_STATUS Vp9Segmentation::Init(void *settings)
60 {
61     ENCODE_FUNC_CALL();
62     MEDIA_CHK_NULL_RETURN(settings);
63 
64     CodechalSetting *codecSettings = (CodechalSetting *)settings;
65 
66     ENCODE_CHK_STATUS_RETURN(AllocateResources());
67 
68     return MOS_STATUS_SUCCESS;
69 }
70 
Update(void * params)71 MOS_STATUS Vp9Segmentation::Update(void *params)
72 {
73     ENCODE_FUNC_CALL();
74     ENCODE_CHK_NULL_RETURN(params);
75     auto allocator = m_basicFeature->GetAllocator();
76     ENCODE_CHK_NULL_RETURN(allocator);
77 
78     EncoderParams *encodeParams = (EncoderParams *)params;
79 
80     auto vp9SeqParams = static_cast<PCODEC_VP9_ENCODE_SEQUENCE_PARAMS>(encodeParams->pSeqParams);
81     ENCODE_CHK_NULL_RETURN(vp9SeqParams);
82     auto vp9PicParams = static_cast<PCODEC_VP9_ENCODE_PIC_PARAMS>(encodeParams->pPicParams);
83     ENCODE_CHK_NULL_RETURN(vp9PicParams);
84     m_vp9SegmentParams = static_cast<PCODEC_VP9_ENCODE_SEGMENT_PARAMS>(encodeParams->pSegmentParams);
85     ENCODE_CHK_NULL_RETURN(m_vp9SegmentParams);
86 
87     m_enabled = vp9PicParams->PicFlags.fields.segmentation_enabled;
88 
89     m_segmentMapProvided = encodeParams->bSegmentMapProvided && vp9PicParams->PicFlags.fields.segmentation_enabled;
90     // In MBBRC case, without a SegMap provided by the app, we need to set SegMapUpdate ON
91     // as the SegMap is generated by HuC and it can be different for every frame
92     if (vp9PicParams->PicFlags.fields.segmentation_enabled && !encodeParams->bSegmentMapProvided)
93     {
94         vp9PicParams->PicFlags.fields.segmentation_update_map = 1;
95     }
96 
97     m_mbBrcEnabled                      = false;
98     vp9SeqParams->SeqFlags.fields.MBBRC = MBBRC_DISABLED;
99 
100     // We do not support segmentation w/o seg map in CQP case, only support segmentation w/ seg map in CQP
101     // BRC/ACQP supports segmentation both w/ and w/o seg map
102     if (vp9PicParams->PicFlags.fields.segmentation_enabled && !encodeParams->bSegmentMapProvided && vp9SeqParams->RateControlMethod == RATECONTROL_CQP)
103     {
104         return MOS_STATUS_INVALID_PARAMETER;
105     }
106     // Need to index properly when more than one temporal layer is present
107     ENCODE_ASSERT(vp9SeqParams->FrameRate[0].uiDenominator > 0);
108     if (vp9SeqParams->FrameRate[0].uiDenominator == 0)
109     {
110         return MOS_STATUS_INVALID_PARAMETER;
111     }
112     uint32_t frameRate = vp9SeqParams->FrameRate[0].uiNumerator / vp9SeqParams->FrameRate[0].uiDenominator;
113 
114     if (!m_mbBrcEnabled)
115     {
116         m_mbStatsEnabled = false;
117         if (m_segmentMapProvided)
118         {
119             m_mbSegmentMapSurface = *(encodeParams->psMbSegmentMapSurface);
120             ENCODE_CHK_STATUS_RETURN(allocator->GetSurfaceInfo(&(m_mbSegmentMapSurface)));
121         }
122     }
123     else
124     {
125         //Kernel C model fixed Qindex delta's when MBBRC is enabled
126         int16_t segmentQIndexDelta[CODEC_VP9_MAX_SEGMENTS] = {0, -8, -6, -4, -2, 2, 4, 6};
127 
128         for (uint8_t i = 0; i < CODEC_VP9_MAX_SEGMENTS; i++)
129         {
130             m_vp9SegmentParams->SegData[i].SegmentFlags.value  = 0;
131             m_vp9SegmentParams->SegData[i].SegmentLFLevelDelta = 0;
132             m_vp9SegmentParams->SegData[i].SegmentQIndexDelta  = segmentQIndexDelta[i];
133         }
134         m_mbStatsEnabled = true;
135     }
136 
137     if (m_basicFeature->m_newSeq)
138     {
139         ENCODE_CHK_STATUS_RETURN(SetConstSettings());
140         ENCODE_CHK_STATUS_RETURN(SetSequenceStructs());
141     }
142 
143     ENCODE_CHK_STATUS_RETURN(SetupSegmentationStreamIn());
144 
145     return MOS_STATUS_SUCCESS;
146 }
147 
SetDmemForUpdate(void * params)148 MOS_STATUS Vp9Segmentation::SetDmemForUpdate(void *params)
149 {
150     ENCODE_FUNC_CALL();
151     ENCODE_CHK_NULL_RETURN(params);
152 
153     auto dmem    = (HucBrcUpdateDmem *)params;
154     ENCODE_CHK_NULL_RETURN(dmem);
155     auto vp9PicParams = static_cast<PCODEC_VP9_ENCODE_PIC_PARAMS>(m_basicFeature->m_vp9PicParams);
156     ENCODE_CHK_NULL_RETURN(vp9PicParams);
157 
158     // If app gives segment map, we honor the QP deltas provided, if not, and segmentation is enabled,
159     // BRC generates the QP deltas and patches them into the segment states
160     dmem->UPD_SegMapGenerating_U8 = vp9PicParams->PicFlags.fields.segmentation_enabled && !m_segmentMapProvided;
161 
162     return MOS_STATUS_SUCCESS;
163 }
164 
SetDmemForHucProb(void * params)165 MOS_STATUS Vp9Segmentation::SetDmemForHucProb(void *params)
166 {
167     ENCODE_FUNC_CALL();
168     ENCODE_CHK_NULL_RETURN(params);
169 
170     auto dmem    = (HucProbDmem *)params;
171     ENCODE_CHK_NULL_RETURN(dmem);
172     auto vp9PicParams = static_cast<PCODEC_VP9_ENCODE_PIC_PARAMS>(m_basicFeature->m_vp9PicParams);
173     ENCODE_CHK_NULL_RETURN(vp9PicParams);
174 
175     dmem->FrameCtrl.SegOn         = vp9PicParams->PicFlags.fields.segmentation_enabled;
176     dmem->FrameCtrl.SegMapUpdate  = vp9PicParams->PicFlags.fields.segmentation_update_map;
177     dmem->FrameCtrl.SegUpdateData = vp9PicParams->PicFlags.fields.seg_update_data;
178 
179     dmem->StreamInSegEnable = (uint8_t)m_segmentMapProvided;
180     dmem->StreamInEnable    = (uint8_t)m_segmentMapProvided;  // Currently unused, if used may || with HME enabled
181 
182     return MOS_STATUS_SUCCESS;
183 }
184 
FreeResources()185 MOS_STATUS Vp9Segmentation::FreeResources()
186 {
187     ENCODE_FUNC_CALL();
188 
189     if (m_mapBuffer)
190     {
191         MOS_FreeMemory(m_mapBuffer);
192     }
193 
194     return MOS_STATUS_SUCCESS;
195 }
196 
SetSequenceStructs()197 MOS_STATUS Vp9Segmentation::SetSequenceStructs()
198 {
199     ENCODE_FUNC_CALL();
200 
201     auto vp9SeqParams = static_cast<PCODEC_VP9_ENCODE_SEQUENCE_PARAMS>(m_basicFeature->m_vp9SeqParams);
202     ENCODE_CHK_NULL_RETURN(vp9SeqParams);
203 
204     if ((vp9SeqParams->SeqFlags.fields.MBBRC == MBBRC_ENABLED) || (vp9SeqParams->SeqFlags.fields.MBBRC == MBBRC_ENABLED_TU_DEPENDENCY))
205     {
206         if (!m_segmentMapAllocated)
207         {
208             ENCODE_CHK_STATUS_RETURN(AllocateMbBrcSegMapSurface());
209         }
210     }
211     else
212     {
213         //Allocated Driver MbBrc Segment map resource needs to be deallocated when MBBRC is disabled. The reason being
214         //same segmnet map surface (sMbSegmentMapSurface) will be used in the driver referencing both the Application passed
215         //as well as Driver allocated resource for segmentmap depending on mbbrc disabled or enabled.
216         if (!Mos_ResourceIsNull(&m_mbSegmentMapSurface.OsResource) && m_segmentMapAllocated)
217         {
218             m_allocator->DestroyResource(&m_mbSegmentMapSurface.OsResource);
219         }
220 
221         m_segmentMapAllocated = false;
222     }
223 
224 
225     return MOS_STATUS_SUCCESS;
226 }
227 
AllocateMbBrcSegMapSurface()228 MOS_STATUS Vp9Segmentation::AllocateMbBrcSegMapSurface()
229 {
230     ENCODE_FUNC_CALL();
231 
232     MOS_RESOURCE *allocatedBuffer = nullptr;
233 
234     // MBBRC segment map surface needs to be allocated when mbbrc is enabled as segment map will not be
235     // passed from APP when MBBRC is enabled
236     uint32_t picWidthInMb  = CODECHAL_GET_WIDTH_IN_MACROBLOCKS(m_basicFeature->m_maxPicWidth);
237     uint32_t picHeightInMb = CODECHAL_GET_HEIGHT_IN_MACROBLOCKS(m_basicFeature->m_maxPicHeight);
238 
239     m_mbSegmentMapSurface.TileType      = MOS_TILE_LINEAR;
240     m_mbSegmentMapSurface.bArraySpacing = true;
241     m_mbSegmentMapSurface.Format        = Format_Buffer_2D;
242     m_mbSegmentMapSurface.dwWidth       = MOS_ALIGN_CEIL(picWidthInMb, 4);
243     m_mbSegmentMapSurface.dwHeight      = picHeightInMb;
244     m_mbSegmentMapSurface.dwPitch       = MOS_ALIGN_CEIL(picWidthInMb, 64);
245 
246     MOS_ALLOC_GFXRES_PARAMS allocParamsForBuffer2D;
247     MOS_ZeroMemory(&allocParamsForBuffer2D, sizeof(MOS_ALLOC_GFXRES_PARAMS));
248 
249     allocParamsForBuffer2D.Type     = MOS_GFXRES_2D;
250     allocParamsForBuffer2D.TileType = MOS_TILE_LINEAR;
251     allocParamsForBuffer2D.Format   = Format_Buffer_2D;
252     allocParamsForBuffer2D.dwWidth  = m_mbSegmentMapSurface.dwPitch;
253     allocParamsForBuffer2D.dwHeight = picHeightInMb;
254     allocParamsForBuffer2D.pBufName = "MBBRC driver Segment Map Surface";
255     allocParamsForBuffer2D.ResUsageType = MOS_HW_RESOURCE_USAGE_ENCODE_INTERNAL_READ_WRITE_NOCACHE;
256 
257     uint32_t size = allocParamsForBuffer2D.dwWidth * allocParamsForBuffer2D.dwHeight;
258 
259     allocatedBuffer = m_allocator->AllocateResource(allocParamsForBuffer2D, true);
260     ENCODE_CHK_NULL_RETURN(allocatedBuffer);
261     m_mbSegmentMapSurface.OsResource = *allocatedBuffer;
262 
263     m_segmentMapAllocated = true;
264 
265     return MOS_STATUS_SUCCESS;
266 }
267 
SetupSegmentationStreamIn()268 MOS_STATUS Vp9Segmentation::SetupSegmentationStreamIn()
269 {
270     ENCODE_FUNC_CALL();
271 
272     if (!m_segmentMapProvided && !m_basicFeature->m_hmeEnabled)
273     {
274       // If we're not going to use the streamin surface leave now
275         return MOS_STATUS_SUCCESS;
276     }
277 
278     ENCODE_CHK_NULL_RETURN(m_hwInterface);
279     PMOS_INTERFACE osInterface = m_hwInterface->GetOsInterface();
280     ENCODE_CHK_NULL_RETURN(osInterface);
281     auto basicFeature = static_cast<Vp9BasicFeature *>(m_basicFeature);
282     ENCODE_CHK_NULL_RETURN(basicFeature);
283     ENCODE_CHK_NULL_RETURN(basicFeature->m_recycleBuf);
284 
285     auto vp9PicParams = static_cast<PCODEC_VP9_ENCODE_PIC_PARAMS>(m_basicFeature->m_vp9PicParams);
286     ENCODE_CHK_NULL_RETURN(vp9PicParams);
287     auto vp9SeqParams = static_cast<PCODEC_VP9_ENCODE_SEQUENCE_PARAMS>(m_basicFeature->m_vp9SeqParams);
288     ENCODE_CHK_NULL_RETURN(vp9SeqParams);
289 
290     auto vdencStreamInBuffer = m_basicFeature->m_recycleBuf->GetBuffer(RecycleResId::StreamInBuffer, m_basicFeature->m_currRecycledBufIdx);
291     ENCODE_CHK_NULL_RETURN(vdencStreamInBuffer);
292     auto streamIn = static_cast<Vp9VdencStreamInState *>(m_allocator->LockResourceForWrite(vdencStreamInBuffer));
293     ENCODE_CHK_NULL_RETURN(streamIn);
294 
295     // Align to cache line size is OK since streamin state is padded to cacheline size
296     // - HW uses cacheline size to read, not command size
297     uint32_t blockWidth   = MOS_ALIGN_CEIL(m_basicFeature->m_frameWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32;
298     uint32_t blockHeight  = MOS_ALIGN_CEIL(m_basicFeature->m_frameHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT) / 32;
299     uint32_t streamInSize = blockHeight * blockWidth * CODECHAL_CACHELINE_SIZE;
300     MOS_ZeroMemory(streamIn, streamInSize);
301 
302     // If segment map isn't provided then we unlock surface and exit function here.
303     // Reason why check isn't done before function call is to take advantage of the fact that
304     // we need the surface locked here if seg map is provided and we want it 0'd either way.
305     // This saves us from doing 2 locks on this buffer per frame
306     if (!m_segmentMapProvided)
307     {
308         ENCODE_CHK_STATUS_RETURN(m_allocator->UnLock(vdencStreamInBuffer));
309         return MOS_STATUS_SUCCESS;
310     }
311 
312     auto data = static_cast<uint8_t *>(m_allocator->LockResourceForRead(&m_mbSegmentMapSurface.OsResource));
313     ENCODE_CHK_NULL_RETURN(data);
314 
315     bool tileEnabled = false;
316     RUN_FEATURE_INTERFACE_RETURN(Vp9EncodeTile, Vp9FeatureIDs::encodeTile, IsEnabled, tileEnabled);
317 
318     if (tileEnabled)
319     {
320         uint32_t numTileColumns = (1 << vp9PicParams->log2_tile_columns);
321         uint32_t numTileRows    = (1 << vp9PicParams->log2_tile_rows);
322         uint32_t numTiles       = numTileColumns * numTileRows;
323         uint32_t tileX          = 0;
324         uint32_t tileY          = 0;
325 
326         uint32_t currTileStartX64Aligned = 0;
327         uint32_t currTileStartY64Aligned = 0;  //Set tile Y coordinate 0
328         uint32_t blocksRasterized        = 0;  //Count of rasterized blocks for this frame
329 
330         for (uint32_t tileIdx = 0; tileIdx < numTiles; tileIdx++)
331         {
332             tileX = tileIdx % numTileColumns;  //Current tile column position
333             tileY = tileIdx / numTileColumns;  //Current tile row position
334 
335             currTileStartX64Aligned = ((tileX * basicFeature->m_picWidthInSb) >> vp9PicParams->log2_tile_columns) * CODEC_VP9_SUPER_BLOCK_WIDTH;
336             currTileStartY64Aligned = ((tileY * basicFeature->m_picHeightInSb) >> vp9PicParams->log2_tile_rows) * CODEC_VP9_SUPER_BLOCK_HEIGHT;
337 
338             uint32_t tileWidth64Aligned = (((tileX == (numTileColumns - 1)) ? basicFeature->m_picWidthInSb : (((tileX + 1) * basicFeature->m_picWidthInSb) >> vp9PicParams->log2_tile_columns)) *
339                                               CODEC_VP9_SUPER_BLOCK_WIDTH) -
340                                           currTileStartX64Aligned;
341 
342             uint32_t tileHeight64Aligned = (((tileY == (numTileRows - 1)) ? basicFeature->m_picHeightInSb : (((tileY + 1) * basicFeature->m_picHeightInSb) >> vp9PicParams->log2_tile_rows)) *
343                                                CODEC_VP9_SUPER_BLOCK_HEIGHT) -
344                                            currTileStartY64Aligned;
345 
346             // Last tile col raw width and raw height - not necessarily 64 aligned,
347             // use this length to duplicate values from segmap for empty padding blocks in last tiles
348             uint32_t lastTileColWidth  = (tileX == (numTileColumns - 1)) ? (basicFeature->m_frameWidth - currTileStartX64Aligned) : tileWidth64Aligned;
349             uint32_t lastTileRowHeight = (tileY == (numTileRows - 1)) ? (basicFeature->m_frameHeight - currTileStartY64Aligned) : tileHeight64Aligned;
350 
351             uint32_t tileWidth  = (tileX == (numTileColumns - 1)) ? lastTileColWidth : tileWidth64Aligned;
352             uint32_t tileHeight = (tileY == (numTileRows - 1)) ? lastTileRowHeight : tileHeight64Aligned;
353 
354             // Recreate the mapbuffer and remap it if, for this frame, tile height and width have changed from previous tile
355             // which was processed from this frame or previous,
356             // or if map buffer is created for previous frame and tile map has changed from previous frame (numtilerows and cols)
357             EncodeTileData tileData = {};
358             RUN_FEATURE_INTERFACE_RETURN(Vp9EncodeTile, Vp9FeatureIDs::encodeTile, GetTileByIndex, tileData, tileIdx);
359 
360             if (!m_mapBuffer ||
361                 tileWidth != m_segStreamInWidth ||
362                 tileHeight != m_segStreamInHeight ||
363                 numTileColumns != tileData.numOfTileColumnsInFrame ||
364                 numTiles != tileData.numOfTilesInFrame)
365             {
366                 ENCODE_CHK_STATUS_RETURN(InitZigZagToRasterLUTPerTile(
367                     tileWidth,
368                     tileHeight,
369                     currTileStartX64Aligned,
370                     currTileStartY64Aligned,
371                     blocksRasterized));
372             }
373             tileData.numOfTileColumnsInFrame = numTileColumns;
374             tileData.numOfTilesInFrame       = numTiles;
375         }
376     }
377 
378     uint32_t dwPitch = m_mbSegmentMapSurface.dwPitch;
379     if (osInterface->pfnGetResType(&m_mbSegmentMapSurface.OsResource) == MOS_GFXRES_BUFFER)
380     {
381         // Application can send 1D or 2D buffer, based on that change the pitch to correctly access the map buffer.
382         // Driver reads the seg ids from the buffer for each 16x16 block. Reads 4 values for each 32x32 block
383         dwPitch = MOS_ALIGN_CEIL(basicFeature->m_frameWidth, CODECHAL_MACROBLOCK_WIDTH) / CODECHAL_MACROBLOCK_WIDTH;
384     }
385 
386     auto tuSettings = static_cast<Vp9VdencFeatureSettings *>(m_constSettings);
387     ENCODE_CHK_NULL_RETURN(tuSettings);
388 
389     if (false == TargetUsage::isValid(vp9SeqParams->TargetUsage))
390     {
391         ENCODE_CHK_STATUS_RETURN(m_allocator->UnLock(&m_mbSegmentMapSurface.OsResource));
392         ENCODE_CHK_STATUS_RETURN(m_allocator->UnLock(vdencStreamInBuffer));
393         return MOS_STATUS_INVALID_PARAMETER;
394     }
395 
396     // Set seg ID's of streamin states
397     for (uint32_t i = 0; i < blockHeight * blockWidth; ++i)
398     {
399         uint32_t addrOffset = CalculateBufferOffset(
400             m_mapBuffer[i],
401             basicFeature->m_frameWidth,
402             vp9PicParams->PicFlags.fields.seg_id_block_size,
403             dwPitch);
404 
405         uint32_t segId = *(data + addrOffset);
406 
407         streamIn[i].DW7.SegidEnable               = 1;
408         streamIn[i].DW7.Segid32X32016X1603Vp9Only = segId | (segId << 4) | (segId << 8) | (segId << 12);
409 
410         // TU functions copied from there
411         streamIn[i].DW0.Maxtusize = 3;
412 
413         streamIn[i].DW0.Maxcusize = 3;
414         // For InterFrames we change the CUsize to 32x32 if we have sub 32 blocks with different segids in superblock
415         if ((i % 4) == 3 && basicFeature->m_pictureCodingType == P_TYPE)
416         {
417             if (!(streamIn[i - 3].DW7.Segid32X32016X1603Vp9Only == streamIn[i - 2].DW7.Segid32X32016X1603Vp9Only &&
418                     streamIn[i - 2].DW7.Segid32X32016X1603Vp9Only == streamIn[i - 1].DW7.Segid32X32016X1603Vp9Only &&
419                     streamIn[i - 1].DW7.Segid32X32016X1603Vp9Only == streamIn[i].DW7.Segid32X32016X1603Vp9Only))
420             {
421                 streamIn[i - 3].DW0.Maxcusize =
422                     streamIn[i - 2].DW0.Maxcusize =
423                         streamIn[i - 1].DW0.Maxcusize =
424                             streamIn[i].DW0.Maxcusize = 2;
425             }
426         }
427 
428         streamIn[i].DW0.Numimepredictors         = tuSettings->NumImePredictors[vp9SeqParams->TargetUsage];
429         streamIn[i].DW6.Nummergecandidatecu8X8   = tuSettings->NumMergeCandidateCu8x8[vp9SeqParams->TargetUsage];
430         streamIn[i].DW6.Nummergecandidatecu16X16 = tuSettings->NumMergeCandidateCu16x16[vp9SeqParams->TargetUsage];
431         streamIn[i].DW6.Nummergecandidatecu32X32 = tuSettings->NumMergeCandidateCu32x32[vp9SeqParams->TargetUsage];
432         streamIn[i].DW6.Nummergecandidatecu64X64 = tuSettings->NumMergeCandidateCu64x64[vp9SeqParams->TargetUsage];
433 
434     }
435 
436     ENCODE_CHK_STATUS_RETURN(m_allocator->UnLock(&m_mbSegmentMapSurface.OsResource));
437 
438     ENCODE_CHK_STATUS_RETURN(m_allocator->UnLock(vdencStreamInBuffer));
439 
440     return MOS_STATUS_SUCCESS;
441 }
442 
InitZigZagToRasterLUTPerTile(uint32_t tileWidth,uint32_t tileHeight,uint32_t currTileStartXInFrame,uint32_t currTileStartYInFrame,uint32_t & blocksRasterized)443 MOS_STATUS Vp9Segmentation::InitZigZagToRasterLUTPerTile(
444     uint32_t  tileWidth,
445     uint32_t  tileHeight,
446     uint32_t  currTileStartXInFrame,
447     uint32_t  currTileStartYInFrame,
448     uint32_t &blocksRasterized)
449 {
450     ENCODE_FUNC_CALL();
451 
452     auto basicFeature = static_cast<Vp9BasicFeature *>(m_basicFeature);
453     ENCODE_CHK_NULL_RETURN(basicFeature);
454 
455     // Allocate space for zig-zag to raster LUT used for vdenc streamin (1 int32_t for every 32x32 block (pic 64 aligned)).
456     // We only do this when the 1st tile of new frame is being processed and keep it the same unless tile resolutions changed.
457     // We keep this map around until sequence is finished, it's deleted at device destruction
458     if (currTileStartXInFrame == 0 && currTileStartYInFrame == 0)
459     {
460         // Free previous if it exists - it may exist if this isn't first seg streamin frame, but it's a new tile with different res
461         if (m_mapBuffer)
462         {
463             MOS_FreeMemory(m_mapBuffer);
464         }
465         // Allocate one integer space for each 32*32 block in the whole frame to hold the segmentation index
466         m_mapBuffer = (uint32_t *)MOS_AllocAndZeroMemory(
467             (MOS_ALIGN_CEIL(basicFeature->m_frameWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32) *
468             (MOS_ALIGN_CEIL(basicFeature->m_frameHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT) / 32) *
469             sizeof(int32_t));  // Framewidth and height are 64 aligned already
470     }
471     ENCODE_CHK_NULL_RETURN(m_mapBuffer);
472 
473     uint32_t  align64Width32         = MOS_ALIGN_CEIL(tileWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32;
474     uint32_t  align64Height32        = MOS_ALIGN_CEIL(tileHeight, CODEC_VP9_SUPER_BLOCK_HEIGHT) / 32;
475     uint32_t *mapBufferZigZagPerTile = (uint32_t *)MOS_AllocAndZeroMemory(align64Width32 * align64Height32 * sizeof(uint32_t));
476     ENCODE_CHK_NULL_RETURN(mapBufferZigZagPerTile);
477 
478     m_segStreamInWidth  = basicFeature->m_frameWidth;
479     m_segStreamInHeight = basicFeature->m_frameHeight;
480 
481     uint32_t count32 = 0;  // Number of 32 by 32 blocks that will be processed here
482     for (uint32_t curr32YInTile = 0; curr32YInTile < align64Height32; curr32YInTile++)
483     {
484         for (uint32_t curr32XInTile = 0; curr32XInTile < align64Width32; curr32XInTile++)
485         {
486             mapBufferZigZagPerTile[count32++] = GetSegmentBlockIndexInFrame(
487                 basicFeature->m_frameWidth,
488                 curr32XInTile,
489                 curr32YInTile,
490                 currTileStartXInFrame,
491                 currTileStartYInFrame);
492         }
493     }
494 
495     //    mapBufferZigZagPerTile --->   m_mapBuffer
496     //  | a b c d ...            ---> | a b W X c d Y Z ....
497     //  | W X Y Z ...
498     uint32_t num32blocks     = align64Width32 * align64Height32;
499     uint32_t tileOffsetIndex = blocksRasterized;
500     for (uint32_t i = 0, rasterCount = 0; i < num32blocks; i += (align64Width32 * 2))
501     {
502         for (uint32_t j = i; j < i + (align64Width32 * 2); j += 4)
503         {
504             m_mapBuffer[j + tileOffsetIndex]     = mapBufferZigZagPerTile[rasterCount++];
505             m_mapBuffer[j + tileOffsetIndex + 1] = mapBufferZigZagPerTile[rasterCount++];
506         }
507         for (uint32_t j = i + 2; j < i + (align64Width32 * 2); j += 4)
508         {
509             m_mapBuffer[j + tileOffsetIndex]     = mapBufferZigZagPerTile[rasterCount++];
510             m_mapBuffer[j + tileOffsetIndex + 1] = mapBufferZigZagPerTile[rasterCount++];
511         }
512     }
513     // Free per tile map buffer as it has been rasterized and copied into the mapbuffer
514     if (mapBufferZigZagPerTile)
515     {
516         MOS_FreeMemory(mapBufferZigZagPerTile);
517     }
518 
519     // Zig-zag pattern filled to SB aligned (CEIL), if unaligned then we base seg ID address on previous row/column (data replication)
520     uint32_t width32 = CODECHAL_GET_WIDTH_IN_BLOCKS(tileWidth, 32);
521     if (width32 != align64Width32)  // replicate last column
522     {
523         for (auto i = (align64Width32 * 2) - 1 - 2; i < num32blocks; i += (align64Width32 * 2))
524         {
525             m_mapBuffer[i + tileOffsetIndex]     = m_mapBuffer[i + tileOffsetIndex - 1];
526             m_mapBuffer[i + tileOffsetIndex + 2] = m_mapBuffer[i + tileOffsetIndex + 1];
527         }
528     }
529 
530     uint32_t height32 = CODECHAL_GET_HEIGHT_IN_BLOCKS(tileHeight, 32);
531     if (height32 != align64Height32)  // replicate last row
532     {
533         for (auto i = num32blocks - (align64Width32 * 2) + 2; i < num32blocks; i += 4)
534         {
535             m_mapBuffer[i + tileOffsetIndex]     = m_mapBuffer[i + tileOffsetIndex - 2];
536             m_mapBuffer[i + tileOffsetIndex + 1] = m_mapBuffer[i + tileOffsetIndex + 1 - 2];
537         }
538     }
539     // Index offset to be added to the buffer for the next tile depending on how many
540     // blocks were rasterized already in this tile
541     blocksRasterized += count32;
542 
543     return MOS_STATUS_SUCCESS;
544 }
545 
GetSegmentBlockIndexInFrame(uint32_t frameWidth,uint32_t curr32XInTile,uint32_t curr32YInTile,uint32_t currTileStartX64aligned,uint32_t currTileStartY64aligned)546 uint32_t Vp9Segmentation::GetSegmentBlockIndexInFrame(
547     uint32_t frameWidth,
548     uint32_t curr32XInTile,
549     uint32_t curr32YInTile,
550     uint32_t currTileStartX64aligned,
551     uint32_t currTileStartY64aligned)
552 {
553     ENCODE_FUNC_CALL();
554     uint32_t frameWidthIn32     = MOS_ALIGN_CEIL(frameWidth, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32;
555     uint32_t curr32XInFrame     = currTileStartX64aligned / 32 + curr32XInTile;
556     uint32_t curr32YInFrame     = currTileStartY64aligned / 32 + curr32YInTile;
557     uint32_t curr32BlockInFrame = curr32YInFrame * frameWidthIn32 + curr32XInFrame;
558     return curr32BlockInFrame;
559 }
560 
CalculateBufferOffset(uint32_t idx,uint32_t width,uint32_t blockSize,uint32_t bufferPitch)561 uint32_t Vp9Segmentation::CalculateBufferOffset(uint32_t idx, uint32_t width, uint32_t blockSize, uint32_t bufferPitch)
562 {
563     uint32_t y = idx / (MOS_ALIGN_CEIL(width, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32);
564     uint32_t x = idx % (MOS_ALIGN_CEIL(width, CODEC_VP9_SUPER_BLOCK_WIDTH) / 32);
565 
566     switch (blockSize)
567     {
568     case 0:  // 16x16
569         x *= 2;
570         y *= 2;
571         break;
572     case 1:  // 32x32 (no multiplier since streamin chunks are for 32x32)
573         break;
574     case 2:  // 64x64
575         x /= 2;
576         y /= 2;
577         break;
578     case 3:  // 8x8
579         x *= 4;
580         y *= 4;
581         break;
582     }
583 
584     uint32_t addr = y * bufferPitch;
585     addr += x;
586 
587     return addr;
588 }
589 
SetSegmentId(uint8_t segmentId)590 MOS_STATUS Vp9Segmentation::SetSegmentId(uint8_t segmentId)
591 {
592     m_segmentId = segmentId;
593 
594     return MOS_STATUS_SUCCESS;
595 }
596 
//!
//! \brief    Set the HCP_VP9_SEGMENT_STATE parameters of the selected segment
//! \details  Uses the segment chosen through SetSegmentId() and the segment
//!           params cached by Update(). The loop filter level and Q index deltas
//!           are converted to sign-magnitude representation.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, else fail reason
//!
MHW_SETPAR_DECL_SRC(HCP_VP9_SEGMENT_STATE, Vp9Segmentation)
{
    ENCODE_FUNC_CALL();

    // Both pointers are dereferenced below and are only set once construction / Update() succeeded.
    ENCODE_CHK_NULL_RETURN(m_basicFeature);
    ENCODE_CHK_NULL_RETURN(m_vp9SegmentParams);

    // Guard the SegData[] access against an out of range segment index.
    if (m_segmentId >= CODEC_VP9_MAX_SEGMENTS)
    {
        return MOS_STATUS_INVALID_PARAMETER;
    }

    auto vp9SegData = m_vp9SegmentParams->SegData[m_segmentId];

    params.segmentId = m_segmentId;

    params.segmentSkipped          = vp9SegData.SegmentFlags.fields.SegmentSkipped;
    params.segmentReference        = vp9SegData.SegmentFlags.fields.SegmentReference;
    params.segmentReferenceEnabled = vp9SegData.SegmentFlags.fields.SegmentReferenceEnabled;

    params.segmentLfLevelDeltaEncodeModeOnly = m_basicFeature->Convert2SignMagnitude(vp9SegData.SegmentLFLevelDelta, 7);
    params.segmentQindexDeltaEncodeModeOnly  = m_basicFeature->Convert2SignMagnitude(vp9SegData.SegmentQIndexDelta, 9);

    return MOS_STATUS_SUCCESS;
}
614 
//!
//! \brief    Set the segmentation related VDENC_CMD2 parameters
//! \details  Enables stream-in when a segment map is provided or 16x ME is on and
//!           fills the eight per-segment QP slots: luma AC Q index plus the
//!           segment's Q index delta when segmentation is enabled, luma AC Q index
//!           plus the luma DC Q index delta otherwise.
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS if success, else fail reason
//!
MHW_SETPAR_DECL_SRC(VDENC_CMD2, Vp9Segmentation)
{
    ENCODE_FUNC_CALL();

    // Validate the basic feature itself before reaching through it.
    MHW_MI_CHK_NULL(m_basicFeature);
    MHW_MI_CHK_NULL(m_basicFeature->m_vp9PicParams);
    MHW_MI_CHK_NULL(m_vp9SegmentParams);

    auto vp9PicParams = m_basicFeature->m_vp9PicParams;

    const bool    segmentationEnabled = vp9PicParams->PicFlags.fields.segmentation_enabled;
    const uint8_t lumaAcqIndex        = vp9PicParams->LumaACQIndex;
    const uint8_t lumaDcqIndexDelta   = vp9PicParams->LumaDCQIndexDelta;

    const CODEC_VP9_ENCODE_SEG_PARAMS *segData = m_vp9SegmentParams->SegData;

    params.vdencStreamIn = m_segmentMapProvided || m_basicFeature->m_16xMeEnabled;

    // QpForSeg0..QpForSeg7 (DW24 and DW25)
    const uint32_t numSegmentQps = 8;

    if (segmentationEnabled)
    {
        for (uint32_t seg = 0; seg < numSegmentQps; ++seg)
        {
            params.qpForSegs[seg] = lumaAcqIndex + segData[seg].SegmentQIndexDelta;
        }
    }
    else  // Segmentation disabled
    {
        for (uint32_t seg = 0; seg < numSegmentQps; ++seg)
        {
            params.qpForSegs[seg] = lumaAcqIndex + lumaDcqIndexDelta;
        }
    }

    return MOS_STATUS_SUCCESS;
}
671 
672 }  // namespace encode
673