1 /*
2 * Copyright © 2021 Red Hat
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "anv_private.h"
25
26 #include "genxml/gen_macros.h"
27 #include "genxml/genX_pack.h"
28
29 #include "util/vl_zscan_data.h"
30
31 void
genX(CmdBeginVideoCodingKHR)32 genX(CmdBeginVideoCodingKHR)(VkCommandBuffer commandBuffer,
33 const VkVideoBeginCodingInfoKHR *pBeginInfo)
34 {
35 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
36 ANV_FROM_HANDLE(anv_video_session, vid, pBeginInfo->videoSession);
37 ANV_FROM_HANDLE(anv_video_session_params, params, pBeginInfo->videoSessionParameters);
38
39 cmd_buffer->video.vid = vid;
40 cmd_buffer->video.params = params;
41 }
42
43 void
genX(CmdControlVideoCodingKHR)44 genX(CmdControlVideoCodingKHR)(VkCommandBuffer commandBuffer,
45 const VkVideoCodingControlInfoKHR *pCodingControlInfo)
46 {
47 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
48
49 if (pCodingControlInfo->flags & VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR) {
50 anv_batch_emit(&cmd_buffer->batch, GENX(MI_FLUSH_DW), flush) {
51 flush.VideoPipelineCacheInvalidate = 1;
52 }
53 }
54
55 if (pCodingControlInfo->flags & VK_VIDEO_CODING_CONTROL_ENCODE_RATE_CONTROL_BIT_KHR) {
56 const struct VkVideoEncodeRateControlInfoKHR *rate_control_info =
57 vk_find_struct_const(pCodingControlInfo->pNext, VIDEO_ENCODE_RATE_CONTROL_INFO_KHR);
58
59 /* Support for only CQP rate control for the moment */
60 assert((rate_control_info->rateControlMode == VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DEFAULT_KHR) ||
61 (rate_control_info->rateControlMode == VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR));
62
63 cmd_buffer->video.params->rc_mode = rate_control_info->rateControlMode;
64 } else {
65 cmd_buffer->video.params->rc_mode = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DEFAULT_KHR;
66 }
67 }
68
69 void
genX(CmdEndVideoCodingKHR)70 genX(CmdEndVideoCodingKHR)(VkCommandBuffer commandBuffer,
71 const VkVideoEndCodingInfoKHR *pEndCodingInfo)
72 {
73 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
74
75 cmd_buffer->video.vid = NULL;
76 cmd_buffer->video.params = NULL;
77 }
78
79 /*
80 * The default scan order of scaling lists is up-right-diagonal
81 * according to the spec. But the device requires raster order,
82 * so we need to convert from the passed scaling lists.
83 */
84 static void
anv_h265_matrix_from_uprightdiagonal(StdVideoH265ScalingLists * out_sl,const StdVideoH265ScalingLists * sl)85 anv_h265_matrix_from_uprightdiagonal(StdVideoH265ScalingLists *out_sl,
86 const StdVideoH265ScalingLists *sl)
87 {
88 uint8_t i, j;
89
90 for (i = 0; i < 6; i++) {
91 for (j = 0; j < STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS; j++)
92 out_sl->ScalingList4x4[i][vl_zscan_h265_up_right_diagonal_16[j]] =
93 sl->ScalingList4x4[i][j];
94
95 for (j = 0; j < STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS; j++)
96 out_sl->ScalingList8x8[i][vl_zscan_h265_up_right_diagonal[j]] =
97 sl->ScalingList8x8[i][j];
98
99 for (j = 0; j < STD_VIDEO_H265_SCALING_LIST_16X16_NUM_ELEMENTS; j++)
100 out_sl->ScalingList16x16[i][vl_zscan_h265_up_right_diagonal[j]] =
101 sl->ScalingList16x16[i][j];
102 }
103
104 for (i = 0; i < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS; i++) {
105 for (j = 0; j < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS; j++)
106 out_sl->ScalingList32x32[i][vl_zscan_h265_up_right_diagonal[j]] =
107 sl->ScalingList32x32[i][j];
108 }
109 }
110
111 static void
scaling_list(struct anv_cmd_buffer * cmd_buffer,const StdVideoH265ScalingLists * scaling_list)112 scaling_list(struct anv_cmd_buffer *cmd_buffer,
113 const StdVideoH265ScalingLists *scaling_list)
114 {
115 StdVideoH265ScalingLists out_sl = {0, };
116
117 anv_h265_matrix_from_uprightdiagonal(&out_sl, scaling_list);
118
119 /* 4x4, 8x8, 16x16, 32x32 */
120 for (uint8_t size = 0; size < 4; size++) {
121 /* Intra, Inter */
122 for (uint8_t pred = 0; pred < 2; pred++) {
123 /* Y, Cb, Cr */
124 for (uint8_t color = 0; color < 3; color++) {
125 if (size == 3 && color > 0)
126 continue;
127
128 anv_batch_emit(&cmd_buffer->batch, GENX(HCP_QM_STATE), qm) {
129 qm.SizeID = size;
130 qm.PredictionType = pred;
131 qm.ColorComponent = color;
132
133 qm.DCCoefficient = size > 1 ?
134 (size == 2 ? scaling_list->ScalingListDCCoef16x16[3 * pred + color] :
135 scaling_list->ScalingListDCCoef32x32[pred]) : 0;
136
137 if (size == 0) {
138 for (uint8_t i = 0; i < 4; i++)
139 for (uint8_t j = 0; j < 4; j++)
140 qm.QuantizerMatrix8x8[4 * i + j] =
141 out_sl.ScalingList4x4[3 * pred + color][4 * i + j];
142 } else if (size == 1) {
143 for (uint8_t i = 0; i < 8; i++)
144 for (uint8_t j = 0; j < 8; j++)
145 qm.QuantizerMatrix8x8[8 * i + j] =
146 out_sl.ScalingList8x8[3 * pred + color][8 * i + j];
147 } else if (size == 2) {
148 for (uint8_t i = 0; i < 8; i++)
149 for (uint8_t j = 0; j < 8; j++)
150 qm.QuantizerMatrix8x8[8 * i + j] =
151 out_sl.ScalingList16x16[3 * pred + color][8 * i + j];
152 } else if (size == 3) {
153 for (uint8_t i = 0; i < 8; i++)
154 for (uint8_t j = 0; j < 8; j++)
155 qm.QuantizerMatrix8x8[8 * i + j] =
156 out_sl.ScalingList32x32[pred][8 * i + j];
157 }
158 }
159 }
160 }
161 }
162 }
163
164 static void
anv_h265_decode_video(struct anv_cmd_buffer * cmd_buffer,const VkVideoDecodeInfoKHR * frame_info)165 anv_h265_decode_video(struct anv_cmd_buffer *cmd_buffer,
166 const VkVideoDecodeInfoKHR *frame_info)
167 {
168 ANV_FROM_HANDLE(anv_buffer, src_buffer, frame_info->srcBuffer);
169 struct anv_video_session *vid = cmd_buffer->video.vid;
170 struct anv_video_session_params *params = cmd_buffer->video.params;
171
172 const struct VkVideoDecodeH265PictureInfoKHR *h265_pic_info =
173 vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H265_PICTURE_INFO_KHR);
174
175 const StdVideoH265SequenceParameterSet *sps =
176 vk_video_find_h265_dec_std_sps(¶ms->vk, h265_pic_info->pStdPictureInfo->pps_seq_parameter_set_id);
177 const StdVideoH265PictureParameterSet *pps =
178 vk_video_find_h265_dec_std_pps(¶ms->vk, h265_pic_info->pStdPictureInfo->pps_pic_parameter_set_id);
179
180 struct vk_video_h265_reference ref_slots[2][8] = { 0 };
181 uint8_t dpb_idx[ANV_VIDEO_H265_MAX_NUM_REF_FRAME] = { 0,};
182 bool is_10bit = sps->bit_depth_chroma_minus8 || sps->bit_depth_luma_minus8;
183
184 anv_batch_emit(&cmd_buffer->batch, GENX(MI_FLUSH_DW), flush) {
185 flush.VideoPipelineCacheInvalidate = 1;
186 };
187
188 #if GFX_VER >= 12
189 anv_batch_emit(&cmd_buffer->batch, GENX(MI_FORCE_WAKEUP), wake) {
190 wake.HEVCPowerWellControl = 1;
191 wake.MaskBits = 768;
192 }
193
194 anv_batch_emit(&cmd_buffer->batch, GENX(VD_CONTROL_STATE), cs) {
195 cs.PipelineInitialization = true;
196 }
197
198 anv_batch_emit(&cmd_buffer->batch, GENX(MFX_WAIT), mfx) {
199 mfx.MFXSyncControlFlag = 1;
200 }
201 #endif
202
203 anv_batch_emit(&cmd_buffer->batch, GENX(HCP_PIPE_MODE_SELECT), sel) {
204 sel.CodecSelect = Decode;
205 sel.CodecStandardSelect = HEVC;
206 }
207
208 #if GFX_VER >= 12
209 anv_batch_emit(&cmd_buffer->batch, GENX(MFX_WAIT), mfx) {
210 mfx.MFXSyncControlFlag = 1;
211 }
212 #endif
213
214 const struct anv_image_view *iv =
215 anv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
216 const struct anv_image *img = iv->image;
217
218 anv_batch_emit(&cmd_buffer->batch, GENX(HCP_SURFACE_STATE), ss) {
219 ss.SurfacePitch = img->planes[0].primary_surface.isl.row_pitch_B - 1;
220 ss.SurfaceID = HCP_CurrentDecodedPicture;
221 ss.SurfaceFormat = is_10bit ? P010 : PLANAR_420_8;
222
223 ss.YOffsetforUCb = img->planes[1].primary_surface.memory_range.offset /
224 img->planes[0].primary_surface.isl.row_pitch_B;
225
226 #if GFX_VER >= 11
227 ss.DefaultAlphaValue = 0xffff;
228 #endif
229 }
230
231 #if GFX_VER >= 12
232 /* Seems to need to set same states to ref as decode on gen12 */
233 anv_batch_emit(&cmd_buffer->batch, GENX(HCP_SURFACE_STATE), ss) {
234 ss.SurfacePitch = img->planes[0].primary_surface.isl.row_pitch_B - 1;
235 ss.SurfaceID = HCP_ReferencePicture;
236 ss.SurfaceFormat = is_10bit ? P010 : PLANAR_420_8;
237
238 ss.YOffsetforUCb = img->planes[1].primary_surface.memory_range.offset /
239 img->planes[0].primary_surface.isl.row_pitch_B;
240
241 ss.DefaultAlphaValue = 0xffff;
242 }
243 #endif
244
245 anv_batch_emit(&cmd_buffer->batch, GENX(HCP_PIPE_BUF_ADDR_STATE), buf) {
246 buf.DecodedPictureAddress =
247 anv_image_address(img, &img->planes[0].primary_surface.memory_range);
248
249 buf.DecodedPictureMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
250 .MOCS = anv_mocs(cmd_buffer->device, buf.DecodedPictureAddress.bo, 0),
251 };
252
253 buf.DeblockingFilterLineBufferAddress = (struct anv_address) {
254 vid->vid_mem[ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_LINE].mem->bo,
255 vid->vid_mem[ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_LINE].offset
256 };
257
258 buf.DeblockingFilterLineBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
259 .MOCS = anv_mocs(cmd_buffer->device, buf.DeblockingFilterLineBufferAddress.bo, 0),
260 };
261
262 buf.DeblockingFilterTileLineBufferAddress = (struct anv_address) {
263 vid->vid_mem[ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_LINE].mem->bo,
264 vid->vid_mem[ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_LINE].offset
265 };
266
267 buf.DeblockingFilterTileLineBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
268 .MOCS = anv_mocs(cmd_buffer->device, buf.DeblockingFilterTileLineBufferAddress.bo, 0),
269 };
270
271 buf.DeblockingFilterTileColumnBufferAddress = (struct anv_address) {
272 vid->vid_mem[ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_COLUMN].mem->bo,
273 vid->vid_mem[ANV_VID_MEM_H265_DEBLOCK_FILTER_ROW_STORE_TILE_COLUMN].offset
274 };
275
276 buf.DeblockingFilterTileColumnBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
277 .MOCS = anv_mocs(cmd_buffer->device, buf.DeblockingFilterTileColumnBufferAddress.bo, 0),
278 };
279
280 buf.MetadataLineBufferAddress = (struct anv_address) {
281 vid->vid_mem[ANV_VID_MEM_H265_METADATA_LINE].mem->bo,
282 vid->vid_mem[ANV_VID_MEM_H265_METADATA_LINE].offset
283 };
284
285 buf.MetadataLineBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
286 .MOCS = anv_mocs(cmd_buffer->device, buf.MetadataLineBufferAddress.bo, 0),
287 };
288
289 buf.MetadataTileLineBufferAddress = (struct anv_address) {
290 vid->vid_mem[ANV_VID_MEM_H265_METADATA_TILE_LINE].mem->bo,
291 vid->vid_mem[ANV_VID_MEM_H265_METADATA_TILE_LINE].offset
292 };
293
294 buf.MetadataTileLineBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
295 .MOCS = anv_mocs(cmd_buffer->device, buf.MetadataTileLineBufferAddress.bo, 0),
296 };
297
298 buf.MetadataTileColumnBufferAddress = (struct anv_address) {
299 vid->vid_mem[ANV_VID_MEM_H265_METADATA_TILE_COLUMN].mem->bo,
300 vid->vid_mem[ANV_VID_MEM_H265_METADATA_TILE_COLUMN].offset
301 };
302
303 buf.MetadataTileColumnBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
304 .MOCS = anv_mocs(cmd_buffer->device, buf.MetadataTileColumnBufferAddress.bo, 0),
305 };
306
307 buf.SAOLineBufferAddress = (struct anv_address) {
308 vid->vid_mem[ANV_VID_MEM_H265_SAO_LINE].mem->bo,
309 vid->vid_mem[ANV_VID_MEM_H265_SAO_LINE].offset
310 };
311
312 buf.SAOLineBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
313 .MOCS = anv_mocs(cmd_buffer->device, buf.SAOLineBufferAddress.bo, 0),
314 };
315
316 buf.SAOTileLineBufferAddress = (struct anv_address) {
317 vid->vid_mem[ANV_VID_MEM_H265_SAO_TILE_LINE].mem->bo,
318 vid->vid_mem[ANV_VID_MEM_H265_SAO_TILE_LINE].offset
319 };
320
321 buf.SAOTileLineBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
322 .MOCS = anv_mocs(cmd_buffer->device, buf.SAOTileLineBufferAddress.bo, 0),
323 };
324
325 buf.SAOTileColumnBufferAddress = (struct anv_address) {
326 vid->vid_mem[ANV_VID_MEM_H265_SAO_TILE_COLUMN].mem->bo,
327 vid->vid_mem[ANV_VID_MEM_H265_SAO_TILE_COLUMN].offset
328 };
329
330 buf.SAOTileColumnBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
331 .MOCS = anv_mocs(cmd_buffer->device, buf.SAOTileColumnBufferAddress.bo, 0),
332 };
333
334 buf.CurrentMVTemporalBufferAddress = anv_image_address(img, &img->vid_dmv_top_surface);
335
336 buf.CurrentMVTemporalBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
337 .MOCS = anv_mocs(cmd_buffer->device, buf.CurrentMVTemporalBufferAddress.bo, 0),
338 };
339
340 for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) {
341 const struct anv_image_view *ref_iv =
342 anv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding);
343 int slot_idx = frame_info->pReferenceSlots[i].slotIndex;
344
345 assert(slot_idx < ANV_VIDEO_H265_MAX_NUM_REF_FRAME);
346 dpb_idx[slot_idx] = i;
347
348 buf.ReferencePictureAddress[i] =
349 anv_image_address(ref_iv->image, &ref_iv->image->planes[0].primary_surface.memory_range);
350 }
351
352 buf.ReferencePictureMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
353 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
354 };
355
356 buf.OriginalUncompressedPictureSourceMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
357 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
358 };
359
360 buf.StreamOutDataDestinationMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
361 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
362 };
363
364 buf.DecodedPictureStatusBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
365 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
366 };
367
368 buf.LCUILDBStreamOutBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
369 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
370 };
371
372 for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) {
373 const struct anv_image_view *ref_iv =
374 anv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding);
375
376 buf.CollocatedMVTemporalBufferAddress[i] =
377 anv_image_address(ref_iv->image, &ref_iv->image->vid_dmv_top_surface);
378 }
379
380 buf.CollocatedMVTemporalBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
381 .MOCS = anv_mocs(cmd_buffer->device, buf.CollocatedMVTemporalBufferAddress[0].bo, 0),
382 };
383
384 buf.VP9ProbabilityBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
385 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
386 };
387
388 buf.VP9SegmentIDBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
389 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
390 };
391
392 buf.VP9HVDLineRowStoreBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
393 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
394 };
395
396 buf.VP9HVDTileRowStoreBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
397 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
398 };
399 #if GFX_VER >= 11
400 buf.SAOStreamOutDataDestinationBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
401 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
402 };
403 buf.FrameStatisticsStreamOutDataDestinationBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
404 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
405 };
406 buf.SSESourcePixelRowStoreBufferMemoryAddressAttributesReadWrite = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
407 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
408 };
409 buf.HCPScalabilitySliceStateBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
410 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
411 };
412 buf.HCPScalabilityCABACDecodedSyntaxElementsBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
413 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
414 };
415 buf.MVUpperRightColumnStoreBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
416 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
417 };
418 buf.IntraPredictionUpperRightColumnStoreBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
419 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
420 };
421 buf.IntraPredictionLeftReconColumnStoreBufferMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
422 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
423 };
424 #endif
425 }
426
427 anv_batch_emit(&cmd_buffer->batch, GENX(HCP_IND_OBJ_BASE_ADDR_STATE), indirect) {
428 indirect.HCPIndirectBitstreamObjectBaseAddress =
429 anv_address_add(src_buffer->address, frame_info->srcBufferOffset & ~4095);
430
431 indirect.HCPIndirectBitstreamObjectMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
432 .MOCS = anv_mocs(cmd_buffer->device, src_buffer->address.bo, 0),
433 };
434
435 indirect.HCPIndirectBitstreamObjectAccessUpperBound =
436 anv_address_add(src_buffer->address, align64(frame_info->srcBufferRange, 4096));
437
438 indirect.HCPIndirectCUObjectMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
439 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
440 };
441
442 indirect.HCPPAKBSEObjectMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
443 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
444 };
445
446 #if GFX_VER >= 11
447 indirect.HCPVP9PAKCompressedHeaderSyntaxStreamInMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
448 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
449 };
450 indirect.HCPVP9PAKProbabilityCounterStreamOutMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
451 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
452 };
453 indirect.HCPVP9PAKProbabilityDeltasStreamInMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
454 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
455 };
456 indirect.HCPVP9PAKTileRecordStreamOutMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
457 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
458 };
459 indirect.HCPVP9PAKCULevelStatisticStreamOutMemoryAddressAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
460 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
461 };
462 #endif
463 }
464
465 if (sps->flags.scaling_list_enabled_flag) {
466 if (pps->flags.pps_scaling_list_data_present_flag) {
467 scaling_list(cmd_buffer, pps->pScalingLists);
468 } else if (sps->flags.sps_scaling_list_data_present_flag) {
469 scaling_list(cmd_buffer, sps->pScalingLists);
470 }
471 } else {
472 for (uint8_t size = 0; size < 4; size++) {
473 for (uint8_t pred = 0; pred < 2; pred++) {
474 for (uint8_t color = 0; color < 3; color++) {
475
476 if (size == 3 && color > 0)
477 continue;
478
479 anv_batch_emit(&cmd_buffer->batch, GENX(HCP_QM_STATE), qm) {
480 qm.SizeID = size;
481 qm.PredictionType = pred;
482 qm.ColorComponent = color;
483 qm.DCCoefficient = (size > 1) ? 16 : 0;
484 unsigned len = (size == 0) ? 16 : 64;
485
486 for (uint8_t q = 0; q < len; q++)
487 qm.QuantizerMatrix8x8[q] = 0x10;
488 }
489 }
490 }
491 }
492 }
493
494 anv_batch_emit(&cmd_buffer->batch, GENX(HCP_PIC_STATE), pic) {
495 pic.FrameWidthInMinimumCodingBlockSize =
496 sps->pic_width_in_luma_samples / (1 << (sps->log2_min_luma_coding_block_size_minus3 + 3)) - 1;
497 pic.FrameHeightInMinimumCodingBlockSize =
498 sps->pic_height_in_luma_samples / (1 << (sps->log2_min_luma_coding_block_size_minus3 + 3)) - 1;
499
500 pic.MinCUSize = sps->log2_min_luma_coding_block_size_minus3 & 0x3;
501 pic.LCUSize = (sps->log2_diff_max_min_luma_coding_block_size +
502 sps->log2_min_luma_coding_block_size_minus3) & 0x3;
503
504 pic.MinTUSize = sps->log2_min_luma_transform_block_size_minus2 & 0x3;
505 pic.MaxTUSize = (sps->log2_diff_max_min_luma_transform_block_size + sps->log2_min_luma_transform_block_size_minus2) & 0x3;
506 pic.MinPCMSize = sps->log2_min_pcm_luma_coding_block_size_minus3 & 0x3;
507 pic.MaxPCMSize = (sps->log2_diff_max_min_pcm_luma_coding_block_size + sps->log2_min_pcm_luma_coding_block_size_minus3) & 0x3;
508
509 #if GFX_VER >= 11
510 pic.Log2SAOOffsetScaleLuma = pps->log2_sao_offset_scale_luma;
511 pic.Log2SAOOffsetScaleChroma = pps->log2_sao_offset_scale_chroma;
512 pic.ChromaQPOffsetListLength = pps->chroma_qp_offset_list_len_minus1;
513 pic.DiffCUChromaQPOffsetDepth = pps->diff_cu_chroma_qp_offset_depth;
514 pic.ChromaQPOffsetListEnable = pps->flags.chroma_qp_offset_list_enabled_flag;
515 pic.ChromaSubsampling = sps->chroma_format_idc;
516
517 pic.HighPrecisionOffsetsEnable = sps->flags.high_precision_offsets_enabled_flag;
518 pic.Log2MaxTransformSkipSize = pps->log2_max_transform_skip_block_size_minus2 + 2;
519 pic.CrossComponentPredictionEnable = pps->flags.cross_component_prediction_enabled_flag;
520 pic.CABACBypassAlignmentEnable = sps->flags.cabac_bypass_alignment_enabled_flag;
521 pic.PersistentRiceAdaptationEnable = sps->flags.persistent_rice_adaptation_enabled_flag;
522 pic.IntraSmoothingDisable = sps->flags.intra_smoothing_disabled_flag;
523 pic.ExplicitRDPCMEnable = sps->flags.explicit_rdpcm_enabled_flag;
524 pic.ImplicitRDPCMEnable = sps->flags.implicit_rdpcm_enabled_flag;
525 pic.TransformSkipContextEnable = sps->flags.transform_skip_context_enabled_flag;
526 pic.TransformSkipRotationEnable = sps->flags.transform_skip_rotation_enabled_flag;
527 pic.SPSRangeExtensionEnable = sps->flags.sps_range_extension_flag;
528 #endif
529
530 pic.CollocatedPictureIsISlice = false;
531 pic.CurrentPictureIsISlice = false;
532 pic.SampleAdaptiveOffsetEnable = sps->flags.sample_adaptive_offset_enabled_flag;
533 pic.PCMEnable = sps->flags.pcm_enabled_flag;
534 pic.CUQPDeltaEnable = pps->flags.cu_qp_delta_enabled_flag;
535 pic.MaxDQPDepth = pps->diff_cu_qp_delta_depth;
536 pic.PCMLoopFilterDisable = sps->flags.pcm_loop_filter_disabled_flag;
537 pic.ConstrainedIntraPrediction = pps->flags.constrained_intra_pred_flag;
538 pic.Log2ParallelMergeLevel = pps->log2_parallel_merge_level_minus2;
539 pic.SignDataHiding = pps->flags.sign_data_hiding_enabled_flag;
540 pic.LoopFilterEnable = pps->flags.loop_filter_across_tiles_enabled_flag;
541 pic.EntropyCodingSyncEnable = pps->flags.entropy_coding_sync_enabled_flag;
542 pic.TilingEnable = pps->flags.tiles_enabled_flag;
543 pic.WeightedBiPredicationEnable = pps->flags.weighted_bipred_flag;
544 pic.WeightedPredicationEnable = pps->flags.weighted_pred_flag;
545 pic.FieldPic = 0;
546 pic.TopField = true;
547 pic.TransformSkipEnable = pps->flags.transform_skip_enabled_flag;
548 pic.AMPEnable = sps->flags.amp_enabled_flag;
549 pic.TransquantBypassEnable = pps->flags.transquant_bypass_enabled_flag;
550 pic.StrongIntraSmoothingEnable = sps->flags.strong_intra_smoothing_enabled_flag;
551 pic.CUPacketStructure = 0;
552
553 pic.PictureCbQPOffset = pps->pps_cb_qp_offset;
554 pic.PictureCrQPOffset = pps->pps_cr_qp_offset;
555 pic.IntraMaxTransformHierarchyDepth = sps->max_transform_hierarchy_depth_intra;
556 pic.InterMaxTransformHierarchyDepth = sps->max_transform_hierarchy_depth_inter;
557 pic.ChromaPCMSampleBitDepth = sps->pcm_sample_bit_depth_chroma_minus1 & 0xf;
558 pic.LumaPCMSampleBitDepth = sps->pcm_sample_bit_depth_luma_minus1 & 0xf;
559
560 pic.ChromaBitDepth = sps->bit_depth_chroma_minus8;
561 pic.LumaBitDepth = sps->bit_depth_luma_minus8;
562
563 #if GFX_VER >= 11
564 pic.CbQPOffsetList0 = pps->cb_qp_offset_list[0];
565 pic.CbQPOffsetList1 = pps->cb_qp_offset_list[1];
566 pic.CbQPOffsetList2 = pps->cb_qp_offset_list[2];
567 pic.CbQPOffsetList3 = pps->cb_qp_offset_list[3];
568 pic.CbQPOffsetList4 = pps->cb_qp_offset_list[4];
569 pic.CbQPOffsetList5 = pps->cb_qp_offset_list[5];
570
571 pic.CrQPOffsetList0 = pps->cr_qp_offset_list[0];
572 pic.CrQPOffsetList1 = pps->cr_qp_offset_list[1];
573 pic.CrQPOffsetList2 = pps->cr_qp_offset_list[2];
574 pic.CrQPOffsetList3 = pps->cr_qp_offset_list[3];
575 pic.CrQPOffsetList4 = pps->cr_qp_offset_list[4];
576 pic.CrQPOffsetList5 = pps->cr_qp_offset_list[5];
577 #endif
578 }
579
580 if (pps->flags.tiles_enabled_flag) {
581 int cum = 0;
582 anv_batch_emit(&cmd_buffer->batch, GENX(HCP_TILE_STATE), tile) {
583 tile.NumberofTileColumns = pps->num_tile_columns_minus1;
584 tile.NumberofTileRows = pps->num_tile_rows_minus1;
585 for (unsigned i = 0; i < 5; i++) {
586 tile.ColumnPosition[i].CtbPos0i = cum;
587 if ((4 * i) == pps->num_tile_columns_minus1)
588 break;
589
590 cum += pps->column_width_minus1[4 * i] + 1;
591 tile.ColumnPosition[i].CtbPos1i = cum;
592
593 if ((4 * i + 1) == pps->num_tile_columns_minus1)
594 break;
595 cum += pps->column_width_minus1[4 * i + 1] + 1;
596 tile.ColumnPosition[i].CtbPos2i = cum;
597
598 if ((4 * i + 2) == pps->num_tile_columns_minus1)
599 break;
600 cum += pps->column_width_minus1[4 * i + 2] + 1;
601 tile.ColumnPosition[i].CtbPos3i = cum;
602
603 if ((4 * i + 3) >= MIN2(pps->num_tile_columns_minus1,
604 ARRAY_SIZE(pps->column_width_minus1)))
605 break;
606
607 cum += pps->column_width_minus1[4 * i + 3] + 1;
608 }
609
610 cum = 0;
611
612 for (unsigned i = 0; i < 5; i++) {
613 tile.Rowposition[i].CtbPos0i = cum;
614 if ((4 * i) == pps->num_tile_rows_minus1)
615 break;
616
617 cum += pps->row_height_minus1[4 * i] + 1;
618 tile.Rowposition[i].CtbPos1i = cum;
619
620 if ((4 * i + 1) == pps->num_tile_rows_minus1)
621 break;
622 cum += pps->row_height_minus1[4 * i + 1] + 1;
623 tile.Rowposition[i].CtbPos2i = cum;
624
625 if ((4 * i + 2) == pps->num_tile_rows_minus1)
626 break;
627 cum += pps->row_height_minus1[4 * i + 2] + 1;
628 tile.Rowposition[i].CtbPos3i = cum;
629
630 if ((4 * i + 3) == pps->num_tile_rows_minus1)
631 break;
632
633 cum += pps->row_height_minus1[4 * i + 3] + 1;
634 }
635
636 if (pps->num_tile_rows_minus1 == 20) {
637 tile.Rowposition[5].CtbPos0i = cum;
638 }
639 if (pps->num_tile_rows_minus1 == 20) {
640 tile.Rowposition[5].CtbPos0i = cum;
641 cum += pps->row_height_minus1[20] + 1;
642 tile.Rowposition[5].CtbPos1i = cum;
643 }
644 }
645 }
646
647 /* Slice parsing */
648 uint32_t last_slice = h265_pic_info->sliceSegmentCount - 1;
649 void *slice_map;
650 VkResult result =
651 anv_device_map_bo(cmd_buffer->device,
652 src_buffer->address.bo,
653 src_buffer->address.offset,
654 frame_info->srcBufferRange + frame_info->srcBufferOffset,
655 NULL /* placed_addr */,
656 &slice_map);
657 if (result != VK_SUCCESS) {
658 anv_batch_set_error(&cmd_buffer->batch, result);
659 return;
660 }
661
662 slice_map += frame_info->srcBufferOffset;
663
664 struct vk_video_h265_slice_params slice_params[h265_pic_info->sliceSegmentCount];
665
666 /* All slices should be parsed in advance to collect information necessary */
667 for (unsigned s = 0; s < h265_pic_info->sliceSegmentCount; s++) {
668 uint32_t current_offset = h265_pic_info->pSliceSegmentOffsets[s];
669 void *map = slice_map + current_offset;
670 uint32_t slice_size = 0;
671
672 if (s == last_slice)
673 slice_size = frame_info->srcBufferRange - current_offset;
674 else
675 slice_size = h265_pic_info->pSliceSegmentOffsets[s + 1] - current_offset;
676
677 vk_video_parse_h265_slice_header(frame_info, h265_pic_info, sps, pps, map, slice_size, &slice_params[s]);
678 vk_fill_video_h265_reference_info(frame_info, h265_pic_info, &slice_params[s], ref_slots);
679 }
680
681 anv_device_unmap_bo(cmd_buffer->device, src_buffer->address.bo,
682 slice_map, frame_info->srcBufferRange,
683 false /* replace */);
684
685 for (unsigned s = 0; s < h265_pic_info->sliceSegmentCount; s++) {
686 uint32_t ctb_size = 1 << (sps->log2_diff_max_min_luma_coding_block_size +
687 sps->log2_min_luma_coding_block_size_minus3 + 3);
688 uint32_t pic_width_in_min_cbs_y = sps->pic_width_in_luma_samples /
689 (1 << (sps->log2_min_luma_coding_block_size_minus3 + 3));
690 uint32_t width_in_pix = (1 << (sps->log2_min_luma_coding_block_size_minus3 + 3)) *
691 pic_width_in_min_cbs_y;
692 uint32_t ctb_w = DIV_ROUND_UP(width_in_pix, ctb_size);
693 bool is_last = (s == last_slice);
694 int slice_qp = (slice_params[s].slice_qp_delta + pps->init_qp_minus26 + 26) & 0x3f;
695
696 anv_batch_emit(&cmd_buffer->batch, GENX(HCP_SLICE_STATE), slice) {
697 slice.SliceHorizontalPosition = slice_params[s].slice_segment_address % ctb_w;
698 slice.SliceVerticalPosition = slice_params[s].slice_segment_address / ctb_w;
699
700 if (is_last) {
701 slice.NextSliceHorizontalPosition = 0;
702 slice.NextSliceVerticalPosition = 0;
703 } else {
704 slice.NextSliceHorizontalPosition = (slice_params[s + 1].slice_segment_address) % ctb_w;
705 slice.NextSliceVerticalPosition = (slice_params[s + 1].slice_segment_address) / ctb_w;
706 }
707
708 slice.SliceType = slice_params[s].slice_type;
709 slice.LastSlice = is_last;
710 slice.DependentSlice = slice_params[s].dependent_slice_segment;
711 slice.SliceTemporalMVPEnable = slice_params[s].temporal_mvp_enable;
712 slice.SliceQP = abs(slice_qp);
713 slice.SliceQPSign = slice_qp >= 0 ? 0 : 1;
714 slice.SliceCbQPOffset = slice_params[s].slice_cb_qp_offset;
715 slice.SliceCrQPOffset = slice_params[s].slice_cr_qp_offset;
716 slice.SliceHeaderDisableDeblockingFilter = pps->flags.deblocking_filter_override_enabled_flag ?
717 slice_params[s].disable_deblocking_filter_idc : pps->flags.pps_deblocking_filter_disabled_flag;
718 slice.SliceTCOffsetDiv2 = slice_params[s].tc_offset_div2;
719 slice.SliceBetaOffsetDiv2 = slice_params[s].beta_offset_div2;
720 slice.SliceLoopFilterEnable = slice_params[s].loop_filter_across_slices_enable;
721 slice.SliceSAOChroma = slice_params[s].sao_chroma_flag;
722 slice.SliceSAOLuma = slice_params[s].sao_luma_flag;
723 slice.MVDL1Zero = slice_params[s].mvd_l1_zero_flag;
724
725 uint8_t low_delay = true;
726
727 if (slice_params[s].slice_type == STD_VIDEO_H265_SLICE_TYPE_I) {
728 low_delay = false;
729 } else {
730 for (unsigned i = 0; i < slice_params[s].num_ref_idx_l0_active; i++) {
731 int slot_idx = ref_slots[0][i].slot_index;
732
733 if (vk_video_h265_poc_by_slot(frame_info, slot_idx) >
734 h265_pic_info->pStdPictureInfo->PicOrderCntVal) {
735 low_delay = false;
736 break;
737 }
738 }
739
740 for (unsigned i = 0; i < slice_params[s].num_ref_idx_l1_active; i++) {
741 int slot_idx = ref_slots[1][i].slot_index;
742 if (vk_video_h265_poc_by_slot(frame_info, slot_idx) >
743 h265_pic_info->pStdPictureInfo->PicOrderCntVal) {
744 low_delay = false;
745 break;
746 }
747 }
748 }
749
750 slice.LowDelay = low_delay;
751 slice.CollocatedFromL0 = slice_params[s].collocated_list == 0 ? true : false;
752 slice.Log2WeightDenominatorChroma = slice_params[s].luma_log2_weight_denom +
753 (slice_params[s].chroma_log2_weight_denom - slice_params[s].luma_log2_weight_denom);
754 slice.Log2WeightDenominatorLuma = slice_params[s].luma_log2_weight_denom;
755 slice.CABACInit = slice_params[s].cabac_init_idc;
756 slice.MaxMergeIndex = slice_params[s].max_num_merge_cand - 1;
757 slice.CollocatedMVTemporalBufferIndex =
758 dpb_idx[ref_slots[slice_params[s].collocated_list][slice_params[s].collocated_ref_idx].slot_index];
759 assert(slice.CollocatedMVTemporalBufferIndex < ANV_VIDEO_H265_HCP_NUM_REF_FRAME);
760
761 slice.SliceHeaderLength = slice_params[s].slice_data_bytes_offset;
762 slice.CABACZeroWordInsertionEnable = false;
763 slice.EmulationByteSliceInsertEnable = false;
764 slice.TailInsertionPresent = false;
765 slice.SliceDataInsertionPresent = false;
766 slice.HeaderInsertionPresent = false;
767
768 slice.IndirectPAKBSEDataStartOffset = 0;
769 slice.TransformSkipLambda = 0;
770 slice.TransformSkipNumberofNonZeroCoeffsFactor0 = 0;
771 slice.TransformSkipNumberofZeroCoeffsFactor0 = 0;
772 slice.TransformSkipNumberofNonZeroCoeffsFactor1 = 0;
773 slice.TransformSkipNumberofZeroCoeffsFactor1 = 0;
774
775 #if GFX_VER >= 12
776 slice.OriginalSliceStartCtbX = slice_params[s].slice_segment_address % ctb_w;
777 slice.OriginalSliceStartCtbY = slice_params[s].slice_segment_address / ctb_w;
778 #endif
779 }
780
781 if (slice_params[s].slice_type != STD_VIDEO_H265_SLICE_TYPE_I) {
782 anv_batch_emit(&cmd_buffer->batch, GENX(HCP_REF_IDX_STATE), ref) {
783 ref.ReferencePictureListSelect = 0;
784 ref.NumberofReferenceIndexesActive = slice_params[s].num_ref_idx_l0_active - 1;
785
786 for (unsigned i = 0; i < ref.NumberofReferenceIndexesActive + 1; i++) {
787 int slot_idx = ref_slots[0][i].slot_index;
788 unsigned poc = ref_slots[0][i].pic_order_cnt;
789 int32_t diff_poc = h265_pic_info->pStdPictureInfo->PicOrderCntVal - poc;
790
791 assert(dpb_idx[slot_idx] < ANV_VIDEO_H265_HCP_NUM_REF_FRAME);
792
793 ref.ReferenceListEntry[i].ListEntry = dpb_idx[slot_idx];
794 ref.ReferenceListEntry[i].ReferencePicturetbValue = CLAMP(diff_poc, -128, 127) & 0xff;
795 ref.ReferenceListEntry[i].TopField = true;
796 }
797 }
798 }
799
800 if (slice_params[s].slice_type == STD_VIDEO_H265_SLICE_TYPE_B) {
801 anv_batch_emit(&cmd_buffer->batch, GENX(HCP_REF_IDX_STATE), ref) {
802 ref.ReferencePictureListSelect = 1;
803 ref.NumberofReferenceIndexesActive = slice_params[s].num_ref_idx_l1_active - 1;
804
805 for (unsigned i = 0; i < ref.NumberofReferenceIndexesActive + 1; i++) {
806 int slot_idx = ref_slots[1][i].slot_index;;
807 unsigned poc = ref_slots[1][i].pic_order_cnt;
808 int32_t diff_poc = h265_pic_info->pStdPictureInfo->PicOrderCntVal - poc;
809
810 assert(dpb_idx[slot_idx] < ANV_VIDEO_H265_HCP_NUM_REF_FRAME);
811
812 ref.ReferenceListEntry[i].ListEntry = dpb_idx[slot_idx];
813 ref.ReferenceListEntry[i].ReferencePicturetbValue = CLAMP(diff_poc, -128, 127) & 0xff;
814 ref.ReferenceListEntry[i].TopField = true;
815 }
816 }
817 }
818
819 if ((pps->flags.weighted_pred_flag && (slice_params[s].slice_type == STD_VIDEO_H265_SLICE_TYPE_P)) ||
820 (pps->flags.weighted_bipred_flag && (slice_params[s].slice_type == STD_VIDEO_H265_SLICE_TYPE_B))) {
821 anv_batch_emit(&cmd_buffer->batch, GENX(HCP_WEIGHTOFFSET_STATE), w) {
822 w.ReferencePictureListSelect = 0;
823
824 for (unsigned i = 0; i < ANV_VIDEO_H265_MAX_NUM_REF_FRAME; i++) {
825 w.LumaOffsets[i].DeltaLumaWeightLX = slice_params[s].delta_luma_weight_l0[i] & 0xff;
826 w.LumaOffsets[i].LumaOffsetLX = slice_params[s].luma_offset_l0[i] & 0xff;
827 w.ChromaOffsets[i].DeltaChromaWeightLX0 = slice_params[s].delta_chroma_weight_l0[i][0] & 0xff;
828 w.ChromaOffsets[i].ChromaOffsetLX0 = slice_params[s].chroma_offset_l0[i][0] & 0xff;
829 w.ChromaOffsets[i].DeltaChromaWeightLX1 = slice_params[s].delta_chroma_weight_l0[i][1] & 0xff;
830 w.ChromaOffsets[i].ChromaOffsetLX1 = slice_params[s].chroma_offset_l0[i][1] & 0xff;
831 }
832 }
833
834 if (slice_params[s].slice_type == STD_VIDEO_H265_SLICE_TYPE_B) {
835 anv_batch_emit(&cmd_buffer->batch, GENX(HCP_WEIGHTOFFSET_STATE), w) {
836 w.ReferencePictureListSelect = 1;
837
838 for (unsigned i = 0; i < ANV_VIDEO_H265_MAX_NUM_REF_FRAME; i++) {
839 w.LumaOffsets[i].DeltaLumaWeightLX = slice_params[s].delta_luma_weight_l1[i] & 0xff;
840 w.LumaOffsets[i].LumaOffsetLX = slice_params[s].luma_offset_l1[i] & 0xff;
841 w.ChromaOffsets[i].DeltaChromaWeightLX0 = slice_params[s].delta_chroma_weight_l1[i][0] & 0xff;
842 w.ChromaOffsets[i].DeltaChromaWeightLX1 = slice_params[s].delta_chroma_weight_l1[i][1] & 0xff;
843 w.ChromaOffsets[i].ChromaOffsetLX0 = slice_params[s].chroma_offset_l1[i][0] & 0xff;
844 w.ChromaOffsets[i].ChromaOffsetLX1 = slice_params[s].chroma_offset_l1[i][1] & 0xff;
845 }
846 }
847 }
848 }
849
850 uint32_t buffer_offset = frame_info->srcBufferOffset & 4095;
851
852 anv_batch_emit(&cmd_buffer->batch, GENX(HCP_BSD_OBJECT), bsd) {
853 bsd.IndirectBSDDataLength = slice_params[s].slice_size - 3;
854 bsd.IndirectBSDDataStartAddress = buffer_offset + h265_pic_info->pSliceSegmentOffsets[s] + 3;
855 }
856 }
857
858 #if GFX_VER >= 12
859 anv_batch_emit(&cmd_buffer->batch, GENX(VD_CONTROL_STATE), cs) {
860 cs.MemoryImplicitFlush = true;
861 }
862 #endif
863
864 anv_batch_emit(&cmd_buffer->batch, GENX(VD_PIPELINE_FLUSH), flush) {
865 flush.HEVCPipelineDone = true;
866 flush.HEVCPipelineCommandFlush = true;
867 flush.VDCommandMessageParserDone = true;
868 }
869 }
870
871 static void
anv_h264_decode_video(struct anv_cmd_buffer * cmd_buffer,const VkVideoDecodeInfoKHR * frame_info)872 anv_h264_decode_video(struct anv_cmd_buffer *cmd_buffer,
873 const VkVideoDecodeInfoKHR *frame_info)
874 {
875 ANV_FROM_HANDLE(anv_buffer, src_buffer, frame_info->srcBuffer);
876 struct anv_video_session *vid = cmd_buffer->video.vid;
877 struct anv_video_session_params *params = cmd_buffer->video.params;
878 const struct VkVideoDecodeH264PictureInfoKHR *h264_pic_info =
879 vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H264_PICTURE_INFO_KHR);
880 const StdVideoH264SequenceParameterSet *sps = vk_video_find_h264_dec_std_sps(¶ms->vk, h264_pic_info->pStdPictureInfo->seq_parameter_set_id);
881 const StdVideoH264PictureParameterSet *pps = vk_video_find_h264_dec_std_pps(¶ms->vk, h264_pic_info->pStdPictureInfo->pic_parameter_set_id);
882
883 anv_batch_emit(&cmd_buffer->batch, GENX(MI_FLUSH_DW), flush) {
884 flush.DWordLength = 2;
885 flush.VideoPipelineCacheInvalidate = 1;
886 };
887
888 #if GFX_VER >= 12
889 anv_batch_emit(&cmd_buffer->batch, GENX(MI_FORCE_WAKEUP), wake) {
890 wake.MFXPowerWellControl = 1;
891 wake.MaskBits = 768;
892 }
893
894 anv_batch_emit(&cmd_buffer->batch, GENX(MFX_WAIT), mfx) {
895 mfx.MFXSyncControlFlag = 1;
896 }
897 #endif
898
899 anv_batch_emit(&cmd_buffer->batch, GENX(MFX_PIPE_MODE_SELECT), sel) {
900 sel.StandardSelect = SS_AVC;
901 sel.CodecSelect = Decode;
902 sel.DecoderShortFormatMode = ShortFormatDriverInterface;
903 sel.DecoderModeSelect = VLDMode; // Hardcoded
904
905 sel.PreDeblockingOutputEnable = 0;
906 sel.PostDeblockingOutputEnable = 1;
907 }
908
909 #if GFX_VER >= 12
910 anv_batch_emit(&cmd_buffer->batch, GENX(MFX_WAIT), mfx) {
911 mfx.MFXSyncControlFlag = 1;
912 }
913 #endif
914
915 const struct anv_image_view *iv = anv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
916 const struct anv_image *img = iv->image;
917 anv_batch_emit(&cmd_buffer->batch, GENX(MFX_SURFACE_STATE), ss) {
918 ss.Width = img->vk.extent.width - 1;
919 ss.Height = img->vk.extent.height - 1;
920 ss.SurfaceFormat = PLANAR_420_8; // assert on this?
921 ss.InterleaveChroma = 1;
922 ss.SurfacePitch = img->planes[0].primary_surface.isl.row_pitch_B - 1;
923 ss.TiledSurface = img->planes[0].primary_surface.isl.tiling != ISL_TILING_LINEAR;
924 ss.TileWalk = TW_YMAJOR;
925
926 ss.YOffsetforUCb = ss.YOffsetforVCr =
927 img->planes[1].primary_surface.memory_range.offset / img->planes[0].primary_surface.isl.row_pitch_B;
928 }
929
930 anv_batch_emit(&cmd_buffer->batch, GENX(MFX_PIPE_BUF_ADDR_STATE), buf) {
931 bool use_pre_deblock = false;
932 if (use_pre_deblock) {
933 buf.PreDeblockingDestinationAddress = anv_image_address(img,
934 &img->planes[0].primary_surface.memory_range);
935 } else {
936 buf.PostDeblockingDestinationAddress = anv_image_address(img,
937 &img->planes[0].primary_surface.memory_range);
938 }
939 buf.PreDeblockingDestinationAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
940 .MOCS = anv_mocs(cmd_buffer->device, buf.PreDeblockingDestinationAddress.bo, 0),
941 };
942 buf.PostDeblockingDestinationAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
943 .MOCS = anv_mocs(cmd_buffer->device, buf.PostDeblockingDestinationAddress.bo, 0),
944 };
945
946 buf.IntraRowStoreScratchBufferAddress = (struct anv_address) { vid->vid_mem[ANV_VID_MEM_H264_INTRA_ROW_STORE].mem->bo, vid->vid_mem[ANV_VID_MEM_H264_INTRA_ROW_STORE].offset };
947 buf.IntraRowStoreScratchBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
948 .MOCS = anv_mocs(cmd_buffer->device, buf.IntraRowStoreScratchBufferAddress.bo, 0),
949 };
950 buf.DeblockingFilterRowStoreScratchAddress = (struct anv_address) { vid->vid_mem[ANV_VID_MEM_H264_DEBLOCK_FILTER_ROW_STORE].mem->bo, vid->vid_mem[ANV_VID_MEM_H264_DEBLOCK_FILTER_ROW_STORE].offset };
951 buf.DeblockingFilterRowStoreScratchAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
952 .MOCS = anv_mocs(cmd_buffer->device, buf.DeblockingFilterRowStoreScratchAddress.bo, 0),
953 };
954 buf.MBStatusBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
955 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
956 };
957 buf.MBILDBStreamOutBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
958 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
959 };
960 buf.SecondMBILDBStreamOutBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
961 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
962 };
963 buf.ScaledReferenceSurfaceAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
964 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
965 };
966 buf.OriginalUncompressedPictureSourceAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
967 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
968 };
969 buf.StreamOutDataDestinationAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
970 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
971 };
972
973 struct anv_bo *ref_bo = NULL;
974 for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) {
975 const struct anv_image_view *ref_iv = anv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding);
976 int idx = frame_info->pReferenceSlots[i].slotIndex;
977 buf.ReferencePictureAddress[idx] = anv_image_address(ref_iv->image,
978 &ref_iv->image->planes[0].primary_surface.memory_range);
979
980 if (i == 0) {
981 ref_bo = ref_iv->image->bindings[0].address.bo;
982 }
983 }
984 buf.ReferencePictureAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
985 .MOCS = anv_mocs(cmd_buffer->device, ref_bo, 0),
986 };
987 }
988
989 anv_batch_emit(&cmd_buffer->batch, GENX(MFX_IND_OBJ_BASE_ADDR_STATE), index_obj) {
990 index_obj.MFXIndirectBitstreamObjectAddress = anv_address_add(src_buffer->address,
991 frame_info->srcBufferOffset & ~4095);
992 index_obj.MFXIndirectBitstreamObjectAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
993 .MOCS = anv_mocs(cmd_buffer->device, src_buffer->address.bo, 0),
994 };
995 index_obj.MFXIndirectMVObjectAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
996 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
997 };
998 index_obj.MFDIndirectITCOEFFObjectAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
999 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
1000 };
1001 index_obj.MFDIndirectITDBLKObjectAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1002 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
1003 };
1004 index_obj.MFCIndirectPAKBSEObjectAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1005 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
1006 };
1007 }
1008
1009 anv_batch_emit(&cmd_buffer->batch, GENX(MFX_BSP_BUF_BASE_ADDR_STATE), bsp) {
1010 bsp.BSDMPCRowStoreScratchBufferAddress = (struct anv_address) { vid->vid_mem[ANV_VID_MEM_H264_BSD_MPC_ROW_SCRATCH].mem->bo,
1011 vid->vid_mem[ANV_VID_MEM_H264_BSD_MPC_ROW_SCRATCH].offset };
1012
1013 bsp.BSDMPCRowStoreScratchBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1014 .MOCS = anv_mocs(cmd_buffer->device, bsp.BSDMPCRowStoreScratchBufferAddress.bo, 0),
1015 };
1016 bsp.MPRRowStoreScratchBufferAddress = (struct anv_address) { vid->vid_mem[ANV_VID_MEM_H264_MPR_ROW_SCRATCH].mem->bo,
1017 vid->vid_mem[ANV_VID_MEM_H264_MPR_ROW_SCRATCH].offset };
1018
1019 bsp.MPRRowStoreScratchBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1020 .MOCS = anv_mocs(cmd_buffer->device, bsp.MPRRowStoreScratchBufferAddress.bo, 0),
1021 };
1022 bsp.BitplaneReadBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1023 .MOCS = anv_mocs(cmd_buffer->device, NULL, 0),
1024 };
1025 }
1026
1027 anv_batch_emit(&cmd_buffer->batch, GENX(MFD_AVC_DPB_STATE), avc_dpb) {
1028 for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) {
1029 const struct VkVideoDecodeH264DpbSlotInfoKHR *dpb_slot =
1030 vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR);
1031 const StdVideoDecodeH264ReferenceInfo *ref_info = dpb_slot->pStdReferenceInfo;
1032 int idx = frame_info->pReferenceSlots[i].slotIndex;
1033 avc_dpb.NonExistingFrame[idx] = ref_info->flags.is_non_existing;
1034 avc_dpb.LongTermFrame[idx] = ref_info->flags.used_for_long_term_reference;
1035 if (!ref_info->flags.top_field_flag && !ref_info->flags.bottom_field_flag)
1036 avc_dpb.UsedforReference[idx] = 3;
1037 else
1038 avc_dpb.UsedforReference[idx] = ref_info->flags.top_field_flag | (ref_info->flags.bottom_field_flag << 1);
1039 avc_dpb.LTSTFrameNumberList[idx] = ref_info->FrameNum;
1040 }
1041 }
1042
1043 anv_batch_emit(&cmd_buffer->batch, GENX(MFD_AVC_PICID_STATE), picid) {
1044 picid.PictureIDRemappingDisable = true;
1045 }
1046
1047 uint32_t pic_height = sps->pic_height_in_map_units_minus1 + 1;
1048 if (!sps->flags.frame_mbs_only_flag)
1049 pic_height *= 2;
1050 anv_batch_emit(&cmd_buffer->batch, GENX(MFX_AVC_IMG_STATE), avc_img) {
1051 avc_img.FrameWidth = sps->pic_width_in_mbs_minus1;
1052 avc_img.FrameHeight = pic_height - 1;
1053 avc_img.FrameSize = (sps->pic_width_in_mbs_minus1 + 1) * pic_height;
1054
1055 if (!h264_pic_info->pStdPictureInfo->flags.field_pic_flag)
1056 avc_img.ImageStructure = FramePicture;
1057 else if (h264_pic_info->pStdPictureInfo->flags.bottom_field_flag)
1058 avc_img.ImageStructure = BottomFieldPicture;
1059 else
1060 avc_img.ImageStructure = TopFieldPicture;
1061
1062 avc_img.WeightedBiPredictionIDC = pps->weighted_bipred_idc;
1063 avc_img.WeightedPredictionEnable = pps->flags.weighted_pred_flag;
1064 avc_img.FirstChromaQPOffset = pps->chroma_qp_index_offset;
1065 avc_img.SecondChromaQPOffset = pps->second_chroma_qp_index_offset;
1066 avc_img.FieldPicture = h264_pic_info->pStdPictureInfo->flags.field_pic_flag;
1067 avc_img.MBAFFMode = (sps->flags.mb_adaptive_frame_field_flag &&
1068 !h264_pic_info->pStdPictureInfo->flags.field_pic_flag);
1069 avc_img.FrameMBOnly = sps->flags.frame_mbs_only_flag;
1070 avc_img._8x8IDCTTransformMode = pps->flags.transform_8x8_mode_flag;
1071 avc_img.Direct8x8Inference = sps->flags.direct_8x8_inference_flag;
1072 avc_img.ConstrainedIntraPrediction = pps->flags.constrained_intra_pred_flag;
1073 avc_img.NonReferencePicture = !h264_pic_info->pStdPictureInfo->flags.is_reference;
1074 avc_img.EntropyCodingSyncEnable = pps->flags.entropy_coding_mode_flag;
1075 avc_img.ChromaFormatIDC = sps->chroma_format_idc;
1076 avc_img.TrellisQuantizationChromaDisable = true;
1077 avc_img.NumberofReferenceFrames = frame_info->referenceSlotCount;
1078 avc_img.NumberofActiveReferencePicturesfromL0 = pps->num_ref_idx_l0_default_active_minus1 + 1;
1079 avc_img.NumberofActiveReferencePicturesfromL1 = pps->num_ref_idx_l1_default_active_minus1 + 1;
1080 avc_img.InitialQPValue = pps->pic_init_qp_minus26;
1081 avc_img.PicOrderPresent = pps->flags.bottom_field_pic_order_in_frame_present_flag;
1082 avc_img.DeltaPicOrderAlwaysZero = sps->flags.delta_pic_order_always_zero_flag;
1083 avc_img.PicOrderCountType = sps->pic_order_cnt_type;
1084 avc_img.DeblockingFilterControlPresent = pps->flags.deblocking_filter_control_present_flag;
1085 avc_img.RedundantPicCountPresent = pps->flags.redundant_pic_cnt_present_flag;
1086 avc_img.Log2MaxFrameNumber = sps->log2_max_frame_num_minus4;
1087 avc_img.Log2MaxPicOrderCountLSB = sps->log2_max_pic_order_cnt_lsb_minus4;
1088 avc_img.CurrentPictureFrameNumber = h264_pic_info->pStdPictureInfo->frame_num;
1089 }
1090
1091 StdVideoH264ScalingLists scaling_lists;
1092 vk_video_derive_h264_scaling_list(sps, pps, &scaling_lists);
1093 anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) {
1094 qm.DWordLength = 16;
1095 qm.AVC = AVC_4x4_Intra_MATRIX;
1096 for (unsigned m = 0; m < 3; m++)
1097 for (unsigned q = 0; q < 16; q++)
1098 qm.ForwardQuantizerMatrix[m * 16 + vl_zscan_normal_16[q]] = scaling_lists.ScalingList4x4[m][q];
1099 }
1100 anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) {
1101 qm.DWordLength = 16;
1102 qm.AVC = AVC_4x4_Inter_MATRIX;
1103 for (unsigned m = 0; m < 3; m++)
1104 for (unsigned q = 0; q < 16; q++)
1105 qm.ForwardQuantizerMatrix[m * 16 + vl_zscan_normal_16[q]] = scaling_lists.ScalingList4x4[m + 3][q];
1106 }
1107 if (pps->flags.transform_8x8_mode_flag) {
1108 anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) {
1109 qm.DWordLength = 16;
1110 qm.AVC = AVC_8x8_Intra_MATRIX;
1111 for (unsigned q = 0; q < 64; q++)
1112 qm.ForwardQuantizerMatrix[vl_zscan_normal[q]] = scaling_lists.ScalingList8x8[0][q];
1113 }
1114 anv_batch_emit(&cmd_buffer->batch, GENX(MFX_QM_STATE), qm) {
1115 qm.DWordLength = 16;
1116 qm.AVC = AVC_8x8_Inter_MATRIX;
1117 for (unsigned q = 0; q < 64; q++)
1118 qm.ForwardQuantizerMatrix[vl_zscan_normal[q]] = scaling_lists.ScalingList8x8[1][q];
1119 }
1120 }
1121
1122 anv_batch_emit(&cmd_buffer->batch, GENX(MFX_AVC_DIRECTMODE_STATE), avc_directmode) {
1123 /* bind reference frame DMV */
1124 struct anv_bo *dmv_bo = NULL;
1125 for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) {
1126 int idx = frame_info->pReferenceSlots[i].slotIndex;
1127 const struct VkVideoDecodeH264DpbSlotInfoKHR *dpb_slot =
1128 vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR);
1129 const struct anv_image_view *ref_iv = anv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding);
1130 const StdVideoDecodeH264ReferenceInfo *ref_info = dpb_slot->pStdReferenceInfo;
1131 avc_directmode.DirectMVBufferAddress[idx] = anv_image_address(ref_iv->image,
1132 &ref_iv->image->vid_dmv_top_surface);
1133 if (i == 0) {
1134 dmv_bo = ref_iv->image->bindings[0].address.bo;
1135 }
1136 avc_directmode.POCList[2 * idx] = ref_info->PicOrderCnt[0];
1137 avc_directmode.POCList[2 * idx + 1] = ref_info->PicOrderCnt[1];
1138 }
1139 avc_directmode.DirectMVBufferAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1140 .MOCS = anv_mocs(cmd_buffer->device, dmv_bo, 0),
1141 };
1142
1143 avc_directmode.DirectMVBufferWriteAddress = anv_image_address(img,
1144 &img->vid_dmv_top_surface);
1145 avc_directmode.DirectMVBufferWriteAttributes = (struct GENX(MEMORYADDRESSATTRIBUTES)) {
1146 .MOCS = anv_mocs(cmd_buffer->device, img->bindings[0].address.bo, 0),
1147 };
1148 avc_directmode.POCList[32] = h264_pic_info->pStdPictureInfo->PicOrderCnt[0];
1149 avc_directmode.POCList[33] = h264_pic_info->pStdPictureInfo->PicOrderCnt[1];
1150 }
1151
1152 uint32_t buffer_offset = frame_info->srcBufferOffset & 4095;
1153 #define HEADER_OFFSET 3
1154 for (unsigned s = 0; s < h264_pic_info->sliceCount; s++) {
1155 bool last_slice = s == (h264_pic_info->sliceCount - 1);
1156 uint32_t current_offset = h264_pic_info->pSliceOffsets[s];
1157 uint32_t this_end;
1158 if (!last_slice) {
1159 uint32_t next_offset = h264_pic_info->pSliceOffsets[s + 1];
1160 uint32_t next_end = h264_pic_info->pSliceOffsets[s + 2];
1161 if (s == h264_pic_info->sliceCount - 2)
1162 next_end = frame_info->srcBufferRange;
1163 anv_batch_emit(&cmd_buffer->batch, GENX(MFD_AVC_SLICEADDR), sliceaddr) {
1164 sliceaddr.IndirectBSDDataLength = next_end - next_offset - HEADER_OFFSET;
1165 /* start decoding after the 3-byte header. */
1166 sliceaddr.IndirectBSDDataStartAddress = buffer_offset + next_offset + HEADER_OFFSET;
1167 };
1168 this_end = next_offset;
1169 } else
1170 this_end = frame_info->srcBufferRange;
1171 anv_batch_emit(&cmd_buffer->batch, GENX(MFD_AVC_BSD_OBJECT), avc_bsd) {
1172 avc_bsd.IndirectBSDDataLength = this_end - current_offset - HEADER_OFFSET;
1173 /* start decoding after the 3-byte header. */
1174 avc_bsd.IndirectBSDDataStartAddress = buffer_offset + current_offset + HEADER_OFFSET;
1175 avc_bsd.InlineData.LastSlice = last_slice;
1176 avc_bsd.InlineData.FixPrevMBSkipped = 1;
1177 avc_bsd.InlineData.IntraPredictionErrorControl = 1;
1178 avc_bsd.InlineData.Intra8x84x4PredictionErrorConcealmentControl = 1;
1179 avc_bsd.InlineData.ISliceConcealmentMode = 1;
1180 };
1181 }
1182 }
1183
1184 void
genX(CmdDecodeVideoKHR)1185 genX(CmdDecodeVideoKHR)(VkCommandBuffer commandBuffer,
1186 const VkVideoDecodeInfoKHR *frame_info)
1187 {
1188 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1189
1190 switch (cmd_buffer->video.vid->vk.op) {
1191 case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
1192 anv_h264_decode_video(cmd_buffer, frame_info);
1193 break;
1194 case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
1195 anv_h265_decode_video(cmd_buffer, frame_info);
1196 break;
1197 default:
1198 assert(0);
1199 }
1200 }
1201