xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/d3d12/d3d12_video_dec_h264.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © Microsoft Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "d3d12_video_dec.h"
25 #include "d3d12_video_dec_h264.h"
26 #include "vl/vl_zscan.h"
27 
28 #include <cmath>
29 
30 void
d3d12_video_decoder_refresh_dpb_active_references_h264(struct d3d12_video_decoder * pD3D12Dec)31 d3d12_video_decoder_refresh_dpb_active_references_h264(struct d3d12_video_decoder *pD3D12Dec)
32 {
33    // Method overview
34    // 1. Codec specific strategy in switch statement regarding reference frames eviction policy. Should only mark active
35    // DPB references, leaving evicted ones as unused
36    // 2. Call release_unused_references_texture_memory(); at the end of this method. Any references (and texture
37    // allocations associated)
38    //    that were left not marked as used in m_spDPBManager by step (2) are lost.
39 
40    // Assign DXVA original Index7Bits indices to current frame and references
41    DXVA_PicParams_H264 *pCurrPicParams = d3d12_video_decoder_get_current_dxva_picparams<DXVA_PicParams_H264>(pD3D12Dec);
42    for (uint8_t i = 0; i < 16; i++) {
43       // From H264 DXVA spec:
44       // Index7Bits
45       //     An index that identifies an uncompressed surface for the CurrPic or RefFrameList member of the picture
46       //     parameters structure(section 4.0) or the RefPicList member of the slice control data
47       //     structure(section 6.0) When Index7Bits is used in the CurrPic and RefFrameList members of the picture
48       //     parameters structure, the value directly specifies the DXVA index of an uncompressed surface. When
49       //     Index7Bits is used in the RefPicList member of the slice control data structure, the value identifies
50       //     the surface indirectly, as an index into the RefFrameList array of the associated picture parameters
51       //     structure.For more information, see section 6.2. In all cases, when Index7Bits does not contain a valid
52       //     index, the value is 127.
53       if (pCurrPicParams->RefFrameList[i].bPicEntry != DXVA_H264_INVALID_PICTURE_ENTRY_VALUE) {
54          pCurrPicParams->RefFrameList[i].Index7Bits =
55             pD3D12Dec->m_spDPBManager->get_index7bits(pD3D12Dec->m_pCurrentReferenceTargets[i]);
56       }
57    }
58 
59    pD3D12Dec->m_spDPBManager->mark_all_references_as_unused();
60    pD3D12Dec->m_spDPBManager->mark_references_in_use(pCurrPicParams->RefFrameList);
61 
62    // Releases the underlying reference picture texture objects of all references that were not marked as used in this
63    // method.
64    pD3D12Dec->m_spDPBManager->release_unused_references_texture_memory();
65 
66    pCurrPicParams->CurrPic.Index7Bits = pD3D12Dec->m_spDPBManager->get_index7bits(pD3D12Dec->m_pCurrentDecodeTarget);
67 
68    debug_printf("[d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input] DXVA_PicParams_H264 converted "
69                  "from pipe_h264_picture_desc (No reference index remapping)\n");
70    d3d12_video_decoder_log_pic_params_h264(pCurrPicParams);
71 }
72 
73 void
d3d12_video_decoder_get_frame_info_h264(struct d3d12_video_decoder * pD3D12Dec,uint32_t * pWidth,uint32_t * pHeight,uint16_t * pMaxDPB)74 d3d12_video_decoder_get_frame_info_h264(
75    struct d3d12_video_decoder *pD3D12Dec, uint32_t *pWidth, uint32_t *pHeight, uint16_t *pMaxDPB)
76 {
77    auto pPicParams = d3d12_video_decoder_get_current_dxva_picparams<DXVA_PicParams_H264>(pD3D12Dec);
78    // wFrameWidthInMbsMinus1 Width of the frame containing this picture, in units of macroblocks, minus 1. (The width in
79    // macroblocks is wFrameWidthInMbsMinus1 plus 1.) wFrameHeightInMbsMinus1 Height of the frame containing this
80    // picture, in units of macroblocks, minus 1. (The height in macroblocks is wFrameHeightInMbsMinus1 plus 1.) When the
81    // picture is a field, the height of the frame is twice the height of the picture and is an integer multiple of 2 in
82    // units of macroblocks.
83    *pWidth = (pPicParams->wFrameWidthInMbsMinus1 + 1) * 16;
84    *pHeight = (pPicParams->wFrameHeightInMbsMinus1 + 1) / (pPicParams->frame_mbs_only_flag ? 1 : 2);
85    *pHeight = (2 - pPicParams->frame_mbs_only_flag) * *pHeight;
86    *pHeight = *pHeight * 16;
87    *pMaxDPB = pPicParams->num_ref_frames + 1;
88 }
89 
90 ///
91 /// Pushes the current frame as next reference, updates the DXVA H264 structure with the indices of the DPB and
92 /// transitions the references
93 ///
94 void
d3d12_video_decoder_prepare_current_frame_references_h264(struct d3d12_video_decoder * pD3D12Dec,ID3D12Resource * pTexture2D,uint32_t subresourceIndex)95 d3d12_video_decoder_prepare_current_frame_references_h264(struct d3d12_video_decoder *pD3D12Dec,
96                                                           ID3D12Resource *pTexture2D,
97                                                           uint32_t subresourceIndex)
98 {
99    DXVA_PicParams_H264 *pPicParams = d3d12_video_decoder_get_current_dxva_picparams<DXVA_PicParams_H264>(pD3D12Dec);
100    pPicParams->CurrPic.Index7Bits = pD3D12Dec->m_spDPBManager->store_future_reference(pPicParams->CurrPic.Index7Bits,
101                                                                                       pD3D12Dec->m_spVideoDecoderHeap,
102                                                                                       pTexture2D,
103                                                                                       subresourceIndex);
104 
105    // From H264 DXVA spec:
106    // Index7Bits
107    //     An index that identifies an uncompressed surface for the CurrPic or RefFrameList member of the picture
108    //     parameters structure(section 4.0) or the RefPicList member of the slice control data structure(section 6.0)
109    //     When Index7Bits is used in the CurrPic and RefFrameList members of the picture parameters structure, the value
110    //     directly specifies the DXVA index of an uncompressed surface. When Index7Bits is used in the RefPicList member
111    //     of the slice control data structure, the value identifies the surface indirectly, as an index into the
112    //     RefFrameList array of the associated picture parameters structure.For more information, see section 6.2. In
113    //     all cases, when Index7Bits does not contain a valid index, the value is 127.
114 
115    pD3D12Dec->m_spDPBManager->update_entries(
116       d3d12_video_decoder_get_current_dxva_picparams<DXVA_PicParams_H264>(pD3D12Dec)->RefFrameList,
117       pD3D12Dec->m_transitionsStorage);
118 
119    pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(pD3D12Dec->m_transitionsStorage.size(), pD3D12Dec->m_transitionsStorage.data());
120 
121    // Schedule reverse (back to common) transitions before command list closes for current frame
122    for (auto BarrierDesc : pD3D12Dec->m_transitionsStorage) {
123       std::swap(BarrierDesc.Transition.StateBefore, BarrierDesc.Transition.StateAfter);
124       pD3D12Dec->m_transitionsBeforeCloseCmdList.push_back(BarrierDesc);
125    }
126 
127    debug_printf(
128       "[d3d12_video_decoder_prepare_current_frame_references_h264] DXVA_PicParams_H264 after index remapping)\n");
129    d3d12_video_decoder_log_pic_params_h264(
130       d3d12_video_decoder_get_current_dxva_picparams<DXVA_PicParams_H264>(pD3D12Dec));
131 }
132 
133 void
d3d12_video_decoder_prepare_dxva_slices_control_h264(struct d3d12_video_decoder * pD3D12Dec,std::vector<uint8_t> & vecOutSliceControlBuffers,struct pipe_h264_picture_desc * picture_h264)134 d3d12_video_decoder_prepare_dxva_slices_control_h264(struct d3d12_video_decoder *pD3D12Dec,
135                                                      std::vector<uint8_t> &vecOutSliceControlBuffers,
136                                                      struct pipe_h264_picture_desc *picture_h264)
137 {
138    uint64_t TotalSlicesDXVAArrayByteSize = picture_h264->slice_count * sizeof(DXVA_Slice_H264_Short);
139    vecOutSliceControlBuffers.resize(TotalSlicesDXVAArrayByteSize);
140    uint8_t* pData = vecOutSliceControlBuffers.data();
141    assert(picture_h264->slice_parameter.slice_info_present);
142    debug_printf("[d3d12_video_decoder_h264] Upper layer reported %d slices for this frame...\n",
143                   picture_h264->slice_count);
144 
145    static const uint32_t start_code_size = 3;
146    uint32_t acum_slice_offset = (picture_h264->slice_count > 0) ? picture_h264->slice_parameter.slice_data_offset[0] : 0;
147    for (uint32_t sliceIdx = 0; sliceIdx < picture_h264->slice_count; sliceIdx++)
148    {
149       DXVA_Slice_H264_Short* currentSliceEntry = (DXVA_Slice_H264_Short*) pData;
150       // From H264 DXVA Spec
151       // wBadSliceChopping
152       // 0	All bits for the slice are located within the corresponding bitstream data buffer.
153       // 1	The bitstream data buffer contains the start of the slice, but not the entire slice, because the buffer is full.
154       // 2	The bitstream data buffer contains the end of the slice. It does not contain the start of the slice, because the start of the slice was located in the previous bitstream data buffer.
155       // 3	The bitstream data buffer does not contain the start of the slice (because the start of the slice was located in the previous bitstream data buffer),
156       //     and it does not contain the end of the slice (because the current bitstream data buffer is also full).
157 
158       switch (picture_h264->slice_parameter.slice_data_flag[sliceIdx]) {
159          /* whole slice is in the buffer */
160          case PIPE_SLICE_BUFFER_PLACEMENT_TYPE_WHOLE:
161             currentSliceEntry->wBadSliceChopping = 0u;
162             break;
163          /* The beginning of the slice is in the buffer but the end is not */
164          case PIPE_SLICE_BUFFER_PLACEMENT_TYPE_BEGIN:
165             currentSliceEntry->wBadSliceChopping = 1u;
166             break;
167          /* Neither beginning nor end of the slice is in the buffer */
168          case PIPE_SLICE_BUFFER_PLACEMENT_TYPE_MIDDLE:
169             currentSliceEntry->wBadSliceChopping = 3u;
170             break;
171          /* end of the slice is in the buffer */
172          case PIPE_SLICE_BUFFER_PLACEMENT_TYPE_END:
173             currentSliceEntry->wBadSliceChopping = 2u;
174             break;
175          default:
176          {
177             unreachable("Unsupported pipe_slice_buffer_placement_type");
178          } break;
179       }
180 
181       /* slice_data_size from pipe/va does not include the NAL unit size, DXVA requires it */
182       currentSliceEntry->SliceBytesInBuffer = picture_h264->slice_parameter.slice_data_size[sliceIdx] + start_code_size;
183 
184       /* slice_data_offset from pipe/va are relative to the current slice, and in DXVA they are absolute within the frame source buffer */
185       currentSliceEntry->BSNALunitDataLocation = acum_slice_offset;
186       acum_slice_offset += (currentSliceEntry->SliceBytesInBuffer + picture_h264->slice_parameter.slice_data_offset[sliceIdx]);
187 
188       debug_printf("[d3d12_video_decoder_h264] Reported slice index %" PRIu32 " with SliceBytesInBuffer %d - BSNALunitDataLocation %d - wBadSliceChopping: %" PRIu16
189                   " for frame with "
190                   "fenceValue: %d\n",
191                   sliceIdx,
192                   currentSliceEntry->SliceBytesInBuffer,
193                   currentSliceEntry->BSNALunitDataLocation,
194                   currentSliceEntry->wBadSliceChopping,
195                   pD3D12Dec->m_fenceValue);
196 
197       pData += sizeof(DXVA_Slice_H264_Short);
198    }
199    assert(vecOutSliceControlBuffers.size() == TotalSlicesDXVAArrayByteSize);
200 }
201 
202 static void
d3d12_video_decoder_log_pic_entry_h264(DXVA_PicEntry_H264 & picEntry)203 d3d12_video_decoder_log_pic_entry_h264(DXVA_PicEntry_H264 &picEntry)
204 {
205    debug_printf("\t\tIndex7Bits: %d\n"
206                  "\t\tAssociatedFlag: %d\n"
207                  "\t\tbPicEntry: %d\n",
208                  picEntry.Index7Bits,
209                  picEntry.AssociatedFlag,
210                  picEntry.bPicEntry);
211 }
212 
213 void
d3d12_video_decoder_log_pic_params_h264(DXVA_PicParams_H264 * pPicParams)214 d3d12_video_decoder_log_pic_params_h264(DXVA_PicParams_H264 *pPicParams)
215 {
216    debug_printf("\n=============================================\n");
217    debug_printf("wFrameWidthInMbsMinus1 = %d\n", pPicParams->wFrameWidthInMbsMinus1);
218    debug_printf("wFrameHeightInMbsMinus1 = %d\n", pPicParams->wFrameHeightInMbsMinus1);
219    debug_printf("CurrPic.Index7Bits = %d\n", pPicParams->CurrPic.Index7Bits);
220    debug_printf("CurrPic.AssociatedFlag = %d\n", pPicParams->CurrPic.AssociatedFlag);
221    debug_printf("num_ref_frames = %d\n", pPicParams->num_ref_frames);
222    debug_printf("sp_for_switch_flag = %d\n", pPicParams->sp_for_switch_flag);
223    debug_printf("field_pic_flag = %d\n", pPicParams->field_pic_flag);
224    debug_printf("MbaffFrameFlag = %d\n", pPicParams->MbaffFrameFlag);
225    debug_printf("residual_colour_transform_flag = %d\n", pPicParams->residual_colour_transform_flag);
226    debug_printf("chroma_format_idc = %d\n", pPicParams->chroma_format_idc);
227    debug_printf("RefPicFlag = %d\n", pPicParams->RefPicFlag);
228    debug_printf("IntraPicFlag = %d\n", pPicParams->IntraPicFlag);
229    debug_printf("constrained_intra_pred_flag = %d\n", pPicParams->constrained_intra_pred_flag);
230    debug_printf("MinLumaBipredSize8x8Flag = %d\n", pPicParams->MinLumaBipredSize8x8Flag);
231    debug_printf("weighted_pred_flag = %d\n", pPicParams->weighted_pred_flag);
232    debug_printf("weighted_bipred_idc = %d\n", pPicParams->weighted_bipred_idc);
233    debug_printf("MbsConsecutiveFlag = %d\n", pPicParams->MbsConsecutiveFlag);
234    debug_printf("frame_mbs_only_flag = %d\n", pPicParams->frame_mbs_only_flag);
235    debug_printf("transform_8x8_mode_flag = %d\n", pPicParams->transform_8x8_mode_flag);
236    debug_printf("StatusReportFeedbackNumber = %d\n", pPicParams->StatusReportFeedbackNumber);
237    debug_printf("CurrFieldOrderCnt[0] = %d\n", pPicParams->CurrFieldOrderCnt[0]);
238    debug_printf("CurrFieldOrderCnt[1] = %d\n", pPicParams->CurrFieldOrderCnt[1]);
239    debug_printf("chroma_qp_index_offset = %d\n", pPicParams->chroma_qp_index_offset);
240    debug_printf("second_chroma_qp_index_offset = %d\n", pPicParams->second_chroma_qp_index_offset);
241    debug_printf("ContinuationFlag = %d\n", pPicParams->ContinuationFlag);
242    debug_printf("pic_init_qp_minus26 = %d\n", pPicParams->pic_init_qp_minus26);
243    debug_printf("pic_init_qs_minus26 = %d\n", pPicParams->pic_init_qs_minus26);
244    debug_printf("num_ref_idx_l0_active_minus1 = %d\n", pPicParams->num_ref_idx_l0_active_minus1);
245    debug_printf("num_ref_idx_l1_active_minus1 = %d\n", pPicParams->num_ref_idx_l1_active_minus1);
246    debug_printf("frame_num = %d\n", pPicParams->frame_num);
247    debug_printf("log2_max_frame_num_minus4 = %d\n", pPicParams->log2_max_frame_num_minus4);
248    debug_printf("pic_order_cnt_type = %d\n", pPicParams->pic_order_cnt_type);
249    debug_printf("log2_max_pic_order_cnt_lsb_minus4 = %d\n", pPicParams->log2_max_pic_order_cnt_lsb_minus4);
250    debug_printf("delta_pic_order_always_zero_flag = %d\n", pPicParams->delta_pic_order_always_zero_flag);
251    debug_printf("direct_8x8_inference_flag = %d\n", pPicParams->direct_8x8_inference_flag);
252    debug_printf("entropy_coding_mode_flag = %d\n", pPicParams->entropy_coding_mode_flag);
253    debug_printf("pic_order_present_flag = %d\n", pPicParams->pic_order_present_flag);
254    debug_printf("deblocking_filter_control_present_flag = %d\n", pPicParams->deblocking_filter_control_present_flag);
255    debug_printf("redundant_pic_cnt_present_flag = %d\n", pPicParams->redundant_pic_cnt_present_flag);
256    debug_printf("num_slice_groups_minus1 = %d\n", pPicParams->num_slice_groups_minus1);
257    debug_printf("slice_group_map_type = %d\n", pPicParams->slice_group_map_type);
258    debug_printf("slice_group_change_rate_minus1 = %d\n", pPicParams->slice_group_change_rate_minus1);
259    debug_printf("Reserved8BitsB = %d\n", pPicParams->Reserved8BitsB);
260    debug_printf("UsedForReferenceFlags 0x%08x\n", pPicParams->UsedForReferenceFlags);
261    debug_printf("NonExistingFrameFlags 0x%08x\n", pPicParams->NonExistingFrameFlags);
262 
263    const UINT16 RefPicListLength = _countof(DXVA_PicParams_H264::RefFrameList);
264 
265    debug_printf("[D3D12 Video Decoder H264 DXVA PicParams info]\n"
266                  "\t[Current Picture Entry]\n");
267    d3d12_video_decoder_log_pic_entry_h264(pPicParams->CurrPic);
268 
269    debug_printf("[Decode RefFrameList Pic_Entry list] Entries where bPicEntry == "
270                  "DXVA_H264_INVALID_PICTURE_ENTRY_VALUE are not printed\n");
271    for (uint32_t refIdx = 0; refIdx < RefPicListLength; refIdx++) {
272       if (DXVA_H264_INVALID_PICTURE_ENTRY_VALUE != pPicParams->RefFrameList[refIdx].bPicEntry) {
273          debug_printf("\t[Reference PicEntry %d]\n", refIdx);
274          d3d12_video_decoder_log_pic_entry_h264(pPicParams->RefFrameList[refIdx]);
275          debug_printf("\t\tFrameNumList: %d\n"
276                        "\t\tFieldOrderCntList[0]: %d\n"
277                        "\t\tFieldOrderCntList[1]: %d\n",
278                        pPicParams->FrameNumList[refIdx],
279                        pPicParams->FieldOrderCntList[refIdx][0],
280                        pPicParams->FieldOrderCntList[refIdx][1]);
281       }
282    }
283 }
284 
285 DXVA_PicParams_H264
d3d12_video_decoder_dxva_picparams_from_pipe_picparams_h264(uint32_t frameNum,pipe_video_profile profile,uint32_t decodeWidth,uint32_t decodeHeight,pipe_h264_picture_desc * pPipeDesc)286 d3d12_video_decoder_dxva_picparams_from_pipe_picparams_h264(
287    uint32_t frameNum,
288    pipe_video_profile profile,
289    uint32_t decodeWidth,    // pipe_h264_picture_desc doesn't have the size of the frame for H264, but it does for other
290                             // codecs.
291    uint32_t decodeHeight,   // pipe_h264_picture_desc doesn't have the size of the frame for H264, but it does for other
292                             // codecs.
293    pipe_h264_picture_desc *pPipeDesc)
294 {
295    DXVA_PicParams_H264 dxvaStructure = {};
296 
297    // uint16_t  wFrameWidthInMbsMinus1;
298    uint width_in_mb = decodeWidth / D3D12_VIDEO_H264_MB_IN_PIXELS;
299    dxvaStructure.wFrameWidthInMbsMinus1 = width_in_mb - 1;
300    // uint16_t  wFrameHeightInMbsMinus1;
301    uint height_in_mb = static_cast<uint>(std::ceil(decodeHeight / D3D12_VIDEO_H264_MB_IN_PIXELS));
302    dxvaStructure.wFrameHeightInMbsMinus1 = height_in_mb - 1;
303 
304    // CurrPic.Index7Bits is handled by d3d12_video_decoder_refresh_dpb_active_references_h264
305    // CurrPic.AssociatedFlag
306    // If field_pic_flag is 1, the AssociatedFlag field in CurrPic is interpreted as follows:
307    // 0 -> The current picture is the top field of the uncompressed destination frame surface.
308    // 1 -> The current picture is the bottom field of the uncompressed destination frame surface.
309    // If field_pic_flag is 0, AssociatedFlag has no meaning and shall be 0, and the accelerator shall ignore the value.
310    if (pPipeDesc->field_pic_flag) {
311       dxvaStructure.CurrPic.AssociatedFlag = (pPipeDesc->bottom_field_flag == 0) ? 0 : 1;
312    } else {
313       dxvaStructure.CurrPic.AssociatedFlag = 0;
314    }
315 
316    // uint8_t   num_ref_frames;
317    dxvaStructure.num_ref_frames = pPipeDesc->num_ref_frames;
318    // union {
319    // struct {
320    // uint16_t  field_pic_flag                 : 1;
321    dxvaStructure.field_pic_flag = pPipeDesc->field_pic_flag;
322    // From H264 codec spec
323    // The variable MbaffFrameFlag is derived as
324    // MbaffFrameFlag = ( mb_adaptive_frame_field_flag && !field_pic_flag )
325    dxvaStructure.MbaffFrameFlag = (pPipeDesc->pps->sps->mb_adaptive_frame_field_flag && !pPipeDesc->field_pic_flag);
326    // uint16_t  residual_colour_transform_flag :1
327    dxvaStructure.residual_colour_transform_flag = pPipeDesc->pps->sps->separate_colour_plane_flag;
328    // uint16_t sp_for_switch_flag // switch slices are not supported by VA
329    dxvaStructure.sp_for_switch_flag = 0;
330    // uint16_t  chroma_format_idc              : 2;
331    assert(pPipeDesc->pps->sps->chroma_format_idc == 1);   // Not supported otherwise
332    dxvaStructure.chroma_format_idc = 1;   // This is always 4:2:0 for D3D12 Video. NV12/P010 DXGI formats only.
333    // uint16_t  RefPicFlag                     : 1;
334    dxvaStructure.RefPicFlag = pPipeDesc->is_reference;
335 
336    // uint16_t  constrained_intra_pred_flag    : 1;
337    dxvaStructure.constrained_intra_pred_flag = pPipeDesc->pps->constrained_intra_pred_flag;
338    // uint16_t  weighted_pred_flag             : 1;
339    dxvaStructure.weighted_pred_flag = pPipeDesc->pps->weighted_pred_flag;
340    // uint16_t  weighted_bipred_idc            : 2;
341    dxvaStructure.weighted_bipred_idc = pPipeDesc->pps->weighted_bipred_idc;
342    // From DXVA spec:
343    // The value shall be 1 unless the restricted-mode profile in use explicitly supports the value 0.
344    // FMO is not supported by VAAPI
345    dxvaStructure.MbsConsecutiveFlag = 1;
346    // uint16_t  frame_mbs_only_flag            : 1;
347    dxvaStructure.frame_mbs_only_flag = pPipeDesc->pps->sps->frame_mbs_only_flag;
348    // uint16_t  transform_8x8_mode_flag        : 1;
349    dxvaStructure.transform_8x8_mode_flag = pPipeDesc->pps->transform_8x8_mode_flag;
350    // };
351    // uint16_t  wBitFields;
352    // };
353    // uint8_t  bit_depth_luma_minus8;
354    dxvaStructure.bit_depth_luma_minus8 = pPipeDesc->pps->sps->bit_depth_luma_minus8;
355    assert(dxvaStructure.bit_depth_luma_minus8 == 0);   // Only support for NV12 now
356    // uint8_t  bit_depth_chroma_minus8;
357    dxvaStructure.bit_depth_chroma_minus8 = pPipeDesc->pps->sps->bit_depth_chroma_minus8;
358    assert(dxvaStructure.bit_depth_chroma_minus8 == 0);   // Only support for NV12 now
359    // uint16_t MinLumaBipredSize8x8Flag
360    dxvaStructure.MinLumaBipredSize8x8Flag = pPipeDesc->pps->sps->MinLumaBiPredSize8x8;
361    // char pic_init_qs_minus26
362    dxvaStructure.pic_init_qs_minus26 = pPipeDesc->pps->pic_init_qs_minus26;
363    // uint8_t   chroma_qp_index_offset;   /* also used for QScb */
364    dxvaStructure.chroma_qp_index_offset = pPipeDesc->pps->chroma_qp_index_offset;
365    // uint8_t   second_chroma_qp_index_offset; /* also for QScr */
366    dxvaStructure.second_chroma_qp_index_offset = pPipeDesc->pps->second_chroma_qp_index_offset;
367 
368    /* remainder for parsing */
369    // uint8_t   pic_init_qp_minus26;
370    dxvaStructure.pic_init_qp_minus26 = pPipeDesc->pps->pic_init_qp_minus26;
371    // uint8_t  num_ref_idx_l0_active_minus1;
372    dxvaStructure.num_ref_idx_l0_active_minus1 = pPipeDesc->num_ref_idx_l0_active_minus1;
373    // uint8_t  num_ref_idx_l1_active_minus1;
374    dxvaStructure.num_ref_idx_l1_active_minus1 = pPipeDesc->num_ref_idx_l1_active_minus1;
375 
376    // uint16_t frame_num;
377    dxvaStructure.frame_num = pPipeDesc->frame_num;
378 
379    // uint8_t  log2_max_frame_num_minus4;
380    dxvaStructure.log2_max_frame_num_minus4 = pPipeDesc->pps->sps->log2_max_frame_num_minus4;
381    // uint8_t  pic_order_cnt_type;
382    dxvaStructure.pic_order_cnt_type = pPipeDesc->pps->sps->pic_order_cnt_type;
383    // uint8_t  log2_max_pic_order_cnt_lsb_minus4;
384    dxvaStructure.log2_max_pic_order_cnt_lsb_minus4 = pPipeDesc->pps->sps->log2_max_pic_order_cnt_lsb_minus4;
385    // uint8_t  delta_pic_order_always_zero_flag;
386    dxvaStructure.delta_pic_order_always_zero_flag = pPipeDesc->pps->sps->delta_pic_order_always_zero_flag;
387    // uint8_t  direct_8x8_inference_flag;
388    dxvaStructure.direct_8x8_inference_flag = pPipeDesc->pps->sps->direct_8x8_inference_flag;
389    // uint8_t  entropy_coding_mode_flag;
390    dxvaStructure.entropy_coding_mode_flag = pPipeDesc->pps->entropy_coding_mode_flag;
391    // uint8_t  num_slice_groups_minus1;
392    dxvaStructure.num_slice_groups_minus1 = pPipeDesc->pps->num_slice_groups_minus1;
393    assert(dxvaStructure.num_slice_groups_minus1 == 0);   // FMO Not supported by VA
394 
395    // uint8_t  slice_group_map_type;
396    dxvaStructure.slice_group_map_type = pPipeDesc->pps->slice_group_map_type;
397    // uint8_t  deblocking_filter_control_present_flag;
398    dxvaStructure.deblocking_filter_control_present_flag = pPipeDesc->pps->deblocking_filter_control_present_flag;
399    // uint8_t  redundant_pic_cnt_present_flag;
400    dxvaStructure.redundant_pic_cnt_present_flag = pPipeDesc->pps->redundant_pic_cnt_present_flag;
401    // uint16_t slice_group_change_rate_minus1;
402    dxvaStructure.slice_group_change_rate_minus1 = pPipeDesc->pps->slice_group_change_rate_minus1;
403 
404    // int32_t    CurrFieldOrderCnt[2];
405    dxvaStructure.CurrFieldOrderCnt[0] = pPipeDesc->field_order_cnt[0];
406    dxvaStructure.CurrFieldOrderCnt[1] = pPipeDesc->field_order_cnt[1];
407 
408    // DXVA_PicEntry_H264  RefFrameList[16]; /* DXVA_PicEntry_H264.AssociatedFlag 1 means LongTermRef */
409    // From DXVA spec:
410    // RefFrameList
411    // Contains a list of 16 uncompressed frame buffer surfaces.  All uncompressed surfaces that correspond to pictures
412    // currently marked as "used for reference" must appear in the RefFrameList array. Non-reference surfaces (those
413    // which only contain pictures for which the value of RefPicFlag was 0 when the picture was decoded) shall not appear
414    // in RefFrameList for a subsequent picture. In addition, surfaces that contain only pictures marked as "unused for
415    // reference" shall not appear in RefFrameList for a subsequent picture.
416 
417    dxvaStructure.UsedForReferenceFlags = 0;   // initialize to zero and set only the appropiate values below
418 
419    bool frameUsesAnyRefPicture = false;
420    for (uint i = 0; i < 16; i++) {
421       // Fix ad-hoc behaviour from the VA upper layer which always marks short term references as top_is_reference and
422       // bottom_is_reference as true and then differenciates using INT_MAX in field_order_cnt_list[i][0]/[1] to indicate
423       // not used convert to expected
424       if (pPipeDesc->field_order_cnt_list[i][0] == INT_MAX) {
425          pPipeDesc->top_is_reference[i] = false;
426          pPipeDesc->field_order_cnt_list[i][0] = 0;   // DXVA Spec says this has to be zero if unused
427       }
428 
429       if (pPipeDesc->field_order_cnt_list[i][1] == INT_MAX) {
430          pPipeDesc->bottom_is_reference[i] = false;
431          pPipeDesc->field_order_cnt_list[i][1] = 0;   // DXVA Spec says this has to be zero if unused
432       }
433 
434       // If both top and bottom reference flags are false, this is an invalid entry
435       bool validEntry = (pPipeDesc->top_is_reference[i] || pPipeDesc->bottom_is_reference[i] || pPipeDesc->is_long_term[i]);
436       if (!validEntry) {
437          // From DXVA spec:
438          // Entries that will not be used for decoding the current picture, or any subsequent pictures, are indicated by
439          // setting bPicEntry to 0xFF. If bPicEntry is not 0xFF, the entry may be used as a reference surface for
440          // decoding the current picture or a subsequent picture (in decoding order).
441          dxvaStructure.RefFrameList[i].bPicEntry = DXVA_H264_INVALID_PICTURE_ENTRY_VALUE;
442          dxvaStructure.FieldOrderCntList[i][0] = 0;
443          dxvaStructure.FieldOrderCntList[i][1] = 0;
444          dxvaStructure.FrameNumList[i] = 0;
445       } else {
446          frameUsesAnyRefPicture = true;
447          // From DXVA spec:
448          // For each entry whose value is not 0xFF, the value of AssociatedFlag is interpreted as follows:
449          // 0 - Not a long-term reference frame.
450          // 1 - Long-term reference frame. The uncompressed frame buffer contains a reference frame or one or more
451          // reference fields marked as "used for long-term reference." If field_pic_flag is 1, the current uncompressed
452          // frame surface may appear in the list for the purpose of decoding the second field of a complementary
453          // reference field pair.
454          dxvaStructure.RefFrameList[i].AssociatedFlag = pPipeDesc->is_long_term[i] ? 1u : 0u;
455 
456          // dxvaStructure.RefFrameList[i].Index7Bits is handled by d3d12_video_decoder_refresh_dpb_active_references_h264
457 
458          // uint16_t FrameNumList[16];
459          // 	 FrameNumList
460          // For each entry in RefFrameList, the corresponding entry in FrameNumList
461          // contains the value of FrameNum or LongTermFrameIdx, depending on the value of
462          // AssociatedFlag in the RefFrameList entry. (FrameNum is assigned to short-term
463          // reference pictures, and LongTermFrameIdx is assigned to long-term reference
464          // pictures.)
465          // If an element in the list of frames is not relevent (for example, if the corresponding
466          // entry in RefFrameList is empty or is marked as "not used for reference"), the value
467          // of the FrameNumList entry shall be 0. Accelerators can rely on this constraint being
468          // fulfilled.
469          dxvaStructure.FrameNumList[i] = pPipeDesc->frame_num_list[i];
470 
471          // int32_t    FieldOrderCntList[16][2];
472          // Contains the picture order counts for the reference frames listed in RefFrameList.
473          // For each entry i in the RefFrameList array, FieldOrderCntList[i][0] contains the
474          // value of TopFieldOrderCnt for entry i, and FieldOrderCntList[i][1] contains the
475          // value of BottomFieldOrderCnt for entry i.
476          //
477          // If an element of the list is not relevent (for example, if the corresponding entry in
478          // RefFrameList is empty or is marked as "not used for reference"), the value of
479          // TopFieldOrderCnt or BottomFieldOrderCnt in FieldOrderCntList shall be 0.
480          // Accelerators can rely on this constraint being fulfilled.
481 
482          dxvaStructure.FieldOrderCntList[i][0] = pPipeDesc->field_order_cnt_list[i][0];
483          dxvaStructure.FieldOrderCntList[i][1] = pPipeDesc->field_order_cnt_list[i][1];
484 
485          // From DXVA spec
486          // UsedForReferenceFlags
487          // Contains two 1-bit flags for each entry in RefFrameList. For the ith entry in RefFrameList, the two flags
488          // are accessed as follows:  Flag1i = (UsedForReferenceFlags >> (2 * i)) & 1  Flag2i = (UsedForReferenceFlags
489          // >> (2 * i + 1)) & 1 If Flag1i is 1, the top field of frame number i is marked as "used for reference," as
490          // defined by the H.264/AVC specification. If Flag2i is 1, the bottom field of frame number i is marked as
491          // "used for reference." (Otherwise, if either flag is 0, that field is not marked as "used for reference.") If
492          // an element in the list of frames is not relevent (for example, if the corresponding entry in RefFrameList is
493          // empty), the value of both flags for that entry shall be 0. Accelerators may rely on this constraint being
494          // fulfilled.
495 
496          if (pPipeDesc->top_is_reference[i] || pPipeDesc->is_long_term[i]) {
497             dxvaStructure.UsedForReferenceFlags |= (1 << (2 * i));
498          }
499 
500          if (pPipeDesc->bottom_is_reference[i] || pPipeDesc->is_long_term[i]) {
501             dxvaStructure.UsedForReferenceFlags |= (1 << (2 * i + 1));
502          }
503       }
504    }
505 
506    // frame type (I, P, B, etc) is not included in pipeDesc data, let's try to derive it
507    // from the reference list...if frame doesn't use any references, it should be an I frame.
508    dxvaStructure.IntraPicFlag = !frameUsesAnyRefPicture;
509 
510    // uint8_t  pic_order_present_flag; /* Renamed to bottom_field_pic_order_in_frame_present_flag in newer standard
511    // versions. */
512    dxvaStructure.pic_order_present_flag = pPipeDesc->pps->bottom_field_pic_order_in_frame_present_flag;
513 
514    // Software decoders should be implemented, as soon as feasible, to set the value of
515    // Reserved16Bits to 3. The value 0 was previously assigned for uses prior to July 20,
516    // 2007. The value 1 was previously assigned for uses prior to October 12, 2007. The
517    // value 2 was previously assigned for uses prior to January 15, 2009. Software
518    // decoders shall not set Reserved16Bits to any value other than those listed here.
519    // Note Software decoders that set Reserved16Bits to 3 should ensure that any aspects of software decoder operation
520    // that were previously not in conformance with this version of the specification have been corrected in the current
521    // implementation. One particular aspect of conformance that should be checked is the ordering of quantization
522    // scaling list data, as specified in section 5.2. In addition, the ReservedIntraBit flag in the macroblock control
523    // buffer must use the semantics described in section 7.2 (this flag was previously reserved). The semantics of
524    // Index7Bits and RefPicList have also been clarified in updates to this specification.
525    dxvaStructure.Reserved16Bits = 3;
526 
527    // DXVA spec: Arbitrary number set by the host decoder to use as a tag in the status report
528    // feedback data. The value should not equal 0, and should be different in each call to
529    // Execute. For more information, see section 12.0, Status Report Data Structure.
530    dxvaStructure.StatusReportFeedbackNumber = frameNum;
531    assert(dxvaStructure.StatusReportFeedbackNumber > 0);
532 
533    // from DXVA spec
534    // ContinuationFlag
535    // If this flag is 1, the remainder of this structure is present in the buffer and contains valid values. If this
536    // flag is 0, the structure might be truncated at this point in the buffer, or the remaining fields may be set to 0
537    // and shall be ignored by the accelerator. The remaining members of this structure are needed only for off-host
538    // bitstream parsing. If the host decoder parses the bitstream, the decoder can truncate the picture parameters data
539    // structure buffer after the ContinuationFlag or set the remaining members to zero. uint8_t  ContinuationFlag;
540    dxvaStructure.ContinuationFlag =
541       1;   // DXVA destination struct does contain members from the slice section of pipeDesc...
542 
543    return dxvaStructure;
544 }
545 
546 void
d3d12_video_decoder_dxva_qmatrix_from_pipe_picparams_h264(pipe_h264_picture_desc * pPipeDesc,DXVA_Qmatrix_H264 & outMatrixBuffer)547 d3d12_video_decoder_dxva_qmatrix_from_pipe_picparams_h264(pipe_h264_picture_desc *pPipeDesc,
548                                                           DXVA_Qmatrix_H264 &outMatrixBuffer)
549 {
550    // Please note here that the matrices coming from the gallium VA frontend are copied from VAIQMatrixBufferH264
551    // which are specified in VAAPI as being in raster scan order (different than zigzag needed by DXVA)
552    // also please note that VAIQMatrixBufferH264.ScalingList8x8 is copied into the first two rows of
553    // pipe_h264_pps.ScalingList8x8 leaving the upper 4 rows of  pipe_h264_pps.ScalingList8x8[6][64] unmodified
554    // Finally, please note that other gallium frontends might decide to copy the scaling lists in other order
555    // and this section might have to be extended to add support for them.
556 
557    // In DXVA each scaling list is ordered in zig-zag scan order, convert them from raster scan order.
558    unsigned i, j;
559    for (i = 0; i < 6; i++) {
560       for (j = 0; j < 16; j++) {
561          outMatrixBuffer.bScalingLists4x4[i][j] = pPipeDesc->pps->ScalingList4x4[i][vl_zscan_normal_16[j]];
562       }
563    }
564    for (i = 0; i < 64; i++) {
565       outMatrixBuffer.bScalingLists8x8[0][i] = pPipeDesc->pps->ScalingList8x8[0][vl_zscan_normal[i]];
566       outMatrixBuffer.bScalingLists8x8[1][i] = pPipeDesc->pps->ScalingList8x8[1][vl_zscan_normal[i]];
567    }
568 }
569