xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/d3d12/d3d12_video_dec_hevc.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © Microsoft Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "d3d12_video_dec.h"
25 #include "d3d12_video_dec_hevc.h"
26 #include "d3d12_resource.h"
27 #include "d3d12_video_buffer.h"
28 #include <cmath>
29 
30 void
d3d12_video_decoder_refresh_dpb_active_references_hevc(struct d3d12_video_decoder * pD3D12Dec)31 d3d12_video_decoder_refresh_dpb_active_references_hevc(struct d3d12_video_decoder *pD3D12Dec)
32 {
33    // Method overview
34    // 1. Codec specific strategy in switch statement regarding reference frames eviction policy. Should only mark active
35    // DPB references, leaving evicted ones as unused
36    // 2. Call release_unused_references_texture_memory(); at the end of this method. Any references (and texture
37    // allocations associated)
38    //    that were left not marked as used in m_spDPBManager by step (2) are lost.
39 
40    // Assign DXVA original Index7Bits indices to current frame and references
41    DXVA_PicParams_HEVC *pCurrPicParams = d3d12_video_decoder_get_current_dxva_picparams<DXVA_PicParams_HEVC>(pD3D12Dec);
42    for (uint8_t i = 0; i < _countof(pCurrPicParams->RefPicList); i++) {
43       // From HEVC DXVA spec:
44       // Index7Bits
45       //     An index that identifies an uncompressed surface for the CurrPic or RefPicList member of the picture parameters structure (section 4.0).
46       //     When Index7Bits is used in the CurrPic and RefPicList members of the picture parameters structure, the value directly specifies the DXVA index of an uncompressed surface.
47       //     When Index7Bits is 127 (0x7F), this indicates that it does not contain a valid index.
48 
49       //     AssociatedFlag
50       //     Optional 1-bit flag associated with the surface. It specifies whether the reference picture is a long-term reference or a short-term reference for RefPicList, and it has no meaning when used for CurrPic.
51       //     bPicEntry
52       //     Accesses the entire 8 bits of the union.
53 
54       if (pCurrPicParams->RefPicList[i].bPicEntry != DXVA_HEVC_INVALID_PICTURE_ENTRY_VALUE) {
55          pCurrPicParams->RefPicList[i].Index7Bits =
56             pD3D12Dec->m_spDPBManager->get_index7bits(pD3D12Dec->m_pCurrentReferenceTargets[i]);
57       }
58    }
59 
60    pD3D12Dec->m_spDPBManager->mark_all_references_as_unused();
61    pD3D12Dec->m_spDPBManager->mark_references_in_use(pCurrPicParams->RefPicList);
62 
63    // Releases the underlying reference picture texture objects of all references that were not marked as used in this
64    // method.
65    pD3D12Dec->m_spDPBManager->release_unused_references_texture_memory();
66 
67    pCurrPicParams->CurrPic.Index7Bits = pD3D12Dec->m_spDPBManager->get_index7bits(pD3D12Dec->m_pCurrentDecodeTarget);
68 }
69 
/**
 * Converts a picture dimension expressed in minimum luma coding blocks into luma samples.
 *
 * @param length  dimension counted in minimum coding blocks
 * @param cbsize  log2 of the minimum coding block size
 * @return        length multiplied by 2^cbsize
 *
 * Declared static so this generically named, file-local helper has internal linkage and
 * cannot collide with an identically named inline function in another translation unit.
 */
static inline int
LengthFromMinCb(int length, int cbsize)
{
   return length * (1 << cbsize);
}
75 
76 void
d3d12_video_decoder_get_frame_info_hevc(struct d3d12_video_decoder * pD3D12Dec,uint32_t * pWidth,uint32_t * pHeight,uint16_t * pMaxDPB)77 d3d12_video_decoder_get_frame_info_hevc(
78    struct d3d12_video_decoder *pD3D12Dec, uint32_t *pWidth, uint32_t *pHeight, uint16_t *pMaxDPB)
79 {
80    auto pPicParams = d3d12_video_decoder_get_current_dxva_picparams<DXVA_PicParams_HEVC>(pD3D12Dec);
81    UINT log2_min_luma_coding_block_size = pPicParams->log2_min_luma_coding_block_size_minus3 + 3;
82    *pWidth = LengthFromMinCb(pPicParams->PicWidthInMinCbsY, log2_min_luma_coding_block_size);
83    *pHeight = LengthFromMinCb(pPicParams->PicHeightInMinCbsY, log2_min_luma_coding_block_size);
84    *pMaxDPB = pPicParams->sps_max_dec_pic_buffering_minus1 + 1;
85 }
86 
87 ///
88 /// Pushes the current frame as next reference, updates the DXVA HEVC structure with the indices of the DPB and
89 /// transitions the references
90 ///
91 void
d3d12_video_decoder_prepare_current_frame_references_hevc(struct d3d12_video_decoder * pD3D12Dec,ID3D12Resource * pTexture2D,uint32_t subresourceIndex)92 d3d12_video_decoder_prepare_current_frame_references_hevc(struct d3d12_video_decoder *pD3D12Dec,
93                                                           ID3D12Resource *pTexture2D,
94                                                           uint32_t subresourceIndex)
95 {
96    DXVA_PicParams_HEVC *pPicParams = d3d12_video_decoder_get_current_dxva_picparams<DXVA_PicParams_HEVC>(pD3D12Dec);
97    pPicParams->CurrPic.Index7Bits = pD3D12Dec->m_spDPBManager->store_future_reference(pPicParams->CurrPic.Index7Bits,
98                                                                                       pD3D12Dec->m_spVideoDecoderHeap,
99                                                                                       pTexture2D,
100                                                                                       subresourceIndex);
101    // From HEVC DXVA spec:
102    // Index7Bits
103    //     An index that identifies an uncompressed surface for the CurrPic or RefPicList member of the picture parameters structure (section 4.0).
104    //     When Index7Bits is used in the CurrPic and RefPicList members of the picture parameters structure, the value directly specifies the DXVA index of an uncompressed surface.
105    //     When Index7Bits is 127 (0x7F), this indicates that it does not contain a valid index.
106 
107    pD3D12Dec->m_spDPBManager->update_entries(
108       d3d12_video_decoder_get_current_dxva_picparams<DXVA_PicParams_HEVC>(pD3D12Dec)->RefPicList,
109       pD3D12Dec->m_transitionsStorage);
110 
111    pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(pD3D12Dec->m_transitionsStorage.size(), pD3D12Dec->m_transitionsStorage.data());
112 
113    // Schedule reverse (back to common) transitions before command list closes for current frame
114    for (auto BarrierDesc : pD3D12Dec->m_transitionsStorage) {
115       std::swap(BarrierDesc.Transition.StateBefore, BarrierDesc.Transition.StateAfter);
116       pD3D12Dec->m_transitionsBeforeCloseCmdList.push_back(BarrierDesc);
117    }
118 
119    debug_printf(
120       "[d3d12_video_decoder_prepare_current_frame_references_hevc] DXVA_PicParams_HEVC after index remapping)\n");
121    d3d12_video_decoder_log_pic_params_hevc(
122       d3d12_video_decoder_get_current_dxva_picparams<DXVA_PicParams_HEVC>(pD3D12Dec));
123 }
124 
125 void
d3d12_video_decoder_prepare_dxva_slices_control_hevc(struct d3d12_video_decoder * pD3D12Dec,std::vector<uint8_t> & vecOutSliceControlBuffers,struct pipe_h265_picture_desc * picture_hevc)126 d3d12_video_decoder_prepare_dxva_slices_control_hevc(struct d3d12_video_decoder *pD3D12Dec,
127                                                      std::vector<uint8_t> &vecOutSliceControlBuffers,
128                                                      struct pipe_h265_picture_desc *picture_hevc)
129 {
130 
131    if(!picture_hevc->slice_parameter.slice_info_present)
132    {
133       unreachable("Unsupported - need pipe_h265_picture_desc.slice_parameter.slice_info_present");
134    }
135 
136    debug_printf("[d3d12_video_decoder_hevc] Upper layer reported %d slices for this frame, parsing them below...\n",
137                   picture_hevc->slice_parameter.slice_count);
138 
139    uint64_t TotalSlicesDXVAArrayByteSize = picture_hevc->slice_parameter.slice_count * sizeof(DXVA_Slice_HEVC_Short);
140    vecOutSliceControlBuffers.resize(TotalSlicesDXVAArrayByteSize);
141 
142    uint8_t* pData = vecOutSliceControlBuffers.data();
143    static const uint32_t start_code_size = 3;
144    uint32_t acum_slice_offset = (picture_hevc->slice_parameter.slice_count > 0) ? picture_hevc->slice_parameter.slice_data_offset[0] : 0;
145    for (uint32_t sliceIdx = 0; sliceIdx < picture_hevc->slice_parameter.slice_count; sliceIdx++)
146    {
147       DXVA_Slice_HEVC_Short currentSliceEntry = {};
148       // From HEVC DXVA Spec
149       // wBadSliceChopping
150       // 0	All bits for the slice are located within the corresponding bitstream data buffer.
151       // 1	The bitstream data buffer contains the start of the slice, but not the entire slice, because the buffer is full.
152       // 2	The bitstream data buffer contains the end of the slice. It does not contain the start of the slice, because the start of the slice was located in the previous bitstream data buffer.
153       // 3	The bitstream data buffer does not contain the start of the slice (because the start of the slice was located in the previous bitstream data buffer),
154       //     and it does not contain the end of the slice (because the current bitstream data buffer is also full).
155 
156       switch (picture_hevc->slice_parameter.slice_data_flag[sliceIdx]) {
157          /* whole slice is in the buffer */
158          case PIPE_SLICE_BUFFER_PLACEMENT_TYPE_WHOLE:
159             currentSliceEntry.wBadSliceChopping = 0u;
160             break;
161          /* The beginning of the slice is in the buffer but the end is not */
162          case PIPE_SLICE_BUFFER_PLACEMENT_TYPE_BEGIN:
163             currentSliceEntry.wBadSliceChopping = 1u;
164             break;
165          /* Neither beginning nor end of the slice is in the buffer */
166          case PIPE_SLICE_BUFFER_PLACEMENT_TYPE_MIDDLE:
167             currentSliceEntry.wBadSliceChopping = 3u;
168             break;
169          /* end of the slice is in the buffer */
170          case PIPE_SLICE_BUFFER_PLACEMENT_TYPE_END:
171             currentSliceEntry.wBadSliceChopping = 2u;
172             break;
173          default:
174          {
175             unreachable("Unsupported pipe_slice_buffer_placement_type");
176          } break;
177       }
178 
179       /* slice_data_size from pipe/va does not include the NAL unit size, DXVA requires it */
180       currentSliceEntry.SliceBytesInBuffer = picture_hevc->slice_parameter.slice_data_size[sliceIdx] + start_code_size;
181 
182       /* slice_data_offset from pipe/va are relative to the current slice, and in DXVA they are absolute within the frame source buffer */
183       currentSliceEntry.BSNALunitDataLocation = acum_slice_offset;
184       acum_slice_offset += (currentSliceEntry.SliceBytesInBuffer + picture_hevc->slice_parameter.slice_data_offset[sliceIdx]);
185 
186       debug_printf("[d3d12_video_decoder_hevc] Detected slice index %" PRIu32 " with SliceBytesInBuffer %d - BSNALunitDataLocation %d - wBadSliceChopping: %" PRIu16
187                   " for frame with "
188                   "fenceValue: %d\n",
189                   sliceIdx,
190                   currentSliceEntry.SliceBytesInBuffer,
191                   currentSliceEntry.BSNALunitDataLocation,
192                   currentSliceEntry.wBadSliceChopping,
193                   pD3D12Dec->m_fenceValue);
194 
195       memcpy(pData, &currentSliceEntry, sizeof(DXVA_Slice_HEVC_Short));
196       pData += sizeof(DXVA_Slice_HEVC_Short);
197    }
198    assert(vecOutSliceControlBuffers.size() == TotalSlicesDXVAArrayByteSize);
199 }
200 
201 static void
d3d12_video_decoder_log_pic_entry_hevc(DXVA_PicEntry_HEVC & picEntry)202 d3d12_video_decoder_log_pic_entry_hevc(DXVA_PicEntry_HEVC &picEntry)
203 {
204    debug_printf("\t\tIndex7Bits: %d\n"
205                  "\t\tAssociatedFlag: %d\n"
206                  "\t\tbPicEntry: %d\n",
207                  picEntry.Index7Bits,
208                  picEntry.AssociatedFlag,
209                  picEntry.bPicEntry);
210 }
211 
212 void
d3d12_video_decoder_log_pic_params_hevc(DXVA_PicParams_HEVC * pPicParams)213 d3d12_video_decoder_log_pic_params_hevc(DXVA_PicParams_HEVC *pPicParams)
214 {
215    debug_printf("\n=============================================\n");
216    debug_printf("PicWidthInMinCbsY = %d\n", pPicParams->PicWidthInMinCbsY);
217    debug_printf("PicHeightInMinCbsY = %d\n", pPicParams->PicHeightInMinCbsY);
218    debug_printf("chroma_format_idc = %d\n", pPicParams->chroma_format_idc);
219    debug_printf("separate_colour_plane_flag = %d\n", pPicParams->separate_colour_plane_flag);
220    debug_printf("bit_depth_luma_minus8 = %d\n", pPicParams->bit_depth_luma_minus8);
221    debug_printf("bit_depth_chroma_minus8 = %d\n", pPicParams->bit_depth_chroma_minus8);
222    debug_printf("log2_max_pic_order_cnt_lsb_minus4 = %d\n", pPicParams->log2_max_pic_order_cnt_lsb_minus4);
223    debug_printf("NoPicReorderingFlag = %d\n", pPicParams->NoPicReorderingFlag);
224    debug_printf("NoBiPredFlag = %d\n", pPicParams->NoBiPredFlag);
225    debug_printf("ReservedBits1 = %d\n", pPicParams->ReservedBits1);
226    debug_printf("wFormatAndSequenceInfoFlags = %d\n", pPicParams->wFormatAndSequenceInfoFlags);
227    debug_printf("CurrPic.Index7Bits = %d\n", pPicParams->CurrPic.Index7Bits);
228    debug_printf("CurrPic.AssociatedFlag = %d\n", pPicParams->CurrPic.AssociatedFlag);
229    debug_printf("sps_max_dec_pic_buffering_minus1 = %d\n", pPicParams->sps_max_dec_pic_buffering_minus1);
230    debug_printf("log2_min_luma_coding_block_size_minus3 = %d\n", pPicParams->log2_min_luma_coding_block_size_minus3);
231    debug_printf("log2_diff_max_min_luma_coding_block_size = %d\n", pPicParams->log2_diff_max_min_luma_coding_block_size);
232    debug_printf("log2_min_transform_block_size_minus2 = %d\n", pPicParams->log2_min_transform_block_size_minus2);
233    debug_printf("log2_diff_max_min_transform_block_size = %d\n", pPicParams->log2_diff_max_min_transform_block_size);
234    debug_printf("max_transform_hierarchy_depth_inter = %d\n", pPicParams->max_transform_hierarchy_depth_inter);
235    debug_printf("max_transform_hierarchy_depth_intra = %d\n", pPicParams->max_transform_hierarchy_depth_intra);
236    debug_printf("num_short_term_ref_pic_sets = %d\n", pPicParams->num_short_term_ref_pic_sets);
237    debug_printf("num_long_term_ref_pics_sps = %d\n", pPicParams->num_long_term_ref_pics_sps);
238    debug_printf("num_ref_idx_l0_default_active_minus1 = %d\n", pPicParams->num_ref_idx_l0_default_active_minus1);
239    debug_printf("num_ref_idx_l1_default_active_minus1 = %d\n", pPicParams->num_ref_idx_l1_default_active_minus1);
240    debug_printf("init_qp_minus26 = %d\n", pPicParams->init_qp_minus26);
241    debug_printf("ucNumDeltaPocsOfRefRpsIdx = %d\n", pPicParams->ucNumDeltaPocsOfRefRpsIdx);
242    debug_printf("wNumBitsForShortTermRPSInSlice = %d\n", pPicParams->wNumBitsForShortTermRPSInSlice);
243    debug_printf("ReservedBits2 = %d\n", pPicParams->ReservedBits2);
244    debug_printf("scaling_list_enabled_flag = %d\n", pPicParams->scaling_list_enabled_flag);
245    debug_printf("amp_enabled_flag = %d\n", pPicParams->amp_enabled_flag);
246    debug_printf("sample_adaptive_offset_enabled_flag = %d\n", pPicParams->sample_adaptive_offset_enabled_flag);
247    debug_printf("pcm_enabled_flag = %d\n", pPicParams->pcm_enabled_flag);
248    debug_printf("pcm_sample_bit_depth_luma_minus1 = %d\n", pPicParams->pcm_sample_bit_depth_luma_minus1);
249    debug_printf("pcm_sample_bit_depth_chroma_minus1 = %d\n", pPicParams->pcm_sample_bit_depth_chroma_minus1);
250    debug_printf("log2_min_pcm_luma_coding_block_size_minus3 = %d\n", pPicParams->log2_min_pcm_luma_coding_block_size_minus3);
251    debug_printf("log2_diff_max_min_pcm_luma_coding_block_size = %d\n", pPicParams->log2_diff_max_min_pcm_luma_coding_block_size);
252    debug_printf("pcm_loop_filter_disabled_flag = %d\n", pPicParams->pcm_loop_filter_disabled_flag);
253    debug_printf("long_term_ref_pics_present_flag = %d\n", pPicParams->long_term_ref_pics_present_flag);
254    debug_printf("sps_temporal_mvp_enabled_flag = %d\n", pPicParams->sps_temporal_mvp_enabled_flag);
255    debug_printf("strong_intra_smoothing_enabled_flag = %d\n", pPicParams->strong_intra_smoothing_enabled_flag);
256    debug_printf("dependent_slice_segments_enabled_flag = %d\n", pPicParams->dependent_slice_segments_enabled_flag);
257    debug_printf("output_flag_present_flag = %d\n", pPicParams->output_flag_present_flag);
258    debug_printf("num_extra_slice_header_bits = %d\n", pPicParams->num_extra_slice_header_bits);
259    debug_printf("sign_data_hiding_enabled_flag = %d\n", pPicParams->sign_data_hiding_enabled_flag);
260    debug_printf("cabac_init_present_flag = %d\n", pPicParams->cabac_init_present_flag);
261    debug_printf("ReservedBits3 = %d\n", pPicParams->ReservedBits3);
262    debug_printf("dwCodingParamToolFlags = %d\n", pPicParams->dwCodingParamToolFlags);
263    debug_printf("constrained_intra_pred_flag = %d\n", pPicParams->constrained_intra_pred_flag);
264    debug_printf("transform_skip_enabled_flag = %d\n", pPicParams->transform_skip_enabled_flag);
265    debug_printf("cu_qp_delta_enabled_flag = %d\n", pPicParams->cu_qp_delta_enabled_flag);
266    debug_printf("pps_slice_chroma_qp_offsets_present_flag = %d\n", pPicParams->pps_slice_chroma_qp_offsets_present_flag);
267    debug_printf("weighted_pred_flag = %d\n", pPicParams->weighted_pred_flag);
268    debug_printf("weighted_bipred_flag = %d\n", pPicParams->weighted_bipred_flag);
269    debug_printf("transquant_bypass_enabled_flag = %d\n", pPicParams->transquant_bypass_enabled_flag);
270    debug_printf("tiles_enabled_flag = %d\n", pPicParams->tiles_enabled_flag);
271    debug_printf("entropy_coding_sync_enabled_flag = %d\n", pPicParams->entropy_coding_sync_enabled_flag);
272    debug_printf("uniform_spacing_flag = %d\n", pPicParams->uniform_spacing_flag);
273    debug_printf("loop_filter_across_tiles_enabled_flag = %d\n", pPicParams->loop_filter_across_tiles_enabled_flag);
274    debug_printf("pps_loop_filter_across_slices_enabled_flag = %d\n", pPicParams->pps_loop_filter_across_slices_enabled_flag);
275    debug_printf("deblocking_filter_override_enabled_flag = %d\n", pPicParams->deblocking_filter_override_enabled_flag);
276    debug_printf("pps_deblocking_filter_disabled_flag = %d\n", pPicParams->pps_deblocking_filter_disabled_flag);
277    debug_printf("lists_modification_present_flag = %d\n", pPicParams->lists_modification_present_flag);
278    debug_printf("slice_segment_header_extension_present_flag = %d\n", pPicParams->slice_segment_header_extension_present_flag);
279    debug_printf("IrapPicFlag = %d\n", pPicParams->IrapPicFlag);
280    debug_printf("IdrPicFlag = %d\n", pPicParams->IdrPicFlag);
281    debug_printf("IntraPicFlag = %d\n", pPicParams->IntraPicFlag);
282    debug_printf("ReservedBits4 = %d\n", pPicParams->ReservedBits4);
283    debug_printf("dwCodingSettingPicturePropertyFlags = %d\n", pPicParams->dwCodingSettingPicturePropertyFlags);
284    debug_printf("pps_cb_qp_offset = %d\n", pPicParams->pps_cb_qp_offset);
285    debug_printf("pps_cr_qp_offset = %d\n", pPicParams->pps_cr_qp_offset);
286    debug_printf("num_tile_columns_minus1 = %d\n", pPicParams->num_tile_columns_minus1);
287    debug_printf("num_tile_rows_minus1 = %d\n", pPicParams->num_tile_rows_minus1);
288    for (uint32_t i = 0; i < std::min((unsigned) pPicParams->num_tile_columns_minus1 + 1u, (unsigned) _countof(DXVA_PicParams_HEVC::column_width_minus1)); i++) {
289       debug_printf("column_width_minus1[%d]; = %d\n", i, pPicParams->column_width_minus1[i]);
290    }
291    for (uint32_t i = 0; i < std::min((unsigned) pPicParams->num_tile_rows_minus1 + 1u, (unsigned) _countof(DXVA_PicParams_HEVC::row_height_minus1)); i++) {
292       debug_printf("row_height_minus1[%d]; = %d\n", i, pPicParams->row_height_minus1[i]);
293    }
294    debug_printf("diff_cu_qp_delta_depth = %d\n", pPicParams->diff_cu_qp_delta_depth);
295    debug_printf("pps_beta_offset_div2 = %d\n", pPicParams->pps_beta_offset_div2);
296    debug_printf("pps_tc_offset_div2 = %d\n", pPicParams->pps_tc_offset_div2);
297    debug_printf("log2_parallel_merge_level_minus2 = %d\n", pPicParams->log2_parallel_merge_level_minus2);
298    debug_printf("CurrPicOrderCntVal = %d\n", pPicParams->CurrPicOrderCntVal);
299    debug_printf("ReservedBits5 = %d\n", pPicParams->ReservedBits5);
300    debug_printf("ReservedBits6 = %d\n", pPicParams->ReservedBits6);
301    debug_printf("ReservedBits7 = %d\n", pPicParams->ReservedBits7);
302    debug_printf("StatusReportFeedbackNumber = %d\n", pPicParams->StatusReportFeedbackNumber);
303 
304    debug_printf("[D3D12 Video Decoder HEVC DXVA PicParams info]\n"
305                  "\t[Current Picture Entry]\n");
306    d3d12_video_decoder_log_pic_entry_hevc(pPicParams->CurrPic);
307 
308    debug_printf("[D3D12 Video Decoder HEVC DXVA PicParams info]\n"
309                  "\t[Current Picture Reference sets, hiding entries with bPicEntry 0xFF]\n");
310 
311    for (uint32_t refIdx = 0; refIdx < _countof(DXVA_PicParams_HEVC::RefPicSetStCurrBefore); refIdx++) {
312       if(pPicParams->RefPicSetStCurrBefore[refIdx] != DXVA_HEVC_INVALID_PICTURE_ENTRY_VALUE) {
313          debug_printf("\tRefPicSetStCurrBefore[%d] = %d \n PicEntry RefPicList[%d]\n", refIdx, pPicParams->RefPicSetStCurrBefore[refIdx], pPicParams->RefPicSetStCurrBefore[refIdx]);
314          d3d12_video_decoder_log_pic_entry_hevc(pPicParams->RefPicList[pPicParams->RefPicSetStCurrBefore[refIdx]]);
315          debug_printf("\t\tPicOrderCntValList: %d\n",
316                      pPicParams->PicOrderCntValList[pPicParams->RefPicSetStCurrBefore[refIdx]]);
317       }
318    }
319    for (uint32_t refIdx = 0; refIdx < _countof(DXVA_PicParams_HEVC::RefPicSetStCurrAfter); refIdx++) {
320       if(pPicParams->RefPicSetStCurrAfter[refIdx] != DXVA_HEVC_INVALID_PICTURE_ENTRY_VALUE) {
321          debug_printf("\tRefPicSetStCurrAfter[%d] = %d \n PicEntry RefPicList[%d]\n", refIdx, pPicParams->RefPicSetStCurrAfter[refIdx], pPicParams->RefPicSetStCurrAfter[refIdx]);
322          d3d12_video_decoder_log_pic_entry_hevc(pPicParams->RefPicList[pPicParams->RefPicSetStCurrAfter[refIdx]]);
323          debug_printf("\t\tPicOrderCntValList: %d\n",
324                      pPicParams->PicOrderCntValList[pPicParams->RefPicSetStCurrAfter[refIdx]]);
325       }
326    }
327    for (uint32_t refIdx = 0; refIdx < _countof(DXVA_PicParams_HEVC::RefPicSetLtCurr); refIdx++) {
328       if(pPicParams->RefPicSetLtCurr[refIdx] != DXVA_HEVC_INVALID_PICTURE_ENTRY_VALUE) {
329          debug_printf("\tRefPicSetLtCurr[%d] = %d \n PicEntry RefPicList[%d]\n", refIdx, pPicParams->RefPicSetLtCurr[refIdx], pPicParams->RefPicSetLtCurr[refIdx]);
330          d3d12_video_decoder_log_pic_entry_hevc(pPicParams->RefPicList[pPicParams->RefPicSetLtCurr[refIdx]]);
331          debug_printf("\t\tPicOrderCntValList: %d\n",
332                      pPicParams->PicOrderCntValList[pPicParams->RefPicSetLtCurr[refIdx]]);
333       }
334    }
335 }
336 
337 void
d3d12_video_decoder_sort_rps_lists_by_refpoc(struct d3d12_video_decoder * pD3D12Dec,DXVA_PicParams_HEVC * pDXVAStruct,pipe_h265_picture_desc * pPipeDesc)338 d3d12_video_decoder_sort_rps_lists_by_refpoc(struct d3d12_video_decoder *pD3D12Dec, DXVA_PicParams_HEVC* pDXVAStruct, pipe_h265_picture_desc *pPipeDesc)
339 {
340    // Sort the RPS lists in pDXVAStruct in order by pPipeDesc->PicOrderCntVal for DXVA expectations.
341    // Both arrays have parallel indices
342 
343    pD3D12Dec->m_ReferencesConversionStorage.clear();
344    for (uint8_t i = 0; i < pPipeDesc->NumPocStCurrBefore; i++)
345       pD3D12Dec->m_ReferencesConversionStorage.push_back({ pDXVAStruct->RefPicSetStCurrBefore[i], pPipeDesc->PicOrderCntVal[pDXVAStruct->RefPicSetStCurrBefore[i]] });
346 
347    std::sort(std::begin(pD3D12Dec->m_ReferencesConversionStorage), std::end(pD3D12Dec->m_ReferencesConversionStorage),
348       [](d3d12_video_decoder_reference_poc_entry entryI, d3d12_video_decoder_reference_poc_entry entryJ)
349                                                     { return entryI.poc_value /*desc order*/ > entryJ.poc_value; });
350    for (uint8_t i = 0; i < pPipeDesc->NumPocStCurrBefore; i++)
351       pDXVAStruct->RefPicSetStCurrBefore[i] = pD3D12Dec->m_ReferencesConversionStorage[i].refpicset_index;
352 
353    pD3D12Dec->m_ReferencesConversionStorage.clear();
354    for (uint8_t i = 0; i < pPipeDesc->NumPocStCurrAfter; i++)
355       pD3D12Dec->m_ReferencesConversionStorage.push_back({ pDXVAStruct->RefPicSetStCurrAfter[i], pPipeDesc->PicOrderCntVal[pDXVAStruct->RefPicSetStCurrAfter[i]] });
356 
357    std::sort(std::begin(pD3D12Dec->m_ReferencesConversionStorage), std::end(pD3D12Dec->m_ReferencesConversionStorage),
358       [](d3d12_video_decoder_reference_poc_entry entryI, d3d12_video_decoder_reference_poc_entry entryJ)
359                                                     { return entryI.poc_value /*ascending order*/ < entryJ.poc_value; });
360    for (uint8_t i = 0; i < pPipeDesc->NumPocStCurrAfter; i++)
361       pDXVAStruct->RefPicSetStCurrAfter[i] = pD3D12Dec->m_ReferencesConversionStorage[i].refpicset_index;
362 
363    pD3D12Dec->m_ReferencesConversionStorage.clear();
364    for (uint8_t i = 0; i < pPipeDesc->NumPocLtCurr; i++)
365       pD3D12Dec->m_ReferencesConversionStorage.push_back({ pDXVAStruct->RefPicSetLtCurr[i], pPipeDesc->PicOrderCntVal[pDXVAStruct->RefPicSetLtCurr[i]] });
366 
367    // The ordering of RefPicSetLtCurr is unclear from the DXVA spec, might need to be changed
368    std::sort(std::begin(pD3D12Dec->m_ReferencesConversionStorage), std::end(pD3D12Dec->m_ReferencesConversionStorage),
369       [](d3d12_video_decoder_reference_poc_entry entryI, d3d12_video_decoder_reference_poc_entry entryJ)
370                                                     { return entryI.poc_value /*ascending order*/ < entryJ.poc_value; });
371    for (uint8_t i = 0; i < pPipeDesc->NumPocLtCurr; i++)
372       pDXVAStruct->RefPicSetLtCurr[i] = pD3D12Dec->m_ReferencesConversionStorage[i].refpicset_index;
373 }
374 
375 DXVA_PicParams_HEVC
d3d12_video_decoder_dxva_picparams_from_pipe_picparams_hevc(struct d3d12_video_decoder * pD3D12Dec,pipe_video_profile profile,pipe_h265_picture_desc * pPipeDesc)376 d3d12_video_decoder_dxva_picparams_from_pipe_picparams_hevc(
377    struct d3d12_video_decoder *pD3D12Dec,
378    pipe_video_profile profile,
379    pipe_h265_picture_desc *pPipeDesc)
380 {
381    uint32_t frameNum = pD3D12Dec->m_fenceValue;
382    pipe_h265_pps *pps = pPipeDesc->pps;
383    pipe_h265_sps *sps = pPipeDesc->pps->sps;
384 
385    DXVA_PicParams_HEVC dxvaStructure;
386    memset(&dxvaStructure, 0, sizeof(dxvaStructure));
387 
388    uint8_t log2_min_cb_size = sps->log2_min_luma_coding_block_size_minus3 + 3;
389    dxvaStructure.PicWidthInMinCbsY = sps->pic_width_in_luma_samples  >> log2_min_cb_size;
390    dxvaStructure.PicHeightInMinCbsY = sps->pic_height_in_luma_samples >> log2_min_cb_size;
391    dxvaStructure.chroma_format_idc = sps->chroma_format_idc;
392    dxvaStructure.separate_colour_plane_flag = sps->separate_colour_plane_flag;
393    dxvaStructure.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8;
394    dxvaStructure.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8;
395    dxvaStructure.log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4;
396    dxvaStructure.NoPicReorderingFlag = sps->no_pic_reordering_flag;
397    dxvaStructure.NoBiPredFlag = sps->no_bi_pred_flag;
398 
399    dxvaStructure.CurrPic.bPicEntry = 0; // No semantic for this flag in HEVC DXVA spec
400    // CurrPic.Index7Bits is handled by d3d12_video_decoder_refresh_dpb_active_references_hevc
401 
402    dxvaStructure.sps_max_dec_pic_buffering_minus1         = sps->sps_max_dec_pic_buffering_minus1;
403    dxvaStructure.log2_min_luma_coding_block_size_minus3   = sps->log2_min_luma_coding_block_size_minus3;
404    dxvaStructure.log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_luma_coding_block_size;
405    dxvaStructure.log2_min_transform_block_size_minus2     = sps->log2_min_transform_block_size_minus2;
406    dxvaStructure.log2_diff_max_min_transform_block_size   = sps->log2_diff_max_min_transform_block_size;
407    dxvaStructure.max_transform_hierarchy_depth_inter      = sps->max_transform_hierarchy_depth_inter;
408    dxvaStructure.max_transform_hierarchy_depth_intra      = sps->max_transform_hierarchy_depth_intra;
409    dxvaStructure.num_short_term_ref_pic_sets              = sps->num_short_term_ref_pic_sets;
410    dxvaStructure.num_long_term_ref_pics_sps               = sps->num_long_term_ref_pics_sps;
411 
412    dxvaStructure.num_ref_idx_l0_default_active_minus1     = pps->num_ref_idx_l0_default_active_minus1;
413    dxvaStructure.num_ref_idx_l1_default_active_minus1     = pps->num_ref_idx_l1_default_active_minus1;
414    dxvaStructure.init_qp_minus26                          = pps->init_qp_minus26;
415 
416    // NumDeltaPocsOfRefRpsIdx is not passed from VA to pipe, and VA doesn't have it defined in their va_dec_hevc header.
417    // DXVA drivers should use wNumBitsForShortTermRPSInSlice (st_rps_bits in VA) to derive the slice header info instead
418    dxvaStructure.ucNumDeltaPocsOfRefRpsIdx            = pPipeDesc->NumDeltaPocsOfRefRpsIdx;
419    dxvaStructure.wNumBitsForShortTermRPSInSlice = pps->st_rps_bits;
420 
421    dxvaStructure.scaling_list_enabled_flag = sps->scaling_list_enabled_flag;
422    dxvaStructure.amp_enabled_flag = sps->amp_enabled_flag;
423    dxvaStructure.sample_adaptive_offset_enabled_flag = sps->sample_adaptive_offset_enabled_flag;
424    dxvaStructure.pcm_enabled_flag = sps->pcm_enabled_flag;
425    dxvaStructure.pcm_sample_bit_depth_luma_minus1 = sps->pcm_sample_bit_depth_luma_minus1;
426    dxvaStructure.pcm_sample_bit_depth_chroma_minus1 = sps->pcm_sample_bit_depth_chroma_minus1;
427    dxvaStructure.log2_min_pcm_luma_coding_block_size_minus3 = sps->log2_min_pcm_luma_coding_block_size_minus3;
428    dxvaStructure.log2_diff_max_min_pcm_luma_coding_block_size = sps->log2_diff_max_min_pcm_luma_coding_block_size;
429    dxvaStructure.pcm_loop_filter_disabled_flag = sps->pcm_loop_filter_disabled_flag;
430    dxvaStructure.long_term_ref_pics_present_flag = sps->long_term_ref_pics_present_flag;
431    dxvaStructure.sps_temporal_mvp_enabled_flag = sps->sps_temporal_mvp_enabled_flag;
432    dxvaStructure.strong_intra_smoothing_enabled_flag = sps->strong_intra_smoothing_enabled_flag;
433    dxvaStructure.dependent_slice_segments_enabled_flag = pps->dependent_slice_segments_enabled_flag;
434    dxvaStructure.output_flag_present_flag = pps->output_flag_present_flag;
435    dxvaStructure.num_extra_slice_header_bits = pps->num_extra_slice_header_bits;
436    dxvaStructure.sign_data_hiding_enabled_flag = pps->sign_data_hiding_enabled_flag;
437    dxvaStructure.cabac_init_present_flag = pps->cabac_init_present_flag;
438    dxvaStructure.ReservedBits3 = 0;
439 
440    dxvaStructure.constrained_intra_pred_flag = pps->constrained_intra_pred_flag;
441    dxvaStructure.transform_skip_enabled_flag = pps->transform_skip_enabled_flag;
442    dxvaStructure.cu_qp_delta_enabled_flag = pps->cu_qp_delta_enabled_flag;
443    dxvaStructure.pps_slice_chroma_qp_offsets_present_flag = pps->pps_slice_chroma_qp_offsets_present_flag;
444    dxvaStructure.weighted_pred_flag = pps->weighted_pred_flag;
445    dxvaStructure.weighted_bipred_flag = pps->weighted_bipred_flag;
446    dxvaStructure.transquant_bypass_enabled_flag = pps->transquant_bypass_enabled_flag;
447    dxvaStructure.tiles_enabled_flag = pps->tiles_enabled_flag;
448    dxvaStructure.entropy_coding_sync_enabled_flag = pps->entropy_coding_sync_enabled_flag;
449    dxvaStructure.uniform_spacing_flag = pps->uniform_spacing_flag;
450    dxvaStructure.loop_filter_across_tiles_enabled_flag = (pps->tiles_enabled_flag ? pps->loop_filter_across_tiles_enabled_flag : 0);
451    dxvaStructure.pps_loop_filter_across_slices_enabled_flag = pps->pps_loop_filter_across_slices_enabled_flag;
452    dxvaStructure.deblocking_filter_override_enabled_flag = pps->deblocking_filter_override_enabled_flag;
453    dxvaStructure.pps_deblocking_filter_disabled_flag = pps->pps_deblocking_filter_disabled_flag;
454    dxvaStructure.lists_modification_present_flag = pps->lists_modification_present_flag;
455    dxvaStructure.slice_segment_header_extension_present_flag = pps->slice_segment_header_extension_present_flag;
456    dxvaStructure.IrapPicFlag = pPipeDesc->RAPPicFlag;
457    dxvaStructure.IdrPicFlag = pPipeDesc->IDRPicFlag;
458    dxvaStructure.IntraPicFlag = pPipeDesc->IntraPicFlag;
459    dxvaStructure.pps_cb_qp_offset            = pps->pps_cb_qp_offset;
460    dxvaStructure.pps_cr_qp_offset            = pps->pps_cr_qp_offset;
461    if (pps->tiles_enabled_flag) {
462       dxvaStructure.num_tile_columns_minus1 = pps->num_tile_columns_minus1;
463       dxvaStructure.num_tile_rows_minus1    = pps->num_tile_rows_minus1;
464       if (!pps->uniform_spacing_flag) {
465          for (uint8_t i = 0; i < _countof(dxvaStructure.column_width_minus1); i++)
466             dxvaStructure.column_width_minus1[i] = pps->column_width_minus1[i];
467 
468          for (uint8_t i = 0; i < _countof(dxvaStructure.row_height_minus1); i++)
469             dxvaStructure.row_height_minus1[i] = pps->row_height_minus1[i];
470       }
471    }
472    dxvaStructure.diff_cu_qp_delta_depth           = pps->diff_cu_qp_delta_depth;
473    dxvaStructure.pps_beta_offset_div2             = pps->pps_beta_offset_div2;
474    dxvaStructure.pps_tc_offset_div2               = pps->pps_tc_offset_div2;
475    dxvaStructure.log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level_minus2;
476    dxvaStructure.CurrPicOrderCntVal               = pPipeDesc->CurrPicOrderCntVal;
477 
478    // Update RefPicList with the DPB pictures to be kept alive for current or future frames
479    for (uint8_t refIdx = 0; refIdx < _countof(DXVA_PicParams_HEVC::PicOrderCntValList); refIdx++)
480    {
481       if (pPipeDesc->ref[refIdx] != nullptr) {
482          // Mark as used so d3d12_video_decoder_refresh_dpb_active_references_hevc will assign the correct Index7Bits
483          dxvaStructure.RefPicList[refIdx].Index7Bits = 0;
484          // Mark refpic as LTR if necessary.
485          dxvaStructure.RefPicList[refIdx].AssociatedFlag = pPipeDesc->IsLongTerm[refIdx] ? 1u : 0u;
486       }
487       else
488       {
489          dxvaStructure.RefPicList[refIdx].bPicEntry = DXVA_HEVC_INVALID_PICTURE_ENTRY_VALUE;
490       }
491    }
492 
493    // Copy POC values for the references
494    memcpy(dxvaStructure.PicOrderCntValList, pPipeDesc->PicOrderCntVal, sizeof(dxvaStructure.PicOrderCntValList));
495 
496    // Copy RPS Sets to DXVA or mark them as 0xFF if unused in current frame
497    for (uint8_t i = 0; i < DXVA_RPS_COUNT; i++) {
498       dxvaStructure.RefPicSetStCurrBefore[i] = (i < pPipeDesc->NumPocStCurrBefore) ? pPipeDesc->RefPicSetStCurrBefore[i] : DXVA_HEVC_INVALID_PICTURE_ENTRY_VALUE;
499       dxvaStructure.RefPicSetStCurrAfter[i] = (i < pPipeDesc->NumPocStCurrAfter) ? pPipeDesc->RefPicSetStCurrAfter[i] : DXVA_HEVC_INVALID_PICTURE_ENTRY_VALUE;
500       dxvaStructure.RefPicSetLtCurr[i] = (i < pPipeDesc->NumPocLtCurr) ? pPipeDesc->RefPicSetLtCurr[i] : DXVA_HEVC_INVALID_PICTURE_ENTRY_VALUE;
501    }
502 
503    // DXVA drivers expect these in POC order, VA/pipe sends them out of order.
504    d3d12_video_decoder_sort_rps_lists_by_refpoc(pD3D12Dec, &dxvaStructure, pPipeDesc);
505 
506    for (uint32_t refIdx = 0; refIdx < DXVA_RPS_COUNT; refIdx++) {
507       if ((refIdx < pPipeDesc->NumPocStCurrBefore) && (pPipeDesc->RefPicSetStCurrBefore[refIdx] != DXVA_HEVC_INVALID_PICTURE_ENTRY_VALUE)) {
508          debug_printf("pPipeDesc->RefPicSetStCurrBefore[%d]: %d (index into RefPicList) Refs[%d] pipe buffer ptr = %p - d3d12 resource %p POC: %d\n",
509             refIdx, pPipeDesc->RefPicSetStCurrBefore[refIdx], pPipeDesc->RefPicSetStCurrBefore[refIdx], pPipeDesc->ref[pPipeDesc->RefPicSetStCurrBefore[refIdx]],
510             d3d12_resource_resource(((struct d3d12_video_buffer *)(pPipeDesc->ref[pPipeDesc->RefPicSetStCurrBefore[refIdx]]))->texture),
511             dxvaStructure.PicOrderCntValList[pPipeDesc->RefPicSetStCurrBefore[refIdx]]);
512       }
513       if ((refIdx < pPipeDesc->NumPocStCurrAfter) && (pPipeDesc->RefPicSetStCurrAfter[refIdx] != DXVA_HEVC_INVALID_PICTURE_ENTRY_VALUE)) {
514          debug_printf("pPipeDesc->RefPicSetStCurrAfter[%d]: %d (index into RefPicList) Refs[%d] pipe buffer ptr = %p - d3d12 resource %p POC: %d\n",
515             refIdx, pPipeDesc->RefPicSetStCurrAfter[refIdx], pPipeDesc->RefPicSetStCurrAfter[refIdx], pPipeDesc->ref[pPipeDesc->RefPicSetStCurrAfter[refIdx]],
516             d3d12_resource_resource(((struct d3d12_video_buffer *)(pPipeDesc->ref[pPipeDesc->RefPicSetStCurrAfter[refIdx]]))->texture),
517             dxvaStructure.PicOrderCntValList[pPipeDesc->RefPicSetStCurrAfter[refIdx]]);
518       }
519       if ((refIdx < pPipeDesc->NumPocLtCurr) && (pPipeDesc->RefPicSetLtCurr[refIdx] != DXVA_HEVC_INVALID_PICTURE_ENTRY_VALUE)) {
520          debug_printf("pPipeDesc->RefPicSetLtCurr[%d]: %d (index into RefPicList) Refs[%d] pipe buffer ptr = %p - d3d12 resource %p POC: %d\n",
521             refIdx, pPipeDesc->RefPicSetLtCurr[refIdx], pPipeDesc->RefPicSetLtCurr[refIdx], pPipeDesc->ref[pPipeDesc->RefPicSetLtCurr[refIdx]],
522             d3d12_resource_resource(((struct d3d12_video_buffer *)(pPipeDesc->ref[pPipeDesc->RefPicSetLtCurr[refIdx]]))->texture),
523             dxvaStructure.PicOrderCntValList[pPipeDesc->RefPicSetLtCurr[refIdx]]);
524       }
525    }
526 
527    // DXVA spec: Arbitrary number set by the host decoder to use as a tag in the status report
528    // feedback data. The value should not equal 0, and should be different in each call to
529    // Execute. For more information, see section 12.0, Status Report Data Structure.
530    dxvaStructure.StatusReportFeedbackNumber = frameNum;
531    assert(dxvaStructure.StatusReportFeedbackNumber > 0);
532    return dxvaStructure;
533 }
534 
535 void
d3d12_video_decoder_dxva_qmatrix_from_pipe_picparams_hevc(pipe_h265_picture_desc * pPipeDesc,DXVA_Qmatrix_HEVC & outMatrixBuffer,bool & outScalingListEnabled)536 d3d12_video_decoder_dxva_qmatrix_from_pipe_picparams_hevc(pipe_h265_picture_desc *pPipeDesc,
537                                                           DXVA_Qmatrix_HEVC &outMatrixBuffer,
538                                                           bool &outScalingListEnabled)
539 {
540    // VA is already converting hevc scaling lists to zigzag order
541    // https://gitlab.freedesktop.org/mesa/mesa/-/commit/63dcfed81f011dae5ca68af3369433be28135415
542 
543    outScalingListEnabled = (pPipeDesc->pps->sps->scaling_list_enabled_flag != 0);
544    if (outScalingListEnabled) {
545       memcpy(outMatrixBuffer.ucScalingLists0, pPipeDesc->pps->sps->ScalingList4x4, 6 * 16);
546       memcpy(outMatrixBuffer.ucScalingLists1, pPipeDesc->pps->sps->ScalingList8x8, 6 * 64);
547       memcpy(outMatrixBuffer.ucScalingLists2, pPipeDesc->pps->sps->ScalingList16x16, 6 * 64);
548       memcpy(outMatrixBuffer.ucScalingLists3, pPipeDesc->pps->sps->ScalingList32x32, 2 * 64);
549       memcpy(outMatrixBuffer.ucScalingListDCCoefSizeID2, pPipeDesc->pps->sps->ScalingListDCCoeff16x16, 6);
550       memcpy(outMatrixBuffer.ucScalingListDCCoefSizeID3, pPipeDesc->pps->sps->ScalingListDCCoeff32x32, 2);
551    } else {
552       memset(&outMatrixBuffer, 0, sizeof(outMatrixBuffer));
553    }
554 }
555