1 /*
2 * Copyright © Microsoft Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "d3d12_video_dec.h"
25 #include "d3d12_video_dec_hevc.h"
26 #include "d3d12_resource.h"
27 #include "d3d12_video_buffer.h"
28 #include <cmath>
29
30 void
d3d12_video_decoder_refresh_dpb_active_references_hevc(struct d3d12_video_decoder * pD3D12Dec)31 d3d12_video_decoder_refresh_dpb_active_references_hevc(struct d3d12_video_decoder *pD3D12Dec)
32 {
33 // Method overview
34 // 1. Codec specific strategy in switch statement regarding reference frames eviction policy. Should only mark active
35 // DPB references, leaving evicted ones as unused
36 // 2. Call release_unused_references_texture_memory(); at the end of this method. Any references (and texture
37 // allocations associated)
38 // that were left not marked as used in m_spDPBManager by step (2) are lost.
39
40 // Assign DXVA original Index7Bits indices to current frame and references
41 DXVA_PicParams_HEVC *pCurrPicParams = d3d12_video_decoder_get_current_dxva_picparams<DXVA_PicParams_HEVC>(pD3D12Dec);
42 for (uint8_t i = 0; i < _countof(pCurrPicParams->RefPicList); i++) {
43 // From HEVC DXVA spec:
44 // Index7Bits
45 // An index that identifies an uncompressed surface for the CurrPic or RefPicList member of the picture parameters structure (section 4.0).
46 // When Index7Bits is used in the CurrPic and RefPicList members of the picture parameters structure, the value directly specifies the DXVA index of an uncompressed surface.
47 // When Index7Bits is 127 (0x7F), this indicates that it does not contain a valid index.
48
49 // AssociatedFlag
50 // Optional 1-bit flag associated with the surface. It specifies whether the reference picture is a long-term reference or a short-term reference for RefPicList, and it has no meaning when used for CurrPic.
51 // bPicEntry
52 // Accesses the entire 8 bits of the union.
53
54 if (pCurrPicParams->RefPicList[i].bPicEntry != DXVA_HEVC_INVALID_PICTURE_ENTRY_VALUE) {
55 pCurrPicParams->RefPicList[i].Index7Bits =
56 pD3D12Dec->m_spDPBManager->get_index7bits(pD3D12Dec->m_pCurrentReferenceTargets[i]);
57 }
58 }
59
60 pD3D12Dec->m_spDPBManager->mark_all_references_as_unused();
61 pD3D12Dec->m_spDPBManager->mark_references_in_use(pCurrPicParams->RefPicList);
62
63 // Releases the underlying reference picture texture objects of all references that were not marked as used in this
64 // method.
65 pD3D12Dec->m_spDPBManager->release_unused_references_texture_memory();
66
67 pCurrPicParams->CurrPic.Index7Bits = pD3D12Dec->m_spDPBManager->get_index7bits(pD3D12Dec->m_pCurrentDecodeTarget);
68 }
69
/// Scales a dimension counted in minimum-size coding blocks by the block size.
///
/// @param length  dimension expressed in minimum coding blocks
/// @param cbsize  log2 of the minimum coding block size
/// @return length multiplied by 2^cbsize
inline int
LengthFromMinCb(int length, int cbsize)
{
   const int samplesPerMinCb = 1 << cbsize;
   return length * samplesPerMinCb;
}
75
76 void
d3d12_video_decoder_get_frame_info_hevc(struct d3d12_video_decoder * pD3D12Dec,uint32_t * pWidth,uint32_t * pHeight,uint16_t * pMaxDPB)77 d3d12_video_decoder_get_frame_info_hevc(
78 struct d3d12_video_decoder *pD3D12Dec, uint32_t *pWidth, uint32_t *pHeight, uint16_t *pMaxDPB)
79 {
80 auto pPicParams = d3d12_video_decoder_get_current_dxva_picparams<DXVA_PicParams_HEVC>(pD3D12Dec);
81 UINT log2_min_luma_coding_block_size = pPicParams->log2_min_luma_coding_block_size_minus3 + 3;
82 *pWidth = LengthFromMinCb(pPicParams->PicWidthInMinCbsY, log2_min_luma_coding_block_size);
83 *pHeight = LengthFromMinCb(pPicParams->PicHeightInMinCbsY, log2_min_luma_coding_block_size);
84 *pMaxDPB = pPicParams->sps_max_dec_pic_buffering_minus1 + 1;
85 }
86
87 ///
88 /// Pushes the current frame as next reference, updates the DXVA HEVC structure with the indices of the DPB and
89 /// transitions the references
90 ///
91 void
d3d12_video_decoder_prepare_current_frame_references_hevc(struct d3d12_video_decoder * pD3D12Dec,ID3D12Resource * pTexture2D,uint32_t subresourceIndex)92 d3d12_video_decoder_prepare_current_frame_references_hevc(struct d3d12_video_decoder *pD3D12Dec,
93 ID3D12Resource *pTexture2D,
94 uint32_t subresourceIndex)
95 {
96 DXVA_PicParams_HEVC *pPicParams = d3d12_video_decoder_get_current_dxva_picparams<DXVA_PicParams_HEVC>(pD3D12Dec);
97 pPicParams->CurrPic.Index7Bits = pD3D12Dec->m_spDPBManager->store_future_reference(pPicParams->CurrPic.Index7Bits,
98 pD3D12Dec->m_spVideoDecoderHeap,
99 pTexture2D,
100 subresourceIndex);
101 // From HEVC DXVA spec:
102 // Index7Bits
103 // An index that identifies an uncompressed surface for the CurrPic or RefPicList member of the picture parameters structure (section 4.0).
104 // When Index7Bits is used in the CurrPic and RefPicList members of the picture parameters structure, the value directly specifies the DXVA index of an uncompressed surface.
105 // When Index7Bits is 127 (0x7F), this indicates that it does not contain a valid index.
106
107 pD3D12Dec->m_spDPBManager->update_entries(
108 d3d12_video_decoder_get_current_dxva_picparams<DXVA_PicParams_HEVC>(pD3D12Dec)->RefPicList,
109 pD3D12Dec->m_transitionsStorage);
110
111 pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(pD3D12Dec->m_transitionsStorage.size(), pD3D12Dec->m_transitionsStorage.data());
112
113 // Schedule reverse (back to common) transitions before command list closes for current frame
114 for (auto BarrierDesc : pD3D12Dec->m_transitionsStorage) {
115 std::swap(BarrierDesc.Transition.StateBefore, BarrierDesc.Transition.StateAfter);
116 pD3D12Dec->m_transitionsBeforeCloseCmdList.push_back(BarrierDesc);
117 }
118
119 debug_printf(
120 "[d3d12_video_decoder_prepare_current_frame_references_hevc] DXVA_PicParams_HEVC after index remapping)\n");
121 d3d12_video_decoder_log_pic_params_hevc(
122 d3d12_video_decoder_get_current_dxva_picparams<DXVA_PicParams_HEVC>(pD3D12Dec));
123 }
124
125 void
d3d12_video_decoder_prepare_dxva_slices_control_hevc(struct d3d12_video_decoder * pD3D12Dec,std::vector<uint8_t> & vecOutSliceControlBuffers,struct pipe_h265_picture_desc * picture_hevc)126 d3d12_video_decoder_prepare_dxva_slices_control_hevc(struct d3d12_video_decoder *pD3D12Dec,
127 std::vector<uint8_t> &vecOutSliceControlBuffers,
128 struct pipe_h265_picture_desc *picture_hevc)
129 {
130
131 if(!picture_hevc->slice_parameter.slice_info_present)
132 {
133 unreachable("Unsupported - need pipe_h265_picture_desc.slice_parameter.slice_info_present");
134 }
135
136 debug_printf("[d3d12_video_decoder_hevc] Upper layer reported %d slices for this frame, parsing them below...\n",
137 picture_hevc->slice_parameter.slice_count);
138
139 uint64_t TotalSlicesDXVAArrayByteSize = picture_hevc->slice_parameter.slice_count * sizeof(DXVA_Slice_HEVC_Short);
140 vecOutSliceControlBuffers.resize(TotalSlicesDXVAArrayByteSize);
141
142 uint8_t* pData = vecOutSliceControlBuffers.data();
143 static const uint32_t start_code_size = 3;
144 uint32_t acum_slice_offset = (picture_hevc->slice_parameter.slice_count > 0) ? picture_hevc->slice_parameter.slice_data_offset[0] : 0;
145 for (uint32_t sliceIdx = 0; sliceIdx < picture_hevc->slice_parameter.slice_count; sliceIdx++)
146 {
147 DXVA_Slice_HEVC_Short currentSliceEntry = {};
148 // From HEVC DXVA Spec
149 // wBadSliceChopping
150 // 0 All bits for the slice are located within the corresponding bitstream data buffer.
151 // 1 The bitstream data buffer contains the start of the slice, but not the entire slice, because the buffer is full.
152 // 2 The bitstream data buffer contains the end of the slice. It does not contain the start of the slice, because the start of the slice was located in the previous bitstream data buffer.
153 // 3 The bitstream data buffer does not contain the start of the slice (because the start of the slice was located in the previous bitstream data buffer),
154 // and it does not contain the end of the slice (because the current bitstream data buffer is also full).
155
156 switch (picture_hevc->slice_parameter.slice_data_flag[sliceIdx]) {
157 /* whole slice is in the buffer */
158 case PIPE_SLICE_BUFFER_PLACEMENT_TYPE_WHOLE:
159 currentSliceEntry.wBadSliceChopping = 0u;
160 break;
161 /* The beginning of the slice is in the buffer but the end is not */
162 case PIPE_SLICE_BUFFER_PLACEMENT_TYPE_BEGIN:
163 currentSliceEntry.wBadSliceChopping = 1u;
164 break;
165 /* Neither beginning nor end of the slice is in the buffer */
166 case PIPE_SLICE_BUFFER_PLACEMENT_TYPE_MIDDLE:
167 currentSliceEntry.wBadSliceChopping = 3u;
168 break;
169 /* end of the slice is in the buffer */
170 case PIPE_SLICE_BUFFER_PLACEMENT_TYPE_END:
171 currentSliceEntry.wBadSliceChopping = 2u;
172 break;
173 default:
174 {
175 unreachable("Unsupported pipe_slice_buffer_placement_type");
176 } break;
177 }
178
179 /* slice_data_size from pipe/va does not include the NAL unit size, DXVA requires it */
180 currentSliceEntry.SliceBytesInBuffer = picture_hevc->slice_parameter.slice_data_size[sliceIdx] + start_code_size;
181
182 /* slice_data_offset from pipe/va are relative to the current slice, and in DXVA they are absolute within the frame source buffer */
183 currentSliceEntry.BSNALunitDataLocation = acum_slice_offset;
184 acum_slice_offset += (currentSliceEntry.SliceBytesInBuffer + picture_hevc->slice_parameter.slice_data_offset[sliceIdx]);
185
186 debug_printf("[d3d12_video_decoder_hevc] Detected slice index %" PRIu32 " with SliceBytesInBuffer %d - BSNALunitDataLocation %d - wBadSliceChopping: %" PRIu16
187 " for frame with "
188 "fenceValue: %d\n",
189 sliceIdx,
190 currentSliceEntry.SliceBytesInBuffer,
191 currentSliceEntry.BSNALunitDataLocation,
192 currentSliceEntry.wBadSliceChopping,
193 pD3D12Dec->m_fenceValue);
194
195 memcpy(pData, ¤tSliceEntry, sizeof(DXVA_Slice_HEVC_Short));
196 pData += sizeof(DXVA_Slice_HEVC_Short);
197 }
198 assert(vecOutSliceControlBuffers.size() == TotalSlicesDXVAArrayByteSize);
199 }
200
201 static void
d3d12_video_decoder_log_pic_entry_hevc(DXVA_PicEntry_HEVC & picEntry)202 d3d12_video_decoder_log_pic_entry_hevc(DXVA_PicEntry_HEVC &picEntry)
203 {
204 debug_printf("\t\tIndex7Bits: %d\n"
205 "\t\tAssociatedFlag: %d\n"
206 "\t\tbPicEntry: %d\n",
207 picEntry.Index7Bits,
208 picEntry.AssociatedFlag,
209 picEntry.bPicEntry);
210 }
211
212 void
d3d12_video_decoder_log_pic_params_hevc(DXVA_PicParams_HEVC * pPicParams)213 d3d12_video_decoder_log_pic_params_hevc(DXVA_PicParams_HEVC *pPicParams)
214 {
215 debug_printf("\n=============================================\n");
216 debug_printf("PicWidthInMinCbsY = %d\n", pPicParams->PicWidthInMinCbsY);
217 debug_printf("PicHeightInMinCbsY = %d\n", pPicParams->PicHeightInMinCbsY);
218 debug_printf("chroma_format_idc = %d\n", pPicParams->chroma_format_idc);
219 debug_printf("separate_colour_plane_flag = %d\n", pPicParams->separate_colour_plane_flag);
220 debug_printf("bit_depth_luma_minus8 = %d\n", pPicParams->bit_depth_luma_minus8);
221 debug_printf("bit_depth_chroma_minus8 = %d\n", pPicParams->bit_depth_chroma_minus8);
222 debug_printf("log2_max_pic_order_cnt_lsb_minus4 = %d\n", pPicParams->log2_max_pic_order_cnt_lsb_minus4);
223 debug_printf("NoPicReorderingFlag = %d\n", pPicParams->NoPicReorderingFlag);
224 debug_printf("NoBiPredFlag = %d\n", pPicParams->NoBiPredFlag);
225 debug_printf("ReservedBits1 = %d\n", pPicParams->ReservedBits1);
226 debug_printf("wFormatAndSequenceInfoFlags = %d\n", pPicParams->wFormatAndSequenceInfoFlags);
227 debug_printf("CurrPic.Index7Bits = %d\n", pPicParams->CurrPic.Index7Bits);
228 debug_printf("CurrPic.AssociatedFlag = %d\n", pPicParams->CurrPic.AssociatedFlag);
229 debug_printf("sps_max_dec_pic_buffering_minus1 = %d\n", pPicParams->sps_max_dec_pic_buffering_minus1);
230 debug_printf("log2_min_luma_coding_block_size_minus3 = %d\n", pPicParams->log2_min_luma_coding_block_size_minus3);
231 debug_printf("log2_diff_max_min_luma_coding_block_size = %d\n", pPicParams->log2_diff_max_min_luma_coding_block_size);
232 debug_printf("log2_min_transform_block_size_minus2 = %d\n", pPicParams->log2_min_transform_block_size_minus2);
233 debug_printf("log2_diff_max_min_transform_block_size = %d\n", pPicParams->log2_diff_max_min_transform_block_size);
234 debug_printf("max_transform_hierarchy_depth_inter = %d\n", pPicParams->max_transform_hierarchy_depth_inter);
235 debug_printf("max_transform_hierarchy_depth_intra = %d\n", pPicParams->max_transform_hierarchy_depth_intra);
236 debug_printf("num_short_term_ref_pic_sets = %d\n", pPicParams->num_short_term_ref_pic_sets);
237 debug_printf("num_long_term_ref_pics_sps = %d\n", pPicParams->num_long_term_ref_pics_sps);
238 debug_printf("num_ref_idx_l0_default_active_minus1 = %d\n", pPicParams->num_ref_idx_l0_default_active_minus1);
239 debug_printf("num_ref_idx_l1_default_active_minus1 = %d\n", pPicParams->num_ref_idx_l1_default_active_minus1);
240 debug_printf("init_qp_minus26 = %d\n", pPicParams->init_qp_minus26);
241 debug_printf("ucNumDeltaPocsOfRefRpsIdx = %d\n", pPicParams->ucNumDeltaPocsOfRefRpsIdx);
242 debug_printf("wNumBitsForShortTermRPSInSlice = %d\n", pPicParams->wNumBitsForShortTermRPSInSlice);
243 debug_printf("ReservedBits2 = %d\n", pPicParams->ReservedBits2);
244 debug_printf("scaling_list_enabled_flag = %d\n", pPicParams->scaling_list_enabled_flag);
245 debug_printf("amp_enabled_flag = %d\n", pPicParams->amp_enabled_flag);
246 debug_printf("sample_adaptive_offset_enabled_flag = %d\n", pPicParams->sample_adaptive_offset_enabled_flag);
247 debug_printf("pcm_enabled_flag = %d\n", pPicParams->pcm_enabled_flag);
248 debug_printf("pcm_sample_bit_depth_luma_minus1 = %d\n", pPicParams->pcm_sample_bit_depth_luma_minus1);
249 debug_printf("pcm_sample_bit_depth_chroma_minus1 = %d\n", pPicParams->pcm_sample_bit_depth_chroma_minus1);
250 debug_printf("log2_min_pcm_luma_coding_block_size_minus3 = %d\n", pPicParams->log2_min_pcm_luma_coding_block_size_minus3);
251 debug_printf("log2_diff_max_min_pcm_luma_coding_block_size = %d\n", pPicParams->log2_diff_max_min_pcm_luma_coding_block_size);
252 debug_printf("pcm_loop_filter_disabled_flag = %d\n", pPicParams->pcm_loop_filter_disabled_flag);
253 debug_printf("long_term_ref_pics_present_flag = %d\n", pPicParams->long_term_ref_pics_present_flag);
254 debug_printf("sps_temporal_mvp_enabled_flag = %d\n", pPicParams->sps_temporal_mvp_enabled_flag);
255 debug_printf("strong_intra_smoothing_enabled_flag = %d\n", pPicParams->strong_intra_smoothing_enabled_flag);
256 debug_printf("dependent_slice_segments_enabled_flag = %d\n", pPicParams->dependent_slice_segments_enabled_flag);
257 debug_printf("output_flag_present_flag = %d\n", pPicParams->output_flag_present_flag);
258 debug_printf("num_extra_slice_header_bits = %d\n", pPicParams->num_extra_slice_header_bits);
259 debug_printf("sign_data_hiding_enabled_flag = %d\n", pPicParams->sign_data_hiding_enabled_flag);
260 debug_printf("cabac_init_present_flag = %d\n", pPicParams->cabac_init_present_flag);
261 debug_printf("ReservedBits3 = %d\n", pPicParams->ReservedBits3);
262 debug_printf("dwCodingParamToolFlags = %d\n", pPicParams->dwCodingParamToolFlags);
263 debug_printf("constrained_intra_pred_flag = %d\n", pPicParams->constrained_intra_pred_flag);
264 debug_printf("transform_skip_enabled_flag = %d\n", pPicParams->transform_skip_enabled_flag);
265 debug_printf("cu_qp_delta_enabled_flag = %d\n", pPicParams->cu_qp_delta_enabled_flag);
266 debug_printf("pps_slice_chroma_qp_offsets_present_flag = %d\n", pPicParams->pps_slice_chroma_qp_offsets_present_flag);
267 debug_printf("weighted_pred_flag = %d\n", pPicParams->weighted_pred_flag);
268 debug_printf("weighted_bipred_flag = %d\n", pPicParams->weighted_bipred_flag);
269 debug_printf("transquant_bypass_enabled_flag = %d\n", pPicParams->transquant_bypass_enabled_flag);
270 debug_printf("tiles_enabled_flag = %d\n", pPicParams->tiles_enabled_flag);
271 debug_printf("entropy_coding_sync_enabled_flag = %d\n", pPicParams->entropy_coding_sync_enabled_flag);
272 debug_printf("uniform_spacing_flag = %d\n", pPicParams->uniform_spacing_flag);
273 debug_printf("loop_filter_across_tiles_enabled_flag = %d\n", pPicParams->loop_filter_across_tiles_enabled_flag);
274 debug_printf("pps_loop_filter_across_slices_enabled_flag = %d\n", pPicParams->pps_loop_filter_across_slices_enabled_flag);
275 debug_printf("deblocking_filter_override_enabled_flag = %d\n", pPicParams->deblocking_filter_override_enabled_flag);
276 debug_printf("pps_deblocking_filter_disabled_flag = %d\n", pPicParams->pps_deblocking_filter_disabled_flag);
277 debug_printf("lists_modification_present_flag = %d\n", pPicParams->lists_modification_present_flag);
278 debug_printf("slice_segment_header_extension_present_flag = %d\n", pPicParams->slice_segment_header_extension_present_flag);
279 debug_printf("IrapPicFlag = %d\n", pPicParams->IrapPicFlag);
280 debug_printf("IdrPicFlag = %d\n", pPicParams->IdrPicFlag);
281 debug_printf("IntraPicFlag = %d\n", pPicParams->IntraPicFlag);
282 debug_printf("ReservedBits4 = %d\n", pPicParams->ReservedBits4);
283 debug_printf("dwCodingSettingPicturePropertyFlags = %d\n", pPicParams->dwCodingSettingPicturePropertyFlags);
284 debug_printf("pps_cb_qp_offset = %d\n", pPicParams->pps_cb_qp_offset);
285 debug_printf("pps_cr_qp_offset = %d\n", pPicParams->pps_cr_qp_offset);
286 debug_printf("num_tile_columns_minus1 = %d\n", pPicParams->num_tile_columns_minus1);
287 debug_printf("num_tile_rows_minus1 = %d\n", pPicParams->num_tile_rows_minus1);
288 for (uint32_t i = 0; i < std::min((unsigned) pPicParams->num_tile_columns_minus1 + 1u, (unsigned) _countof(DXVA_PicParams_HEVC::column_width_minus1)); i++) {
289 debug_printf("column_width_minus1[%d]; = %d\n", i, pPicParams->column_width_minus1[i]);
290 }
291 for (uint32_t i = 0; i < std::min((unsigned) pPicParams->num_tile_rows_minus1 + 1u, (unsigned) _countof(DXVA_PicParams_HEVC::row_height_minus1)); i++) {
292 debug_printf("row_height_minus1[%d]; = %d\n", i, pPicParams->row_height_minus1[i]);
293 }
294 debug_printf("diff_cu_qp_delta_depth = %d\n", pPicParams->diff_cu_qp_delta_depth);
295 debug_printf("pps_beta_offset_div2 = %d\n", pPicParams->pps_beta_offset_div2);
296 debug_printf("pps_tc_offset_div2 = %d\n", pPicParams->pps_tc_offset_div2);
297 debug_printf("log2_parallel_merge_level_minus2 = %d\n", pPicParams->log2_parallel_merge_level_minus2);
298 debug_printf("CurrPicOrderCntVal = %d\n", pPicParams->CurrPicOrderCntVal);
299 debug_printf("ReservedBits5 = %d\n", pPicParams->ReservedBits5);
300 debug_printf("ReservedBits6 = %d\n", pPicParams->ReservedBits6);
301 debug_printf("ReservedBits7 = %d\n", pPicParams->ReservedBits7);
302 debug_printf("StatusReportFeedbackNumber = %d\n", pPicParams->StatusReportFeedbackNumber);
303
304 debug_printf("[D3D12 Video Decoder HEVC DXVA PicParams info]\n"
305 "\t[Current Picture Entry]\n");
306 d3d12_video_decoder_log_pic_entry_hevc(pPicParams->CurrPic);
307
308 debug_printf("[D3D12 Video Decoder HEVC DXVA PicParams info]\n"
309 "\t[Current Picture Reference sets, hiding entries with bPicEntry 0xFF]\n");
310
311 for (uint32_t refIdx = 0; refIdx < _countof(DXVA_PicParams_HEVC::RefPicSetStCurrBefore); refIdx++) {
312 if(pPicParams->RefPicSetStCurrBefore[refIdx] != DXVA_HEVC_INVALID_PICTURE_ENTRY_VALUE) {
313 debug_printf("\tRefPicSetStCurrBefore[%d] = %d \n PicEntry RefPicList[%d]\n", refIdx, pPicParams->RefPicSetStCurrBefore[refIdx], pPicParams->RefPicSetStCurrBefore[refIdx]);
314 d3d12_video_decoder_log_pic_entry_hevc(pPicParams->RefPicList[pPicParams->RefPicSetStCurrBefore[refIdx]]);
315 debug_printf("\t\tPicOrderCntValList: %d\n",
316 pPicParams->PicOrderCntValList[pPicParams->RefPicSetStCurrBefore[refIdx]]);
317 }
318 }
319 for (uint32_t refIdx = 0; refIdx < _countof(DXVA_PicParams_HEVC::RefPicSetStCurrAfter); refIdx++) {
320 if(pPicParams->RefPicSetStCurrAfter[refIdx] != DXVA_HEVC_INVALID_PICTURE_ENTRY_VALUE) {
321 debug_printf("\tRefPicSetStCurrAfter[%d] = %d \n PicEntry RefPicList[%d]\n", refIdx, pPicParams->RefPicSetStCurrAfter[refIdx], pPicParams->RefPicSetStCurrAfter[refIdx]);
322 d3d12_video_decoder_log_pic_entry_hevc(pPicParams->RefPicList[pPicParams->RefPicSetStCurrAfter[refIdx]]);
323 debug_printf("\t\tPicOrderCntValList: %d\n",
324 pPicParams->PicOrderCntValList[pPicParams->RefPicSetStCurrAfter[refIdx]]);
325 }
326 }
327 for (uint32_t refIdx = 0; refIdx < _countof(DXVA_PicParams_HEVC::RefPicSetLtCurr); refIdx++) {
328 if(pPicParams->RefPicSetLtCurr[refIdx] != DXVA_HEVC_INVALID_PICTURE_ENTRY_VALUE) {
329 debug_printf("\tRefPicSetLtCurr[%d] = %d \n PicEntry RefPicList[%d]\n", refIdx, pPicParams->RefPicSetLtCurr[refIdx], pPicParams->RefPicSetLtCurr[refIdx]);
330 d3d12_video_decoder_log_pic_entry_hevc(pPicParams->RefPicList[pPicParams->RefPicSetLtCurr[refIdx]]);
331 debug_printf("\t\tPicOrderCntValList: %d\n",
332 pPicParams->PicOrderCntValList[pPicParams->RefPicSetLtCurr[refIdx]]);
333 }
334 }
335 }
336
337 void
d3d12_video_decoder_sort_rps_lists_by_refpoc(struct d3d12_video_decoder * pD3D12Dec,DXVA_PicParams_HEVC * pDXVAStruct,pipe_h265_picture_desc * pPipeDesc)338 d3d12_video_decoder_sort_rps_lists_by_refpoc(struct d3d12_video_decoder *pD3D12Dec, DXVA_PicParams_HEVC* pDXVAStruct, pipe_h265_picture_desc *pPipeDesc)
339 {
340 // Sort the RPS lists in pDXVAStruct in order by pPipeDesc->PicOrderCntVal for DXVA expectations.
341 // Both arrays have parallel indices
342
343 pD3D12Dec->m_ReferencesConversionStorage.clear();
344 for (uint8_t i = 0; i < pPipeDesc->NumPocStCurrBefore; i++)
345 pD3D12Dec->m_ReferencesConversionStorage.push_back({ pDXVAStruct->RefPicSetStCurrBefore[i], pPipeDesc->PicOrderCntVal[pDXVAStruct->RefPicSetStCurrBefore[i]] });
346
347 std::sort(std::begin(pD3D12Dec->m_ReferencesConversionStorage), std::end(pD3D12Dec->m_ReferencesConversionStorage),
348 [](d3d12_video_decoder_reference_poc_entry entryI, d3d12_video_decoder_reference_poc_entry entryJ)
349 { return entryI.poc_value /*desc order*/ > entryJ.poc_value; });
350 for (uint8_t i = 0; i < pPipeDesc->NumPocStCurrBefore; i++)
351 pDXVAStruct->RefPicSetStCurrBefore[i] = pD3D12Dec->m_ReferencesConversionStorage[i].refpicset_index;
352
353 pD3D12Dec->m_ReferencesConversionStorage.clear();
354 for (uint8_t i = 0; i < pPipeDesc->NumPocStCurrAfter; i++)
355 pD3D12Dec->m_ReferencesConversionStorage.push_back({ pDXVAStruct->RefPicSetStCurrAfter[i], pPipeDesc->PicOrderCntVal[pDXVAStruct->RefPicSetStCurrAfter[i]] });
356
357 std::sort(std::begin(pD3D12Dec->m_ReferencesConversionStorage), std::end(pD3D12Dec->m_ReferencesConversionStorage),
358 [](d3d12_video_decoder_reference_poc_entry entryI, d3d12_video_decoder_reference_poc_entry entryJ)
359 { return entryI.poc_value /*ascending order*/ < entryJ.poc_value; });
360 for (uint8_t i = 0; i < pPipeDesc->NumPocStCurrAfter; i++)
361 pDXVAStruct->RefPicSetStCurrAfter[i] = pD3D12Dec->m_ReferencesConversionStorage[i].refpicset_index;
362
363 pD3D12Dec->m_ReferencesConversionStorage.clear();
364 for (uint8_t i = 0; i < pPipeDesc->NumPocLtCurr; i++)
365 pD3D12Dec->m_ReferencesConversionStorage.push_back({ pDXVAStruct->RefPicSetLtCurr[i], pPipeDesc->PicOrderCntVal[pDXVAStruct->RefPicSetLtCurr[i]] });
366
367 // The ordering of RefPicSetLtCurr is unclear from the DXVA spec, might need to be changed
368 std::sort(std::begin(pD3D12Dec->m_ReferencesConversionStorage), std::end(pD3D12Dec->m_ReferencesConversionStorage),
369 [](d3d12_video_decoder_reference_poc_entry entryI, d3d12_video_decoder_reference_poc_entry entryJ)
370 { return entryI.poc_value /*ascending order*/ < entryJ.poc_value; });
371 for (uint8_t i = 0; i < pPipeDesc->NumPocLtCurr; i++)
372 pDXVAStruct->RefPicSetLtCurr[i] = pD3D12Dec->m_ReferencesConversionStorage[i].refpicset_index;
373 }
374
375 DXVA_PicParams_HEVC
d3d12_video_decoder_dxva_picparams_from_pipe_picparams_hevc(struct d3d12_video_decoder * pD3D12Dec,pipe_video_profile profile,pipe_h265_picture_desc * pPipeDesc)376 d3d12_video_decoder_dxva_picparams_from_pipe_picparams_hevc(
377 struct d3d12_video_decoder *pD3D12Dec,
378 pipe_video_profile profile,
379 pipe_h265_picture_desc *pPipeDesc)
380 {
381 uint32_t frameNum = pD3D12Dec->m_fenceValue;
382 pipe_h265_pps *pps = pPipeDesc->pps;
383 pipe_h265_sps *sps = pPipeDesc->pps->sps;
384
385 DXVA_PicParams_HEVC dxvaStructure;
386 memset(&dxvaStructure, 0, sizeof(dxvaStructure));
387
388 uint8_t log2_min_cb_size = sps->log2_min_luma_coding_block_size_minus3 + 3;
389 dxvaStructure.PicWidthInMinCbsY = sps->pic_width_in_luma_samples >> log2_min_cb_size;
390 dxvaStructure.PicHeightInMinCbsY = sps->pic_height_in_luma_samples >> log2_min_cb_size;
391 dxvaStructure.chroma_format_idc = sps->chroma_format_idc;
392 dxvaStructure.separate_colour_plane_flag = sps->separate_colour_plane_flag;
393 dxvaStructure.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8;
394 dxvaStructure.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8;
395 dxvaStructure.log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4;
396 dxvaStructure.NoPicReorderingFlag = sps->no_pic_reordering_flag;
397 dxvaStructure.NoBiPredFlag = sps->no_bi_pred_flag;
398
399 dxvaStructure.CurrPic.bPicEntry = 0; // No semantic for this flag in HEVC DXVA spec
400 // CurrPic.Index7Bits is handled by d3d12_video_decoder_refresh_dpb_active_references_hevc
401
402 dxvaStructure.sps_max_dec_pic_buffering_minus1 = sps->sps_max_dec_pic_buffering_minus1;
403 dxvaStructure.log2_min_luma_coding_block_size_minus3 = sps->log2_min_luma_coding_block_size_minus3;
404 dxvaStructure.log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_luma_coding_block_size;
405 dxvaStructure.log2_min_transform_block_size_minus2 = sps->log2_min_transform_block_size_minus2;
406 dxvaStructure.log2_diff_max_min_transform_block_size = sps->log2_diff_max_min_transform_block_size;
407 dxvaStructure.max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter;
408 dxvaStructure.max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra;
409 dxvaStructure.num_short_term_ref_pic_sets = sps->num_short_term_ref_pic_sets;
410 dxvaStructure.num_long_term_ref_pics_sps = sps->num_long_term_ref_pics_sps;
411
412 dxvaStructure.num_ref_idx_l0_default_active_minus1 = pps->num_ref_idx_l0_default_active_minus1;
413 dxvaStructure.num_ref_idx_l1_default_active_minus1 = pps->num_ref_idx_l1_default_active_minus1;
414 dxvaStructure.init_qp_minus26 = pps->init_qp_minus26;
415
416 // NumDeltaPocsOfRefRpsIdx is not passed from VA to pipe, and VA doesn't have it defined in their va_dec_hevc header.
417 // DXVA drivers should use wNumBitsForShortTermRPSInSlice (st_rps_bits in VA) to derive the slice header info instead
418 dxvaStructure.ucNumDeltaPocsOfRefRpsIdx = pPipeDesc->NumDeltaPocsOfRefRpsIdx;
419 dxvaStructure.wNumBitsForShortTermRPSInSlice = pps->st_rps_bits;
420
421 dxvaStructure.scaling_list_enabled_flag = sps->scaling_list_enabled_flag;
422 dxvaStructure.amp_enabled_flag = sps->amp_enabled_flag;
423 dxvaStructure.sample_adaptive_offset_enabled_flag = sps->sample_adaptive_offset_enabled_flag;
424 dxvaStructure.pcm_enabled_flag = sps->pcm_enabled_flag;
425 dxvaStructure.pcm_sample_bit_depth_luma_minus1 = sps->pcm_sample_bit_depth_luma_minus1;
426 dxvaStructure.pcm_sample_bit_depth_chroma_minus1 = sps->pcm_sample_bit_depth_chroma_minus1;
427 dxvaStructure.log2_min_pcm_luma_coding_block_size_minus3 = sps->log2_min_pcm_luma_coding_block_size_minus3;
428 dxvaStructure.log2_diff_max_min_pcm_luma_coding_block_size = sps->log2_diff_max_min_pcm_luma_coding_block_size;
429 dxvaStructure.pcm_loop_filter_disabled_flag = sps->pcm_loop_filter_disabled_flag;
430 dxvaStructure.long_term_ref_pics_present_flag = sps->long_term_ref_pics_present_flag;
431 dxvaStructure.sps_temporal_mvp_enabled_flag = sps->sps_temporal_mvp_enabled_flag;
432 dxvaStructure.strong_intra_smoothing_enabled_flag = sps->strong_intra_smoothing_enabled_flag;
433 dxvaStructure.dependent_slice_segments_enabled_flag = pps->dependent_slice_segments_enabled_flag;
434 dxvaStructure.output_flag_present_flag = pps->output_flag_present_flag;
435 dxvaStructure.num_extra_slice_header_bits = pps->num_extra_slice_header_bits;
436 dxvaStructure.sign_data_hiding_enabled_flag = pps->sign_data_hiding_enabled_flag;
437 dxvaStructure.cabac_init_present_flag = pps->cabac_init_present_flag;
438 dxvaStructure.ReservedBits3 = 0;
439
440 dxvaStructure.constrained_intra_pred_flag = pps->constrained_intra_pred_flag;
441 dxvaStructure.transform_skip_enabled_flag = pps->transform_skip_enabled_flag;
442 dxvaStructure.cu_qp_delta_enabled_flag = pps->cu_qp_delta_enabled_flag;
443 dxvaStructure.pps_slice_chroma_qp_offsets_present_flag = pps->pps_slice_chroma_qp_offsets_present_flag;
444 dxvaStructure.weighted_pred_flag = pps->weighted_pred_flag;
445 dxvaStructure.weighted_bipred_flag = pps->weighted_bipred_flag;
446 dxvaStructure.transquant_bypass_enabled_flag = pps->transquant_bypass_enabled_flag;
447 dxvaStructure.tiles_enabled_flag = pps->tiles_enabled_flag;
448 dxvaStructure.entropy_coding_sync_enabled_flag = pps->entropy_coding_sync_enabled_flag;
449 dxvaStructure.uniform_spacing_flag = pps->uniform_spacing_flag;
450 dxvaStructure.loop_filter_across_tiles_enabled_flag = (pps->tiles_enabled_flag ? pps->loop_filter_across_tiles_enabled_flag : 0);
451 dxvaStructure.pps_loop_filter_across_slices_enabled_flag = pps->pps_loop_filter_across_slices_enabled_flag;
452 dxvaStructure.deblocking_filter_override_enabled_flag = pps->deblocking_filter_override_enabled_flag;
453 dxvaStructure.pps_deblocking_filter_disabled_flag = pps->pps_deblocking_filter_disabled_flag;
454 dxvaStructure.lists_modification_present_flag = pps->lists_modification_present_flag;
455 dxvaStructure.slice_segment_header_extension_present_flag = pps->slice_segment_header_extension_present_flag;
456 dxvaStructure.IrapPicFlag = pPipeDesc->RAPPicFlag;
457 dxvaStructure.IdrPicFlag = pPipeDesc->IDRPicFlag;
458 dxvaStructure.IntraPicFlag = pPipeDesc->IntraPicFlag;
459 dxvaStructure.pps_cb_qp_offset = pps->pps_cb_qp_offset;
460 dxvaStructure.pps_cr_qp_offset = pps->pps_cr_qp_offset;
461 if (pps->tiles_enabled_flag) {
462 dxvaStructure.num_tile_columns_minus1 = pps->num_tile_columns_minus1;
463 dxvaStructure.num_tile_rows_minus1 = pps->num_tile_rows_minus1;
464 if (!pps->uniform_spacing_flag) {
465 for (uint8_t i = 0; i < _countof(dxvaStructure.column_width_minus1); i++)
466 dxvaStructure.column_width_minus1[i] = pps->column_width_minus1[i];
467
468 for (uint8_t i = 0; i < _countof(dxvaStructure.row_height_minus1); i++)
469 dxvaStructure.row_height_minus1[i] = pps->row_height_minus1[i];
470 }
471 }
472 dxvaStructure.diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth;
473 dxvaStructure.pps_beta_offset_div2 = pps->pps_beta_offset_div2;
474 dxvaStructure.pps_tc_offset_div2 = pps->pps_tc_offset_div2;
475 dxvaStructure.log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level_minus2;
476 dxvaStructure.CurrPicOrderCntVal = pPipeDesc->CurrPicOrderCntVal;
477
478 // Update RefPicList with the DPB pictures to be kept alive for current or future frames
479 for (uint8_t refIdx = 0; refIdx < _countof(DXVA_PicParams_HEVC::PicOrderCntValList); refIdx++)
480 {
481 if (pPipeDesc->ref[refIdx] != nullptr) {
482 // Mark as used so d3d12_video_decoder_refresh_dpb_active_references_hevc will assign the correct Index7Bits
483 dxvaStructure.RefPicList[refIdx].Index7Bits = 0;
484 // Mark refpic as LTR if necessary.
485 dxvaStructure.RefPicList[refIdx].AssociatedFlag = pPipeDesc->IsLongTerm[refIdx] ? 1u : 0u;
486 }
487 else
488 {
489 dxvaStructure.RefPicList[refIdx].bPicEntry = DXVA_HEVC_INVALID_PICTURE_ENTRY_VALUE;
490 }
491 }
492
493 // Copy POC values for the references
494 memcpy(dxvaStructure.PicOrderCntValList, pPipeDesc->PicOrderCntVal, sizeof(dxvaStructure.PicOrderCntValList));
495
496 // Copy RPS Sets to DXVA or mark them as 0xFF if unused in current frame
497 for (uint8_t i = 0; i < DXVA_RPS_COUNT; i++) {
498 dxvaStructure.RefPicSetStCurrBefore[i] = (i < pPipeDesc->NumPocStCurrBefore) ? pPipeDesc->RefPicSetStCurrBefore[i] : DXVA_HEVC_INVALID_PICTURE_ENTRY_VALUE;
499 dxvaStructure.RefPicSetStCurrAfter[i] = (i < pPipeDesc->NumPocStCurrAfter) ? pPipeDesc->RefPicSetStCurrAfter[i] : DXVA_HEVC_INVALID_PICTURE_ENTRY_VALUE;
500 dxvaStructure.RefPicSetLtCurr[i] = (i < pPipeDesc->NumPocLtCurr) ? pPipeDesc->RefPicSetLtCurr[i] : DXVA_HEVC_INVALID_PICTURE_ENTRY_VALUE;
501 }
502
503 // DXVA drivers expect these in POC order, VA/pipe sends them out of order.
504 d3d12_video_decoder_sort_rps_lists_by_refpoc(pD3D12Dec, &dxvaStructure, pPipeDesc);
505
506 for (uint32_t refIdx = 0; refIdx < DXVA_RPS_COUNT; refIdx++) {
507 if ((refIdx < pPipeDesc->NumPocStCurrBefore) && (pPipeDesc->RefPicSetStCurrBefore[refIdx] != DXVA_HEVC_INVALID_PICTURE_ENTRY_VALUE)) {
508 debug_printf("pPipeDesc->RefPicSetStCurrBefore[%d]: %d (index into RefPicList) Refs[%d] pipe buffer ptr = %p - d3d12 resource %p POC: %d\n",
509 refIdx, pPipeDesc->RefPicSetStCurrBefore[refIdx], pPipeDesc->RefPicSetStCurrBefore[refIdx], pPipeDesc->ref[pPipeDesc->RefPicSetStCurrBefore[refIdx]],
510 d3d12_resource_resource(((struct d3d12_video_buffer *)(pPipeDesc->ref[pPipeDesc->RefPicSetStCurrBefore[refIdx]]))->texture),
511 dxvaStructure.PicOrderCntValList[pPipeDesc->RefPicSetStCurrBefore[refIdx]]);
512 }
513 if ((refIdx < pPipeDesc->NumPocStCurrAfter) && (pPipeDesc->RefPicSetStCurrAfter[refIdx] != DXVA_HEVC_INVALID_PICTURE_ENTRY_VALUE)) {
514 debug_printf("pPipeDesc->RefPicSetStCurrAfter[%d]: %d (index into RefPicList) Refs[%d] pipe buffer ptr = %p - d3d12 resource %p POC: %d\n",
515 refIdx, pPipeDesc->RefPicSetStCurrAfter[refIdx], pPipeDesc->RefPicSetStCurrAfter[refIdx], pPipeDesc->ref[pPipeDesc->RefPicSetStCurrAfter[refIdx]],
516 d3d12_resource_resource(((struct d3d12_video_buffer *)(pPipeDesc->ref[pPipeDesc->RefPicSetStCurrAfter[refIdx]]))->texture),
517 dxvaStructure.PicOrderCntValList[pPipeDesc->RefPicSetStCurrAfter[refIdx]]);
518 }
519 if ((refIdx < pPipeDesc->NumPocLtCurr) && (pPipeDesc->RefPicSetLtCurr[refIdx] != DXVA_HEVC_INVALID_PICTURE_ENTRY_VALUE)) {
520 debug_printf("pPipeDesc->RefPicSetLtCurr[%d]: %d (index into RefPicList) Refs[%d] pipe buffer ptr = %p - d3d12 resource %p POC: %d\n",
521 refIdx, pPipeDesc->RefPicSetLtCurr[refIdx], pPipeDesc->RefPicSetLtCurr[refIdx], pPipeDesc->ref[pPipeDesc->RefPicSetLtCurr[refIdx]],
522 d3d12_resource_resource(((struct d3d12_video_buffer *)(pPipeDesc->ref[pPipeDesc->RefPicSetLtCurr[refIdx]]))->texture),
523 dxvaStructure.PicOrderCntValList[pPipeDesc->RefPicSetLtCurr[refIdx]]);
524 }
525 }
526
527 // DXVA spec: Arbitrary number set by the host decoder to use as a tag in the status report
528 // feedback data. The value should not equal 0, and should be different in each call to
529 // Execute. For more information, see section 12.0, Status Report Data Structure.
530 dxvaStructure.StatusReportFeedbackNumber = frameNum;
531 assert(dxvaStructure.StatusReportFeedbackNumber > 0);
532 return dxvaStructure;
533 }
534
535 void
d3d12_video_decoder_dxva_qmatrix_from_pipe_picparams_hevc(pipe_h265_picture_desc * pPipeDesc,DXVA_Qmatrix_HEVC & outMatrixBuffer,bool & outScalingListEnabled)536 d3d12_video_decoder_dxva_qmatrix_from_pipe_picparams_hevc(pipe_h265_picture_desc *pPipeDesc,
537 DXVA_Qmatrix_HEVC &outMatrixBuffer,
538 bool &outScalingListEnabled)
539 {
540 // VA is already converting hevc scaling lists to zigzag order
541 // https://gitlab.freedesktop.org/mesa/mesa/-/commit/63dcfed81f011dae5ca68af3369433be28135415
542
543 outScalingListEnabled = (pPipeDesc->pps->sps->scaling_list_enabled_flag != 0);
544 if (outScalingListEnabled) {
545 memcpy(outMatrixBuffer.ucScalingLists0, pPipeDesc->pps->sps->ScalingList4x4, 6 * 16);
546 memcpy(outMatrixBuffer.ucScalingLists1, pPipeDesc->pps->sps->ScalingList8x8, 6 * 64);
547 memcpy(outMatrixBuffer.ucScalingLists2, pPipeDesc->pps->sps->ScalingList16x16, 6 * 64);
548 memcpy(outMatrixBuffer.ucScalingLists3, pPipeDesc->pps->sps->ScalingList32x32, 2 * 64);
549 memcpy(outMatrixBuffer.ucScalingListDCCoefSizeID2, pPipeDesc->pps->sps->ScalingListDCCoeff16x16, 6);
550 memcpy(outMatrixBuffer.ucScalingListDCCoefSizeID3, pPipeDesc->pps->sps->ScalingListDCCoeff32x32, 2);
551 } else {
552 memset(&outMatrixBuffer, 0, sizeof(outMatrixBuffer));
553 }
554 }
555