1 /*
2 * Copyright © Microsoft Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "d3d12_context.h"
25 #include "d3d12_format.h"
26 #include "d3d12_resource.h"
27 #include "d3d12_screen.h"
28 #include "d3d12_surface.h"
29 #include "d3d12_video_dec.h"
30 #if VIDEO_CODEC_H264DEC
31 #include "d3d12_video_dec_h264.h"
32 #endif
33 #if VIDEO_CODEC_H265DEC
34 #include "d3d12_video_dec_hevc.h"
35 #endif
36 #if VIDEO_CODEC_AV1DEC
37 #include "d3d12_video_dec_av1.h"
38 #endif
39 #if VIDEO_CODEC_VP9DEC
40 #include "d3d12_video_dec_vp9.h"
41 #endif
42 #include "d3d12_video_buffer.h"
43 #include "d3d12_residency.h"
44
45 #include "vl/vl_video_buffer.h"
46 #include "util/format/u_format.h"
47 #include "util/u_inlines.h"
48 #include "util/u_memory.h"
49 #include "util/u_video.h"
50
51 uint64_t
d3d12_video_decoder_pool_current_index(struct d3d12_video_decoder * pD3D12Dec)52 d3d12_video_decoder_pool_current_index(struct d3d12_video_decoder *pD3D12Dec)
53 {
54 return pD3D12Dec->m_fenceValue % D3D12_VIDEO_DEC_ASYNC_DEPTH;
55 }
56
/**
 * Creates a d3d12_video_decoder for the given codec/profile.
 *
 * Returns the embedded pipe_video_codec base pointer on success, or nullptr on
 * failure. On any failure path the partially-constructed decoder is torn down
 * via d3d12_video_decoder_destroy (which tolerates partially-initialized state).
 */
struct pipe_video_codec *
d3d12_video_create_decoder(struct pipe_context *context, const struct pipe_video_codec *codec)
{
   ///
   /// Initialize d3d12_video_decoder
   ///


   // Not using new doesn't call ctor and the initializations in the class declaration are lost
   struct d3d12_video_decoder *pD3D12Dec = new d3d12_video_decoder;

   // One resource set per in-flight frame; slots are reused round-robin keyed
   // by m_fenceValue (see d3d12_video_decoder_pool_current_index)
   pD3D12Dec->m_inflightResourcesPool.resize(D3D12_VIDEO_DEC_ASYNC_DEPTH, { 0 });

   pD3D12Dec->base = *codec;
   pD3D12Dec->m_screen = context->screen;

   pD3D12Dec->base.context = context;
   pD3D12Dec->base.width = codec->width;
   pD3D12Dec->base.height = codec->height;
   // Only fill methods that are supported by the d3d12 decoder, leaving null the rest (ie. encode_* / decode_macroblock
   // / get_feedback for encode)
   pD3D12Dec->base.destroy = d3d12_video_decoder_destroy;
   pD3D12Dec->base.begin_frame = d3d12_video_decoder_begin_frame;
   pD3D12Dec->base.decode_bitstream = d3d12_video_decoder_decode_bitstream;
   pD3D12Dec->base.end_frame = d3d12_video_decoder_end_frame;
   pD3D12Dec->base.flush = d3d12_video_decoder_flush;
   pD3D12Dec->base.get_decoder_fence = d3d12_video_decoder_get_decoder_fence;

   // Derive DXGI format and D3D12 decode profile from the pipe profile
   pD3D12Dec->m_decodeFormat = d3d12_convert_pipe_video_profile_to_dxgi_format(codec->profile);
   pD3D12Dec->m_d3d12DecProfileType = d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(codec->profile);
   pD3D12Dec->m_d3d12DecProfile = d3d12_video_decoder_convert_pipe_video_profile_to_d3d12_profile(codec->profile);

   ///
   /// Try initializing D3D12 Video device and check for device caps
   ///

   struct d3d12_context *pD3D12Ctx = (struct d3d12_context *) context;
   pD3D12Dec->m_pD3D12Screen = d3d12_screen(pD3D12Ctx->base.screen);

   ///
   /// Create decode objects
   ///
   // hr declared before the first goto so no initialization is skipped (C++ rule)
   HRESULT hr = S_OK;
   // QueryInterface for ID3D12VideoDevice: fails if the adapter has no video support
   if (FAILED(pD3D12Dec->m_pD3D12Screen->dev->QueryInterface(
          IID_PPV_ARGS(pD3D12Dec->m_spD3D12VideoDevice.GetAddressOf())))) {
      debug_printf("[d3d12_video_decoder] d3d12_video_create_decoder - D3D12 Device has no Video support\n");
      goto failed;
   }

   if (!d3d12_video_decoder_check_caps_and_create_decoder(pD3D12Dec->m_pD3D12Screen, pD3D12Dec)) {
      debug_printf("[d3d12_video_decoder] d3d12_video_create_decoder - Failure on "
                   "d3d12_video_decoder_check_caps_and_create_decoder\n");
      goto failed;
   }

   if (!d3d12_video_decoder_create_command_objects(pD3D12Dec->m_pD3D12Screen, pD3D12Dec)) {
      debug_printf(
         "[d3d12_video_decoder] d3d12_video_create_decoder - Failure on d3d12_video_decoder_create_command_objects\n");
      goto failed;
   }

   if (!d3d12_video_decoder_create_video_state_buffers(pD3D12Dec->m_pD3D12Screen, pD3D12Dec)) {
      debug_printf("[d3d12_video_decoder] d3d12_video_create_decoder - Failure on "
                   "d3d12_video_decoder_create_video_state_buffers\n");
      goto failed;
   }

   // Query per-format info (e.g. PlaneCount, used by end_frame plane transitions)
   pD3D12Dec->m_decodeFormatInfo = { pD3D12Dec->m_decodeFormat };
   hr = pD3D12Dec->m_pD3D12Screen->dev->CheckFeatureSupport(D3D12_FEATURE_FORMAT_INFO,
                                                            &pD3D12Dec->m_decodeFormatInfo,
                                                            sizeof(pD3D12Dec->m_decodeFormatInfo));
   if (FAILED(hr)) {
      debug_printf("CheckFeatureSupport failed with HR %x\n", hr);
      goto failed;
   }

   return &pD3D12Dec->base;

failed:
   // destroy tolerates partially-initialized decoders (checks members for nullptr)
   if (pD3D12Dec != nullptr) {
      d3d12_video_decoder_destroy((struct pipe_video_codec *) pD3D12Dec);
   }

   return nullptr;
}
142
/**
 * Destroys a d3d12_video_decoder
 * Call destroy_XX for applicable XX nested member types before deallocating
 * Destroy methods should check != nullptr on their input target argument as this method can be called as part of
 * cleanup from failure on the creation method
 */
void
d3d12_video_decoder_destroy(struct pipe_video_codec *codec)
{
   // Tolerate nullptr: this can be invoked from a failed creation path
   if (codec == nullptr) {
      return;
   }

   struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
   // Flush and wait for completion of any in-flight GPU work before destroying objects
   d3d12_video_decoder_flush(codec);
   if (pD3D12Dec->m_fenceValue > 1 /* Check we submitted at least one frame */) {
      // Wait on the decode queue fence of the most recently submitted frame
      // ((m_fenceValue - 1) indexes the slot used by the last flush)
      auto decode_queue_completion_fence = pD3D12Dec->m_inflightResourcesPool[(pD3D12Dec->m_fenceValue - 1u) % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_FenceData;
      d3d12_video_decoder_sync_completion(codec, decode_queue_completion_fence.cmdqueue_fence, decode_queue_completion_fence.value, OS_TIMEOUT_INFINITE);
      // Also flush + CPU-wait the graphics context queue (used for uploads/copies),
      // then drop our reference to the returned fence handle
      struct pipe_fence_handle *context_queue_completion_fence = NULL;
      pD3D12Dec->base.context->flush(pD3D12Dec->base.context, &context_queue_completion_fence, PIPE_FLUSH_ASYNC | PIPE_FLUSH_HINT_FINISH);
      pD3D12Dec->m_pD3D12Screen->base.fence_finish(&pD3D12Dec->m_pD3D12Screen->base, NULL, context_queue_completion_fence, OS_TIMEOUT_INFINITE);
      pD3D12Dec->m_pD3D12Screen->base.fence_reference(&pD3D12Dec->m_pD3D12Screen->base, &context_queue_completion_fence, NULL);
   }

   //
   // Destroys a decoder
   // Call destroy_XX for applicable XX nested member types before deallocating
   // Destroy methods should check != nullptr on their input target argument as this method can be called as part of
   // cleanup from failure on the creation method
   //

   // No need for d3d12_destroy_video_objects
   //    All the objects created here are smart pointer members of d3d12_video_decoder
   // No need for d3d12_destroy_video_decoder_and_heap
   //    All the objects created here are smart pointer members of d3d12_video_decoder
   // No need for d3d12_destroy_video_dpbmanagers
   //    All the objects created here are smart pointer members of d3d12_video_decoder

   // No need for m_pD3D12Screen as it is not managed by d3d12_video_decoder

   // Call dtor to make ComPtr work
   delete pD3D12Dec;
}
187
188 /**
189 * start decoding of a new frame
190 */
191 void
d3d12_video_decoder_begin_frame(struct pipe_video_codec * codec,struct pipe_video_buffer * target,struct pipe_picture_desc * picture)192 d3d12_video_decoder_begin_frame(struct pipe_video_codec *codec,
193 struct pipe_video_buffer *target,
194 struct pipe_picture_desc *picture)
195 {
196 // Do nothing here. Initialize happens on decoder creation, re-config (if any) happens in
197 // d3d12_video_decoder_decode_bitstream
198 struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
199 assert(pD3D12Dec);
200
201 ///
202 /// Wait here to make sure the next in flight resource set is empty before using it
203 ///
204 uint64_t fenceValueToWaitOn = static_cast<uint64_t>(
205 std::max(static_cast<int64_t>(0l),
206 static_cast<int64_t>(pD3D12Dec->m_fenceValue) - static_cast<int64_t>(D3D12_VIDEO_DEC_ASYNC_DEPTH)));
207
208 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_begin_frame Waiting for completion of in flight resource "
209 "sets with previous work with fenceValue: %" PRIu64 "\n",
210 fenceValueToWaitOn);
211
212 ASSERTED bool wait_res =
213 d3d12_video_decoder_sync_completion(codec, pD3D12Dec->m_spFence.Get(), fenceValueToWaitOn, OS_TIMEOUT_INFINITE);
214 assert(wait_res);
215
216 HRESULT hr = pD3D12Dec->m_spDecodeCommandList->Reset(
217 pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_spCommandAllocator.Get());
218 if (FAILED(hr)) {
219 debug_printf("[d3d12_video_decoder] resetting ID3D12GraphicsCommandList failed with HR %x\n", hr);
220 assert(false);
221 }
222
223 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_begin_frame finalized for fenceValue: %d\n",
224 pD3D12Dec->m_fenceValue);
225 }
226
227 /**
228 * decode a bitstream
229 */
230 void
d3d12_video_decoder_decode_bitstream(struct pipe_video_codec * codec,struct pipe_video_buffer * target,struct pipe_picture_desc * picture,unsigned num_buffers,const void * const * buffers,const unsigned * sizes)231 d3d12_video_decoder_decode_bitstream(struct pipe_video_codec *codec,
232 struct pipe_video_buffer *target,
233 struct pipe_picture_desc *picture,
234 unsigned num_buffers,
235 const void *const *buffers,
236 const unsigned *sizes)
237 {
238 struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
239 assert(pD3D12Dec);
240 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_decode_bitstream started for fenceValue: %d\n",
241 pD3D12Dec->m_fenceValue);
242 assert(pD3D12Dec->m_spD3D12VideoDevice);
243 assert(pD3D12Dec->m_spDecodeCommandQueue);
244 assert(pD3D12Dec->m_pD3D12Screen);
245 ASSERTED struct d3d12_video_buffer *pD3D12VideoBuffer = (struct d3d12_video_buffer *) target;
246 assert(pD3D12VideoBuffer);
247
248 ///
249 /// Compressed bitstream buffers
250 ///
251
252 /// Mesa VA frontend Video buffer passing semantics for H264, HEVC, MPEG4, VC1 and PIPE_VIDEO_PROFILE_VC1_ADVANCED
253 /// are: If num_buffers == 1 -> buf[0] has the compressed bitstream WITH the starting code If num_buffers == 2 ->
254 /// buf[0] has the NALU starting code and buf[1] has the compressed bitstream WITHOUT any starting code. If
255 /// num_buffers = 3 -> It's JPEG, not supported in D3D12. num_buffers is at most 3.
256 /// Mesa VDPAU frontend passes the buffers as they get passed in VdpDecoderRender without fixing any start codes
257 /// except for PIPE_VIDEO_PROFILE_VC1_ADVANCED
258 // In https://http.download.nvidia.com/XFree86/vdpau/doxygen/html/index.html#video_mixer_usage it's mentioned that:
259 // It is recommended that applications pass solely the slice data to VDPAU; specifically that any header data
260 // structures be excluded from the portion of the bitstream passed to VDPAU. VDPAU implementations must operate
261 // correctly if non-slice data is included, at least for formats employing start codes to delimit slice data. For all
262 // codecs/profiles it's highly recommended (when the codec/profile has such codes...) that the start codes are passed
263 // to VDPAU, even when not included in the bitstream the VDPAU client is parsing. Let's assume we get all the start
264 // codes for VDPAU. The doc also says "VDPAU implementations must operate correctly if non-slice data is included, at
265 // least for formats employing start codes to delimit slice data" if we ever get an issue with VDPAU start codes we
266 // should consider adding the code that handles this in the VDPAU layer above the gallium driver like mesa VA does.
267
268 // To handle the multi-slice case end_frame already takes care of this by parsing the start codes from the
269 // combined bitstream of all decode_bitstream calls.
270
271 // VAAPI seems to send one decode_bitstream command per slice, but we should also support the VDPAU case where the
272 // buffers have multiple buffer array entry per slice {startCode (optional), slice1, slice2, ..., startCode
273 // (optional) , sliceN}
274
275 if (num_buffers > 2) // Assume this means multiple slices at once in a decode_bitstream call
276 {
277 // Based on VA frontend codebase, this never happens for video (no JPEG)
278 // Based on VDPAU frontends codebase, this only happens when sending more than one slice at once in decode bitstream
279
280 // To handle the case where VDPAU send all the slices at once in a single decode_bitstream call, let's pretend it
281 // was a series of different calls
282
283 // group by start codes and buffers and perform calls for the number of slices
284 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_decode_bitstream multiple slices on same call detected "
285 "for fenceValue: %d, breaking down the calls into one per slice\n",
286 pD3D12Dec->m_fenceValue);
287
288 size_t curBufferIdx = 0;
289
290 // Vars to be used for the delegation calls to decode_bitstream
291 unsigned call_num_buffers = 0;
292 const void *const *call_buffers = nullptr;
293 const unsigned *call_sizes = nullptr;
294
295 while (curBufferIdx < num_buffers) {
296 // Store the current buffer as the base array pointer for the delegated call, later decide if it'll be a
297 // startcode+slicedata or just slicedata call
298 call_buffers = &buffers[curBufferIdx];
299 call_sizes = &sizes[curBufferIdx];
300
301 // Usually start codes are less or equal than 4 bytes
302 // If the current buffer is a start code buffer, send it along with the next buffer. Otherwise, just send the
303 // current buffer.
304 call_num_buffers = (sizes[curBufferIdx] <= 4) ? 2 : 1;
305
306 // Delegate call with one or two buffers only
307 d3d12_video_decoder_decode_bitstream(codec, target, picture, call_num_buffers, call_buffers, call_sizes);
308
309 curBufferIdx += call_num_buffers; // Consume from the loop the buffers sent in the last call
310 }
311 } else {
312 ///
313 /// Handle single slice buffer path, maybe with an extra start code buffer at buffers[0].
314 ///
315
316 // Both the start codes being present at buffers[0] and the rest in buffers [1] or full buffer at [0] cases can be
317 // handled by flattening all the buffers into a single one and passing that to HW.
318
319 size_t totalReceivedBuffersSize = 0u; // Combined size of all sizes[]
320 for (size_t bufferIdx = 0; bufferIdx < num_buffers; bufferIdx++) {
321 totalReceivedBuffersSize += sizes[bufferIdx];
322 }
323
324 // Bytes of data pre-staged before this decode_frame call
325 auto &inFlightResources = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)];
326 size_t preStagedDataSize = inFlightResources.m_stagingDecodeBitstream.size();
327
328 // Extend the staging buffer size, as decode_frame can be called several times before end_frame
329 inFlightResources.m_stagingDecodeBitstream.resize(preStagedDataSize + totalReceivedBuffersSize);
330
331 // Point newSliceDataPositionDstBase to the end of the pre-staged data in m_stagingDecodeBitstream, where the new
332 // buffers will be appended
333 uint8_t *newSliceDataPositionDstBase = inFlightResources.m_stagingDecodeBitstream.data() + preStagedDataSize;
334
335 // Append new data at the end.
336 size_t dstOffset = 0u;
337 for (size_t bufferIdx = 0; bufferIdx < num_buffers; bufferIdx++) {
338 memcpy(newSliceDataPositionDstBase + dstOffset, buffers[bufferIdx], sizes[bufferIdx]);
339 dstOffset += sizes[bufferIdx];
340 }
341
342 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_decode_bitstream finalized for fenceValue: %d\n",
343 pD3D12Dec->m_fenceValue);
344 }
345
346 if (pD3D12Dec->m_d3d12DecProfileType == d3d12_video_decode_profile_type_h264) {
347 struct pipe_h264_picture_desc *h264 = (pipe_h264_picture_desc*) picture;
348 target->interlaced = !h264->pps->sps->frame_mbs_only_flag;
349 }
350 }
351
/**
 * Caches the upper layer's current decode target and its per-codec reference
 * frame list into the decoder, so later stages (end_frame / DPB management)
 * can resolve references for the active profile type.
 */
void
d3d12_video_decoder_store_upper_layer_references(struct d3d12_video_decoder *pD3D12Dec,
                                                 struct pipe_video_buffer *target,
                                                 struct pipe_picture_desc *picture)
{
#if D3D12_VIDEO_ANY_DECODER_ENABLED
   pD3D12Dec->m_pCurrentDecodeTarget = target;
   // Cast the generic picture desc to the codec-specific one and keep its ref list
   switch (pD3D12Dec->m_d3d12DecProfileType) {
#if VIDEO_CODEC_H264DEC
      case d3d12_video_decode_profile_type_h264:
      {
         pipe_h264_picture_desc *pH264PicDesc = (pipe_h264_picture_desc *) picture;
         pD3D12Dec->m_pCurrentReferenceTargets = pH264PicDesc->ref;
      } break;
#endif
#if VIDEO_CODEC_H265DEC
      case d3d12_video_decode_profile_type_hevc:
      {
         pipe_h265_picture_desc *pHevcPicDesc = (pipe_h265_picture_desc *) picture;
         pD3D12Dec->m_pCurrentReferenceTargets = pHevcPicDesc->ref;
      } break;
#endif
#if VIDEO_CODEC_AV1DEC
      case d3d12_video_decode_profile_type_av1:
      {
         pipe_av1_picture_desc *pAV1PicDesc = (pipe_av1_picture_desc *) picture;
         pD3D12Dec->m_pCurrentReferenceTargets = pAV1PicDesc->ref;
      } break;
#endif
#if VIDEO_CODEC_VP9DEC
      case d3d12_video_decode_profile_type_vp9:
      {
         pipe_vp9_picture_desc *pVP9PicDesc = (pipe_vp9_picture_desc *) picture;
         pD3D12Dec->m_pCurrentReferenceTargets = pVP9PicDesc->ref;
      } break;
#endif
      default:
      {
         unreachable("Unsupported d3d12_video_decode_profile_type");
      } break;
   }
#endif   // D3D12_VIDEO_ANY_DECODER_ENABLED
}
395
/**
 * End decoding of the current frame.
 *
 * Uploads the staged bitstream to GPU memory, records the DecodeFrame1
 * operation (with resource state transitions) on the decode command list,
 * flushes the work to the GPU, and publishes an output fence in
 * picture->fence. Returns 0 on success, 1 on failure.
 */
int
d3d12_video_decoder_end_frame(struct pipe_video_codec *codec,
                              struct pipe_video_buffer *target,
                              struct pipe_picture_desc *picture)
{
   struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
   assert(pD3D12Dec);
   struct d3d12_screen *pD3D12Screen = (struct d3d12_screen *) pD3D12Dec->m_pD3D12Screen;
   assert(pD3D12Screen);
   // NOTE(review): m_fenceValue is 64-bit elsewhere in this file; %d is a
   // mismatched printf specifier here (and in the other logs below) - should be PRIu64.
   debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame started for fenceValue: %d\n",
                pD3D12Dec->m_fenceValue);
   assert(pD3D12Dec->m_spD3D12VideoDevice);
   assert(pD3D12Dec->m_spDecodeCommandQueue);
   struct d3d12_video_buffer *pD3D12VideoBuffer = (struct d3d12_video_buffer *) target;
   assert(pD3D12VideoBuffer);

   ///
   /// Store current decode output target texture and reference textures from upper layer
   ///
   d3d12_video_decoder_store_upper_layer_references(pD3D12Dec, target, picture);

   ///
   /// Codec header picture parameters buffers
   ///

   auto &inFlightResources = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)];

   // Converts the pipe picture params into the DXVA layout expected by D3D12
   d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input(pD3D12Dec, picture, pD3D12VideoBuffer);
   assert(inFlightResources.m_picParamsBuffer.size() > 0);

   ///
   /// Prepare Slice control buffers before clearing staging buffer
   ///
   assert(inFlightResources.m_stagingDecodeBitstream.size() >
          0);   // Make sure the staging wasn't cleared yet in end_frame
   d3d12_video_decoder_prepare_dxva_slices_control(pD3D12Dec, picture);
   assert(inFlightResources.m_SliceControlBuffer.size() > 0);

   ///
   /// Upload m_stagingDecodeBitstream to GPU memory now that end_frame is called and clear staging buffer
   ///

   uint64_t sliceDataStagingBufferSize = inFlightResources.m_stagingDecodeBitstream.size();
   uint8_t *sliceDataStagingBufferPtr = inFlightResources.m_stagingDecodeBitstream.data();

   // Reallocate if necessary to accomodate the current frame bitstream buffer in GPU memory
   if (inFlightResources.m_curFrameCompressedBitstreamBufferAllocatedSize < sliceDataStagingBufferSize) {
      if (!d3d12_video_decoder_create_staging_bitstream_buffer(pD3D12Screen, pD3D12Dec, sliceDataStagingBufferSize)) {
         debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Failure on "
                      "d3d12_video_decoder_create_staging_bitstream_buffer\n");
         debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame failed for fenceValue: %d\n",
                      pD3D12Dec->m_fenceValue);
         assert(false);
         return 1;
      }
   }

   // Upload frame bitstream CPU data to ID3D12Resource buffer
   inFlightResources.m_curFrameCompressedBitstreamBufferPayloadSize =
      sliceDataStagingBufferSize;   // This can be less than m_curFrameCompressedBitstreamBufferAllocatedSize.
   assert(inFlightResources.m_curFrameCompressedBitstreamBufferPayloadSize <=
          inFlightResources.m_curFrameCompressedBitstreamBufferAllocatedSize);

   /* One-shot transfer operation with data supplied in a user
    * pointer.
    */
   // Wrap the D3D12 bitstream buffer in a pipe_resource so the gallium context can upload into it
   inFlightResources.pPipeCompressedBufferObj =
      d3d12_resource_from_resource(&pD3D12Screen->base, inFlightResources.m_curFrameCompressedBitstreamBuffer.Get());
   assert(inFlightResources.pPipeCompressedBufferObj);
   pD3D12Dec->base.context->buffer_subdata(pD3D12Dec->base.context,                 // context
                                           inFlightResources.pPipeCompressedBufferObj,   // dst buffer
                                           PIPE_MAP_WRITE,                          // usage PIPE_MAP_x
                                           0,                                       // offset
                                           sizeof(*sliceDataStagingBufferPtr) * sliceDataStagingBufferSize,   // size
                                           sliceDataStagingBufferPtr                // data
   );

   // Flush buffer_subdata batch
   // before deleting the source CPU buffer below

   pD3D12Dec->base.context->flush(pD3D12Dec->base.context,
                                  &inFlightResources.m_pBitstreamUploadGPUCompletionFence,
                                  PIPE_FLUSH_ASYNC | PIPE_FLUSH_HINT_FINISH);
   assert(inFlightResources.m_pBitstreamUploadGPUCompletionFence);
   // To be waited on GPU fence before flushing current frame DecodeFrame to GPU

   ///
   /// Proceed to record the GPU Decode commands
   ///

   // Requested conversions by caller upper layer (none for now)
   d3d12_video_decode_output_conversion_arguments requestedConversionArguments = {};

   ///
   /// Record DecodeFrame operation and resource state transitions.
   ///

   // Translate input D3D12 structure
   D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS d3d12InputArguments = {};

   d3d12InputArguments.CompressedBitstream.pBuffer = inFlightResources.m_curFrameCompressedBitstreamBuffer.Get();
   d3d12InputArguments.CompressedBitstream.Offset = 0u;
   ASSERTED constexpr uint64_t d3d12BitstreamOffsetAlignment =
      128u;   // specified in
              // https://docs.microsoft.com/en-us/windows/win32/api/d3d12video/ne-d3d12video-d3d12_video_decode_tier
   assert((d3d12InputArguments.CompressedBitstream.Offset == 0) ||
          ((d3d12InputArguments.CompressedBitstream.Offset % d3d12BitstreamOffsetAlignment) == 0));
   d3d12InputArguments.CompressedBitstream.Size = inFlightResources.m_curFrameCompressedBitstreamBufferPayloadSize;

   // Transition bitstream buffer COMMON -> VIDEO_DECODE_READ for the decode operation
   D3D12_RESOURCE_BARRIER resourceBarrierCommonToDecode[1] = {
      CD3DX12_RESOURCE_BARRIER::Transition(d3d12InputArguments.CompressedBitstream.pBuffer,
                                           D3D12_RESOURCE_STATE_COMMON,
                                           D3D12_RESOURCE_STATE_VIDEO_DECODE_READ),
   };
   pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(1u, resourceBarrierCommonToDecode);

   // Schedule reverse (back to common) transitions before command list closes for current frame
   pD3D12Dec->m_transitionsBeforeCloseCmdList.push_back(
      CD3DX12_RESOURCE_BARRIER::Transition(d3d12InputArguments.CompressedBitstream.pBuffer,
                                           D3D12_RESOURCE_STATE_VIDEO_DECODE_READ,
                                           D3D12_RESOURCE_STATE_COMMON));

   ///
   /// Clear texture (no reference only flags in resource allocation) to use as decode output to send downstream for
   /// display/consumption
   ///
   ID3D12Resource *pOutputD3D12Texture;
   uint outputD3D12Subresource = 0;

   ///
   /// Ref Only texture (with reference only flags in resource allocation) to use as reconstructed picture decode output
   /// and to store as future reference in DPB
   ///
   ID3D12Resource *pRefOnlyOutputD3D12Texture;
   uint refOnlyOutputD3D12Subresource = 0;

   // Picks/allocates the decode output + reference-only textures and updates DPB state
   if (!d3d12_video_decoder_prepare_for_decode_frame(pD3D12Dec,
                                                     target,
                                                     pD3D12VideoBuffer,
                                                     &pOutputD3D12Texture,             // output
                                                     &outputD3D12Subresource,          // output
                                                     &pRefOnlyOutputD3D12Texture,      // output
                                                     &refOnlyOutputD3D12Subresource,   // output
                                                     requestedConversionArguments)) {
      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Failure on "
                   "d3d12_video_decoder_prepare_for_decode_frame\n");
      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame failed for fenceValue: %d\n",
                   pD3D12Dec->m_fenceValue);
      assert(false);
      return 1;
   }

   ///
   /// Set codec picture parameters CPU buffer
   ///

   d3d12InputArguments.NumFrameArguments =
      1u;   // Only the codec data received from the above layer with picture params
   d3d12InputArguments.FrameArguments[d3d12InputArguments.NumFrameArguments - 1] = {
      D3D12_VIDEO_DECODE_ARGUMENT_TYPE_PICTURE_PARAMETERS,
      static_cast<uint32_t>(inFlightResources.m_picParamsBuffer.size()),
      inFlightResources.m_picParamsBuffer.data(),
   };

   // Optional slice control frame argument (present when the codec provided slice data)
   if (inFlightResources.m_SliceControlBuffer.size() > 0) {
      d3d12InputArguments.NumFrameArguments++;
      d3d12InputArguments.FrameArguments[d3d12InputArguments.NumFrameArguments - 1] = {
         D3D12_VIDEO_DECODE_ARGUMENT_TYPE_SLICE_CONTROL,
         static_cast<uint32_t>(inFlightResources.m_SliceControlBuffer.size()),
         inFlightResources.m_SliceControlBuffer.data(),
      };
   }

   // Optional inverse quantization matrix frame argument
   if (inFlightResources.qp_matrix_frame_argument_enabled &&
       (inFlightResources.m_InverseQuantMatrixBuffer.size() > 0)) {
      d3d12InputArguments.NumFrameArguments++;
      d3d12InputArguments.FrameArguments[d3d12InputArguments.NumFrameArguments - 1] = {
         D3D12_VIDEO_DECODE_ARGUMENT_TYPE_INVERSE_QUANTIZATION_MATRIX,
         static_cast<uint32_t>(inFlightResources.m_InverseQuantMatrixBuffer.size()),
         inFlightResources.m_InverseQuantMatrixBuffer.data(),
      };
   }

   d3d12InputArguments.ReferenceFrames = pD3D12Dec->m_spDPBManager->get_current_reference_frames();
   if (D3D12_DEBUG_VERBOSE & d3d12_debug) {
      pD3D12Dec->m_spDPBManager->print_dpb();
   }

   d3d12InputArguments.pHeap = pD3D12Dec->m_spVideoDecoderHeap.Get();

   // translate output D3D12 structure
   D3D12_VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS1 d3d12OutputArguments = {};
   d3d12OutputArguments.pOutputTexture2D = pOutputD3D12Texture;
   d3d12OutputArguments.OutputSubresource = outputD3D12Subresource;

   // When the HW requires REFERENCE_ONLY allocations, decode writes into the
   // ref-only texture and a conversion copies into the display output texture
   bool fReferenceOnly = (pD3D12Dec->m_ConfigDecoderSpecificFlags &
                          d3d12_video_decode_config_specific_flag_reference_only_textures_required) != 0;
   if (fReferenceOnly) {
      d3d12OutputArguments.ConversionArguments.Enable = true;

      assert(pRefOnlyOutputD3D12Texture);
      d3d12OutputArguments.ConversionArguments.pReferenceTexture2D = pRefOnlyOutputD3D12Texture;
      d3d12OutputArguments.ConversionArguments.ReferenceSubresource = refOnlyOutputD3D12Subresource;

      const D3D12_RESOURCE_DESC &descReference = GetDesc(d3d12OutputArguments.ConversionArguments.pReferenceTexture2D);
      d3d12OutputArguments.ConversionArguments.DecodeColorSpace = d3d12_convert_from_legacy_color_space(
         !util_format_is_yuv(d3d12_get_pipe_format(descReference.Format)),
         util_format_get_blocksize(d3d12_get_pipe_format(descReference.Format)) * 8 /*bytes to bits conversion*/,
         /* StudioRGB= */ false,
         /* P709= */ true,
         /* StudioYUV= */ true);

      const D3D12_RESOURCE_DESC &descOutput = GetDesc(d3d12OutputArguments.pOutputTexture2D);
      d3d12OutputArguments.ConversionArguments.OutputColorSpace = d3d12_convert_from_legacy_color_space(
         !util_format_is_yuv(d3d12_get_pipe_format(descOutput.Format)),
         util_format_get_blocksize(d3d12_get_pipe_format(descOutput.Format)) * 8 /*bytes to bits conversion*/,
         /* StudioRGB= */ false,
         /* P709= */ true,
         /* StudioYUV= */ true);

      const D3D12_VIDEO_DECODER_HEAP_DESC &HeapDesc = GetDesc(pD3D12Dec->m_spVideoDecoderHeap.Get());
      d3d12OutputArguments.ConversionArguments.OutputWidth = HeapDesc.DecodeWidth;
      d3d12OutputArguments.ConversionArguments.OutputHeight = HeapDesc.DecodeHeight;
   } else {
      d3d12OutputArguments.ConversionArguments.Enable = false;
   }

   // Decompose the output subresource into mip/array/plane so we can transition each plane
   CD3DX12_RESOURCE_DESC outputDesc(GetDesc(d3d12OutputArguments.pOutputTexture2D));
   uint32_t MipLevel, PlaneSlice, ArraySlice;
   D3D12DecomposeSubresource(d3d12OutputArguments.OutputSubresource,
                             outputDesc.MipLevels,
                             outputDesc.ArraySize(),
                             MipLevel,
                             ArraySlice,
                             PlaneSlice);

   // Transition every format plane of the output texture COMMON -> VIDEO_DECODE_WRITE
   for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) {
      uint planeOutputSubresource = outputDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice);

      D3D12_RESOURCE_BARRIER resourceBarrierCommonToDecode[1] = {
         CD3DX12_RESOURCE_BARRIER::Transition(d3d12OutputArguments.pOutputTexture2D,
                                              D3D12_RESOURCE_STATE_COMMON,
                                              D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE,
                                              planeOutputSubresource),
      };
      pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(1u, resourceBarrierCommonToDecode);
   }

   // Schedule reverse (back to common) transitions before command list closes for current frame
   for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) {
      uint planeOutputSubresource = outputDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice);
      pD3D12Dec->m_transitionsBeforeCloseCmdList.push_back(
         CD3DX12_RESOURCE_BARRIER::Transition(d3d12OutputArguments.pOutputTexture2D,
                                              D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE,
                                              D3D12_RESOURCE_STATE_COMMON,
                                              planeOutputSubresource));
   }

   // Record DecodeFrame

   pD3D12Dec->m_spDecodeCommandList->DecodeFrame1(pD3D12Dec->m_spVideoDecoder.Get(),
                                                  &d3d12OutputArguments,
                                                  &d3d12InputArguments);

   debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame finalized for fenceValue: %d\n",
                pD3D12Dec->m_fenceValue);

   // Save extra references of Decoder, DecoderHeap and DPB allocations in case
   // there's a reconfiguration that trigers the construction of new objects
   inFlightResources.m_spDecoder = pD3D12Dec->m_spVideoDecoder;
   inFlightResources.m_spDecoderHeap = pD3D12Dec->m_spVideoDecoderHeap;
   inFlightResources.m_References = pD3D12Dec->m_spDPBManager;

   ///
   /// Flush work to the GPU
   ///
   pD3D12Dec->m_needsGPUFlush = true;
   d3d12_video_decoder_flush(codec);
   // Call to d3d12_video_decoder_flush increases m_FenceValue
   uint64_t inflightIndexBeforeFlush = (pD3D12Dec->m_fenceValue - 1u) % D3D12_VIDEO_DEC_ASYNC_DEPTH;

   if (pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation()) {
      // No need to copy, the output surface fence is merely the decode queue fence
      *picture->fence = (pipe_fence_handle *) &pD3D12Dec->m_inflightResourcesPool[inflightIndexBeforeFlush].m_FenceData;
   } else {
      ///
      /// If !pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation()
      /// We cannot use the standalone video buffer allocation directly and we must use instead
      /// either a ID3D12Resource with DECODE_REFERENCE only flag or a texture array within the same
      /// allocation
      /// Do GPU->GPU texture copy from decode output to pipe target decode texture sampler view planes
      ///

      // Get destination resource
      struct pipe_sampler_view **pPipeDstViews = target->get_sampler_view_planes(target);

      // Get source pipe_resource
      pipe_resource *pPipeSrc =
         d3d12_resource_from_resource(&pD3D12Screen->base, d3d12OutputArguments.pOutputTexture2D);
      assert(pPipeSrc);

      // GPU wait on the graphics context which will do the copy until the decode finishes
      pD3D12Screen->cmdqueue->Wait(
         pD3D12Dec->m_inflightResourcesPool[inflightIndexBeforeFlush].m_FenceData.cmdqueue_fence,
         pD3D12Dec->m_inflightResourcesPool[inflightIndexBeforeFlush].m_FenceData.value);

      // Copy all format subresources/texture planes
      for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) {
         assert(d3d12OutputArguments.OutputSubresource < INT16_MAX);
         struct pipe_box box;
         u_box_3d(0,
                  0,
                  // src array slice, taken as Z for TEXTURE_2D_ARRAY
                  static_cast<int16_t>(d3d12OutputArguments.OutputSubresource),
                  static_cast<int>(pPipeDstViews[PlaneSlice]->texture->width0),
                  static_cast<int16_t>(pPipeDstViews[PlaneSlice]->texture->height0),
                  1, &box);

         pD3D12Dec->base.context->resource_copy_region(pD3D12Dec->base.context,
                                                       pPipeDstViews[PlaneSlice]->texture,              // dst
                                                       0,                                               // dst level
                                                       0,                                               // dstX
                                                       0,                                               // dstY
                                                       0,                                               // dstZ
                                                       (PlaneSlice == 0) ? pPipeSrc : pPipeSrc->next,   // src
                                                       0,                                               // src level
                                                       &box);
      }
      // Flush resource_copy_region batch
      // The output surface fence is the graphics queue that will signal after the copy ends
      pD3D12Dec->base.context->flush(pD3D12Dec->base.context, picture->fence, PIPE_FLUSH_ASYNC | PIPE_FLUSH_HINT_FINISH);
   }
   return 0;
}
733
734 /**
735 * Get decoder fence.
736 */
737 int
d3d12_video_decoder_get_decoder_fence(struct pipe_video_codec * codec,struct pipe_fence_handle * fence,uint64_t timeout)738 d3d12_video_decoder_get_decoder_fence(struct pipe_video_codec *codec, struct pipe_fence_handle *fence, uint64_t timeout)
739 {
740 struct d3d12_fence *fenceValueToWaitOn = (struct d3d12_fence *) fence;
741 assert(fenceValueToWaitOn);
742
743 ASSERTED bool wait_res =
744 d3d12_video_decoder_sync_completion(codec, fenceValueToWaitOn->cmdqueue_fence, fenceValueToWaitOn->value, timeout);
745
746 // Return semantics based on p_video_codec interface
747 // ret == 0 -> Decode in progress
748 // ret != 0 -> Decode completed
749 return wait_res ? 1 : 0;
750 }
751
/**
 * Flush any outstanding command buffers to the hardware.
 * Should be called before a video_buffer is accessed by the gallium frontend again.
 *
 * Closes and submits the decode command list (after emitting any deferred
 * resource transitions), signals the decode fence, records the fence data in
 * the current in-flight slot and advances m_fenceValue. No-op when there is
 * no pending GPU work (m_needsGPUFlush == false).
 */
void
d3d12_video_decoder_flush(struct pipe_video_codec *codec)
{
   struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
   assert(pD3D12Dec);
   assert(pD3D12Dec->m_spD3D12VideoDevice);
   assert(pD3D12Dec->m_spDecodeCommandQueue);
   // NOTE(review): m_fenceValue is printed with %d here and below; if it is a
   // 64-bit counter this truncates the debug output — confirm its declared type.
   debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush started. Will flush video queue work and CPU wait on "
                "fenceValue: %d\n",
                pD3D12Dec->m_fenceValue);

   if (!pD3D12Dec->m_needsGPUFlush) {
      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush started. Nothing to flush, all up to date.\n");
   } else {
      // Detect device loss before submitting; submitting to a removed device
      // would fail anyway and we want the distinguishing BEFORE/AFTER trace.
      HRESULT hr = pD3D12Dec->m_pD3D12Screen->dev->GetDeviceRemovedReason();
      if (hr != S_OK) {
         debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush"
                      " - D3D12Device was removed BEFORE commandlist "
                      "execution with HR %x.\n",
                      hr);
         goto flush_fail;
      }

      // Emit the reverse transitions (scheduled during the frame, e.g.
      // DECODE_WRITE -> COMMON) as the last commands before closing the list.
      if (pD3D12Dec->m_transitionsBeforeCloseCmdList.size() > 0) {
         pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(pD3D12Dec->m_transitionsBeforeCloseCmdList.size(),
                                                           pD3D12Dec->m_transitionsBeforeCloseCmdList.data());
         pD3D12Dec->m_transitionsBeforeCloseCmdList.clear();
      }

      hr = pD3D12Dec->m_spDecodeCommandList->Close();
      if (FAILED(hr)) {
         debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush - Can't close command list with HR %x\n", hr);
         goto flush_fail;
      }

      auto &inFlightResources = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)];
      ID3D12CommandList *ppCommandLists[1] = { pD3D12Dec->m_spDecodeCommandList.Get() };
      // GPU-side wait: decode must not start until the bitstream upload finished.
      struct d3d12_fence *pUploadBitstreamFence = d3d12_fence(inFlightResources.m_pBitstreamUploadGPUCompletionFence);
      pD3D12Dec->m_spDecodeCommandQueue->Wait(pUploadBitstreamFence->cmdqueue_fence, pUploadBitstreamFence->value);
      pD3D12Dec->m_spDecodeCommandQueue->ExecuteCommandLists(1, ppCommandLists);
      pD3D12Dec->m_spDecodeCommandQueue->Signal(pD3D12Dec->m_spFence.Get(), pD3D12Dec->m_fenceValue);

      // Validate device was not removed
      hr = pD3D12Dec->m_pD3D12Screen->dev->GetDeviceRemovedReason();
      if (hr != S_OK) {
         debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush"
                      " - D3D12Device was removed AFTER commandlist "
                      "execution with HR %x, but wasn't before.\n",
                      hr);
         goto flush_fail;
      }

      // Record the async fence info for this in-flight slot so later frames /
      // get_decoder_fence can wait for this submission's completion.
      memset(&inFlightResources.m_FenceData, 0, sizeof(inFlightResources.m_FenceData));

      inFlightResources.m_FenceData.value = pD3D12Dec->m_fenceValue;
      inFlightResources.m_FenceData.cmdqueue_fence = pD3D12Dec->m_spFence.Get();

      pD3D12Dec->m_fenceValue++;
      pD3D12Dec->m_needsGPUFlush = false;
   }
   return;

flush_fail:
   debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush failed for fenceValue: %d\n", pD3D12Dec->m_fenceValue);
   assert(false);
}
823
824 bool
d3d12_video_decoder_create_command_objects(const struct d3d12_screen * pD3D12Screen,struct d3d12_video_decoder * pD3D12Dec)825 d3d12_video_decoder_create_command_objects(const struct d3d12_screen *pD3D12Screen,
826 struct d3d12_video_decoder *pD3D12Dec)
827 {
828 assert(pD3D12Dec->m_spD3D12VideoDevice);
829
830 D3D12_COMMAND_QUEUE_DESC commandQueueDesc = { D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE };
831 HRESULT hr = pD3D12Screen->dev->CreateCommandQueue(&commandQueueDesc,
832 IID_PPV_ARGS(pD3D12Dec->m_spDecodeCommandQueue.GetAddressOf()));
833 if (FAILED(hr)) {
834 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to CreateCommandQueue "
835 "failed with HR %x\n",
836 hr);
837 return false;
838 }
839
840 hr = pD3D12Screen->dev->CreateFence(0, D3D12_FENCE_FLAG_SHARED, IID_PPV_ARGS(&pD3D12Dec->m_spFence));
841 if (FAILED(hr)) {
842 debug_printf(
843 "[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to CreateFence failed with HR %x\n",
844 hr);
845 return false;
846 }
847
848 for (auto &inputResource : pD3D12Dec->m_inflightResourcesPool) {
849 hr = pD3D12Dec->m_pD3D12Screen->dev->CreateCommandAllocator(
850 D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE,
851 IID_PPV_ARGS(inputResource.m_spCommandAllocator.GetAddressOf()));
852 if (FAILED(hr)) {
853 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to "
854 "CreateCommandAllocator failed with HR %x\n",
855 hr);
856 return false;
857 }
858 }
859
860 ComPtr<ID3D12Device4> spD3D12Device4;
861 if (FAILED(pD3D12Dec->m_pD3D12Screen->dev->QueryInterface(IID_PPV_ARGS(spD3D12Device4.GetAddressOf())))) {
862 debug_printf(
863 "[d3d12_video_decoder] d3d12_video_decoder_create_decoder - D3D12 Device has no ID3D12Device4 support\n");
864 return false;
865 }
866
867 hr = spD3D12Device4->CreateCommandList1(0,
868 D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE,
869 D3D12_COMMAND_LIST_FLAG_NONE,
870 IID_PPV_ARGS(pD3D12Dec->m_spDecodeCommandList.GetAddressOf()));
871
872 if (FAILED(hr)) {
873 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to CreateCommandList "
874 "failed with HR %x\n",
875 hr);
876 return false;
877 }
878
879 return true;
880 }
881
882 bool
d3d12_video_decoder_check_caps_and_create_decoder(const struct d3d12_screen * pD3D12Screen,struct d3d12_video_decoder * pD3D12Dec)883 d3d12_video_decoder_check_caps_and_create_decoder(const struct d3d12_screen *pD3D12Screen,
884 struct d3d12_video_decoder *pD3D12Dec)
885 {
886 assert(pD3D12Dec->m_spD3D12VideoDevice);
887
888 pD3D12Dec->m_decoderDesc = {};
889
890 D3D12_VIDEO_DECODE_CONFIGURATION decodeConfiguration = { pD3D12Dec->m_d3d12DecProfile,
891 D3D12_BITSTREAM_ENCRYPTION_TYPE_NONE,
892 D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE_NONE };
893
894 D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT decodeSupport = {};
895 decodeSupport.NodeIndex = pD3D12Dec->m_NodeIndex;
896 decodeSupport.Configuration = decodeConfiguration;
897 decodeSupport.Width = pD3D12Dec->base.width;
898 decodeSupport.Height = pD3D12Dec->base.height;
899 decodeSupport.DecodeFormat = pD3D12Dec->m_decodeFormat;
900 // no info from above layer on framerate/bitrate
901 decodeSupport.FrameRate.Numerator = 0;
902 decodeSupport.FrameRate.Denominator = 0;
903 decodeSupport.BitRate = 0;
904
905 HRESULT hr = pD3D12Dec->m_spD3D12VideoDevice->CheckFeatureSupport(D3D12_FEATURE_VIDEO_DECODE_SUPPORT,
906 &decodeSupport,
907 sizeof(decodeSupport));
908 if (FAILED(hr)) {
909 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_check_caps_and_create_decoder - CheckFeatureSupport "
910 "failed with HR %x\n",
911 hr);
912 return false;
913 }
914
915 if (!(decodeSupport.SupportFlags & D3D12_VIDEO_DECODE_SUPPORT_FLAG_SUPPORTED)) {
916 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_check_caps_and_create_decoder - "
917 "D3D12_VIDEO_DECODE_SUPPORT_FLAG_SUPPORTED was false when checking caps \n");
918 return false;
919 }
920
921 pD3D12Dec->m_configurationFlags = decodeSupport.ConfigurationFlags;
922 pD3D12Dec->m_tier = decodeSupport.DecodeTier;
923
924 if (d3d12_video_decoder_supports_aot_dpb(decodeSupport, pD3D12Dec->m_d3d12DecProfileType)) {
925 pD3D12Dec->m_ConfigDecoderSpecificFlags |= d3d12_video_decode_config_specific_flag_array_of_textures;
926 }
927
928 if (decodeSupport.ConfigurationFlags & D3D12_VIDEO_DECODE_CONFIGURATION_FLAG_HEIGHT_ALIGNMENT_MULTIPLE_32_REQUIRED) {
929 pD3D12Dec->m_ConfigDecoderSpecificFlags |= d3d12_video_decode_config_specific_flag_alignment_height;
930 }
931
932 if (decodeSupport.ConfigurationFlags & D3D12_VIDEO_DECODE_CONFIGURATION_FLAG_REFERENCE_ONLY_ALLOCATIONS_REQUIRED) {
933 pD3D12Dec->m_ConfigDecoderSpecificFlags |=
934 d3d12_video_decode_config_specific_flag_reference_only_textures_required;
935 }
936
937 pD3D12Dec->m_decoderDesc.NodeMask = pD3D12Dec->m_NodeMask;
938 pD3D12Dec->m_decoderDesc.Configuration = decodeConfiguration;
939
940 hr = pD3D12Dec->m_spD3D12VideoDevice->CreateVideoDecoder(&pD3D12Dec->m_decoderDesc,
941 IID_PPV_ARGS(pD3D12Dec->m_spVideoDecoder.GetAddressOf()));
942 if (FAILED(hr)) {
943 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_check_caps_and_create_decoder - CreateVideoDecoder "
944 "failed with HR %x\n",
945 hr);
946 return false;
947 }
948
949 return true;
950 }
951
952 bool
d3d12_video_decoder_create_video_state_buffers(const struct d3d12_screen * pD3D12Screen,struct d3d12_video_decoder * pD3D12Dec)953 d3d12_video_decoder_create_video_state_buffers(const struct d3d12_screen *pD3D12Screen,
954 struct d3d12_video_decoder *pD3D12Dec)
955 {
956 assert(pD3D12Dec->m_spD3D12VideoDevice);
957 if (!d3d12_video_decoder_create_staging_bitstream_buffer(pD3D12Screen,
958 pD3D12Dec,
959 pD3D12Dec->m_InitialCompBitstreamGPUBufferSize)) {
960 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_video_state_buffers - Failure on "
961 "d3d12_video_decoder_create_staging_bitstream_buffer\n");
962 return false;
963 }
964
965 return true;
966 }
967
968 bool
d3d12_video_decoder_create_staging_bitstream_buffer(const struct d3d12_screen * pD3D12Screen,struct d3d12_video_decoder * pD3D12Dec,uint64_t bufSize)969 d3d12_video_decoder_create_staging_bitstream_buffer(const struct d3d12_screen *pD3D12Screen,
970 struct d3d12_video_decoder *pD3D12Dec,
971 uint64_t bufSize)
972 {
973 assert(pD3D12Dec->m_spD3D12VideoDevice);
974 auto &inFlightResources = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)];
975 if (inFlightResources.m_curFrameCompressedBitstreamBuffer.Get() != nullptr) {
976 inFlightResources.m_curFrameCompressedBitstreamBuffer.Reset();
977 }
978
979 auto descHeap = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT, pD3D12Dec->m_NodeMask, pD3D12Dec->m_NodeMask);
980 auto descResource = CD3DX12_RESOURCE_DESC::Buffer(bufSize);
981 HRESULT hr = pD3D12Screen->dev->CreateCommittedResource(
982 &descHeap,
983 D3D12_HEAP_FLAG_NONE,
984 &descResource,
985 D3D12_RESOURCE_STATE_COMMON,
986 nullptr,
987 IID_PPV_ARGS(inFlightResources.m_curFrameCompressedBitstreamBuffer.GetAddressOf()));
988 if (FAILED(hr)) {
989 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_staging_bitstream_buffer - "
990 "CreateCommittedResource failed with HR %x\n",
991 hr);
992 return false;
993 }
994
995 inFlightResources.m_curFrameCompressedBitstreamBufferAllocatedSize = bufSize;
996 return true;
997 }
998
/**
 * Prepares the per-frame decode state before recording DecodeFrame:
 * reconfigures the DPB if needed, refreshes active references, resolves the
 * output (and optional reference-only) textures/subresources, emits the
 * COMMON -> VIDEO_DECODE_WRITE transitions for the reference-only allocation
 * (scheduling the reverse transitions for command-list close), and lets the
 * codec-specific helper register the current frame's DPB entry.
 *
 * Returns false if the DPB reconfiguration fails; true otherwise.
 */
bool
d3d12_video_decoder_prepare_for_decode_frame(struct d3d12_video_decoder *pD3D12Dec,
                                             struct pipe_video_buffer *pCurrentDecodeTarget,
                                             struct d3d12_video_buffer *pD3D12VideoBuffer,
                                             ID3D12Resource **ppOutTexture2D,
                                             uint32_t *pOutSubresourceIndex,
                                             ID3D12Resource **ppRefOnlyOutTexture2D,
                                             uint32_t *pRefOnlyOutSubresourceIndex,
                                             const d3d12_video_decode_output_conversion_arguments &conversionArgs)
{
   if (!d3d12_video_decoder_reconfigure_dpb(pD3D12Dec, pD3D12VideoBuffer, conversionArgs)) {
      debug_printf("d3d12_video_decoder_reconfigure_dpb failed!\n");
      return false;
   }

   // Refresh DPB active references for current frame, release memory for unused references.
   d3d12_video_decoder_refresh_dpb_active_references(pD3D12Dec);

   // Get the output texture for the current frame to be decoded
   pD3D12Dec->m_spDPBManager->get_current_frame_decode_output_texture(pCurrentDecodeTarget,
                                                                      ppOutTexture2D,
                                                                      pOutSubresourceIndex);

   auto vidBuffer = (struct d3d12_video_buffer *) (pCurrentDecodeTarget);
   // If is_pipe_buffer_underlying_output_decode_allocation is enabled,
   // we can just use the underlying allocation in pCurrentDecodeTarget
   // and avoid an extra copy after decoding the frame.
   // If this is the case, we need to handle the residency of this resource
   // (if not we're actually creating the resources with CreateCommitedResource with
   // residency by default)
   if (pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation()) {
      assert(d3d12_resource_resource(vidBuffer->texture) == *ppOutTexture2D);
      // Make it permanently resident for video use
      d3d12_promote_to_permanent_residency(pD3D12Dec->m_pD3D12Screen, vidBuffer->texture);
   }

   // Get the reference only texture for the current frame to be decoded (if applicable)
   bool fReferenceOnly = (pD3D12Dec->m_ConfigDecoderSpecificFlags &
                          d3d12_video_decode_config_specific_flag_reference_only_textures_required) != 0;
   if (fReferenceOnly) {
      bool needsTransitionToDecodeWrite = false;
      pD3D12Dec->m_spDPBManager->get_reference_only_output(pCurrentDecodeTarget,
                                                           ppRefOnlyOutTexture2D,
                                                           pRefOnlyOutSubresourceIndex,
                                                           needsTransitionToDecodeWrite);
      assert(needsTransitionToDecodeWrite);

      // Decompose the subresource index so we can transition every format
      // plane (e.g. Y and UV for NV12) of this mip/array slice individually.
      CD3DX12_RESOURCE_DESC outputDesc(GetDesc(*ppRefOnlyOutTexture2D));
      uint32_t MipLevel, PlaneSlice, ArraySlice;
      D3D12DecomposeSubresource(*pRefOnlyOutSubresourceIndex,
                                outputDesc.MipLevels,
                                outputDesc.ArraySize(),
                                MipLevel,
                                ArraySlice,
                                PlaneSlice);

      // Transition each plane COMMON -> VIDEO_DECODE_WRITE for the decode op.
      for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) {
         uint planeOutputSubresource = outputDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice);

         D3D12_RESOURCE_BARRIER resourceBarrierCommonToDecode[1] = {
            CD3DX12_RESOURCE_BARRIER::Transition(*ppRefOnlyOutTexture2D,
                                                 D3D12_RESOURCE_STATE_COMMON,
                                                 D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE,
                                                 planeOutputSubresource),
         };
         pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(1u, resourceBarrierCommonToDecode);
      }

      // Schedule reverse (back to common) transitions before command list closes for current frame
      for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) {
         uint planeOutputSubresource = outputDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice);
         pD3D12Dec->m_transitionsBeforeCloseCmdList.push_back(
            CD3DX12_RESOURCE_BARRIER::Transition(*ppRefOnlyOutTexture2D,
                                                 D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE,
                                                 D3D12_RESOURCE_STATE_COMMON,
                                                 planeOutputSubresource));
      }
   }

   // If decoded needs reference_only entries in the dpb, use the reference_only allocation for current frame
   // otherwise, use the standard output resource
   // ([[maybe_unused]] because all codec cases may be compiled out below.)
   [[maybe_unused]] ID3D12Resource *pCurrentFrameDPBEntry = fReferenceOnly ? *ppRefOnlyOutTexture2D : *ppOutTexture2D;
   [[maybe_unused]] uint32_t currentFrameDPBEntrySubresource = fReferenceOnly ? *pRefOnlyOutSubresourceIndex : *pOutSubresourceIndex;
#if D3D12_VIDEO_ANY_DECODER_ENABLED
   // Dispatch to the codec-specific helper that records the current frame's
   // DPB entry; each case is compiled in only when that codec is enabled.
   switch (pD3D12Dec->m_d3d12DecProfileType) {
#if VIDEO_CODEC_H264DEC
      case d3d12_video_decode_profile_type_h264:
      {
         d3d12_video_decoder_prepare_current_frame_references_h264(pD3D12Dec,
                                                                   pCurrentFrameDPBEntry,
                                                                   currentFrameDPBEntrySubresource);
      } break;
#endif
#if VIDEO_CODEC_H265DEC
      case d3d12_video_decode_profile_type_hevc:
      {
         d3d12_video_decoder_prepare_current_frame_references_hevc(pD3D12Dec,
                                                                   pCurrentFrameDPBEntry,
                                                                   currentFrameDPBEntrySubresource);
      } break;
#endif
#if VIDEO_CODEC_AV1DEC
      case d3d12_video_decode_profile_type_av1:
      {
         d3d12_video_decoder_prepare_current_frame_references_av1(pD3D12Dec,
                                                                  pCurrentFrameDPBEntry,
                                                                  currentFrameDPBEntrySubresource);
      } break;
#endif
#if VIDEO_CODEC_VP9DEC
      case d3d12_video_decode_profile_type_vp9:
      {
         d3d12_video_decoder_prepare_current_frame_references_vp9(pD3D12Dec,
                                                                  pCurrentFrameDPBEntry,
                                                                  currentFrameDPBEntrySubresource);
      } break;
#endif
      default:
      {
         unreachable("Unsupported d3d12_video_decode_profile_type");
      } break;
   }
#endif // D3D12_VIDEO_ANY_DECODER_ENABLED
   return true;
}
1124
/**
 * Reconfigures decoder objects when the stream parameters change:
 * - Re-creates the ID3D12VideoDecoder if the output format or interlace type
 *   no longer matches the current decoder configuration.
 * - (Re)creates the DPB manager and the ID3D12VideoDecoderHeap when the
 *   decode dimensions, format or max DPB size change.
 *
 * Cached descriptors (m_decoderDesc / m_decoderHeapDesc) are only updated
 * after the corresponding Create* call succeeds, so a failure leaves the
 * previous configuration intact. Returns false on creation failure.
 */
bool
d3d12_video_decoder_reconfigure_dpb(struct d3d12_video_decoder *pD3D12Dec,
                                    struct d3d12_video_buffer *pD3D12VideoBuffer,
                                    const d3d12_video_decode_output_conversion_arguments &conversionArguments)
{
   // Codec-specific frame dimensions and maximum DPB size for this stream.
   uint32_t width;
   uint32_t height;
   uint16_t maxDPB;
   d3d12_video_decoder_get_frame_info(pD3D12Dec, &width, &height, &maxDPB);

   ID3D12Resource *pPipeD3D12DstResource = d3d12_resource_resource(pD3D12VideoBuffer->texture);
   D3D12_RESOURCE_DESC outputResourceDesc = GetDesc(pPipeD3D12DstResource);

   D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE interlaceTypeRequested =
      pD3D12VideoBuffer->base.interlaced ? D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE_FIELD_BASED : D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE_NONE;
   if ((pD3D12Dec->m_decodeFormat != outputResourceDesc.Format) ||
       (pD3D12Dec->m_decoderDesc.Configuration.InterlaceType != interlaceTypeRequested)) {
      // Copy current pD3D12Dec->m_decoderDesc, modify decodeprofile and re-create decoder.
      D3D12_VIDEO_DECODER_DESC decoderDesc = pD3D12Dec->m_decoderDesc;
      decoderDesc.Configuration.InterlaceType = interlaceTypeRequested;
      decoderDesc.Configuration.DecodeProfile =
         d3d12_video_decoder_resolve_profile(pD3D12Dec->m_d3d12DecProfileType, pD3D12Dec->m_decodeFormat);
      pD3D12Dec->m_spVideoDecoder.Reset();
      HRESULT hr =
         pD3D12Dec->m_spD3D12VideoDevice->CreateVideoDecoder(&decoderDesc,
                                                             IID_PPV_ARGS(pD3D12Dec->m_spVideoDecoder.GetAddressOf()));
      if (FAILED(hr)) {
         debug_printf(
            "[d3d12_video_decoder] d3d12_video_decoder_reconfigure_dpb - CreateVideoDecoder failed with HR %x\n",
            hr);
         return false;
      }
      // Update state after CreateVideoDecoder succeeds only.
      pD3D12Dec->m_decoderDesc = decoderDesc;
   }

   if (!pD3D12Dec->m_spDPBManager || !pD3D12Dec->m_spVideoDecoderHeap ||
       pD3D12Dec->m_decodeFormat != outputResourceDesc.Format || pD3D12Dec->m_decoderHeapDesc.DecodeWidth != width ||
       pD3D12Dec->m_decoderHeapDesc.DecodeHeight != height ||
       pD3D12Dec->m_decoderHeapDesc.MaxDecodePictureBufferCount < maxDPB) {
      // Detect the combination of AOT/ReferenceOnly to configure the DPB manager
      uint16_t referenceCount = (conversionArguments.Enable) ? (uint16_t) conversionArguments.ReferenceFrameCount +
                                                                  1 /*extra slot for current picture*/ :
                                                               maxDPB;
      // When output conversion is enabled, the DPB uses the reference info's
      // dimensions/format rather than the output resource's.
      d3d12_video_decode_dpb_descriptor dpbDesc = {};
      dpbDesc.Width = (conversionArguments.Enable) ? conversionArguments.ReferenceInfo.Width : width;
      dpbDesc.Height = (conversionArguments.Enable) ? conversionArguments.ReferenceInfo.Height : height;
      dpbDesc.Format =
         (conversionArguments.Enable) ? conversionArguments.ReferenceInfo.Format.Format : outputResourceDesc.Format;
      dpbDesc.fArrayOfTexture =
         ((pD3D12Dec->m_ConfigDecoderSpecificFlags & d3d12_video_decode_config_specific_flag_array_of_textures) != 0);
      dpbDesc.dpbSize = referenceCount;
      dpbDesc.m_NodeMask = pD3D12Dec->m_NodeMask;
      dpbDesc.fReferenceOnly = ((pD3D12Dec->m_ConfigDecoderSpecificFlags &
                                 d3d12_video_decode_config_specific_flag_reference_only_textures_required) != 0);

      // Create DPB manager
      if (pD3D12Dec->m_spDPBManager == nullptr) {
         pD3D12Dec->m_spDPBManager.reset(new d3d12_video_decoder_references_manager(pD3D12Dec->m_pD3D12Screen,
                                                                                    pD3D12Dec->m_NodeMask,
                                                                                    pD3D12Dec->m_d3d12DecProfileType,
                                                                                    dpbDesc));
      }

      //
      // (Re)-create decoder heap
      //
      D3D12_VIDEO_DECODER_HEAP_DESC decoderHeapDesc = {};
      decoderHeapDesc.NodeMask = pD3D12Dec->m_NodeMask;
      decoderHeapDesc.Configuration = pD3D12Dec->m_decoderDesc.Configuration;
      decoderHeapDesc.DecodeWidth = dpbDesc.Width;
      decoderHeapDesc.DecodeHeight = dpbDesc.Height;
      decoderHeapDesc.Format = dpbDesc.Format;
      decoderHeapDesc.MaxDecodePictureBufferCount = maxDPB;
      pD3D12Dec->m_spVideoDecoderHeap.Reset();
      HRESULT hr = pD3D12Dec->m_spD3D12VideoDevice->CreateVideoDecoderHeap(
         &decoderHeapDesc,
         IID_PPV_ARGS(pD3D12Dec->m_spVideoDecoderHeap.GetAddressOf()));
      if (FAILED(hr)) {
         debug_printf(
            "[d3d12_video_decoder] d3d12_video_decoder_reconfigure_dpb - CreateVideoDecoderHeap failed with HR %x\n",
            hr);
         return false;
      }
      // Update pD3D12Dec after CreateVideoDecoderHeap succeeds only.
      pD3D12Dec->m_decoderHeapDesc = decoderHeapDesc;
   }

   pD3D12Dec->m_decodeFormat = outputResourceDesc.Format;

   return true;
}
1217
/**
 * Refreshes the DPB's active reference set for the current frame by
 * dispatching to the codec-specific helper; each case is compiled in only
 * when that codec is enabled. No-op when no decoder codec is enabled.
 */
void
d3d12_video_decoder_refresh_dpb_active_references(struct d3d12_video_decoder *pD3D12Dec)
{
#if D3D12_VIDEO_ANY_DECODER_ENABLED
   switch (pD3D12Dec->m_d3d12DecProfileType) {
#if VIDEO_CODEC_H264DEC
      case d3d12_video_decode_profile_type_h264:
      {
         d3d12_video_decoder_refresh_dpb_active_references_h264(pD3D12Dec);
      } break;
#endif
#if VIDEO_CODEC_H265DEC
      case d3d12_video_decode_profile_type_hevc:
      {
         d3d12_video_decoder_refresh_dpb_active_references_hevc(pD3D12Dec);
      } break;
#endif
#if VIDEO_CODEC_AV1DEC
      case d3d12_video_decode_profile_type_av1:
      {
         d3d12_video_decoder_refresh_dpb_active_references_av1(pD3D12Dec);
      } break;
#endif
#if VIDEO_CODEC_VP9DEC
      case d3d12_video_decode_profile_type_vp9:
      {
         d3d12_video_decoder_refresh_dpb_active_references_vp9(pD3D12Dec);
      } break;
#endif
      default:
      {
         unreachable("Unsupported d3d12_video_decode_profile_type");
      } break;
   }
#endif // D3D12_VIDEO_ANY_DECODER_ENABLED
}
1254
/**
 * Queries the codec-specific helper for the current stream's coded width,
 * height and maximum DPB size. Outputs are zeroed first (and stay zero when
 * no decoder codec is enabled). If the driver requires height alignment to a
 * multiple of 32 (d3d12_video_decode_config_specific_flag_alignment_height),
 * the returned height is rounded up accordingly.
 */
void
d3d12_video_decoder_get_frame_info(
   struct d3d12_video_decoder *pD3D12Dec, uint32_t *pWidth, uint32_t *pHeight, uint16_t *pMaxDPB)
{
   *pWidth = 0;
   *pHeight = 0;
   *pMaxDPB = 0;

#if D3D12_VIDEO_ANY_DECODER_ENABLED
   switch (pD3D12Dec->m_d3d12DecProfileType) {
#if VIDEO_CODEC_H264DEC
      case d3d12_video_decode_profile_type_h264:
      {
         d3d12_video_decoder_get_frame_info_h264(pD3D12Dec, pWidth, pHeight, pMaxDPB);
      } break;
#endif
#if VIDEO_CODEC_H265DEC
      case d3d12_video_decode_profile_type_hevc:
      {
         d3d12_video_decoder_get_frame_info_hevc(pD3D12Dec, pWidth, pHeight, pMaxDPB);
      } break;
#endif
#if VIDEO_CODEC_AV1DEC
      case d3d12_video_decode_profile_type_av1:
      {
         d3d12_video_decoder_get_frame_info_av1(pD3D12Dec, pWidth, pHeight, pMaxDPB);
      } break;
#endif
#if VIDEO_CODEC_VP9DEC
      case d3d12_video_decode_profile_type_vp9:
      {
         d3d12_video_decoder_get_frame_info_vp9(pD3D12Dec, pWidth, pHeight, pMaxDPB);
      } break;
#endif
      default:
      {
         unreachable("Unsupported d3d12_video_decode_profile_type");
      } break;
   }
#endif // D3D12_VIDEO_ANY_DECODER_ENABLED

   // Round height up to the next multiple of 32 when the driver requires it.
   if (pD3D12Dec->m_ConfigDecoderSpecificFlags & d3d12_video_decode_config_specific_flag_alignment_height) {
      const uint32_t AlignmentMask = 31;
      *pHeight = (*pHeight + AlignmentMask) & ~AlignmentMask;
   }
}
1301
/**
 * Converts the gallium pipe_picture_desc for the current frame into the
 * codec-specific DXVA picture parameters (and quantization matrix, where the
 * codec defines one) and stores them in the current in-flight slot's buffers.
 * Also sets qp_matrix_frame_argument_enabled for the slot: always true for
 * H264 (no way to know from pipe params), per-stream for HEVC, and false for
 * AV1/VP9 (no QP matrix frame argument).
 */
void
d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input(
   struct d3d12_video_decoder *codec, // input argument, current decoder
   struct pipe_picture_desc
      *picture, // input argument, base structure of pipe_XXX_picture_desc where XXX is the codec name
   struct d3d12_video_buffer *pD3D12VideoBuffer // input argument, target video buffer
)
{
#if D3D12_VIDEO_ANY_DECODER_ENABLED
   assert(picture);
   assert(codec);
   struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;

   d3d12_video_decode_profile_type profileType =
      d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(codec->base.profile);
   ID3D12Resource *pPipeD3D12DstResource = d3d12_resource_resource(pD3D12VideoBuffer->texture);
   D3D12_RESOURCE_DESC outputResourceDesc = GetDesc(pPipeD3D12DstResource);
   auto &inFlightResources = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)];
   inFlightResources.qp_matrix_frame_argument_enabled = false;
   switch (profileType) {
#if VIDEO_CODEC_H264DEC
      case d3d12_video_decode_profile_type_h264:
      {
         // H264: convert picture params and always send the QP matrix.
         size_t dxvaPicParamsBufferSize = sizeof(DXVA_PicParams_H264);
         pipe_h264_picture_desc *pPicControlH264 = (pipe_h264_picture_desc *) picture;
         DXVA_PicParams_H264 dxvaPicParamsH264 =
            d3d12_video_decoder_dxva_picparams_from_pipe_picparams_h264(pD3D12Dec->m_fenceValue,
                                                                        codec->base.profile,
                                                                        outputResourceDesc.Width,
                                                                        outputResourceDesc.Height,
                                                                        pPicControlH264);

         d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(codec,
                                                                      &dxvaPicParamsH264,
                                                                      dxvaPicParamsBufferSize);

         size_t dxvaQMatrixBufferSize = sizeof(DXVA_Qmatrix_H264);
         DXVA_Qmatrix_H264 dxvaQmatrixH264 = {};
         d3d12_video_decoder_dxva_qmatrix_from_pipe_picparams_h264((pipe_h264_picture_desc *) picture, dxvaQmatrixH264);
         inFlightResources.qp_matrix_frame_argument_enabled =
            true; // We don't have a way of knowing from the pipe params so send always
         d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(codec, &dxvaQmatrixH264, dxvaQMatrixBufferSize);
      } break;
#endif
#if VIDEO_CODEC_H265DEC
      case d3d12_video_decode_profile_type_hevc:
      {
         // HEVC: convert picture params; the qmatrix helper decides whether
         // the QP matrix frame argument is needed for this stream.
         size_t dxvaPicParamsBufferSize = sizeof(DXVA_PicParams_HEVC);
         pipe_h265_picture_desc *pPicControlHEVC = (pipe_h265_picture_desc *) picture;
         DXVA_PicParams_HEVC dxvaPicParamsHEVC =
            d3d12_video_decoder_dxva_picparams_from_pipe_picparams_hevc(pD3D12Dec, codec->base.profile, pPicControlHEVC);

         d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(codec,
                                                                      &dxvaPicParamsHEVC,
                                                                      dxvaPicParamsBufferSize);

         size_t dxvaQMatrixBufferSize = sizeof(DXVA_Qmatrix_HEVC);
         DXVA_Qmatrix_HEVC dxvaQmatrixHEVC = {};
         inFlightResources.qp_matrix_frame_argument_enabled = false;
         d3d12_video_decoder_dxva_qmatrix_from_pipe_picparams_hevc((pipe_h265_picture_desc *) picture,
                                                                   dxvaQmatrixHEVC,
                                                                   inFlightResources.qp_matrix_frame_argument_enabled);
         d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(codec, &dxvaQmatrixHEVC, dxvaQMatrixBufferSize);
      } break;
#endif
#if VIDEO_CODEC_AV1DEC
      case d3d12_video_decode_profile_type_av1:
      {
         // AV1: picture params only; no QP matrix frame argument.
         size_t dxvaPicParamsBufferSize = sizeof(DXVA_PicParams_AV1);
         pipe_av1_picture_desc *pPicControlAV1 = (pipe_av1_picture_desc *) picture;
         DXVA_PicParams_AV1 dxvaPicParamsAV1 =
            d3d12_video_decoder_dxva_picparams_from_pipe_picparams_av1(pD3D12Dec->m_fenceValue,
                                                                       codec->base.profile,
                                                                       pPicControlAV1);

         d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(codec, &dxvaPicParamsAV1, dxvaPicParamsBufferSize);
         inFlightResources.qp_matrix_frame_argument_enabled = false;
      } break;
#endif
#if VIDEO_CODEC_VP9DEC
      case d3d12_video_decode_profile_type_vp9:
      {
         // VP9: picture params only; no QP matrix frame argument.
         size_t dxvaPicParamsBufferSize = sizeof(DXVA_PicParams_VP9);
         pipe_vp9_picture_desc *pPicControlVP9 = (pipe_vp9_picture_desc *) picture;
         DXVA_PicParams_VP9 dxvaPicParamsVP9 =
            d3d12_video_decoder_dxva_picparams_from_pipe_picparams_vp9(pD3D12Dec, codec->base.profile, pPicControlVP9);

         d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(codec, &dxvaPicParamsVP9, dxvaPicParamsBufferSize);
         inFlightResources.qp_matrix_frame_argument_enabled = false;
      } break;
#endif
      default:
      {
         unreachable("Unsupported d3d12_video_decode_profile_type");
      } break;
   }
#endif // D3D12_VIDEO_ANY_DECODER_ENABLED
}
1400
/**
 * Builds the DXVA slice-control buffer for the current frame in the current
 * in-flight slot by dispatching to the codec-specific helper; each case is
 * compiled in only when that codec is enabled.
 */
void
d3d12_video_decoder_prepare_dxva_slices_control(
   struct d3d12_video_decoder *pD3D12Dec, // input argument, current decoder
   struct pipe_picture_desc *picture)
{
#if D3D12_VIDEO_ANY_DECODER_ENABLED
   // [[maybe_unused]] because all codec cases may be compiled out below.
   [[maybe_unused]] auto &inFlightResources = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)];
   d3d12_video_decode_profile_type profileType =
      d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(pD3D12Dec->base.profile);
   switch (profileType) {
#if VIDEO_CODEC_H264DEC
      case d3d12_video_decode_profile_type_h264:
      {
         d3d12_video_decoder_prepare_dxva_slices_control_h264(pD3D12Dec,
                                                              inFlightResources.m_SliceControlBuffer,
                                                              (struct pipe_h264_picture_desc *) picture);
      } break;
#endif
#if VIDEO_CODEC_H265DEC
      case d3d12_video_decode_profile_type_hevc:
      {
         d3d12_video_decoder_prepare_dxva_slices_control_hevc(pD3D12Dec,
                                                              inFlightResources.m_SliceControlBuffer,
                                                              (struct pipe_h265_picture_desc *) picture);
      } break;
#endif
#if VIDEO_CODEC_AV1DEC
      case d3d12_video_decode_profile_type_av1:
      {
         d3d12_video_decoder_prepare_dxva_slices_control_av1(pD3D12Dec,
                                                             inFlightResources.m_SliceControlBuffer,
                                                             (struct pipe_av1_picture_desc *) picture);
      } break;
#endif
#if VIDEO_CODEC_VP9DEC
      case d3d12_video_decode_profile_type_vp9:
      {
         d3d12_video_decoder_prepare_dxva_slices_control_vp9(pD3D12Dec,
                                                             inFlightResources.m_SliceControlBuffer,
                                                             (struct pipe_vp9_picture_desc *) picture);
      } break;
#endif
      default:
      {
         unreachable("Unsupported d3d12_video_decode_profile_type");
      } break;
   }
#endif // D3D12_VIDEO_ANY_DECODER_ENABLED
}
1450
1451 void
d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(struct d3d12_video_decoder * pD3D12Dec,void * pDXVAStruct,uint64_t DXVAStructSize)1452 d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(struct d3d12_video_decoder *pD3D12Dec,
1453 void *pDXVAStruct,
1454 uint64_t DXVAStructSize)
1455 {
1456 auto &inFlightResources = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)];
1457 if (inFlightResources.m_InverseQuantMatrixBuffer.capacity() < DXVAStructSize) {
1458 inFlightResources.m_InverseQuantMatrixBuffer.reserve(DXVAStructSize);
1459 }
1460
1461 inFlightResources.m_InverseQuantMatrixBuffer.resize(DXVAStructSize);
1462 memcpy(inFlightResources.m_InverseQuantMatrixBuffer.data(), pDXVAStruct, DXVAStructSize);
1463 }
1464
1465 void
d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(struct d3d12_video_decoder * pD3D12Dec,void * pDXVAStruct,uint64_t DXVAStructSize)1466 d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(struct d3d12_video_decoder *pD3D12Dec,
1467 void *pDXVAStruct,
1468 uint64_t DXVAStructSize)
1469 {
1470 auto &inFlightResources = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)];
1471 if (inFlightResources.m_picParamsBuffer.capacity() < DXVAStructSize) {
1472 inFlightResources.m_picParamsBuffer.reserve(DXVAStructSize);
1473 }
1474
1475 inFlightResources.m_picParamsBuffer.resize(DXVAStructSize);
1476 memcpy(inFlightResources.m_picParamsBuffer.data(), pDXVAStruct, DXVAStructSize);
1477 }
1478
1479 bool
d3d12_video_decoder_supports_aot_dpb(D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT decodeSupport,d3d12_video_decode_profile_type profileType)1480 d3d12_video_decoder_supports_aot_dpb(D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT decodeSupport,
1481 d3d12_video_decode_profile_type profileType)
1482 {
1483 bool supportedProfile = false;
1484 #if D3D12_VIDEO_ANY_DECODER_ENABLED
1485 switch (profileType) {
1486 #if VIDEO_CODEC_H264DEC
1487 case d3d12_video_decode_profile_type_h264:
1488 {
1489 supportedProfile = true;
1490 } break;
1491 #endif
1492 #if VIDEO_CODEC_H265DEC
1493 case d3d12_video_decode_profile_type_hevc:
1494 {
1495 supportedProfile = true;
1496 } break;
1497 #endif
1498 #if VIDEO_CODEC_AV1DEC
1499 case d3d12_video_decode_profile_type_av1:
1500 {
1501 supportedProfile = true;
1502 } break;
1503 #endif
1504 #if VIDEO_CODEC_VP9DEC
1505 case d3d12_video_decode_profile_type_vp9:
1506 {
1507 supportedProfile = true;
1508 } break;
1509 #endif
1510 default:
1511 supportedProfile = false;
1512 break;
1513 }
1514 #endif // D3D12_VIDEO_ANY_DECODER_ENABLED
1515
1516 return (decodeSupport.DecodeTier >= D3D12_VIDEO_DECODE_TIER_2) && supportedProfile;
1517 }
1518
1519 d3d12_video_decode_profile_type
d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(enum pipe_video_profile profile)1520 d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(enum pipe_video_profile profile)
1521 {
1522 switch (profile) {
1523 case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
1524 case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE:
1525 case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
1526 case PIPE_VIDEO_PROFILE_MPEG4_AVC_EXTENDED:
1527 case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
1528 case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH10:
1529 return d3d12_video_decode_profile_type_h264;
1530 case PIPE_VIDEO_PROFILE_HEVC_MAIN:
1531 case PIPE_VIDEO_PROFILE_HEVC_MAIN_10:
1532 return d3d12_video_decode_profile_type_hevc;
1533 case PIPE_VIDEO_PROFILE_AV1_MAIN:
1534 return d3d12_video_decode_profile_type_av1;
1535 case PIPE_VIDEO_PROFILE_VP9_PROFILE0:
1536 case PIPE_VIDEO_PROFILE_VP9_PROFILE2:
1537 return d3d12_video_decode_profile_type_vp9;
1538 default:
1539 {
1540 unreachable("Unsupported pipe video profile");
1541 } break;
1542 }
1543 }
1544
1545 GUID
d3d12_video_decoder_convert_pipe_video_profile_to_d3d12_profile(enum pipe_video_profile profile)1546 d3d12_video_decoder_convert_pipe_video_profile_to_d3d12_profile(enum pipe_video_profile profile)
1547 {
1548 switch (profile) {
1549 case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
1550 case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE:
1551 case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
1552 case PIPE_VIDEO_PROFILE_MPEG4_AVC_EXTENDED:
1553 case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
1554 case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH10:
1555 return D3D12_VIDEO_DECODE_PROFILE_H264;
1556 case PIPE_VIDEO_PROFILE_HEVC_MAIN:
1557 return D3D12_VIDEO_DECODE_PROFILE_HEVC_MAIN;
1558 case PIPE_VIDEO_PROFILE_HEVC_MAIN_10:
1559 return D3D12_VIDEO_DECODE_PROFILE_HEVC_MAIN10;
1560 case PIPE_VIDEO_PROFILE_AV1_MAIN:
1561 return D3D12_VIDEO_DECODE_PROFILE_AV1_PROFILE0;
1562 case PIPE_VIDEO_PROFILE_VP9_PROFILE0:
1563 return D3D12_VIDEO_DECODE_PROFILE_VP9;
1564 case PIPE_VIDEO_PROFILE_VP9_PROFILE2:
1565 return D3D12_VIDEO_DECODE_PROFILE_VP9_10BIT_PROFILE2;
1566 default:
1567 return {};
1568 }
1569 }
1570
1571 GUID
d3d12_video_decoder_resolve_profile(d3d12_video_decode_profile_type profileType,DXGI_FORMAT decode_format)1572 d3d12_video_decoder_resolve_profile(d3d12_video_decode_profile_type profileType, DXGI_FORMAT decode_format)
1573 {
1574 #if D3D12_VIDEO_ANY_DECODER_ENABLED
1575 switch (profileType) {
1576 #if VIDEO_CODEC_H264DEC
1577 case d3d12_video_decode_profile_type_h264:
1578 return D3D12_VIDEO_DECODE_PROFILE_H264;
1579 #endif
1580 #if VIDEO_CODEC_H265DEC
1581 case d3d12_video_decode_profile_type_hevc:
1582 {
1583 switch (decode_format) {
1584 case DXGI_FORMAT_NV12:
1585 return D3D12_VIDEO_DECODE_PROFILE_HEVC_MAIN;
1586 case DXGI_FORMAT_P010:
1587 return D3D12_VIDEO_DECODE_PROFILE_HEVC_MAIN10;
1588 default:
1589 {
1590 unreachable("Unsupported decode_format");
1591 } break;
1592 }
1593 } break;
1594 #endif
1595 #if VIDEO_CODEC_AV1DEC
1596 case d3d12_video_decode_profile_type_av1:
1597 return D3D12_VIDEO_DECODE_PROFILE_AV1_PROFILE0;
1598 break;
1599 #endif
1600 #if VIDEO_CODEC_VP9DEC
1601 case d3d12_video_decode_profile_type_vp9:
1602 {
1603 switch (decode_format) {
1604 case DXGI_FORMAT_NV12:
1605 return D3D12_VIDEO_DECODE_PROFILE_VP9;
1606 case DXGI_FORMAT_P010:
1607 return D3D12_VIDEO_DECODE_PROFILE_VP9_10BIT_PROFILE2;
1608 default:
1609 {
1610 unreachable("Unsupported decode_format");
1611 } break;
1612 }
1613 } break;
1614 #endif
1615 default:
1616 {
1617 unreachable("Unsupported d3d12_video_decode_profile_type");
1618 } break;
1619 }
1620 #else
1621 return {};
1622 #endif // D3D12_VIDEO_ANY_DECODER_ENABLED
1623 }
1624
1625 bool
d3d12_video_decoder_ensure_fence_finished(struct pipe_video_codec * codec,ID3D12Fence * fence,uint64_t fenceValueToWaitOn,uint64_t timeout_ns)1626 d3d12_video_decoder_ensure_fence_finished(struct pipe_video_codec *codec,
1627 ID3D12Fence *fence,
1628 uint64_t fenceValueToWaitOn,
1629 uint64_t timeout_ns)
1630 {
1631 bool wait_result = true;
1632 HRESULT hr = S_OK;
1633 uint64_t completedValue = fence->GetCompletedValue();
1634
1635 debug_printf(
1636 "[d3d12_video_decoder] d3d12_video_decoder_ensure_fence_finished - Waiting for fence (with timeout_ns %" PRIu64
1637 ") to finish with "
1638 "fenceValue: %" PRIu64 " - Current Fence Completed Value %" PRIu64 "\n",
1639 timeout_ns,
1640 fenceValueToWaitOn,
1641 completedValue);
1642
1643 if (completedValue < fenceValueToWaitOn) {
1644
1645 HANDLE event = {};
1646 int event_fd = 0;
1647 event = d3d12_fence_create_event(&event_fd);
1648
1649 hr = fence->SetEventOnCompletion(fenceValueToWaitOn, event);
1650 if (FAILED(hr)) {
1651 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_ensure_fence_finished - SetEventOnCompletion for "
1652 "fenceValue %" PRIu64 " failed with HR %x\n",
1653 fenceValueToWaitOn,
1654 hr);
1655 return false;
1656 }
1657
1658 wait_result = d3d12_fence_wait_event(event, event_fd, timeout_ns);
1659 d3d12_fence_close_event(event, event_fd);
1660
1661 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_ensure_fence_finished - Waiting on fence to be done with "
1662 "fenceValue: %" PRIu64 " - current CompletedValue: %" PRIu64 "\n",
1663 fenceValueToWaitOn,
1664 completedValue);
1665 } else {
1666 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_ensure_fence_finished - Fence already done with "
1667 "fenceValue: %" PRIu64 " - current CompletedValue: %" PRIu64 "\n",
1668 fenceValueToWaitOn,
1669 completedValue);
1670 }
1671 return wait_result;
1672 }
1673
1674 bool
d3d12_video_decoder_sync_completion(struct pipe_video_codec * codec,ID3D12Fence * fence,uint64_t fenceValueToWaitOn,uint64_t timeout_ns)1675 d3d12_video_decoder_sync_completion(struct pipe_video_codec *codec,
1676 ID3D12Fence *fence,
1677 uint64_t fenceValueToWaitOn,
1678 uint64_t timeout_ns)
1679 {
1680 struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
1681 assert(pD3D12Dec);
1682 assert(pD3D12Dec->m_spD3D12VideoDevice);
1683 assert(pD3D12Dec->m_spDecodeCommandQueue);
1684 HRESULT hr = S_OK;
1685
1686 ASSERTED bool wait_result = d3d12_video_decoder_ensure_fence_finished(codec, fence, fenceValueToWaitOn, timeout_ns);
1687 assert(wait_result);
1688
1689 // Release references granted on end_frame for this inflight operations
1690 pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_spDecoder.Reset();
1691 pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_spDecoderHeap.Reset();
1692 pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_References.reset();
1693 pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_stagingDecodeBitstream.resize(
1694 0);
1695 pipe_resource_reference(
1696 &pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].pPipeCompressedBufferObj,
1697 NULL);
1698
1699 struct d3d12_screen *pD3D12Screen = (struct d3d12_screen *) pD3D12Dec->m_pD3D12Screen;
1700 assert(pD3D12Screen);
1701
1702 pD3D12Screen->base.fence_reference(
1703 &pD3D12Screen->base,
1704 &pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)]
1705 .m_pBitstreamUploadGPUCompletionFence,
1706 NULL);
1707
1708 hr =
1709 pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_spCommandAllocator->Reset();
1710 if (FAILED(hr)) {
1711 debug_printf("failed with %x.\n", hr);
1712 goto sync_with_token_fail;
1713 }
1714
1715 // Validate device was not removed
1716 hr = pD3D12Dec->m_pD3D12Screen->dev->GetDeviceRemovedReason();
1717 if (hr != S_OK) {
1718 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_sync_completion"
1719 " - D3D12Device was removed AFTER d3d12_video_decoder_ensure_fence_finished "
1720 "execution with HR %x, but wasn't before.\n",
1721 hr);
1722 goto sync_with_token_fail;
1723 }
1724
1725 debug_printf(
1726 "[d3d12_video_decoder] d3d12_video_decoder_sync_completion - GPU execution finalized for fenceValue: %" PRIu64
1727 "\n",
1728 fenceValueToWaitOn);
1729
1730 return wait_result;
1731
1732 sync_with_token_fail:
1733 debug_printf("[d3d12_video_decoder] d3d12_video_decoder_sync_completion failed for fenceValue: %" PRIu64 "\n",
1734 fenceValueToWaitOn);
1735 assert(false);
1736 return false;
1737 }