xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/d3d12/d3d12_video_dec.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © Microsoft Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "d3d12_context.h"
25 #include "d3d12_format.h"
26 #include "d3d12_resource.h"
27 #include "d3d12_screen.h"
28 #include "d3d12_surface.h"
29 #include "d3d12_video_dec.h"
30 #if VIDEO_CODEC_H264DEC
31 #include "d3d12_video_dec_h264.h"
32 #endif
33 #if VIDEO_CODEC_H265DEC
34 #include "d3d12_video_dec_hevc.h"
35 #endif
36 #if VIDEO_CODEC_AV1DEC
37 #include "d3d12_video_dec_av1.h"
38 #endif
39 #if VIDEO_CODEC_VP9DEC
40 #include "d3d12_video_dec_vp9.h"
41 #endif
42 #include "d3d12_video_buffer.h"
43 #include "d3d12_residency.h"
44 
45 #include "vl/vl_video_buffer.h"
46 #include "util/format/u_format.h"
47 #include "util/u_inlines.h"
48 #include "util/u_memory.h"
49 #include "util/u_video.h"
50 
51 uint64_t
d3d12_video_decoder_pool_current_index(struct d3d12_video_decoder * pD3D12Dec)52 d3d12_video_decoder_pool_current_index(struct d3d12_video_decoder *pD3D12Dec)
53 {
54    return pD3D12Dec->m_fenceValue % D3D12_VIDEO_DEC_ASYNC_DEPTH;
55 }
56 
/**
 * Creates a d3d12_video_decoder wrapped behind the generic pipe_video_codec
 * interface for the requested codec profile.
 *
 * Returns the embedded pipe_video_codec base on success, or nullptr on
 * failure. On failure, any partially-constructed state is released through
 * d3d12_video_decoder_destroy, which must therefore tolerate
 * half-initialized members.
 */
struct pipe_video_codec *
d3d12_video_create_decoder(struct pipe_context *context, const struct pipe_video_codec *codec)
{
   ///
   /// Initialize d3d12_video_decoder
   ///


   // Not using new doesn't call ctor and the initializations in the class declaration are lost
   struct d3d12_video_decoder *pD3D12Dec = new d3d12_video_decoder;

   // One resource set per frame that can be in flight simultaneously
   pD3D12Dec->m_inflightResourcesPool.resize(D3D12_VIDEO_DEC_ASYNC_DEPTH, { 0 });

   pD3D12Dec->base = *codec;
   pD3D12Dec->m_screen = context->screen;

   pD3D12Dec->base.context = context;
   pD3D12Dec->base.width = codec->width;
   pD3D12Dec->base.height = codec->height;
   // Only fill methods that are supported by the d3d12 decoder, leaving null the rest (ie. encode_* / decode_macroblock
   // / get_feedback for encode)
   pD3D12Dec->base.destroy = d3d12_video_decoder_destroy;
   pD3D12Dec->base.begin_frame = d3d12_video_decoder_begin_frame;
   pD3D12Dec->base.decode_bitstream = d3d12_video_decoder_decode_bitstream;
   pD3D12Dec->base.end_frame = d3d12_video_decoder_end_frame;
   pD3D12Dec->base.flush = d3d12_video_decoder_flush;
   pD3D12Dec->base.get_decoder_fence = d3d12_video_decoder_get_decoder_fence;

   // Derive DXGI format and D3D12 decode profile from the pipe profile
   pD3D12Dec->m_decodeFormat = d3d12_convert_pipe_video_profile_to_dxgi_format(codec->profile);
   pD3D12Dec->m_d3d12DecProfileType = d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(codec->profile);
   pD3D12Dec->m_d3d12DecProfile = d3d12_video_decoder_convert_pipe_video_profile_to_d3d12_profile(codec->profile);

   ///
   /// Try initializing D3D12 Video device and check for device caps
   ///

   struct d3d12_context *pD3D12Ctx = (struct d3d12_context *) context;
   pD3D12Dec->m_pD3D12Screen = d3d12_screen(pD3D12Ctx->base.screen);

   ///
   /// Create decode objects
   ///
   HRESULT hr = S_OK;
   // QueryInterface for ID3D12VideoDevice; failure means the adapter/device
   // has no video support at all
   if (FAILED(pD3D12Dec->m_pD3D12Screen->dev->QueryInterface(
          IID_PPV_ARGS(pD3D12Dec->m_spD3D12VideoDevice.GetAddressOf())))) {
      debug_printf("[d3d12_video_decoder] d3d12_video_create_decoder - D3D12 Device has no Video support\n");
      goto failed;
   }

   if (!d3d12_video_decoder_check_caps_and_create_decoder(pD3D12Dec->m_pD3D12Screen, pD3D12Dec)) {
      debug_printf("[d3d12_video_decoder] d3d12_video_create_decoder - Failure on "
                   "d3d12_video_decoder_check_caps_and_create_decoder\n");
      goto failed;
   }

   if (!d3d12_video_decoder_create_command_objects(pD3D12Dec->m_pD3D12Screen, pD3D12Dec)) {
      debug_printf(
         "[d3d12_video_decoder] d3d12_video_create_decoder - Failure on d3d12_video_decoder_create_command_objects\n");
      goto failed;
   }

   if (!d3d12_video_decoder_create_video_state_buffers(pD3D12Dec->m_pD3D12Screen, pD3D12Dec)) {
      debug_printf("[d3d12_video_decoder] d3d12_video_create_decoder - Failure on "
                   "d3d12_video_decoder_create_video_state_buffers\n");
      goto failed;
   }

   // Cache the plane-count / format info of the decode format for later use
   // (e.g. per-plane resource state transitions in end_frame)
   pD3D12Dec->m_decodeFormatInfo = { pD3D12Dec->m_decodeFormat };
   hr = pD3D12Dec->m_pD3D12Screen->dev->CheckFeatureSupport(D3D12_FEATURE_FORMAT_INFO,
                                                            &pD3D12Dec->m_decodeFormatInfo,
                                                            sizeof(pD3D12Dec->m_decodeFormatInfo));
   if (FAILED(hr)) {
      debug_printf("CheckFeatureSupport failed with HR %x\n", hr);
      goto failed;
   }

   return &pD3D12Dec->base;

failed:
   // Single cleanup path: destroy tolerates partially-initialized decoders
   if (pD3D12Dec != nullptr) {
      d3d12_video_decoder_destroy((struct pipe_video_codec *) pD3D12Dec);
   }

   return nullptr;
}
142 
143 /**
144  * Destroys a d3d12_video_decoder
145  * Call destroy_XX for applicable XX nested member types before deallocating
146  * Destroy methods should check != nullptr on their input target argument as this method can be called as part of
147  * cleanup from failure on the creation method
148  */
void
d3d12_video_decoder_destroy(struct pipe_video_codec *codec)
{
   if (codec == nullptr) {
      return;
   }

   struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
   // Flush and wait for completion of any in-flight GPU work before destroying objects
   d3d12_video_decoder_flush(codec);
   if (pD3D12Dec->m_fenceValue > 1 /* Check we submitted at least one frame */) {
      // Wait for the decode queue to reach the fence of the most recently
      // submitted frame (fence values start at 1, hence the - 1u); the pool
      // slot is selected the same way d3d12_video_decoder_pool_current_index does
      auto decode_queue_completion_fence = pD3D12Dec->m_inflightResourcesPool[(pD3D12Dec->m_fenceValue - 1u) % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_FenceData;
      d3d12_video_decoder_sync_completion(codec, decode_queue_completion_fence.cmdqueue_fence, decode_queue_completion_fence.value, OS_TIMEOUT_INFINITE);
      // Also drain the pipe context queue (used e.g. for the bitstream
      // buffer_subdata uploads in end_frame) before releasing resources
      struct pipe_fence_handle *context_queue_completion_fence = NULL;
      pD3D12Dec->base.context->flush(pD3D12Dec->base.context, &context_queue_completion_fence, PIPE_FLUSH_ASYNC | PIPE_FLUSH_HINT_FINISH);
      pD3D12Dec->m_pD3D12Screen->base.fence_finish(&pD3D12Dec->m_pD3D12Screen->base, NULL, context_queue_completion_fence, OS_TIMEOUT_INFINITE);
      // Drop the reference on the flush fence now that it has signaled
      pD3D12Dec->m_pD3D12Screen->base.fence_reference(&pD3D12Dec->m_pD3D12Screen->base, &context_queue_completion_fence, NULL);
   }

   //
   // Destroys a decoder
   // Call destroy_XX for applicable XX nested member types before deallocating
   // Destroy methods should check != nullptr on their input target argument as this method can be called as part of
   // cleanup from failure on the creation method
   //

   // No need for d3d12_destroy_video_objects
   //    All the objects created here are smart pointer members of d3d12_video_decoder
   // No need for d3d12_destroy_video_decoder_and_heap
   //    All the objects created here are smart pointer members of d3d12_video_decoder
   // No need for d3d12_destroy_video_dpbmanagers
   //    All the objects created here are smart pointer members of d3d12_video_decoder

   // No need for m_pD3D12Screen as it is not managed by d3d12_video_decoder

   // Call dtor to make ComPtr work
   delete pD3D12Dec;
}
187 
188 /**
189  * start decoding of a new frame
190  */
191 void
d3d12_video_decoder_begin_frame(struct pipe_video_codec * codec,struct pipe_video_buffer * target,struct pipe_picture_desc * picture)192 d3d12_video_decoder_begin_frame(struct pipe_video_codec *codec,
193                                 struct pipe_video_buffer *target,
194                                 struct pipe_picture_desc *picture)
195 {
196    // Do nothing here. Initialize happens on decoder creation, re-config (if any) happens in
197    // d3d12_video_decoder_decode_bitstream
198    struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
199    assert(pD3D12Dec);
200 
201    ///
202    /// Wait here to make sure the next in flight resource set is empty before using it
203    ///
204    uint64_t fenceValueToWaitOn = static_cast<uint64_t>(
205       std::max(static_cast<int64_t>(0l),
206                static_cast<int64_t>(pD3D12Dec->m_fenceValue) - static_cast<int64_t>(D3D12_VIDEO_DEC_ASYNC_DEPTH)));
207 
208    debug_printf("[d3d12_video_decoder] d3d12_video_decoder_begin_frame Waiting for completion of in flight resource "
209                 "sets with previous work with fenceValue: %" PRIu64 "\n",
210                 fenceValueToWaitOn);
211 
212    ASSERTED bool wait_res =
213       d3d12_video_decoder_sync_completion(codec, pD3D12Dec->m_spFence.Get(), fenceValueToWaitOn, OS_TIMEOUT_INFINITE);
214    assert(wait_res);
215 
216    HRESULT hr = pD3D12Dec->m_spDecodeCommandList->Reset(
217       pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)].m_spCommandAllocator.Get());
218    if (FAILED(hr)) {
219       debug_printf("[d3d12_video_decoder] resetting ID3D12GraphicsCommandList failed with HR %x\n", hr);
220       assert(false);
221    }
222 
223    debug_printf("[d3d12_video_decoder] d3d12_video_decoder_begin_frame finalized for fenceValue: %d\n",
224                 pD3D12Dec->m_fenceValue);
225 }
226 
227 /**
228  * decode a bitstream
229  */
230 void
d3d12_video_decoder_decode_bitstream(struct pipe_video_codec * codec,struct pipe_video_buffer * target,struct pipe_picture_desc * picture,unsigned num_buffers,const void * const * buffers,const unsigned * sizes)231 d3d12_video_decoder_decode_bitstream(struct pipe_video_codec *codec,
232                                      struct pipe_video_buffer *target,
233                                      struct pipe_picture_desc *picture,
234                                      unsigned num_buffers,
235                                      const void *const *buffers,
236                                      const unsigned *sizes)
237 {
238    struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
239    assert(pD3D12Dec);
240    debug_printf("[d3d12_video_decoder] d3d12_video_decoder_decode_bitstream started for fenceValue: %d\n",
241                 pD3D12Dec->m_fenceValue);
242    assert(pD3D12Dec->m_spD3D12VideoDevice);
243    assert(pD3D12Dec->m_spDecodeCommandQueue);
244    assert(pD3D12Dec->m_pD3D12Screen);
245    ASSERTED struct d3d12_video_buffer *pD3D12VideoBuffer = (struct d3d12_video_buffer *) target;
246    assert(pD3D12VideoBuffer);
247 
248    ///
249    /// Compressed bitstream buffers
250    ///
251 
252    /// Mesa VA frontend Video buffer passing semantics for H264, HEVC, MPEG4, VC1 and PIPE_VIDEO_PROFILE_VC1_ADVANCED
253    /// are: If num_buffers == 1 -> buf[0] has the compressed bitstream WITH the starting code If num_buffers == 2 ->
254    /// buf[0] has the NALU starting code and buf[1] has the compressed bitstream WITHOUT any starting code. If
255    /// num_buffers = 3 -> It's JPEG, not supported in D3D12. num_buffers is at most 3.
256    /// Mesa VDPAU frontend passes the buffers as they get passed in VdpDecoderRender without fixing any start codes
257    /// except for PIPE_VIDEO_PROFILE_VC1_ADVANCED
258    // In https://http.download.nvidia.com/XFree86/vdpau/doxygen/html/index.html#video_mixer_usage it's mentioned that:
259    // It is recommended that applications pass solely the slice data to VDPAU; specifically that any header data
260    // structures be excluded from the portion of the bitstream passed to VDPAU. VDPAU implementations must operate
261    // correctly if non-slice data is included, at least for formats employing start codes to delimit slice data. For all
262    // codecs/profiles it's highly recommended (when the codec/profile has such codes...) that the start codes are passed
263    // to VDPAU, even when not included in the bitstream the VDPAU client is parsing. Let's assume we get all the start
264    // codes for VDPAU. The doc also says "VDPAU implementations must operate correctly if non-slice data is included, at
265    // least for formats employing start codes to delimit slice data" if we ever get an issue with VDPAU start codes we
266    // should consider adding the code that handles this in the VDPAU layer above the gallium driver like mesa VA does.
267 
268    // To handle the multi-slice case end_frame already takes care of this by parsing the start codes from the
269    // combined bitstream of all decode_bitstream calls.
270 
271    // VAAPI seems to send one decode_bitstream command per slice, but we should also support the VDPAU case where the
272    // buffers have multiple buffer array entry per slice {startCode (optional), slice1, slice2, ..., startCode
273    // (optional) , sliceN}
274 
275    if (num_buffers > 2)   // Assume this means multiple slices at once in a decode_bitstream call
276    {
277       // Based on VA frontend codebase, this never happens for video (no JPEG)
278       // Based on VDPAU frontends codebase, this only happens when sending more than one slice at once in decode bitstream
279 
280       // To handle the case where VDPAU send all the slices at once in a single decode_bitstream call, let's pretend it
281       // was a series of different calls
282 
283       // group by start codes and buffers and perform calls for the number of slices
284       debug_printf("[d3d12_video_decoder] d3d12_video_decoder_decode_bitstream multiple slices on same call detected "
285                    "for fenceValue: %d, breaking down the calls into one per slice\n",
286                    pD3D12Dec->m_fenceValue);
287 
288       size_t curBufferIdx = 0;
289 
290       // Vars to be used for the delegation calls to decode_bitstream
291       unsigned call_num_buffers = 0;
292       const void *const *call_buffers = nullptr;
293       const unsigned *call_sizes = nullptr;
294 
295       while (curBufferIdx < num_buffers) {
296          // Store the current buffer as the base array pointer for the delegated call, later decide if it'll be a
297          // startcode+slicedata or just slicedata call
298          call_buffers = &buffers[curBufferIdx];
299          call_sizes = &sizes[curBufferIdx];
300 
301          // Usually start codes are less or equal than 4 bytes
302          // If the current buffer is a start code buffer, send it along with the next buffer. Otherwise, just send the
303          // current buffer.
304          call_num_buffers = (sizes[curBufferIdx] <= 4) ? 2 : 1;
305 
306          // Delegate call with one or two buffers only
307          d3d12_video_decoder_decode_bitstream(codec, target, picture, call_num_buffers, call_buffers, call_sizes);
308 
309          curBufferIdx += call_num_buffers;   // Consume from the loop the buffers sent in the last call
310       }
311    } else {
312       ///
313       /// Handle single slice buffer path, maybe with an extra start code buffer at buffers[0].
314       ///
315 
316       // Both the start codes being present at buffers[0] and the rest in buffers [1] or full buffer at [0] cases can be
317       // handled by flattening all the buffers into a single one and passing that to HW.
318 
319       size_t totalReceivedBuffersSize = 0u;   // Combined size of all sizes[]
320       for (size_t bufferIdx = 0; bufferIdx < num_buffers; bufferIdx++) {
321          totalReceivedBuffersSize += sizes[bufferIdx];
322       }
323 
324       // Bytes of data pre-staged before this decode_frame call
325       auto &inFlightResources = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)];
326       size_t preStagedDataSize = inFlightResources.m_stagingDecodeBitstream.size();
327 
328       // Extend the staging buffer size, as decode_frame can be called several times before end_frame
329       inFlightResources.m_stagingDecodeBitstream.resize(preStagedDataSize + totalReceivedBuffersSize);
330 
331       // Point newSliceDataPositionDstBase to the end of the pre-staged data in m_stagingDecodeBitstream, where the new
332       // buffers will be appended
333       uint8_t *newSliceDataPositionDstBase = inFlightResources.m_stagingDecodeBitstream.data() + preStagedDataSize;
334 
335       // Append new data at the end.
336       size_t dstOffset = 0u;
337       for (size_t bufferIdx = 0; bufferIdx < num_buffers; bufferIdx++) {
338          memcpy(newSliceDataPositionDstBase + dstOffset, buffers[bufferIdx], sizes[bufferIdx]);
339          dstOffset += sizes[bufferIdx];
340       }
341 
342       debug_printf("[d3d12_video_decoder] d3d12_video_decoder_decode_bitstream finalized for fenceValue: %d\n",
343                    pD3D12Dec->m_fenceValue);
344    }
345 
346    if (pD3D12Dec->m_d3d12DecProfileType == d3d12_video_decode_profile_type_h264) {
347       struct pipe_h264_picture_desc *h264 = (pipe_h264_picture_desc*) picture;
348       target->interlaced = !h264->pps->sps->frame_mbs_only_flag;
349    }
350 }
351 
/**
 * Stores the upper layer's (VA/VDPAU frontend) current decode target and the
 * per-codec reference frame list from the picture parameters, for later use
 * during DPB management in end_frame.
 *
 * Compiles to a no-op when no decoder codec support is enabled at build time.
 */
void
d3d12_video_decoder_store_upper_layer_references(struct d3d12_video_decoder *pD3D12Dec,
                                                 struct pipe_video_buffer *target,
                                                 struct pipe_picture_desc *picture)
{
#if D3D12_VIDEO_ANY_DECODER_ENABLED
   pD3D12Dec->m_pCurrentDecodeTarget = target;
   // Downcast picture to the codec-specific desc matching the decoder's
   // profile type and grab its reference target list
   switch (pD3D12Dec->m_d3d12DecProfileType) {
#if VIDEO_CODEC_H264DEC
      case d3d12_video_decode_profile_type_h264:
      {
         pipe_h264_picture_desc *pPicControlH264 = (pipe_h264_picture_desc *) picture;
         pD3D12Dec->m_pCurrentReferenceTargets = pPicControlH264->ref;
      } break;
#endif
#if VIDEO_CODEC_H265DEC
      case d3d12_video_decode_profile_type_hevc:
      {
         pipe_h265_picture_desc *pPicControlHevc = (pipe_h265_picture_desc *) picture;
         pD3D12Dec->m_pCurrentReferenceTargets = pPicControlHevc->ref;
      } break;
#endif
#if VIDEO_CODEC_AV1DEC
      case d3d12_video_decode_profile_type_av1:
      {
         pipe_av1_picture_desc *pPicControlAV1 = (pipe_av1_picture_desc *) picture;
         pD3D12Dec->m_pCurrentReferenceTargets = pPicControlAV1->ref;
      } break;
#endif
#if VIDEO_CODEC_VP9DEC
      case d3d12_video_decode_profile_type_vp9:
      {
         pipe_vp9_picture_desc *pPicControlVP9 = (pipe_vp9_picture_desc *) picture;
         pD3D12Dec->m_pCurrentReferenceTargets = pPicControlVP9->ref;
      } break;
#endif
      default:
      {
         unreachable("Unsupported d3d12_video_decode_profile_type");
      } break;
   }
#endif // D3D12_VIDEO_ANY_DECODER_ENABLED
}
395 
396 /**
397  * end decoding of the current frame
398  */
399 int
d3d12_video_decoder_end_frame(struct pipe_video_codec * codec,struct pipe_video_buffer * target,struct pipe_picture_desc * picture)400 d3d12_video_decoder_end_frame(struct pipe_video_codec *codec,
401                               struct pipe_video_buffer *target,
402                               struct pipe_picture_desc *picture)
403 {
404    struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
405    assert(pD3D12Dec);
406    struct d3d12_screen *pD3D12Screen = (struct d3d12_screen *) pD3D12Dec->m_pD3D12Screen;
407    assert(pD3D12Screen);
408    debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame started for fenceValue: %d\n",
409                 pD3D12Dec->m_fenceValue);
410    assert(pD3D12Dec->m_spD3D12VideoDevice);
411    assert(pD3D12Dec->m_spDecodeCommandQueue);
412    struct d3d12_video_buffer *pD3D12VideoBuffer = (struct d3d12_video_buffer *) target;
413    assert(pD3D12VideoBuffer);
414 
415    ///
416    /// Store current decode output target texture and reference textures from upper layer
417    ///
418    d3d12_video_decoder_store_upper_layer_references(pD3D12Dec, target, picture);
419 
420    ///
421    /// Codec header picture parameters buffers
422    ///
423 
424    auto &inFlightResources = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)];
425 
426    d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input(pD3D12Dec, picture, pD3D12VideoBuffer);
427    assert(inFlightResources.m_picParamsBuffer.size() > 0);
428 
429    ///
430    /// Prepare Slice control buffers before clearing staging buffer
431    ///
432    assert(inFlightResources.m_stagingDecodeBitstream.size() >
433           0);   // Make sure the staging wasn't cleared yet in end_frame
434    d3d12_video_decoder_prepare_dxva_slices_control(pD3D12Dec, picture);
435    assert(inFlightResources.m_SliceControlBuffer.size() > 0);
436 
437    ///
438    /// Upload m_stagingDecodeBitstream to GPU memory now that end_frame is called and clear staging buffer
439    ///
440 
441    uint64_t sliceDataStagingBufferSize = inFlightResources.m_stagingDecodeBitstream.size();
442    uint8_t *sliceDataStagingBufferPtr = inFlightResources.m_stagingDecodeBitstream.data();
443 
444    // Reallocate if necessary to accomodate the current frame bitstream buffer in GPU memory
445    if (inFlightResources.m_curFrameCompressedBitstreamBufferAllocatedSize < sliceDataStagingBufferSize) {
446       if (!d3d12_video_decoder_create_staging_bitstream_buffer(pD3D12Screen, pD3D12Dec, sliceDataStagingBufferSize)) {
447          debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Failure on "
448                       "d3d12_video_decoder_create_staging_bitstream_buffer\n");
449          debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame failed for fenceValue: %d\n",
450                       pD3D12Dec->m_fenceValue);
451          assert(false);
452          return 1;
453       }
454    }
455 
456    // Upload frame bitstream CPU data to ID3D12Resource buffer
457    inFlightResources.m_curFrameCompressedBitstreamBufferPayloadSize =
458       sliceDataStagingBufferSize;   // This can be less than m_curFrameCompressedBitstreamBufferAllocatedSize.
459    assert(inFlightResources.m_curFrameCompressedBitstreamBufferPayloadSize <=
460           inFlightResources.m_curFrameCompressedBitstreamBufferAllocatedSize);
461 
462    /* One-shot transfer operation with data supplied in a user
463     * pointer.
464     */
465    inFlightResources.pPipeCompressedBufferObj =
466       d3d12_resource_from_resource(&pD3D12Screen->base, inFlightResources.m_curFrameCompressedBitstreamBuffer.Get());
467    assert(inFlightResources.pPipeCompressedBufferObj);
468    pD3D12Dec->base.context->buffer_subdata(pD3D12Dec->base.context,                      // context
469                                            inFlightResources.pPipeCompressedBufferObj,   // dst buffer
470                                            PIPE_MAP_WRITE,                               // usage PIPE_MAP_x
471                                            0,                                            // offset
472                                            sizeof(*sliceDataStagingBufferPtr) * sliceDataStagingBufferSize,   // size
473                                            sliceDataStagingBufferPtr                                          // data
474    );
475 
476    // Flush buffer_subdata batch
477    // before deleting the source CPU buffer below
478 
479    pD3D12Dec->base.context->flush(pD3D12Dec->base.context,
480                                   &inFlightResources.m_pBitstreamUploadGPUCompletionFence,
481                                   PIPE_FLUSH_ASYNC | PIPE_FLUSH_HINT_FINISH);
482    assert(inFlightResources.m_pBitstreamUploadGPUCompletionFence);
483    // To be waited on GPU fence before flushing current frame DecodeFrame to GPU
484 
485    ///
486    /// Proceed to record the GPU Decode commands
487    ///
488 
489    // Requested conversions by caller upper layer (none for now)
490    d3d12_video_decode_output_conversion_arguments requestedConversionArguments = {};
491 
492    ///
493    /// Record DecodeFrame operation and resource state transitions.
494    ///
495 
496    // Translate input D3D12 structure
497    D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS d3d12InputArguments = {};
498 
499    d3d12InputArguments.CompressedBitstream.pBuffer = inFlightResources.m_curFrameCompressedBitstreamBuffer.Get();
500    d3d12InputArguments.CompressedBitstream.Offset = 0u;
501    ASSERTED constexpr uint64_t d3d12BitstreamOffsetAlignment =
502       128u;   // specified in
503               // https://docs.microsoft.com/en-us/windows/win32/api/d3d12video/ne-d3d12video-d3d12_video_decode_tier
504    assert((d3d12InputArguments.CompressedBitstream.Offset == 0) ||
505           ((d3d12InputArguments.CompressedBitstream.Offset % d3d12BitstreamOffsetAlignment) == 0));
506    d3d12InputArguments.CompressedBitstream.Size = inFlightResources.m_curFrameCompressedBitstreamBufferPayloadSize;
507 
508    D3D12_RESOURCE_BARRIER resourceBarrierCommonToDecode[1] = {
509       CD3DX12_RESOURCE_BARRIER::Transition(d3d12InputArguments.CompressedBitstream.pBuffer,
510                                            D3D12_RESOURCE_STATE_COMMON,
511                                            D3D12_RESOURCE_STATE_VIDEO_DECODE_READ),
512    };
513    pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(1u, resourceBarrierCommonToDecode);
514 
515    // Schedule reverse (back to common) transitions before command list closes for current frame
516    pD3D12Dec->m_transitionsBeforeCloseCmdList.push_back(
517       CD3DX12_RESOURCE_BARRIER::Transition(d3d12InputArguments.CompressedBitstream.pBuffer,
518                                            D3D12_RESOURCE_STATE_VIDEO_DECODE_READ,
519                                            D3D12_RESOURCE_STATE_COMMON));
520 
521    ///
522    /// Clear texture (no reference only flags in resource allocation) to use as decode output to send downstream for
523    /// display/consumption
524    ///
525    ID3D12Resource *pOutputD3D12Texture;
526    uint outputD3D12Subresource = 0;
527 
528    ///
529    /// Ref Only texture (with reference only flags in resource allocation) to use as reconstructed picture decode output
530    /// and to store as future reference in DPB
531    ///
532    ID3D12Resource *pRefOnlyOutputD3D12Texture;
533    uint refOnlyOutputD3D12Subresource = 0;
534 
535    if (!d3d12_video_decoder_prepare_for_decode_frame(pD3D12Dec,
536                                                      target,
537                                                      pD3D12VideoBuffer,
538                                                      &pOutputD3D12Texture,             // output
539                                                      &outputD3D12Subresource,          // output
540                                                      &pRefOnlyOutputD3D12Texture,      // output
541                                                      &refOnlyOutputD3D12Subresource,   // output
542                                                      requestedConversionArguments)) {
543       debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame - Failure on "
544                    "d3d12_video_decoder_prepare_for_decode_frame\n");
545       debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame failed for fenceValue: %d\n",
546                    pD3D12Dec->m_fenceValue);
547       assert(false);
548       return 1;
549    }
550 
551    ///
552    /// Set codec picture parameters CPU buffer
553    ///
554 
555    d3d12InputArguments.NumFrameArguments =
556       1u;   // Only the codec data received from the above layer with picture params
557    d3d12InputArguments.FrameArguments[d3d12InputArguments.NumFrameArguments - 1] = {
558       D3D12_VIDEO_DECODE_ARGUMENT_TYPE_PICTURE_PARAMETERS,
559       static_cast<uint32_t>(inFlightResources.m_picParamsBuffer.size()),
560       inFlightResources.m_picParamsBuffer.data(),
561    };
562 
563    if (inFlightResources.m_SliceControlBuffer.size() > 0) {
564       d3d12InputArguments.NumFrameArguments++;
565       d3d12InputArguments.FrameArguments[d3d12InputArguments.NumFrameArguments - 1] = {
566          D3D12_VIDEO_DECODE_ARGUMENT_TYPE_SLICE_CONTROL,
567          static_cast<uint32_t>(inFlightResources.m_SliceControlBuffer.size()),
568          inFlightResources.m_SliceControlBuffer.data(),
569       };
570    }
571 
572    if (inFlightResources.qp_matrix_frame_argument_enabled &&
573        (inFlightResources.m_InverseQuantMatrixBuffer.size() > 0)) {
574       d3d12InputArguments.NumFrameArguments++;
575       d3d12InputArguments.FrameArguments[d3d12InputArguments.NumFrameArguments - 1] = {
576          D3D12_VIDEO_DECODE_ARGUMENT_TYPE_INVERSE_QUANTIZATION_MATRIX,
577          static_cast<uint32_t>(inFlightResources.m_InverseQuantMatrixBuffer.size()),
578          inFlightResources.m_InverseQuantMatrixBuffer.data(),
579       };
580    }
581 
582    d3d12InputArguments.ReferenceFrames = pD3D12Dec->m_spDPBManager->get_current_reference_frames();
583    if (D3D12_DEBUG_VERBOSE & d3d12_debug) {
584       pD3D12Dec->m_spDPBManager->print_dpb();
585    }
586 
587    d3d12InputArguments.pHeap = pD3D12Dec->m_spVideoDecoderHeap.Get();
588 
589    // translate output D3D12 structure
590    D3D12_VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS1 d3d12OutputArguments = {};
591    d3d12OutputArguments.pOutputTexture2D = pOutputD3D12Texture;
592    d3d12OutputArguments.OutputSubresource = outputD3D12Subresource;
593 
594    bool fReferenceOnly = (pD3D12Dec->m_ConfigDecoderSpecificFlags &
595                           d3d12_video_decode_config_specific_flag_reference_only_textures_required) != 0;
596    if (fReferenceOnly) {
597       d3d12OutputArguments.ConversionArguments.Enable = true;
598 
599       assert(pRefOnlyOutputD3D12Texture);
600       d3d12OutputArguments.ConversionArguments.pReferenceTexture2D = pRefOnlyOutputD3D12Texture;
601       d3d12OutputArguments.ConversionArguments.ReferenceSubresource = refOnlyOutputD3D12Subresource;
602 
603       const D3D12_RESOURCE_DESC &descReference = GetDesc(d3d12OutputArguments.ConversionArguments.pReferenceTexture2D);
604       d3d12OutputArguments.ConversionArguments.DecodeColorSpace = d3d12_convert_from_legacy_color_space(
605          !util_format_is_yuv(d3d12_get_pipe_format(descReference.Format)),
606          util_format_get_blocksize(d3d12_get_pipe_format(descReference.Format)) * 8 /*bytes to bits conversion*/,
607          /* StudioRGB= */ false,
608          /* P709= */ true,
609          /* StudioYUV= */ true);
610 
611       const D3D12_RESOURCE_DESC &descOutput = GetDesc(d3d12OutputArguments.pOutputTexture2D);
612       d3d12OutputArguments.ConversionArguments.OutputColorSpace = d3d12_convert_from_legacy_color_space(
613          !util_format_is_yuv(d3d12_get_pipe_format(descOutput.Format)),
614          util_format_get_blocksize(d3d12_get_pipe_format(descOutput.Format)) * 8 /*bytes to bits conversion*/,
615          /* StudioRGB= */ false,
616          /* P709= */ true,
617          /* StudioYUV= */ true);
618 
619       const D3D12_VIDEO_DECODER_HEAP_DESC &HeapDesc = GetDesc(pD3D12Dec->m_spVideoDecoderHeap.Get());
620       d3d12OutputArguments.ConversionArguments.OutputWidth = HeapDesc.DecodeWidth;
621       d3d12OutputArguments.ConversionArguments.OutputHeight = HeapDesc.DecodeHeight;
622    } else {
623       d3d12OutputArguments.ConversionArguments.Enable = false;
624    }
625 
626    CD3DX12_RESOURCE_DESC outputDesc(GetDesc(d3d12OutputArguments.pOutputTexture2D));
627    uint32_t MipLevel, PlaneSlice, ArraySlice;
628    D3D12DecomposeSubresource(d3d12OutputArguments.OutputSubresource,
629                              outputDesc.MipLevels,
630                              outputDesc.ArraySize(),
631                              MipLevel,
632                              ArraySlice,
633                              PlaneSlice);
634 
635    for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) {
636       uint planeOutputSubresource = outputDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice);
637 
638       D3D12_RESOURCE_BARRIER resourceBarrierCommonToDecode[1] = {
639          CD3DX12_RESOURCE_BARRIER::Transition(d3d12OutputArguments.pOutputTexture2D,
640                                               D3D12_RESOURCE_STATE_COMMON,
641                                               D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE,
642                                               planeOutputSubresource),
643       };
644       pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(1u, resourceBarrierCommonToDecode);
645    }
646 
647    // Schedule reverse (back to common) transitions before command list closes for current frame
648    for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) {
649       uint planeOutputSubresource = outputDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice);
650       pD3D12Dec->m_transitionsBeforeCloseCmdList.push_back(
651          CD3DX12_RESOURCE_BARRIER::Transition(d3d12OutputArguments.pOutputTexture2D,
652                                               D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE,
653                                               D3D12_RESOURCE_STATE_COMMON,
654                                               planeOutputSubresource));
655    }
656 
657    // Record DecodeFrame
658 
659    pD3D12Dec->m_spDecodeCommandList->DecodeFrame1(pD3D12Dec->m_spVideoDecoder.Get(),
660                                                   &d3d12OutputArguments,
661                                                   &d3d12InputArguments);
662 
663    debug_printf("[d3d12_video_decoder] d3d12_video_decoder_end_frame finalized for fenceValue: %d\n",
664                 pD3D12Dec->m_fenceValue);
665 
666    // Save extra references of Decoder, DecoderHeap and DPB allocations in case
   // there's a reconfiguration that triggers the construction of new objects
668    inFlightResources.m_spDecoder = pD3D12Dec->m_spVideoDecoder;
669    inFlightResources.m_spDecoderHeap = pD3D12Dec->m_spVideoDecoderHeap;
670    inFlightResources.m_References = pD3D12Dec->m_spDPBManager;
671 
672    ///
673    /// Flush work to the GPU
674    ///
675    pD3D12Dec->m_needsGPUFlush = true;
676    d3d12_video_decoder_flush(codec);
677    // Call to d3d12_video_decoder_flush increases m_FenceValue
678    uint64_t inflightIndexBeforeFlush = (pD3D12Dec->m_fenceValue - 1u) % D3D12_VIDEO_DEC_ASYNC_DEPTH;
679 
680    if (pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation()) {
681       // No need to copy, the output surface fence is merely the decode queue fence
682       *picture->fence = (pipe_fence_handle *) &pD3D12Dec->m_inflightResourcesPool[inflightIndexBeforeFlush].m_FenceData;
683    } else {
684       ///
685       /// If !pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation()
686       /// We cannot use the standalone video buffer allocation directly and we must use instead
687       /// either a ID3D12Resource with DECODE_REFERENCE only flag or a texture array within the same
688       /// allocation
689       /// Do GPU->GPU texture copy from decode output to pipe target decode texture sampler view planes
690       ///
691 
692       // Get destination resource
693       struct pipe_sampler_view **pPipeDstViews = target->get_sampler_view_planes(target);
694 
695       // Get source pipe_resource
696       pipe_resource *pPipeSrc =
697          d3d12_resource_from_resource(&pD3D12Screen->base, d3d12OutputArguments.pOutputTexture2D);
698       assert(pPipeSrc);
699 
700       // GPU wait on the graphics context which will do the copy until the decode finishes
701       pD3D12Screen->cmdqueue->Wait(
702          pD3D12Dec->m_inflightResourcesPool[inflightIndexBeforeFlush].m_FenceData.cmdqueue_fence,
703          pD3D12Dec->m_inflightResourcesPool[inflightIndexBeforeFlush].m_FenceData.value);
704 
705       // Copy all format subresources/texture planes
706       for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) {
707          assert(d3d12OutputArguments.OutputSubresource < INT16_MAX);
708          struct pipe_box box;
709          u_box_3d(0,
710                   0,
711                   // src array slice, taken as Z for TEXTURE_2D_ARRAY
712                   static_cast<int16_t>(d3d12OutputArguments.OutputSubresource),
713                   static_cast<int>(pPipeDstViews[PlaneSlice]->texture->width0),
714                   static_cast<int16_t>(pPipeDstViews[PlaneSlice]->texture->height0),
715                   1, &box);
716 
717          pD3D12Dec->base.context->resource_copy_region(pD3D12Dec->base.context,
718                                                        pPipeDstViews[PlaneSlice]->texture,              // dst
719                                                        0,                                               // dst level
720                                                        0,                                               // dstX
721                                                        0,                                               // dstY
722                                                        0,                                               // dstZ
723                                                        (PlaneSlice == 0) ? pPipeSrc : pPipeSrc->next,   // src
724                                                        0,                                               // src level
725                                                        &box);
726       }
727       // Flush resource_copy_region batch
728       // The output surface fence is the graphics queue that will signal after the copy ends
729       pD3D12Dec->base.context->flush(pD3D12Dec->base.context, picture->fence, PIPE_FLUSH_ASYNC | PIPE_FLUSH_HINT_FINISH);
730    }
731    return 0;
732 }
733 
734 /**
735  * Get decoder fence.
736  */
737 int
d3d12_video_decoder_get_decoder_fence(struct pipe_video_codec * codec,struct pipe_fence_handle * fence,uint64_t timeout)738 d3d12_video_decoder_get_decoder_fence(struct pipe_video_codec *codec, struct pipe_fence_handle *fence, uint64_t timeout)
739 {
740    struct d3d12_fence *fenceValueToWaitOn = (struct d3d12_fence *) fence;
741    assert(fenceValueToWaitOn);
742 
743    ASSERTED bool wait_res =
744       d3d12_video_decoder_sync_completion(codec, fenceValueToWaitOn->cmdqueue_fence, fenceValueToWaitOn->value, timeout);
745 
746    // Return semantics based on p_video_codec interface
747    // ret == 0 -> Decode in progress
748    // ret != 0 -> Decode completed
749    return wait_res ? 1 : 0;
750 }
751 
/**
 * flush any outstanding command buffers to the hardware
 * should be called before a video_buffer is accessed by the gallium frontend again
 *
 * Closes and submits the decode command list on the decode queue, makes the
 * decode queue wait on the bitstream upload fence first, then signals
 * m_fenceValue so waiters can track completion. No-op when there is no
 * pending GPU work (m_needsGPUFlush is false).
 */
void
d3d12_video_decoder_flush(struct pipe_video_codec *codec)
{
   struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
   assert(pD3D12Dec);
   assert(pD3D12Dec->m_spD3D12VideoDevice);
   assert(pD3D12Dec->m_spDecodeCommandQueue);
   debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush started. Will flush video queue work and CPU wait on "
                "fenceValue: %d\n",
                pD3D12Dec->m_fenceValue);

   if (!pD3D12Dec->m_needsGPUFlush) {
      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush started. Nothing to flush, all up to date.\n");
   } else {
      // Check for device removal before recording/submitting anything;
      // executing on a removed device would fail in less diagnosable ways.
      HRESULT hr = pD3D12Dec->m_pD3D12Screen->dev->GetDeviceRemovedReason();
      if (hr != S_OK) {
         debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush"
                      " - D3D12Device was removed BEFORE commandlist "
                      "execution with HR %x.\n",
                      hr);
         goto flush_fail;
      }

      // Record the resource transitions (e.g. VIDEO_DECODE_WRITE -> COMMON)
      // that end_frame deferred until just before closing the command list.
      if (pD3D12Dec->m_transitionsBeforeCloseCmdList.size() > 0) {
         pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(pD3D12Dec->m_transitionsBeforeCloseCmdList.size(),
                                                           pD3D12Dec->m_transitionsBeforeCloseCmdList.data());
         pD3D12Dec->m_transitionsBeforeCloseCmdList.clear();
      }

      hr = pD3D12Dec->m_spDecodeCommandList->Close();
      if (FAILED(hr)) {
         debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush - Can't close command list with HR %x\n", hr);
         goto flush_fail;
      }

      auto &inFlightResources = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)];
      ID3D12CommandList *ppCommandLists[1] = { pD3D12Dec->m_spDecodeCommandList.Get() };
      // GPU-side wait: the decode queue must not consume the bitstream until
      // the upload done on another queue has signaled its completion fence.
      struct d3d12_fence *pUploadBitstreamFence = d3d12_fence(inFlightResources.m_pBitstreamUploadGPUCompletionFence);
      pD3D12Dec->m_spDecodeCommandQueue->Wait(pUploadBitstreamFence->cmdqueue_fence, pUploadBitstreamFence->value);
      pD3D12Dec->m_spDecodeCommandQueue->ExecuteCommandLists(1, ppCommandLists);
      pD3D12Dec->m_spDecodeCommandQueue->Signal(pD3D12Dec->m_spFence.Get(), pD3D12Dec->m_fenceValue);

      // Validate device was not removed
      hr = pD3D12Dec->m_pD3D12Screen->dev->GetDeviceRemovedReason();
      if (hr != S_OK) {
         debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush"
                      " - D3D12Device was removed AFTER commandlist "
                      "execution with HR %x, but wasn't before.\n",
                      hr);
         goto flush_fail;
      }

      // Set async fence info
      memset(&inFlightResources.m_FenceData, 0, sizeof(inFlightResources.m_FenceData));

      // Publish fence object/value for this in-flight slot so callers (e.g.
      // end_frame / get_decoder_fence) can wait on this submission.
      inFlightResources.m_FenceData.value = pD3D12Dec->m_fenceValue;
      inFlightResources.m_FenceData.cmdqueue_fence = pD3D12Dec->m_spFence.Get();

      pD3D12Dec->m_fenceValue++;
      pD3D12Dec->m_needsGPUFlush = false;
   }
   return;

flush_fail:
   debug_printf("[d3d12_video_decoder] d3d12_video_decoder_flush failed for fenceValue: %d\n", pD3D12Dec->m_fenceValue);
   assert(false);
}
823 
824 bool
d3d12_video_decoder_create_command_objects(const struct d3d12_screen * pD3D12Screen,struct d3d12_video_decoder * pD3D12Dec)825 d3d12_video_decoder_create_command_objects(const struct d3d12_screen *pD3D12Screen,
826                                            struct d3d12_video_decoder *pD3D12Dec)
827 {
828    assert(pD3D12Dec->m_spD3D12VideoDevice);
829 
830    D3D12_COMMAND_QUEUE_DESC commandQueueDesc = { D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE };
831    HRESULT hr = pD3D12Screen->dev->CreateCommandQueue(&commandQueueDesc,
832                                                       IID_PPV_ARGS(pD3D12Dec->m_spDecodeCommandQueue.GetAddressOf()));
833    if (FAILED(hr)) {
834       debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to CreateCommandQueue "
835                    "failed with HR %x\n",
836                    hr);
837       return false;
838    }
839 
840    hr = pD3D12Screen->dev->CreateFence(0, D3D12_FENCE_FLAG_SHARED, IID_PPV_ARGS(&pD3D12Dec->m_spFence));
841    if (FAILED(hr)) {
842       debug_printf(
843          "[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to CreateFence failed with HR %x\n",
844          hr);
845       return false;
846    }
847 
848    for (auto &inputResource : pD3D12Dec->m_inflightResourcesPool) {
849       hr = pD3D12Dec->m_pD3D12Screen->dev->CreateCommandAllocator(
850          D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE,
851          IID_PPV_ARGS(inputResource.m_spCommandAllocator.GetAddressOf()));
852       if (FAILED(hr)) {
853          debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to "
854                       "CreateCommandAllocator failed with HR %x\n",
855                       hr);
856          return false;
857       }
858    }
859 
860    ComPtr<ID3D12Device4> spD3D12Device4;
861    if (FAILED(pD3D12Dec->m_pD3D12Screen->dev->QueryInterface(IID_PPV_ARGS(spD3D12Device4.GetAddressOf())))) {
862       debug_printf(
863          "[d3d12_video_decoder] d3d12_video_decoder_create_decoder - D3D12 Device has no ID3D12Device4 support\n");
864       return false;
865    }
866 
867    hr = spD3D12Device4->CreateCommandList1(0,
868                                            D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE,
869                                            D3D12_COMMAND_LIST_FLAG_NONE,
870                                            IID_PPV_ARGS(pD3D12Dec->m_spDecodeCommandList.GetAddressOf()));
871 
872    if (FAILED(hr)) {
873       debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_command_objects - Call to CreateCommandList "
874                    "failed with HR %x\n",
875                    hr);
876       return false;
877    }
878 
879    return true;
880 }
881 
882 bool
d3d12_video_decoder_check_caps_and_create_decoder(const struct d3d12_screen * pD3D12Screen,struct d3d12_video_decoder * pD3D12Dec)883 d3d12_video_decoder_check_caps_and_create_decoder(const struct d3d12_screen *pD3D12Screen,
884                                                   struct d3d12_video_decoder *pD3D12Dec)
885 {
886    assert(pD3D12Dec->m_spD3D12VideoDevice);
887 
888    pD3D12Dec->m_decoderDesc = {};
889 
890    D3D12_VIDEO_DECODE_CONFIGURATION decodeConfiguration = { pD3D12Dec->m_d3d12DecProfile,
891                                                             D3D12_BITSTREAM_ENCRYPTION_TYPE_NONE,
892                                                             D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE_NONE };
893 
894    D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT decodeSupport = {};
895    decodeSupport.NodeIndex = pD3D12Dec->m_NodeIndex;
896    decodeSupport.Configuration = decodeConfiguration;
897    decodeSupport.Width = pD3D12Dec->base.width;
898    decodeSupport.Height = pD3D12Dec->base.height;
899    decodeSupport.DecodeFormat = pD3D12Dec->m_decodeFormat;
900    // no info from above layer on framerate/bitrate
901    decodeSupport.FrameRate.Numerator = 0;
902    decodeSupport.FrameRate.Denominator = 0;
903    decodeSupport.BitRate = 0;
904 
905    HRESULT hr = pD3D12Dec->m_spD3D12VideoDevice->CheckFeatureSupport(D3D12_FEATURE_VIDEO_DECODE_SUPPORT,
906                                                                      &decodeSupport,
907                                                                      sizeof(decodeSupport));
908    if (FAILED(hr)) {
909       debug_printf("[d3d12_video_decoder] d3d12_video_decoder_check_caps_and_create_decoder - CheckFeatureSupport "
910                    "failed with HR %x\n",
911                    hr);
912       return false;
913    }
914 
915    if (!(decodeSupport.SupportFlags & D3D12_VIDEO_DECODE_SUPPORT_FLAG_SUPPORTED)) {
916       debug_printf("[d3d12_video_decoder] d3d12_video_decoder_check_caps_and_create_decoder - "
917                    "D3D12_VIDEO_DECODE_SUPPORT_FLAG_SUPPORTED was false when checking caps \n");
918       return false;
919    }
920 
921    pD3D12Dec->m_configurationFlags = decodeSupport.ConfigurationFlags;
922    pD3D12Dec->m_tier = decodeSupport.DecodeTier;
923 
924    if (d3d12_video_decoder_supports_aot_dpb(decodeSupport, pD3D12Dec->m_d3d12DecProfileType)) {
925       pD3D12Dec->m_ConfigDecoderSpecificFlags |= d3d12_video_decode_config_specific_flag_array_of_textures;
926    }
927 
928    if (decodeSupport.ConfigurationFlags & D3D12_VIDEO_DECODE_CONFIGURATION_FLAG_HEIGHT_ALIGNMENT_MULTIPLE_32_REQUIRED) {
929       pD3D12Dec->m_ConfigDecoderSpecificFlags |= d3d12_video_decode_config_specific_flag_alignment_height;
930    }
931 
932    if (decodeSupport.ConfigurationFlags & D3D12_VIDEO_DECODE_CONFIGURATION_FLAG_REFERENCE_ONLY_ALLOCATIONS_REQUIRED) {
933       pD3D12Dec->m_ConfigDecoderSpecificFlags |=
934          d3d12_video_decode_config_specific_flag_reference_only_textures_required;
935    }
936 
937    pD3D12Dec->m_decoderDesc.NodeMask = pD3D12Dec->m_NodeMask;
938    pD3D12Dec->m_decoderDesc.Configuration = decodeConfiguration;
939 
940    hr = pD3D12Dec->m_spD3D12VideoDevice->CreateVideoDecoder(&pD3D12Dec->m_decoderDesc,
941                                                             IID_PPV_ARGS(pD3D12Dec->m_spVideoDecoder.GetAddressOf()));
942    if (FAILED(hr)) {
943       debug_printf("[d3d12_video_decoder] d3d12_video_decoder_check_caps_and_create_decoder - CreateVideoDecoder "
944                    "failed with HR %x\n",
945                    hr);
946       return false;
947    }
948 
949    return true;
950 }
951 
952 bool
d3d12_video_decoder_create_video_state_buffers(const struct d3d12_screen * pD3D12Screen,struct d3d12_video_decoder * pD3D12Dec)953 d3d12_video_decoder_create_video_state_buffers(const struct d3d12_screen *pD3D12Screen,
954                                                struct d3d12_video_decoder *pD3D12Dec)
955 {
956    assert(pD3D12Dec->m_spD3D12VideoDevice);
957    if (!d3d12_video_decoder_create_staging_bitstream_buffer(pD3D12Screen,
958                                                             pD3D12Dec,
959                                                             pD3D12Dec->m_InitialCompBitstreamGPUBufferSize)) {
960       debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_video_state_buffers - Failure on "
961                    "d3d12_video_decoder_create_staging_bitstream_buffer\n");
962       return false;
963    }
964 
965    return true;
966 }
967 
968 bool
d3d12_video_decoder_create_staging_bitstream_buffer(const struct d3d12_screen * pD3D12Screen,struct d3d12_video_decoder * pD3D12Dec,uint64_t bufSize)969 d3d12_video_decoder_create_staging_bitstream_buffer(const struct d3d12_screen *pD3D12Screen,
970                                                     struct d3d12_video_decoder *pD3D12Dec,
971                                                     uint64_t bufSize)
972 {
973    assert(pD3D12Dec->m_spD3D12VideoDevice);
974    auto &inFlightResources = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)];
975    if (inFlightResources.m_curFrameCompressedBitstreamBuffer.Get() != nullptr) {
976       inFlightResources.m_curFrameCompressedBitstreamBuffer.Reset();
977    }
978 
979    auto descHeap = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT, pD3D12Dec->m_NodeMask, pD3D12Dec->m_NodeMask);
980    auto descResource = CD3DX12_RESOURCE_DESC::Buffer(bufSize);
981    HRESULT hr = pD3D12Screen->dev->CreateCommittedResource(
982       &descHeap,
983       D3D12_HEAP_FLAG_NONE,
984       &descResource,
985       D3D12_RESOURCE_STATE_COMMON,
986       nullptr,
987       IID_PPV_ARGS(inFlightResources.m_curFrameCompressedBitstreamBuffer.GetAddressOf()));
988    if (FAILED(hr)) {
989       debug_printf("[d3d12_video_decoder] d3d12_video_decoder_create_staging_bitstream_buffer - "
990                    "CreateCommittedResource failed with HR %x\n",
991                    hr);
992       return false;
993    }
994 
995    inFlightResources.m_curFrameCompressedBitstreamBufferAllocatedSize = bufSize;
996    return true;
997 }
998 
/**
 * Prepares all per-frame state ahead of recording DecodeFrame:
 * reconfigures the DPB/decoder if needed, refreshes active references,
 * resolves the decode output texture (and the REFERENCE_ONLY texture when
 * the driver requires one), records the COMMON -> VIDEO_DECODE_WRITE plane
 * transitions, and updates the codec-specific reference picture structures.
 *
 * \param pD3D12Dec                    the decoder instance
 * \param pCurrentDecodeTarget         frontend video buffer being decoded into
 * \param pD3D12VideoBuffer            same decode target as a d3d12_video_buffer
 * \param ppOutTexture2D               receives the decode output resource
 * \param pOutSubresourceIndex         receives the output subresource index
 * \param ppRefOnlyOutTexture2D        receives the reference-only resource (when required)
 * \param pRefOnlyOutSubresourceIndex  receives the reference-only subresource index
 * \param conversionArgs               output conversion (downscale/colorspace) arguments
 * \return true on success, false if the DPB reconfiguration failed
 */
bool
d3d12_video_decoder_prepare_for_decode_frame(struct d3d12_video_decoder *pD3D12Dec,
                                             struct pipe_video_buffer *pCurrentDecodeTarget,
                                             struct d3d12_video_buffer *pD3D12VideoBuffer,
                                             ID3D12Resource **ppOutTexture2D,
                                             uint32_t *pOutSubresourceIndex,
                                             ID3D12Resource **ppRefOnlyOutTexture2D,
                                             uint32_t *pRefOnlyOutSubresourceIndex,
                                             const d3d12_video_decode_output_conversion_arguments &conversionArgs)
{
   if (!d3d12_video_decoder_reconfigure_dpb(pD3D12Dec, pD3D12VideoBuffer, conversionArgs)) {
      debug_printf("d3d12_video_decoder_reconfigure_dpb failed!\n");
      return false;
   }

   // Refresh DPB active references for current frame, release memory for unused references.
   d3d12_video_decoder_refresh_dpb_active_references(pD3D12Dec);

   // Get the output texture for the current frame to be decoded
   pD3D12Dec->m_spDPBManager->get_current_frame_decode_output_texture(pCurrentDecodeTarget,
                                                                      ppOutTexture2D,
                                                                      pOutSubresourceIndex);

   auto vidBuffer = (struct d3d12_video_buffer *) (pCurrentDecodeTarget);
   // If is_pipe_buffer_underlying_output_decode_allocation is enabled,
   // we can just use the underlying allocation in pCurrentDecodeTarget
   // and avoid an extra copy after decoding the frame.
   // If this is the case, we need to handle the residency of this resource
   // (if not we're actually creating the resources with CreateCommitedResource with
   // residency by default)
   if (pD3D12Dec->m_spDPBManager->is_pipe_buffer_underlying_output_decode_allocation()) {
      assert(d3d12_resource_resource(vidBuffer->texture) == *ppOutTexture2D);
      // Make it permanently resident for video use
      d3d12_promote_to_permanent_residency(pD3D12Dec->m_pD3D12Screen, vidBuffer->texture);
   }

   // Get the reference only texture for the current frame to be decoded (if applicable)
   bool fReferenceOnly = (pD3D12Dec->m_ConfigDecoderSpecificFlags &
                          d3d12_video_decode_config_specific_flag_reference_only_textures_required) != 0;
   if (fReferenceOnly) {
      bool needsTransitionToDecodeWrite = false;
      pD3D12Dec->m_spDPBManager->get_reference_only_output(pCurrentDecodeTarget,
                                                           ppRefOnlyOutTexture2D,
                                                           pRefOnlyOutSubresourceIndex,
                                                           needsTransitionToDecodeWrite);
      assert(needsTransitionToDecodeWrite);

      // Decompose the flat subresource index into mip/array/plane components
      // so per-plane subresource indices can be computed for the barriers below.
      CD3DX12_RESOURCE_DESC outputDesc(GetDesc(*ppRefOnlyOutTexture2D));
      uint32_t MipLevel, PlaneSlice, ArraySlice;
      D3D12DecomposeSubresource(*pRefOnlyOutSubresourceIndex,
                                outputDesc.MipLevels,
                                outputDesc.ArraySize(),
                                MipLevel,
                                ArraySlice,
                                PlaneSlice);

      // Transition every format plane of the reference-only texture to
      // VIDEO_DECODE_WRITE ahead of the DecodeFrame recording.
      for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) {
         uint planeOutputSubresource = outputDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice);

         D3D12_RESOURCE_BARRIER resourceBarrierCommonToDecode[1] = {
            CD3DX12_RESOURCE_BARRIER::Transition(*ppRefOnlyOutTexture2D,
                                                 D3D12_RESOURCE_STATE_COMMON,
                                                 D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE,
                                                 planeOutputSubresource),
         };
         pD3D12Dec->m_spDecodeCommandList->ResourceBarrier(1u, resourceBarrierCommonToDecode);
      }

      // Schedule reverse (back to common) transitions before command list closes for current frame
      for (PlaneSlice = 0; PlaneSlice < pD3D12Dec->m_decodeFormatInfo.PlaneCount; PlaneSlice++) {
         uint planeOutputSubresource = outputDesc.CalcSubresource(MipLevel, ArraySlice, PlaneSlice);
         pD3D12Dec->m_transitionsBeforeCloseCmdList.push_back(
            CD3DX12_RESOURCE_BARRIER::Transition(*ppRefOnlyOutTexture2D,
                                                 D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE,
                                                 D3D12_RESOURCE_STATE_COMMON,
                                                 planeOutputSubresource));
      }
   }

   // If the decoder needs reference_only entries in the dpb, use the reference_only allocation for current frame
   // otherwise, use the standard output resource
   // ([[maybe_unused]]: unused when no codec decoder is compiled in, see #if below)
   [[maybe_unused]] ID3D12Resource *pCurrentFrameDPBEntry = fReferenceOnly ? *ppRefOnlyOutTexture2D : *ppOutTexture2D;
   [[maybe_unused]] uint32_t currentFrameDPBEntrySubresource = fReferenceOnly ? *pRefOnlyOutSubresourceIndex : *pOutSubresourceIndex;
#if D3D12_VIDEO_ANY_DECODER_ENABLED
   // Dispatch to the codec-specific reference management for the current frame.
   switch (pD3D12Dec->m_d3d12DecProfileType) {
#if VIDEO_CODEC_H264DEC
      case d3d12_video_decode_profile_type_h264:
      {
         d3d12_video_decoder_prepare_current_frame_references_h264(pD3D12Dec,
                                                                   pCurrentFrameDPBEntry,
                                                                   currentFrameDPBEntrySubresource);
      } break;
#endif
#if VIDEO_CODEC_H265DEC
      case d3d12_video_decode_profile_type_hevc:
      {
         d3d12_video_decoder_prepare_current_frame_references_hevc(pD3D12Dec,
                                                                   pCurrentFrameDPBEntry,
                                                                   currentFrameDPBEntrySubresource);
      } break;
#endif
#if VIDEO_CODEC_AV1DEC
      case d3d12_video_decode_profile_type_av1:
      {
         d3d12_video_decoder_prepare_current_frame_references_av1(pD3D12Dec,
                                                                  pCurrentFrameDPBEntry,
                                                                  currentFrameDPBEntrySubresource);
      } break;
#endif
#if VIDEO_CODEC_VP9DEC
      case d3d12_video_decode_profile_type_vp9:
      {
         d3d12_video_decoder_prepare_current_frame_references_vp9(pD3D12Dec,
                                                                  pCurrentFrameDPBEntry,
                                                                  currentFrameDPBEntrySubresource);
      } break;
#endif
      default:
      {
         unreachable("Unsupported d3d12_video_decode_profile_type");
      } break;
   }
#endif // D3D12_VIDEO_ANY_DECODER_ENABLED
   return true;
}
1124 
1125 bool
d3d12_video_decoder_reconfigure_dpb(struct d3d12_video_decoder * pD3D12Dec,struct d3d12_video_buffer * pD3D12VideoBuffer,const d3d12_video_decode_output_conversion_arguments & conversionArguments)1126 d3d12_video_decoder_reconfigure_dpb(struct d3d12_video_decoder *pD3D12Dec,
1127                                     struct d3d12_video_buffer *pD3D12VideoBuffer,
1128                                     const d3d12_video_decode_output_conversion_arguments &conversionArguments)
1129 {
1130    uint32_t width;
1131    uint32_t height;
1132    uint16_t maxDPB;
1133    d3d12_video_decoder_get_frame_info(pD3D12Dec, &width, &height, &maxDPB);
1134 
1135    ID3D12Resource *pPipeD3D12DstResource = d3d12_resource_resource(pD3D12VideoBuffer->texture);
1136    D3D12_RESOURCE_DESC outputResourceDesc = GetDesc(pPipeD3D12DstResource);
1137 
1138    D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE interlaceTypeRequested =
1139       pD3D12VideoBuffer->base.interlaced ? D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE_FIELD_BASED : D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE_NONE;
1140    if ((pD3D12Dec->m_decodeFormat != outputResourceDesc.Format) ||
1141        (pD3D12Dec->m_decoderDesc.Configuration.InterlaceType != interlaceTypeRequested)) {
1142       // Copy current pD3D12Dec->m_decoderDesc, modify decodeprofile and re-create decoder.
1143       D3D12_VIDEO_DECODER_DESC decoderDesc = pD3D12Dec->m_decoderDesc;
1144       decoderDesc.Configuration.InterlaceType = interlaceTypeRequested;
1145       decoderDesc.Configuration.DecodeProfile =
1146          d3d12_video_decoder_resolve_profile(pD3D12Dec->m_d3d12DecProfileType, pD3D12Dec->m_decodeFormat);
1147       pD3D12Dec->m_spVideoDecoder.Reset();
1148       HRESULT hr =
1149          pD3D12Dec->m_spD3D12VideoDevice->CreateVideoDecoder(&decoderDesc,
1150                                                              IID_PPV_ARGS(pD3D12Dec->m_spVideoDecoder.GetAddressOf()));
1151       if (FAILED(hr)) {
1152          debug_printf(
1153             "[d3d12_video_decoder] d3d12_video_decoder_reconfigure_dpb - CreateVideoDecoder failed with HR %x\n",
1154             hr);
1155          return false;
1156       }
1157       // Update state after CreateVideoDecoder succeeds only.
1158       pD3D12Dec->m_decoderDesc = decoderDesc;
1159    }
1160 
1161    if (!pD3D12Dec->m_spDPBManager || !pD3D12Dec->m_spVideoDecoderHeap ||
1162        pD3D12Dec->m_decodeFormat != outputResourceDesc.Format || pD3D12Dec->m_decoderHeapDesc.DecodeWidth != width ||
1163        pD3D12Dec->m_decoderHeapDesc.DecodeHeight != height ||
1164        pD3D12Dec->m_decoderHeapDesc.MaxDecodePictureBufferCount < maxDPB) {
1165       // Detect the combination of AOT/ReferenceOnly to configure the DPB manager
1166       uint16_t referenceCount = (conversionArguments.Enable) ? (uint16_t) conversionArguments.ReferenceFrameCount +
1167                                                                   1 /*extra slot for current picture*/ :
1168                                                                maxDPB;
1169       d3d12_video_decode_dpb_descriptor dpbDesc = {};
1170       dpbDesc.Width = (conversionArguments.Enable) ? conversionArguments.ReferenceInfo.Width : width;
1171       dpbDesc.Height = (conversionArguments.Enable) ? conversionArguments.ReferenceInfo.Height : height;
1172       dpbDesc.Format =
1173          (conversionArguments.Enable) ? conversionArguments.ReferenceInfo.Format.Format : outputResourceDesc.Format;
1174       dpbDesc.fArrayOfTexture =
1175          ((pD3D12Dec->m_ConfigDecoderSpecificFlags & d3d12_video_decode_config_specific_flag_array_of_textures) != 0);
1176       dpbDesc.dpbSize = referenceCount;
1177       dpbDesc.m_NodeMask = pD3D12Dec->m_NodeMask;
1178       dpbDesc.fReferenceOnly = ((pD3D12Dec->m_ConfigDecoderSpecificFlags &
1179                                  d3d12_video_decode_config_specific_flag_reference_only_textures_required) != 0);
1180 
1181       // Create DPB manager
1182       if (pD3D12Dec->m_spDPBManager == nullptr) {
1183          pD3D12Dec->m_spDPBManager.reset(new d3d12_video_decoder_references_manager(pD3D12Dec->m_pD3D12Screen,
1184                                                                                     pD3D12Dec->m_NodeMask,
1185                                                                                     pD3D12Dec->m_d3d12DecProfileType,
1186                                                                                     dpbDesc));
1187       }
1188 
1189       //
1190       // (Re)-create decoder heap
1191       //
1192       D3D12_VIDEO_DECODER_HEAP_DESC decoderHeapDesc = {};
1193       decoderHeapDesc.NodeMask = pD3D12Dec->m_NodeMask;
1194       decoderHeapDesc.Configuration = pD3D12Dec->m_decoderDesc.Configuration;
1195       decoderHeapDesc.DecodeWidth = dpbDesc.Width;
1196       decoderHeapDesc.DecodeHeight = dpbDesc.Height;
1197       decoderHeapDesc.Format = dpbDesc.Format;
1198       decoderHeapDesc.MaxDecodePictureBufferCount = maxDPB;
1199       pD3D12Dec->m_spVideoDecoderHeap.Reset();
1200       HRESULT hr = pD3D12Dec->m_spD3D12VideoDevice->CreateVideoDecoderHeap(
1201          &decoderHeapDesc,
1202          IID_PPV_ARGS(pD3D12Dec->m_spVideoDecoderHeap.GetAddressOf()));
1203       if (FAILED(hr)) {
1204          debug_printf(
1205             "[d3d12_video_decoder] d3d12_video_decoder_reconfigure_dpb - CreateVideoDecoderHeap failed with HR %x\n",
1206             hr);
1207          return false;
1208       }
1209       // Update pD3D12Dec after CreateVideoDecoderHeap succeeds only.
1210       pD3D12Dec->m_decoderHeapDesc = decoderHeapDesc;
1211    }
1212 
1213    pD3D12Dec->m_decodeFormat = outputResourceDesc.Format;
1214 
1215    return true;
1216 }
1217 
// Dispatches the DPB active-references refresh to the codec-specific
// implementation selected by the decoder's current profile type.
void
d3d12_video_decoder_refresh_dpb_active_references(struct d3d12_video_decoder *pD3D12Dec)
{
#if D3D12_VIDEO_ANY_DECODER_ENABLED
   switch (pD3D12Dec->m_d3d12DecProfileType) {
#if VIDEO_CODEC_H264DEC
      case d3d12_video_decode_profile_type_h264:
         d3d12_video_decoder_refresh_dpb_active_references_h264(pD3D12Dec);
         break;
#endif
#if VIDEO_CODEC_H265DEC
      case d3d12_video_decode_profile_type_hevc:
         d3d12_video_decoder_refresh_dpb_active_references_hevc(pD3D12Dec);
         break;
#endif
#if VIDEO_CODEC_AV1DEC
      case d3d12_video_decode_profile_type_av1:
         d3d12_video_decoder_refresh_dpb_active_references_av1(pD3D12Dec);
         break;
#endif
#if VIDEO_CODEC_VP9DEC
      case d3d12_video_decode_profile_type_vp9:
         d3d12_video_decoder_refresh_dpb_active_references_vp9(pD3D12Dec);
         break;
#endif
      default:
         unreachable("Unsupported d3d12_video_decode_profile_type");
         break;
   }
#endif // D3D12_VIDEO_ANY_DECODER_ENABLED
}
1254 
1255 void
d3d12_video_decoder_get_frame_info(struct d3d12_video_decoder * pD3D12Dec,uint32_t * pWidth,uint32_t * pHeight,uint16_t * pMaxDPB)1256 d3d12_video_decoder_get_frame_info(
1257    struct d3d12_video_decoder *pD3D12Dec, uint32_t *pWidth, uint32_t *pHeight, uint16_t *pMaxDPB)
1258 {
1259    *pWidth = 0;
1260    *pHeight = 0;
1261    *pMaxDPB = 0;
1262 
1263 #if D3D12_VIDEO_ANY_DECODER_ENABLED
1264    switch (pD3D12Dec->m_d3d12DecProfileType) {
1265 #if VIDEO_CODEC_H264DEC
1266       case d3d12_video_decode_profile_type_h264:
1267       {
1268          d3d12_video_decoder_get_frame_info_h264(pD3D12Dec, pWidth, pHeight, pMaxDPB);
1269       } break;
1270 #endif
1271 #if VIDEO_CODEC_H265DEC
1272       case d3d12_video_decode_profile_type_hevc:
1273       {
1274          d3d12_video_decoder_get_frame_info_hevc(pD3D12Dec, pWidth, pHeight, pMaxDPB);
1275       } break;
1276 #endif
1277 #if VIDEO_CODEC_AV1DEC
1278       case d3d12_video_decode_profile_type_av1:
1279       {
1280          d3d12_video_decoder_get_frame_info_av1(pD3D12Dec, pWidth, pHeight, pMaxDPB);
1281       } break;
1282 #endif
1283 #if VIDEO_CODEC_VP9DEC
1284       case d3d12_video_decode_profile_type_vp9:
1285       {
1286          d3d12_video_decoder_get_frame_info_vp9(pD3D12Dec, pWidth, pHeight, pMaxDPB);
1287       } break;
1288 #endif
1289       default:
1290       {
1291          unreachable("Unsupported d3d12_video_decode_profile_type");
1292       } break;
1293    }
1294 #endif // D3D12_VIDEO_ANY_DECODER_ENABLED
1295 
1296    if (pD3D12Dec->m_ConfigDecoderSpecificFlags & d3d12_video_decode_config_specific_flag_alignment_height) {
1297       const uint32_t AlignmentMask = 31;
1298       *pHeight = (*pHeight + AlignmentMask) & ~AlignmentMask;
1299    }
1300 }
1301 
// Converts the gallium pipe picture parameters for the current frame into the
// codec-specific DXVA structures and stores them in this frame's in-flight
// resource slot (picture-params buffer and, for H.264/HEVC, the inverse
// quantization matrix buffer). Also records per-frame whether a QP matrix
// argument must be submitted with the decode.
void
d3d12_video_decoder_store_converted_dxva_picparams_from_pipe_input(
   struct d3d12_video_decoder *codec,   // input argument, current decoder
   struct pipe_picture_desc
      *picture,   // input argument, base structure of pipe_XXX_picture_desc where XXX is the codec name
   struct d3d12_video_buffer *pD3D12VideoBuffer   // input argument, target video buffer
)
{
#if D3D12_VIDEO_ANY_DECODER_ENABLED
   assert(picture);
   assert(codec);
   struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;

   d3d12_video_decode_profile_type profileType =
      d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(codec->base.profile);
   // Output texture description is needed by the H.264 conversion below for
   // the coded width/height.
   ID3D12Resource *pPipeD3D12DstResource = d3d12_resource_resource(pD3D12VideoBuffer->texture);
   D3D12_RESOURCE_DESC outputResourceDesc = GetDesc(pPipeD3D12DstResource);
   auto &inFlightResources = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)];
   // Default: no QP matrix unless the codec-specific branch enables it.
   inFlightResources.qp_matrix_frame_argument_enabled = false;
   switch (profileType) {
#if VIDEO_CODEC_H264DEC
      case d3d12_video_decode_profile_type_h264:
      {
         size_t dxvaPicParamsBufferSize = sizeof(DXVA_PicParams_H264);
         pipe_h264_picture_desc *pPicControlH264 = (pipe_h264_picture_desc *) picture;
         DXVA_PicParams_H264 dxvaPicParamsH264 =
            d3d12_video_decoder_dxva_picparams_from_pipe_picparams_h264(pD3D12Dec->m_fenceValue,
                                                                        codec->base.profile,
                                                                        outputResourceDesc.Width,
                                                                        outputResourceDesc.Height,
                                                                        pPicControlH264);

         d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(codec,
                                                                      &dxvaPicParamsH264,
                                                                      dxvaPicParamsBufferSize);

         size_t dxvaQMatrixBufferSize = sizeof(DXVA_Qmatrix_H264);
         DXVA_Qmatrix_H264 dxvaQmatrixH264 = {};
         d3d12_video_decoder_dxva_qmatrix_from_pipe_picparams_h264((pipe_h264_picture_desc *) picture, dxvaQmatrixH264);
         inFlightResources.qp_matrix_frame_argument_enabled =
            true;   // We don't have a way of knowing from the pipe params so send always
         d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(codec, &dxvaQmatrixH264, dxvaQMatrixBufferSize);
      } break;
#endif
#if VIDEO_CODEC_H265DEC
      case d3d12_video_decode_profile_type_hevc:
      {
         size_t dxvaPicParamsBufferSize = sizeof(DXVA_PicParams_HEVC);
         pipe_h265_picture_desc *pPicControlHEVC = (pipe_h265_picture_desc *) picture;
         DXVA_PicParams_HEVC dxvaPicParamsHEVC =
            d3d12_video_decoder_dxva_picparams_from_pipe_picparams_hevc(pD3D12Dec, codec->base.profile, pPicControlHEVC);

         d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(codec,
                                                                      &dxvaPicParamsHEVC,
                                                                      dxvaPicParamsBufferSize);

         size_t dxvaQMatrixBufferSize = sizeof(DXVA_Qmatrix_HEVC);
         DXVA_Qmatrix_HEVC dxvaQmatrixHEVC = {};
         // The HEVC conversion decides whether a QP matrix is needed and
         // reports it through the last out-parameter.
         inFlightResources.qp_matrix_frame_argument_enabled = false;
         d3d12_video_decoder_dxva_qmatrix_from_pipe_picparams_hevc((pipe_h265_picture_desc *) picture,
                                                                   dxvaQmatrixHEVC,
                                                                   inFlightResources.qp_matrix_frame_argument_enabled);
         d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(codec, &dxvaQmatrixHEVC, dxvaQMatrixBufferSize);
      } break;
#endif
#if VIDEO_CODEC_AV1DEC
      case d3d12_video_decode_profile_type_av1:
      {
         // AV1 carries no separate DXVA QP matrix buffer.
         size_t dxvaPicParamsBufferSize = sizeof(DXVA_PicParams_AV1);
         pipe_av1_picture_desc *pPicControlAV1 = (pipe_av1_picture_desc *) picture;
         DXVA_PicParams_AV1 dxvaPicParamsAV1 =
            d3d12_video_decoder_dxva_picparams_from_pipe_picparams_av1(pD3D12Dec->m_fenceValue,
                                                                       codec->base.profile,
                                                                       pPicControlAV1);

         d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(codec, &dxvaPicParamsAV1, dxvaPicParamsBufferSize);
         inFlightResources.qp_matrix_frame_argument_enabled = false;
      } break;
#endif
#if VIDEO_CODEC_VP9DEC
      case d3d12_video_decode_profile_type_vp9:
      {
         // VP9 carries no separate DXVA QP matrix buffer.
         size_t dxvaPicParamsBufferSize = sizeof(DXVA_PicParams_VP9);
         pipe_vp9_picture_desc *pPicControlVP9 = (pipe_vp9_picture_desc *) picture;
         DXVA_PicParams_VP9 dxvaPicParamsVP9 =
            d3d12_video_decoder_dxva_picparams_from_pipe_picparams_vp9(pD3D12Dec, codec->base.profile, pPicControlVP9);

         d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(codec, &dxvaPicParamsVP9, dxvaPicParamsBufferSize);
         inFlightResources.qp_matrix_frame_argument_enabled = false;
      } break;
#endif
      default:
      {
         unreachable("Unsupported d3d12_video_decode_profile_type");
      } break;
   }
#endif // D3D12_VIDEO_ANY_DECODER_ENABLED
}
1400 
// Builds the DXVA slice-control payload for the current frame by dispatching
// to the codec-specific implementation; the result is written into this
// frame's in-flight m_SliceControlBuffer.
void
d3d12_video_decoder_prepare_dxva_slices_control(
   struct d3d12_video_decoder *pD3D12Dec,   // input argument, current decoder
   struct pipe_picture_desc *picture)
{
#if D3D12_VIDEO_ANY_DECODER_ENABLED
   [[maybe_unused]] auto &inFlightResources =
      pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)];
   const d3d12_video_decode_profile_type profileType =
      d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(pD3D12Dec->base.profile);
   switch (profileType) {
#if VIDEO_CODEC_H264DEC
      case d3d12_video_decode_profile_type_h264:
         d3d12_video_decoder_prepare_dxva_slices_control_h264(pD3D12Dec,
                                                              inFlightResources.m_SliceControlBuffer,
                                                              (struct pipe_h264_picture_desc *) picture);
         break;
#endif
#if VIDEO_CODEC_H265DEC
      case d3d12_video_decode_profile_type_hevc:
         d3d12_video_decoder_prepare_dxva_slices_control_hevc(pD3D12Dec,
                                                              inFlightResources.m_SliceControlBuffer,
                                                              (struct pipe_h265_picture_desc *) picture);
         break;
#endif
#if VIDEO_CODEC_AV1DEC
      case d3d12_video_decode_profile_type_av1:
         d3d12_video_decoder_prepare_dxva_slices_control_av1(pD3D12Dec,
                                                             inFlightResources.m_SliceControlBuffer,
                                                             (struct pipe_av1_picture_desc *) picture);
         break;
#endif
#if VIDEO_CODEC_VP9DEC
      case d3d12_video_decode_profile_type_vp9:
         d3d12_video_decoder_prepare_dxva_slices_control_vp9(pD3D12Dec,
                                                             inFlightResources.m_SliceControlBuffer,
                                                             (struct pipe_vp9_picture_desc *) picture);
         break;
#endif
      default:
         unreachable("Unsupported d3d12_video_decode_profile_type");
         break;
   }
#endif // D3D12_VIDEO_ANY_DECODER_ENABLED
}
1450 
1451 void
d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(struct d3d12_video_decoder * pD3D12Dec,void * pDXVAStruct,uint64_t DXVAStructSize)1452 d3d12_video_decoder_store_dxva_qmatrix_in_qmatrix_buffer(struct d3d12_video_decoder *pD3D12Dec,
1453                                                          void *pDXVAStruct,
1454                                                          uint64_t DXVAStructSize)
1455 {
1456    auto &inFlightResources = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)];
1457    if (inFlightResources.m_InverseQuantMatrixBuffer.capacity() < DXVAStructSize) {
1458       inFlightResources.m_InverseQuantMatrixBuffer.reserve(DXVAStructSize);
1459    }
1460 
1461    inFlightResources.m_InverseQuantMatrixBuffer.resize(DXVAStructSize);
1462    memcpy(inFlightResources.m_InverseQuantMatrixBuffer.data(), pDXVAStruct, DXVAStructSize);
1463 }
1464 
1465 void
d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(struct d3d12_video_decoder * pD3D12Dec,void * pDXVAStruct,uint64_t DXVAStructSize)1466 d3d12_video_decoder_store_dxva_picparams_in_picparams_buffer(struct d3d12_video_decoder *pD3D12Dec,
1467                                                              void *pDXVAStruct,
1468                                                              uint64_t DXVAStructSize)
1469 {
1470    auto &inFlightResources = pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)];
1471    if (inFlightResources.m_picParamsBuffer.capacity() < DXVAStructSize) {
1472       inFlightResources.m_picParamsBuffer.reserve(DXVAStructSize);
1473    }
1474 
1475    inFlightResources.m_picParamsBuffer.resize(DXVAStructSize);
1476    memcpy(inFlightResources.m_picParamsBuffer.data(), pDXVAStruct, DXVAStructSize);
1477 }
1478 
1479 bool
d3d12_video_decoder_supports_aot_dpb(D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT decodeSupport,d3d12_video_decode_profile_type profileType)1480 d3d12_video_decoder_supports_aot_dpb(D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT decodeSupport,
1481                                      d3d12_video_decode_profile_type profileType)
1482 {
1483    bool supportedProfile = false;
1484 #if D3D12_VIDEO_ANY_DECODER_ENABLED
1485    switch (profileType) {
1486 #if VIDEO_CODEC_H264DEC
1487       case d3d12_video_decode_profile_type_h264:
1488       {
1489          supportedProfile = true;
1490       } break;
1491 #endif
1492 #if VIDEO_CODEC_H265DEC
1493       case d3d12_video_decode_profile_type_hevc:
1494       {
1495          supportedProfile = true;
1496       } break;
1497 #endif
1498 #if VIDEO_CODEC_AV1DEC
1499       case d3d12_video_decode_profile_type_av1:
1500       {
1501          supportedProfile = true;
1502       } break;
1503 #endif
1504 #if VIDEO_CODEC_VP9DEC
1505       case d3d12_video_decode_profile_type_vp9:
1506       {
1507          supportedProfile = true;
1508       } break;
1509 #endif
1510       default:
1511          supportedProfile = false;
1512          break;
1513    }
1514 #endif // D3D12_VIDEO_ANY_DECODER_ENABLED
1515 
1516    return (decodeSupport.DecodeTier >= D3D12_VIDEO_DECODE_TIER_2) && supportedProfile;
1517 }
1518 
1519 d3d12_video_decode_profile_type
d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(enum pipe_video_profile profile)1520 d3d12_video_decoder_convert_pipe_video_profile_to_profile_type(enum pipe_video_profile profile)
1521 {
1522    switch (profile) {
1523       case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
1524       case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE:
1525       case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
1526       case PIPE_VIDEO_PROFILE_MPEG4_AVC_EXTENDED:
1527       case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
1528       case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH10:
1529          return d3d12_video_decode_profile_type_h264;
1530       case PIPE_VIDEO_PROFILE_HEVC_MAIN:
1531       case PIPE_VIDEO_PROFILE_HEVC_MAIN_10:
1532          return d3d12_video_decode_profile_type_hevc;
1533       case PIPE_VIDEO_PROFILE_AV1_MAIN:
1534          return d3d12_video_decode_profile_type_av1;
1535       case PIPE_VIDEO_PROFILE_VP9_PROFILE0:
1536       case PIPE_VIDEO_PROFILE_VP9_PROFILE2:
1537          return d3d12_video_decode_profile_type_vp9;
1538       default:
1539       {
1540          unreachable("Unsupported pipe video profile");
1541       } break;
1542    }
1543 }
1544 
1545 GUID
d3d12_video_decoder_convert_pipe_video_profile_to_d3d12_profile(enum pipe_video_profile profile)1546 d3d12_video_decoder_convert_pipe_video_profile_to_d3d12_profile(enum pipe_video_profile profile)
1547 {
1548    switch (profile) {
1549       case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
1550       case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE:
1551       case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
1552       case PIPE_VIDEO_PROFILE_MPEG4_AVC_EXTENDED:
1553       case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
1554       case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH10:
1555          return D3D12_VIDEO_DECODE_PROFILE_H264;
1556       case PIPE_VIDEO_PROFILE_HEVC_MAIN:
1557          return D3D12_VIDEO_DECODE_PROFILE_HEVC_MAIN;
1558       case PIPE_VIDEO_PROFILE_HEVC_MAIN_10:
1559          return D3D12_VIDEO_DECODE_PROFILE_HEVC_MAIN10;
1560       case PIPE_VIDEO_PROFILE_AV1_MAIN:
1561          return D3D12_VIDEO_DECODE_PROFILE_AV1_PROFILE0;
1562       case PIPE_VIDEO_PROFILE_VP9_PROFILE0:
1563          return D3D12_VIDEO_DECODE_PROFILE_VP9;
1564       case PIPE_VIDEO_PROFILE_VP9_PROFILE2:
1565          return D3D12_VIDEO_DECODE_PROFILE_VP9_10BIT_PROFILE2;
1566       default:
1567          return {};
1568    }
1569 }
1570 
1571 GUID
d3d12_video_decoder_resolve_profile(d3d12_video_decode_profile_type profileType,DXGI_FORMAT decode_format)1572 d3d12_video_decoder_resolve_profile(d3d12_video_decode_profile_type profileType, DXGI_FORMAT decode_format)
1573 {
1574 #if D3D12_VIDEO_ANY_DECODER_ENABLED
1575    switch (profileType) {
1576 #if VIDEO_CODEC_H264DEC
1577       case d3d12_video_decode_profile_type_h264:
1578          return D3D12_VIDEO_DECODE_PROFILE_H264;
1579 #endif
1580 #if VIDEO_CODEC_H265DEC
1581       case d3d12_video_decode_profile_type_hevc:
1582       {
1583          switch (decode_format) {
1584             case DXGI_FORMAT_NV12:
1585                return D3D12_VIDEO_DECODE_PROFILE_HEVC_MAIN;
1586             case DXGI_FORMAT_P010:
1587                return D3D12_VIDEO_DECODE_PROFILE_HEVC_MAIN10;
1588             default:
1589             {
1590                unreachable("Unsupported decode_format");
1591             } break;
1592          }
1593       } break;
1594 #endif
1595 #if VIDEO_CODEC_AV1DEC
1596       case d3d12_video_decode_profile_type_av1:
1597          return D3D12_VIDEO_DECODE_PROFILE_AV1_PROFILE0;
1598          break;
1599 #endif
1600 #if VIDEO_CODEC_VP9DEC
1601       case d3d12_video_decode_profile_type_vp9:
1602       {
1603          switch (decode_format) {
1604             case DXGI_FORMAT_NV12:
1605                return D3D12_VIDEO_DECODE_PROFILE_VP9;
1606             case DXGI_FORMAT_P010:
1607                return D3D12_VIDEO_DECODE_PROFILE_VP9_10BIT_PROFILE2;
1608             default:
1609             {
1610                unreachable("Unsupported decode_format");
1611             } break;
1612          }
1613       } break;
1614 #endif
1615       default:
1616       {
1617          unreachable("Unsupported d3d12_video_decode_profile_type");
1618       } break;
1619    }
1620 #else
1621    return {};
1622 #endif // D3D12_VIDEO_ANY_DECODER_ENABLED
1623 }
1624 
// Blocks (with timeout) until `fence` reaches `fenceValueToWaitOn`.
// Returns true when the fence has already completed or the wait finished
// within timeout_ns; returns false if SetEventOnCompletion fails or the
// event wait fails/times out. The `codec` argument is not used by this
// function's visible body.
bool
d3d12_video_decoder_ensure_fence_finished(struct pipe_video_codec *codec,
                                          ID3D12Fence *fence,
                                          uint64_t fenceValueToWaitOn,
                                          uint64_t timeout_ns)
{
   bool wait_result = true;
   HRESULT hr = S_OK;
   // Sampled once here; not refreshed after the wait below.
   uint64_t completedValue = fence->GetCompletedValue();

   debug_printf(
      "[d3d12_video_decoder] d3d12_video_decoder_ensure_fence_finished - Waiting for fence (with timeout_ns %" PRIu64
      ") to finish with "
      "fenceValue: %" PRIu64 " - Current Fence Completed Value %" PRIu64 "\n",
      timeout_ns,
      fenceValueToWaitOn,
      completedValue);

   if (completedValue < fenceValueToWaitOn) {

      // Create an OS event (and fd on non-Windows) the fence can signal.
      HANDLE event = {};
      int event_fd = 0;
      event = d3d12_fence_create_event(&event_fd);

      hr = fence->SetEventOnCompletion(fenceValueToWaitOn, event);
      if (FAILED(hr)) {
         debug_printf("[d3d12_video_decoder] d3d12_video_decoder_ensure_fence_finished - SetEventOnCompletion for "
                      "fenceValue %" PRIu64 " failed with HR %x\n",
                      fenceValueToWaitOn,
                      hr);
         return false;
      }

      wait_result = d3d12_fence_wait_event(event, event_fd, timeout_ns);
      d3d12_fence_close_event(event, event_fd);

      // NOTE: completedValue below is the value sampled BEFORE the wait, so
      // this trace can show a stale completed value.
      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_ensure_fence_finished - Waiting on fence to be done with "
                   "fenceValue: %" PRIu64 " - current CompletedValue: %" PRIu64 "\n",
                   fenceValueToWaitOn,
                   completedValue);
   } else {
      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_ensure_fence_finished - Fence already done with "
                   "fenceValue: %" PRIu64 " - current CompletedValue: %" PRIu64 "\n",
                   fenceValueToWaitOn,
                   completedValue);
   }
   return wait_result;
}
1673 
// Waits for the GPU decode work tagged with `fenceValueToWaitOn` to finish,
// then releases the resources held by the matching in-flight pool slot
// (slot index = fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH) and resets
// its command allocator. Returns the wait result; returns false (and asserts)
// if the allocator reset fails or the device was removed.
bool
d3d12_video_decoder_sync_completion(struct pipe_video_codec *codec,
                                    ID3D12Fence *fence,
                                    uint64_t fenceValueToWaitOn,
                                    uint64_t timeout_ns)
{
   struct d3d12_video_decoder *pD3D12Dec = (struct d3d12_video_decoder *) codec;
   assert(pD3D12Dec);
   assert(pD3D12Dec->m_spD3D12VideoDevice);
   assert(pD3D12Dec->m_spDecodeCommandQueue);
   HRESULT hr = S_OK;

   ASSERTED bool wait_result = d3d12_video_decoder_ensure_fence_finished(codec, fence, fenceValueToWaitOn, timeout_ns);
   assert(wait_result);

   // Release references granted on end_frame for this inflight operations
   pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_spDecoder.Reset();
   pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_spDecoderHeap.Reset();
   pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_References.reset();
   pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_stagingDecodeBitstream.resize(
      0);
   pipe_resource_reference(
      &pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].pPipeCompressedBufferObj,
      NULL);

   struct d3d12_screen *pD3D12Screen = (struct d3d12_screen *) pD3D12Dec->m_pD3D12Screen;
   assert(pD3D12Screen);

   // NOTE(review): the releases above index the pool with
   // fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH, but this one uses
   // d3d12_video_decoder_pool_current_index(pD3D12Dec) — confirm whether
   // these are intended to address the same slot.
   pD3D12Screen->base.fence_reference(
      &pD3D12Screen->base,
      &pD3D12Dec->m_inflightResourcesPool[d3d12_video_decoder_pool_current_index(pD3D12Dec)]
          .m_pBitstreamUploadGPUCompletionFence,
      NULL);

   // Safe to reset the allocator now that the GPU is done with this slot.
   hr =
      pD3D12Dec->m_inflightResourcesPool[fenceValueToWaitOn % D3D12_VIDEO_DEC_ASYNC_DEPTH].m_spCommandAllocator->Reset();
   if (FAILED(hr)) {
      debug_printf("failed with %x.\n", hr);
      goto sync_with_token_fail;
   }

   // Validate device was not removed
   hr = pD3D12Dec->m_pD3D12Screen->dev->GetDeviceRemovedReason();
   if (hr != S_OK) {
      debug_printf("[d3d12_video_decoder] d3d12_video_decoder_sync_completion"
                   " - D3D12Device was removed AFTER d3d12_video_decoder_ensure_fence_finished "
                   "execution with HR %x, but wasn't before.\n",
                   hr);
      goto sync_with_token_fail;
   }

   debug_printf(
      "[d3d12_video_decoder] d3d12_video_decoder_sync_completion - GPU execution finalized for fenceValue: %" PRIu64
      "\n",
      fenceValueToWaitOn);

   return wait_result;

sync_with_token_fail:
   debug_printf("[d3d12_video_decoder] d3d12_video_decoder_sync_completion failed for fenceValue: %" PRIu64 "\n",
                fenceValueToWaitOn);
   assert(false);
   return false;
}