xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/d3d12/d3d12_video_encoder_bitstream_builder_av1.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © Microsoft Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "d3d12_video_encoder_bitstream_builder_av1.h"
25 
26 void
write_obu_header(d3d12_video_encoder_bitstream * pBit,av1_obutype_t obu_type,uint32_t obu_extension_flag,uint32_t temporal_id,uint32_t spatial_id)27 d3d12_video_bitstream_builder_av1::write_obu_header(d3d12_video_encoder_bitstream *pBit,
28                                                     av1_obutype_t obu_type,
29                                                     uint32_t obu_extension_flag,
30                                                     uint32_t temporal_id,
31                                                     uint32_t spatial_id)
32 {
33    pBit->put_bits(1, 0);          // obu_forbidden_bit
34    pBit->put_bits(4, obu_type);   // type
35    pBit->put_bits(1, obu_extension_flag);
36    pBit->put_bits(1, 1);   // obu_has_size_field
37    pBit->put_bits(1, 0);   // reserved
38    if (obu_extension_flag) {
39       // obu_extension_header()
40       pBit->put_bits(3, temporal_id);
41       pBit->put_bits(2, spatial_id);
42       pBit->put_bits(3, 0);   // extension_header_reserved_3bits
43    }
44 }
45 
46 void
pack_obu_header_size(d3d12_video_encoder_bitstream * pBit,uint64_t val)47 d3d12_video_bitstream_builder_av1::pack_obu_header_size(d3d12_video_encoder_bitstream *pBit, uint64_t val)
48 {
49    pBit->put_leb128_bytes(val);
50 }
51 
52 void
write_seq_data(d3d12_video_encoder_bitstream * pBit,const av1_seq_header_t * pSeqHdr)53 d3d12_video_bitstream_builder_av1::write_seq_data(d3d12_video_encoder_bitstream *pBit, const av1_seq_header_t *pSeqHdr)
54 {
55    pBit->put_bits(3, pSeqHdr->seq_profile);
56    pBit->put_bits(1, 0);   // still_picture default 0
57    pBit->put_bits(1, 0);   // reduced_still_picture_header
58    pBit->put_bits(1, 0);   // timing_info_present_flag
59    pBit->put_bits(1, 0);   // initial_display_delay_present_flag
60 
61    pBit->put_bits(5, pSeqHdr->operating_points_cnt_minus_1);
62    for (uint8_t i = 0; i <= pSeqHdr->operating_points_cnt_minus_1; i++) {
63       pBit->put_bits(8, pSeqHdr->operating_point_idc[i] >> 4);
64       pBit->put_bits(4, pSeqHdr->operating_point_idc[i] & 0x9f);
65       pBit->put_bits(5, pSeqHdr->seq_level_idx[i]);
66       if (pSeqHdr->seq_level_idx[i] > 7)
67          pBit->put_bits(1, pSeqHdr->seq_tier[i]);
68    }
69 
70    pBit->put_bits(4, d3d12_video_bitstream_builder_av1::frame_width_bits_minus_1);    // frame_width_bits_minus_1
71    pBit->put_bits(4, d3d12_video_bitstream_builder_av1::frame_height_bits_minus_1);   // frame_height_bits_minus_1
72    pBit->put_bits(d3d12_video_bitstream_builder_av1::frame_width_bits_minus_1 + 1,
73                   pSeqHdr->max_frame_width - 1);   // max_frame_width_minus_1
74    pBit->put_bits(d3d12_video_bitstream_builder_av1::frame_height_bits_minus_1 + 1,
75                   pSeqHdr->max_frame_height - 1);            // max_frame_height_minus_1
76    pBit->put_bits(1, 0);                                     // frame_id_numbers_present_flag
77    pBit->put_bits(1, pSeqHdr->use_128x128_superblock);       // use_128x128_superblock
78    pBit->put_bits(1, pSeqHdr->enable_filter_intra);          // enable_filter_intra
79    pBit->put_bits(1, pSeqHdr->enable_intra_edge_filter);     // enable_intra_edge_filter
80    pBit->put_bits(1, pSeqHdr->enable_interintra_compound);   // enable_interintra_compound
81    pBit->put_bits(1, pSeqHdr->enable_masked_compound);       // enable_masked_compound
82    pBit->put_bits(1, pSeqHdr->enable_warped_motion);         // enable_warped_motion
83    pBit->put_bits(1, pSeqHdr->enable_dual_filter);           // enable_dual_filter
84    pBit->put_bits(1, pSeqHdr->enable_order_hint);            // enable_order_hint
85 
86    if (pSeqHdr->enable_order_hint) {
87       pBit->put_bits(1, pSeqHdr->enable_jnt_comp);        // enable_jnt_comp
88       pBit->put_bits(1, pSeqHdr->enable_ref_frame_mvs);   // enable_ref_frame_mvs
89    }
90 
91    pBit->put_bits(1, pSeqHdr->seq_choose_screen_content_tools);   // seq_choose_screen_content_tools
92    if (!pSeqHdr->seq_choose_screen_content_tools)
93       pBit->put_bits(1, pSeqHdr->seq_force_screen_content_tools);   // seq_force_screen_content_tools
94 
95    if (pSeqHdr->seq_force_screen_content_tools) {
96       pBit->put_bits(1, pSeqHdr->seq_choose_integer_mv);   // seq_choose_integer_mv
97       if (!pSeqHdr->seq_choose_integer_mv)
98          pBit->put_bits(1, pSeqHdr->seq_force_integer_mv);   // seq_force_integer_mv
99    }
100 
101    if (pSeqHdr->enable_order_hint)
102       pBit->put_bits(3, pSeqHdr->order_hint_bits_minus1);
103 
104    pBit->put_bits(1, pSeqHdr->enable_superres);      // enable_superres
105    pBit->put_bits(1, pSeqHdr->enable_cdef);          // enable_cdef
106    pBit->put_bits(1, pSeqHdr->enable_restoration);   // enable_restoration
107 
108    // color_config ()
109    pBit->put_bits(1,
110                   pSeqHdr->color_config.bit_depth == DXGI_FORMAT_P010 ? 1 : 0);   // Assume DXGI_FORMAT_NV12 otherwise
111    if (pSeqHdr->seq_profile != 1)
112       pBit->put_bits(1, 0);   // mono_chrome not supported
113 
114    pBit->put_bits(1, pSeqHdr->color_config.color_description_present_flag);
115 
116    if (pSeqHdr->color_config.color_description_present_flag) {
117       pBit->put_bits(8, pSeqHdr->color_config.color_primaries);
118       pBit->put_bits(8, pSeqHdr->color_config.transfer_characteristics);
119       pBit->put_bits(8, pSeqHdr->color_config.matrix_coefficients);
120    }
121 
122    pBit->put_bits(1, pSeqHdr->color_config.color_range);   // color_range
123 
124    if (pSeqHdr->seq_profile == 0)
125       pBit->put_bits(2, pSeqHdr->color_config.chroma_sample_position);   // chroma_sample_position
126 
127    pBit->put_bits(1, pSeqHdr->color_config.separate_uv_delta_q);   // separate_uv_delta_q
128 
129    pBit->put_bits(1, 0);   // film_grain_params_present
130 
131    pBit->put_trailing_bits();
132 }
133 
134 void
write_temporal_delimiter_obu(std::vector<uint8_t> & headerBitstream,std::vector<uint8_t>::iterator placingPositionStart,size_t & writtenBytes)135 d3d12_video_bitstream_builder_av1::write_temporal_delimiter_obu(std::vector<uint8_t> &headerBitstream,
136                                                                 std::vector<uint8_t>::iterator placingPositionStart,
137                                                                 size_t &writtenBytes)
138 {
139    auto startByteOffset = std::distance(headerBitstream.begin(), placingPositionStart);
140    if (headerBitstream.size() < (startByteOffset + c_DefaultBitstreamBufSize))
141       headerBitstream.resize(startByteOffset + c_DefaultBitstreamBufSize);
142 
143    d3d12_video_encoder_bitstream bitstream_full_obu;
144    bitstream_full_obu.setup_bitstream(headerBitstream.size(), headerBitstream.data(), startByteOffset);
145 
146    {
147       // temporal_delimiter_obu() has empty payload as per AV1 codec spec
148 
149       // Write the header
150       constexpr uint32_t obu_extension_flag = 0;
151       constexpr uint32_t temporal_id = 0;
152       constexpr uint32_t spatial_id = 0;
153       write_obu_header(&bitstream_full_obu, OBU_TEMPORAL_DELIMITER, obu_extension_flag, temporal_id, spatial_id);
154 
155       // Write the data size
156       const uint64_t obu_size_in_bytes = 0;
157       debug_printf("obu_size: %" PRIu64 " (temporal_delimiter_obu() has empty payload as per AV1 codec spec)\n",
158                    obu_size_in_bytes);
159       pack_obu_header_size(&bitstream_full_obu, obu_size_in_bytes);
160    }
161 
162    bitstream_full_obu.flush();
163 
164    // Shrink headerBitstream to fit
165    writtenBytes = bitstream_full_obu.get_byte_count() - startByteOffset;
166    headerBitstream.resize(writtenBytes + startByteOffset);
167 }
168 
169 void
write_sequence_header(const av1_seq_header_t * pSeqHdr,std::vector<uint8_t> & headerBitstream,std::vector<uint8_t>::iterator placingPositionStart,size_t & writtenBytes)170 d3d12_video_bitstream_builder_av1::write_sequence_header(const av1_seq_header_t *pSeqHdr,
171                                                          std::vector<uint8_t> &headerBitstream,
172                                                          std::vector<uint8_t>::iterator placingPositionStart,
173                                                          size_t &writtenBytes)
174 {
175    auto startByteOffset = std::distance(headerBitstream.begin(), placingPositionStart);
176    if (headerBitstream.size() < (startByteOffset + c_DefaultBitstreamBufSize))
177       headerBitstream.resize(startByteOffset + c_DefaultBitstreamBufSize);
178 
179    d3d12_video_encoder_bitstream bitstream_full_obu;
180    bitstream_full_obu.setup_bitstream(headerBitstream.size(), headerBitstream.data(), startByteOffset);
181 
182    // to handle variable length we first write the content
183    // and later the obu header and concatenate both bitstreams
184    d3d12_video_encoder_bitstream bitstream_seq;
185    bitstream_seq.create_bitstream(c_DefaultBitstreamBufSize);
186 
187    {
188       // Write the data
189       write_seq_data(&bitstream_seq, pSeqHdr);
190       bitstream_seq.flush();
191       debug_printf("sequence_header_obu() bytes: %" PRId32 "\n", bitstream_seq.get_byte_count());
192 
193       // Write the header
194       constexpr uint32_t obu_extension_flag = 0;
195       constexpr uint32_t temporal_id = 0;
196       constexpr uint32_t spatial_id = 0;
197       write_obu_header(&bitstream_full_obu, OBU_SEQUENCE_HEADER, obu_extension_flag, temporal_id, spatial_id);
198 
199       // Write the data size
200       const uint64_t obu_size_in_bytes = bitstream_seq.get_byte_count();
201       debug_printf("obu_size: %" PRIu64 "\n", obu_size_in_bytes);
202       pack_obu_header_size(&bitstream_full_obu, obu_size_in_bytes);
203 
204       bitstream_full_obu.flush();
205 
206       // bitstream_full_obu has external buffer allocation and
207       // append_bitstream deep copies bitstream_seq, so it's okay
208       // for RAII of bitstream_seq to be deallocated out of scope
209       bitstream_full_obu.append_byte_stream(&bitstream_seq);
210    }
211 
212    bitstream_full_obu.flush();
213 
214    // Shrink headerBitstream to fit
215    writtenBytes = bitstream_full_obu.get_byte_count() - startByteOffset;
216    headerBitstream.resize(writtenBytes + startByteOffset);
217 }
218 
219 void
write_frame_size_with_refs(d3d12_video_encoder_bitstream * pBit,const av1_seq_header_t * pSeqHdr,const av1_pic_header_t * pPicHdr)220 d3d12_video_bitstream_builder_av1::write_frame_size_with_refs(d3d12_video_encoder_bitstream *pBit,
221                                                               const av1_seq_header_t *pSeqHdr,
222                                                               const av1_pic_header_t *pPicHdr)
223 {
224    bool found_ref = false;   // Send explicitly as default
225    for (int i = 0; i < 7 /*REFS_PER_FRAME*/; i++) {
226       pBit->put_bits(1, found_ref);   // found_ref
227    }
228 
229    if (found_ref) {
230       // frame_size()
231       write_frame_size(pBit, pSeqHdr, pPicHdr);
232       // render_size()
233       write_render_size(pBit, pPicHdr);
234    } else {
235       // superres_params()
236       write_superres_params(pBit, pSeqHdr, pPicHdr);
237    }
238 }
239 
240 void
write_frame_size(d3d12_video_encoder_bitstream * pBit,const av1_seq_header_t * pSeqHdr,const av1_pic_header_t * pPicHdr)241 d3d12_video_bitstream_builder_av1::write_frame_size(d3d12_video_encoder_bitstream *pBit,
242                                                     const av1_seq_header_t *pSeqHdr,
243                                                     const av1_pic_header_t *pPicHdr)
244 {
245    if (pPicHdr->frame_size_override_flag) {
246       pBit->put_bits(d3d12_video_bitstream_builder_av1::frame_width_bits_minus_1 + 1,
247                      pPicHdr->FrameWidth - 1);   // frame_width_minus_1
248       pBit->put_bits(d3d12_video_bitstream_builder_av1::frame_height_bits_minus_1 + 1,
249                      pPicHdr->FrameHeight - 1);   // frame_height_minus_1
250    }
251    // superres_params()
252    write_superres_params(pBit, pSeqHdr, pPicHdr);
253 }
254 
255 
256 void
write_superres_params(d3d12_video_encoder_bitstream * pBit,const av1_seq_header_t * pSeqHdr,const av1_pic_header_t * pPicHdr)257 d3d12_video_bitstream_builder_av1::write_superres_params(d3d12_video_encoder_bitstream *pBit,
258                                                          const av1_seq_header_t *pSeqHdr,
259                                                          const av1_pic_header_t *pPicHdr)
260 {
261    if (pSeqHdr->enable_superres)
262       pBit->put_bits(1, pPicHdr->use_superres);   // use_superres
263 
264    constexpr unsigned SUPERRES_DENOM_BITS = 3;   // As per AV1 codec spec
265    if (pPicHdr->use_superres) {
266       constexpr uint32_t SUPERRES_DENOM_MIN = 9;   // As per AV1 codec spec
267       assert(pPicHdr->SuperresDenom >= SUPERRES_DENOM_MIN);
268       uint32_t coded_denom = pPicHdr->SuperresDenom - SUPERRES_DENOM_MIN;
269       pBit->put_bits(SUPERRES_DENOM_BITS, coded_denom);
270    }
271 }
272 
273 void
write_render_size(d3d12_video_encoder_bitstream * pBit,const av1_pic_header_t * pPicHdr)274 d3d12_video_bitstream_builder_av1::write_render_size(d3d12_video_encoder_bitstream *pBit,
275                                                      const av1_pic_header_t *pPicHdr)
276 {
277    uint8_t render_and_frame_size_different =
278       ((pPicHdr->RenderWidth != pPicHdr->FrameWidth) || (pPicHdr->RenderHeight != pPicHdr->FrameHeight)) ? 1 : 0;
279 
280    pBit->put_bits(1, render_and_frame_size_different);   // render_and_frame_size_different
281 
282    if (render_and_frame_size_different == 1) {
283       pBit->put_bits(16, pPicHdr->RenderWidth - 1);    // render_width_minus_1
284       pBit->put_bits(16, pPicHdr->RenderHeight - 1);   // render_height_minus_1
285    }
286 }
287 
288 void
write_delta_q_value(d3d12_video_encoder_bitstream * pBit,int32_t delta_q_val)289 d3d12_video_bitstream_builder_av1::write_delta_q_value(d3d12_video_encoder_bitstream *pBit, int32_t delta_q_val)
290 {
291    if (delta_q_val) {
292       pBit->put_bits(1, 1);
293       pBit->put_su_bits(7, delta_q_val);
294    } else {
295       pBit->put_bits(1, 0);
296    }
297 }
298 
// Returns the signed distance between order hints a and b, wrapped to
// OrderHintBits bits, or 0 when order hints are disabled.
//
// a, b              - order hint values to compare.
// OrderHintBits     - number of bits used for order hints; bit
//                     (OrderHintBits - 1) of the difference acts as the sign.
// enable_order_hint - when zero the function returns 0 immediately.
inline int
get_relative_dist(int a, int b, int OrderHintBits, uint8_t enable_order_hint)
{
   if (enable_order_hint) {
      const int delta = a - b;
      const int signMask = 1 << (OrderHintBits - 1);
      // Bits below the sign bit contribute positively, the sign bit negatively
      const int magnitudePart = delta & (signMask - 1);
      const int signPart = delta & signMask;
      return magnitudePart - signPart;
   }
   return 0;
}
309 
// Returns the smallest k such that (blkSize << k) >= target.
//
// blkSize - base block size being doubled.
// target  - value that the shifted block size must reach.
//
// The shift is evaluated in 64 bits so that it cannot wrap around or reach a
// shift count equal to the operand width; for any non-zero blkSize the result
// is therefore at most 32. A zero blkSize can never reach a non-zero target,
// so 0 is returned for it instead of looping.
static uint32_t
tile_log2(uint32_t blkSize, uint32_t target)
{
   if (blkSize == 0)
      return 0;

   uint32_t k = 0;
   while ((static_cast<uint64_t>(blkSize) << k) < target)
      k++;
   return k;
}
317 
318 void
write_pic_data(d3d12_video_encoder_bitstream * pBit,const av1_seq_header_t * pSeqHdr,const av1_pic_header_t * pPicHdr)319 d3d12_video_bitstream_builder_av1::write_pic_data(d3d12_video_encoder_bitstream *pBit,
320                                                   const av1_seq_header_t *pSeqHdr,
321                                                   const av1_pic_header_t *pPicHdr)
322 {
323    // uncompressed_header()
324 
325    pBit->put_bits(1, pPicHdr->show_existing_frame);
326 
327    if (pPicHdr->show_existing_frame) {
328       pBit->put_bits(3, pPicHdr->frame_to_show_map_idx);   // frame_to_show_map_idx	f(3)
329 
330       // decoder_model_info_present_flag Default 0
331       // if ( decoder_model_info_present_flag && !equal_picture_interval ) {
332       //       temporal_point_info( )
333       // }
334 
335       // frame_id_numbers_present_flag default 0
336       // if ( frame_id_numbers_present_flag ) {
337       //       display_frame_id	f(idLen)
338       // }
339    } else {
340 
341       const uint8_t FrameIsIntra = (pPicHdr->frame_type == D3D12_VIDEO_ENCODER_AV1_FRAME_TYPE_INTRA_ONLY_FRAME ||
342                                     pPicHdr->frame_type == D3D12_VIDEO_ENCODER_AV1_FRAME_TYPE_KEY_FRAME);
343       pBit->put_bits(2, pPicHdr->frame_type);   // frame_type
344 
345       pBit->put_bits(1, pPicHdr->show_frame);   // show_frame
346       if (!pPicHdr->show_frame)
347          pBit->put_bits(1, pPicHdr->showable_frame);   // showable_frame
348 
349 
350       if (pPicHdr->frame_type == D3D12_VIDEO_ENCODER_AV1_FRAME_TYPE_SWITCH_FRAME ||
351           (pPicHdr->frame_type == D3D12_VIDEO_ENCODER_AV1_FRAME_TYPE_KEY_FRAME && pPicHdr->show_frame)) {
352          assert(pPicHdr->error_resilient_mode == 1);
353       } else {
354          pBit->put_bits(1, pPicHdr->error_resilient_mode);   // error_resilient_mode
355       }
356 
357       pBit->put_bits(1, pPicHdr->disable_cdf_update);   // disable_cdf_update
358       if (pSeqHdr->seq_force_screen_content_tools == /*SELECT_SCREEN_CONTENT_TOOLS */ 2)
359          pBit->put_bits(1, pPicHdr->allow_screen_content_tools);   // allow_screen_content_tools
360 
361       if (pPicHdr->allow_screen_content_tools && (pSeqHdr->seq_force_integer_mv == /*SELECT_INTEGER_MV */ 2))
362          pBit->put_bits(1, pPicHdr->force_integer_mv);   // force_integer_mv
363 
364       // reduced_still_picture_header default 0 and frame_type != SWITCH
365       if (pPicHdr->frame_type != D3D12_VIDEO_ENCODER_AV1_FRAME_TYPE_SWITCH_FRAME) {
366          // Expicitly coded if NOT SWITCH FRAME
367          pBit->put_bits(1, pPicHdr->frame_size_override_flag);   // frame_size_override_flag
368       } else {
369          assert(pPicHdr->frame_size_override_flag ==
370                 1);   // As per AV1 spec for SWITCH FRAME it's not coded but defaulted to 1 instead
371       }
372 
373       pBit->put_bits(pSeqHdr->order_hint_bits_minus1 + 1, pPicHdr->order_hint);   // order_hint
374 
375       if (!(FrameIsIntra || pPicHdr->error_resilient_mode))
376          pBit->put_bits(3, pPicHdr->primary_ref_frame);   // primary_ref_frame
377 
378       // decoder_model_info_present_flag Default 0
379 
380       if (!(pPicHdr->frame_type == D3D12_VIDEO_ENCODER_AV1_FRAME_TYPE_SWITCH_FRAME ||
381             (pPicHdr->frame_type == D3D12_VIDEO_ENCODER_AV1_FRAME_TYPE_KEY_FRAME && pPicHdr->show_frame)))
382          pBit->put_bits(8 /* NUM_REF_FRAMES from AV1 spec */, pPicHdr->refresh_frame_flags);
383 
384       constexpr uint32_t allFrames = (1 << 8 /* NUM_REF_FRAMES from AV1 spec */) - 1;
385       if (!FrameIsIntra || pPicHdr->refresh_frame_flags != allFrames) {
386 
387          if (pPicHdr->error_resilient_mode && pSeqHdr->enable_order_hint) {
388             for (uint8_t i = 0; i < 8 /* NUM_REF_FRAMES from AV1 spec */; i++) {
389                pBit->put_bits(pSeqHdr->order_hint_bits_minus1 + 1,
390                               pPicHdr->ref_order_hint[i]);   // ref_order_hint[i] f(OrderHintBits)
391             }
392          }
393       }
394 
395       if (FrameIsIntra) {
396          // frame_size()
397          write_frame_size(pBit, pSeqHdr, pPicHdr);
398          // render_size()
399          write_render_size(pBit, pPicHdr);
400 
401          if (pPicHdr->allow_screen_content_tools && pPicHdr->UpscaledWidth == pPicHdr->FrameWidth)
402             pBit->put_bits(1, pPicHdr->allow_intrabc);
403       } else {
404          if (pSeqHdr->enable_order_hint)
405             pBit->put_bits(1, 0);   // frame_refs_short_signaling default 0
406 
407          for (uint8_t ref = 0; ref < ARRAY_SIZE(pPicHdr->ref_frame_idx); ref++)
408             pBit->put_bits(3 /* log2 of NUM_REF_FRAMES from AV1 spec */, pPicHdr->ref_frame_idx[ref]);
409 
410          // frame_id_numbers_present_flag default 0
411 
412          if (pPicHdr->frame_size_override_flag && !pPicHdr->error_resilient_mode) {
413             // frame_size_with_refs()
414             write_frame_size_with_refs(pBit, pSeqHdr, pPicHdr);
415          } else {
416             // frame_size()
417             write_frame_size(pBit, pSeqHdr, pPicHdr);
418             // render_size()
419             write_render_size(pBit, pPicHdr);
420          }
421 
422          if (!pPicHdr->force_integer_mv)
423             pBit->put_bits(1, pPicHdr->allow_high_precision_mv);   // allow_high_precision_mv
424 
425          // read_interpolation_filter()
426          {
427             const uint8_t is_filter_switchable =
428                (pPicHdr->interpolation_filter == D3D12_VIDEO_ENCODER_AV1_INTERPOLATION_FILTERS_SWITCHABLE ? 1 : 0);
429             pBit->put_bits(1, is_filter_switchable);   // is_filter_switchable
430             if (!is_filter_switchable) {
431                pBit->put_bits(2, pPicHdr->interpolation_filter);   // interpolation_filter
432             }
433          }
434 
435          pBit->put_bits(1, pPicHdr->is_motion_mode_switchable);   // is_motion_mode_switchable
436 
437          if (!(pPicHdr->error_resilient_mode || !pPicHdr->use_ref_frame_mvs))
438             pBit->put_bits(1, 1);   // use_ref_frame_mvs
439       }
440 
441       if (!pPicHdr->disable_cdf_update /* || reduced_still_picture_header default 0 */)
442          pBit->put_bits(1, pPicHdr->disable_frame_end_update_cdf);   // disable_frame_end_update_cdf
443 
444       // tile_info()
445       {
446          unsigned maxTileWidthSb = pPicHdr->tile_info.tile_support_caps.MaxTileWidth;
447          unsigned maxTileAreaSb = pPicHdr->tile_info.tile_support_caps.MaxTileArea;
448 
449          unsigned minLog2TileCols = tile_log2(maxTileWidthSb, pPicHdr->tile_info.tile_support_caps.MinTileCols);
450          unsigned maxLog2TileCols = tile_log2(1, pPicHdr->tile_info.tile_support_caps.MaxTileCols);
451          unsigned log2TileCols = tile_log2(1, pPicHdr->tile_info.tile_partition.ColCount);
452 
453          unsigned minLog2TileRows = tile_log2(1, pPicHdr->tile_info.tile_support_caps.MinTileRows);
454          unsigned maxLog2TileRows = tile_log2(1, pPicHdr->tile_info.tile_support_caps.MaxTileRows);
455          unsigned log2TileRows = tile_log2(1, pPicHdr->tile_info.tile_partition.RowCount);
456 
457          pBit->put_bits(1, pPicHdr->tile_info.uniform_tile_spacing_flag);   // uniform_tile_spacing_flag
458 
459          if (pPicHdr->tile_info.uniform_tile_spacing_flag) {
460             for (unsigned i = minLog2TileCols; i < log2TileCols; i++)
461                pBit->put_bits(1, 1);   // one increment_tile_cols_log2
462             if (log2TileCols < maxLog2TileCols)
463                pBit->put_bits(1, 0);   // zero increment_tile_cols_log2
464             for (unsigned i = minLog2TileRows; i < log2TileRows; i++)
465                pBit->put_bits(1, 1);   // increment_tile_rows_log2
466             if (log2TileRows < maxLog2TileRows)
467                pBit->put_bits(1, 0);   // increment_tile_rows_log2
468          } else {
469             unsigned sizeSb = 0;
470             unsigned widestTileSb = 0;
471             unsigned widthSb = pPicHdr->frame_width_sb;
472             for (unsigned i = 0; i < pPicHdr->tile_info.tile_partition.ColCount; i++) {
473                sizeSb = pPicHdr->tile_info.tile_partition.ColWidths[i];
474                unsigned maxWidth = std::min(widthSb, maxTileWidthSb);
475                pBit->put_ns_bits(maxWidth, sizeSb - 1);   // width_in_sbs_minus_1
476                widestTileSb = std::max(sizeSb, widestTileSb);
477                widthSb -= sizeSb;
478             }
479 
480             unsigned maxTileHeightSb = std::max(maxTileAreaSb / widestTileSb, 1u);
481             unsigned heightSb = pPicHdr->frame_height_sb;
482             for (unsigned i = 0; i < pPicHdr->tile_info.tile_partition.RowCount; i++) {
483                sizeSb = pPicHdr->tile_info.tile_partition.RowHeights[i];
484                unsigned maxHeight = std::min(heightSb, maxTileHeightSb);
485                pBit->put_ns_bits(maxHeight, sizeSb - 1);   // height_in_sbs_minus_1
486                heightSb -= sizeSb;
487             }
488          }
489 
490          if (log2TileCols > 0 || log2TileRows > 0) {
491             pBit->put_bits(log2TileRows + log2TileCols,
492                            pPicHdr->tile_info.tile_partition.ContextUpdateTileId);   // f(TileRowsLog2 + TileColsLog2)
493             pBit->put_bits(2, pPicHdr->tile_info.tile_support_caps.TileSizeBytesMinus1);   // tile_size_bytes_minus_1
494                                                                                            // f(2)
495          }
496       }
497 
498       // quantization_params()
499       {
500          pBit->put_bits(8, pPicHdr->quantization_params.BaseQIndex);   // base_q_idx
501          write_delta_q_value(pBit, pPicHdr->quantization_params.YDCDeltaQ);
502 
503          bool diff_uv_delta = false;
504          if (pPicHdr->quantization_params.UDCDeltaQ != pPicHdr->quantization_params.VDCDeltaQ ||
505              pPicHdr->quantization_params.UACDeltaQ != pPicHdr->quantization_params.VACDeltaQ)
506             diff_uv_delta = true;
507 
508          if (diff_uv_delta)
509             assert(pSeqHdr->color_config.separate_uv_delta_q == 1);
510 
511          if (pSeqHdr->color_config.separate_uv_delta_q)
512             pBit->put_bits(1, diff_uv_delta);
513 
514          write_delta_q_value(pBit, pPicHdr->quantization_params.UDCDeltaQ);
515          write_delta_q_value(pBit, pPicHdr->quantization_params.UACDeltaQ);
516 
517          if (diff_uv_delta) {
518             write_delta_q_value(pBit, pPicHdr->quantization_params.VDCDeltaQ);
519             write_delta_q_value(pBit, pPicHdr->quantization_params.VACDeltaQ);
520          }
521 
522          pBit->put_bits(1, pPicHdr->quantization_params.UsingQMatrix);   // using_qmatrix
523          if (pPicHdr->quantization_params.UsingQMatrix) {
524             pBit->put_bits(4, pPicHdr->quantization_params.QMY);   // qm_y
525             pBit->put_bits(4, pPicHdr->quantization_params.QMU);   // qm_u
526             if (pSeqHdr->color_config.separate_uv_delta_q)
527                pBit->put_bits(4, pPicHdr->quantization_params.QMV);   // qm_v
528          }
529       }
530 
531       // segmentation_params()
532       {
533          pBit->put_bits(1, pPicHdr->segmentation_enabled);   // segmentation_enabled
534          if (pPicHdr->segmentation_enabled) {
535             if (pPicHdr->primary_ref_frame != 7 /*PRIMARY_REF_NONE*/) {
536                pBit->put_bits(1, pPicHdr->segmentation_config.UpdateMap);   // segmentation_update_map f(1)
537                if (pPicHdr->segmentation_config.UpdateMap == 1)
538                   pBit->put_bits(1, pPicHdr->segmentation_config.TemporalUpdate);   // segmentation_temporal_update f(1)
539                pBit->put_bits(1, pPicHdr->segmentation_config.UpdateData);          // segmentation_update_data f(1)
540             }
541 
542             if (pPicHdr->segmentation_config.UpdateData == 1) {
543                const int av1_segmentation_feature_bits[8 /*SEG_LVL_MAX*/] = { 8, 6, 6, 6, 6, 3, 0, 0 };
544                const int av1_segmentation_feature_signed[8 /*SEG_LVL_MAX*/] = { 1, 1, 1, 1, 1, 0, 0, 0 };
545 
546                for (int i = 0; i < 8 /*MAX_SEGMENTS*/; i++) {
547                   for (int j = 0; j < 8 /*SEG_LVL_MAX*/; j++) {
548                      bool feature_enabled =
549                         ((static_cast<UINT>(1 << j) & static_cast<UINT>(pPicHdr->segmentation_config.SegmentsData[i].EnabledFeatures)) != 0);
550                      pBit->put_bits(1, feature_enabled ? 1 : 0);   // feature_enabled	f(1)
551 
552                      if (feature_enabled) {
553                         int bitsToRead = av1_segmentation_feature_bits[j];
554                         if (av1_segmentation_feature_signed[j] == 1) {
555                            pBit->put_su_bits(
556                               1 + bitsToRead,
557                               pPicHdr->segmentation_config.SegmentsData[i].FeatureValue[j]);   // su(1+bitsToRead)
558                         } else {
559                            pBit->put_bits(
560                               bitsToRead,
561                               pPicHdr->segmentation_config.SegmentsData[i].FeatureValue[j]);   // f(bitsToRead)
562                         }
563                      }
564                   }
565                }
566             }
567          }
568       }
569 
570       // delta_q_params()
571       // combined with delta_lf_params()
572       {
573          if (pPicHdr->quantization_params.BaseQIndex)
574             pBit->put_bits(1, pPicHdr->delta_q_params.DeltaQPresent);   // delta_q_present
575          if (pPicHdr->delta_q_params.DeltaQPresent) {
576             pBit->put_bits(2, pPicHdr->delta_q_params.DeltaQRes);   // delta_q_res
577 
578             // delta_lf_params()
579             if (!pPicHdr->allow_intrabc) {
580                pBit->put_bits(1, pPicHdr->delta_lf_params.DeltaLFPresent);   // delta_lf_present
581                if (pPicHdr->delta_lf_params.DeltaLFPresent) {
582                   pBit->put_bits(2, pPicHdr->delta_lf_params.DeltaLFRes);     // delta_lf_res
583                   pBit->put_bits(1, pPicHdr->delta_lf_params.DeltaLFMulti);   // delta_lf_multi
584                }
585             }
586          }
587       }
588 
589       constexpr bool CodedLossless = false;   // CodedLossless default 0
590       constexpr bool AllLossless = false;     // AllLossless default 0
591       // loop_filter_params()
592       {
593          if (!(CodedLossless || pPicHdr->allow_intrabc)) {
594             pBit->put_bits(6, pPicHdr->loop_filter_params.LoopFilterLevel[0]);   // loop_filter_level[0]
595             pBit->put_bits(6, pPicHdr->loop_filter_params.LoopFilterLevel[1]);   // loop_filter_level[1]
596 
597             if (pPicHdr->loop_filter_params.LoopFilterLevel[0] || pPicHdr->loop_filter_params.LoopFilterLevel[1]) {
598                pBit->put_bits(6, pPicHdr->loop_filter_params.LoopFilterLevelU);   // loop_filter_level[2]
599                pBit->put_bits(6, pPicHdr->loop_filter_params.LoopFilterLevelV);   // loop_filter_level[3]
600             }
601 
602             pBit->put_bits(3, pPicHdr->loop_filter_params.LoopFilterSharpnessLevel);   // loop_filter_sharpness
603             pBit->put_bits(1, pPicHdr->loop_filter_params.LoopFilterDeltaEnabled);     // loop_filter_delta_enabled
604 
605             if (pPicHdr->loop_filter_params.LoopFilterDeltaEnabled) {
606                bool loop_filter_delta_update =
607                   (pPicHdr->loop_filter_params.UpdateRefDelta || pPicHdr->loop_filter_params.UpdateModeDelta);
608                pBit->put_bits(1, loop_filter_delta_update);   // loop_filter_delta_update
609                if (loop_filter_delta_update) {
610                   constexpr uint8_t TOTAL_REFS_PER_FRAME = 8;   // From AV1 spec
611                   static_assert(ARRAY_SIZE(pPicHdr->loop_filter_params.RefDeltas) == TOTAL_REFS_PER_FRAME);
612                   for (uint8_t i = 0; i < TOTAL_REFS_PER_FRAME; i++) {
613                      pBit->put_bits(1, pPicHdr->loop_filter_params.UpdateRefDelta);   // loop_filter_delta_update
614                      if (pPicHdr->loop_filter_params.UpdateRefDelta) {
615                         pBit->put_su_bits(7, pPicHdr->loop_filter_params.RefDeltas[i]);   // loop_filter_ref_deltas[i]
616                      }
617                   }
618 
619                   static_assert(ARRAY_SIZE(pPicHdr->loop_filter_params.ModeDeltas) == 2);   // From AV1 spec
620                   for (uint8_t i = 0; i < 2; i++) {
621                      pBit->put_bits(1, pPicHdr->loop_filter_params.UpdateModeDelta);   // update_mode_delta
622                      if (pPicHdr->loop_filter_params.UpdateModeDelta) {
623                         pBit->put_su_bits(7, pPicHdr->loop_filter_params.ModeDeltas[i]);   // loop_filter_mode_deltas[i]
624                      }
625                   }
626                }
627             }
628          }
629       }
630 
631       // cdef_params()
632       {
633          if (!(!pSeqHdr->enable_cdef || CodedLossless || pPicHdr->allow_intrabc)) {
634             uint16_t num_planes = 3;                                     // mono_chrome not supported
635             pBit->put_bits(2, pPicHdr->cdef_params.CdefDampingMinus3);   // cdef_damping_minus_3
636             pBit->put_bits(2, pPicHdr->cdef_params.CdefBits);            // cdef_bits
637             for (uint16_t i = 0; i < (1 << pPicHdr->cdef_params.CdefBits); ++i) {
638                pBit->put_bits(4, pPicHdr->cdef_params.CdefYPriStrength[i]);   // cdef_y_pri_strength[i]
639                pBit->put_bits(2, pPicHdr->cdef_params.CdefYSecStrength[i]);   // cdef_y_sec_strength[i]
640                if (num_planes > 1) {
641                   pBit->put_bits(4, pPicHdr->cdef_params.CdefUVPriStrength[i]);   // cdef_uv_pri_strength[i]
642                   pBit->put_bits(2, pPicHdr->cdef_params.CdefUVSecStrength[i]);   // cdef_uv_sec_strength[i]
643                }
644             }
645          }
646       }
647 
648       // lr_params()
649       {
650          if (!(AllLossless || pPicHdr->allow_intrabc || !pSeqHdr->enable_restoration)) {
651             bool uses_lr = false;
652             bool uses_chroma_lr = false;
653             for (int i = 0; i < 3 /*MaxNumPlanes*/; i++) {
654                pBit->put_bits(2, pPicHdr->lr_params.lr_type[i]);
655                if (pPicHdr->lr_params.lr_type[i] != D3D12_VIDEO_ENCODER_AV1_RESTORATION_TYPE_DISABLED) {
656                   uses_lr = true;
657                   if (i > 0)
658                      uses_chroma_lr = true;
659                }
660             }
661 
662             if (uses_lr) {
663                pBit->put_bits(1, pPicHdr->lr_params.lr_unit_shift);
664 
665                if (!pSeqHdr->use_128x128_superblock && pPicHdr->lr_params.lr_unit_shift) {
666                   pBit->put_bits(1, pPicHdr->lr_params.lr_unit_extra_shift);
667                }
668 
669                if (pSeqHdr->color_config.subsampling_x && pSeqHdr->color_config.subsampling_y && uses_chroma_lr) {
670                   pBit->put_bits(1, pPicHdr->lr_params.lr_uv_shift);
671                }
672             }
673          }
674       }
675 
676       // read_tx_mode()
677       {
678          const uint8_t tx_mode_select = (pPicHdr->TxMode == D3D12_VIDEO_ENCODER_AV1_TX_MODE_SELECT) ? 1 : 0;
679          if (!CodedLossless)
680             pBit->put_bits(1, tx_mode_select);   // tx_mode_select
681       }
682 
683       // frame_reference_mode()
684       {
685          if (!FrameIsIntra)
686             pBit->put_bits(1, pPicHdr->reference_select);   // reference_select
687       }
688 
689       // skip_mode_params()
690       {
691          uint8_t skipModeAllowed = 0;
692          if (!(FrameIsIntra || !pPicHdr->reference_select || !pSeqHdr->enable_order_hint)) {
693             int forwardIdx = -1;
694             int backwardIdx = -1;
695             int forwardHint = 0;
696             int backwardHint = 0;
697             for (int i = 0; i < 7 /*REFS_PER_FRAME*/; i++) {
698                uint32_t refHint = pPicHdr->ref_order_hint[pPicHdr->ref_frame_idx[i]];
699                if (get_relative_dist(refHint,
700                                      pPicHdr->order_hint,
701                                      pSeqHdr->order_hint_bits_minus1 + 1,
702                                      pSeqHdr->enable_order_hint) < 0) {
703                   if (forwardIdx < 0 || get_relative_dist(refHint,
704                                                           forwardHint,
705                                                           pSeqHdr->order_hint_bits_minus1 + 1,
706                                                           pSeqHdr->enable_order_hint) > 0) {
707                      forwardIdx = i;
708                      forwardHint = refHint;
709                   }
710                } else if (get_relative_dist(refHint,
711                                             pPicHdr->order_hint,
712                                             pSeqHdr->order_hint_bits_minus1 + 1,
713                                             pSeqHdr->enable_order_hint) > 0) {
714                   if (backwardIdx < 0 || get_relative_dist(refHint,
715                                                            backwardHint,
716                                                            pSeqHdr->order_hint_bits_minus1 + 1,
717                                                            pSeqHdr->enable_order_hint) < 0) {
718                      backwardIdx = i;
719                      backwardHint = refHint;
720                   }
721                }
722             }
723             if (forwardIdx < 0) {
724                skipModeAllowed = 0;
725             } else if (backwardIdx >= 0) {
726                skipModeAllowed = 1;
727             } else {
728                int secondForwardIdx = -1;
729                int secondForwardHint = 0;
730                for (int i = 0; i < 7 /*REFS_PER_FRAME*/; i++) {
731                   uint32_t refHint = pPicHdr->ref_order_hint[pPicHdr->ref_frame_idx[i]];
732                   if (get_relative_dist(refHint,
733                                         forwardHint,
734                                         pSeqHdr->order_hint_bits_minus1 + 1,
735                                         pSeqHdr->enable_order_hint) < 0) {
736                      if (secondForwardIdx < 0 || get_relative_dist(refHint,
737                                                                    secondForwardHint,
738                                                                    pSeqHdr->order_hint_bits_minus1 + 1,
739                                                                    pSeqHdr->enable_order_hint) > 0) {
740                         secondForwardIdx = i;
741                         secondForwardHint = refHint;
742                      }
743                   }
744                }
745                if (secondForwardIdx < 0) {
746                   skipModeAllowed = 0;
747                } else {
748                   skipModeAllowed = 1;
749                }
750             }
751          }
752 
753          if (skipModeAllowed)
754             pBit->put_bits(1, pPicHdr->skip_mode_present);   // skip_mode_present
755 
756          if (!(FrameIsIntra || pPicHdr->error_resilient_mode || !pSeqHdr->enable_warped_motion)) {
757             pBit->put_bits(1, pPicHdr->allow_warped_motion);   // allow_warped_motion
758          }
759       }
760 
761       pBit->put_bits(1, pPicHdr->reduced_tx_set);   // reduced_tx_set
762 
763       // global_motion_params()
764       {
765          if (!FrameIsIntra) {
766             for (uint8_t i = 0; i < 7; i++) {
767                pBit->put_bits(1, 0);   // is_global[7]
768                // Unimplemented: Enable global_motion_params with ref_global_motion_info
769                assert(pPicHdr->ref_global_motion_info[i].TransformationType ==
770                       D3D12_VIDEO_ENCODER_AV1_REFERENCE_WARPED_MOTION_TRANSFORMATION_IDENTITY);
771             }
772          }
773       }
774 
775       // film_grain_params()
776       // constexpr uint8_t film_grain_params_present = 0; // film_grain_params_present default 0
777       // {
778       // if (!(!film_grain_params_present || (!pPicHdr->show_frame && !pPicHdr->showable_frame))
779       // ... this will be unreachable as film_grain_params_present is zero.
780       // }
781    }
782 }
783 
784 void
write_frame_header(const av1_seq_header_t * pSeqHdr,const av1_pic_header_t * pPicHdr,av1_obutype_t frame_pack_type,size_t extra_obu_size_bytes,std::vector<uint8_t> & headerBitstream,std::vector<uint8_t>::iterator placingPositionStart,size_t & writtenBytes)785 d3d12_video_bitstream_builder_av1::write_frame_header(const av1_seq_header_t *pSeqHdr,
786                                                       const av1_pic_header_t *pPicHdr,
787                                                       av1_obutype_t frame_pack_type,
788                                                       size_t extra_obu_size_bytes,
789                                                       std::vector<uint8_t> &headerBitstream,
790                                                       std::vector<uint8_t>::iterator placingPositionStart,
791                                                       size_t &writtenBytes)
792 {
793    assert((frame_pack_type == OBU_FRAME) || (frame_pack_type == OBU_FRAME_HEADER));
794    auto startByteOffset = std::distance(headerBitstream.begin(), placingPositionStart);
795    if (headerBitstream.size() < (startByteOffset + c_DefaultBitstreamBufSize))
796       headerBitstream.resize(startByteOffset + c_DefaultBitstreamBufSize);
797 
798    d3d12_video_encoder_bitstream bitstream_full_obu;
799    bitstream_full_obu.setup_bitstream(headerBitstream.size(), headerBitstream.data(), startByteOffset);
800 
801    // to handle variable length we first write the content
802    // and later the obu header and concatenate both bitstreams
803    d3d12_video_encoder_bitstream bitstream_pic;
804    bitstream_pic.create_bitstream(c_DefaultBitstreamBufSize);
805 
806    {
807       // Write frame_header_obu()
808       write_pic_data(&bitstream_pic, pSeqHdr, pPicHdr);
809 
810       debug_printf("frame_header_obu() bytes (without OBU_FRAME nor OBU_FRAME_HEADER alignment padding): %" PRId32 "\n",
811                    bitstream_pic.get_byte_count());   // May be bit unaligned at this point (see padding below)
812       debug_printf("extra_obu_size_bytes (ie. tile_group_obu_size if writing OBU_FRAME ): %" PRIu64 "\n",
813                    static_cast<uint64_t>(extra_obu_size_bytes));
814 
815       // Write the obu_header
816       constexpr uint32_t obu_extension_flag = 0;
817       constexpr uint32_t temporal_id = 0;
818       constexpr uint32_t spatial_id = 0;
819       write_obu_header(&bitstream_full_obu, frame_pack_type, obu_extension_flag, temporal_id, spatial_id);
820 
821       if (frame_pack_type == OBU_FRAME) {
822          // Required byte_alignment() in frame_obu() after frame_header_obu()
823          bitstream_pic.put_aligning_bits();
824          debug_printf("Adding byte_alignment() after frame_header_obu() for OBU_FRAME\n");
825       } else if (frame_pack_type == OBU_FRAME_HEADER) {
826          // whole open_bitstream_unit() for OBU_FRAME_HEADER
827          // required in open_bitstream_unit () for OBU_FRAME_HEADER
828          bitstream_pic.put_trailing_bits();
829          debug_printf("Adding trailing_bits() after frame_header_obu() for OBU_FRAME\n");
830          assert(extra_obu_size_bytes == 0);
831       }
832 
833       bitstream_pic.flush();
834 
835       // Write the obu_size element
836       const uint64_t obu_size_in_bytes = bitstream_pic.get_byte_count() + extra_obu_size_bytes;
837       debug_printf("obu_size: %" PRIu64 "\n", obu_size_in_bytes);
838       pack_obu_header_size(&bitstream_full_obu, obu_size_in_bytes);
839 
840       bitstream_full_obu.flush();
841 
842       // bitstream_full_obu has external buffer allocation and
843       // append_bitstream deep copies bitstream_pic, so it's okay
844       // for RAII of bitstream_pic to be deallocated out of scope
845       bitstream_full_obu.append_byte_stream(&bitstream_pic);
846    }
847 
848    bitstream_full_obu.flush();
849 
850    // Shrink headerBitstream to fit
851    writtenBytes = bitstream_full_obu.get_byte_count() - startByteOffset;
852    headerBitstream.resize(writtenBytes + startByteOffset);
853 }
854 
855 void
calculate_tile_group_obu_size(const D3D12_VIDEO_ENCODER_OUTPUT_METADATA * pParsedMetadata,const D3D12_VIDEO_ENCODER_FRAME_SUBREGION_METADATA * pFrameSubregionMetadata,size_t TileSizeBytes,const D3D12_VIDEO_ENCODER_AV1_PICTURE_CONTROL_SUBREGIONS_LAYOUT_DATA_TILES & TilesPartition,const av1_tile_group_t & tileGroup,size_t & tile_group_obu_size,size_t & decode_tile_elements_size)856 d3d12_video_bitstream_builder_av1::calculate_tile_group_obu_size(
857    const D3D12_VIDEO_ENCODER_OUTPUT_METADATA *pParsedMetadata,
858    const D3D12_VIDEO_ENCODER_FRAME_SUBREGION_METADATA *pFrameSubregionMetadata,
859    size_t TileSizeBytes,   // Pass already +1'd from TileSizeBytesMinus1
860    const D3D12_VIDEO_ENCODER_AV1_PICTURE_CONTROL_SUBREGIONS_LAYOUT_DATA_TILES &TilesPartition,
861    const av1_tile_group_t &tileGroup,
862    size_t &tile_group_obu_size,
863    size_t &decode_tile_elements_size)
864 {
865    size_t tile_group_obu_size_bits = 0;
866 
867    uint8_t NumTiles = TilesPartition.ColCount * TilesPartition.RowCount;
868    if (NumTiles > 1)
869       tile_group_obu_size_bits++;   // tile_start_and_end_present_flag	f(1)
870 
871    bool tile_start_and_end_present_flag = !(tileGroup.tg_start == 0 && (tileGroup.tg_end == (NumTiles - 1)));
872    if (!(NumTiles == 1 || !tile_start_and_end_present_flag)) {
873       uint8_t tileBits = tile_log2(1, TilesPartition.ColCount) + tile_log2(1, TilesPartition.RowCount);
874       tile_group_obu_size_bits += tileBits;   // tg_start	f(tileBits)
875       tile_group_obu_size_bits += tileBits;   // tg_end	   f(tileBits)
876    }
877 
878    while (tile_group_obu_size_bits & 7)   // byte_alignment()
879       tile_group_obu_size_bits++;
880 
881    decode_tile_elements_size = 0;
882    for (UINT64 TileIdx = tileGroup.tg_start; TileIdx <= tileGroup.tg_end; TileIdx++) {
883       // tile_size_minus_1	not coded for last tile
884       if ((TileIdx != tileGroup.tg_end))
885          tile_group_obu_size_bits += (TileSizeBytes * 8);   // tile_size_minus_1	le(TileSizeBytes)
886 
887       size_t tile_effective_bytes_size =
888          static_cast<size_t>(pFrameSubregionMetadata[TileIdx].bSize - pFrameSubregionMetadata[TileIdx].bStartOffset);
889       decode_tile_elements_size += tile_effective_bytes_size;
890       tile_group_obu_size_bits += (tile_effective_bytes_size * 8);
891    }
892 
893    assert((tile_group_obu_size_bits % 8) == 0);
894    tile_group_obu_size = (tile_group_obu_size_bits / 8);
895 }
896 
897 void
write_obu_tile_group_header(size_t tile_group_obu_size,std::vector<uint8_t> & headerBitstream,std::vector<uint8_t>::iterator placingPositionStart,size_t & writtenBytes)898 d3d12_video_bitstream_builder_av1::write_obu_tile_group_header(size_t tile_group_obu_size,
899                                                                std::vector<uint8_t> &headerBitstream,
900                                                                std::vector<uint8_t>::iterator placingPositionStart,
901                                                                size_t &writtenBytes)
902 {
903    auto startByteOffset = std::distance(headerBitstream.begin(), placingPositionStart);
904    if (headerBitstream.size() < (startByteOffset + c_DefaultBitstreamBufSize))
905       headerBitstream.resize(startByteOffset + c_DefaultBitstreamBufSize);
906 
907    d3d12_video_encoder_bitstream bitstream_full_obu;
908    bitstream_full_obu.setup_bitstream(headerBitstream.size(), headerBitstream.data(), startByteOffset);
909 
910    // Write the obu_header
911    constexpr uint32_t obu_extension_flag = 0;
912    constexpr uint32_t temporal_id = 0;
913    constexpr uint32_t spatial_id = 0;
914    write_obu_header(&bitstream_full_obu, OBU_TILE_GROUP, obu_extension_flag, temporal_id, spatial_id);
915 
916    // tile_group_obu() will be copied by get_feedback from EncodeFrame output
917    // we have to calculate its size anyways using the metadata for the obu_header.
918    // so we just add below the argument tile_group_obu_size informing about the
919    // tile_group_obu() byte size
920    // For OBU_TILE_GROUP there is no padding/alignment requirement so they can be concatenated directly by get_feedback
921 
922    // Write the obu_size element
923    pack_obu_header_size(&bitstream_full_obu, tile_group_obu_size);
924    debug_printf("obu_size: %" PRIu64 "\n", static_cast<uint64_t>(tile_group_obu_size));
925 
926    bitstream_full_obu.flush();
927 
928    // Shrink headerBitstream to fit
929    writtenBytes = bitstream_full_obu.get_byte_count() - startByteOffset;
930    headerBitstream.resize(writtenBytes + startByteOffset);
931 }
932