xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/radeonsi/radeon_vce.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /**************************************************************************
2  *
3  * Copyright 2013 Advanced Micro Devices, Inc.
4  *
5  * SPDX-License-Identifier: MIT
6  *
7  **************************************************************************/
8 
9 #include "radeon_vce.h"
10 
11 #include "pipe/p_video_codec.h"
12 #include "radeon_video.h"
13 #include "radeonsi/si_pipe.h"
14 #include "util/u_memory.h"
15 #include "util/u_video.h"
16 #include "vl/vl_video_buffer.h"
17 
18 #include <stdio.h>
19 
/* Pack a firmware version triple: major in bits 31..24, minor in 23..16, revision in 15..8. */
#define FW_VERSION(major, minor, rev) (((major) << 24) | ((minor) << 16) | ((rev) << 8))

#define FW_40_2_2  FW_VERSION(40, 2, 2)
#define FW_50_0_1  FW_VERSION(50, 0, 1)
#define FW_50_1_2  FW_VERSION(50, 1, 2)
#define FW_50_10_2 FW_VERSION(50, 10, 2)
#define FW_50_17_3 FW_VERSION(50, 17, 3)
#define FW_52_0_3  FW_VERSION(52, 0, 3)
#define FW_52_4_3  FW_VERSION(52, 4, 3)
#define FW_52_8_3  FW_VERSION(52, 8, 3)
/* Major-only threshold: compared against the top byte of the reported version. */
#define FW_53      FW_VERSION(53, 0, 0)
29 
30 /**
31  * flush commands to the hardware
32  */
flush(struct rvce_encoder * enc,unsigned flags)33 static void flush(struct rvce_encoder *enc, unsigned flags)
34 {
35    enc->ws->cs_flush(&enc->cs, flags, NULL);
36    enc->task_info_idx = 0;
37    enc->bs_idx = 0;
38 }
39 
#if 0
/**
 * Debug helper (compiled out): print the first 15 dwords of a feedback
 * buffer to stderr, one labelled line per dword.
 *
 * The map/unmap calls mirror the ones used by rvce_get_feedback() so the
 * helper builds again when the #if is flipped.
 */
static void dump_feedback(struct rvce_encoder *enc, struct rvid_buffer *fb)
{
   static const char *const labels[] = {
      "encStatus:\t\t\t",
      "encHasBitstream:\t\t",
      "encHasAudioBitstream:\t\t",
      "encBitstreamOffset:\t\t",
      "encBitstreamSize:\t\t",
      "encAudioBitstreamOffset:\t",
      "encAudioBitstreamSize:\t\t",
      "encExtrabytes:\t\t\t",
      "encAudioExtrabytes:\t\t",
      "videoTimeStamp:\t\t\t",
      "audioTimeStamp:\t\t\t",
      "videoOutputType:\t\t",
      "attributeFlags:\t\t\t",
      "seiPrivatePackageOffset:\t",
      "seiPrivatePackageSize:\t\t",
   };
   uint32_t *ptr = enc->ws->buffer_map(enc->ws, fb->res->buf, &enc->cs,
                                       PIPE_MAP_READ_WRITE | RADEON_MAP_TEMPORARY);
   unsigned i;

   /* nothing to print if the buffer could not be mapped */
   if (!ptr)
      return;

   fprintf(stderr, "\n");
   for (i = 0; i < sizeof(labels) / sizeof(labels[0]); ++i)
      fprintf(stderr, "%s%08x\n", labels[i], ptr[i]);
   fprintf(stderr, "\n");

   enc->ws->buffer_unmap(enc->ws, fb->res->buf);
}
#endif
65 
66 /**
67  * reset the CPB handling
68  */
reset_cpb(struct rvce_encoder * enc)69 static void reset_cpb(struct rvce_encoder *enc)
70 {
71    unsigned i;
72 
73    list_inithead(&enc->cpb_slots);
74    for (i = 0; i < enc->cpb_num; ++i) {
75       struct rvce_cpb_slot *slot = &enc->cpb_array[i];
76       slot->index = i;
77       slot->picture_type = PIPE_H2645_ENC_PICTURE_TYPE_SKIP;
78       slot->frame_num = 0;
79       slot->pic_order_cnt = 0;
80       list_addtail(&slot->list, &enc->cpb_slots);
81    }
82 }
83 
84 /**
85  * sort l0 and l1 to the top of the list
86  */
sort_cpb(struct rvce_encoder * enc)87 static void sort_cpb(struct rvce_encoder *enc)
88 {
89    struct rvce_cpb_slot *i, *l0 = NULL, *l1 = NULL;
90 
91    LIST_FOR_EACH_ENTRY (i, &enc->cpb_slots, list) {
92       if (i->frame_num == enc->pic.ref_idx_l0_list[0])
93          l0 = i;
94 
95       if (i->frame_num == enc->pic.ref_idx_l1_list[0])
96          l1 = i;
97 
98       if (enc->pic.picture_type == PIPE_H2645_ENC_PICTURE_TYPE_P && l0)
99          break;
100 
101       if (enc->pic.picture_type == PIPE_H2645_ENC_PICTURE_TYPE_B && l0 && l1)
102          break;
103    }
104 
105    if (l1) {
106       list_del(&l1->list);
107       list_add(&l1->list, &enc->cpb_slots);
108    }
109 
110    if (l0) {
111       list_del(&l0->list);
112       list_add(&l0->list, &enc->cpb_slots);
113    }
114 }
115 
116 /**
117  * get number of cpbs based on dpb
118  */
get_cpb_num(struct rvce_encoder * enc,unsigned level_idc)119 static unsigned get_cpb_num(struct rvce_encoder *enc, unsigned level_idc)
120 {
121    unsigned w = align(enc->base.width, 16) / 16;
122    unsigned h = align(enc->base.height, 16) / 16;
123    unsigned dpb;
124 
125    switch (level_idc) {
126    case 10:
127       dpb = 396;
128       break;
129    case 11:
130       dpb = 900;
131       break;
132    case 12:
133    case 13:
134    case 20:
135       dpb = 2376;
136       break;
137    case 21:
138       dpb = 4752;
139       break;
140    case 22:
141    case 30:
142       dpb = 8100;
143       break;
144    case 31:
145       dpb = 18000;
146       break;
147    case 32:
148       dpb = 20480;
149       break;
150    case 40:
151    case 41:
152       dpb = 32768;
153       break;
154    case 42:
155       dpb = 34816;
156       break;
157    case 50:
158       dpb = 110400;
159       break;
160    default:
161    case 51:
162    case 52:
163       dpb = 184320;
164       break;
165    }
166 
167    return MIN2(dpb / (w * h), 16);
168 }
169 
170 /**
171  * Get the slot for the currently encoded frame
172  */
si_current_slot(struct rvce_encoder * enc)173 struct rvce_cpb_slot *si_current_slot(struct rvce_encoder *enc)
174 {
175    return list_entry(enc->cpb_slots.prev, struct rvce_cpb_slot, list);
176 }
177 
178 /**
179  * Get the slot for L0
180  */
si_l0_slot(struct rvce_encoder * enc)181 struct rvce_cpb_slot *si_l0_slot(struct rvce_encoder *enc)
182 {
183    return list_entry(enc->cpb_slots.next, struct rvce_cpb_slot, list);
184 }
185 
186 /**
187  * Get the slot for L1
188  */
si_l1_slot(struct rvce_encoder * enc)189 struct rvce_cpb_slot *si_l1_slot(struct rvce_encoder *enc)
190 {
191    return list_entry(enc->cpb_slots.next->next, struct rvce_cpb_slot, list);
192 }
193 
194 /**
195  * Calculate the offsets into the CPB
196  */
si_vce_frame_offset(struct rvce_encoder * enc,struct rvce_cpb_slot * slot,signed * luma_offset,signed * chroma_offset)197 void si_vce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot, signed *luma_offset,
198                          signed *chroma_offset)
199 {
200    struct si_screen *sscreen = (struct si_screen *)enc->screen;
201    unsigned pitch, vpitch, fsize;
202 
203    if (sscreen->info.gfx_level < GFX9) {
204       pitch = align(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe, 128);
205       vpitch = align(enc->luma->u.legacy.level[0].nblk_y, 16);
206    } else {
207       pitch = align(enc->luma->u.gfx9.surf_pitch * enc->luma->bpe, 256);
208       vpitch = align(enc->luma->u.gfx9.surf_height, 16);
209    }
210    fsize = pitch * (vpitch + vpitch / 2);
211 
212    *luma_offset = slot->index * fsize;
213    *chroma_offset = *luma_offset + pitch * vpitch;
214 }
215 
216 /**
217  * destroy this video encoder
218  */
rvce_destroy(struct pipe_video_codec * encoder)219 static void rvce_destroy(struct pipe_video_codec *encoder)
220 {
221    struct rvce_encoder *enc = (struct rvce_encoder *)encoder;
222    if (enc->stream_handle) {
223       struct rvid_buffer fb;
224       si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
225       enc->fb = &fb;
226       enc->session(enc);
227       enc->destroy(enc);
228       flush(enc, PIPE_FLUSH_ASYNC);
229       si_vid_destroy_buffer(&fb);
230    }
231    si_vid_destroy_buffer(&enc->cpb);
232    enc->ws->cs_destroy(&enc->cs);
233    FREE(enc->cpb_array);
234    FREE(enc);
235 }
236 
rvce_begin_frame(struct pipe_video_codec * encoder,struct pipe_video_buffer * source,struct pipe_picture_desc * picture)237 static void rvce_begin_frame(struct pipe_video_codec *encoder, struct pipe_video_buffer *source,
238                              struct pipe_picture_desc *picture)
239 {
240    struct rvce_encoder *enc = (struct rvce_encoder *)encoder;
241    struct vl_video_buffer *vid_buf = (struct vl_video_buffer *)source;
242    struct pipe_h264_enc_picture_desc *pic = (struct pipe_h264_enc_picture_desc *)picture;
243 
244    bool need_rate_control =
245       enc->pic.rate_ctrl[0].rate_ctrl_method != pic->rate_ctrl[0].rate_ctrl_method ||
246       enc->pic.quant_i_frames != pic->quant_i_frames ||
247       enc->pic.quant_p_frames != pic->quant_p_frames ||
248       enc->pic.quant_b_frames != pic->quant_b_frames ||
249       enc->pic.rate_ctrl[0].target_bitrate != pic->rate_ctrl[0].target_bitrate ||
250       enc->pic.rate_ctrl[0].frame_rate_num != pic->rate_ctrl[0].frame_rate_num ||
251       enc->pic.rate_ctrl[0].frame_rate_den != pic->rate_ctrl[0].frame_rate_den;
252 
253    enc->pic = *pic;
254    enc->base.max_references = pic->seq.max_num_ref_frames;
255    enc->si_get_pic_param(enc, pic);
256 
257    enc->get_buffer(vid_buf->resources[0], &enc->handle, &enc->luma);
258    enc->get_buffer(vid_buf->resources[1], NULL, &enc->chroma);
259 
260    if (!enc->cpb_num) {
261       struct si_screen *sscreen = (struct si_screen *)encoder->context->screen;
262       unsigned cpb_size;
263 
264       /* TODO enable B frame with dual instance */
265       if ((sscreen->info.family >= CHIP_TONGA) && (enc->base.max_references == 1) &&
266             (sscreen->info.vce_harvest_config == 0))
267          enc->dual_inst = true;
268 
269       enc->cpb_num = get_cpb_num(enc, enc->pic.seq.level_idc);
270       if (!enc->cpb_num)
271          return;
272 
273       enc->cpb_array = CALLOC(enc->cpb_num, sizeof(struct rvce_cpb_slot));
274       if (!enc->cpb_array)
275          return;
276 
277       cpb_size = (sscreen->info.gfx_level < GFX9)
278                     ? align(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe, 128) *
279                          align(enc->luma->u.legacy.level[0].nblk_y, 32)
280                     :
281 
282                     align(enc->luma->u.gfx9.surf_pitch * enc->luma->bpe, 256) *
283                        align(enc->luma->u.gfx9.surf_height, 32);
284 
285       cpb_size = cpb_size * 3 / 2;
286       cpb_size = cpb_size * enc->cpb_num;
287       if (enc->dual_pipe)
288          cpb_size += RVCE_MAX_AUX_BUFFER_NUM * RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2;
289       if (!si_vid_create_buffer(enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) {
290          RVID_ERR("Can't create CPB buffer.\n");
291          return;
292       }
293    }
294 
295    if (pic->picture_type == PIPE_H2645_ENC_PICTURE_TYPE_IDR)
296       reset_cpb(enc);
297    else if (pic->picture_type == PIPE_H2645_ENC_PICTURE_TYPE_P ||
298             pic->picture_type == PIPE_H2645_ENC_PICTURE_TYPE_B)
299       sort_cpb(enc);
300 
301    if (!enc->stream_handle) {
302       struct rvid_buffer fb;
303       enc->stream_handle = si_vid_alloc_stream_handle();
304       si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
305       enc->fb = &fb;
306       enc->session(enc);
307       enc->create(enc);
308       enc->config(enc);
309       enc->feedback(enc);
310       flush(enc, PIPE_FLUSH_ASYNC);
311       // dump_feedback(enc, &fb);
312       si_vid_destroy_buffer(&fb);
313       need_rate_control = false;
314    }
315 
316    if (need_rate_control) {
317       enc->session(enc);
318       enc->config(enc);
319       flush(enc, PIPE_FLUSH_ASYNC);
320    }
321 }
322 
rvce_encode_bitstream(struct pipe_video_codec * encoder,struct pipe_video_buffer * source,struct pipe_resource * destination,void ** fb)323 static void rvce_encode_bitstream(struct pipe_video_codec *encoder,
324                                   struct pipe_video_buffer *source,
325                                   struct pipe_resource *destination, void **fb)
326 {
327    struct rvce_encoder *enc = (struct rvce_encoder *)encoder;
328    enc->get_buffer(destination, &enc->bs_handle, NULL);
329    enc->bs_size = destination->width0;
330 
331    *fb = enc->fb = CALLOC_STRUCT(rvid_buffer);
332    if (!si_vid_create_buffer(enc->screen, enc->fb, 512, PIPE_USAGE_STAGING)) {
333       RVID_ERR("Can't create feedback buffer.\n");
334       return;
335    }
336    if (!radeon_emitted(&enc->cs, 0))
337       enc->session(enc);
338    enc->encode(enc);
339    enc->feedback(enc);
340 }
341 
rvce_end_frame(struct pipe_video_codec * encoder,struct pipe_video_buffer * source,struct pipe_picture_desc * picture)342 static int rvce_end_frame(struct pipe_video_codec *encoder, struct pipe_video_buffer *source,
343                           struct pipe_picture_desc *picture)
344 {
345    struct rvce_encoder *enc = (struct rvce_encoder *)encoder;
346    struct rvce_cpb_slot *slot = list_entry(enc->cpb_slots.prev, struct rvce_cpb_slot, list);
347 
348    if (!enc->dual_inst || enc->bs_idx > 1)
349       flush(enc, picture->flush_flags);
350 
351    /* update the CPB backtrack with the just encoded frame */
352    slot->picture_type = enc->pic.picture_type;
353    slot->frame_num = enc->pic.frame_num;
354    slot->pic_order_cnt = enc->pic.pic_order_cnt;
355    if (!enc->pic.not_referenced) {
356       list_del(&slot->list);
357       list_add(&slot->list, &enc->cpb_slots);
358    }
359    return 0;
360 }
361 
rvce_get_feedback(struct pipe_video_codec * encoder,void * feedback,unsigned * size,struct pipe_enc_feedback_metadata * metadata)362 static void rvce_get_feedback(struct pipe_video_codec *encoder, void *feedback, unsigned *size,
363                               struct pipe_enc_feedback_metadata* metadata)
364 {
365    struct rvce_encoder *enc = (struct rvce_encoder *)encoder;
366    struct rvid_buffer *fb = feedback;
367 
368    if (size) {
369       uint32_t *ptr = enc->ws->buffer_map(enc->ws, fb->res->buf, &enc->cs,
370                                           PIPE_MAP_READ_WRITE | RADEON_MAP_TEMPORARY);
371 
372       if (ptr[1]) {
373          *size = ptr[4] - ptr[9];
374       } else {
375          *size = 0;
376       }
377 
378       enc->ws->buffer_unmap(enc->ws, fb->res->buf);
379    }
380    // dump_feedback(enc, fb);
381    si_vid_destroy_buffer(fb);
382    FREE(fb);
383 }
384 
rvce_destroy_fence(struct pipe_video_codec * encoder,struct pipe_fence_handle * fence)385 static void rvce_destroy_fence(struct pipe_video_codec *encoder,
386                                struct pipe_fence_handle *fence)
387 {
388    struct rvce_encoder *enc = (struct rvce_encoder *)encoder;
389 
390    enc->ws->fence_reference(enc->ws, &fence, NULL);
391 }
392 
393 /**
394  * flush any outstanding command buffers to the hardware
395  */
rvce_flush(struct pipe_video_codec * encoder)396 static void rvce_flush(struct pipe_video_codec *encoder)
397 {
398    struct rvce_encoder *enc = (struct rvce_encoder *)encoder;
399 
400    flush(enc, PIPE_FLUSH_ASYNC);
401 }
402 
/**
 * Flush callback registered with the command stream at creation time.
 * Deliberately a no-op: all arguments are ignored.
 */
static void rvce_cs_flush(void *ctx, unsigned flags, struct pipe_fence_handle **fence)
{
   (void)ctx;
   (void)flags;
   (void)fence;
}
407 
/**
 * Create a VCE encoder.
 *
 * The codec description is copied from templ, the pipe_video_codec hooks are
 * pointed at the rvce_* implementations, a VCE command stream is created and
 * the firmware-specific backend is selected from the reported firmware
 * version.
 *
 * Returns the embedded pipe_video_codec, or NULL when no or an unsupported
 * firmware version is reported, or when the encoder or its command stream
 * cannot be created.
 */
struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context,
                                               const struct pipe_video_codec *templ,
                                               struct radeon_winsys *ws, rvce_get_buffer get_buffer)
{
   struct si_screen *sscreen = (struct si_screen *)context->screen;
   struct si_context *sctx = (struct si_context *)context;
   struct rvce_encoder *enc;

   if (!sscreen->info.vce_fw_version) {
      RVID_ERR("Kernel doesn't supports VCE!\n");
      return NULL;

   } else if (!si_vce_is_fw_version_supported(sscreen)) {
      RVID_ERR("Unsupported VCE fw version loaded!\n");
      return NULL;
   }

   enc = CALLOC_STRUCT(rvce_encoder);
   if (!enc)
      return NULL;

   if (sscreen->info.is_amdgpu)
      enc->use_vm = true;

   enc->use_vui = true;

   /* dual pipe is enabled from Tonga on, except for the families listed here */
   if (sscreen->info.family >= CHIP_TONGA && sscreen->info.family != CHIP_STONEY &&
       sscreen->info.family != CHIP_POLARIS11 && sscreen->info.family != CHIP_POLARIS12 &&
       sscreen->info.family != CHIP_VEGAM)
      enc->dual_pipe = true;

   enc->base = *templ;
   enc->base.context = context;

   enc->base.destroy = rvce_destroy;
   enc->base.begin_frame = rvce_begin_frame;
   enc->base.encode_bitstream = rvce_encode_bitstream;
   enc->base.end_frame = rvce_end_frame;
   enc->base.flush = rvce_flush;
   enc->base.get_feedback = rvce_get_feedback;
   enc->base.destroy_fence = rvce_destroy_fence;
   enc->get_buffer = get_buffer;

   enc->screen = context->screen;
   enc->ws = ws;

   if (!ws->cs_create(&enc->cs, sctx->ctx, AMD_IP_VCE, rvce_cs_flush, enc)) {
      RVID_ERR("Can't get command submission context.\n");
      goto error;
   }

   switch (sscreen->info.vce_fw_version) {
   case FW_40_2_2:
      si_vce_40_2_2_init(enc);
      break;

   case FW_50_0_1:
   case FW_50_1_2:
   case FW_50_10_2:
   case FW_50_17_3:
      si_vce_50_init(enc);
      break;

   case FW_52_0_3:
   case FW_52_4_3:
   case FW_52_8_3:
      si_vce_52_init(enc);
      break;

   default:
      /* Any firmware with major version >= 53 uses the 52 backend. The mask
       * is unsigned: 0xff << 24 does not fit in a signed int. */
      if ((sscreen->info.vce_fw_version & (0xffu << 24)) >= FW_53) {
         si_vce_52_init(enc);
      } else
         goto error;
   }

   return &enc->base;

error:
   enc->ws->cs_destroy(&enc->cs);

   FREE(enc);
   return NULL;
}
492 
493 /**
494  * check if kernel has the right fw version loaded
495  */
si_vce_is_fw_version_supported(struct si_screen * sscreen)496 bool si_vce_is_fw_version_supported(struct si_screen *sscreen)
497 {
498    switch (sscreen->info.vce_fw_version) {
499    case FW_40_2_2:
500    case FW_50_0_1:
501    case FW_50_1_2:
502    case FW_50_10_2:
503    case FW_50_17_3:
504    case FW_52_0_3:
505    case FW_52_4_3:
506    case FW_52_8_3:
507       return true;
508    default:
509       if ((sscreen->info.vce_fw_version & (0xff << 24)) >= FW_53)
510          return true;
511       else
512          return false;
513    }
514 }
515 
516 /**
517  * Add the buffer as relocation to the current command submission
518  */
si_vce_add_buffer(struct rvce_encoder * enc,struct pb_buffer_lean * buf,unsigned usage,enum radeon_bo_domain domain,signed offset)519 void si_vce_add_buffer(struct rvce_encoder *enc, struct pb_buffer_lean *buf, unsigned usage,
520                        enum radeon_bo_domain domain, signed offset)
521 {
522    int reloc_idx;
523 
524    reloc_idx = enc->ws->cs_add_buffer(&enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain);
525    if (enc->use_vm) {
526       uint64_t addr;
527       addr = enc->ws->buffer_get_virtual_address(buf);
528       addr = addr + offset;
529       RVCE_CS(addr >> 32);
530       RVCE_CS(addr);
531    } else {
532       offset += enc->ws->buffer_get_reloc_offset(buf);
533       RVCE_CS(reloc_idx * 4);
534       RVCE_CS(offset);
535    }
536 }
537