xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/r600/radeon_uvd.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  * Authors:
4  *	Christian König <[email protected]>
5  * SPDX-License-Identifier: MIT
6  */
7 
8 #include <sys/types.h>
9 #include <assert.h>
10 #include <errno.h>
11 #include <unistd.h>
12 #include <stdio.h>
13 
14 #include "pipe/p_video_codec.h"
15 
16 #include "util/u_memory.h"
17 #include "util/u_video.h"
18 
19 #include "vl/vl_defines.h"
20 #include "vl/vl_mpeg12_decoder.h"
21 
22 #include "r600_pipe_common.h"
23 #include "radeon_video.h"
24 #include "radeon_uvd.h"
25 
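/* number of msg/fb/it and bitstream buffer sets; they are cycled once per
 * frame by next_buffer() */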
26 #define NUM_BUFFERS 4
27 
28 #define NUM_MPEG2_REFS 6
29 #define NUM_H264_REFS 17
30 #define NUM_VC1_REFS 5
31 
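/*
 * Each msg/fb/it buffer holds the ruvd_msg structure at offset 0 (it must fit
 * below FB_BUFFER_OFFSET), the feedback area at FB_BUFFER_OFFSET and, for
 * codecs that need it, the IT scaling table right after the feedback area;
 * see map_msg_fb_it_buf().
 */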
32 #define FB_BUFFER_OFFSET 0x1000
33 #define FB_BUFFER_SIZE 2048
34 #define FB_BUFFER_SIZE_TONGA (2048 * 64)
35 #define IT_SCALING_TABLE_SIZE 992
36 #define UVD_SESSION_CONTEXT_SIZE (128 * 1024)
37 
38 /* UVD decoder representation */
39 struct ruvd_decoder {
40 	struct pipe_video_codec		base;
41 
42 	ruvd_set_dtb			set_dtb;
43 
44 	unsigned			stream_handle;
45 	unsigned			stream_type;
46 	unsigned			frame_number;
47 
48 	struct pipe_screen		*screen;
49 	struct radeon_winsys*		ws;
50 	struct radeon_cmdbuf	cs;
51 
52 	unsigned			cur_buffer;
53 
54 	struct rvid_buffer		msg_fb_it_buffers[NUM_BUFFERS];
55 	struct ruvd_msg			*msg;
56 	uint32_t			*fb;
57 	unsigned			fb_size;
58 	uint8_t				*it;
59 
60 	struct rvid_buffer		bs_buffers[NUM_BUFFERS];
61 	void*				bs_ptr;
62 	unsigned			bs_size;
63 
64 	struct rvid_buffer		dpb;
65 	bool				use_legacy;
66 	struct rvid_buffer		ctx;
67 	struct rvid_buffer		sessionctx;
68 	struct {
69 		unsigned 		data0;
70 		unsigned		data1;
71 		unsigned		cmd;
72 		unsigned		cntl;
73 	} reg;
74 };
75 
76 /* flush IB to the hardware */
77 static int flush(struct ruvd_decoder *dec, unsigned flags,
78 				 struct pipe_fence_handle **fence) {
79 	return dec->ws->cs_flush(&dec->cs, flags, fence);
80 }
81 
82 /* add a new set register command to the IB */
83 static void set_reg(struct ruvd_decoder *dec, unsigned reg, uint32_t val)
84 {
85 	radeon_emit(&dec->cs, RUVD_PKT0(reg >> 2, 0));
86 	radeon_emit(&dec->cs, val);
87 }
88 
89 /* send a command to the VCPU through the GPCOM registers */
90 static void send_cmd(struct ruvd_decoder *dec, unsigned cmd,
91 		     struct pb_buffer_lean* buf, uint32_t off,
92 		     unsigned usage, enum radeon_bo_domain domain)
93 {
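	/*
	 * With virtual addressing the full 64-bit GPU address of the buffer is
	 * written to GPCOM_VCPU_DATA0/1; in legacy (relocation) mode only the
	 * relocation offset and index are written and the kernel patches in
	 * the final address at submission time.
	 */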
94 	int reloc_idx;
95 
96 	reloc_idx = dec->ws->cs_add_buffer(&dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED,
97 					   domain);
98 	if (!dec->use_legacy) {
99 		uint64_t addr;
100 		addr = dec->ws->buffer_get_virtual_address(buf);
101 		addr = addr + off;
102 		set_reg(dec, dec->reg.data0, addr);
103 		set_reg(dec, dec->reg.data1, addr >> 32);
104 	} else {
105 		off += dec->ws->buffer_get_reloc_offset(buf);
106 		set_reg(dec, RUVD_GPCOM_VCPU_DATA0, off);
107 		set_reg(dec, RUVD_GPCOM_VCPU_DATA1, reloc_idx * 4);
108 	}
109 	set_reg(dec, dec->reg.cmd, cmd << 1);
110 }
111 
112 /* does the codec need an IT buffer? */
113 static bool have_it(struct ruvd_decoder *dec)
114 {
115 	return dec->stream_type == RUVD_CODEC_H264_PERF ||
116 		dec->stream_type == RUVD_CODEC_H265;
117 }
118 
119 /* map the next available message/feedback/itscaling buffer */
120 static void map_msg_fb_it_buf(struct ruvd_decoder *dec)
121 {
122 	struct rvid_buffer* buf;
123 	uint8_t *ptr;
124 
125 	/* grab the current message/feedback buffer */
126 	buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
127 
128 	/* and map it for CPU access */
129 	ptr = dec->ws->buffer_map(dec->ws, buf->res->buf, &dec->cs,
130                                   PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
131 
132 	/* calc buffer offsets */
133 	dec->msg = (struct ruvd_msg *)ptr;
134 	memset(dec->msg, 0, sizeof(*dec->msg));
135 
136 	dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET);
137 	if (have_it(dec))
138 		dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + dec->fb_size);
139 }
140 
141 /* unmap and send a message command to the VCPU */
142 static void send_msg_buf(struct ruvd_decoder *dec)
143 {
144 	struct rvid_buffer* buf;
145 
146 	/* ignore the request if message/feedback buffer isn't mapped */
147 	if (!dec->msg || !dec->fb)
148 		return;
149 
150 	/* grab the current message buffer */
151 	buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
152 
153 	/* unmap the buffer */
154 	dec->ws->buffer_unmap(dec->ws, buf->res->buf);
155 	dec->bs_ptr = NULL;
156 	dec->msg = NULL;
157 	dec->fb = NULL;
158 	dec->it = NULL;
159 
160 
161 	if (dec->sessionctx.res)
162 		send_cmd(dec, RUVD_CMD_SESSION_CONTEXT_BUFFER,
163 			 dec->sessionctx.res->buf, 0, RADEON_USAGE_READWRITE,
164 			 RADEON_DOMAIN_VRAM);
165 
166 	/* and send it to the hardware */
167 	send_cmd(dec, RUVD_CMD_MSG_BUFFER, buf->res->buf, 0,
168 		 RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
169 }
170 
171 /* cycle to the next set of buffers */
172 static void next_buffer(struct ruvd_decoder *dec)
173 {
174 	++dec->cur_buffer;
175 	dec->cur_buffer %= NUM_BUFFERS;
176 }
177 
178 /* convert the profile into something UVD understands */
179 static uint32_t profile2stream_type(struct ruvd_decoder *dec, unsigned family)
180 {
181 	switch (u_reduce_video_profile(dec->base.profile)) {
182 	case PIPE_VIDEO_FORMAT_MPEG4_AVC:
183 		return RUVD_CODEC_H264;
184 
185 	case PIPE_VIDEO_FORMAT_VC1:
186 		return RUVD_CODEC_VC1;
187 
188 	case PIPE_VIDEO_FORMAT_MPEG12:
189 		return RUVD_CODEC_MPEG2;
190 
191 	case PIPE_VIDEO_FORMAT_MPEG4:
192 		return RUVD_CODEC_MPEG4;
193 
194 	case PIPE_VIDEO_FORMAT_JPEG:
195 		return RUVD_CODEC_MJPEG;
196 
197 	default:
198 		assert(0);
199 		return 0;
200 	}
201 }
202 
203 
204 static unsigned get_db_pitch_alignment(struct ruvd_decoder *dec)
205 {
206 	return 16;
207 }
208 
209 /* calculate size of reference picture buffer */
210 static unsigned calc_dpb_size(struct ruvd_decoder *dec)
211 {
212 	unsigned width_in_mb, height_in_mb, image_size, dpb_size;
213 
214 	// always align them to MB size for dpb calculation
215 	unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
216 	unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
217 
218 	// always one more for currently decoded picture
219 	unsigned max_references = dec->base.max_references + 1;
220 
221 	// aligned size of a single frame
222 	image_size = align(width, get_db_pitch_alignment(dec)) * height;
223 	image_size += image_size / 2;
224 	image_size = align(image_size, 1024);
225 
226 	// picture width & height in 16 pixel units
227 	width_in_mb = width / VL_MACROBLOCK_WIDTH;
228 	height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2);
229 
230 	switch (u_reduce_video_profile(dec->base.profile)) {
231 	case PIPE_VIDEO_FORMAT_MPEG4_AVC: {
232 		if (!dec->use_legacy) {
233 			unsigned fs_in_mb = width_in_mb * height_in_mb;
234 			unsigned alignment = 64, num_dpb_buffer_lean;
235 
236 			if (dec->stream_type == RUVD_CODEC_H264_PERF)
237 				alignment = 256;
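			/* these divisors are the MaxDpbMbs limits from table A-1 of
			 * the H.264 spec; MaxDpbMbs / frame size in MBs gives the
			 * maximum number of DPB frames for the level */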
238 			switch(dec->base.level) {
239 			case 30:
240 				num_dpb_buffer_lean = 8100 / fs_in_mb;
241 				break;
242 			case 31:
243 				num_dpb_buffer_lean = 18000 / fs_in_mb;
244 				break;
245 			case 32:
246 				num_dpb_buffer_lean = 20480 / fs_in_mb;
247 				break;
248 			case 41:
249 				num_dpb_buffer_lean = 32768 / fs_in_mb;
250 				break;
251 			case 42:
252 				num_dpb_buffer_lean = 34816 / fs_in_mb;
253 				break;
254 			case 50:
255 				num_dpb_buffer_lean = 110400 / fs_in_mb;
256 				break;
257 			case 51:
258 				num_dpb_buffer_lean = 184320 / fs_in_mb;
259 				break;
260 			default:
261 				num_dpb_buffer_lean = 184320 / fs_in_mb;
262 				break;
263 			}
264 			num_dpb_buffer_lean++;
265 			max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer_lean), max_references);
266 			dpb_size = image_size * max_references;
267 			if ((dec->stream_type != RUVD_CODEC_H264_PERF)) {
268 				dpb_size += max_references * align(width_in_mb * height_in_mb  * 192, alignment);
269 				dpb_size += align(width_in_mb * height_in_mb * 32, alignment);
270 			}
271 		} else {
272 			// the firmware seems to always assume a minimum number of ref frames
273 			max_references = MAX2(NUM_H264_REFS, max_references);
274 			// reference picture buffer
275 			dpb_size = image_size * max_references;
276 			if ((dec->stream_type != RUVD_CODEC_H264_PERF)) {
277 				// macroblock context buffer
278 				dpb_size += width_in_mb * height_in_mb * max_references * 192;
279 				// IT surface buffer
280 				dpb_size += width_in_mb * height_in_mb * 32;
281 			}
282 		}
283 		break;
284 	}
285 
286 	case PIPE_VIDEO_FORMAT_VC1:
287 		// the firmware seems to always assume a minimum number of ref frames
288 		max_references = MAX2(NUM_VC1_REFS, max_references);
289 
290 		// reference picture buffer
291 		dpb_size = image_size * max_references;
292 
293 		// CONTEXT_BUFFER
294 		dpb_size += width_in_mb * height_in_mb * 128;
295 
296 		// IT surface buffer
297 		dpb_size += width_in_mb * 64;
298 
299 		// DB surface buffer
300 		dpb_size += width_in_mb * 128;
301 
302 		// BP
303 		dpb_size += align(MAX2(width_in_mb, height_in_mb) * 7 * 16, 64);
304 		break;
305 
306 	case PIPE_VIDEO_FORMAT_MPEG12:
307 		// reference picture buffer, must be big enough for all frames
308 		dpb_size = image_size * NUM_MPEG2_REFS;
309 		break;
310 
311 	case PIPE_VIDEO_FORMAT_MPEG4:
312 		// reference picture buffer
313 		dpb_size = image_size * max_references;
314 
315 		// CM
316 		dpb_size += width_in_mb * height_in_mb * 64;
317 
318 		// IT surface buffer
319 		dpb_size += align(width_in_mb * height_in_mb * 32, 64);
320 
321 		dpb_size = MAX2(dpb_size, 30 * 1024 * 1024);
322 		break;
323 
324 	case PIPE_VIDEO_FORMAT_JPEG:
325 		dpb_size = 0;
326 		break;
327 
328 	default:
329 		// something is missing here
330 		assert(0);
331 
332 		// at least use a sane default value
333 		dpb_size = 32 * 1024 * 1024;
334 		break;
335 	}
336 	return dpb_size;
337 }
338 
339 /* free associated data in the video buffer callback */
340 static void ruvd_destroy_associated_data(void *data)
341 {
342 	/* NOOP, since we only use an intptr */
343 }
344 
345 /* get h264 specific message bits */
346 static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_picture_desc *pic)
347 {
348 	struct ruvd_h264 result;
349 
350 	memset(&result, 0, sizeof(result));
351 	switch (pic->base.profile) {
352 	case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
353 	case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE:
354 		result.profile = RUVD_H264_PROFILE_BASELINE;
355 		break;
356 
357 	case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
358 		result.profile = RUVD_H264_PROFILE_MAIN;
359 		break;
360 
361 	case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
362 		result.profile = RUVD_H264_PROFILE_HIGH;
363 		break;
364 
365 	default:
366 		assert(0);
367 		break;
368 	}
369 
370 	result.level = dec->base.level;
371 
372 	result.sps_info_flags = 0;
373 	result.sps_info_flags |= pic->pps->sps->direct_8x8_inference_flag << 0;
374 	result.sps_info_flags |= pic->pps->sps->mb_adaptive_frame_field_flag << 1;
375 	result.sps_info_flags |= pic->pps->sps->frame_mbs_only_flag << 2;
376 	result.sps_info_flags |= pic->pps->sps->delta_pic_order_always_zero_flag << 3;
377 
378 	result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;
379 	result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8;
380 	result.log2_max_frame_num_minus4 = pic->pps->sps->log2_max_frame_num_minus4;
381 	result.pic_order_cnt_type = pic->pps->sps->pic_order_cnt_type;
382 	result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4;
383 
384 	switch (dec->base.chroma_format) {
385 	case PIPE_VIDEO_CHROMA_FORMAT_NONE:
386 		/* TODO: assert? */
387 		break;
388 	case PIPE_VIDEO_CHROMA_FORMAT_400:
389 		result.chroma_format = 0;
390 		break;
391 	case PIPE_VIDEO_CHROMA_FORMAT_420:
392 		result.chroma_format = 1;
393 		break;
394 	case PIPE_VIDEO_CHROMA_FORMAT_422:
395 		result.chroma_format = 2;
396 		break;
397 	case PIPE_VIDEO_CHROMA_FORMAT_444:
398 		result.chroma_format = 3;
399 		break;
400 	case PIPE_VIDEO_CHROMA_FORMAT_440:
401 		result.chroma_format = 4;
402 		break;
403 	}
404 
405 	result.pps_info_flags = 0;
406 	result.pps_info_flags |= pic->pps->transform_8x8_mode_flag << 0;
407 	result.pps_info_flags |= pic->pps->redundant_pic_cnt_present_flag << 1;
408 	result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 2;
409 	result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag << 3;
410 	result.pps_info_flags |= pic->pps->weighted_bipred_idc << 4;
411 	result.pps_info_flags |= pic->pps->weighted_pred_flag << 6;
412 	result.pps_info_flags |= pic->pps->bottom_field_pic_order_in_frame_present_flag << 7;
413 	result.pps_info_flags |= pic->pps->entropy_coding_mode_flag << 8;
414 
415 	result.num_slice_groups_minus1 = pic->pps->num_slice_groups_minus1;
416 	result.slice_group_map_type = pic->pps->slice_group_map_type;
417 	result.slice_group_change_rate_minus1 = pic->pps->slice_group_change_rate_minus1;
418 	result.pic_init_qp_minus26 = pic->pps->pic_init_qp_minus26;
419 	result.chroma_qp_index_offset = pic->pps->chroma_qp_index_offset;
420 	result.second_chroma_qp_index_offset = pic->pps->second_chroma_qp_index_offset;
421 
422 	memcpy(result.scaling_list_4x4, pic->pps->ScalingList4x4, 6*16);
423 	memcpy(result.scaling_list_8x8, pic->pps->ScalingList8x8, 2*64);
424 
425 	if (dec->stream_type == RUVD_CODEC_H264_PERF) {
426 		memcpy(dec->it, result.scaling_list_4x4, 6*16);
427 		memcpy((dec->it + 96), result.scaling_list_8x8, 2*64);
428 	}
429 
430 	result.num_ref_frames = pic->num_ref_frames;
431 
432 	result.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1;
433 	result.num_ref_idx_l1_active_minus1 = pic->num_ref_idx_l1_active_minus1;
434 
435 	result.frame_num = pic->frame_num;
436 	memcpy(result.frame_num_list, pic->frame_num_list, 4*16);
437 	result.curr_field_order_cnt_list[0] = pic->field_order_cnt[0];
438 	result.curr_field_order_cnt_list[1] = pic->field_order_cnt[1];
439 	memcpy(result.field_order_cnt_list, pic->field_order_cnt_list, 4*16*2);
440 
441 	result.decoded_pic_idx = pic->frame_num;
442 
443 	return result;
444 }
445 
446 /* get vc1 specific message bits */
447 static struct ruvd_vc1 get_vc1_msg(struct pipe_vc1_picture_desc *pic)
448 {
449 	struct ruvd_vc1 result;
450 
451 	memset(&result, 0, sizeof(result));
452 
453 	switch(pic->base.profile) {
454 	case PIPE_VIDEO_PROFILE_VC1_SIMPLE:
455 		result.profile = RUVD_VC1_PROFILE_SIMPLE;
456 		result.level = 1;
457 		break;
458 
459 	case PIPE_VIDEO_PROFILE_VC1_MAIN:
460 		result.profile = RUVD_VC1_PROFILE_MAIN;
461 		result.level = 2;
462 		break;
463 
464 	case PIPE_VIDEO_PROFILE_VC1_ADVANCED:
465 		result.profile = RUVD_VC1_PROFILE_ADVANCED;
466 		result.level = 4;
467 		break;
468 
469 	default:
470 		assert(0);
471 	}
472 
473 	/* fields common for all profiles */
474 	result.sps_info_flags |= pic->postprocflag << 7;
475 	result.sps_info_flags |= pic->pulldown << 6;
476 	result.sps_info_flags |= pic->interlace << 5;
477 	result.sps_info_flags |= pic->tfcntrflag << 4;
478 	result.sps_info_flags |= pic->finterpflag << 3;
479 	result.sps_info_flags |= pic->psf << 1;
480 
481 	result.pps_info_flags |= pic->range_mapy_flag << 31;
482 	result.pps_info_flags |= pic->range_mapy << 28;
483 	result.pps_info_flags |= pic->range_mapuv_flag << 27;
484 	result.pps_info_flags |= pic->range_mapuv << 24;
485 	result.pps_info_flags |= pic->multires << 21;
486 	result.pps_info_flags |= pic->maxbframes << 16;
487 	result.pps_info_flags |= pic->overlap << 11;
488 	result.pps_info_flags |= pic->quantizer << 9;
489 	result.pps_info_flags |= pic->panscan_flag << 7;
490 	result.pps_info_flags |= pic->refdist_flag << 6;
491 	result.pps_info_flags |= pic->vstransform << 0;
492 
493 	/* some fields only apply to main/advanced profile */
494 	if (pic->base.profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE) {
495 		result.pps_info_flags |= pic->syncmarker << 20;
496 		result.pps_info_flags |= pic->rangered << 19;
497 		result.pps_info_flags |= pic->loopfilter << 5;
498 		result.pps_info_flags |= pic->fastuvmc << 4;
499 		result.pps_info_flags |= pic->extended_mv << 3;
500 		result.pps_info_flags |= pic->extended_dmv << 8;
501 		result.pps_info_flags |= pic->dquant << 1;
502 	}
503 
504 	result.chroma_format = 1;
505 
506 #if 0
507 //(((unsigned int)(pPicParams->advance.reserved1))        << SPS_INFO_VC1_RESERVED_SHIFT)
508 uint32_t 	slice_count
509 uint8_t 	picture_type
510 uint8_t 	frame_coding_mode
511 uint8_t 	deblockEnable
512 uint8_t 	pquant
513 #endif
514 
515 	return result;
516 }
517 
518 /* extract the frame number from a referenced video buffer */
519 static uint32_t get_ref_pic_idx(struct ruvd_decoder *dec, struct pipe_video_buffer *ref)
520 {
521 	uint32_t min = MAX2(dec->frame_number, NUM_MPEG2_REFS) - NUM_MPEG2_REFS;
522 	uint32_t max = MAX2(dec->frame_number, 1) - 1;
523 	uintptr_t frame;
524 
525 	/* seems to be the most sane fallback */
526 	if (!ref)
527 		return max;
528 
529 	/* get the frame number from the associated data */
530 	frame = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base);
531 
532 	/* limit the frame number to a valid range */
533 	return MAX2(MIN2(frame, max), min);
534 }
535 
536 /* get mpeg2 specific msg bits */
537 static struct ruvd_mpeg2 get_mpeg2_msg(struct ruvd_decoder *dec,
538 				       struct pipe_mpeg12_picture_desc *pic)
539 {
540 	const int *zscan = pic->alternate_scan ? vl_zscan_alternate : vl_zscan_normal;
541 	struct ruvd_mpeg2 result;
542 	unsigned i;
543 
544 	memset(&result, 0, sizeof(result));
545 	result.decoded_pic_idx = dec->frame_number;
546 	for (i = 0; i < 2; ++i)
547 		result.ref_pic_idx[i] = get_ref_pic_idx(dec, pic->ref[i]);
548 
549 	result.load_intra_quantiser_matrix = 1;
550 	result.load_nonintra_quantiser_matrix = 1;
551 
552 	for (i = 0; i < 64; ++i) {
553 		result.intra_quantiser_matrix[i] = pic->intra_matrix[zscan[i]];
554 		result.nonintra_quantiser_matrix[i] = pic->non_intra_matrix[zscan[i]];
555 	}
556 
557 	result.profile_and_level_indication = 0;
558 	result.chroma_format = 0x1;
559 
560 	result.picture_coding_type = pic->picture_coding_type;
561 	result.f_code[0][0] = pic->f_code[0][0] + 1;
562 	result.f_code[0][1] = pic->f_code[0][1] + 1;
563 	result.f_code[1][0] = pic->f_code[1][0] + 1;
564 	result.f_code[1][1] = pic->f_code[1][1] + 1;
565 	result.intra_dc_precision = pic->intra_dc_precision;
566 	result.pic_structure = pic->picture_structure;
567 	result.top_field_first = pic->top_field_first;
568 	result.frame_pred_frame_dct = pic->frame_pred_frame_dct;
569 	result.concealment_motion_vectors = pic->concealment_motion_vectors;
570 	result.q_scale_type = pic->q_scale_type;
571 	result.intra_vlc_format = pic->intra_vlc_format;
572 	result.alternate_scan = pic->alternate_scan;
573 
574 	return result;
575 }
576 
577 /* get mpeg4 specific msg bits */
578 static struct ruvd_mpeg4 get_mpeg4_msg(struct ruvd_decoder *dec,
579 				       struct pipe_mpeg4_picture_desc *pic)
580 {
581 	struct ruvd_mpeg4 result;
582 	unsigned i;
583 
584 	memset(&result, 0, sizeof(result));
585 	result.decoded_pic_idx = dec->frame_number;
586 	for (i = 0; i < 2; ++i)
587 		result.ref_pic_idx[i] = get_ref_pic_idx(dec, pic->ref[i]);
588 
589 	result.variant_type = 0;
590 	result.profile_and_level_indication = 0xF0; // ASP Level0
591 
592 	result.video_object_layer_verid = 0x5; // advanced simple
593 	result.video_object_layer_shape = 0x0; // rectangular
594 
595 	result.video_object_layer_width = dec->base.width;
596 	result.video_object_layer_height = dec->base.height;
597 
598 	result.vop_time_increment_resolution = pic->vop_time_increment_resolution;
599 
600 	result.flags |= pic->short_video_header << 0;
601 	//result.flags |= obmc_disable << 1;
602 	result.flags |= pic->interlaced << 2;
603 	result.flags |= 1 << 3; // load_intra_quant_mat
604 	result.flags |= 1 << 4; // load_nonintra_quant_mat
605 	result.flags |= pic->quarter_sample << 5;
606 	result.flags |= 1 << 6; // complexity_estimation_disable
607 	result.flags |= pic->resync_marker_disable << 7;
608 	//result.flags |= data_partitioned << 8;
609 	//result.flags |= reversible_vlc << 9;
610 	result.flags |= 0 << 10; // newpred_enable
611 	result.flags |= 0 << 11; // reduced_resolution_vop_enable
612 	//result.flags |= scalability << 12;
613 	//result.flags |= is_object_layer_identifier << 13;
614 	//result.flags |= fixed_vop_rate << 14;
615 	//result.flags |= newpred_segment_type << 15;
616 
617 	result.quant_type = pic->quant_type;
618 
619 	for (i = 0; i < 64; ++i) {
620 		result.intra_quant_mat[i] = pic->intra_matrix[vl_zscan_normal[i]];
621 		result.nonintra_quant_mat[i] = pic->non_intra_matrix[vl_zscan_normal[i]];
622 	}
623 
624 	/*
625 	int32_t 	trd [2]
626 	int32_t 	trb [2]
627 	uint8_t 	vop_coding_type
628 	uint8_t 	vop_fcode_forward
629 	uint8_t 	vop_fcode_backward
630 	uint8_t 	rounding_control
631 	uint8_t 	alternate_vertical_scan_flag
632 	uint8_t 	top_field_first
633 	*/
634 
635 	return result;
636 }
637 
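/* rebuild the JPEG headers (SOI, DQT, DHT, optional DRI, SOF0 and SOS) from
 * the picture/slice parameters and place them in the bitstream buffer in
 * front of the scan data, so that a complete JPEG stream is decoded */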
638 static void get_mjpeg_slice_header(struct ruvd_decoder *dec, struct pipe_mjpeg_picture_desc *pic)
639 {
640 	int size = 0, saved_size, len_pos, i;
641 	uint16_t *bs;
642 	uint8_t *buf = dec->bs_ptr;
643 
644 	/* SOI */
645 	buf[size++] = 0xff;
646 	buf[size++] = 0xd8;
647 
648 	/* DQT */
649 	buf[size++] = 0xff;
650 	buf[size++] = 0xdb;
651 
652 	len_pos = size++;
653 	size++;
654 
655 	for (i = 0; i < 4; ++i) {
656 		if (pic->quantization_table.load_quantiser_table[i] == 0)
657 			continue;
658 
659 		buf[size++] = i;
660 		memcpy((buf + size), &pic->quantization_table.quantiser_table[i], 64);
661 		size += 64;
662 	}
663 
664 	bs = (uint16_t*)&buf[len_pos];
665 	*bs = util_bswap16(size - 4);
666 
667 	saved_size = size;
668 
669 	/* DHT */
670 	buf[size++] = 0xff;
671 	buf[size++] = 0xc4;
672 
673 	len_pos = size++;
674 	size++;
675 
676 	for (i = 0; i < 2; ++i) {
677 		if (pic->huffman_table.load_huffman_table[i] == 0)
678 			continue;
679 
680 		buf[size++] = 0x00 | i;
681 		memcpy((buf + size), &pic->huffman_table.table[i].num_dc_codes, 16);
682 		size += 16;
683 		memcpy((buf + size), &pic->huffman_table.table[i].dc_values, 12);
684 		size += 12;
685 	}
686 
687 	for (i = 0; i < 2; ++i) {
688 		if (pic->huffman_table.load_huffman_table[i] == 0)
689 			continue;
690 
691 		buf[size++] = 0x10 | i;
692 		memcpy((buf + size), &pic->huffman_table.table[i].num_ac_codes, 16);
693 		size += 16;
694 		memcpy((buf + size), &pic->huffman_table.table[i].ac_values, 162);
695 		size += 162;
696 	}
697 
698 	bs = (uint16_t*)&buf[len_pos];
699 	*bs = util_bswap16(size - saved_size - 2);
700 
701 	saved_size = size;
702 
703 	/* DRI */
704 	if (pic->slice_parameter.restart_interval) {
705 		buf[size++] = 0xff;
706 		buf[size++] = 0xdd;
707 		buf[size++] = 0x00;
708 		buf[size++] = 0x04;
709 		bs = (uint16_t*)&buf[size++];
710 		*bs = util_bswap16(pic->slice_parameter.restart_interval);
711 		saved_size = ++size;
712 	}
713 
714 	/* SOF */
715 	buf[size++] = 0xff;
716 	buf[size++] = 0xc0;
717 
718 	len_pos = size++;
719 	size++;
720 
721 	buf[size++] = 0x08;
722 
723 	bs = (uint16_t*)&buf[size++];
724 	*bs = util_bswap16(pic->picture_parameter.picture_height);
725 	size++;
726 
727 	bs = (uint16_t*)&buf[size++];
728 	*bs = util_bswap16(pic->picture_parameter.picture_width);
729 	size++;
730 
731 	buf[size++] = pic->picture_parameter.num_components;
732 
733 	for (i = 0; i < pic->picture_parameter.num_components; ++i) {
734 		buf[size++] = pic->picture_parameter.components[i].component_id;
735 		buf[size++] = pic->picture_parameter.components[i].h_sampling_factor << 4 |
736 			pic->picture_parameter.components[i].v_sampling_factor;
737 		buf[size++] = pic->picture_parameter.components[i].quantiser_table_selector;
738 	}
739 
740 	bs = (uint16_t*)&buf[len_pos];
741 	*bs = util_bswap16(size - saved_size - 2);
742 
743 	saved_size = size;
744 
745 	/* SOS */
746 	buf[size++] = 0xff;
747 	buf[size++] = 0xda;
748 
749 	len_pos = size++;
750 	size++;
751 
752 	buf[size++] = pic->slice_parameter.num_components;
753 
754 	for (i = 0; i < pic->slice_parameter.num_components; ++i) {
755 		buf[size++] = pic->slice_parameter.components[i].component_selector;
756 		buf[size++] = pic->slice_parameter.components[i].dc_table_selector << 4 |
757 			pic->slice_parameter.components[i].ac_table_selector;
758 	}
759 
760 	buf[size++] = 0x00;
761 	buf[size++] = 0x3f;
762 	buf[size++] = 0x00;
763 
764 	bs = (uint16_t*)&buf[len_pos];
765 	*bs = util_bswap16(size - saved_size - 2);
766 
767 	dec->bs_ptr += size;
768 	dec->bs_size += size;
769 }
770 
771 /**
772  * destroy this video decoder
773  */
774 static void ruvd_destroy(struct pipe_video_codec *decoder)
775 {
776 	struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
777 	unsigned i;
778 
779 	assert(decoder);
780 
781 	map_msg_fb_it_buf(dec);
782 	dec->msg->size = sizeof(*dec->msg);
783 	dec->msg->msg_type = RUVD_MSG_DESTROY;
784 	dec->msg->stream_handle = dec->stream_handle;
785 	send_msg_buf(dec);
786 
787 	flush(dec, 0, NULL);
788 
789 	dec->ws->cs_destroy(&dec->cs);
790 
791 	for (i = 0; i < NUM_BUFFERS; ++i) {
792 		rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
793 		rvid_destroy_buffer(&dec->bs_buffers[i]);
794 	}
795 
796 	rvid_destroy_buffer(&dec->dpb);
797 	rvid_destroy_buffer(&dec->ctx);
798 	rvid_destroy_buffer(&dec->sessionctx);
799 
800 	FREE(dec);
801 }
802 
803 /**
804  * start decoding of a new frame
805  */
806 static void ruvd_begin_frame(struct pipe_video_codec *decoder,
807 			     struct pipe_video_buffer *target,
808 			     struct pipe_picture_desc *picture)
809 {
810 	struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
811 	uintptr_t frame;
812 
813 	assert(decoder);
814 
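	/* remember the 1-based frame number in the target buffer; reference
	 * pictures hand it back to us via get_ref_pic_idx() */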
815 	frame = ++dec->frame_number;
816 	vl_video_buffer_set_associated_data(target, decoder, (void *)frame,
817 					    &ruvd_destroy_associated_data);
818 
819 	dec->bs_size = 0;
820 	dec->bs_ptr = dec->ws->buffer_map(dec->ws,
821 		dec->bs_buffers[dec->cur_buffer].res->buf,
822 		&dec->cs, PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
823 }
824 
825 /**
826  * decode a macroblock
827  */
828 static void ruvd_decode_macroblock(struct pipe_video_codec *decoder,
829 				   struct pipe_video_buffer *target,
830 				   struct pipe_picture_desc *picture,
831 				   const struct pipe_macroblock *macroblocks,
832 				   unsigned num_macroblocks)
833 {
834 	/* not supported (yet) */
835 	assert(0);
836 }
837 
838 /**
839  * decode a bitstream
840  */
841 static void ruvd_decode_bitstream(struct pipe_video_codec *decoder,
842 				  struct pipe_video_buffer *target,
843 				  struct pipe_picture_desc *picture,
844 				  unsigned num_buffers,
845 				  const void * const *buffers,
846 				  const unsigned *sizes)
847 {
848 	struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
849 	enum pipe_video_format format = u_reduce_video_profile(picture->profile);
850 	unsigned i;
851 
852 	assert(decoder);
853 
854 	if (!dec->bs_ptr)
855 		return;
856 
857 	if (format == PIPE_VIDEO_FORMAT_JPEG)
858 		get_mjpeg_slice_header(dec, (struct pipe_mjpeg_picture_desc*)picture);
859 
860 	for (i = 0; i < num_buffers; ++i) {
861 		struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer];
862 		unsigned new_size = dec->bs_size + sizes[i];
863 
864 		if (format == PIPE_VIDEO_FORMAT_JPEG)
865 			new_size += 2; /* save for EOI */
866 
867 		if (new_size > buf->res->buf->size) {
868 			dec->ws->buffer_unmap(dec->ws, buf->res->buf);
869 			dec->bs_ptr = NULL;
870 			if (!rvid_resize_buffer(dec->screen, &dec->cs, buf, new_size)) {
871 				RVID_ERR("Can't resize bitstream buffer!");
872 				return;
873 			}
874 
875 			dec->bs_ptr = dec->ws->buffer_map(dec->ws, buf->res->buf, &dec->cs,
876 							  PIPE_MAP_WRITE |
877 							  RADEON_MAP_TEMPORARY);
878 			if (!dec->bs_ptr)
879 				return;
880 
881 			dec->bs_ptr += dec->bs_size;
882 		}
883 
884 		memcpy(dec->bs_ptr, buffers[i], sizes[i]);
885 		dec->bs_size += sizes[i];
886 		dec->bs_ptr += sizes[i];
887 	}
888 
889 	if (format == PIPE_VIDEO_FORMAT_JPEG) {
890 		((uint8_t *)dec->bs_ptr)[0] = 0xff;	/* EOI */
891 		((uint8_t *)dec->bs_ptr)[1] = 0xd9;
892 		dec->bs_size += 2;
893 		dec->bs_ptr += 2;
894 	}
895 }
896 
897 /**
898  * end decoding of the current frame
899  */
900 static int ruvd_end_frame(struct pipe_video_codec *decoder,
901 			   struct pipe_video_buffer *target,
902 			   struct pipe_picture_desc *picture)
903 {
904 	struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
905 	struct pb_buffer_lean *dt;
906 	struct rvid_buffer *msg_fb_it_buf, *bs_buf;
907 	unsigned bs_size;
908 
909 	assert(decoder);
910 
911 	if (!dec->bs_ptr)
912 		return 1;
913 
914 	msg_fb_it_buf = &dec->msg_fb_it_buffers[dec->cur_buffer];
915 	bs_buf = &dec->bs_buffers[dec->cur_buffer];
916 
917 	bs_size = align(dec->bs_size, 128);
918 	memset(dec->bs_ptr, 0, bs_size - dec->bs_size);
919 	dec->ws->buffer_unmap(dec->ws, bs_buf->res->buf);
920 	dec->bs_ptr = NULL;
921 
922 	map_msg_fb_it_buf(dec);
923 	dec->msg->size = sizeof(*dec->msg);
924 	dec->msg->msg_type = RUVD_MSG_DECODE;
925 	dec->msg->stream_handle = dec->stream_handle;
926 	dec->msg->status_report_feedback_number = dec->frame_number;
927 
928 	dec->msg->body.decode.stream_type = dec->stream_type;
929 	dec->msg->body.decode.decode_flags = 0x1;
930 	dec->msg->body.decode.width_in_samples = dec->base.width;
931 	dec->msg->body.decode.height_in_samples = dec->base.height;
932 
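	/* for VC-1 simple/main profile the size is passed in 16x16 macroblock
	 * units instead of samples */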
933 	if ((picture->profile == PIPE_VIDEO_PROFILE_VC1_SIMPLE) ||
934 	    (picture->profile == PIPE_VIDEO_PROFILE_VC1_MAIN)) {
935 		dec->msg->body.decode.width_in_samples = align(dec->msg->body.decode.width_in_samples, 16) / 16;
936 		dec->msg->body.decode.height_in_samples = align(dec->msg->body.decode.height_in_samples, 16) / 16;
937 	}
938 
939 	if (dec->dpb.res)
940 		dec->msg->body.decode.dpb_size = dec->dpb.res->buf->size;
941 	dec->msg->body.decode.bsd_size = bs_size;
942 	dec->msg->body.decode.db_pitch = align(dec->base.width, get_db_pitch_alignment(dec));
943 
944 	dt = dec->set_dtb(dec->msg, (struct vl_video_buffer *)target);
945 
946 	switch (u_reduce_video_profile(picture->profile)) {
947 	case PIPE_VIDEO_FORMAT_MPEG4_AVC:
948 		dec->msg->body.decode.codec.h264 = get_h264_msg(dec, (struct pipe_h264_picture_desc*)picture);
949 		break;
950 
951 	case PIPE_VIDEO_FORMAT_VC1:
952 		dec->msg->body.decode.codec.vc1 = get_vc1_msg((struct pipe_vc1_picture_desc*)picture);
953 		break;
954 
955 	case PIPE_VIDEO_FORMAT_MPEG12:
956 		dec->msg->body.decode.codec.mpeg2 = get_mpeg2_msg(dec, (struct pipe_mpeg12_picture_desc*)picture);
957 		break;
958 
959 	case PIPE_VIDEO_FORMAT_MPEG4:
960 		dec->msg->body.decode.codec.mpeg4 = get_mpeg4_msg(dec, (struct pipe_mpeg4_picture_desc*)picture);
961 		break;
962 
963 	case PIPE_VIDEO_FORMAT_JPEG:
964 		break;
965 
966 	default:
967 		assert(0);
968 		return 1;
969 	}
970 
971 	dec->msg->body.decode.db_surf_tile_config = dec->msg->body.decode.dt_surf_tile_config;
972 	dec->msg->body.decode.extension_support = 0x1;
973 
974 	/* set at least the feedback buffer size */
975 	dec->fb[0] = dec->fb_size;
976 
977 	send_msg_buf(dec);
978 
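	/* queue every buffer needed for this decode: DPB, optional context,
	 * bitstream, decoding target, feedback and (if used) the IT scaling
	 * table, then kick the engine and flush the IB asynchronously */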
979 	if (dec->dpb.res)
980 		send_cmd(dec, RUVD_CMD_DPB_BUFFER, dec->dpb.res->buf, 0,
981 			RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
982 
983 	if (dec->ctx.res)
984 		send_cmd(dec, RUVD_CMD_CONTEXT_BUFFER, dec->ctx.res->buf, 0,
985 			RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
986 	send_cmd(dec, RUVD_CMD_BITSTREAM_BUFFER, bs_buf->res->buf,
987 		 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
988 	send_cmd(dec, RUVD_CMD_DECODING_TARGET_BUFFER, dt, 0,
989 		 RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);
990 	send_cmd(dec, RUVD_CMD_FEEDBACK_BUFFER, msg_fb_it_buf->res->buf,
991 		 FB_BUFFER_OFFSET, RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT);
992 	if (have_it(dec))
993 		send_cmd(dec, RUVD_CMD_ITSCALING_TABLE_BUFFER, msg_fb_it_buf->res->buf,
994 			 FB_BUFFER_OFFSET + dec->fb_size, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
995 	set_reg(dec, dec->reg.cntl, 1);
996 
997 	flush(dec, PIPE_FLUSH_ASYNC, picture->fence);
998 	next_buffer(dec);
999 	return 0;
1000 }
1001 
1002 /**
1003  * flush any outstanding command buffers to the hardware
1004  */
1005 static void ruvd_flush(struct pipe_video_codec *decoder)
1006 {
1007 }
1008 
1009 static int ruvd_get_decoder_fence(struct pipe_video_codec *decoder,
1010                                   struct pipe_fence_handle *fence,
1011                                   uint64_t timeout) {
1012 
1013   struct ruvd_decoder *dec = (struct ruvd_decoder *)decoder;
1014   return dec->ws->fence_wait(dec->ws, fence, timeout);
1015 }
1016 
1017 /**
1018  * create a UVD decoder
1019  */
1020 struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
1021 					     const struct pipe_video_codec *templ,
1022 					     ruvd_set_dtb set_dtb)
1023 {
1024 	struct radeon_winsys* ws = ((struct r600_common_context *)context)->ws;
1025 	struct r600_common_context *rctx = (struct r600_common_context*)context;
1026 	unsigned dpb_size;
1027 	unsigned width = templ->width, height = templ->height;
1028 	unsigned bs_buf_size;
1029 	struct radeon_info info;
1030 	struct ruvd_decoder *dec;
1031 	int r, i;
1032 
1033 	ws->query_info(ws, &info);
1034 
1035 	switch(u_reduce_video_profile(templ->profile)) {
1036 	case PIPE_VIDEO_FORMAT_MPEG12:
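		/* fall back to the shader-based MPEG-1/2 decoder if UVD can't
		 * handle the entrypoint or the chip predates PALM */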
1037 		if (templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM || info.family < CHIP_PALM)
1038 			return vl_create_mpeg12_decoder(context, templ);
1039 
1040 		FALLTHROUGH;
1041 	case PIPE_VIDEO_FORMAT_MPEG4:
1042 		width = align(width, VL_MACROBLOCK_WIDTH);
1043 		height = align(height, VL_MACROBLOCK_HEIGHT);
1044 		break;
1045 	case PIPE_VIDEO_FORMAT_MPEG4_AVC:
1046 		width = align(width, VL_MACROBLOCK_WIDTH);
1047 		height = align(height, VL_MACROBLOCK_HEIGHT);
1048 		break;
1049 
1050 	default:
1051 		break;
1052 	}
1053 
1054 
1055 	dec = CALLOC_STRUCT(ruvd_decoder);
1056 
1057 	if (!dec)
1058 		return NULL;
1059 
1060 	dec->use_legacy = true;
1061 
1062 	dec->base = *templ;
1063 	dec->base.context = context;
1064 	dec->base.width = width;
1065 	dec->base.height = height;
1066 
1067 	dec->base.destroy = ruvd_destroy;
1068 	dec->base.begin_frame = ruvd_begin_frame;
1069 	dec->base.decode_macroblock = ruvd_decode_macroblock;
1070 	dec->base.decode_bitstream = ruvd_decode_bitstream;
1071 	dec->base.end_frame = ruvd_end_frame;
1072 	dec->base.flush = ruvd_flush;
1073 	dec->base.get_decoder_fence = ruvd_get_decoder_fence;
1074 
1075 	dec->stream_type = profile2stream_type(dec, info.family);
1076 	dec->set_dtb = set_dtb;
1077 	dec->stream_handle = rvid_alloc_stream_handle();
1078 	dec->screen = context->screen;
1079 	dec->ws = ws;
1080 
1081 	if (!ws->cs_create(&dec->cs, rctx->ctx, AMD_IP_UVD, NULL, NULL)) {
1082 		RVID_ERR("Can't get command submission context.\n");
1083 		goto error;
1084 	}
1085 
1086 	dec->fb_size = FB_BUFFER_SIZE;
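	/* initial bitstream buffer size: roughly two bytes per pixel
	 * (512 bytes per 16x16 macroblock); it is resized on demand in
	 * ruvd_decode_bitstream() */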
1087 	bs_buf_size = width * height * (512 / (16 * 16));
1088 	for (i = 0; i < NUM_BUFFERS; ++i) {
1089 		unsigned msg_fb_it_size = FB_BUFFER_OFFSET + dec->fb_size;
1090 		STATIC_ASSERT(sizeof(struct ruvd_msg) <= FB_BUFFER_OFFSET);
1091 		if (have_it(dec))
1092 			msg_fb_it_size += IT_SCALING_TABLE_SIZE;
1093 		if (!rvid_create_buffer(dec->screen, &dec->msg_fb_it_buffers[i],
1094 					msg_fb_it_size, PIPE_USAGE_STAGING)) {
1095 			RVID_ERR("Can't allocate message buffers.\n");
1096 			goto error;
1097 		}
1098 
1099 		if (!rvid_create_buffer(dec->screen, &dec->bs_buffers[i],
1100 					bs_buf_size, PIPE_USAGE_STAGING)) {
1101 			RVID_ERR("Can't allocate bitstream buffers.\n");
1102 			goto error;
1103 		}
1104 
1105 		rvid_clear_buffer(context, &dec->msg_fb_it_buffers[i]);
1106 		rvid_clear_buffer(context, &dec->bs_buffers[i]);
1107 	}
1108 
1109 	dpb_size = calc_dpb_size(dec);
1110 	if (dpb_size) {
1111 		if (!rvid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) {
1112 			RVID_ERR("Can't allocate dpb.\n");
1113 			goto error;
1114 		}
1115 		rvid_clear_buffer(context, &dec->dpb);
1116 	}
1117 
1118 	dec->reg.data0 = RUVD_GPCOM_VCPU_DATA0;
1119 	dec->reg.data1 = RUVD_GPCOM_VCPU_DATA1;
1120 	dec->reg.cmd = RUVD_GPCOM_VCPU_CMD;
1121 	dec->reg.cntl = RUVD_ENGINE_CNTL;
1122 
1123 	map_msg_fb_it_buf(dec);
1124 	dec->msg->size = sizeof(*dec->msg);
1125 	dec->msg->msg_type = RUVD_MSG_CREATE;
1126 	dec->msg->stream_handle = dec->stream_handle;
1127 	dec->msg->body.create.stream_type = dec->stream_type;
1128 	dec->msg->body.create.width_in_samples = dec->base.width;
1129 	dec->msg->body.create.height_in_samples = dec->base.height;
1130 	dec->msg->body.create.dpb_size = dpb_size;
1131 	send_msg_buf(dec);
1132 	r = flush(dec, 0, NULL);
1133 	if (r)
1134 		goto error;
1135 
1136 	next_buffer(dec);
1137 
1138 	return &dec->base;
1139 
1140 error:
1141 	dec->ws->cs_destroy(&dec->cs);
1142 
1143 	for (i = 0; i < NUM_BUFFERS; ++i) {
1144 		rvid_destroy_buffer(&dec->msg_fb_it_buffers[i]);
1145 		rvid_destroy_buffer(&dec->bs_buffers[i]);
1146 	}
1147 
1148 	rvid_destroy_buffer(&dec->dpb);
1149 	rvid_destroy_buffer(&dec->ctx);
1150 	rvid_destroy_buffer(&dec->sessionctx);
1151 
1152 	FREE(dec);
1153 
1154 	return NULL;
1155 }
1156 
1157 /* calculate top/bottom offset */
1158 static unsigned texture_offset(struct radeon_surf *surface, unsigned layer)
1159 {
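	/* level offsets are stored in units of 256 bytes and slice sizes in
	 * dwords, hence the conversions */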
1160 	return (uint64_t)surface->u.legacy.level[0].offset_256B * 256 +
1161 		layer * (uint64_t)surface->u.legacy.level[0].slice_size_dw * 4;
1162 }
1163 
1164 /* hw encode the aspect of macro tiles */
1165 static unsigned macro_tile_aspect(unsigned macro_tile_aspect)
1166 {
1167 	switch (macro_tile_aspect) {
1168 	default:
1169 	case 1: macro_tile_aspect = 0;  break;
1170 	case 2: macro_tile_aspect = 1;  break;
1171 	case 4: macro_tile_aspect = 2;  break;
1172 	case 8: macro_tile_aspect = 3;  break;
1173 	}
1174 	return macro_tile_aspect;
1175 }
1176 
1177 /* hw encode the bank width and height */
1178 static unsigned bank_wh(unsigned bankwh)
1179 {
1180 	switch (bankwh) {
1181 	default:
1182 	case 1: bankwh = 0;     break;
1183 	case 2: bankwh = 1;     break;
1184 	case 4: bankwh = 2;     break;
1185 	case 8: bankwh = 3;     break;
1186 	}
1187 	return bankwh;
1188 }
1189 
1190 /**
1191  * fill decoding target field from the luma and chroma surfaces
1192  */
1193 void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,
1194 			  struct radeon_surf *chroma)
1195 {
1196 	msg->body.decode.dt_pitch = luma->u.legacy.level[0].nblk_x * luma->blk_w;
1197 	switch (luma->u.legacy.level[0].mode) {
1198 	case RADEON_SURF_MODE_LINEAR_ALIGNED:
1199 		msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
1200 		msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;
1201 		break;
1202 	case RADEON_SURF_MODE_1D:
1203 		msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
1204 		msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_1D_THIN;
1205 		break;
1206 	case RADEON_SURF_MODE_2D:
1207 		msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
1208 		msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_2D_THIN;
1209 		break;
1210 	default:
1211 		assert(0);
1212 		break;
1213 	}
1214 
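	/* the top field always comes from layer 0; in field mode the bottom
	 * field lives in layer 1, otherwise the bottom offsets simply alias
	 * the top ones */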
1215 	msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0);
1216 	if (chroma)
1217 		msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0);
1218 	if (msg->body.decode.dt_field_mode) {
1219 		msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1);
1220 		if (chroma)
1221 			msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1);
1222 	} else {
1223 		msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset;
1224 		msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset;
1225 	}
1226 
1227 	if (chroma) {
1228 		assert(luma->u.legacy.bankw == chroma->u.legacy.bankw);
1229 		assert(luma->u.legacy.bankh == chroma->u.legacy.bankh);
1230 		assert(luma->u.legacy.mtilea == chroma->u.legacy.mtilea);
1231 	}
1232 
1233 	msg->body.decode.dt_surf_tile_config |= RUVD_BANK_WIDTH(bank_wh(luma->u.legacy.bankw));
1234 	msg->body.decode.dt_surf_tile_config |= RUVD_BANK_HEIGHT(bank_wh(luma->u.legacy.bankh));
1235 	msg->body.decode.dt_surf_tile_config |= RUVD_MACRO_TILE_ASPECT_RATIO(macro_tile_aspect(luma->u.legacy.mtilea));
1236 }
1237