1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2023, Collabora
4  *
5  * Author: Benjamin Gaignard <[email protected]>
6  */
7 
8 #include <media/v4l2-mem2mem.h>
9 #include "hantro.h"
10 #include "hantro_v4l2.h"
11 #include "rockchip_vpu981_regs.h"
12 
#define AV1_DEC_MODE		17
/* One global motion model per reference frame: 6 x s32 params + 4 x s16 shear values */
#define GM_GLOBAL_MODELS_PER_FRAME	7
#define GLOBAL_MODEL_TOTAL_SIZE	(6 * 4 + 4 * 2)
#define GLOBAL_MODEL_SIZE	ALIGN(GM_GLOBAL_MODELS_PER_FRAME * GLOBAL_MODEL_TOTAL_SIZE, 2048)
#define AV1_MAX_TILES		128
/* 16 bytes of tile info are written per tile (see rockchip_vpu981_av1_dec_set_tile_info()) */
#define AV1_TILE_INFO_SIZE	(AV1_MAX_TILES * 16)
#define AV1DEC_MAX_PIC_BUFFERS	24
/* Reference scaling factors are Q14 fixed point */
#define AV1_REF_SCALE_SHIFT	14
#define AV1_INVALID_IDX		-1
#define MAX_FRAME_DISTANCE	31
#define AV1_PRIMARY_REF_NONE	7
#define AV1_TILE_SIZE		ALIGN(32 * 128, 4096)
/*
 * These 3 values aren't defined in enum v4l2_av1_segment_feature because
 * they are not part of the specification
 */
#define V4L2_AV1_SEG_LVL_ALT_LF_Y_H	2
#define V4L2_AV1_SEG_LVL_ALT_LF_U	3
#define V4L2_AV1_SEG_LVL_ALT_LF_V	4

/* Super-resolution constants as defined by the AV1 specification */
#define SUPERRES_SCALE_BITS 3
#define SCALE_NUMERATOR 8
#define SUPERRES_SCALE_DENOMINATOR_MIN (SCALE_NUMERATOR + 1)

/* Fixed-point sub-pel layout of the resize (upscale) step */
#define RS_SUBPEL_BITS 6
#define RS_SUBPEL_MASK ((1 << RS_SUBPEL_BITS) - 1)
#define RS_SCALE_SUBPEL_BITS 14
#define RS_SCALE_SUBPEL_MASK ((1 << RS_SCALE_SUBPEL_BITS) - 1)
#define RS_SCALE_EXTRA_BITS (RS_SCALE_SUBPEL_BITS - RS_SUBPEL_BITS)
#define RS_SCALE_EXTRA_OFF (1 << (RS_SCALE_EXTRA_BITS - 1))

#define IS_INTRA(type) ((type == V4L2_AV1_KEY_FRAME) || (type == V4L2_AV1_INTRA_ONLY_FRAME))

/* 0-based reference buffer indices derived from the V4L2 reference frame enum */
#define LST_BUF_IDX (V4L2_AV1_REF_LAST_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define LST2_BUF_IDX (V4L2_AV1_REF_LAST2_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define LST3_BUF_IDX (V4L2_AV1_REF_LAST3_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define GLD_BUF_IDX (V4L2_AV1_REF_GOLDEN_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define BWD_BUF_IDX (V4L2_AV1_REF_BWDREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define ALT2_BUF_IDX (V4L2_AV1_REF_ALTREF2_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define ALT_BUF_IDX (V4L2_AV1_REF_ALTREF_FRAME - V4L2_AV1_REF_LAST_FRAME)

/* Parameters of the reciprocal LUT used for division-free shear computation */
#define DIV_LUT_PREC_BITS 14
#define DIV_LUT_BITS 8
#define DIV_LUT_NUM BIT(DIV_LUT_BITS)
#define WARP_PARAM_REDUCE_BITS 6
#define WARPEDMODEL_PREC_BITS 16
59 
/*
 * Divide @value by 2^@n, rounding half up.  Implemented as a statement
 * expression so each argument is evaluated exactly once; the arguments
 * are parenthesized at capture to avoid precedence surprises when a
 * compound expression is passed in.
 */
#define AV1_DIV_ROUND_UP_POW2(value, n)			\
({							\
	typeof(n) _n  = (n);				\
	typeof(value) _value = (value);			\
	(_value + (BIT(_n) >> 1)) >> _n;		\
})

/*
 * Signed variant of AV1_DIV_ROUND_UP_POW2: rounds the magnitude and
 * re-applies the sign, matching the AV1 spec's ROUND2SIGNED().
 */
#define AV1_DIV_ROUND_UP_POW2_SIGNED(value, n)				\
({									\
	typeof(n) _n_  = (n);						\
	typeof(value) _value_ = (value);				\
	(((_value_) < 0) ? -AV1_DIV_ROUND_UP_POW2(-(_value_), (_n_))	\
		: AV1_DIV_ROUND_UP_POW2((_value_), (_n_)));		\
})
74 
/*
 * Film grain data in the layout consumed by the hardware: this struct
 * is copied into the film_grain coherent DMA buffer (allocated in
 * rockchip_vpu981_av1_dec_init()).
 */
struct rockchip_av1_film_grain {
	u8 scaling_lut_y[256];		/* luma grain scaling LUT */
	u8 scaling_lut_cb[256];		/* Cb grain scaling LUT */
	u8 scaling_lut_cr[256];		/* Cr grain scaling LUT */
	s16 cropped_luma_grain_block[4096];
	s16 cropped_chroma_grain_block[1024 * 2];	/* Cb and Cr halves */
};
82 
/*
 * Reciprocal look-up table from the AV1 specification: entry f holds the
 * Q14 (DIV_LUT_PREC_BITS) fixed-point multiplier used by
 * rockchip_vpu981_av1_dec_resolve_divisor_32() to turn a division into a
 * multiply-and-shift.  Values run from 16384 (1/1.0) down to 8192 (1/2.0).
 */
static const short div_lut[DIV_LUT_NUM + 1] = {
	16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
	15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
	15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
	14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
	13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
	13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
	13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
	12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
	12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
	11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
	11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
	11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
	10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
	10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
	10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
	9963,  9939,  9916,  9892,  9869,  9846,  9823,  9800,  9777,  9754,  9732,
	9709,  9687,  9664,  9642,  9620,  9598,  9576,  9554,  9533,  9511,  9489,
	9468,  9447,  9425,  9404,  9383,  9362,  9341,  9321,  9300,  9279,  9259,
	9239,  9218,  9198,  9178,  9158,  9138,  9118,  9098,  9079,  9059,  9039,
	9020,  9001,  8981,  8962,  8943,  8924,  8905,  8886,  8867,  8849,  8830,
	8812,  8793,  8775,  8756,  8738,  8720,  8702,  8684,  8666,  8648,  8630,
	8613,  8595,  8577,  8560,  8542,  8525,  8508,  8490,  8473,  8456,  8439,
	8422,  8405,  8389,  8372,  8355,  8339,  8322,  8306,  8289,  8273,  8257,
	8240,  8224,  8208,  8192,
};
109 
rockchip_vpu981_get_frame_index(struct hantro_ctx * ctx,int ref)110 static int rockchip_vpu981_get_frame_index(struct hantro_ctx *ctx, int ref)
111 {
112 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
113 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
114 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
115 	u64 timestamp;
116 	int i, idx = frame->ref_frame_idx[ref];
117 
118 	if (idx >= V4L2_AV1_TOTAL_REFS_PER_FRAME || idx < 0)
119 		return AV1_INVALID_IDX;
120 
121 	timestamp = frame->reference_frame_ts[idx];
122 	for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
123 		if (!av1_dec->frame_refs[i].used)
124 			continue;
125 		if (av1_dec->frame_refs[i].timestamp == timestamp)
126 			return i;
127 	}
128 
129 	return AV1_INVALID_IDX;
130 }
131 
rockchip_vpu981_get_order_hint(struct hantro_ctx * ctx,int ref)132 static int rockchip_vpu981_get_order_hint(struct hantro_ctx *ctx, int ref)
133 {
134 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
135 	int idx = rockchip_vpu981_get_frame_index(ctx, ref);
136 
137 	if (idx != AV1_INVALID_IDX)
138 		return av1_dec->frame_refs[idx].order_hint;
139 
140 	return 0;
141 }
142 
/*
 * Claim a free frame_refs[] entry for the frame identified by
 * @timestamp, record its geometry and ordering metadata, and make it
 * the current frame.  Returns the claimed index, or AV1_INVALID_IDX
 * when every entry is already in use.
 */
static int rockchip_vpu981_av1_dec_frame_ref(struct hantro_ctx *ctx,
					     u64 timestamp)
{
	struct hantro_av1_dec_hw_ctx *dec = &ctx->av1_dec;
	const struct v4l2_ctrl_av1_frame *frame = dec->ctrls.frame;
	unsigned int width = frame->frame_width_minus_1 + 1;
	unsigned int height = frame->frame_height_minus_1 + 1;
	int slot, ref;

	for (slot = 0; slot < AV1_MAX_FRAME_BUF_COUNT; slot++) {
		if (dec->frame_refs[slot].used)
			continue;

		dec->frame_refs[slot].width = width;
		dec->frame_refs[slot].height = height;
		/* mode-info grid is kept in 8-pixel units */
		dec->frame_refs[slot].mi_cols = DIV_ROUND_UP(width, 8);
		dec->frame_refs[slot].mi_rows = DIV_ROUND_UP(height, 8);
		dec->frame_refs[slot].timestamp = timestamp;
		dec->frame_refs[slot].frame_type = frame->frame_type;
		dec->frame_refs[slot].order_hint = frame->order_hint;
		dec->frame_refs[slot].vb2_ref = hantro_get_dst_buf(ctx);

		for (ref = 0; ref < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref++)
			dec->frame_refs[slot].order_hints[ref] = frame->order_hints[ref];

		dec->frame_refs[slot].used = true;
		dec->current_frame_index = slot;

		return slot;
	}

	return AV1_INVALID_IDX;
}
176 
rockchip_vpu981_av1_dec_frame_unref(struct hantro_ctx * ctx,int idx)177 static void rockchip_vpu981_av1_dec_frame_unref(struct hantro_ctx *ctx, int idx)
178 {
179 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
180 
181 	if (idx >= 0)
182 		av1_dec->frame_refs[idx].used = false;
183 }
184 
rockchip_vpu981_av1_dec_clean_refs(struct hantro_ctx * ctx)185 static void rockchip_vpu981_av1_dec_clean_refs(struct hantro_ctx *ctx)
186 {
187 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
188 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
189 
190 	int ref, idx;
191 
192 	for (idx = 0; idx < AV1_MAX_FRAME_BUF_COUNT; idx++) {
193 		u64 timestamp = av1_dec->frame_refs[idx].timestamp;
194 		bool used = false;
195 
196 		if (!av1_dec->frame_refs[idx].used)
197 			continue;
198 
199 		for (ref = 0; ref < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref++) {
200 			if (ctrls->frame->reference_frame_ts[ref] == timestamp)
201 				used = true;
202 		}
203 
204 		if (!used)
205 			rockchip_vpu981_av1_dec_frame_unref(ctx, idx);
206 	}
207 }
208 
rockchip_vpu981_av1_dec_luma_size(struct hantro_ctx * ctx)209 static size_t rockchip_vpu981_av1_dec_luma_size(struct hantro_ctx *ctx)
210 {
211 	return ctx->dst_fmt.width * ctx->dst_fmt.height * ctx->bit_depth / 8;
212 }
213 
rockchip_vpu981_av1_dec_chroma_size(struct hantro_ctx * ctx)214 static size_t rockchip_vpu981_av1_dec_chroma_size(struct hantro_ctx *ctx)
215 {
216 	size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
217 
218 	return ALIGN((cr_offset * 3) / 2, 64);
219 }
220 
rockchip_vpu981_av1_dec_tiles_free(struct hantro_ctx * ctx)221 static void rockchip_vpu981_av1_dec_tiles_free(struct hantro_ctx *ctx)
222 {
223 	struct hantro_dev *vpu = ctx->dev;
224 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
225 
226 	if (av1_dec->db_data_col.cpu)
227 		dma_free_coherent(vpu->dev, av1_dec->db_data_col.size,
228 				  av1_dec->db_data_col.cpu,
229 				  av1_dec->db_data_col.dma);
230 	av1_dec->db_data_col.cpu = NULL;
231 
232 	if (av1_dec->db_ctrl_col.cpu)
233 		dma_free_coherent(vpu->dev, av1_dec->db_ctrl_col.size,
234 				  av1_dec->db_ctrl_col.cpu,
235 				  av1_dec->db_ctrl_col.dma);
236 	av1_dec->db_ctrl_col.cpu = NULL;
237 
238 	if (av1_dec->cdef_col.cpu)
239 		dma_free_coherent(vpu->dev, av1_dec->cdef_col.size,
240 				  av1_dec->cdef_col.cpu, av1_dec->cdef_col.dma);
241 	av1_dec->cdef_col.cpu = NULL;
242 
243 	if (av1_dec->sr_col.cpu)
244 		dma_free_coherent(vpu->dev, av1_dec->sr_col.size,
245 				  av1_dec->sr_col.cpu, av1_dec->sr_col.dma);
246 	av1_dec->sr_col.cpu = NULL;
247 
248 	if (av1_dec->lr_col.cpu)
249 		dma_free_coherent(vpu->dev, av1_dec->lr_col.size,
250 				  av1_dec->lr_col.cpu, av1_dec->lr_col.dma);
251 	av1_dec->lr_col.cpu = NULL;
252 }
253 
/*
 * (Re)allocate the per-tile-column auxiliary buffers when the current
 * frame needs more space than is already allocated.  Every buffer
 * scales with the frame height (in pixels or 64x64 superblocks) times
 * the number of tile columns; the multipliers are hardware-specific.
 * Returns 0 on success, -ENOMEM on failure (all column buffers freed).
 */
static int rockchip_vpu981_av1_dec_tiles_reallocate(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_av1_tile_info *tile_info = &ctrls->frame->tile_info;
	unsigned int num_tile_cols = tile_info->tile_cols;
	unsigned int height = ALIGN(ctrls->frame->frame_height_minus_1 + 1, 64);
	unsigned int height_in_sb = height / 64;	/* height in 64x64 superblocks */
	unsigned int stripe_num = ((height + 8) + 63) / 64;	/* loop-restoration stripes */
	size_t size;

	/* Existing allocation large enough (same formula as below)?  Reuse it. */
	if (av1_dec->db_data_col.size >=
	    ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols)
		return 0;

	rockchip_vpu981_av1_dec_tiles_free(ctx);

	/* Deblocking filter pixel data, per tile column */
	size = ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols;
	av1_dec->db_data_col.cpu = dma_alloc_coherent(vpu->dev, size,
						      &av1_dec->db_data_col.dma,
						      GFP_KERNEL);
	if (!av1_dec->db_data_col.cpu)
		goto buffer_allocation_error;
	av1_dec->db_data_col.size = size;

	/* Deblocking filter control data, per tile column */
	size = ALIGN(height * 2 * 16 / 4, 128) * num_tile_cols;
	av1_dec->db_ctrl_col.cpu = dma_alloc_coherent(vpu->dev, size,
						      &av1_dec->db_ctrl_col.dma,
						      GFP_KERNEL);
	if (!av1_dec->db_ctrl_col.cpu)
		goto buffer_allocation_error;
	av1_dec->db_ctrl_col.size = size;

	/* CDEF filter column buffer */
	size = ALIGN(height_in_sb * 44 * ctx->bit_depth * 16 / 8, 128) * num_tile_cols;
	av1_dec->cdef_col.cpu = dma_alloc_coherent(vpu->dev, size,
						   &av1_dec->cdef_col.dma,
						   GFP_KERNEL);
	if (!av1_dec->cdef_col.cpu)
		goto buffer_allocation_error;
	av1_dec->cdef_col.size = size;

	/* Super-resolution column buffer */
	size = ALIGN(height_in_sb * (3040 + 1280), 128) * num_tile_cols;
	av1_dec->sr_col.cpu = dma_alloc_coherent(vpu->dev, size,
						 &av1_dec->sr_col.dma,
						 GFP_KERNEL);
	if (!av1_dec->sr_col.cpu)
		goto buffer_allocation_error;
	av1_dec->sr_col.size = size;

	/* Loop-restoration column buffer */
	size = ALIGN(stripe_num * 1536 * ctx->bit_depth / 8, 128) * num_tile_cols;
	av1_dec->lr_col.cpu = dma_alloc_coherent(vpu->dev, size,
						 &av1_dec->lr_col.dma,
						 GFP_KERNEL);
	if (!av1_dec->lr_col.cpu)
		goto buffer_allocation_error;
	av1_dec->lr_col.size = size;

	av1_dec->num_tile_cols_allocated = num_tile_cols;
	return 0;

buffer_allocation_error:
	rockchip_vpu981_av1_dec_tiles_free(ctx);
	return -ENOMEM;
}
319 
/*
 * Release every coherent buffer owned by the AV1 decoder context.
 * Each pointer is NULLed after freeing, so calling this on a partially
 * initialized (zeroed) context is safe and the function is idempotent.
 */
void rockchip_vpu981_av1_dec_exit(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;

	if (av1_dec->global_model.cpu)
		dma_free_coherent(vpu->dev, av1_dec->global_model.size,
				  av1_dec->global_model.cpu,
				  av1_dec->global_model.dma);
	av1_dec->global_model.cpu = NULL;

	if (av1_dec->tile_info.cpu)
		dma_free_coherent(vpu->dev, av1_dec->tile_info.size,
				  av1_dec->tile_info.cpu,
				  av1_dec->tile_info.dma);
	av1_dec->tile_info.cpu = NULL;

	if (av1_dec->film_grain.cpu)
		dma_free_coherent(vpu->dev, av1_dec->film_grain.size,
				  av1_dec->film_grain.cpu,
				  av1_dec->film_grain.dma);
	av1_dec->film_grain.cpu = NULL;

	if (av1_dec->prob_tbl.cpu)
		dma_free_coherent(vpu->dev, av1_dec->prob_tbl.size,
				  av1_dec->prob_tbl.cpu, av1_dec->prob_tbl.dma);
	av1_dec->prob_tbl.cpu = NULL;

	if (av1_dec->prob_tbl_out.cpu)
		dma_free_coherent(vpu->dev, av1_dec->prob_tbl_out.size,
				  av1_dec->prob_tbl_out.cpu,
				  av1_dec->prob_tbl_out.dma);
	av1_dec->prob_tbl_out.cpu = NULL;

	if (av1_dec->tile_buf.cpu)
		dma_free_coherent(vpu->dev, av1_dec->tile_buf.size,
				  av1_dec->tile_buf.cpu, av1_dec->tile_buf.dma);
	av1_dec->tile_buf.cpu = NULL;

	rockchip_vpu981_av1_dec_tiles_free(ctx);
}
361 
rockchip_vpu981_av1_dec_init(struct hantro_ctx * ctx)362 int rockchip_vpu981_av1_dec_init(struct hantro_ctx *ctx)
363 {
364 	struct hantro_dev *vpu = ctx->dev;
365 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
366 
367 	memset(av1_dec, 0, sizeof(*av1_dec));
368 
369 	av1_dec->global_model.cpu = dma_alloc_coherent(vpu->dev, GLOBAL_MODEL_SIZE,
370 						       &av1_dec->global_model.dma,
371 						       GFP_KERNEL);
372 	if (!av1_dec->global_model.cpu)
373 		return -ENOMEM;
374 	av1_dec->global_model.size = GLOBAL_MODEL_SIZE;
375 
376 	av1_dec->tile_info.cpu = dma_alloc_coherent(vpu->dev, AV1_MAX_TILES,
377 						    &av1_dec->tile_info.dma,
378 						    GFP_KERNEL);
379 	if (!av1_dec->tile_info.cpu)
380 		return -ENOMEM;
381 	av1_dec->tile_info.size = AV1_MAX_TILES;
382 
383 	av1_dec->film_grain.cpu = dma_alloc_coherent(vpu->dev,
384 						     ALIGN(sizeof(struct rockchip_av1_film_grain), 2048),
385 						     &av1_dec->film_grain.dma,
386 						     GFP_KERNEL);
387 	if (!av1_dec->film_grain.cpu)
388 		return -ENOMEM;
389 	av1_dec->film_grain.size = ALIGN(sizeof(struct rockchip_av1_film_grain), 2048);
390 
391 	av1_dec->prob_tbl.cpu = dma_alloc_coherent(vpu->dev,
392 						   ALIGN(sizeof(struct av1cdfs), 2048),
393 						   &av1_dec->prob_tbl.dma,
394 						   GFP_KERNEL);
395 	if (!av1_dec->prob_tbl.cpu)
396 		return -ENOMEM;
397 	av1_dec->prob_tbl.size = ALIGN(sizeof(struct av1cdfs), 2048);
398 
399 	av1_dec->prob_tbl_out.cpu = dma_alloc_coherent(vpu->dev,
400 						       ALIGN(sizeof(struct av1cdfs), 2048),
401 						       &av1_dec->prob_tbl_out.dma,
402 						       GFP_KERNEL);
403 	if (!av1_dec->prob_tbl_out.cpu)
404 		return -ENOMEM;
405 	av1_dec->prob_tbl_out.size = ALIGN(sizeof(struct av1cdfs), 2048);
406 	av1_dec->cdfs = &av1_dec->default_cdfs;
407 	av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
408 
409 	rockchip_av1_set_default_cdfs(av1_dec->cdfs, av1_dec->cdfs_ndvc);
410 
411 	av1_dec->tile_buf.cpu = dma_alloc_coherent(vpu->dev,
412 						   AV1_TILE_SIZE,
413 						   &av1_dec->tile_buf.dma,
414 						   GFP_KERNEL);
415 	if (!av1_dec->tile_buf.cpu)
416 		return -ENOMEM;
417 	av1_dec->tile_buf.size = AV1_TILE_SIZE;
418 
419 	return 0;
420 }
421 
rockchip_vpu981_av1_dec_prepare_run(struct hantro_ctx * ctx)422 static int rockchip_vpu981_av1_dec_prepare_run(struct hantro_ctx *ctx)
423 {
424 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
425 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
426 
427 	ctrls->sequence = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_SEQUENCE);
428 	if (WARN_ON(!ctrls->sequence))
429 		return -EINVAL;
430 
431 	ctrls->tile_group_entry =
432 	    hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY);
433 	if (WARN_ON(!ctrls->tile_group_entry))
434 		return -EINVAL;
435 
436 	ctrls->frame = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FRAME);
437 	if (WARN_ON(!ctrls->frame))
438 		return -EINVAL;
439 
440 	ctrls->film_grain =
441 	    hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FILM_GRAIN);
442 
443 	return rockchip_vpu981_av1_dec_tiles_reallocate(ctx);
444 }
445 
/* 0-based index of the highest set bit of @n; returns 0 for n == 0. */
static inline int rockchip_vpu981_av1_dec_get_msb(u32 n)
{
	return n ? 31 - __builtin_clz(n) : 0;
}
452 
/*
 * Turn a division by @d into a fixed-point multiply-and-shift,
 * following the AV1 spec's resolve_divisor(): returns the Q14
 * multiplier from div_lut[] and stores the total right-shift amount
 * in *shift.  Returns -1 if the computed table index is out of range.
 */
static short rockchip_vpu981_av1_dec_resolve_divisor_32(u32 d, short *shift)
{
	int f;
	u64 e;

	*shift = rockchip_vpu981_av1_dec_get_msb(d);
	/* e is obtained from D after resetting the most significant 1 bit. */
	e = d - ((u32)1 << *shift);
	/* Get the most significant DIV_LUT_BITS (8) bits of e into f */
	if (*shift > DIV_LUT_BITS)
		f = AV1_DIV_ROUND_UP_POW2(e, *shift - DIV_LUT_BITS);
	else
		f = e << (DIV_LUT_BITS - *shift);
	/* Defensive: the LUT only has DIV_LUT_NUM + 1 entries */
	if (f > DIV_LUT_NUM)
		return -1;
	*shift += DIV_LUT_PREC_BITS;
	/* Use f as lookup into the precomputed table of multipliers */
	return div_lut[f];
}
472 
473 static void
rockchip_vpu981_av1_dec_get_shear_params(const u32 * params,s64 * alpha,s64 * beta,s64 * gamma,s64 * delta)474 rockchip_vpu981_av1_dec_get_shear_params(const u32 *params, s64 *alpha,
475 					 s64 *beta, s64 *gamma, s64 *delta)
476 {
477 	const int *mat = params;
478 	short shift;
479 	short y;
480 	long long gv, dv;
481 
482 	if (mat[2] <= 0)
483 		return;
484 
485 	*alpha = clamp_val(mat[2] - (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX);
486 	*beta = clamp_val(mat[3], S16_MIN, S16_MAX);
487 
488 	y = rockchip_vpu981_av1_dec_resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1);
489 
490 	gv = ((long long)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y;
491 
492 	*gamma = clamp_val((int)AV1_DIV_ROUND_UP_POW2_SIGNED(gv, shift), S16_MIN, S16_MAX);
493 
494 	dv = ((long long)mat[3] * mat[4]) * y;
495 	*delta = clamp_val(mat[5] -
496 		(int)AV1_DIV_ROUND_UP_POW2_SIGNED(dv, shift) - (1 << WARPEDMODEL_PREC_BITS),
497 		S16_MIN, S16_MAX);
498 
499 	*alpha = AV1_DIV_ROUND_UP_POW2_SIGNED(*alpha, WARP_PARAM_REDUCE_BITS)
500 		 * (1 << WARP_PARAM_REDUCE_BITS);
501 	*beta = AV1_DIV_ROUND_UP_POW2_SIGNED(*beta, WARP_PARAM_REDUCE_BITS)
502 		* (1 << WARP_PARAM_REDUCE_BITS);
503 	*gamma = AV1_DIV_ROUND_UP_POW2_SIGNED(*gamma, WARP_PARAM_REDUCE_BITS)
504 		 * (1 << WARP_PARAM_REDUCE_BITS);
505 	*delta = AV1_DIV_ROUND_UP_POW2_SIGNED(*delta, WARP_PARAM_REDUCE_BITS)
506 		* (1 << WARP_PARAM_REDUCE_BITS);
507 }
508 
/*
 * Serialize the global motion models for the seven references into the
 * global_model DMA buffer in the layout the hardware expects: six s32
 * parameters followed by four s16 shear values per model (see
 * GLOBAL_MODEL_TOTAL_SIZE), then point the hardware at the buffer.
 */
static void rockchip_vpu981_av1_dec_set_global_model(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	const struct v4l2_av1_global_motion *gm = &frame->global_motion;
	u8 *dst = av1_dec->global_model.cpu;
	struct hantro_dev *vpu = ctx->dev;
	int ref_frame, i;

	memset(dst, 0, GLOBAL_MODEL_SIZE);
	for (ref_frame = 0; ref_frame < V4L2_AV1_REFS_PER_FRAME; ++ref_frame) {
		s64 alpha = 0, beta = 0, gamma = 0, delta = 0;

		/*
		 * Copy the six warp parameters; parameters 2 and 3 are
		 * written swapped relative to the V4L2 ordering.
		 */
		for (i = 0; i < 6; ++i) {
			if (i == 2)
				*(s32 *)dst =
					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][3];
			else if (i == 3)
				*(s32 *)dst =
					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][2];
			else
				*(s32 *)dst =
					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][i];
			dst += 4;
		}

		/* Shear values only exist for translation/rot-zoom/affine models */
		if (gm->type[V4L2_AV1_REF_LAST_FRAME + ref_frame] <= V4L2_AV1_WARP_MODEL_AFFINE)
			rockchip_vpu981_av1_dec_get_shear_params(&gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][0],
								 &alpha, &beta, &gamma, &delta);

		*(s16 *)dst = alpha;
		dst += 2;
		*(s16 *)dst = beta;
		dst += 2;
		*(s16 *)dst = gamma;
		dst += 2;
		*(s16 *)dst = delta;
		dst += 2;
	}

	hantro_write_addr(vpu, AV1_GLOBAL_MODEL, av1_dec->global_model.dma);
}
552 
/*
 * Smallest k such that (1 << k) >= target, i.e. ceil(log2(target));
 * returns 0 for target <= 1.
 */
static int rockchip_vpu981_av1_tile_log2(int target)
{
	int k = 0;

	while ((1 << k) < target)
		k++;

	return k;
}
565 
/*
 * Fill the tile_info DMA buffer with one 16-byte record per tile
 * (size in superblocks, byte offset and end offset of the tile data)
 * and program the tile-related registers.  Tiles are emitted in
 * column-major order to match av1_tile_transpose = 1, which is also
 * why context_update_tile_id is recomputed as x * rows + y.
 */
static void rockchip_vpu981_av1_dec_set_tile_info(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_av1_tile_info *tile_info = &ctrls->frame->tile_info;
	const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
	    ctrls->tile_group_entry;
	int context_update_y =
	    tile_info->context_update_tile_id / tile_info->tile_cols;
	int context_update_x =
	    tile_info->context_update_tile_id % tile_info->tile_cols;
	int context_update_tile_id =
	    context_update_x * tile_info->tile_rows + context_update_y;
	u8 *dst = av1_dec->tile_info.cpu;
	struct hantro_dev *vpu = ctx->dev;
	int tile0, tile1;

	memset(dst, 0, av1_dec->tile_info.size);

	for (tile0 = 0; tile0 < tile_info->tile_cols; tile0++) {
		for (tile1 = 0; tile1 < tile_info->tile_rows; tile1++) {
			int tile_id = tile1 * tile_info->tile_cols + tile0;
			u32 start, end;
			u32 y0 =
			    tile_info->height_in_sbs_minus_1[tile1] + 1;
			u32 x0 = tile_info->width_in_sbs_minus_1[tile0] + 1;

			/* tile size in SB units (width,height), little-endian u32 pairs */
			*dst++ = x0;
			*dst++ = 0;
			*dst++ = 0;
			*dst++ = 0;
			*dst++ = y0;
			*dst++ = 0;
			*dst++ = 0;
			*dst++ = 0;

			/* tile start position, relative to the first tile's offset */
			start = group_entry[tile_id].tile_offset - group_entry[0].tile_offset;
			*dst++ = start & 255;
			*dst++ = (start >> 8) & 255;
			*dst++ = (start >> 16) & 255;
			*dst++ = (start >> 24) & 255;

			/* number of bytes in tile data */
			end = start + group_entry[tile_id].tile_size;
			*dst++ = end & 255;
			*dst++ = (end >> 8) & 255;
			*dst++ = (end >> 16) & 255;
			*dst++ = (end >> 24) & 255;
		}
	}

	hantro_reg_write(vpu, &av1_multicore_expect_context_update, !!(context_update_x == 0));
	hantro_reg_write(vpu, &av1_tile_enable,
			 !!((tile_info->tile_cols > 1) || (tile_info->tile_rows > 1)));
	hantro_reg_write(vpu, &av1_num_tile_cols_8k, tile_info->tile_cols);
	hantro_reg_write(vpu, &av1_num_tile_rows_8k, tile_info->tile_rows);
	hantro_reg_write(vpu, &av1_context_update_tile_id, context_update_tile_id);
	hantro_reg_write(vpu, &av1_tile_transpose, 1);
	/* Multi-tile frames use the stream's tile size field; single tile uses 4 bytes */
	if (rockchip_vpu981_av1_tile_log2(tile_info->tile_cols) ||
	    rockchip_vpu981_av1_tile_log2(tile_info->tile_rows))
		hantro_reg_write(vpu, &av1_dec_tile_size_mag, tile_info->tile_size_bytes - 1);
	else
		hantro_reg_write(vpu, &av1_dec_tile_size_mag, 3);

	hantro_write_addr(vpu, AV1_TILE_BASE, av1_dec->tile_info.dma);
}
634 
rockchip_vpu981_av1_dec_get_dist(struct hantro_ctx * ctx,int a,int b)635 static int rockchip_vpu981_av1_dec_get_dist(struct hantro_ctx *ctx,
636 					    int a, int b)
637 {
638 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
639 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
640 	int bits = ctrls->sequence->order_hint_bits - 1;
641 	int diff, m;
642 
643 	if (!ctrls->sequence->order_hint_bits)
644 		return 0;
645 
646 	diff = a - b;
647 	m = 1 << bits;
648 	diff = (diff & (m - 1)) - (diff & m);
649 
650 	return diff;
651 }
652 
/*
 * Compute ref_frame_sign_bias[] for the current frame: 1 for
 * references whose order hint lies after the current frame, 0
 * otherwise.  All entries are zeroed when order hints are disabled
 * or the frame is intra.
 */
static void rockchip_vpu981_av1_dec_set_frame_sign_bias(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	const struct v4l2_ctrl_av1_sequence *sequence = ctrls->sequence;
	int i;

	if (!sequence->order_hint_bits || IS_INTRA(frame->frame_type)) {
		for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++)
			av1_dec->ref_frame_sign_bias[i] = 0;

		return;
	}
	// Identify the nearest forward and backward references.
	for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME - 1; i++) {
		if (rockchip_vpu981_get_frame_index(ctx, i) >= 0) {
			int rel_off =
			    rockchip_vpu981_av1_dec_get_dist(ctx,
							     rockchip_vpu981_get_order_hint(ctx, i),
							     frame->order_hint);
			/*
			 * i is the 0-based reference slot; sign_bias[] is
			 * indexed by the V4L2 reference enum, which starts
			 * at 1 (LAST), hence i + 1.
			 */
			av1_dec->ref_frame_sign_bias[i + 1] = (rel_off <= 0) ? 0 : 1;
		}
	}
}
678 
/*
 * Program geometry, Q14 scaling factors and the luma/chroma/MV base
 * addresses for hardware reference slot @ref (0..6), taking the
 * buffer from frame_refs[] entry @idx.  Returns true when the
 * reference is scaled, i.e. its dimensions differ from the current
 * frame's.
 */
static bool
rockchip_vpu981_av1_dec_set_ref(struct hantro_ctx *ctx, int ref, int idx,
				int width, int height)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	struct hantro_dev *vpu = ctx->dev;
	struct hantro_decoded_buffer *dst;
	dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
	int cur_width = frame->frame_width_minus_1 + 1;
	int cur_height = frame->frame_height_minus_1 + 1;
	/* Q14 ratio of reference to current dimensions, rounded */
	int scale_width =
	    ((width << AV1_REF_SCALE_SHIFT) + cur_width / 2) / cur_width;
	int scale_height =
	    ((height << AV1_REF_SCALE_SHIFT) + cur_height / 2) / cur_height;

	/*
	 * NOTE(review): scale_width feeds the *_ver_scale registers and
	 * scale_height the *_hor_scale ones — this cross-wiring looks
	 * deliberate but is worth confirming against the VPU981 register
	 * documentation.
	 */
	switch (ref) {
	case 0:
		hantro_reg_write(vpu, &av1_ref0_height, height);
		hantro_reg_write(vpu, &av1_ref0_width, width);
		hantro_reg_write(vpu, &av1_ref0_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref0_hor_scale, scale_height);
		break;
	case 1:
		hantro_reg_write(vpu, &av1_ref1_height, height);
		hantro_reg_write(vpu, &av1_ref1_width, width);
		hantro_reg_write(vpu, &av1_ref1_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref1_hor_scale, scale_height);
		break;
	case 2:
		hantro_reg_write(vpu, &av1_ref2_height, height);
		hantro_reg_write(vpu, &av1_ref2_width, width);
		hantro_reg_write(vpu, &av1_ref2_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref2_hor_scale, scale_height);
		break;
	case 3:
		hantro_reg_write(vpu, &av1_ref3_height, height);
		hantro_reg_write(vpu, &av1_ref3_width, width);
		hantro_reg_write(vpu, &av1_ref3_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref3_hor_scale, scale_height);
		break;
	case 4:
		hantro_reg_write(vpu, &av1_ref4_height, height);
		hantro_reg_write(vpu, &av1_ref4_width, width);
		hantro_reg_write(vpu, &av1_ref4_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref4_hor_scale, scale_height);
		break;
	case 5:
		hantro_reg_write(vpu, &av1_ref5_height, height);
		hantro_reg_write(vpu, &av1_ref5_width, width);
		hantro_reg_write(vpu, &av1_ref5_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref5_hor_scale, scale_height);
		break;
	case 6:
		hantro_reg_write(vpu, &av1_ref6_height, height);
		hantro_reg_write(vpu, &av1_ref6_width, width);
		hantro_reg_write(vpu, &av1_ref6_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref6_hor_scale, scale_height);
		break;
	default:
		pr_warn("AV1 invalid reference frame index\n");
	}

	dst = vb2_to_hantro_decoded_buf(&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
	luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
	chroma_addr = luma_addr + dst->av1.chroma_offset;
	mv_addr = luma_addr + dst->av1.mv_offset;

	hantro_write_addr(vpu, AV1_REFERENCE_Y(ref), luma_addr);
	hantro_write_addr(vpu, AV1_REFERENCE_CB(ref), chroma_addr);
	hantro_write_addr(vpu, AV1_REFERENCE_MV(ref), mv_addr);

	return (scale_width != (1 << AV1_REF_SCALE_SHIFT)) ||
		(scale_height != (1 << AV1_REF_SCALE_SHIFT));
}
755 
rockchip_vpu981_av1_dec_set_sign_bias(struct hantro_ctx * ctx,int ref,int val)756 static void rockchip_vpu981_av1_dec_set_sign_bias(struct hantro_ctx *ctx,
757 						  int ref, int val)
758 {
759 	struct hantro_dev *vpu = ctx->dev;
760 
761 	switch (ref) {
762 	case 0:
763 		hantro_reg_write(vpu, &av1_ref0_sign_bias, val);
764 		break;
765 	case 1:
766 		hantro_reg_write(vpu, &av1_ref1_sign_bias, val);
767 		break;
768 	case 2:
769 		hantro_reg_write(vpu, &av1_ref2_sign_bias, val);
770 		break;
771 	case 3:
772 		hantro_reg_write(vpu, &av1_ref3_sign_bias, val);
773 		break;
774 	case 4:
775 		hantro_reg_write(vpu, &av1_ref4_sign_bias, val);
776 		break;
777 	case 5:
778 		hantro_reg_write(vpu, &av1_ref5_sign_bias, val);
779 		break;
780 	case 6:
781 		hantro_reg_write(vpu, &av1_ref6_sign_bias, val);
782 		break;
783 	default:
784 		pr_warn("AV1 invalid sign bias index\n");
785 		break;
786 	}
787 }
788 
rockchip_vpu981_av1_dec_set_segmentation(struct hantro_ctx * ctx)789 static void rockchip_vpu981_av1_dec_set_segmentation(struct hantro_ctx *ctx)
790 {
791 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
792 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
793 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
794 	const struct v4l2_av1_segmentation *seg = &frame->segmentation;
795 	u32 segval[V4L2_AV1_MAX_SEGMENTS][V4L2_AV1_SEG_LVL_MAX] = { 0 };
796 	struct hantro_dev *vpu = ctx->dev;
797 	u8 segsign = 0, preskip_segid = 0, last_active_seg = 0, i, j;
798 
799 	if (!!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED) &&
800 	    frame->primary_ref_frame < V4L2_AV1_REFS_PER_FRAME) {
801 		int idx = rockchip_vpu981_get_frame_index(ctx, frame->primary_ref_frame);
802 
803 		if (idx >= 0) {
804 			dma_addr_t luma_addr, mv_addr = 0;
805 			struct hantro_decoded_buffer *seg;
806 			size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
807 
808 			seg = vb2_to_hantro_decoded_buf(&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
809 			luma_addr = hantro_get_dec_buf_addr(ctx, &seg->base.vb.vb2_buf);
810 			mv_addr = luma_addr + mv_offset;
811 
812 			hantro_write_addr(vpu, AV1_SEGMENTATION, mv_addr);
813 			hantro_reg_write(vpu, &av1_use_temporal3_mvs, 1);
814 		}
815 	}
816 
817 	hantro_reg_write(vpu, &av1_segment_temp_upd_e,
818 			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_TEMPORAL_UPDATE));
819 	hantro_reg_write(vpu, &av1_segment_upd_e,
820 			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_UPDATE_MAP));
821 	hantro_reg_write(vpu, &av1_segment_e,
822 			 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED));
823 
824 	hantro_reg_write(vpu, &av1_error_resilient,
825 			 !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE));
826 
827 	if (IS_INTRA(frame->frame_type) ||
828 	    !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE)) {
829 		hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
830 	}
831 
832 	if (seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED) {
833 		int s;
834 
835 		for (s = 0; s < V4L2_AV1_MAX_SEGMENTS; s++) {
836 			if (seg->feature_enabled[s] &
837 			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
838 				segval[s][V4L2_AV1_SEG_LVL_ALT_Q] =
839 				    clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q]),
840 					  0, 255);
841 				segsign |=
842 					(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q] < 0) << s;
843 			}
844 
845 			if (seg->feature_enabled[s] &
846 			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_V))
847 				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V] =
848 					clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]),
849 					      -63, 63);
850 
851 			if (seg->feature_enabled[s] &
852 			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_H))
853 				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H] =
854 				    clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]),
855 					  -63, 63);
856 
857 			if (seg->feature_enabled[s] &
858 			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_U))
859 				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_U] =
860 				    clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_U]),
861 					  -63, 63);
862 
863 			if (seg->feature_enabled[s] &
864 			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_V))
865 				segval[s][V4L2_AV1_SEG_LVL_ALT_LF_V] =
866 				    clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_V]),
867 					  -63, 63);
868 
869 			if (frame->frame_type && seg->feature_enabled[s] &
870 			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_FRAME))
871 				segval[s][V4L2_AV1_SEG_LVL_REF_FRAME]++;
872 
873 			if (seg->feature_enabled[s] &
874 			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_SKIP))
875 				segval[s][V4L2_AV1_SEG_LVL_REF_SKIP] = 1;
876 
877 			if (seg->feature_enabled[s] &
878 			    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_GLOBALMV))
879 				segval[s][V4L2_AV1_SEG_LVL_REF_GLOBALMV] = 1;
880 		}
881 	}
882 
883 	for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
884 		for (j = 0; j < V4L2_AV1_SEG_LVL_MAX; j++) {
885 			if (seg->feature_enabled[i]
886 			    & V4L2_AV1_SEGMENT_FEATURE_ENABLED(j)) {
887 				preskip_segid |= (j >= V4L2_AV1_SEG_LVL_REF_FRAME);
888 				last_active_seg = max(i, last_active_seg);
889 			}
890 		}
891 	}
892 
893 	hantro_reg_write(vpu, &av1_last_active_seg, last_active_seg);
894 	hantro_reg_write(vpu, &av1_preskip_segid, preskip_segid);
895 
896 	hantro_reg_write(vpu, &av1_seg_quant_sign, segsign);
897 
898 	/* Write QP, filter level, ref frame and skip for every segment */
899 	hantro_reg_write(vpu, &av1_quant_seg0,
900 			 segval[0][V4L2_AV1_SEG_LVL_ALT_Q]);
901 	hantro_reg_write(vpu, &av1_filt_level_delta0_seg0,
902 			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
903 	hantro_reg_write(vpu, &av1_filt_level_delta1_seg0,
904 			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
905 	hantro_reg_write(vpu, &av1_filt_level_delta2_seg0,
906 			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_U]);
907 	hantro_reg_write(vpu, &av1_filt_level_delta3_seg0,
908 			 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_V]);
909 	hantro_reg_write(vpu, &av1_refpic_seg0,
910 			 segval[0][V4L2_AV1_SEG_LVL_REF_FRAME]);
911 	hantro_reg_write(vpu, &av1_skip_seg0,
912 			 segval[0][V4L2_AV1_SEG_LVL_REF_SKIP]);
913 	hantro_reg_write(vpu, &av1_global_mv_seg0,
914 			 segval[0][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
915 
916 	hantro_reg_write(vpu, &av1_quant_seg1,
917 			 segval[1][V4L2_AV1_SEG_LVL_ALT_Q]);
918 	hantro_reg_write(vpu, &av1_filt_level_delta0_seg1,
919 			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
920 	hantro_reg_write(vpu, &av1_filt_level_delta1_seg1,
921 			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
922 	hantro_reg_write(vpu, &av1_filt_level_delta2_seg1,
923 			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_U]);
924 	hantro_reg_write(vpu, &av1_filt_level_delta3_seg1,
925 			 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_V]);
926 	hantro_reg_write(vpu, &av1_refpic_seg1,
927 			 segval[1][V4L2_AV1_SEG_LVL_REF_FRAME]);
928 	hantro_reg_write(vpu, &av1_skip_seg1,
929 			 segval[1][V4L2_AV1_SEG_LVL_REF_SKIP]);
930 	hantro_reg_write(vpu, &av1_global_mv_seg1,
931 			 segval[1][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
932 
933 	hantro_reg_write(vpu, &av1_quant_seg2,
934 			 segval[2][V4L2_AV1_SEG_LVL_ALT_Q]);
935 	hantro_reg_write(vpu, &av1_filt_level_delta0_seg2,
936 			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
937 	hantro_reg_write(vpu, &av1_filt_level_delta1_seg2,
938 			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
939 	hantro_reg_write(vpu, &av1_filt_level_delta2_seg2,
940 			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_U]);
941 	hantro_reg_write(vpu, &av1_filt_level_delta3_seg2,
942 			 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_V]);
943 	hantro_reg_write(vpu, &av1_refpic_seg2,
944 			 segval[2][V4L2_AV1_SEG_LVL_REF_FRAME]);
945 	hantro_reg_write(vpu, &av1_skip_seg2,
946 			 segval[2][V4L2_AV1_SEG_LVL_REF_SKIP]);
947 	hantro_reg_write(vpu, &av1_global_mv_seg2,
948 			 segval[2][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
949 
950 	hantro_reg_write(vpu, &av1_quant_seg3,
951 			 segval[3][V4L2_AV1_SEG_LVL_ALT_Q]);
952 	hantro_reg_write(vpu, &av1_filt_level_delta0_seg3,
953 			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
954 	hantro_reg_write(vpu, &av1_filt_level_delta1_seg3,
955 			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
956 	hantro_reg_write(vpu, &av1_filt_level_delta2_seg3,
957 			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_U]);
958 	hantro_reg_write(vpu, &av1_filt_level_delta3_seg3,
959 			 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_V]);
960 	hantro_reg_write(vpu, &av1_refpic_seg3,
961 			 segval[3][V4L2_AV1_SEG_LVL_REF_FRAME]);
962 	hantro_reg_write(vpu, &av1_skip_seg3,
963 			 segval[3][V4L2_AV1_SEG_LVL_REF_SKIP]);
964 	hantro_reg_write(vpu, &av1_global_mv_seg3,
965 			 segval[3][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
966 
967 	hantro_reg_write(vpu, &av1_quant_seg4,
968 			 segval[4][V4L2_AV1_SEG_LVL_ALT_Q]);
969 	hantro_reg_write(vpu, &av1_filt_level_delta0_seg4,
970 			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
971 	hantro_reg_write(vpu, &av1_filt_level_delta1_seg4,
972 			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
973 	hantro_reg_write(vpu, &av1_filt_level_delta2_seg4,
974 			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_U]);
975 	hantro_reg_write(vpu, &av1_filt_level_delta3_seg4,
976 			 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_V]);
977 	hantro_reg_write(vpu, &av1_refpic_seg4,
978 			 segval[4][V4L2_AV1_SEG_LVL_REF_FRAME]);
979 	hantro_reg_write(vpu, &av1_skip_seg4,
980 			 segval[4][V4L2_AV1_SEG_LVL_REF_SKIP]);
981 	hantro_reg_write(vpu, &av1_global_mv_seg4,
982 			 segval[4][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
983 
984 	hantro_reg_write(vpu, &av1_quant_seg5,
985 			 segval[5][V4L2_AV1_SEG_LVL_ALT_Q]);
986 	hantro_reg_write(vpu, &av1_filt_level_delta0_seg5,
987 			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
988 	hantro_reg_write(vpu, &av1_filt_level_delta1_seg5,
989 			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
990 	hantro_reg_write(vpu, &av1_filt_level_delta2_seg5,
991 			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_U]);
992 	hantro_reg_write(vpu, &av1_filt_level_delta3_seg5,
993 			 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_V]);
994 	hantro_reg_write(vpu, &av1_refpic_seg5,
995 			 segval[5][V4L2_AV1_SEG_LVL_REF_FRAME]);
996 	hantro_reg_write(vpu, &av1_skip_seg5,
997 			 segval[5][V4L2_AV1_SEG_LVL_REF_SKIP]);
998 	hantro_reg_write(vpu, &av1_global_mv_seg5,
999 			 segval[5][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
1000 
1001 	hantro_reg_write(vpu, &av1_quant_seg6,
1002 			 segval[6][V4L2_AV1_SEG_LVL_ALT_Q]);
1003 	hantro_reg_write(vpu, &av1_filt_level_delta0_seg6,
1004 			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
1005 	hantro_reg_write(vpu, &av1_filt_level_delta1_seg6,
1006 			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
1007 	hantro_reg_write(vpu, &av1_filt_level_delta2_seg6,
1008 			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_U]);
1009 	hantro_reg_write(vpu, &av1_filt_level_delta3_seg6,
1010 			 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_V]);
1011 	hantro_reg_write(vpu, &av1_refpic_seg6,
1012 			 segval[6][V4L2_AV1_SEG_LVL_REF_FRAME]);
1013 	hantro_reg_write(vpu, &av1_skip_seg6,
1014 			 segval[6][V4L2_AV1_SEG_LVL_REF_SKIP]);
1015 	hantro_reg_write(vpu, &av1_global_mv_seg6,
1016 			 segval[6][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
1017 
1018 	hantro_reg_write(vpu, &av1_quant_seg7,
1019 			 segval[7][V4L2_AV1_SEG_LVL_ALT_Q]);
1020 	hantro_reg_write(vpu, &av1_filt_level_delta0_seg7,
1021 			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
1022 	hantro_reg_write(vpu, &av1_filt_level_delta1_seg7,
1023 			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
1024 	hantro_reg_write(vpu, &av1_filt_level_delta2_seg7,
1025 			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_U]);
1026 	hantro_reg_write(vpu, &av1_filt_level_delta3_seg7,
1027 			 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_V]);
1028 	hantro_reg_write(vpu, &av1_refpic_seg7,
1029 			 segval[7][V4L2_AV1_SEG_LVL_REF_FRAME]);
1030 	hantro_reg_write(vpu, &av1_skip_seg7,
1031 			 segval[7][V4L2_AV1_SEG_LVL_REF_SKIP]);
1032 	hantro_reg_write(vpu, &av1_global_mv_seg7,
1033 			 segval[7][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
1034 }
1035 
rockchip_vpu981_av1_dec_is_lossless(struct hantro_ctx * ctx)1036 static bool rockchip_vpu981_av1_dec_is_lossless(struct hantro_ctx *ctx)
1037 {
1038 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1039 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1040 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1041 	const struct v4l2_av1_segmentation *segmentation = &frame->segmentation;
1042 	const struct v4l2_av1_quantization *quantization = &frame->quantization;
1043 	int i;
1044 
1045 	for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
1046 		int qindex = quantization->base_q_idx;
1047 
1048 		if (segmentation->feature_enabled[i] &
1049 		    V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
1050 			qindex += segmentation->feature_data[i][V4L2_AV1_SEG_LVL_ALT_Q];
1051 		}
1052 		qindex = clamp(qindex, 0, 255);
1053 
1054 		if (qindex ||
1055 		    quantization->delta_q_y_dc ||
1056 		    quantization->delta_q_u_dc ||
1057 		    quantization->delta_q_u_ac ||
1058 		    quantization->delta_q_v_dc ||
1059 		    quantization->delta_q_v_ac)
1060 			return false;
1061 	}
1062 	return true;
1063 }
1064 
rockchip_vpu981_av1_dec_set_loopfilter(struct hantro_ctx * ctx)1065 static void rockchip_vpu981_av1_dec_set_loopfilter(struct hantro_ctx *ctx)
1066 {
1067 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1068 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1069 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1070 	const struct v4l2_av1_loop_filter *loop_filter = &frame->loop_filter;
1071 	bool filtering_dis = (loop_filter->level[0] == 0) && (loop_filter->level[1] == 0);
1072 	struct hantro_dev *vpu = ctx->dev;
1073 
1074 	hantro_reg_write(vpu, &av1_filtering_dis, filtering_dis);
1075 	hantro_reg_write(vpu, &av1_filt_level_base_gt32, loop_filter->level[0] > 32);
1076 	hantro_reg_write(vpu, &av1_filt_sharpness, loop_filter->sharpness);
1077 
1078 	hantro_reg_write(vpu, &av1_filt_level0, loop_filter->level[0]);
1079 	hantro_reg_write(vpu, &av1_filt_level1, loop_filter->level[1]);
1080 	hantro_reg_write(vpu, &av1_filt_level2, loop_filter->level[2]);
1081 	hantro_reg_write(vpu, &av1_filt_level3, loop_filter->level[3]);
1082 
1083 	if (loop_filter->flags & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_ENABLED &&
1084 	    !rockchip_vpu981_av1_dec_is_lossless(ctx) &&
1085 	    !(frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC)) {
1086 		hantro_reg_write(vpu, &av1_filt_ref_adj_0,
1087 				 loop_filter->ref_deltas[0]);
1088 		hantro_reg_write(vpu, &av1_filt_ref_adj_1,
1089 				 loop_filter->ref_deltas[1]);
1090 		hantro_reg_write(vpu, &av1_filt_ref_adj_2,
1091 				 loop_filter->ref_deltas[2]);
1092 		hantro_reg_write(vpu, &av1_filt_ref_adj_3,
1093 				 loop_filter->ref_deltas[3]);
1094 		hantro_reg_write(vpu, &av1_filt_ref_adj_4,
1095 				 loop_filter->ref_deltas[4]);
1096 		hantro_reg_write(vpu, &av1_filt_ref_adj_5,
1097 				 loop_filter->ref_deltas[5]);
1098 		hantro_reg_write(vpu, &av1_filt_ref_adj_6,
1099 				 loop_filter->ref_deltas[6]);
1100 		hantro_reg_write(vpu, &av1_filt_ref_adj_7,
1101 				 loop_filter->ref_deltas[7]);
1102 		hantro_reg_write(vpu, &av1_filt_mb_adj_0,
1103 				 loop_filter->mode_deltas[0]);
1104 		hantro_reg_write(vpu, &av1_filt_mb_adj_1,
1105 				 loop_filter->mode_deltas[1]);
1106 	} else {
1107 		hantro_reg_write(vpu, &av1_filt_ref_adj_0, 0);
1108 		hantro_reg_write(vpu, &av1_filt_ref_adj_1, 0);
1109 		hantro_reg_write(vpu, &av1_filt_ref_adj_2, 0);
1110 		hantro_reg_write(vpu, &av1_filt_ref_adj_3, 0);
1111 		hantro_reg_write(vpu, &av1_filt_ref_adj_4, 0);
1112 		hantro_reg_write(vpu, &av1_filt_ref_adj_5, 0);
1113 		hantro_reg_write(vpu, &av1_filt_ref_adj_6, 0);
1114 		hantro_reg_write(vpu, &av1_filt_ref_adj_7, 0);
1115 		hantro_reg_write(vpu, &av1_filt_mb_adj_0, 0);
1116 		hantro_reg_write(vpu, &av1_filt_mb_adj_1, 0);
1117 	}
1118 
1119 	hantro_write_addr(vpu, AV1_DB_DATA_COL, av1_dec->db_data_col.dma);
1120 	hantro_write_addr(vpu, AV1_DB_CTRL_COL, av1_dec->db_ctrl_col.dma);
1121 }
1122 
rockchip_vpu981_av1_dec_update_prob(struct hantro_ctx * ctx)1123 static void rockchip_vpu981_av1_dec_update_prob(struct hantro_ctx *ctx)
1124 {
1125 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1126 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1127 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1128 	bool frame_is_intra = IS_INTRA(frame->frame_type);
1129 	struct av1cdfs *out_cdfs = (struct av1cdfs *)av1_dec->prob_tbl_out.cpu;
1130 	int i;
1131 
1132 	if (frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_FRAME_END_UPDATE_CDF)
1133 		return;
1134 
1135 	for (i = 0; i < NUM_REF_FRAMES; i++) {
1136 		if (frame->refresh_frame_flags & BIT(i)) {
1137 			struct mvcdfs stored_mv_cdf;
1138 
1139 			rockchip_av1_get_cdfs(ctx, i);
1140 			stored_mv_cdf = av1_dec->cdfs->mv_cdf;
1141 			*av1_dec->cdfs = *out_cdfs;
1142 			if (frame_is_intra) {
1143 				av1_dec->cdfs->mv_cdf = stored_mv_cdf;
1144 				*av1_dec->cdfs_ndvc = out_cdfs->mv_cdf;
1145 			}
1146 			rockchip_av1_store_cdfs(ctx,
1147 						frame->refresh_frame_flags);
1148 			break;
1149 		}
1150 	}
1151 }
1152 
/*
 * Post-decode hook: fold the CDF probabilities produced by the hardware for
 * this frame back into the refreshed reference slots.
 */
void rockchip_vpu981_av1_dec_done(struct hantro_ctx *ctx)
{
	rockchip_vpu981_av1_dec_update_prob(ctx);
}
1157 
rockchip_vpu981_av1_dec_set_prob(struct hantro_ctx * ctx)1158 static void rockchip_vpu981_av1_dec_set_prob(struct hantro_ctx *ctx)
1159 {
1160 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1161 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1162 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1163 	const struct v4l2_av1_quantization *quantization = &frame->quantization;
1164 	struct hantro_dev *vpu = ctx->dev;
1165 	bool error_resilient_mode =
1166 	    !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE);
1167 	bool frame_is_intra = IS_INTRA(frame->frame_type);
1168 
1169 	if (error_resilient_mode || frame_is_intra ||
1170 	    frame->primary_ref_frame == AV1_PRIMARY_REF_NONE) {
1171 		av1_dec->cdfs = &av1_dec->default_cdfs;
1172 		av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
1173 		rockchip_av1_default_coeff_probs(quantization->base_q_idx,
1174 						 av1_dec->cdfs);
1175 	} else {
1176 		rockchip_av1_get_cdfs(ctx, frame->ref_frame_idx[frame->primary_ref_frame]);
1177 	}
1178 	rockchip_av1_store_cdfs(ctx, frame->refresh_frame_flags);
1179 
1180 	memcpy(av1_dec->prob_tbl.cpu, av1_dec->cdfs, sizeof(struct av1cdfs));
1181 
1182 	if (frame_is_intra) {
1183 		int mv_offset = offsetof(struct av1cdfs, mv_cdf);
1184 		/* Overwrite MV context area with intrabc MV context */
1185 		memcpy(av1_dec->prob_tbl.cpu + mv_offset, av1_dec->cdfs_ndvc,
1186 		       sizeof(struct mvcdfs));
1187 	}
1188 
1189 	hantro_write_addr(vpu, AV1_PROP_TABLE_OUT, av1_dec->prob_tbl_out.dma);
1190 	hantro_write_addr(vpu, AV1_PROP_TABLE, av1_dec->prob_tbl.dma);
1191 }
1192 
1193 static void
rockchip_vpu981_av1_dec_init_scaling_function(const u8 * values,const u8 * scaling,u8 num_points,u8 * scaling_lut)1194 rockchip_vpu981_av1_dec_init_scaling_function(const u8 *values, const u8 *scaling,
1195 					      u8 num_points, u8 *scaling_lut)
1196 {
1197 	int i, point;
1198 
1199 	if (num_points == 0) {
1200 		memset(scaling_lut, 0, 256);
1201 		return;
1202 	}
1203 
1204 	for (point = 0; point < num_points - 1; point++) {
1205 		int x;
1206 		s32 delta_y = scaling[point + 1] - scaling[point];
1207 		s32 delta_x = values[point + 1] - values[point];
1208 		s64 delta =
1209 		    delta_x ? delta_y * ((65536 + (delta_x >> 1)) /
1210 					 delta_x) : 0;
1211 
1212 		for (x = 0; x < delta_x; x++) {
1213 			scaling_lut[values[point] + x] =
1214 			    scaling[point] +
1215 			    (s32)((x * delta + 32768) >> 16);
1216 		}
1217 	}
1218 
1219 	for (i = values[num_points - 1]; i < 256; i++)
1220 		scaling_lut[i] = scaling[num_points - 1];
1221 }
1222 
rockchip_vpu981_av1_dec_set_fgs(struct hantro_ctx * ctx)1223 static void rockchip_vpu981_av1_dec_set_fgs(struct hantro_ctx *ctx)
1224 {
1225 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1226 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1227 	const struct v4l2_ctrl_av1_film_grain *film_grain = ctrls->film_grain;
1228 	struct rockchip_av1_film_grain *fgmem = av1_dec->film_grain.cpu;
1229 	struct hantro_dev *vpu = ctx->dev;
1230 	bool scaling_from_luma =
1231 		!!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_CHROMA_SCALING_FROM_LUMA);
1232 	s32 (*ar_coeffs_y)[24];
1233 	s32 (*ar_coeffs_cb)[25];
1234 	s32 (*ar_coeffs_cr)[25];
1235 	s32 (*luma_grain_block)[73][82];
1236 	s32 (*cb_grain_block)[38][44];
1237 	s32 (*cr_grain_block)[38][44];
1238 	s32 ar_coeff_lag, ar_coeff_shift;
1239 	s32 grain_scale_shift, bitdepth;
1240 	s32 grain_center, grain_min, grain_max;
1241 	int i, j;
1242 
1243 	hantro_reg_write(vpu, &av1_apply_grain, 0);
1244 
1245 	if (!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_APPLY_GRAIN)) {
1246 		hantro_reg_write(vpu, &av1_num_y_points_b, 0);
1247 		hantro_reg_write(vpu, &av1_num_cb_points_b, 0);
1248 		hantro_reg_write(vpu, &av1_num_cr_points_b, 0);
1249 		hantro_reg_write(vpu, &av1_scaling_shift, 0);
1250 		hantro_reg_write(vpu, &av1_cb_mult, 0);
1251 		hantro_reg_write(vpu, &av1_cb_luma_mult, 0);
1252 		hantro_reg_write(vpu, &av1_cb_offset, 0);
1253 		hantro_reg_write(vpu, &av1_cr_mult, 0);
1254 		hantro_reg_write(vpu, &av1_cr_luma_mult, 0);
1255 		hantro_reg_write(vpu, &av1_cr_offset, 0);
1256 		hantro_reg_write(vpu, &av1_overlap_flag, 0);
1257 		hantro_reg_write(vpu, &av1_clip_to_restricted_range, 0);
1258 		hantro_reg_write(vpu, &av1_chroma_scaling_from_luma, 0);
1259 		hantro_reg_write(vpu, &av1_random_seed, 0);
1260 		hantro_write_addr(vpu, AV1_FILM_GRAIN, 0);
1261 		return;
1262 	}
1263 
1264 	ar_coeffs_y = kzalloc(sizeof(int32_t) * 24, GFP_KERNEL);
1265 	ar_coeffs_cb = kzalloc(sizeof(int32_t) * 25, GFP_KERNEL);
1266 	ar_coeffs_cr = kzalloc(sizeof(int32_t) * 25, GFP_KERNEL);
1267 	luma_grain_block = kzalloc(sizeof(int32_t) * 73 * 82, GFP_KERNEL);
1268 	cb_grain_block = kzalloc(sizeof(int32_t) * 38 * 44, GFP_KERNEL);
1269 	cr_grain_block = kzalloc(sizeof(int32_t) * 38 * 44, GFP_KERNEL);
1270 
1271 	if (!ar_coeffs_y || !ar_coeffs_cb || !ar_coeffs_cr ||
1272 	    !luma_grain_block || !cb_grain_block || !cr_grain_block) {
1273 		pr_warn("Fail allocating memory for film grain parameters\n");
1274 		goto alloc_fail;
1275 	}
1276 
1277 	hantro_reg_write(vpu, &av1_apply_grain, 1);
1278 
1279 	hantro_reg_write(vpu, &av1_num_y_points_b,
1280 			 film_grain->num_y_points > 0);
1281 	hantro_reg_write(vpu, &av1_num_cb_points_b,
1282 			 film_grain->num_cb_points > 0);
1283 	hantro_reg_write(vpu, &av1_num_cr_points_b,
1284 			 film_grain->num_cr_points > 0);
1285 	hantro_reg_write(vpu, &av1_scaling_shift,
1286 			 film_grain->grain_scaling_minus_8 + 8);
1287 
1288 	if (!scaling_from_luma) {
1289 		hantro_reg_write(vpu, &av1_cb_mult, film_grain->cb_mult - 128);
1290 		hantro_reg_write(vpu, &av1_cb_luma_mult, film_grain->cb_luma_mult - 128);
1291 		hantro_reg_write(vpu, &av1_cb_offset, film_grain->cb_offset - 256);
1292 		hantro_reg_write(vpu, &av1_cr_mult, film_grain->cr_mult - 128);
1293 		hantro_reg_write(vpu, &av1_cr_luma_mult, film_grain->cr_luma_mult - 128);
1294 		hantro_reg_write(vpu, &av1_cr_offset, film_grain->cr_offset - 256);
1295 	} else {
1296 		hantro_reg_write(vpu, &av1_cb_mult, 0);
1297 		hantro_reg_write(vpu, &av1_cb_luma_mult, 0);
1298 		hantro_reg_write(vpu, &av1_cb_offset, 0);
1299 		hantro_reg_write(vpu, &av1_cr_mult, 0);
1300 		hantro_reg_write(vpu, &av1_cr_luma_mult, 0);
1301 		hantro_reg_write(vpu, &av1_cr_offset, 0);
1302 	}
1303 
1304 	hantro_reg_write(vpu, &av1_overlap_flag,
1305 			 !!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_OVERLAP));
1306 	hantro_reg_write(vpu, &av1_clip_to_restricted_range,
1307 			 !!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_CLIP_TO_RESTRICTED_RANGE));
1308 	hantro_reg_write(vpu, &av1_chroma_scaling_from_luma, scaling_from_luma);
1309 	hantro_reg_write(vpu, &av1_random_seed, film_grain->grain_seed);
1310 
1311 	rockchip_vpu981_av1_dec_init_scaling_function(film_grain->point_y_value,
1312 						      film_grain->point_y_scaling,
1313 						      film_grain->num_y_points,
1314 						      fgmem->scaling_lut_y);
1315 
1316 	if (film_grain->flags &
1317 	    V4L2_AV1_FILM_GRAIN_FLAG_CHROMA_SCALING_FROM_LUMA) {
1318 		memcpy(fgmem->scaling_lut_cb, fgmem->scaling_lut_y,
1319 		       sizeof(*fgmem->scaling_lut_y) * 256);
1320 		memcpy(fgmem->scaling_lut_cr, fgmem->scaling_lut_y,
1321 		       sizeof(*fgmem->scaling_lut_y) * 256);
1322 	} else {
1323 		rockchip_vpu981_av1_dec_init_scaling_function
1324 		    (film_grain->point_cb_value, film_grain->point_cb_scaling,
1325 		     film_grain->num_cb_points, fgmem->scaling_lut_cb);
1326 		rockchip_vpu981_av1_dec_init_scaling_function
1327 		    (film_grain->point_cr_value, film_grain->point_cr_scaling,
1328 		     film_grain->num_cr_points, fgmem->scaling_lut_cr);
1329 	}
1330 
1331 	for (i = 0; i < V4L2_AV1_AR_COEFFS_SIZE; i++) {
1332 		if (i < 24)
1333 			(*ar_coeffs_y)[i] = film_grain->ar_coeffs_y_plus_128[i] - 128;
1334 		(*ar_coeffs_cb)[i] = film_grain->ar_coeffs_cb_plus_128[i] - 128;
1335 		(*ar_coeffs_cr)[i] = film_grain->ar_coeffs_cr_plus_128[i] - 128;
1336 	}
1337 
1338 	ar_coeff_lag = film_grain->ar_coeff_lag;
1339 	ar_coeff_shift = film_grain->ar_coeff_shift_minus_6 + 6;
1340 	grain_scale_shift = film_grain->grain_scale_shift;
1341 	bitdepth = ctx->bit_depth;
1342 	grain_center = 128 << (bitdepth - 8);
1343 	grain_min = 0 - grain_center;
1344 	grain_max = (256 << (bitdepth - 8)) - 1 - grain_center;
1345 
1346 	rockchip_av1_generate_luma_grain_block(luma_grain_block, bitdepth,
1347 					       film_grain->num_y_points, grain_scale_shift,
1348 					       ar_coeff_lag, ar_coeffs_y, ar_coeff_shift,
1349 					       grain_min, grain_max, film_grain->grain_seed);
1350 
1351 	rockchip_av1_generate_chroma_grain_block(luma_grain_block, cb_grain_block,
1352 						 cr_grain_block, bitdepth,
1353 						 film_grain->num_y_points,
1354 						 film_grain->num_cb_points,
1355 						 film_grain->num_cr_points,
1356 						 grain_scale_shift, ar_coeff_lag, ar_coeffs_cb,
1357 						 ar_coeffs_cr, ar_coeff_shift, grain_min,
1358 						 grain_max,
1359 						 scaling_from_luma,
1360 						 film_grain->grain_seed);
1361 
1362 	for (i = 0; i < 64; i++) {
1363 		for (j = 0; j < 64; j++)
1364 			fgmem->cropped_luma_grain_block[i * 64 + j] =
1365 				(*luma_grain_block)[i + 9][j + 9];
1366 	}
1367 
1368 	for (i = 0; i < 32; i++) {
1369 		for (j = 0; j < 32; j++) {
1370 			fgmem->cropped_chroma_grain_block[i * 64 + 2 * j] =
1371 				(*cb_grain_block)[i + 6][j + 6];
1372 			fgmem->cropped_chroma_grain_block[i * 64 + 2 * j + 1] =
1373 				(*cr_grain_block)[i + 6][j + 6];
1374 		}
1375 	}
1376 
1377 	hantro_write_addr(vpu, AV1_FILM_GRAIN, av1_dec->film_grain.dma);
1378 
1379 alloc_fail:
1380 	kfree(ar_coeffs_y);
1381 	kfree(ar_coeffs_cb);
1382 	kfree(ar_coeffs_cr);
1383 	kfree(luma_grain_block);
1384 	kfree(cb_grain_block);
1385 	kfree(cr_grain_block);
1386 }
1387 
rockchip_vpu981_av1_dec_set_cdef(struct hantro_ctx * ctx)1388 static void rockchip_vpu981_av1_dec_set_cdef(struct hantro_ctx *ctx)
1389 {
1390 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1391 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1392 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1393 	const struct v4l2_av1_cdef *cdef = &frame->cdef;
1394 	struct hantro_dev *vpu = ctx->dev;
1395 	u32 luma_pri_strength = 0;
1396 	u16 luma_sec_strength = 0;
1397 	u32 chroma_pri_strength = 0;
1398 	u16 chroma_sec_strength = 0;
1399 	int i;
1400 
1401 	hantro_reg_write(vpu, &av1_cdef_bits, cdef->bits);
1402 	hantro_reg_write(vpu, &av1_cdef_damping, cdef->damping_minus_3);
1403 
1404 	for (i = 0; i < BIT(cdef->bits); i++) {
1405 		luma_pri_strength |= cdef->y_pri_strength[i] << (i * 4);
1406 		if (cdef->y_sec_strength[i] == 4)
1407 			luma_sec_strength |= 3 << (i * 2);
1408 		else
1409 			luma_sec_strength |= cdef->y_sec_strength[i] << (i * 2);
1410 
1411 		chroma_pri_strength |= cdef->uv_pri_strength[i] << (i * 4);
1412 		if (cdef->uv_sec_strength[i] == 4)
1413 			chroma_sec_strength |= 3 << (i * 2);
1414 		else
1415 			chroma_sec_strength |= cdef->uv_sec_strength[i] << (i * 2);
1416 	}
1417 
1418 	hantro_reg_write(vpu, &av1_cdef_luma_primary_strength,
1419 			 luma_pri_strength);
1420 	hantro_reg_write(vpu, &av1_cdef_luma_secondary_strength,
1421 			 luma_sec_strength);
1422 	hantro_reg_write(vpu, &av1_cdef_chroma_primary_strength,
1423 			 chroma_pri_strength);
1424 	hantro_reg_write(vpu, &av1_cdef_chroma_secondary_strength,
1425 			 chroma_sec_strength);
1426 
1427 	hantro_write_addr(vpu, AV1_CDEF_COL, av1_dec->cdef_col.dma);
1428 }
1429 
rockchip_vpu981_av1_dec_set_lr(struct hantro_ctx * ctx)1430 static void rockchip_vpu981_av1_dec_set_lr(struct hantro_ctx *ctx)
1431 {
1432 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1433 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1434 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1435 	const struct v4l2_av1_loop_restoration *loop_restoration =
1436 	    &frame->loop_restoration;
1437 	struct hantro_dev *vpu = ctx->dev;
1438 	u16 lr_type = 0, lr_unit_size = 0;
1439 	u8 restoration_unit_size[V4L2_AV1_NUM_PLANES_MAX] = { 3, 3, 3 };
1440 	int i;
1441 
1442 	if (loop_restoration->flags & V4L2_AV1_LOOP_RESTORATION_FLAG_USES_LR) {
1443 		restoration_unit_size[0] = 1 + loop_restoration->lr_unit_shift;
1444 		restoration_unit_size[1] =
1445 		    1 + loop_restoration->lr_unit_shift - loop_restoration->lr_uv_shift;
1446 		restoration_unit_size[2] =
1447 		    1 + loop_restoration->lr_unit_shift - loop_restoration->lr_uv_shift;
1448 	}
1449 
1450 	for (i = 0; i < V4L2_AV1_NUM_PLANES_MAX; i++) {
1451 		lr_type |=
1452 		    loop_restoration->frame_restoration_type[i] << (i * 2);
1453 		lr_unit_size |= restoration_unit_size[i] << (i * 2);
1454 	}
1455 
1456 	hantro_reg_write(vpu, &av1_lr_type, lr_type);
1457 	hantro_reg_write(vpu, &av1_lr_unit_size, lr_unit_size);
1458 	hantro_write_addr(vpu, AV1_LR_COL, av1_dec->lr_col.dma);
1459 }
1460 
/*
 * Program the super-resolution scaler.
 *
 * When the frame uses superres with a denominator above SCALE_NUMERATOR,
 * the frame was coded at a reduced width and must be upscaled; compute the
 * 14-bit fixed-point horizontal step sizes (forward and inverse) and the
 * initial sub-pixel offsets for luma and chroma.  The arithmetic mirrors
 * the AV1 spec's superres scaler setup — presumably section 7.16; verify
 * against the specification before modifying any expression.  Otherwise
 * the identity defaults declared below are written.
 */
static void rockchip_vpu981_av1_dec_set_superres_params(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	struct hantro_dev *vpu = ctx->dev;
	u8 superres_scale_denominator = SCALE_NUMERATOR;
	int superres_luma_step = RS_SCALE_SUBPEL_BITS;
	int superres_chroma_step = RS_SCALE_SUBPEL_BITS;
	int superres_luma_step_invra = RS_SCALE_SUBPEL_BITS;
	int superres_chroma_step_invra = RS_SCALE_SUBPEL_BITS;
	int superres_init_luma_subpel_x = 0;
	int superres_init_chroma_subpel_x = 0;
	int superres_is_scaled = 0;
	int min_w = min_t(uint32_t, 16, frame->upscaled_width);
	int upscaled_luma, downscaled_luma;
	int downscaled_chroma, upscaled_chroma;
	int step_luma, step_chroma;
	int err_luma, err_chroma;
	int initial_luma, initial_chroma;
	int width = 0;

	if (frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES)
		superres_scale_denominator = frame->superres_denom;

	/* denominator <= numerator means no downscaling took place. */
	if (superres_scale_denominator <= SCALE_NUMERATOR)
		goto set_regs;

	/* Coded (downscaled) width, rounded to nearest. */
	width = (frame->upscaled_width * SCALE_NUMERATOR +
		(superres_scale_denominator / 2)) / superres_scale_denominator;

	if (width < min_w)
		width = min_w;

	if (width == frame->upscaled_width)
		goto set_regs;

	superres_is_scaled = 1;
	upscaled_luma = frame->upscaled_width;
	downscaled_luma = width;
	/* Chroma is half-width, rounded up (4:2:0 assumed — TODO confirm). */
	downscaled_chroma = (downscaled_luma + 1) >> 1;
	upscaled_chroma = (upscaled_luma + 1) >> 1;
	/* Forward step in 14-bit fixed point, rounded to nearest. */
	step_luma =
		((downscaled_luma << RS_SCALE_SUBPEL_BITS) +
		 (upscaled_luma / 2)) / upscaled_luma;
	step_chroma =
		((downscaled_chroma << RS_SCALE_SUBPEL_BITS) +
		 (upscaled_chroma / 2)) / upscaled_chroma;
	/* Accumulated rounding error of the step over the full width. */
	err_luma =
		(upscaled_luma * step_luma)
		- (downscaled_luma << RS_SCALE_SUBPEL_BITS);
	err_chroma =
		(upscaled_chroma * step_chroma)
		- (downscaled_chroma << RS_SCALE_SUBPEL_BITS);
	/* Centered initial sub-pixel position, error-compensated. */
	initial_luma =
		((-((upscaled_luma - downscaled_luma) << (RS_SCALE_SUBPEL_BITS - 1))
		  + upscaled_luma / 2)
		 / upscaled_luma + (1 << (RS_SCALE_EXTRA_BITS - 1)) - err_luma / 2)
		& RS_SCALE_SUBPEL_MASK;
	initial_chroma =
		((-((upscaled_chroma - downscaled_chroma) << (RS_SCALE_SUBPEL_BITS - 1))
		  + upscaled_chroma / 2)
		 / upscaled_chroma + (1 << (RS_SCALE_EXTRA_BITS - 1)) - err_chroma / 2)
		& RS_SCALE_SUBPEL_MASK;
	superres_luma_step = step_luma;
	superres_chroma_step = step_chroma;
	/* Inverse (upscaling) steps. */
	superres_luma_step_invra =
		((upscaled_luma << RS_SCALE_SUBPEL_BITS) + (downscaled_luma / 2))
		/ downscaled_luma;
	superres_chroma_step_invra =
		((upscaled_chroma << RS_SCALE_SUBPEL_BITS) + (downscaled_chroma / 2))
		/ downscaled_chroma;
	superres_init_luma_subpel_x = initial_luma;
	superres_init_chroma_subpel_x = initial_chroma;

set_regs:
	hantro_reg_write(vpu, &av1_superres_pic_width, frame->upscaled_width);

	if (frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES)
		hantro_reg_write(vpu, &av1_scale_denom_minus9,
				 frame->superres_denom - SUPERRES_SCALE_DENOMINATOR_MIN);
	else
		hantro_reg_write(vpu, &av1_scale_denom_minus9, frame->superres_denom);

	hantro_reg_write(vpu, &av1_superres_luma_step, superres_luma_step);
	hantro_reg_write(vpu, &av1_superres_chroma_step, superres_chroma_step);
	hantro_reg_write(vpu, &av1_superres_luma_step_invra,
			 superres_luma_step_invra);
	hantro_reg_write(vpu, &av1_superres_chroma_step_invra,
			 superres_chroma_step_invra);
	hantro_reg_write(vpu, &av1_superres_init_luma_subpel_x,
			 superres_init_luma_subpel_x);
	hantro_reg_write(vpu, &av1_superres_init_chroma_subpel_x,
			 superres_init_chroma_subpel_x);
	hantro_reg_write(vpu, &av1_superres_is_scaled, superres_is_scaled);

	hantro_write_addr(vpu, AV1_SR_COL, av1_dec->sr_col.dma);
}
1559 
rockchip_vpu981_av1_dec_set_picture_dimensions(struct hantro_ctx * ctx)1560 static void rockchip_vpu981_av1_dec_set_picture_dimensions(struct hantro_ctx *ctx)
1561 {
1562 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1563 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1564 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1565 	struct hantro_dev *vpu = ctx->dev;
1566 	int pic_width_in_cbs = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
1567 	int pic_height_in_cbs = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
1568 	int pic_width_pad = ALIGN(frame->frame_width_minus_1 + 1, 8)
1569 			    - (frame->frame_width_minus_1 + 1);
1570 	int pic_height_pad = ALIGN(frame->frame_height_minus_1 + 1, 8)
1571 			     - (frame->frame_height_minus_1 + 1);
1572 
1573 	hantro_reg_write(vpu, &av1_pic_width_in_cbs, pic_width_in_cbs);
1574 	hantro_reg_write(vpu, &av1_pic_height_in_cbs, pic_height_in_cbs);
1575 	hantro_reg_write(vpu, &av1_pic_width_pad, pic_width_pad);
1576 	hantro_reg_write(vpu, &av1_pic_height_pad, pic_height_pad);
1577 
1578 	rockchip_vpu981_av1_dec_set_superres_params(ctx);
1579 }
1580 
rockchip_vpu981_av1_dec_set_other_frames(struct hantro_ctx * ctx)1581 static void rockchip_vpu981_av1_dec_set_other_frames(struct hantro_ctx *ctx)
1582 {
1583 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1584 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1585 	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1586 	struct hantro_dev *vpu = ctx->dev;
1587 	bool use_ref_frame_mvs =
1588 	    !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS);
1589 	int cur_frame_offset = frame->order_hint;
1590 	int alt_frame_offset = 0;
1591 	int gld_frame_offset = 0;
1592 	int bwd_frame_offset = 0;
1593 	int alt2_frame_offset = 0;
1594 	int refs_selected[3] = { 0, 0, 0 };
1595 	int cur_mi_cols = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
1596 	int cur_mi_rows = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
1597 	int cur_offset[V4L2_AV1_TOTAL_REFS_PER_FRAME - 1];
1598 	int cur_roffset[V4L2_AV1_TOTAL_REFS_PER_FRAME - 1];
1599 	int mf_types[3] = { 0, 0, 0 };
1600 	int ref_stamp = 2;
1601 	int ref_ind = 0;
1602 	int rf, idx;
1603 
1604 	alt_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT_BUF_IDX);
1605 	gld_frame_offset = rockchip_vpu981_get_order_hint(ctx, GLD_BUF_IDX);
1606 	bwd_frame_offset = rockchip_vpu981_get_order_hint(ctx, BWD_BUF_IDX);
1607 	alt2_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT2_BUF_IDX);
1608 
1609 	idx = rockchip_vpu981_get_frame_index(ctx, LST_BUF_IDX);
1610 	if (idx >= 0) {
1611 		int alt_frame_offset_in_lst =
1612 			av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME];
1613 		bool is_lst_overlay =
1614 		    (alt_frame_offset_in_lst == gld_frame_offset);
1615 
1616 		if (!is_lst_overlay) {
1617 			int lst_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1618 			int lst_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1619 			bool lst_intra_only =
1620 			    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1621 
1622 			if (lst_mi_cols == cur_mi_cols &&
1623 			    lst_mi_rows == cur_mi_rows && !lst_intra_only) {
1624 				mf_types[ref_ind] = V4L2_AV1_REF_LAST_FRAME;
1625 				refs_selected[ref_ind++] = LST_BUF_IDX;
1626 			}
1627 		}
1628 		ref_stamp--;
1629 	}
1630 
1631 	idx = rockchip_vpu981_get_frame_index(ctx, BWD_BUF_IDX);
1632 	if (rockchip_vpu981_av1_dec_get_dist(ctx, bwd_frame_offset, cur_frame_offset) > 0) {
1633 		int bwd_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1634 		int bwd_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1635 		bool bwd_intra_only =
1636 		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1637 
1638 		if (bwd_mi_cols == cur_mi_cols && bwd_mi_rows == cur_mi_rows &&
1639 		    !bwd_intra_only) {
1640 			mf_types[ref_ind] = V4L2_AV1_REF_BWDREF_FRAME;
1641 			refs_selected[ref_ind++] = BWD_BUF_IDX;
1642 			ref_stamp--;
1643 		}
1644 	}
1645 
1646 	idx = rockchip_vpu981_get_frame_index(ctx, ALT2_BUF_IDX);
1647 	if (rockchip_vpu981_av1_dec_get_dist(ctx, alt2_frame_offset, cur_frame_offset) > 0) {
1648 		int alt2_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1649 		int alt2_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1650 		bool alt2_intra_only =
1651 		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1652 
1653 		if (alt2_mi_cols == cur_mi_cols && alt2_mi_rows == cur_mi_rows &&
1654 		    !alt2_intra_only) {
1655 			mf_types[ref_ind] = V4L2_AV1_REF_ALTREF2_FRAME;
1656 			refs_selected[ref_ind++] = ALT2_BUF_IDX;
1657 			ref_stamp--;
1658 		}
1659 	}
1660 
1661 	idx = rockchip_vpu981_get_frame_index(ctx, ALT_BUF_IDX);
1662 	if (rockchip_vpu981_av1_dec_get_dist(ctx, alt_frame_offset, cur_frame_offset) > 0 &&
1663 	    ref_stamp >= 0) {
1664 		int alt_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1665 		int alt_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1666 		bool alt_intra_only =
1667 		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1668 
1669 		if (alt_mi_cols == cur_mi_cols && alt_mi_rows == cur_mi_rows &&
1670 		    !alt_intra_only) {
1671 			mf_types[ref_ind] = V4L2_AV1_REF_ALTREF_FRAME;
1672 			refs_selected[ref_ind++] = ALT_BUF_IDX;
1673 			ref_stamp--;
1674 		}
1675 	}
1676 
1677 	idx = rockchip_vpu981_get_frame_index(ctx, LST2_BUF_IDX);
1678 	if (idx >= 0 && ref_stamp >= 0) {
1679 		int lst2_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1680 		int lst2_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1681 		bool lst2_intra_only =
1682 		    IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1683 
1684 		if (lst2_mi_cols == cur_mi_cols && lst2_mi_rows == cur_mi_rows &&
1685 		    !lst2_intra_only) {
1686 			mf_types[ref_ind] = V4L2_AV1_REF_LAST2_FRAME;
1687 			refs_selected[ref_ind++] = LST2_BUF_IDX;
1688 			ref_stamp--;
1689 		}
1690 	}
1691 
1692 	for (rf = 0; rf < V4L2_AV1_TOTAL_REFS_PER_FRAME - 1; ++rf) {
1693 		idx = rockchip_vpu981_get_frame_index(ctx, rf);
1694 		if (idx >= 0) {
1695 			int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, rf);
1696 
1697 			cur_offset[rf] =
1698 			    rockchip_vpu981_av1_dec_get_dist(ctx, cur_frame_offset, rf_order_hint);
1699 			cur_roffset[rf] =
1700 			    rockchip_vpu981_av1_dec_get_dist(ctx, rf_order_hint, cur_frame_offset);
1701 		} else {
1702 			cur_offset[rf] = 0;
1703 			cur_roffset[rf] = 0;
1704 		}
1705 	}
1706 
1707 	hantro_reg_write(vpu, &av1_use_temporal0_mvs, 0);
1708 	hantro_reg_write(vpu, &av1_use_temporal1_mvs, 0);
1709 	hantro_reg_write(vpu, &av1_use_temporal2_mvs, 0);
1710 	hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
1711 
1712 	hantro_reg_write(vpu, &av1_mf1_last_offset, 0);
1713 	hantro_reg_write(vpu, &av1_mf1_last2_offset, 0);
1714 	hantro_reg_write(vpu, &av1_mf1_last3_offset, 0);
1715 	hantro_reg_write(vpu, &av1_mf1_golden_offset, 0);
1716 	hantro_reg_write(vpu, &av1_mf1_bwdref_offset, 0);
1717 	hantro_reg_write(vpu, &av1_mf1_altref2_offset, 0);
1718 	hantro_reg_write(vpu, &av1_mf1_altref_offset, 0);
1719 
1720 	if (use_ref_frame_mvs && ref_ind > 0 &&
1721 	    cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
1722 	    cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
1723 		int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[0]);
1724 		int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[0]);
1725 		u32 *oh = av1_dec->frame_refs[idx].order_hints;
1726 		int val;
1727 
1728 		hantro_reg_write(vpu, &av1_use_temporal0_mvs, 1);
1729 
1730 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
1731 		hantro_reg_write(vpu, &av1_mf1_last_offset, val);
1732 
1733 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
1734 		hantro_reg_write(vpu, &av1_mf1_last2_offset, val);
1735 
1736 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
1737 		hantro_reg_write(vpu, &av1_mf1_last3_offset, val);
1738 
1739 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
1740 		hantro_reg_write(vpu, &av1_mf1_golden_offset, val);
1741 
1742 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
1743 		hantro_reg_write(vpu, &av1_mf1_bwdref_offset, val);
1744 
1745 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
1746 		hantro_reg_write(vpu, &av1_mf1_altref2_offset, val);
1747 
1748 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
1749 		hantro_reg_write(vpu, &av1_mf1_altref_offset, val);
1750 	}
1751 
1752 	hantro_reg_write(vpu, &av1_mf2_last_offset, 0);
1753 	hantro_reg_write(vpu, &av1_mf2_last2_offset, 0);
1754 	hantro_reg_write(vpu, &av1_mf2_last3_offset, 0);
1755 	hantro_reg_write(vpu, &av1_mf2_golden_offset, 0);
1756 	hantro_reg_write(vpu, &av1_mf2_bwdref_offset, 0);
1757 	hantro_reg_write(vpu, &av1_mf2_altref2_offset, 0);
1758 	hantro_reg_write(vpu, &av1_mf2_altref_offset, 0);
1759 
1760 	if (use_ref_frame_mvs && ref_ind > 1 &&
1761 	    cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
1762 	    cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
1763 		int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[1]);
1764 		int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[1]);
1765 		u32 *oh = av1_dec->frame_refs[idx].order_hints;
1766 		int val;
1767 
1768 		hantro_reg_write(vpu, &av1_use_temporal1_mvs, 1);
1769 
1770 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
1771 		hantro_reg_write(vpu, &av1_mf2_last_offset, val);
1772 
1773 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
1774 		hantro_reg_write(vpu, &av1_mf2_last2_offset, val);
1775 
1776 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
1777 		hantro_reg_write(vpu, &av1_mf2_last3_offset, val);
1778 
1779 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
1780 		hantro_reg_write(vpu, &av1_mf2_golden_offset, val);
1781 
1782 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
1783 		hantro_reg_write(vpu, &av1_mf2_bwdref_offset, val);
1784 
1785 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
1786 		hantro_reg_write(vpu, &av1_mf2_altref2_offset, val);
1787 
1788 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
1789 		hantro_reg_write(vpu, &av1_mf2_altref_offset, val);
1790 	}
1791 
1792 	hantro_reg_write(vpu, &av1_mf3_last_offset, 0);
1793 	hantro_reg_write(vpu, &av1_mf3_last2_offset, 0);
1794 	hantro_reg_write(vpu, &av1_mf3_last3_offset, 0);
1795 	hantro_reg_write(vpu, &av1_mf3_golden_offset, 0);
1796 	hantro_reg_write(vpu, &av1_mf3_bwdref_offset, 0);
1797 	hantro_reg_write(vpu, &av1_mf3_altref2_offset, 0);
1798 	hantro_reg_write(vpu, &av1_mf3_altref_offset, 0);
1799 
1800 	if (use_ref_frame_mvs && ref_ind > 2 &&
1801 	    cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
1802 	    cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
1803 		int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[2]);
1804 		int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[2]);
1805 		u32 *oh = av1_dec->frame_refs[idx].order_hints;
1806 		int val;
1807 
1808 		hantro_reg_write(vpu, &av1_use_temporal2_mvs, 1);
1809 
1810 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
1811 		hantro_reg_write(vpu, &av1_mf3_last_offset, val);
1812 
1813 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
1814 		hantro_reg_write(vpu, &av1_mf3_last2_offset, val);
1815 
1816 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
1817 		hantro_reg_write(vpu, &av1_mf3_last3_offset, val);
1818 
1819 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
1820 		hantro_reg_write(vpu, &av1_mf3_golden_offset, val);
1821 
1822 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
1823 		hantro_reg_write(vpu, &av1_mf3_bwdref_offset, val);
1824 
1825 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
1826 		hantro_reg_write(vpu, &av1_mf3_altref2_offset, val);
1827 
1828 		val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
1829 		hantro_reg_write(vpu, &av1_mf3_altref_offset, val);
1830 	}
1831 
1832 	hantro_reg_write(vpu, &av1_cur_last_offset, cur_offset[0]);
1833 	hantro_reg_write(vpu, &av1_cur_last2_offset, cur_offset[1]);
1834 	hantro_reg_write(vpu, &av1_cur_last3_offset, cur_offset[2]);
1835 	hantro_reg_write(vpu, &av1_cur_golden_offset, cur_offset[3]);
1836 	hantro_reg_write(vpu, &av1_cur_bwdref_offset, cur_offset[4]);
1837 	hantro_reg_write(vpu, &av1_cur_altref2_offset, cur_offset[5]);
1838 	hantro_reg_write(vpu, &av1_cur_altref_offset, cur_offset[6]);
1839 
1840 	hantro_reg_write(vpu, &av1_cur_last_roffset, cur_roffset[0]);
1841 	hantro_reg_write(vpu, &av1_cur_last2_roffset, cur_roffset[1]);
1842 	hantro_reg_write(vpu, &av1_cur_last3_roffset, cur_roffset[2]);
1843 	hantro_reg_write(vpu, &av1_cur_golden_roffset, cur_roffset[3]);
1844 	hantro_reg_write(vpu, &av1_cur_bwdref_roffset, cur_roffset[4]);
1845 	hantro_reg_write(vpu, &av1_cur_altref2_roffset, cur_roffset[5]);
1846 	hantro_reg_write(vpu, &av1_cur_altref_roffset, cur_roffset[6]);
1847 
1848 	hantro_reg_write(vpu, &av1_mf1_type, mf_types[0] - V4L2_AV1_REF_LAST_FRAME);
1849 	hantro_reg_write(vpu, &av1_mf2_type, mf_types[1] - V4L2_AV1_REF_LAST_FRAME);
1850 	hantro_reg_write(vpu, &av1_mf3_type, mf_types[2] - V4L2_AV1_REF_LAST_FRAME);
1851 }
1852 
/*
 * Program the reference-frame state: number of distinct reference buffers,
 * per-reference base addresses/sizes/sign-bias, scaling enable, and the
 * global-motion mode for each reference slot.
 *
 * Intra frames without intra block copy use no references at all, so the
 * whole setup is skipped in that case.
 */
static void rockchip_vpu981_av1_dec_set_reference_frames(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	int frame_type = frame->frame_type;
	bool allow_intrabc = !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC);
	int ref_count[AV1DEC_MAX_PIC_BUFFERS] = { 0 };
	struct hantro_dev *vpu = ctx->dev;
	int i, ref_frames = 0;
	bool scale_enable = false;

	if (IS_INTRA(frame_type) && !allow_intrabc)
		return;

	if (!allow_intrabc) {
		/*
		 * Count how many distinct internal buffers back the
		 * references: several V4L2 slots may map to one buffer.
		 */
		for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++) {
			int idx = rockchip_vpu981_get_frame_index(ctx, i);

			if (idx >= 0)
				ref_count[idx]++;
		}

		for (i = 0; i < AV1DEC_MAX_PIC_BUFFERS; i++) {
			if (ref_count[i])
				ref_frames++;
		}
	} else {
		/* Intra block copy references only the current frame. */
		ref_frames = 1;
	}
	hantro_reg_write(vpu, &av1_ref_frames, ref_frames);

	rockchip_vpu981_av1_dec_set_frame_sign_bias(ctx);

	/* Hardware reference slots are 0-based; V4L2 starts at LAST (1). */
	for (i = V4L2_AV1_REF_LAST_FRAME; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++) {
		u32 ref = i - 1;
		int idx = 0;
		int width, height;

		if (allow_intrabc) {
			/* With intra block copy every slot points at the current frame. */
			idx = av1_dec->current_frame_index;
			width = frame->frame_width_minus_1 + 1;
			height = frame->frame_height_minus_1 + 1;
		} else {
			/*
			 * NOTE(review): a lookup of 0 or AV1_INVALID_IDX both
			 * leave idx at its fallback of 0, so '>' vs '>=' is
			 * equivalent here — confirm slot 0 is the intended
			 * fallback for an invalid reference.
			 */
			if (rockchip_vpu981_get_frame_index(ctx, ref) > 0)
				idx = rockchip_vpu981_get_frame_index(ctx, ref);
			width = av1_dec->frame_refs[idx].width;
			height = av1_dec->frame_refs[idx].height;
		}

		/* set_ref() reports whether this reference needs scaling. */
		scale_enable |=
		    rockchip_vpu981_av1_dec_set_ref(ctx, ref, idx, width,
						    height);

		rockchip_vpu981_av1_dec_set_sign_bias(ctx, ref,
						      av1_dec->ref_frame_sign_bias[i]);
	}
	hantro_reg_write(vpu, &av1_ref_scaling_enable, scale_enable);

	/* Global motion model type for each reference slot. */
	hantro_reg_write(vpu, &av1_ref0_gm_mode,
			 frame->global_motion.type[V4L2_AV1_REF_LAST_FRAME]);
	hantro_reg_write(vpu, &av1_ref1_gm_mode,
			 frame->global_motion.type[V4L2_AV1_REF_LAST2_FRAME]);
	hantro_reg_write(vpu, &av1_ref2_gm_mode,
			 frame->global_motion.type[V4L2_AV1_REF_LAST3_FRAME]);
	hantro_reg_write(vpu, &av1_ref3_gm_mode,
			 frame->global_motion.type[V4L2_AV1_REF_GOLDEN_FRAME]);
	hantro_reg_write(vpu, &av1_ref4_gm_mode,
			 frame->global_motion.type[V4L2_AV1_REF_BWDREF_FRAME]);
	hantro_reg_write(vpu, &av1_ref5_gm_mode,
			 frame->global_motion.type[V4L2_AV1_REF_ALTREF2_FRAME]);
	hantro_reg_write(vpu, &av1_ref6_gm_mode,
			 frame->global_motion.type[V4L2_AV1_REF_ALTREF_FRAME]);

	rockchip_vpu981_av1_dec_set_other_frames(ctx);
}
1929 
/*
 * Translate the frame and sequence V4L2 controls into the decoder's
 * per-frame mode/flag registers: skip mode, loop-filter and quantizer
 * deltas, sequence feature enables, quantization matrices, and the
 * tile-sync buffer addresses.
 */
static void rockchip_vpu981_av1_dec_set_parameters(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;

	hantro_reg_write(vpu, &av1_skip_mode,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_SKIP_MODE_PRESENT));
	hantro_reg_write(vpu, &av1_tempor_mvp_e,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS));
	hantro_reg_write(vpu, &av1_delta_lf_res_log,
			 ctrls->frame->loop_filter.delta_lf_res);
	hantro_reg_write(vpu, &av1_delta_lf_multi,
			 !!(ctrls->frame->loop_filter.flags
			    & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_MULTI));
	hantro_reg_write(vpu, &av1_delta_lf_present,
			 !!(ctrls->frame->loop_filter.flags
			    & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_PRESENT));
	hantro_reg_write(vpu, &av1_disable_cdf_update,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_CDF_UPDATE));
	hantro_reg_write(vpu, &av1_allow_warp,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_WARPED_MOTION));
	hantro_reg_write(vpu, &av1_show_frame,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_SHOW_FRAME));
	hantro_reg_write(vpu, &av1_switchable_motion_mode,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_IS_MOTION_MODE_SWITCHABLE));
	hantro_reg_write(vpu, &av1_enable_cdef,
			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_CDEF));
	hantro_reg_write(vpu, &av1_allow_masked_compound,
			 !!(ctrls->sequence->flags
			    & V4L2_AV1_SEQUENCE_FLAG_ENABLE_MASKED_COMPOUND));
	hantro_reg_write(vpu, &av1_allow_interintra,
			 !!(ctrls->sequence->flags
			    & V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTERINTRA_COMPOUND));
	hantro_reg_write(vpu, &av1_enable_intra_edge_filter,
			 !!(ctrls->sequence->flags
			    & V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTRA_EDGE_FILTER));
	hantro_reg_write(vpu, &av1_allow_filter_intra,
			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_FILTER_INTRA));
	hantro_reg_write(vpu, &av1_enable_jnt_comp,
			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_JNT_COMP));
	hantro_reg_write(vpu, &av1_enable_dual_filter,
			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_DUAL_FILTER));
	hantro_reg_write(vpu, &av1_reduced_tx_set_used,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_REDUCED_TX_SET));
	hantro_reg_write(vpu, &av1_allow_screen_content_tools,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS));
	hantro_reg_write(vpu, &av1_allow_intrabc,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC));

	/* force_integer_mv is only meaningful with screen content tools. */
	if (!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS))
		hantro_reg_write(vpu, &av1_force_interger_mv, 0);
	else
		hantro_reg_write(vpu, &av1_force_interger_mv,
				 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_FORCE_INTEGER_MV));

	hantro_reg_write(vpu, &av1_blackwhite_e, 0);
	hantro_reg_write(vpu, &av1_delta_q_res_log, ctrls->frame->quantization.delta_q_res);
	hantro_reg_write(vpu, &av1_delta_q_present,
			 !!(ctrls->frame->quantization.flags
			    & V4L2_AV1_QUANTIZATION_FLAG_DELTA_Q_PRESENT));

	/* frame_type 0 is a keyframe (V4L2_AV1_KEY_FRAME). */
	hantro_reg_write(vpu, &av1_idr_pic_e, !ctrls->frame->frame_type);
	hantro_reg_write(vpu, &av1_quant_base_qindex, ctrls->frame->quantization.base_q_idx);
	hantro_reg_write(vpu, &av1_bit_depth_y_minus8, ctx->bit_depth - 8);
	hantro_reg_write(vpu, &av1_bit_depth_c_minus8, ctx->bit_depth - 8);

	hantro_reg_write(vpu, &av1_mcomp_filt_type, ctrls->frame->interpolation_filter);
	hantro_reg_write(vpu, &av1_high_prec_mv_e,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_HIGH_PRECISION_MV));
	hantro_reg_write(vpu, &av1_comp_pred_mode,
			 (ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_REFERENCE_SELECT) ? 2 : 0);
	hantro_reg_write(vpu, &av1_transform_mode, (ctrls->frame->tx_mode == 1) ? 3 : 4);
	/* Superblock size: 128x128 (7) or 64x64 (6), in log2 units. */
	hantro_reg_write(vpu, &av1_max_cb_size,
			 (ctrls->sequence->flags
			  & V4L2_AV1_SEQUENCE_FLAG_USE_128X128_SUPERBLOCK) ? 7 : 6);
	hantro_reg_write(vpu, &av1_min_cb_size, 3);

	hantro_reg_write(vpu, &av1_comp_pred_fixed_ref, 0);
	hantro_reg_write(vpu, &av1_comp_pred_var_ref0_av1, 0);
	hantro_reg_write(vpu, &av1_comp_pred_var_ref1_av1, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg0, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg1, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg2, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg3, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg4, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg5, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg6, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg7, 0);

	hantro_reg_write(vpu, &av1_qp_delta_y_dc_av1, ctrls->frame->quantization.delta_q_y_dc);
	hantro_reg_write(vpu, &av1_qp_delta_ch_dc_av1, ctrls->frame->quantization.delta_q_u_dc);
	hantro_reg_write(vpu, &av1_qp_delta_ch_ac_av1, ctrls->frame->quantization.delta_q_u_ac);
	if (ctrls->frame->quantization.flags & V4L2_AV1_QUANTIZATION_FLAG_USING_QMATRIX) {
		hantro_reg_write(vpu, &av1_qmlevel_y, ctrls->frame->quantization.qm_y);
		hantro_reg_write(vpu, &av1_qmlevel_u, ctrls->frame->quantization.qm_u);
		hantro_reg_write(vpu, &av1_qmlevel_v, ctrls->frame->quantization.qm_v);
	} else {
		/* 0xff disables the quantization matrix for that plane. */
		hantro_reg_write(vpu, &av1_qmlevel_y, 0xff);
		hantro_reg_write(vpu, &av1_qmlevel_u, 0xff);
		hantro_reg_write(vpu, &av1_qmlevel_v, 0xff);
	}

	hantro_reg_write(vpu, &av1_lossless_e, rockchip_vpu981_av1_dec_is_lossless(ctx));
	hantro_reg_write(vpu, &av1_quant_delta_v_dc, ctrls->frame->quantization.delta_q_v_dc);
	hantro_reg_write(vpu, &av1_quant_delta_v_ac, ctrls->frame->quantization.delta_q_v_ac);

	/* Skip-mode references; fall back to 1 (LAST) when unset. */
	hantro_reg_write(vpu, &av1_skip_ref0,
			 (ctrls->frame->skip_mode_frame[0]) ? ctrls->frame->skip_mode_frame[0] : 1);
	hantro_reg_write(vpu, &av1_skip_ref1,
			 (ctrls->frame->skip_mode_frame[1]) ? ctrls->frame->skip_mode_frame[1] : 1);

	/* Tile synchronization scratch buffer for the current/left tiles. */
	hantro_write_addr(vpu, AV1_MC_SYNC_CURR, av1_dec->tile_buf.dma);
	hantro_write_addr(vpu, AV1_MC_SYNC_LEFT, av1_dec->tile_buf.dma);
}
2045 
2046 static void
rockchip_vpu981_av1_dec_set_input_buffer(struct hantro_ctx * ctx,struct vb2_v4l2_buffer * vb2_src)2047 rockchip_vpu981_av1_dec_set_input_buffer(struct hantro_ctx *ctx,
2048 					 struct vb2_v4l2_buffer *vb2_src)
2049 {
2050 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
2051 	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
2052 	const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
2053 	    ctrls->tile_group_entry;
2054 	struct hantro_dev *vpu = ctx->dev;
2055 	dma_addr_t src_dma;
2056 	u32 src_len, src_buf_len;
2057 	int start_bit, offset;
2058 
2059 	src_dma = vb2_dma_contig_plane_dma_addr(&vb2_src->vb2_buf, 0);
2060 	src_len = vb2_get_plane_payload(&vb2_src->vb2_buf, 0);
2061 	src_buf_len = vb2_plane_size(&vb2_src->vb2_buf, 0);
2062 
2063 	start_bit = (group_entry[0].tile_offset & 0xf) * 8;
2064 	offset = group_entry[0].tile_offset & ~0xf;
2065 
2066 	hantro_reg_write(vpu, &av1_strm_buffer_len, src_buf_len);
2067 	hantro_reg_write(vpu, &av1_strm_start_bit, start_bit);
2068 	hantro_reg_write(vpu, &av1_stream_len, src_len);
2069 	hantro_reg_write(vpu, &av1_strm_start_offset, 0);
2070 	hantro_write_addr(vpu, AV1_INPUT_STREAM, src_dma + offset);
2071 }
2072 
2073 static void
rockchip_vpu981_av1_dec_set_output_buffer(struct hantro_ctx * ctx)2074 rockchip_vpu981_av1_dec_set_output_buffer(struct hantro_ctx *ctx)
2075 {
2076 	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
2077 	struct hantro_dev *vpu = ctx->dev;
2078 	struct hantro_decoded_buffer *dst;
2079 	struct vb2_v4l2_buffer *vb2_dst;
2080 	dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
2081 	size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
2082 	size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
2083 
2084 	vb2_dst = av1_dec->frame_refs[av1_dec->current_frame_index].vb2_ref;
2085 	dst = vb2_to_hantro_decoded_buf(&vb2_dst->vb2_buf);
2086 	luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
2087 	chroma_addr = luma_addr + cr_offset;
2088 	mv_addr = luma_addr + mv_offset;
2089 
2090 	dst->av1.chroma_offset = cr_offset;
2091 	dst->av1.mv_offset = mv_offset;
2092 
2093 	hantro_write_addr(vpu, AV1_TILE_OUT_LU, luma_addr);
2094 	hantro_write_addr(vpu, AV1_TILE_OUT_CH, chroma_addr);
2095 	hantro_write_addr(vpu, AV1_TILE_OUT_MV, mv_addr);
2096 }
2097 
/*
 * Top-level per-frame decode entry point: prepare the run, program every
 * hardware block from the V4L2 controls, set the stream/output buffers and
 * finally kick the decoder.
 *
 * Returns 0 on success or a negative errno; on error the run is completed
 * with VB2_BUF_STATE_ERROR.
 */
int rockchip_vpu981_av1_dec_run(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;
	struct vb2_v4l2_buffer *vb2_src;
	int ret;

	hantro_start_prepare_run(ctx);

	ret = rockchip_vpu981_av1_dec_prepare_run(ctx);
	if (ret)
		goto prepare_error;

	vb2_src = hantro_get_src_buf(ctx);
	if (!vb2_src) {
		ret = -EINVAL;
		goto prepare_error;
	}

	/* Refresh the reference bookkeeping before programming registers. */
	rockchip_vpu981_av1_dec_clean_refs(ctx);
	rockchip_vpu981_av1_dec_frame_ref(ctx, vb2_src->vb2_buf.timestamp);

	rockchip_vpu981_av1_dec_set_parameters(ctx);
	rockchip_vpu981_av1_dec_set_global_model(ctx);
	rockchip_vpu981_av1_dec_set_tile_info(ctx);
	rockchip_vpu981_av1_dec_set_reference_frames(ctx);
	rockchip_vpu981_av1_dec_set_segmentation(ctx);
	rockchip_vpu981_av1_dec_set_loopfilter(ctx);
	rockchip_vpu981_av1_dec_set_picture_dimensions(ctx);
	rockchip_vpu981_av1_dec_set_cdef(ctx);
	rockchip_vpu981_av1_dec_set_lr(ctx);
	rockchip_vpu981_av1_dec_set_fgs(ctx);
	rockchip_vpu981_av1_dec_set_prob(ctx);

	/* Core decode mode and output configuration. */
	hantro_reg_write(vpu, &av1_dec_mode, AV1_DEC_MODE);
	hantro_reg_write(vpu, &av1_dec_out_ec_byte_word, 0);
	hantro_reg_write(vpu, &av1_write_mvs_e, 1);
	hantro_reg_write(vpu, &av1_dec_out_ec_bypass, 1);
	hantro_reg_write(vpu, &av1_dec_clk_gate_e, 1);

	hantro_reg_write(vpu, &av1_dec_abort_e, 0);
	hantro_reg_write(vpu, &av1_dec_tile_int_e, 0);

	/* Bus/AXI tuning parameters. */
	hantro_reg_write(vpu, &av1_dec_alignment, 64);
	hantro_reg_write(vpu, &av1_apf_disable, 0);
	hantro_reg_write(vpu, &av1_apf_threshold, 8);
	hantro_reg_write(vpu, &av1_dec_buswidth, 2);
	hantro_reg_write(vpu, &av1_dec_max_burst, 16);
	hantro_reg_write(vpu, &av1_error_conceal_e, 0);
	hantro_reg_write(vpu, &av1_axi_rd_ostd_threshold, 64);
	hantro_reg_write(vpu, &av1_axi_wr_ostd_threshold, 64);

	/* Override the hardware timeouts with the maximum cycle count. */
	hantro_reg_write(vpu, &av1_ext_timeout_cycles, 0xfffffff);
	hantro_reg_write(vpu, &av1_ext_timeout_override_e, 1);
	hantro_reg_write(vpu, &av1_timeout_cycles, 0xfffffff);
	hantro_reg_write(vpu, &av1_timeout_override_e, 1);

	rockchip_vpu981_av1_dec_set_output_buffer(ctx);
	rockchip_vpu981_av1_dec_set_input_buffer(ctx, vb2_src);

	hantro_end_prepare_run(ctx);

	/* Kick the decoder. */
	hantro_reg_write(vpu, &av1_dec_e, 1);

	return 0;

prepare_error:
	hantro_end_prepare_run(ctx);
	hantro_irq_done(vpu, VB2_BUF_STATE_ERROR);
	return ret;
}
2168 
rockchip_vpu981_postproc_enable(struct hantro_ctx * ctx)2169 static void rockchip_vpu981_postproc_enable(struct hantro_ctx *ctx)
2170 {
2171 	struct hantro_dev *vpu = ctx->dev;
2172 	int width = ctx->dst_fmt.width;
2173 	int height = ctx->dst_fmt.height;
2174 	struct vb2_v4l2_buffer *vb2_dst;
2175 	size_t chroma_offset;
2176 	dma_addr_t dst_dma;
2177 
2178 	vb2_dst = hantro_get_dst_buf(ctx);
2179 
2180 	dst_dma = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0);
2181 	chroma_offset = ctx->dst_fmt.plane_fmt[0].bytesperline *
2182 	    ctx->dst_fmt.height;
2183 
2184 	/* enable post processor */
2185 	hantro_reg_write(vpu, &av1_pp_out_e, 1);
2186 	hantro_reg_write(vpu, &av1_pp_in_format, 0);
2187 	hantro_reg_write(vpu, &av1_pp0_dup_hor, 1);
2188 	hantro_reg_write(vpu, &av1_pp0_dup_ver, 1);
2189 
2190 	hantro_reg_write(vpu, &av1_pp_in_height, height / 2);
2191 	hantro_reg_write(vpu, &av1_pp_in_width, width / 2);
2192 	hantro_reg_write(vpu, &av1_pp_out_height, height);
2193 	hantro_reg_write(vpu, &av1_pp_out_width, width);
2194 	hantro_reg_write(vpu, &av1_pp_out_y_stride,
2195 			 ctx->dst_fmt.plane_fmt[0].bytesperline);
2196 	hantro_reg_write(vpu, &av1_pp_out_c_stride,
2197 			 ctx->dst_fmt.plane_fmt[0].bytesperline);
2198 	switch (ctx->dst_fmt.pixelformat) {
2199 	case V4L2_PIX_FMT_P010:
2200 		hantro_reg_write(vpu, &av1_pp_out_format, 1);
2201 		break;
2202 	case V4L2_PIX_FMT_NV12:
2203 		hantro_reg_write(vpu, &av1_pp_out_format, 3);
2204 		break;
2205 	default:
2206 		hantro_reg_write(vpu, &av1_pp_out_format, 0);
2207 	}
2208 
2209 	hantro_reg_write(vpu, &av1_ppd_blend_exist, 0);
2210 	hantro_reg_write(vpu, &av1_ppd_dith_exist, 0);
2211 	hantro_reg_write(vpu, &av1_ablend_crop_e, 0);
2212 	hantro_reg_write(vpu, &av1_pp_format_customer1_e, 0);
2213 	hantro_reg_write(vpu, &av1_pp_crop_exist, 0);
2214 	hantro_reg_write(vpu, &av1_pp_up_level, 0);
2215 	hantro_reg_write(vpu, &av1_pp_down_level, 0);
2216 	hantro_reg_write(vpu, &av1_pp_exist, 0);
2217 
2218 	hantro_write_addr(vpu, AV1_PP_OUT_LU, dst_dma);
2219 	hantro_write_addr(vpu, AV1_PP_OUT_CH, dst_dma + chroma_offset);
2220 }
2221 
rockchip_vpu981_postproc_disable(struct hantro_ctx * ctx)2222 static void rockchip_vpu981_postproc_disable(struct hantro_ctx *ctx)
2223 {
2224 	struct hantro_dev *vpu = ctx->dev;
2225 
2226 	/* disable post processor */
2227 	hantro_reg_write(vpu, &av1_pp_out_e, 0);
2228 }
2229 
/* Post-processor hooks exported to the common Hantro postproc layer. */
const struct hantro_postproc_ops rockchip_vpu981_postproc_ops = {
	.enable = rockchip_vpu981_postproc_enable,
	.disable = rockchip_vpu981_postproc_disable,
};
2234