1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (c) 2023, Collabora
4 *
5 * Author: Benjamin Gaignard <[email protected]>
6 */
7
8 #include <media/v4l2-mem2mem.h>
9 #include "hantro.h"
10 #include "hantro_v4l2.h"
11 #include "rockchip_vpu981_regs.h"
12
/* Value written to the decoder mode register to select AV1 decoding */
#define AV1_DEC_MODE 17
#define GM_GLOBAL_MODELS_PER_FRAME 7
/* 6 32-bit affine coefficients + 4 16-bit shear parameters per model */
#define GLOBAL_MODEL_TOTAL_SIZE (6 * 4 + 4 * 2)
#define GLOBAL_MODEL_SIZE ALIGN(GM_GLOBAL_MODELS_PER_FRAME * GLOBAL_MODEL_TOTAL_SIZE, 2048)
#define AV1_MAX_TILES 128
/* 16 bytes of metadata per tile, see rockchip_vpu981_av1_dec_set_tile_info() */
#define AV1_TILE_INFO_SIZE (AV1_MAX_TILES * 16)
#define AV1DEC_MAX_PIC_BUFFERS 24
#define AV1_REF_SCALE_SHIFT 14
#define AV1_INVALID_IDX (-1)
#define MAX_FRAME_DISTANCE 31
#define AV1_PRIMARY_REF_NONE 7
#define AV1_TILE_SIZE ALIGN(32 * 128, 4096)
/*
 * These 3 values aren't defined enum v4l2_av1_segment_feature because
 * they are not part of the specification
 */
#define V4L2_AV1_SEG_LVL_ALT_LF_Y_H 2
#define V4L2_AV1_SEG_LVL_ALT_LF_U 3
#define V4L2_AV1_SEG_LVL_ALT_LF_V 4

#define SUPERRES_SCALE_BITS 3
#define SCALE_NUMERATOR 8
#define SUPERRES_SCALE_DENOMINATOR_MIN (SCALE_NUMERATOR + 1)

/* Resize (super-resolution) fixed-point precision constants */
#define RS_SUBPEL_BITS 6
#define RS_SUBPEL_MASK ((1 << RS_SUBPEL_BITS) - 1)
#define RS_SCALE_SUBPEL_BITS 14
#define RS_SCALE_SUBPEL_MASK ((1 << RS_SCALE_SUBPEL_BITS) - 1)
#define RS_SCALE_EXTRA_BITS (RS_SCALE_SUBPEL_BITS - RS_SUBPEL_BITS)
#define RS_SCALE_EXTRA_OFF (1 << (RS_SCALE_EXTRA_BITS - 1))

/* Parenthesize the argument: callers may pass arbitrary expressions */
#define IS_INTRA(type) (((type) == V4L2_AV1_KEY_FRAME) || ((type) == V4L2_AV1_INTRA_ONLY_FRAME))

/* Zero-based reference buffer slots, relative to the LAST reference */
#define LST_BUF_IDX (V4L2_AV1_REF_LAST_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define LST2_BUF_IDX (V4L2_AV1_REF_LAST2_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define LST3_BUF_IDX (V4L2_AV1_REF_LAST3_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define GLD_BUF_IDX (V4L2_AV1_REF_GOLDEN_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define BWD_BUF_IDX (V4L2_AV1_REF_BWDREF_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define ALT2_BUF_IDX (V4L2_AV1_REF_ALTREF2_FRAME - V4L2_AV1_REF_LAST_FRAME)
#define ALT_BUF_IDX (V4L2_AV1_REF_ALTREF_FRAME - V4L2_AV1_REF_LAST_FRAME)

#define DIV_LUT_PREC_BITS 14
#define DIV_LUT_BITS 8
#define DIV_LUT_NUM BIT(DIV_LUT_BITS)
#define WARP_PARAM_REDUCE_BITS 6
#define WARPEDMODEL_PREC_BITS 16

/* Round value / 2^n to nearest; value must be non-negative */
#define AV1_DIV_ROUND_UP_POW2(value, n) \
({ \
	typeof(n) _n = (n); \
	typeof(value) _value = (value); \
	(_value + (BIT(_n) >> 1)) >> _n; \
})

/* Signed variant: rounds the magnitude and preserves the sign */
#define AV1_DIV_ROUND_UP_POW2_SIGNED(value, n) \
({ \
	typeof(n) _n_ = (n); \
	typeof(value) _value_ = (value); \
	(((_value_) < 0) ? -AV1_DIV_ROUND_UP_POW2(-(_value_), (_n_)) \
		: AV1_DIV_ROUND_UP_POW2((_value_), (_n_))); \
})
74
/*
 * Film-grain synthesis tables passed to the hardware: per-plane scaling
 * LUTs plus the precomputed luma/chroma grain blocks. The DMA buffer
 * holding this struct is allocated in rockchip_vpu981_av1_dec_init().
 * NOTE(review): field order/sizes presumably match the hardware layout —
 * confirm against the VPU981 reference code.
 */
struct rockchip_av1_film_grain {
	u8 scaling_lut_y[256];
	u8 scaling_lut_cb[256];
	u8 scaling_lut_cr[256];
	s16 cropped_luma_grain_block[4096];
	s16 cropped_chroma_grain_block[1024 * 2];
};
82
/*
 * Reciprocal lookup table used by rockchip_vpu981_av1_dec_resolve_divisor_32():
 * entry i approximates (1 << DIV_LUT_PREC_BITS) * DIV_LUT_NUM / (DIV_LUT_NUM + i),
 * i.e. div_lut[0] = 16384 (2^14) down to div_lut[256] = 8192 (2^13).
 */
static const short div_lut[DIV_LUT_NUM + 1] = {
	16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
	15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
	15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
	14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
	13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
	13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
	13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
	12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
	12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
	11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
	11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
	11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
	10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
	10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
	10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
	9963, 9939, 9916, 9892, 9869, 9846, 9823, 9800, 9777, 9754, 9732,
	9709, 9687, 9664, 9642, 9620, 9598, 9576, 9554, 9533, 9511, 9489,
	9468, 9447, 9425, 9404, 9383, 9362, 9341, 9321, 9300, 9279, 9259,
	9239, 9218, 9198, 9178, 9158, 9138, 9118, 9098, 9079, 9059, 9039,
	9020, 9001, 8981, 8962, 8943, 8924, 8905, 8886, 8867, 8849, 8830,
	8812, 8793, 8775, 8756, 8738, 8720, 8702, 8684, 8666, 8648, 8630,
	8613, 8595, 8577, 8560, 8542, 8525, 8508, 8490, 8473, 8456, 8439,
	8422, 8405, 8389, 8372, 8355, 8339, 8322, 8306, 8289, 8273, 8257,
	8240, 8224, 8208, 8192,
};
109
rockchip_vpu981_get_frame_index(struct hantro_ctx * ctx,int ref)110 static int rockchip_vpu981_get_frame_index(struct hantro_ctx *ctx, int ref)
111 {
112 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
113 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
114 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
115 u64 timestamp;
116 int i, idx = frame->ref_frame_idx[ref];
117
118 if (idx >= V4L2_AV1_TOTAL_REFS_PER_FRAME || idx < 0)
119 return AV1_INVALID_IDX;
120
121 timestamp = frame->reference_frame_ts[idx];
122 for (i = 0; i < AV1_MAX_FRAME_BUF_COUNT; i++) {
123 if (!av1_dec->frame_refs[i].used)
124 continue;
125 if (av1_dec->frame_refs[i].timestamp == timestamp)
126 return i;
127 }
128
129 return AV1_INVALID_IDX;
130 }
131
rockchip_vpu981_get_order_hint(struct hantro_ctx * ctx,int ref)132 static int rockchip_vpu981_get_order_hint(struct hantro_ctx *ctx, int ref)
133 {
134 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
135 int idx = rockchip_vpu981_get_frame_index(ctx, ref);
136
137 if (idx != AV1_INVALID_IDX)
138 return av1_dec->frame_refs[idx].order_hint;
139
140 return 0;
141 }
142
/*
 * Claim a free frame_refs[] slot for the frame being decoded and record
 * its geometry, timestamp, order hints and destination vb2 buffer.
 * Returns the slot index, or AV1_INVALID_IDX when all slots are in use.
 */
static int rockchip_vpu981_av1_dec_frame_ref(struct hantro_ctx *ctx,
					     u64 timestamp)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	const struct v4l2_ctrl_av1_frame *frame = av1_dec->ctrls.frame;
	int slot, ref;

	/* Find the first unused slot. */
	for (slot = 0; slot < AV1_MAX_FRAME_BUF_COUNT; slot++)
		if (!av1_dec->frame_refs[slot].used)
			break;

	if (slot == AV1_MAX_FRAME_BUF_COUNT)
		return AV1_INVALID_IDX;

	av1_dec->frame_refs[slot].width = frame->frame_width_minus_1 + 1;
	av1_dec->frame_refs[slot].height = frame->frame_height_minus_1 + 1;
	/* mi_cols/mi_rows are in 8-pixel units */
	av1_dec->frame_refs[slot].mi_cols = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
	av1_dec->frame_refs[slot].mi_rows = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
	av1_dec->frame_refs[slot].timestamp = timestamp;
	av1_dec->frame_refs[slot].frame_type = frame->frame_type;
	av1_dec->frame_refs[slot].order_hint = frame->order_hint;
	av1_dec->frame_refs[slot].vb2_ref = hantro_get_dst_buf(ctx);

	for (ref = 0; ref < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref++)
		av1_dec->frame_refs[slot].order_hints[ref] = frame->order_hints[ref];

	av1_dec->frame_refs[slot].used = true;
	av1_dec->current_frame_index = slot;

	return slot;
}
176
rockchip_vpu981_av1_dec_frame_unref(struct hantro_ctx * ctx,int idx)177 static void rockchip_vpu981_av1_dec_frame_unref(struct hantro_ctx *ctx, int idx)
178 {
179 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
180
181 if (idx >= 0)
182 av1_dec->frame_refs[idx].used = false;
183 }
184
rockchip_vpu981_av1_dec_clean_refs(struct hantro_ctx * ctx)185 static void rockchip_vpu981_av1_dec_clean_refs(struct hantro_ctx *ctx)
186 {
187 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
188 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
189
190 int ref, idx;
191
192 for (idx = 0; idx < AV1_MAX_FRAME_BUF_COUNT; idx++) {
193 u64 timestamp = av1_dec->frame_refs[idx].timestamp;
194 bool used = false;
195
196 if (!av1_dec->frame_refs[idx].used)
197 continue;
198
199 for (ref = 0; ref < V4L2_AV1_TOTAL_REFS_PER_FRAME; ref++) {
200 if (ctrls->frame->reference_frame_ts[ref] == timestamp)
201 used = true;
202 }
203
204 if (!used)
205 rockchip_vpu981_av1_dec_frame_unref(ctx, idx);
206 }
207 }
208
/* Bytes needed for one packed luma plane at the current bit depth. */
static size_t rockchip_vpu981_av1_dec_luma_size(struct hantro_ctx *ctx)
{
	return ctx->dst_fmt.width * ctx->dst_fmt.height * ctx->bit_depth / 8;
}
213
rockchip_vpu981_av1_dec_chroma_size(struct hantro_ctx * ctx)214 static size_t rockchip_vpu981_av1_dec_chroma_size(struct hantro_ctx *ctx)
215 {
216 size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
217
218 return ALIGN((cr_offset * 3) / 2, 64);
219 }
220
/*
 * Free all per-tile-column auxiliary DMA buffers (deblock data/control,
 * CDEF, super-resolution and loop-restoration column stores). Each CPU
 * pointer is reset to NULL so this function is safe to call repeatedly
 * and on partially-allocated state.
 */
static void rockchip_vpu981_av1_dec_tiles_free(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;

	if (av1_dec->db_data_col.cpu)
		dma_free_coherent(vpu->dev, av1_dec->db_data_col.size,
				  av1_dec->db_data_col.cpu,
				  av1_dec->db_data_col.dma);
	av1_dec->db_data_col.cpu = NULL;

	if (av1_dec->db_ctrl_col.cpu)
		dma_free_coherent(vpu->dev, av1_dec->db_ctrl_col.size,
				  av1_dec->db_ctrl_col.cpu,
				  av1_dec->db_ctrl_col.dma);
	av1_dec->db_ctrl_col.cpu = NULL;

	if (av1_dec->cdef_col.cpu)
		dma_free_coherent(vpu->dev, av1_dec->cdef_col.size,
				  av1_dec->cdef_col.cpu, av1_dec->cdef_col.dma);
	av1_dec->cdef_col.cpu = NULL;

	if (av1_dec->sr_col.cpu)
		dma_free_coherent(vpu->dev, av1_dec->sr_col.size,
				  av1_dec->sr_col.cpu, av1_dec->sr_col.dma);
	av1_dec->sr_col.cpu = NULL;

	if (av1_dec->lr_col.cpu)
		dma_free_coherent(vpu->dev, av1_dec->lr_col.size,
				  av1_dec->lr_col.cpu, av1_dec->lr_col.dma);
	av1_dec->lr_col.cpu = NULL;
}
253
/*
 * (Re)allocate the per-tile-column auxiliary DMA buffers for the current
 * frame height and tile-column count. The existing buffers are reused when
 * the deblock data buffer is already large enough; otherwise everything is
 * freed and reallocated. The per-buffer size formulas come from the vendor
 * reference code for this core. Returns 0 or -ENOMEM.
 */
static int rockchip_vpu981_av1_dec_tiles_reallocate(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_av1_tile_info *tile_info = &ctrls->frame->tile_info;
	unsigned int num_tile_cols = tile_info->tile_cols;
	/* Height rounded up to whole 64x64 superblocks */
	unsigned int height = ALIGN(ctrls->frame->frame_height_minus_1 + 1, 64);
	unsigned int height_in_sb = height / 64;
	/* Loop-restoration stripes: 64-pixel bands shifted by 8 pixels */
	unsigned int stripe_num = ((height + 8) + 63) / 64;
	size_t size;

	/* Current buffers are big enough: keep them */
	if (av1_dec->db_data_col.size >=
	    ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols)
		return 0;

	rockchip_vpu981_av1_dec_tiles_free(ctx);

	/* Deblocking filter pixel data, per tile column */
	size = ALIGN(height * 12 * ctx->bit_depth / 8, 128) * num_tile_cols;
	av1_dec->db_data_col.cpu = dma_alloc_coherent(vpu->dev, size,
						      &av1_dec->db_data_col.dma,
						      GFP_KERNEL);
	if (!av1_dec->db_data_col.cpu)
		goto buffer_allocation_error;
	av1_dec->db_data_col.size = size;

	/* Deblocking filter control data, per tile column */
	size = ALIGN(height * 2 * 16 / 4, 128) * num_tile_cols;
	av1_dec->db_ctrl_col.cpu = dma_alloc_coherent(vpu->dev, size,
						      &av1_dec->db_ctrl_col.dma,
						      GFP_KERNEL);
	if (!av1_dec->db_ctrl_col.cpu)
		goto buffer_allocation_error;
	av1_dec->db_ctrl_col.size = size;

	/* CDEF filter column store, per superblock row */
	size = ALIGN(height_in_sb * 44 * ctx->bit_depth * 16 / 8, 128) * num_tile_cols;
	av1_dec->cdef_col.cpu = dma_alloc_coherent(vpu->dev, size,
						   &av1_dec->cdef_col.dma,
						   GFP_KERNEL);
	if (!av1_dec->cdef_col.cpu)
		goto buffer_allocation_error;
	av1_dec->cdef_col.size = size;

	/* Super-resolution column store */
	size = ALIGN(height_in_sb * (3040 + 1280), 128) * num_tile_cols;
	av1_dec->sr_col.cpu = dma_alloc_coherent(vpu->dev, size,
						 &av1_dec->sr_col.dma,
						 GFP_KERNEL);
	if (!av1_dec->sr_col.cpu)
		goto buffer_allocation_error;
	av1_dec->sr_col.size = size;

	/* Loop-restoration column store, per stripe */
	size = ALIGN(stripe_num * 1536 * ctx->bit_depth / 8, 128) * num_tile_cols;
	av1_dec->lr_col.cpu = dma_alloc_coherent(vpu->dev, size,
						 &av1_dec->lr_col.dma,
						 GFP_KERNEL);
	if (!av1_dec->lr_col.cpu)
		goto buffer_allocation_error;
	av1_dec->lr_col.size = size;

	av1_dec->num_tile_cols_allocated = num_tile_cols;
	return 0;

buffer_allocation_error:
	/* tiles_free() tolerates partially-allocated state */
	rockchip_vpu981_av1_dec_tiles_free(ctx);
	return -ENOMEM;
}
319
/*
 * Free every DMA buffer owned by the AV1 decoder context: global model,
 * tile info, film grain, CDF probability tables (in/out), tile buffer and
 * the per-tile-column buffers. CPU pointers are NULLed, making the
 * function idempotent and safe on partially-initialized contexts.
 */
void rockchip_vpu981_av1_dec_exit(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;

	if (av1_dec->global_model.cpu)
		dma_free_coherent(vpu->dev, av1_dec->global_model.size,
				  av1_dec->global_model.cpu,
				  av1_dec->global_model.dma);
	av1_dec->global_model.cpu = NULL;

	if (av1_dec->tile_info.cpu)
		dma_free_coherent(vpu->dev, av1_dec->tile_info.size,
				  av1_dec->tile_info.cpu,
				  av1_dec->tile_info.dma);
	av1_dec->tile_info.cpu = NULL;

	if (av1_dec->film_grain.cpu)
		dma_free_coherent(vpu->dev, av1_dec->film_grain.size,
				  av1_dec->film_grain.cpu,
				  av1_dec->film_grain.dma);
	av1_dec->film_grain.cpu = NULL;

	if (av1_dec->prob_tbl.cpu)
		dma_free_coherent(vpu->dev, av1_dec->prob_tbl.size,
				  av1_dec->prob_tbl.cpu, av1_dec->prob_tbl.dma);
	av1_dec->prob_tbl.cpu = NULL;

	if (av1_dec->prob_tbl_out.cpu)
		dma_free_coherent(vpu->dev, av1_dec->prob_tbl_out.size,
				  av1_dec->prob_tbl_out.cpu,
				  av1_dec->prob_tbl_out.dma);
	av1_dec->prob_tbl_out.cpu = NULL;

	if (av1_dec->tile_buf.cpu)
		dma_free_coherent(vpu->dev, av1_dec->tile_buf.size,
				  av1_dec->tile_buf.cpu, av1_dec->tile_buf.dma);
	av1_dec->tile_buf.cpu = NULL;

	rockchip_vpu981_av1_dec_tiles_free(ctx);
}
361
rockchip_vpu981_av1_dec_init(struct hantro_ctx * ctx)362 int rockchip_vpu981_av1_dec_init(struct hantro_ctx *ctx)
363 {
364 struct hantro_dev *vpu = ctx->dev;
365 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
366
367 memset(av1_dec, 0, sizeof(*av1_dec));
368
369 av1_dec->global_model.cpu = dma_alloc_coherent(vpu->dev, GLOBAL_MODEL_SIZE,
370 &av1_dec->global_model.dma,
371 GFP_KERNEL);
372 if (!av1_dec->global_model.cpu)
373 return -ENOMEM;
374 av1_dec->global_model.size = GLOBAL_MODEL_SIZE;
375
376 av1_dec->tile_info.cpu = dma_alloc_coherent(vpu->dev, AV1_MAX_TILES,
377 &av1_dec->tile_info.dma,
378 GFP_KERNEL);
379 if (!av1_dec->tile_info.cpu)
380 return -ENOMEM;
381 av1_dec->tile_info.size = AV1_MAX_TILES;
382
383 av1_dec->film_grain.cpu = dma_alloc_coherent(vpu->dev,
384 ALIGN(sizeof(struct rockchip_av1_film_grain), 2048),
385 &av1_dec->film_grain.dma,
386 GFP_KERNEL);
387 if (!av1_dec->film_grain.cpu)
388 return -ENOMEM;
389 av1_dec->film_grain.size = ALIGN(sizeof(struct rockchip_av1_film_grain), 2048);
390
391 av1_dec->prob_tbl.cpu = dma_alloc_coherent(vpu->dev,
392 ALIGN(sizeof(struct av1cdfs), 2048),
393 &av1_dec->prob_tbl.dma,
394 GFP_KERNEL);
395 if (!av1_dec->prob_tbl.cpu)
396 return -ENOMEM;
397 av1_dec->prob_tbl.size = ALIGN(sizeof(struct av1cdfs), 2048);
398
399 av1_dec->prob_tbl_out.cpu = dma_alloc_coherent(vpu->dev,
400 ALIGN(sizeof(struct av1cdfs), 2048),
401 &av1_dec->prob_tbl_out.dma,
402 GFP_KERNEL);
403 if (!av1_dec->prob_tbl_out.cpu)
404 return -ENOMEM;
405 av1_dec->prob_tbl_out.size = ALIGN(sizeof(struct av1cdfs), 2048);
406 av1_dec->cdfs = &av1_dec->default_cdfs;
407 av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
408
409 rockchip_av1_set_default_cdfs(av1_dec->cdfs, av1_dec->cdfs_ndvc);
410
411 av1_dec->tile_buf.cpu = dma_alloc_coherent(vpu->dev,
412 AV1_TILE_SIZE,
413 &av1_dec->tile_buf.dma,
414 GFP_KERNEL);
415 if (!av1_dec->tile_buf.cpu)
416 return -ENOMEM;
417 av1_dec->tile_buf.size = AV1_TILE_SIZE;
418
419 return 0;
420 }
421
/*
 * Fetch and cache the stateless AV1 control payloads for the upcoming
 * decode run, then make sure the tile-column buffers fit the new frame.
 * The sequence, tile-group-entry and frame controls are mandatory
 * (-EINVAL with a warning if missing); film grain is optional and may be
 * left NULL. Returns 0, -EINVAL or -ENOMEM.
 */
static int rockchip_vpu981_av1_dec_prepare_run(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;

	ctrls->sequence = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_SEQUENCE);
	if (WARN_ON(!ctrls->sequence))
		return -EINVAL;

	ctrls->tile_group_entry =
		hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_TILE_GROUP_ENTRY);
	if (WARN_ON(!ctrls->tile_group_entry))
		return -EINVAL;

	ctrls->frame = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FRAME);
	if (WARN_ON(!ctrls->frame))
		return -EINVAL;

	/* Optional control: NULL when the stream carries no film grain */
	ctrls->film_grain =
		hantro_get_ctrl(ctx, V4L2_CID_STATELESS_AV1_FILM_GRAIN);

	return rockchip_vpu981_av1_dec_tiles_reallocate(ctx);
}
445
/* Index of the most significant set bit of @n; 0 when n == 0. */
static inline int rockchip_vpu981_av1_dec_get_msb(u32 n)
{
	int msb = 0;

	while (n >>= 1)
		msb++;

	return msb;
}
452
/*
 * Turn a division by @d into a multiplication: returns a DIV_LUT_PREC_BITS
 * fixed-point reciprocal from div_lut[] and stores the total right-shift
 * to apply in *shift. Returns -1 when the lookup index would overflow the
 * table. Mirrors the AV1 spec's resolve_divisor process.
 */
static short rockchip_vpu981_av1_dec_resolve_divisor_32(u32 d, short *shift)
{
	int f;
	u64 e;

	*shift = rockchip_vpu981_av1_dec_get_msb(d);
	/* e is obtained from D after resetting the most significant 1 bit. */
	e = d - ((u32)1 << *shift);
	/* Get the most significant DIV_LUT_BITS (8) bits of e into f */
	if (*shift > DIV_LUT_BITS)
		f = AV1_DIV_ROUND_UP_POW2(e, *shift - DIV_LUT_BITS);
	else
		f = e << (DIV_LUT_BITS - *shift);
	if (f > DIV_LUT_NUM)
		return -1;
	/* Caller must shift the product right by msb(d) + DIV_LUT_PREC_BITS */
	*shift += DIV_LUT_PREC_BITS;
	/* Use f as lookup into the precomputed table of multipliers */
	return div_lut[f];
}
472
/*
 * Derive the warp shear parameters (alpha, beta, gamma, delta) from the
 * six global-motion affine coefficients in @params, following the AV1
 * spec's setup_shear_params process. Outputs are clamped to s16 range and
 * quantized to multiples of 1 << WARP_PARAM_REDUCE_BITS. Outputs are left
 * untouched when mat[2] <= 0 (degenerate model).
 */
static void
rockchip_vpu981_av1_dec_get_shear_params(const u32 *params, s64 *alpha,
					 s64 *beta, s64 *gamma, s64 *delta)
{
	/*
	 * NOTE(review): reinterprets the u32 coefficients as signed ints;
	 * the control payload presumably carries two's-complement values —
	 * confirm against the v4l2 AV1 control documentation.
	 */
	const int *mat = params;
	short shift;
	short y;
	long long gv, dv;

	if (mat[2] <= 0)
		return;

	*alpha = clamp_val(mat[2] - (1 << WARPEDMODEL_PREC_BITS), S16_MIN, S16_MAX);
	*beta = clamp_val(mat[3], S16_MIN, S16_MAX);

	/* 1/mat[2] as a fixed-point multiplier, sign restored manually */
	y = rockchip_vpu981_av1_dec_resolve_divisor_32(abs(mat[2]), &shift) * (mat[2] < 0 ? -1 : 1);

	gv = ((long long)mat[4] * (1 << WARPEDMODEL_PREC_BITS)) * y;

	*gamma = clamp_val((int)AV1_DIV_ROUND_UP_POW2_SIGNED(gv, shift), S16_MIN, S16_MAX);

	dv = ((long long)mat[3] * mat[4]) * y;
	*delta = clamp_val(mat[5] -
		(int)AV1_DIV_ROUND_UP_POW2_SIGNED(dv, shift) - (1 << WARPEDMODEL_PREC_BITS),
		S16_MIN, S16_MAX);

	/* Reduce precision: round to multiples of 2^WARP_PARAM_REDUCE_BITS */
	*alpha = AV1_DIV_ROUND_UP_POW2_SIGNED(*alpha, WARP_PARAM_REDUCE_BITS)
		 * (1 << WARP_PARAM_REDUCE_BITS);
	*beta = AV1_DIV_ROUND_UP_POW2_SIGNED(*beta, WARP_PARAM_REDUCE_BITS)
		* (1 << WARP_PARAM_REDUCE_BITS);
	*gamma = AV1_DIV_ROUND_UP_POW2_SIGNED(*gamma, WARP_PARAM_REDUCE_BITS)
		 * (1 << WARP_PARAM_REDUCE_BITS);
	*delta = AV1_DIV_ROUND_UP_POW2_SIGNED(*delta, WARP_PARAM_REDUCE_BITS)
		 * (1 << WARP_PARAM_REDUCE_BITS);
}
508
/*
 * Serialize the global motion models for the 7 inter references into the
 * global_model DMA buffer and program its address. Each model is 6 s32
 * affine coefficients followed by 4 s16 shear parameters (32 bytes,
 * matching GLOBAL_MODEL_TOTAL_SIZE).
 */
static void rockchip_vpu981_av1_dec_set_global_model(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	const struct v4l2_av1_global_motion *gm = &frame->global_motion;
	u8 *dst = av1_dec->global_model.cpu;
	struct hantro_dev *vpu = ctx->dev;
	int ref_frame, i;

	memset(dst, 0, GLOBAL_MODEL_SIZE);
	for (ref_frame = 0; ref_frame < V4L2_AV1_REFS_PER_FRAME; ++ref_frame) {
		s64 alpha = 0, beta = 0, gamma = 0, delta = 0;

		/*
		 * Coefficients 2 and 3 are written in swapped order.
		 * NOTE(review): presumably the hardware expects them
		 * transposed — confirm against the vendor reference code.
		 */
		for (i = 0; i < 6; ++i) {
			if (i == 2)
				*(s32 *)dst =
					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][3];
			else if (i == 3)
				*(s32 *)dst =
					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][2];
			else
				*(s32 *)dst =
					gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][i];
			dst += 4;
		}

		/* Shear params only exist for translational/affine models */
		if (gm->type[V4L2_AV1_REF_LAST_FRAME + ref_frame] <= V4L2_AV1_WARP_MODEL_AFFINE)
			rockchip_vpu981_av1_dec_get_shear_params(&gm->params[V4L2_AV1_REF_LAST_FRAME + ref_frame][0],
								 &alpha, &beta, &gamma, &delta);

		*(s16 *)dst = alpha;
		dst += 2;
		*(s16 *)dst = beta;
		dst += 2;
		*(s16 *)dst = gamma;
		dst += 2;
		*(s16 *)dst = delta;
		dst += 2;
	}

	hantro_write_addr(vpu, AV1_GLOBAL_MODEL, av1_dec->global_model.dma);
}
552
/*
 * Smallest k such that (1 << k) >= target (ceil(log2(target));
 * returns 0 for target <= 1).
 */
static int rockchip_vpu981_av1_tile_log2(int target)
{
	int k = 0;

	while ((1 << k) < target)
		k++;

	return k;
}
565
/*
 * Build the per-tile metadata table consumed by the hardware (16 bytes
 * per tile: width/height in superblocks plus start/end byte offsets into
 * the tile data) and program the tile-related registers. Tiles are
 * emitted column-major (hence av1_tile_transpose = 1), and the context
 * update tile id is converted to that ordering.
 */
static void rockchip_vpu981_av1_dec_set_tile_info(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_av1_tile_info *tile_info = &ctrls->frame->tile_info;
	const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
		ctrls->tile_group_entry;
	/* Convert row-major context_update_tile_id to column-major order */
	int context_update_y =
		tile_info->context_update_tile_id / tile_info->tile_cols;
	int context_update_x =
		tile_info->context_update_tile_id % tile_info->tile_cols;
	int context_update_tile_id =
		context_update_x * tile_info->tile_rows + context_update_y;
	u8 *dst = av1_dec->tile_info.cpu;
	struct hantro_dev *vpu = ctx->dev;
	int tile0, tile1;

	memset(dst, 0, av1_dec->tile_info.size);

	for (tile0 = 0; tile0 < tile_info->tile_cols; tile0++) {
		for (tile1 = 0; tile1 < tile_info->tile_rows; tile1++) {
			int tile_id = tile1 * tile_info->tile_cols + tile0;
			u32 start, end;
			u32 y0 =
				tile_info->height_in_sbs_minus_1[tile1] + 1;
			u32 x0 = tile_info->width_in_sbs_minus_1[tile0] + 1;

			/* tile size in SB units (width,height) */
			*dst++ = x0;
			*dst++ = 0;
			*dst++ = 0;
			*dst++ = 0;
			*dst++ = y0;
			*dst++ = 0;
			*dst++ = 0;
			*dst++ = 0;

			/* tile start position, relative to the first tile */
			start = group_entry[tile_id].tile_offset - group_entry[0].tile_offset;
			*dst++ = start & 255;
			*dst++ = (start >> 8) & 255;
			*dst++ = (start >> 16) & 255;
			*dst++ = (start >> 24) & 255;

			/* end offset = start + number of bytes in tile data */
			end = start + group_entry[tile_id].tile_size;
			*dst++ = end & 255;
			*dst++ = (end >> 8) & 255;
			*dst++ = (end >> 16) & 255;
			*dst++ = (end >> 24) & 255;
		}
	}

	hantro_reg_write(vpu, &av1_multicore_expect_context_update, !!(context_update_x == 0));
	hantro_reg_write(vpu, &av1_tile_enable,
			 !!((tile_info->tile_cols > 1) || (tile_info->tile_rows > 1)));
	hantro_reg_write(vpu, &av1_num_tile_cols_8k, tile_info->tile_cols);
	hantro_reg_write(vpu, &av1_num_tile_rows_8k, tile_info->tile_rows);
	hantro_reg_write(vpu, &av1_context_update_tile_id, context_update_tile_id);
	hantro_reg_write(vpu, &av1_tile_transpose, 1);
	/* Non-uniform tile layout: size field width comes from the stream */
	if (rockchip_vpu981_av1_tile_log2(tile_info->tile_cols) ||
	    rockchip_vpu981_av1_tile_log2(tile_info->tile_rows))
		hantro_reg_write(vpu, &av1_dec_tile_size_mag, tile_info->tile_size_bytes - 1);
	else
		hantro_reg_write(vpu, &av1_dec_tile_size_mag, 3);

	hantro_write_addr(vpu, AV1_TILE_BASE, av1_dec->tile_info.dma);
}
634
rockchip_vpu981_av1_dec_get_dist(struct hantro_ctx * ctx,int a,int b)635 static int rockchip_vpu981_av1_dec_get_dist(struct hantro_ctx *ctx,
636 int a, int b)
637 {
638 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
639 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
640 int bits = ctrls->sequence->order_hint_bits - 1;
641 int diff, m;
642
643 if (!ctrls->sequence->order_hint_bits)
644 return 0;
645
646 diff = a - b;
647 m = 1 << bits;
648 diff = (diff & (m - 1)) - (diff & m);
649
650 return diff;
651 }
652
/*
 * Compute ref_frame_sign_bias[] for the current frame: 1 when a reference
 * lies in the future (positive order-hint distance), 0 otherwise. All
 * entries are cleared when order hints are disabled or the frame is intra.
 */
static void rockchip_vpu981_av1_dec_set_frame_sign_bias(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	const struct v4l2_ctrl_av1_sequence *sequence = ctrls->sequence;
	int i;

	if (!sequence->order_hint_bits || IS_INTRA(frame->frame_type)) {
		for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++)
			av1_dec->ref_frame_sign_bias[i] = 0;

		return;
	}
	// Identify the nearest forward and backward references.
	/*
	 * i iterates the 7 inter reference slots (0 = LAST); the sign bias
	 * array is indexed from 1 because entry 0 is the INTRA frame.
	 */
	for (i = 0; i < V4L2_AV1_TOTAL_REFS_PER_FRAME - 1; i++) {
		if (rockchip_vpu981_get_frame_index(ctx, i) >= 0) {
			int rel_off =
				rockchip_vpu981_av1_dec_get_dist(ctx,
								 rockchip_vpu981_get_order_hint(ctx, i),
								 frame->order_hint);
			av1_dec->ref_frame_sign_bias[i + 1] = (rel_off <= 0) ? 0 : 1;
		}
	}
}
678
/*
 * Program the geometry, scale factors and buffer addresses (luma, chroma,
 * motion vectors) of reference @ref, taken from frame_refs[] slot @idx.
 * Returns true when the reference needs scaling, i.e. its dimensions
 * differ from the current frame's.
 */
static bool
rockchip_vpu981_av1_dec_set_ref(struct hantro_ctx *ctx, int ref, int idx,
				int width, int height)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	struct hantro_dev *vpu = ctx->dev;
	struct hantro_decoded_buffer *dst;
	dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
	int cur_width = frame->frame_width_minus_1 + 1;
	int cur_height = frame->frame_height_minus_1 + 1;
	/* Reference/current size ratios in 14-bit fixed point, rounded */
	int scale_width =
		((width << AV1_REF_SCALE_SHIFT) + cur_width / 2) / cur_width;
	int scale_height =
		((height << AV1_REF_SCALE_SHIFT) + cur_height / 2) / cur_height;

	/*
	 * NOTE(review): *_ver_scale is written with the width-derived
	 * factor and *_hor_scale with the height-derived one — looks
	 * swapped; confirm register semantics against the TRM before
	 * "fixing".
	 */
	switch (ref) {
	case 0:
		hantro_reg_write(vpu, &av1_ref0_height, height);
		hantro_reg_write(vpu, &av1_ref0_width, width);
		hantro_reg_write(vpu, &av1_ref0_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref0_hor_scale, scale_height);
		break;
	case 1:
		hantro_reg_write(vpu, &av1_ref1_height, height);
		hantro_reg_write(vpu, &av1_ref1_width, width);
		hantro_reg_write(vpu, &av1_ref1_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref1_hor_scale, scale_height);
		break;
	case 2:
		hantro_reg_write(vpu, &av1_ref2_height, height);
		hantro_reg_write(vpu, &av1_ref2_width, width);
		hantro_reg_write(vpu, &av1_ref2_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref2_hor_scale, scale_height);
		break;
	case 3:
		hantro_reg_write(vpu, &av1_ref3_height, height);
		hantro_reg_write(vpu, &av1_ref3_width, width);
		hantro_reg_write(vpu, &av1_ref3_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref3_hor_scale, scale_height);
		break;
	case 4:
		hantro_reg_write(vpu, &av1_ref4_height, height);
		hantro_reg_write(vpu, &av1_ref4_width, width);
		hantro_reg_write(vpu, &av1_ref4_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref4_hor_scale, scale_height);
		break;
	case 5:
		hantro_reg_write(vpu, &av1_ref5_height, height);
		hantro_reg_write(vpu, &av1_ref5_width, width);
		hantro_reg_write(vpu, &av1_ref5_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref5_hor_scale, scale_height);
		break;
	case 6:
		hantro_reg_write(vpu, &av1_ref6_height, height);
		hantro_reg_write(vpu, &av1_ref6_width, width);
		hantro_reg_write(vpu, &av1_ref6_ver_scale, scale_width);
		hantro_reg_write(vpu, &av1_ref6_hor_scale, scale_height);
		break;
	default:
		pr_warn("AV1 invalid reference frame index\n");
	}

	dst = vb2_to_hantro_decoded_buf(&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
	luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
	chroma_addr = luma_addr + dst->av1.chroma_offset;
	mv_addr = luma_addr + dst->av1.mv_offset;

	hantro_write_addr(vpu, AV1_REFERENCE_Y(ref), luma_addr);
	hantro_write_addr(vpu, AV1_REFERENCE_CB(ref), chroma_addr);
	hantro_write_addr(vpu, AV1_REFERENCE_MV(ref), mv_addr);

	return (scale_width != (1 << AV1_REF_SCALE_SHIFT)) ||
	       (scale_height != (1 << AV1_REF_SCALE_SHIFT));
}
755
/*
 * Write the sign-bias flag @val into the per-reference register for
 * reference slot @ref (0..6). Out-of-range indices only log a warning.
 */
static void rockchip_vpu981_av1_dec_set_sign_bias(struct hantro_ctx *ctx,
						  int ref, int val)
{
	struct hantro_dev *vpu = ctx->dev;

	switch (ref) {
	case 0:
		hantro_reg_write(vpu, &av1_ref0_sign_bias, val);
		break;
	case 1:
		hantro_reg_write(vpu, &av1_ref1_sign_bias, val);
		break;
	case 2:
		hantro_reg_write(vpu, &av1_ref2_sign_bias, val);
		break;
	case 3:
		hantro_reg_write(vpu, &av1_ref3_sign_bias, val);
		break;
	case 4:
		hantro_reg_write(vpu, &av1_ref4_sign_bias, val);
		break;
	case 5:
		hantro_reg_write(vpu, &av1_ref5_sign_bias, val);
		break;
	case 6:
		hantro_reg_write(vpu, &av1_ref6_sign_bias, val);
		break;
	default:
		pr_warn("AV1 invalid sign bias index\n");
		break;
	}
}
788
rockchip_vpu981_av1_dec_set_segmentation(struct hantro_ctx * ctx)789 static void rockchip_vpu981_av1_dec_set_segmentation(struct hantro_ctx *ctx)
790 {
791 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
792 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
793 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
794 const struct v4l2_av1_segmentation *seg = &frame->segmentation;
795 u32 segval[V4L2_AV1_MAX_SEGMENTS][V4L2_AV1_SEG_LVL_MAX] = { 0 };
796 struct hantro_dev *vpu = ctx->dev;
797 u8 segsign = 0, preskip_segid = 0, last_active_seg = 0, i, j;
798
799 if (!!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED) &&
800 frame->primary_ref_frame < V4L2_AV1_REFS_PER_FRAME) {
801 int idx = rockchip_vpu981_get_frame_index(ctx, frame->primary_ref_frame);
802
803 if (idx >= 0) {
804 dma_addr_t luma_addr, mv_addr = 0;
805 struct hantro_decoded_buffer *seg;
806 size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
807
808 seg = vb2_to_hantro_decoded_buf(&av1_dec->frame_refs[idx].vb2_ref->vb2_buf);
809 luma_addr = hantro_get_dec_buf_addr(ctx, &seg->base.vb.vb2_buf);
810 mv_addr = luma_addr + mv_offset;
811
812 hantro_write_addr(vpu, AV1_SEGMENTATION, mv_addr);
813 hantro_reg_write(vpu, &av1_use_temporal3_mvs, 1);
814 }
815 }
816
817 hantro_reg_write(vpu, &av1_segment_temp_upd_e,
818 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_TEMPORAL_UPDATE));
819 hantro_reg_write(vpu, &av1_segment_upd_e,
820 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_UPDATE_MAP));
821 hantro_reg_write(vpu, &av1_segment_e,
822 !!(seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED));
823
824 hantro_reg_write(vpu, &av1_error_resilient,
825 !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE));
826
827 if (IS_INTRA(frame->frame_type) ||
828 !!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE)) {
829 hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
830 }
831
832 if (seg->flags & V4L2_AV1_SEGMENTATION_FLAG_ENABLED) {
833 int s;
834
835 for (s = 0; s < V4L2_AV1_MAX_SEGMENTS; s++) {
836 if (seg->feature_enabled[s] &
837 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
838 segval[s][V4L2_AV1_SEG_LVL_ALT_Q] =
839 clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q]),
840 0, 255);
841 segsign |=
842 (seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_Q] < 0) << s;
843 }
844
845 if (seg->feature_enabled[s] &
846 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_V))
847 segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V] =
848 clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]),
849 -63, 63);
850
851 if (seg->feature_enabled[s] &
852 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_Y_H))
853 segval[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H] =
854 clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]),
855 -63, 63);
856
857 if (seg->feature_enabled[s] &
858 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_U))
859 segval[s][V4L2_AV1_SEG_LVL_ALT_LF_U] =
860 clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_U]),
861 -63, 63);
862
863 if (seg->feature_enabled[s] &
864 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_LF_V))
865 segval[s][V4L2_AV1_SEG_LVL_ALT_LF_V] =
866 clamp(abs(seg->feature_data[s][V4L2_AV1_SEG_LVL_ALT_LF_V]),
867 -63, 63);
868
869 if (frame->frame_type && seg->feature_enabled[s] &
870 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_FRAME))
871 segval[s][V4L2_AV1_SEG_LVL_REF_FRAME]++;
872
873 if (seg->feature_enabled[s] &
874 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_SKIP))
875 segval[s][V4L2_AV1_SEG_LVL_REF_SKIP] = 1;
876
877 if (seg->feature_enabled[s] &
878 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_REF_GLOBALMV))
879 segval[s][V4L2_AV1_SEG_LVL_REF_GLOBALMV] = 1;
880 }
881 }
882
883 for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
884 for (j = 0; j < V4L2_AV1_SEG_LVL_MAX; j++) {
885 if (seg->feature_enabled[i]
886 & V4L2_AV1_SEGMENT_FEATURE_ENABLED(j)) {
887 preskip_segid |= (j >= V4L2_AV1_SEG_LVL_REF_FRAME);
888 last_active_seg = max(i, last_active_seg);
889 }
890 }
891 }
892
893 hantro_reg_write(vpu, &av1_last_active_seg, last_active_seg);
894 hantro_reg_write(vpu, &av1_preskip_segid, preskip_segid);
895
896 hantro_reg_write(vpu, &av1_seg_quant_sign, segsign);
897
898 /* Write QP, filter level, ref frame and skip for every segment */
899 hantro_reg_write(vpu, &av1_quant_seg0,
900 segval[0][V4L2_AV1_SEG_LVL_ALT_Q]);
901 hantro_reg_write(vpu, &av1_filt_level_delta0_seg0,
902 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
903 hantro_reg_write(vpu, &av1_filt_level_delta1_seg0,
904 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
905 hantro_reg_write(vpu, &av1_filt_level_delta2_seg0,
906 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_U]);
907 hantro_reg_write(vpu, &av1_filt_level_delta3_seg0,
908 segval[0][V4L2_AV1_SEG_LVL_ALT_LF_V]);
909 hantro_reg_write(vpu, &av1_refpic_seg0,
910 segval[0][V4L2_AV1_SEG_LVL_REF_FRAME]);
911 hantro_reg_write(vpu, &av1_skip_seg0,
912 segval[0][V4L2_AV1_SEG_LVL_REF_SKIP]);
913 hantro_reg_write(vpu, &av1_global_mv_seg0,
914 segval[0][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
915
916 hantro_reg_write(vpu, &av1_quant_seg1,
917 segval[1][V4L2_AV1_SEG_LVL_ALT_Q]);
918 hantro_reg_write(vpu, &av1_filt_level_delta0_seg1,
919 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
920 hantro_reg_write(vpu, &av1_filt_level_delta1_seg1,
921 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
922 hantro_reg_write(vpu, &av1_filt_level_delta2_seg1,
923 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_U]);
924 hantro_reg_write(vpu, &av1_filt_level_delta3_seg1,
925 segval[1][V4L2_AV1_SEG_LVL_ALT_LF_V]);
926 hantro_reg_write(vpu, &av1_refpic_seg1,
927 segval[1][V4L2_AV1_SEG_LVL_REF_FRAME]);
928 hantro_reg_write(vpu, &av1_skip_seg1,
929 segval[1][V4L2_AV1_SEG_LVL_REF_SKIP]);
930 hantro_reg_write(vpu, &av1_global_mv_seg1,
931 segval[1][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
932
933 hantro_reg_write(vpu, &av1_quant_seg2,
934 segval[2][V4L2_AV1_SEG_LVL_ALT_Q]);
935 hantro_reg_write(vpu, &av1_filt_level_delta0_seg2,
936 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
937 hantro_reg_write(vpu, &av1_filt_level_delta1_seg2,
938 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
939 hantro_reg_write(vpu, &av1_filt_level_delta2_seg2,
940 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_U]);
941 hantro_reg_write(vpu, &av1_filt_level_delta3_seg2,
942 segval[2][V4L2_AV1_SEG_LVL_ALT_LF_V]);
943 hantro_reg_write(vpu, &av1_refpic_seg2,
944 segval[2][V4L2_AV1_SEG_LVL_REF_FRAME]);
945 hantro_reg_write(vpu, &av1_skip_seg2,
946 segval[2][V4L2_AV1_SEG_LVL_REF_SKIP]);
947 hantro_reg_write(vpu, &av1_global_mv_seg2,
948 segval[2][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
949
950 hantro_reg_write(vpu, &av1_quant_seg3,
951 segval[3][V4L2_AV1_SEG_LVL_ALT_Q]);
952 hantro_reg_write(vpu, &av1_filt_level_delta0_seg3,
953 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
954 hantro_reg_write(vpu, &av1_filt_level_delta1_seg3,
955 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
956 hantro_reg_write(vpu, &av1_filt_level_delta2_seg3,
957 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_U]);
958 hantro_reg_write(vpu, &av1_filt_level_delta3_seg3,
959 segval[3][V4L2_AV1_SEG_LVL_ALT_LF_V]);
960 hantro_reg_write(vpu, &av1_refpic_seg3,
961 segval[3][V4L2_AV1_SEG_LVL_REF_FRAME]);
962 hantro_reg_write(vpu, &av1_skip_seg3,
963 segval[3][V4L2_AV1_SEG_LVL_REF_SKIP]);
964 hantro_reg_write(vpu, &av1_global_mv_seg3,
965 segval[3][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
966
967 hantro_reg_write(vpu, &av1_quant_seg4,
968 segval[4][V4L2_AV1_SEG_LVL_ALT_Q]);
969 hantro_reg_write(vpu, &av1_filt_level_delta0_seg4,
970 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
971 hantro_reg_write(vpu, &av1_filt_level_delta1_seg4,
972 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
973 hantro_reg_write(vpu, &av1_filt_level_delta2_seg4,
974 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_U]);
975 hantro_reg_write(vpu, &av1_filt_level_delta3_seg4,
976 segval[4][V4L2_AV1_SEG_LVL_ALT_LF_V]);
977 hantro_reg_write(vpu, &av1_refpic_seg4,
978 segval[4][V4L2_AV1_SEG_LVL_REF_FRAME]);
979 hantro_reg_write(vpu, &av1_skip_seg4,
980 segval[4][V4L2_AV1_SEG_LVL_REF_SKIP]);
981 hantro_reg_write(vpu, &av1_global_mv_seg4,
982 segval[4][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
983
984 hantro_reg_write(vpu, &av1_quant_seg5,
985 segval[5][V4L2_AV1_SEG_LVL_ALT_Q]);
986 hantro_reg_write(vpu, &av1_filt_level_delta0_seg5,
987 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
988 hantro_reg_write(vpu, &av1_filt_level_delta1_seg5,
989 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
990 hantro_reg_write(vpu, &av1_filt_level_delta2_seg5,
991 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_U]);
992 hantro_reg_write(vpu, &av1_filt_level_delta3_seg5,
993 segval[5][V4L2_AV1_SEG_LVL_ALT_LF_V]);
994 hantro_reg_write(vpu, &av1_refpic_seg5,
995 segval[5][V4L2_AV1_SEG_LVL_REF_FRAME]);
996 hantro_reg_write(vpu, &av1_skip_seg5,
997 segval[5][V4L2_AV1_SEG_LVL_REF_SKIP]);
998 hantro_reg_write(vpu, &av1_global_mv_seg5,
999 segval[5][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
1000
1001 hantro_reg_write(vpu, &av1_quant_seg6,
1002 segval[6][V4L2_AV1_SEG_LVL_ALT_Q]);
1003 hantro_reg_write(vpu, &av1_filt_level_delta0_seg6,
1004 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
1005 hantro_reg_write(vpu, &av1_filt_level_delta1_seg6,
1006 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
1007 hantro_reg_write(vpu, &av1_filt_level_delta2_seg6,
1008 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_U]);
1009 hantro_reg_write(vpu, &av1_filt_level_delta3_seg6,
1010 segval[6][V4L2_AV1_SEG_LVL_ALT_LF_V]);
1011 hantro_reg_write(vpu, &av1_refpic_seg6,
1012 segval[6][V4L2_AV1_SEG_LVL_REF_FRAME]);
1013 hantro_reg_write(vpu, &av1_skip_seg6,
1014 segval[6][V4L2_AV1_SEG_LVL_REF_SKIP]);
1015 hantro_reg_write(vpu, &av1_global_mv_seg6,
1016 segval[6][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
1017
1018 hantro_reg_write(vpu, &av1_quant_seg7,
1019 segval[7][V4L2_AV1_SEG_LVL_ALT_Q]);
1020 hantro_reg_write(vpu, &av1_filt_level_delta0_seg7,
1021 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_V]);
1022 hantro_reg_write(vpu, &av1_filt_level_delta1_seg7,
1023 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_Y_H]);
1024 hantro_reg_write(vpu, &av1_filt_level_delta2_seg7,
1025 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_U]);
1026 hantro_reg_write(vpu, &av1_filt_level_delta3_seg7,
1027 segval[7][V4L2_AV1_SEG_LVL_ALT_LF_V]);
1028 hantro_reg_write(vpu, &av1_refpic_seg7,
1029 segval[7][V4L2_AV1_SEG_LVL_REF_FRAME]);
1030 hantro_reg_write(vpu, &av1_skip_seg7,
1031 segval[7][V4L2_AV1_SEG_LVL_REF_SKIP]);
1032 hantro_reg_write(vpu, &av1_global_mv_seg7,
1033 segval[7][V4L2_AV1_SEG_LVL_REF_GLOBALMV]);
1034 }
1035
rockchip_vpu981_av1_dec_is_lossless(struct hantro_ctx * ctx)1036 static bool rockchip_vpu981_av1_dec_is_lossless(struct hantro_ctx *ctx)
1037 {
1038 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1039 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1040 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1041 const struct v4l2_av1_segmentation *segmentation = &frame->segmentation;
1042 const struct v4l2_av1_quantization *quantization = &frame->quantization;
1043 int i;
1044
1045 for (i = 0; i < V4L2_AV1_MAX_SEGMENTS; i++) {
1046 int qindex = quantization->base_q_idx;
1047
1048 if (segmentation->feature_enabled[i] &
1049 V4L2_AV1_SEGMENT_FEATURE_ENABLED(V4L2_AV1_SEG_LVL_ALT_Q)) {
1050 qindex += segmentation->feature_data[i][V4L2_AV1_SEG_LVL_ALT_Q];
1051 }
1052 qindex = clamp(qindex, 0, 255);
1053
1054 if (qindex ||
1055 quantization->delta_q_y_dc ||
1056 quantization->delta_q_u_dc ||
1057 quantization->delta_q_u_ac ||
1058 quantization->delta_q_v_dc ||
1059 quantization->delta_q_v_ac)
1060 return false;
1061 }
1062 return true;
1063 }
1064
rockchip_vpu981_av1_dec_set_loopfilter(struct hantro_ctx * ctx)1065 static void rockchip_vpu981_av1_dec_set_loopfilter(struct hantro_ctx *ctx)
1066 {
1067 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1068 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1069 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1070 const struct v4l2_av1_loop_filter *loop_filter = &frame->loop_filter;
1071 bool filtering_dis = (loop_filter->level[0] == 0) && (loop_filter->level[1] == 0);
1072 struct hantro_dev *vpu = ctx->dev;
1073
1074 hantro_reg_write(vpu, &av1_filtering_dis, filtering_dis);
1075 hantro_reg_write(vpu, &av1_filt_level_base_gt32, loop_filter->level[0] > 32);
1076 hantro_reg_write(vpu, &av1_filt_sharpness, loop_filter->sharpness);
1077
1078 hantro_reg_write(vpu, &av1_filt_level0, loop_filter->level[0]);
1079 hantro_reg_write(vpu, &av1_filt_level1, loop_filter->level[1]);
1080 hantro_reg_write(vpu, &av1_filt_level2, loop_filter->level[2]);
1081 hantro_reg_write(vpu, &av1_filt_level3, loop_filter->level[3]);
1082
1083 if (loop_filter->flags & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_ENABLED &&
1084 !rockchip_vpu981_av1_dec_is_lossless(ctx) &&
1085 !(frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC)) {
1086 hantro_reg_write(vpu, &av1_filt_ref_adj_0,
1087 loop_filter->ref_deltas[0]);
1088 hantro_reg_write(vpu, &av1_filt_ref_adj_1,
1089 loop_filter->ref_deltas[1]);
1090 hantro_reg_write(vpu, &av1_filt_ref_adj_2,
1091 loop_filter->ref_deltas[2]);
1092 hantro_reg_write(vpu, &av1_filt_ref_adj_3,
1093 loop_filter->ref_deltas[3]);
1094 hantro_reg_write(vpu, &av1_filt_ref_adj_4,
1095 loop_filter->ref_deltas[4]);
1096 hantro_reg_write(vpu, &av1_filt_ref_adj_5,
1097 loop_filter->ref_deltas[5]);
1098 hantro_reg_write(vpu, &av1_filt_ref_adj_6,
1099 loop_filter->ref_deltas[6]);
1100 hantro_reg_write(vpu, &av1_filt_ref_adj_7,
1101 loop_filter->ref_deltas[7]);
1102 hantro_reg_write(vpu, &av1_filt_mb_adj_0,
1103 loop_filter->mode_deltas[0]);
1104 hantro_reg_write(vpu, &av1_filt_mb_adj_1,
1105 loop_filter->mode_deltas[1]);
1106 } else {
1107 hantro_reg_write(vpu, &av1_filt_ref_adj_0, 0);
1108 hantro_reg_write(vpu, &av1_filt_ref_adj_1, 0);
1109 hantro_reg_write(vpu, &av1_filt_ref_adj_2, 0);
1110 hantro_reg_write(vpu, &av1_filt_ref_adj_3, 0);
1111 hantro_reg_write(vpu, &av1_filt_ref_adj_4, 0);
1112 hantro_reg_write(vpu, &av1_filt_ref_adj_5, 0);
1113 hantro_reg_write(vpu, &av1_filt_ref_adj_6, 0);
1114 hantro_reg_write(vpu, &av1_filt_ref_adj_7, 0);
1115 hantro_reg_write(vpu, &av1_filt_mb_adj_0, 0);
1116 hantro_reg_write(vpu, &av1_filt_mb_adj_1, 0);
1117 }
1118
1119 hantro_write_addr(vpu, AV1_DB_DATA_COL, av1_dec->db_data_col.dma);
1120 hantro_write_addr(vpu, AV1_DB_CTRL_COL, av1_dec->db_ctrl_col.dma);
1121 }
1122
/*
 * Propagate the CDFs produced by the hardware for this frame into the
 * reference CDF slots named by refresh_frame_flags.
 *
 * The hardware writes the frame-end-updated CDF table into prob_tbl_out.
 * If CDF update is not disabled for this frame, copy that table into the
 * first refreshed slot (the copy is then replicated to all flagged slots
 * by rockchip_av1_store_cdfs(), hence the break after one iteration).
 */
static void rockchip_vpu981_av1_dec_update_prob(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	bool frame_is_intra = IS_INTRA(frame->frame_type);
	struct av1cdfs *out_cdfs = (struct av1cdfs *)av1_dec->prob_tbl_out.cpu;
	int i;

	if (frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_FRAME_END_UPDATE_CDF)
		return;

	for (i = 0; i < NUM_REF_FRAMES; i++) {
		if (frame->refresh_frame_flags & BIT(i)) {
			struct mvcdfs stored_mv_cdf;

			/* Presumably points av1_dec->cdfs at slot i's storage
			 * — see rockchip_av1_get_cdfs() implementation.
			 */
			rockchip_av1_get_cdfs(ctx, i);
			stored_mv_cdf = av1_dec->cdfs->mv_cdf;
			*av1_dec->cdfs = *out_cdfs;
			if (frame_is_intra) {
				/*
				 * For intra frames the MV area of the hardware
				 * output holds intrabc (NDVC) statistics: keep
				 * the slot's regular MV CDFs and stash the
				 * output MV area as the NDVC table instead.
				 */
				av1_dec->cdfs->mv_cdf = stored_mv_cdf;
				*av1_dec->cdfs_ndvc = out_cdfs->mv_cdf;
			}
			rockchip_av1_store_cdfs(ctx,
						frame->refresh_frame_flags);
			break;
		}
	}
}
1152
/*
 * Decode-done hook: fold the hardware-updated CDF table back into the
 * reference CDF slots for use by subsequent frames.
 */
void rockchip_vpu981_av1_dec_done(struct hantro_ctx *ctx)
{
	rockchip_vpu981_av1_dec_update_prob(ctx);
}
1157
/*
 * Select the CDF tables to decode this frame with and hand them to the
 * hardware via the probability-table buffers.
 *
 * Per the AV1 spec, a frame with no usable primary reference (error
 * resilient, intra, or PRIMARY_REF_NONE) starts from the default CDFs;
 * otherwise it inherits the CDFs stored for its primary reference frame.
 */
static void rockchip_vpu981_av1_dec_set_prob(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	const struct v4l2_av1_quantization *quantization = &frame->quantization;
	struct hantro_dev *vpu = ctx->dev;
	bool error_resilient_mode =
		!!(frame->flags & V4L2_AV1_FRAME_FLAG_ERROR_RESILIENT_MODE);
	bool frame_is_intra = IS_INTRA(frame->frame_type);

	if (error_resilient_mode || frame_is_intra ||
	    frame->primary_ref_frame == AV1_PRIMARY_REF_NONE) {
		/* No primary reference: start from defaults, with the
		 * coefficient probabilities seeded from base_q_idx.
		 */
		av1_dec->cdfs = &av1_dec->default_cdfs;
		av1_dec->cdfs_ndvc = &av1_dec->default_cdfs_ndvc;
		rockchip_av1_default_coeff_probs(quantization->base_q_idx,
						 av1_dec->cdfs);
	} else {
		rockchip_av1_get_cdfs(ctx, frame->ref_frame_idx[frame->primary_ref_frame]);
	}
	rockchip_av1_store_cdfs(ctx, frame->refresh_frame_flags);

	/* Stage the selected CDFs in the DMA buffer the core reads from. */
	memcpy(av1_dec->prob_tbl.cpu, av1_dec->cdfs, sizeof(struct av1cdfs));

	if (frame_is_intra) {
		int mv_offset = offsetof(struct av1cdfs, mv_cdf);
		/* Overwrite MV context area with intrabc MV context */
		memcpy(av1_dec->prob_tbl.cpu + mv_offset, av1_dec->cdfs_ndvc,
		       sizeof(struct mvcdfs));
	}

	hantro_write_addr(vpu, AV1_PROP_TABLE_OUT, av1_dec->prob_tbl_out.dma);
	hantro_write_addr(vpu, AV1_PROP_TABLE, av1_dec->prob_tbl.dma);
}
1192
1193 static void
rockchip_vpu981_av1_dec_init_scaling_function(const u8 * values,const u8 * scaling,u8 num_points,u8 * scaling_lut)1194 rockchip_vpu981_av1_dec_init_scaling_function(const u8 *values, const u8 *scaling,
1195 u8 num_points, u8 *scaling_lut)
1196 {
1197 int i, point;
1198
1199 if (num_points == 0) {
1200 memset(scaling_lut, 0, 256);
1201 return;
1202 }
1203
1204 for (point = 0; point < num_points - 1; point++) {
1205 int x;
1206 s32 delta_y = scaling[point + 1] - scaling[point];
1207 s32 delta_x = values[point + 1] - values[point];
1208 s64 delta =
1209 delta_x ? delta_y * ((65536 + (delta_x >> 1)) /
1210 delta_x) : 0;
1211
1212 for (x = 0; x < delta_x; x++) {
1213 scaling_lut[values[point] + x] =
1214 scaling[point] +
1215 (s32)((x * delta + 32768) >> 16);
1216 }
1217 }
1218
1219 for (i = values[num_points - 1]; i < 256; i++)
1220 scaling_lut[i] = scaling[num_points - 1];
1221 }
1222
/*
 * Program the film-grain-synthesis registers and generate the grain
 * pattern blocks the hardware applies at output time.
 *
 * When grain is disabled for the frame, all FGS registers are zeroed and
 * the film-grain buffer address is cleared. Otherwise the scaling LUTs
 * and the cropped luma/chroma grain blocks are generated into the
 * film_grain DMA buffer (fgmem).
 */
static void rockchip_vpu981_av1_dec_set_fgs(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_film_grain *film_grain = ctrls->film_grain;
	struct rockchip_av1_film_grain *fgmem = av1_dec->film_grain.cpu;
	struct hantro_dev *vpu = ctx->dev;
	bool scaling_from_luma =
		!!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_CHROMA_SCALING_FROM_LUMA);
	/*
	 * Scratch buffers for grain generation; too large for the kernel
	 * stack, so they are heap-allocated for the duration of the call.
	 * Luma has 24 AR coefficients, chroma planes have 25 (extra
	 * luma-coupling term).
	 */
	s32 (*ar_coeffs_y)[24];
	s32 (*ar_coeffs_cb)[25];
	s32 (*ar_coeffs_cr)[25];
	s32 (*luma_grain_block)[73][82];
	s32 (*cb_grain_block)[38][44];
	s32 (*cr_grain_block)[38][44];
	s32 ar_coeff_lag, ar_coeff_shift;
	s32 grain_scale_shift, bitdepth;
	s32 grain_center, grain_min, grain_max;
	int i, j;

	hantro_reg_write(vpu, &av1_apply_grain, 0);

	if (!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_APPLY_GRAIN)) {
		/* Grain disabled: clear every FGS register and the buffer. */
		hantro_reg_write(vpu, &av1_num_y_points_b, 0);
		hantro_reg_write(vpu, &av1_num_cb_points_b, 0);
		hantro_reg_write(vpu, &av1_num_cr_points_b, 0);
		hantro_reg_write(vpu, &av1_scaling_shift, 0);
		hantro_reg_write(vpu, &av1_cb_mult, 0);
		hantro_reg_write(vpu, &av1_cb_luma_mult, 0);
		hantro_reg_write(vpu, &av1_cb_offset, 0);
		hantro_reg_write(vpu, &av1_cr_mult, 0);
		hantro_reg_write(vpu, &av1_cr_luma_mult, 0);
		hantro_reg_write(vpu, &av1_cr_offset, 0);
		hantro_reg_write(vpu, &av1_overlap_flag, 0);
		hantro_reg_write(vpu, &av1_clip_to_restricted_range, 0);
		hantro_reg_write(vpu, &av1_chroma_scaling_from_luma, 0);
		hantro_reg_write(vpu, &av1_random_seed, 0);
		hantro_write_addr(vpu, AV1_FILM_GRAIN, 0);
		return;
	}

	ar_coeffs_y = kzalloc(sizeof(int32_t) * 24, GFP_KERNEL);
	ar_coeffs_cb = kzalloc(sizeof(int32_t) * 25, GFP_KERNEL);
	ar_coeffs_cr = kzalloc(sizeof(int32_t) * 25, GFP_KERNEL);
	luma_grain_block = kzalloc(sizeof(int32_t) * 73 * 82, GFP_KERNEL);
	cb_grain_block = kzalloc(sizeof(int32_t) * 38 * 44, GFP_KERNEL);
	cr_grain_block = kzalloc(sizeof(int32_t) * 38 * 44, GFP_KERNEL);

	if (!ar_coeffs_y || !ar_coeffs_cb || !ar_coeffs_cr ||
	    !luma_grain_block || !cb_grain_block || !cr_grain_block) {
		pr_warn("Fail allocating memory for film grain parameters\n");
		goto alloc_fail;
	}

	hantro_reg_write(vpu, &av1_apply_grain, 1);

	hantro_reg_write(vpu, &av1_num_y_points_b,
			 film_grain->num_y_points > 0);
	hantro_reg_write(vpu, &av1_num_cb_points_b,
			 film_grain->num_cb_points > 0);
	hantro_reg_write(vpu, &av1_num_cr_points_b,
			 film_grain->num_cr_points > 0);
	hantro_reg_write(vpu, &av1_scaling_shift,
			 film_grain->grain_scaling_minus_8 + 8);

	/*
	 * Chroma blend parameters are centered for the hardware
	 * (mult/luma_mult are biased by 128, offset by 256 in the control);
	 * they are unused when chroma scaling is derived from luma.
	 */
	if (!scaling_from_luma) {
		hantro_reg_write(vpu, &av1_cb_mult, film_grain->cb_mult - 128);
		hantro_reg_write(vpu, &av1_cb_luma_mult, film_grain->cb_luma_mult - 128);
		hantro_reg_write(vpu, &av1_cb_offset, film_grain->cb_offset - 256);
		hantro_reg_write(vpu, &av1_cr_mult, film_grain->cr_mult - 128);
		hantro_reg_write(vpu, &av1_cr_luma_mult, film_grain->cr_luma_mult - 128);
		hantro_reg_write(vpu, &av1_cr_offset, film_grain->cr_offset - 256);
	} else {
		hantro_reg_write(vpu, &av1_cb_mult, 0);
		hantro_reg_write(vpu, &av1_cb_luma_mult, 0);
		hantro_reg_write(vpu, &av1_cb_offset, 0);
		hantro_reg_write(vpu, &av1_cr_mult, 0);
		hantro_reg_write(vpu, &av1_cr_luma_mult, 0);
		hantro_reg_write(vpu, &av1_cr_offset, 0);
	}

	hantro_reg_write(vpu, &av1_overlap_flag,
			 !!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_OVERLAP));
	hantro_reg_write(vpu, &av1_clip_to_restricted_range,
			 !!(film_grain->flags & V4L2_AV1_FILM_GRAIN_FLAG_CLIP_TO_RESTRICTED_RANGE));
	hantro_reg_write(vpu, &av1_chroma_scaling_from_luma, scaling_from_luma);
	hantro_reg_write(vpu, &av1_random_seed, film_grain->grain_seed);

	/* Build the scaling LUTs in the shared film-grain buffer. */
	rockchip_vpu981_av1_dec_init_scaling_function(film_grain->point_y_value,
						      film_grain->point_y_scaling,
						      film_grain->num_y_points,
						      fgmem->scaling_lut_y);

	if (film_grain->flags &
	    V4L2_AV1_FILM_GRAIN_FLAG_CHROMA_SCALING_FROM_LUMA) {
		memcpy(fgmem->scaling_lut_cb, fgmem->scaling_lut_y,
		       sizeof(*fgmem->scaling_lut_y) * 256);
		memcpy(fgmem->scaling_lut_cr, fgmem->scaling_lut_y,
		       sizeof(*fgmem->scaling_lut_y) * 256);
	} else {
		rockchip_vpu981_av1_dec_init_scaling_function
		    (film_grain->point_cb_value, film_grain->point_cb_scaling,
		     film_grain->num_cb_points, fgmem->scaling_lut_cb);
		rockchip_vpu981_av1_dec_init_scaling_function
		    (film_grain->point_cr_value, film_grain->point_cr_scaling,
		     film_grain->num_cr_points, fgmem->scaling_lut_cr);
	}

	/* Re-center the AR coefficients; luma only has 24 of them. */
	for (i = 0; i < V4L2_AV1_AR_COEFFS_SIZE; i++) {
		if (i < 24)
			(*ar_coeffs_y)[i] = film_grain->ar_coeffs_y_plus_128[i] - 128;
		(*ar_coeffs_cb)[i] = film_grain->ar_coeffs_cb_plus_128[i] - 128;
		(*ar_coeffs_cr)[i] = film_grain->ar_coeffs_cr_plus_128[i] - 128;
	}

	ar_coeff_lag = film_grain->ar_coeff_lag;
	ar_coeff_shift = film_grain->ar_coeff_shift_minus_6 + 6;
	grain_scale_shift = film_grain->grain_scale_shift;
	bitdepth = ctx->bit_depth;
	/* Grain sample range is symmetric around mid-level for the depth. */
	grain_center = 128 << (bitdepth - 8);
	grain_min = 0 - grain_center;
	grain_max = (256 << (bitdepth - 8)) - 1 - grain_center;

	rockchip_av1_generate_luma_grain_block(luma_grain_block, bitdepth,
					       film_grain->num_y_points, grain_scale_shift,
					       ar_coeff_lag, ar_coeffs_y, ar_coeff_shift,
					       grain_min, grain_max, film_grain->grain_seed);

	rockchip_av1_generate_chroma_grain_block(luma_grain_block, cb_grain_block,
						 cr_grain_block, bitdepth,
						 film_grain->num_y_points,
						 film_grain->num_cb_points,
						 film_grain->num_cr_points,
						 grain_scale_shift, ar_coeff_lag, ar_coeffs_cb,
						 ar_coeffs_cr, ar_coeff_shift, grain_min,
						 grain_max,
						 scaling_from_luma,
						 film_grain->grain_seed);

	/*
	 * The hardware consumes a 64x64 luma block cropped from the
	 * generated 73x82 block (offset 9,9)...
	 */
	for (i = 0; i < 64; i++) {
		for (j = 0; j < 64; j++)
			fgmem->cropped_luma_grain_block[i * 64 + j] =
				(*luma_grain_block)[i + 9][j + 9];
	}

	/*
	 * ...and a 32x32 chroma block with cb/cr samples interleaved,
	 * cropped from the 38x44 blocks (offset 6,6).
	 */
	for (i = 0; i < 32; i++) {
		for (j = 0; j < 32; j++) {
			fgmem->cropped_chroma_grain_block[i * 64 + 2 * j] =
				(*cb_grain_block)[i + 6][j + 6];
			fgmem->cropped_chroma_grain_block[i * 64 + 2 * j + 1] =
				(*cr_grain_block)[i + 6][j + 6];
		}
	}

	hantro_write_addr(vpu, AV1_FILM_GRAIN, av1_dec->film_grain.dma);

alloc_fail:
	/* kfree(NULL) is a no-op, so partial allocation is handled here. */
	kfree(ar_coeffs_y);
	kfree(ar_coeffs_cb);
	kfree(ar_coeffs_cr);
	kfree(luma_grain_block);
	kfree(cb_grain_block);
	kfree(cr_grain_block);
}
1387
rockchip_vpu981_av1_dec_set_cdef(struct hantro_ctx * ctx)1388 static void rockchip_vpu981_av1_dec_set_cdef(struct hantro_ctx *ctx)
1389 {
1390 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1391 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1392 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1393 const struct v4l2_av1_cdef *cdef = &frame->cdef;
1394 struct hantro_dev *vpu = ctx->dev;
1395 u32 luma_pri_strength = 0;
1396 u16 luma_sec_strength = 0;
1397 u32 chroma_pri_strength = 0;
1398 u16 chroma_sec_strength = 0;
1399 int i;
1400
1401 hantro_reg_write(vpu, &av1_cdef_bits, cdef->bits);
1402 hantro_reg_write(vpu, &av1_cdef_damping, cdef->damping_minus_3);
1403
1404 for (i = 0; i < BIT(cdef->bits); i++) {
1405 luma_pri_strength |= cdef->y_pri_strength[i] << (i * 4);
1406 if (cdef->y_sec_strength[i] == 4)
1407 luma_sec_strength |= 3 << (i * 2);
1408 else
1409 luma_sec_strength |= cdef->y_sec_strength[i] << (i * 2);
1410
1411 chroma_pri_strength |= cdef->uv_pri_strength[i] << (i * 4);
1412 if (cdef->uv_sec_strength[i] == 4)
1413 chroma_sec_strength |= 3 << (i * 2);
1414 else
1415 chroma_sec_strength |= cdef->uv_sec_strength[i] << (i * 2);
1416 }
1417
1418 hantro_reg_write(vpu, &av1_cdef_luma_primary_strength,
1419 luma_pri_strength);
1420 hantro_reg_write(vpu, &av1_cdef_luma_secondary_strength,
1421 luma_sec_strength);
1422 hantro_reg_write(vpu, &av1_cdef_chroma_primary_strength,
1423 chroma_pri_strength);
1424 hantro_reg_write(vpu, &av1_cdef_chroma_secondary_strength,
1425 chroma_sec_strength);
1426
1427 hantro_write_addr(vpu, AV1_CDEF_COL, av1_dec->cdef_col.dma);
1428 }
1429
rockchip_vpu981_av1_dec_set_lr(struct hantro_ctx * ctx)1430 static void rockchip_vpu981_av1_dec_set_lr(struct hantro_ctx *ctx)
1431 {
1432 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1433 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1434 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1435 const struct v4l2_av1_loop_restoration *loop_restoration =
1436 &frame->loop_restoration;
1437 struct hantro_dev *vpu = ctx->dev;
1438 u16 lr_type = 0, lr_unit_size = 0;
1439 u8 restoration_unit_size[V4L2_AV1_NUM_PLANES_MAX] = { 3, 3, 3 };
1440 int i;
1441
1442 if (loop_restoration->flags & V4L2_AV1_LOOP_RESTORATION_FLAG_USES_LR) {
1443 restoration_unit_size[0] = 1 + loop_restoration->lr_unit_shift;
1444 restoration_unit_size[1] =
1445 1 + loop_restoration->lr_unit_shift - loop_restoration->lr_uv_shift;
1446 restoration_unit_size[2] =
1447 1 + loop_restoration->lr_unit_shift - loop_restoration->lr_uv_shift;
1448 }
1449
1450 for (i = 0; i < V4L2_AV1_NUM_PLANES_MAX; i++) {
1451 lr_type |=
1452 loop_restoration->frame_restoration_type[i] << (i * 2);
1453 lr_unit_size |= restoration_unit_size[i] << (i * 2);
1454 }
1455
1456 hantro_reg_write(vpu, &av1_lr_type, lr_type);
1457 hantro_reg_write(vpu, &av1_lr_unit_size, lr_unit_size);
1458 hantro_write_addr(vpu, AV1_LR_COL, av1_dec->lr_col.dma);
1459 }
1460
/*
 * Compute and program the super-resolution upscaling parameters.
 *
 * When superres actually downscales the frame, derive the horizontal
 * resampling steps, their inverses and the initial subpel offsets in
 * RS_SCALE_SUBPEL_BITS (1/16384) fixed point, for luma and for 4:2:0
 * chroma, following the AV1 spec's superres_scale derivation.
 */
static void rockchip_vpu981_av1_dec_set_superres_params(struct hantro_ctx *ctx)
{
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
	const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
	struct hantro_dev *vpu = ctx->dev;
	u8 superres_scale_denominator = SCALE_NUMERATOR;
	/*
	 * NOTE(review): the 1:1 defaults are RS_SCALE_SUBPEL_BITS (14), not
	 * 1 << RS_SCALE_SUBPEL_BITS; presumably the hardware ignores the
	 * step registers when superres_is_scaled is 0 — confirm against the
	 * core's documentation before changing.
	 */
	int superres_luma_step = RS_SCALE_SUBPEL_BITS;
	int superres_chroma_step = RS_SCALE_SUBPEL_BITS;
	int superres_luma_step_invra = RS_SCALE_SUBPEL_BITS;
	int superres_chroma_step_invra = RS_SCALE_SUBPEL_BITS;
	int superres_init_luma_subpel_x = 0;
	int superres_init_chroma_subpel_x = 0;
	int superres_is_scaled = 0;
	int min_w = min_t(uint32_t, 16, frame->upscaled_width);
	int upscaled_luma, downscaled_luma;
	int downscaled_chroma, upscaled_chroma;
	int step_luma, step_chroma;
	int err_luma, err_chroma;
	int initial_luma, initial_chroma;
	int width = 0;

	if (frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES)
		superres_scale_denominator = frame->superres_denom;

	/* denom <= numerator means no downscaling: keep 1:1 defaults. */
	if (superres_scale_denominator <= SCALE_NUMERATOR)
		goto set_regs;

	/* Downscaled width, rounded to nearest, clamped to a minimum. */
	width = (frame->upscaled_width * SCALE_NUMERATOR +
		(superres_scale_denominator / 2)) / superres_scale_denominator;

	if (width < min_w)
		width = min_w;

	if (width == frame->upscaled_width)
		goto set_regs;

	superres_is_scaled = 1;
	upscaled_luma = frame->upscaled_width;
	downscaled_luma = width;
	/* 4:2:0 chroma widths are half the luma widths, rounded up. */
	downscaled_chroma = (downscaled_luma + 1) >> 1;
	upscaled_chroma = (upscaled_luma + 1) >> 1;
	/* Forward step: downscaled/upscaled ratio in fixed point. */
	step_luma =
		((downscaled_luma << RS_SCALE_SUBPEL_BITS) +
		 (upscaled_luma / 2)) / upscaled_luma;
	step_chroma =
		((downscaled_chroma << RS_SCALE_SUBPEL_BITS) +
		 (upscaled_chroma / 2)) / upscaled_chroma;
	/* Accumulated rounding error of the step over the full width. */
	err_luma =
		(upscaled_luma * step_luma)
		- (downscaled_luma << RS_SCALE_SUBPEL_BITS);
	err_chroma =
		(upscaled_chroma * step_chroma)
		- (downscaled_chroma << RS_SCALE_SUBPEL_BITS);
	/* Initial subpel x offset, error-compensated and masked. */
	initial_luma =
		((-((upscaled_luma - downscaled_luma) << (RS_SCALE_SUBPEL_BITS - 1))
		  + upscaled_luma / 2)
		 / upscaled_luma + (1 << (RS_SCALE_EXTRA_BITS - 1)) - err_luma / 2)
		& RS_SCALE_SUBPEL_MASK;
	initial_chroma =
		((-((upscaled_chroma - downscaled_chroma) << (RS_SCALE_SUBPEL_BITS - 1))
		  + upscaled_chroma / 2)
		 / upscaled_chroma + (1 << (RS_SCALE_EXTRA_BITS - 1)) - err_chroma / 2)
		& RS_SCALE_SUBPEL_MASK;
	superres_luma_step = step_luma;
	superres_chroma_step = step_chroma;
	/* Inverse steps: upscaled/downscaled ratio in fixed point. */
	superres_luma_step_invra =
		((upscaled_luma << RS_SCALE_SUBPEL_BITS) + (downscaled_luma / 2))
		/ downscaled_luma;
	superres_chroma_step_invra =
		((upscaled_chroma << RS_SCALE_SUBPEL_BITS) + (downscaled_chroma / 2))
		/ downscaled_chroma;
	superres_init_luma_subpel_x = initial_luma;
	superres_init_chroma_subpel_x = initial_chroma;

set_regs:
	hantro_reg_write(vpu, &av1_superres_pic_width, frame->upscaled_width);

	/*
	 * NOTE(review): without USE_SUPERRES the raw superres_denom is
	 * written unbiased — presumably it is 0/ignored in that case;
	 * verify against the register definition.
	 */
	if (frame->flags & V4L2_AV1_FRAME_FLAG_USE_SUPERRES)
		hantro_reg_write(vpu, &av1_scale_denom_minus9,
				 frame->superres_denom - SUPERRES_SCALE_DENOMINATOR_MIN);
	else
		hantro_reg_write(vpu, &av1_scale_denom_minus9, frame->superres_denom);

	hantro_reg_write(vpu, &av1_superres_luma_step, superres_luma_step);
	hantro_reg_write(vpu, &av1_superres_chroma_step, superres_chroma_step);
	hantro_reg_write(vpu, &av1_superres_luma_step_invra,
			 superres_luma_step_invra);
	hantro_reg_write(vpu, &av1_superres_chroma_step_invra,
			 superres_chroma_step_invra);
	hantro_reg_write(vpu, &av1_superres_init_luma_subpel_x,
			 superres_init_luma_subpel_x);
	hantro_reg_write(vpu, &av1_superres_init_chroma_subpel_x,
			 superres_init_chroma_subpel_x);
	hantro_reg_write(vpu, &av1_superres_is_scaled, superres_is_scaled);

	hantro_write_addr(vpu, AV1_SR_COL, av1_dec->sr_col.dma);
}
1559
rockchip_vpu981_av1_dec_set_picture_dimensions(struct hantro_ctx * ctx)1560 static void rockchip_vpu981_av1_dec_set_picture_dimensions(struct hantro_ctx *ctx)
1561 {
1562 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1563 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1564 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1565 struct hantro_dev *vpu = ctx->dev;
1566 int pic_width_in_cbs = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
1567 int pic_height_in_cbs = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
1568 int pic_width_pad = ALIGN(frame->frame_width_minus_1 + 1, 8)
1569 - (frame->frame_width_minus_1 + 1);
1570 int pic_height_pad = ALIGN(frame->frame_height_minus_1 + 1, 8)
1571 - (frame->frame_height_minus_1 + 1);
1572
1573 hantro_reg_write(vpu, &av1_pic_width_in_cbs, pic_width_in_cbs);
1574 hantro_reg_write(vpu, &av1_pic_height_in_cbs, pic_height_in_cbs);
1575 hantro_reg_write(vpu, &av1_pic_width_pad, pic_width_pad);
1576 hantro_reg_write(vpu, &av1_pic_height_pad, pic_height_pad);
1577
1578 rockchip_vpu981_av1_dec_set_superres_params(ctx);
1579 }
1580
rockchip_vpu981_av1_dec_set_other_frames(struct hantro_ctx * ctx)1581 static void rockchip_vpu981_av1_dec_set_other_frames(struct hantro_ctx *ctx)
1582 {
1583 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1584 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1585 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1586 struct hantro_dev *vpu = ctx->dev;
1587 bool use_ref_frame_mvs =
1588 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS);
1589 int cur_frame_offset = frame->order_hint;
1590 int alt_frame_offset = 0;
1591 int gld_frame_offset = 0;
1592 int bwd_frame_offset = 0;
1593 int alt2_frame_offset = 0;
1594 int refs_selected[3] = { 0, 0, 0 };
1595 int cur_mi_cols = DIV_ROUND_UP(frame->frame_width_minus_1 + 1, 8);
1596 int cur_mi_rows = DIV_ROUND_UP(frame->frame_height_minus_1 + 1, 8);
1597 int cur_offset[V4L2_AV1_TOTAL_REFS_PER_FRAME - 1];
1598 int cur_roffset[V4L2_AV1_TOTAL_REFS_PER_FRAME - 1];
1599 int mf_types[3] = { 0, 0, 0 };
1600 int ref_stamp = 2;
1601 int ref_ind = 0;
1602 int rf, idx;
1603
1604 alt_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT_BUF_IDX);
1605 gld_frame_offset = rockchip_vpu981_get_order_hint(ctx, GLD_BUF_IDX);
1606 bwd_frame_offset = rockchip_vpu981_get_order_hint(ctx, BWD_BUF_IDX);
1607 alt2_frame_offset = rockchip_vpu981_get_order_hint(ctx, ALT2_BUF_IDX);
1608
1609 idx = rockchip_vpu981_get_frame_index(ctx, LST_BUF_IDX);
1610 if (idx >= 0) {
1611 int alt_frame_offset_in_lst =
1612 av1_dec->frame_refs[idx].order_hints[V4L2_AV1_REF_ALTREF_FRAME];
1613 bool is_lst_overlay =
1614 (alt_frame_offset_in_lst == gld_frame_offset);
1615
1616 if (!is_lst_overlay) {
1617 int lst_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1618 int lst_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1619 bool lst_intra_only =
1620 IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1621
1622 if (lst_mi_cols == cur_mi_cols &&
1623 lst_mi_rows == cur_mi_rows && !lst_intra_only) {
1624 mf_types[ref_ind] = V4L2_AV1_REF_LAST_FRAME;
1625 refs_selected[ref_ind++] = LST_BUF_IDX;
1626 }
1627 }
1628 ref_stamp--;
1629 }
1630
1631 idx = rockchip_vpu981_get_frame_index(ctx, BWD_BUF_IDX);
1632 if (rockchip_vpu981_av1_dec_get_dist(ctx, bwd_frame_offset, cur_frame_offset) > 0) {
1633 int bwd_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1634 int bwd_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1635 bool bwd_intra_only =
1636 IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1637
1638 if (bwd_mi_cols == cur_mi_cols && bwd_mi_rows == cur_mi_rows &&
1639 !bwd_intra_only) {
1640 mf_types[ref_ind] = V4L2_AV1_REF_BWDREF_FRAME;
1641 refs_selected[ref_ind++] = BWD_BUF_IDX;
1642 ref_stamp--;
1643 }
1644 }
1645
1646 idx = rockchip_vpu981_get_frame_index(ctx, ALT2_BUF_IDX);
1647 if (rockchip_vpu981_av1_dec_get_dist(ctx, alt2_frame_offset, cur_frame_offset) > 0) {
1648 int alt2_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1649 int alt2_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1650 bool alt2_intra_only =
1651 IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1652
1653 if (alt2_mi_cols == cur_mi_cols && alt2_mi_rows == cur_mi_rows &&
1654 !alt2_intra_only) {
1655 mf_types[ref_ind] = V4L2_AV1_REF_ALTREF2_FRAME;
1656 refs_selected[ref_ind++] = ALT2_BUF_IDX;
1657 ref_stamp--;
1658 }
1659 }
1660
1661 idx = rockchip_vpu981_get_frame_index(ctx, ALT_BUF_IDX);
1662 if (rockchip_vpu981_av1_dec_get_dist(ctx, alt_frame_offset, cur_frame_offset) > 0 &&
1663 ref_stamp >= 0) {
1664 int alt_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1665 int alt_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1666 bool alt_intra_only =
1667 IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1668
1669 if (alt_mi_cols == cur_mi_cols && alt_mi_rows == cur_mi_rows &&
1670 !alt_intra_only) {
1671 mf_types[ref_ind] = V4L2_AV1_REF_ALTREF_FRAME;
1672 refs_selected[ref_ind++] = ALT_BUF_IDX;
1673 ref_stamp--;
1674 }
1675 }
1676
1677 idx = rockchip_vpu981_get_frame_index(ctx, LST2_BUF_IDX);
1678 if (idx >= 0 && ref_stamp >= 0) {
1679 int lst2_mi_cols = av1_dec->frame_refs[idx].mi_cols;
1680 int lst2_mi_rows = av1_dec->frame_refs[idx].mi_rows;
1681 bool lst2_intra_only =
1682 IS_INTRA(av1_dec->frame_refs[idx].frame_type);
1683
1684 if (lst2_mi_cols == cur_mi_cols && lst2_mi_rows == cur_mi_rows &&
1685 !lst2_intra_only) {
1686 mf_types[ref_ind] = V4L2_AV1_REF_LAST2_FRAME;
1687 refs_selected[ref_ind++] = LST2_BUF_IDX;
1688 ref_stamp--;
1689 }
1690 }
1691
1692 for (rf = 0; rf < V4L2_AV1_TOTAL_REFS_PER_FRAME - 1; ++rf) {
1693 idx = rockchip_vpu981_get_frame_index(ctx, rf);
1694 if (idx >= 0) {
1695 int rf_order_hint = rockchip_vpu981_get_order_hint(ctx, rf);
1696
1697 cur_offset[rf] =
1698 rockchip_vpu981_av1_dec_get_dist(ctx, cur_frame_offset, rf_order_hint);
1699 cur_roffset[rf] =
1700 rockchip_vpu981_av1_dec_get_dist(ctx, rf_order_hint, cur_frame_offset);
1701 } else {
1702 cur_offset[rf] = 0;
1703 cur_roffset[rf] = 0;
1704 }
1705 }
1706
1707 hantro_reg_write(vpu, &av1_use_temporal0_mvs, 0);
1708 hantro_reg_write(vpu, &av1_use_temporal1_mvs, 0);
1709 hantro_reg_write(vpu, &av1_use_temporal2_mvs, 0);
1710 hantro_reg_write(vpu, &av1_use_temporal3_mvs, 0);
1711
1712 hantro_reg_write(vpu, &av1_mf1_last_offset, 0);
1713 hantro_reg_write(vpu, &av1_mf1_last2_offset, 0);
1714 hantro_reg_write(vpu, &av1_mf1_last3_offset, 0);
1715 hantro_reg_write(vpu, &av1_mf1_golden_offset, 0);
1716 hantro_reg_write(vpu, &av1_mf1_bwdref_offset, 0);
1717 hantro_reg_write(vpu, &av1_mf1_altref2_offset, 0);
1718 hantro_reg_write(vpu, &av1_mf1_altref_offset, 0);
1719
1720 if (use_ref_frame_mvs && ref_ind > 0 &&
1721 cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
1722 cur_offset[mf_types[0] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
1723 int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[0]);
1724 int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[0]);
1725 u32 *oh = av1_dec->frame_refs[idx].order_hints;
1726 int val;
1727
1728 hantro_reg_write(vpu, &av1_use_temporal0_mvs, 1);
1729
1730 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
1731 hantro_reg_write(vpu, &av1_mf1_last_offset, val);
1732
1733 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
1734 hantro_reg_write(vpu, &av1_mf1_last2_offset, val);
1735
1736 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
1737 hantro_reg_write(vpu, &av1_mf1_last3_offset, val);
1738
1739 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
1740 hantro_reg_write(vpu, &av1_mf1_golden_offset, val);
1741
1742 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
1743 hantro_reg_write(vpu, &av1_mf1_bwdref_offset, val);
1744
1745 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
1746 hantro_reg_write(vpu, &av1_mf1_altref2_offset, val);
1747
1748 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
1749 hantro_reg_write(vpu, &av1_mf1_altref_offset, val);
1750 }
1751
1752 hantro_reg_write(vpu, &av1_mf2_last_offset, 0);
1753 hantro_reg_write(vpu, &av1_mf2_last2_offset, 0);
1754 hantro_reg_write(vpu, &av1_mf2_last3_offset, 0);
1755 hantro_reg_write(vpu, &av1_mf2_golden_offset, 0);
1756 hantro_reg_write(vpu, &av1_mf2_bwdref_offset, 0);
1757 hantro_reg_write(vpu, &av1_mf2_altref2_offset, 0);
1758 hantro_reg_write(vpu, &av1_mf2_altref_offset, 0);
1759
1760 if (use_ref_frame_mvs && ref_ind > 1 &&
1761 cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
1762 cur_offset[mf_types[1] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
1763 int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[1]);
1764 int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[1]);
1765 u32 *oh = av1_dec->frame_refs[idx].order_hints;
1766 int val;
1767
1768 hantro_reg_write(vpu, &av1_use_temporal1_mvs, 1);
1769
1770 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
1771 hantro_reg_write(vpu, &av1_mf2_last_offset, val);
1772
1773 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
1774 hantro_reg_write(vpu, &av1_mf2_last2_offset, val);
1775
1776 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
1777 hantro_reg_write(vpu, &av1_mf2_last3_offset, val);
1778
1779 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
1780 hantro_reg_write(vpu, &av1_mf2_golden_offset, val);
1781
1782 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
1783 hantro_reg_write(vpu, &av1_mf2_bwdref_offset, val);
1784
1785 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
1786 hantro_reg_write(vpu, &av1_mf2_altref2_offset, val);
1787
1788 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
1789 hantro_reg_write(vpu, &av1_mf2_altref_offset, val);
1790 }
1791
1792 hantro_reg_write(vpu, &av1_mf3_last_offset, 0);
1793 hantro_reg_write(vpu, &av1_mf3_last2_offset, 0);
1794 hantro_reg_write(vpu, &av1_mf3_last3_offset, 0);
1795 hantro_reg_write(vpu, &av1_mf3_golden_offset, 0);
1796 hantro_reg_write(vpu, &av1_mf3_bwdref_offset, 0);
1797 hantro_reg_write(vpu, &av1_mf3_altref2_offset, 0);
1798 hantro_reg_write(vpu, &av1_mf3_altref_offset, 0);
1799
1800 if (use_ref_frame_mvs && ref_ind > 2 &&
1801 cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] <= MAX_FRAME_DISTANCE &&
1802 cur_offset[mf_types[2] - V4L2_AV1_REF_LAST_FRAME] >= -MAX_FRAME_DISTANCE) {
1803 int rf = rockchip_vpu981_get_order_hint(ctx, refs_selected[2]);
1804 int idx = rockchip_vpu981_get_frame_index(ctx, refs_selected[2]);
1805 u32 *oh = av1_dec->frame_refs[idx].order_hints;
1806 int val;
1807
1808 hantro_reg_write(vpu, &av1_use_temporal2_mvs, 1);
1809
1810 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST_FRAME]);
1811 hantro_reg_write(vpu, &av1_mf3_last_offset, val);
1812
1813 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST2_FRAME]);
1814 hantro_reg_write(vpu, &av1_mf3_last2_offset, val);
1815
1816 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_LAST3_FRAME]);
1817 hantro_reg_write(vpu, &av1_mf3_last3_offset, val);
1818
1819 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_GOLDEN_FRAME]);
1820 hantro_reg_write(vpu, &av1_mf3_golden_offset, val);
1821
1822 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_BWDREF_FRAME]);
1823 hantro_reg_write(vpu, &av1_mf3_bwdref_offset, val);
1824
1825 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF2_FRAME]);
1826 hantro_reg_write(vpu, &av1_mf3_altref2_offset, val);
1827
1828 val = rockchip_vpu981_av1_dec_get_dist(ctx, rf, oh[V4L2_AV1_REF_ALTREF_FRAME]);
1829 hantro_reg_write(vpu, &av1_mf3_altref_offset, val);
1830 }
1831
1832 hantro_reg_write(vpu, &av1_cur_last_offset, cur_offset[0]);
1833 hantro_reg_write(vpu, &av1_cur_last2_offset, cur_offset[1]);
1834 hantro_reg_write(vpu, &av1_cur_last3_offset, cur_offset[2]);
1835 hantro_reg_write(vpu, &av1_cur_golden_offset, cur_offset[3]);
1836 hantro_reg_write(vpu, &av1_cur_bwdref_offset, cur_offset[4]);
1837 hantro_reg_write(vpu, &av1_cur_altref2_offset, cur_offset[5]);
1838 hantro_reg_write(vpu, &av1_cur_altref_offset, cur_offset[6]);
1839
1840 hantro_reg_write(vpu, &av1_cur_last_roffset, cur_roffset[0]);
1841 hantro_reg_write(vpu, &av1_cur_last2_roffset, cur_roffset[1]);
1842 hantro_reg_write(vpu, &av1_cur_last3_roffset, cur_roffset[2]);
1843 hantro_reg_write(vpu, &av1_cur_golden_roffset, cur_roffset[3]);
1844 hantro_reg_write(vpu, &av1_cur_bwdref_roffset, cur_roffset[4]);
1845 hantro_reg_write(vpu, &av1_cur_altref2_roffset, cur_roffset[5]);
1846 hantro_reg_write(vpu, &av1_cur_altref_roffset, cur_roffset[6]);
1847
1848 hantro_reg_write(vpu, &av1_mf1_type, mf_types[0] - V4L2_AV1_REF_LAST_FRAME);
1849 hantro_reg_write(vpu, &av1_mf2_type, mf_types[1] - V4L2_AV1_REF_LAST_FRAME);
1850 hantro_reg_write(vpu, &av1_mf3_type, mf_types[2] - V4L2_AV1_REF_LAST_FRAME);
1851 }
1852
rockchip_vpu981_av1_dec_set_reference_frames(struct hantro_ctx * ctx)1853 static void rockchip_vpu981_av1_dec_set_reference_frames(struct hantro_ctx *ctx)
1854 {
1855 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
1856 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
1857 const struct v4l2_ctrl_av1_frame *frame = ctrls->frame;
1858 int frame_type = frame->frame_type;
1859 bool allow_intrabc = !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC);
1860 int ref_count[AV1DEC_MAX_PIC_BUFFERS] = { 0 };
1861 struct hantro_dev *vpu = ctx->dev;
1862 int i, ref_frames = 0;
1863 bool scale_enable = false;
1864
1865 if (IS_INTRA(frame_type) && !allow_intrabc)
1866 return;
1867
1868 if (!allow_intrabc) {
1869 for (i = 0; i < V4L2_AV1_REFS_PER_FRAME; i++) {
1870 int idx = rockchip_vpu981_get_frame_index(ctx, i);
1871
1872 if (idx >= 0)
1873 ref_count[idx]++;
1874 }
1875
1876 for (i = 0; i < AV1DEC_MAX_PIC_BUFFERS; i++) {
1877 if (ref_count[i])
1878 ref_frames++;
1879 }
1880 } else {
1881 ref_frames = 1;
1882 }
1883 hantro_reg_write(vpu, &av1_ref_frames, ref_frames);
1884
1885 rockchip_vpu981_av1_dec_set_frame_sign_bias(ctx);
1886
1887 for (i = V4L2_AV1_REF_LAST_FRAME; i < V4L2_AV1_TOTAL_REFS_PER_FRAME; i++) {
1888 u32 ref = i - 1;
1889 int idx = 0;
1890 int width, height;
1891
1892 if (allow_intrabc) {
1893 idx = av1_dec->current_frame_index;
1894 width = frame->frame_width_minus_1 + 1;
1895 height = frame->frame_height_minus_1 + 1;
1896 } else {
1897 if (rockchip_vpu981_get_frame_index(ctx, ref) > 0)
1898 idx = rockchip_vpu981_get_frame_index(ctx, ref);
1899 width = av1_dec->frame_refs[idx].width;
1900 height = av1_dec->frame_refs[idx].height;
1901 }
1902
1903 scale_enable |=
1904 rockchip_vpu981_av1_dec_set_ref(ctx, ref, idx, width,
1905 height);
1906
1907 rockchip_vpu981_av1_dec_set_sign_bias(ctx, ref,
1908 av1_dec->ref_frame_sign_bias[i]);
1909 }
1910 hantro_reg_write(vpu, &av1_ref_scaling_enable, scale_enable);
1911
1912 hantro_reg_write(vpu, &av1_ref0_gm_mode,
1913 frame->global_motion.type[V4L2_AV1_REF_LAST_FRAME]);
1914 hantro_reg_write(vpu, &av1_ref1_gm_mode,
1915 frame->global_motion.type[V4L2_AV1_REF_LAST2_FRAME]);
1916 hantro_reg_write(vpu, &av1_ref2_gm_mode,
1917 frame->global_motion.type[V4L2_AV1_REF_LAST3_FRAME]);
1918 hantro_reg_write(vpu, &av1_ref3_gm_mode,
1919 frame->global_motion.type[V4L2_AV1_REF_GOLDEN_FRAME]);
1920 hantro_reg_write(vpu, &av1_ref4_gm_mode,
1921 frame->global_motion.type[V4L2_AV1_REF_BWDREF_FRAME]);
1922 hantro_reg_write(vpu, &av1_ref5_gm_mode,
1923 frame->global_motion.type[V4L2_AV1_REF_ALTREF2_FRAME]);
1924 hantro_reg_write(vpu, &av1_ref6_gm_mode,
1925 frame->global_motion.type[V4L2_AV1_REF_ALTREF_FRAME]);
1926
1927 rockchip_vpu981_av1_dec_set_other_frames(ctx);
1928 }
1929
/*
 * Program the per-frame AV1 decode parameters from the V4L2 stateless
 * controls: frame/sequence tool flags, loop-filter and quantizer deltas,
 * quantization matrices, prediction/transform modes and the skip-mode
 * reference pair.  All values go through the shadow register cache.
 */
static void rockchip_vpu981_av1_dec_set_parameters(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;
	struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
	struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;

	/* Per-frame flags. */
	hantro_reg_write(vpu, &av1_skip_mode,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_SKIP_MODE_PRESENT));
	hantro_reg_write(vpu, &av1_tempor_mvp_e,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_USE_REF_FRAME_MVS));
	hantro_reg_write(vpu, &av1_delta_lf_res_log,
			 ctrls->frame->loop_filter.delta_lf_res);
	hantro_reg_write(vpu, &av1_delta_lf_multi,
			 !!(ctrls->frame->loop_filter.flags
			    & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_MULTI));
	hantro_reg_write(vpu, &av1_delta_lf_present,
			 !!(ctrls->frame->loop_filter.flags
			    & V4L2_AV1_LOOP_FILTER_FLAG_DELTA_LF_PRESENT));
	hantro_reg_write(vpu, &av1_disable_cdf_update,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_DISABLE_CDF_UPDATE));
	hantro_reg_write(vpu, &av1_allow_warp,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_WARPED_MOTION));
	hantro_reg_write(vpu, &av1_show_frame,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_SHOW_FRAME));
	hantro_reg_write(vpu, &av1_switchable_motion_mode,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_IS_MOTION_MODE_SWITCHABLE));
	/* Sequence-level coding-tool enables. */
	hantro_reg_write(vpu, &av1_enable_cdef,
			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_CDEF));
	hantro_reg_write(vpu, &av1_allow_masked_compound,
			 !!(ctrls->sequence->flags
			    & V4L2_AV1_SEQUENCE_FLAG_ENABLE_MASKED_COMPOUND));
	hantro_reg_write(vpu, &av1_allow_interintra,
			 !!(ctrls->sequence->flags
			    & V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTERINTRA_COMPOUND));
	hantro_reg_write(vpu, &av1_enable_intra_edge_filter,
			 !!(ctrls->sequence->flags
			    & V4L2_AV1_SEQUENCE_FLAG_ENABLE_INTRA_EDGE_FILTER));
	hantro_reg_write(vpu, &av1_allow_filter_intra,
			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_FILTER_INTRA));
	hantro_reg_write(vpu, &av1_enable_jnt_comp,
			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_JNT_COMP));
	hantro_reg_write(vpu, &av1_enable_dual_filter,
			 !!(ctrls->sequence->flags & V4L2_AV1_SEQUENCE_FLAG_ENABLE_DUAL_FILTER));
	hantro_reg_write(vpu, &av1_reduced_tx_set_used,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_REDUCED_TX_SET));
	hantro_reg_write(vpu, &av1_allow_screen_content_tools,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS));
	hantro_reg_write(vpu, &av1_allow_intrabc,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_INTRABC));

	/*
	 * force_integer_mv only applies when screen content tools are
	 * allowed; otherwise force it off.  ("interger" typo comes from the
	 * register definition.)
	 */
	if (!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_SCREEN_CONTENT_TOOLS))
		hantro_reg_write(vpu, &av1_force_interger_mv, 0);
	else
		hantro_reg_write(vpu, &av1_force_interger_mv,
				 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_FORCE_INTEGER_MV));

	/* Monochrome decoding is never enabled here. */
	hantro_reg_write(vpu, &av1_blackwhite_e, 0);
	hantro_reg_write(vpu, &av1_delta_q_res_log, ctrls->frame->quantization.delta_q_res);
	hantro_reg_write(vpu, &av1_delta_q_present,
			 !!(ctrls->frame->quantization.flags
			    & V4L2_AV1_QUANTIZATION_FLAG_DELTA_Q_PRESENT));

	/* frame_type == 0 is V4L2_AV1_KEY_FRAME, i.e. an IDR-like picture. */
	hantro_reg_write(vpu, &av1_idr_pic_e, !ctrls->frame->frame_type);
	hantro_reg_write(vpu, &av1_quant_base_qindex, ctrls->frame->quantization.base_q_idx);
	hantro_reg_write(vpu, &av1_bit_depth_y_minus8, ctx->bit_depth - 8);
	hantro_reg_write(vpu, &av1_bit_depth_c_minus8, ctx->bit_depth - 8);

	hantro_reg_write(vpu, &av1_mcomp_filt_type, ctrls->frame->interpolation_filter);
	hantro_reg_write(vpu, &av1_high_prec_mv_e,
			 !!(ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_ALLOW_HIGH_PRECISION_MV));
	hantro_reg_write(vpu, &av1_comp_pred_mode,
			 (ctrls->frame->flags & V4L2_AV1_FRAME_FLAG_REFERENCE_SELECT) ? 2 : 0);
	/* 3/4 are hardware encodings of the transform mode — TODO confirm. */
	hantro_reg_write(vpu, &av1_transform_mode, (ctrls->frame->tx_mode == 1) ? 3 : 4);
	/* log2 superblock size: 7 = 128x128, 6 = 64x64; min block 8x8. */
	hantro_reg_write(vpu, &av1_max_cb_size,
			 (ctrls->sequence->flags
			  & V4L2_AV1_SEQUENCE_FLAG_USE_128X128_SUPERBLOCK) ? 7 : 6);
	hantro_reg_write(vpu, &av1_min_cb_size, 3);

	/* Fields unused by this decode mode are cleared explicitly. */
	hantro_reg_write(vpu, &av1_comp_pred_fixed_ref, 0);
	hantro_reg_write(vpu, &av1_comp_pred_var_ref0_av1, 0);
	hantro_reg_write(vpu, &av1_comp_pred_var_ref1_av1, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg0, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg1, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg2, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg3, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg4, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg5, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg6, 0);
	hantro_reg_write(vpu, &av1_filt_level_seg7, 0);

	/* Per-plane DC/AC quantizer deltas. */
	hantro_reg_write(vpu, &av1_qp_delta_y_dc_av1, ctrls->frame->quantization.delta_q_y_dc);
	hantro_reg_write(vpu, &av1_qp_delta_ch_dc_av1, ctrls->frame->quantization.delta_q_u_dc);
	hantro_reg_write(vpu, &av1_qp_delta_ch_ac_av1, ctrls->frame->quantization.delta_q_u_ac);
	/* 0xff disables the quantizer matrix for a plane. */
	if (ctrls->frame->quantization.flags & V4L2_AV1_QUANTIZATION_FLAG_USING_QMATRIX) {
		hantro_reg_write(vpu, &av1_qmlevel_y, ctrls->frame->quantization.qm_y);
		hantro_reg_write(vpu, &av1_qmlevel_u, ctrls->frame->quantization.qm_u);
		hantro_reg_write(vpu, &av1_qmlevel_v, ctrls->frame->quantization.qm_v);
	} else {
		hantro_reg_write(vpu, &av1_qmlevel_y, 0xff);
		hantro_reg_write(vpu, &av1_qmlevel_u, 0xff);
		hantro_reg_write(vpu, &av1_qmlevel_v, 0xff);
	}

	hantro_reg_write(vpu, &av1_lossless_e, rockchip_vpu981_av1_dec_is_lossless(ctx));
	hantro_reg_write(vpu, &av1_quant_delta_v_dc, ctrls->frame->quantization.delta_q_v_dc);
	hantro_reg_write(vpu, &av1_quant_delta_v_ac, ctrls->frame->quantization.delta_q_v_ac);

	/* Skip-mode references; fall back to LAST (1) when unset. */
	hantro_reg_write(vpu, &av1_skip_ref0,
			 (ctrls->frame->skip_mode_frame[0]) ? ctrls->frame->skip_mode_frame[0] : 1);
	hantro_reg_write(vpu, &av1_skip_ref1,
			 (ctrls->frame->skip_mode_frame[1]) ? ctrls->frame->skip_mode_frame[1] : 1);

	/* Multicore tile synchronization buffers (same buffer for both). */
	hantro_write_addr(vpu, AV1_MC_SYNC_CURR, av1_dec->tile_buf.dma);
	hantro_write_addr(vpu, AV1_MC_SYNC_LEFT, av1_dec->tile_buf.dma);
}
2045
2046 static void
rockchip_vpu981_av1_dec_set_input_buffer(struct hantro_ctx * ctx,struct vb2_v4l2_buffer * vb2_src)2047 rockchip_vpu981_av1_dec_set_input_buffer(struct hantro_ctx *ctx,
2048 struct vb2_v4l2_buffer *vb2_src)
2049 {
2050 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
2051 struct hantro_av1_dec_ctrls *ctrls = &av1_dec->ctrls;
2052 const struct v4l2_ctrl_av1_tile_group_entry *group_entry =
2053 ctrls->tile_group_entry;
2054 struct hantro_dev *vpu = ctx->dev;
2055 dma_addr_t src_dma;
2056 u32 src_len, src_buf_len;
2057 int start_bit, offset;
2058
2059 src_dma = vb2_dma_contig_plane_dma_addr(&vb2_src->vb2_buf, 0);
2060 src_len = vb2_get_plane_payload(&vb2_src->vb2_buf, 0);
2061 src_buf_len = vb2_plane_size(&vb2_src->vb2_buf, 0);
2062
2063 start_bit = (group_entry[0].tile_offset & 0xf) * 8;
2064 offset = group_entry[0].tile_offset & ~0xf;
2065
2066 hantro_reg_write(vpu, &av1_strm_buffer_len, src_buf_len);
2067 hantro_reg_write(vpu, &av1_strm_start_bit, start_bit);
2068 hantro_reg_write(vpu, &av1_stream_len, src_len);
2069 hantro_reg_write(vpu, &av1_strm_start_offset, 0);
2070 hantro_write_addr(vpu, AV1_INPUT_STREAM, src_dma + offset);
2071 }
2072
2073 static void
rockchip_vpu981_av1_dec_set_output_buffer(struct hantro_ctx * ctx)2074 rockchip_vpu981_av1_dec_set_output_buffer(struct hantro_ctx *ctx)
2075 {
2076 struct hantro_av1_dec_hw_ctx *av1_dec = &ctx->av1_dec;
2077 struct hantro_dev *vpu = ctx->dev;
2078 struct hantro_decoded_buffer *dst;
2079 struct vb2_v4l2_buffer *vb2_dst;
2080 dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
2081 size_t cr_offset = rockchip_vpu981_av1_dec_luma_size(ctx);
2082 size_t mv_offset = rockchip_vpu981_av1_dec_chroma_size(ctx);
2083
2084 vb2_dst = av1_dec->frame_refs[av1_dec->current_frame_index].vb2_ref;
2085 dst = vb2_to_hantro_decoded_buf(&vb2_dst->vb2_buf);
2086 luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
2087 chroma_addr = luma_addr + cr_offset;
2088 mv_addr = luma_addr + mv_offset;
2089
2090 dst->av1.chroma_offset = cr_offset;
2091 dst->av1.mv_offset = mv_offset;
2092
2093 hantro_write_addr(vpu, AV1_TILE_OUT_LU, luma_addr);
2094 hantro_write_addr(vpu, AV1_TILE_OUT_CH, chroma_addr);
2095 hantro_write_addr(vpu, AV1_TILE_OUT_MV, mv_addr);
2096 }
2097
/*
 * Top-level per-frame decode entry point.  Prepares the run, programs all
 * frame state and static hardware configuration, then kicks the decoder.
 * The ordering matters: controls must be fetched (prepare_run) before any
 * parameter programming, and av1_dec_e is written only after
 * hantro_end_prepare_run().  Returns 0 on success or a negative errno.
 */
int rockchip_vpu981_av1_dec_run(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;
	struct vb2_v4l2_buffer *vb2_src;
	int ret;

	hantro_start_prepare_run(ctx);

	/* Fetches/validates the V4L2 AV1 controls for this frame. */
	ret = rockchip_vpu981_av1_dec_prepare_run(ctx);
	if (ret)
		goto prepare_error;

	vb2_src = hantro_get_src_buf(ctx);
	if (!vb2_src) {
		ret = -EINVAL;
		goto prepare_error;
	}

	/* Drop stale reference slots, then register the current frame. */
	rockchip_vpu981_av1_dec_clean_refs(ctx);
	rockchip_vpu981_av1_dec_frame_ref(ctx, vb2_src->vb2_buf.timestamp);

	/* Program all per-frame decode state into the register shadow. */
	rockchip_vpu981_av1_dec_set_parameters(ctx);
	rockchip_vpu981_av1_dec_set_global_model(ctx);
	rockchip_vpu981_av1_dec_set_tile_info(ctx);
	rockchip_vpu981_av1_dec_set_reference_frames(ctx);
	rockchip_vpu981_av1_dec_set_segmentation(ctx);
	rockchip_vpu981_av1_dec_set_loopfilter(ctx);
	rockchip_vpu981_av1_dec_set_picture_dimensions(ctx);
	rockchip_vpu981_av1_dec_set_cdef(ctx);
	rockchip_vpu981_av1_dec_set_lr(ctx);
	rockchip_vpu981_av1_dec_set_fgs(ctx);
	rockchip_vpu981_av1_dec_set_prob(ctx);

	/* Static core configuration for AV1 decode mode. */
	hantro_reg_write(vpu, &av1_dec_mode, AV1_DEC_MODE);
	hantro_reg_write(vpu, &av1_dec_out_ec_byte_word, 0);
	hantro_reg_write(vpu, &av1_write_mvs_e, 1);
	hantro_reg_write(vpu, &av1_dec_out_ec_bypass, 1);
	hantro_reg_write(vpu, &av1_dec_clk_gate_e, 1);

	hantro_reg_write(vpu, &av1_dec_abort_e, 0);
	hantro_reg_write(vpu, &av1_dec_tile_int_e, 0);

	/* Bus/AXI tuning values. */
	hantro_reg_write(vpu, &av1_dec_alignment, 64);
	hantro_reg_write(vpu, &av1_apf_disable, 0);
	hantro_reg_write(vpu, &av1_apf_threshold, 8);
	hantro_reg_write(vpu, &av1_dec_buswidth, 2);
	hantro_reg_write(vpu, &av1_dec_max_burst, 16);
	hantro_reg_write(vpu, &av1_error_conceal_e, 0);
	hantro_reg_write(vpu, &av1_axi_rd_ostd_threshold, 64);
	hantro_reg_write(vpu, &av1_axi_wr_ostd_threshold, 64);

	/* Override hardware timeouts with the maximum cycle count. */
	hantro_reg_write(vpu, &av1_ext_timeout_cycles, 0xfffffff);
	hantro_reg_write(vpu, &av1_ext_timeout_override_e, 1);
	hantro_reg_write(vpu, &av1_timeout_cycles, 0xfffffff);
	hantro_reg_write(vpu, &av1_timeout_override_e, 1);

	rockchip_vpu981_av1_dec_set_output_buffer(ctx);
	rockchip_vpu981_av1_dec_set_input_buffer(ctx, vb2_src);

	hantro_end_prepare_run(ctx);

	/* Start the hardware. */
	hantro_reg_write(vpu, &av1_dec_e, 1);

	return 0;

prepare_error:
	hantro_end_prepare_run(ctx);
	hantro_irq_done(vpu, VB2_BUF_STATE_ERROR);
	return ret;
}
2168
rockchip_vpu981_postproc_enable(struct hantro_ctx * ctx)2169 static void rockchip_vpu981_postproc_enable(struct hantro_ctx *ctx)
2170 {
2171 struct hantro_dev *vpu = ctx->dev;
2172 int width = ctx->dst_fmt.width;
2173 int height = ctx->dst_fmt.height;
2174 struct vb2_v4l2_buffer *vb2_dst;
2175 size_t chroma_offset;
2176 dma_addr_t dst_dma;
2177
2178 vb2_dst = hantro_get_dst_buf(ctx);
2179
2180 dst_dma = vb2_dma_contig_plane_dma_addr(&vb2_dst->vb2_buf, 0);
2181 chroma_offset = ctx->dst_fmt.plane_fmt[0].bytesperline *
2182 ctx->dst_fmt.height;
2183
2184 /* enable post processor */
2185 hantro_reg_write(vpu, &av1_pp_out_e, 1);
2186 hantro_reg_write(vpu, &av1_pp_in_format, 0);
2187 hantro_reg_write(vpu, &av1_pp0_dup_hor, 1);
2188 hantro_reg_write(vpu, &av1_pp0_dup_ver, 1);
2189
2190 hantro_reg_write(vpu, &av1_pp_in_height, height / 2);
2191 hantro_reg_write(vpu, &av1_pp_in_width, width / 2);
2192 hantro_reg_write(vpu, &av1_pp_out_height, height);
2193 hantro_reg_write(vpu, &av1_pp_out_width, width);
2194 hantro_reg_write(vpu, &av1_pp_out_y_stride,
2195 ctx->dst_fmt.plane_fmt[0].bytesperline);
2196 hantro_reg_write(vpu, &av1_pp_out_c_stride,
2197 ctx->dst_fmt.plane_fmt[0].bytesperline);
2198 switch (ctx->dst_fmt.pixelformat) {
2199 case V4L2_PIX_FMT_P010:
2200 hantro_reg_write(vpu, &av1_pp_out_format, 1);
2201 break;
2202 case V4L2_PIX_FMT_NV12:
2203 hantro_reg_write(vpu, &av1_pp_out_format, 3);
2204 break;
2205 default:
2206 hantro_reg_write(vpu, &av1_pp_out_format, 0);
2207 }
2208
2209 hantro_reg_write(vpu, &av1_ppd_blend_exist, 0);
2210 hantro_reg_write(vpu, &av1_ppd_dith_exist, 0);
2211 hantro_reg_write(vpu, &av1_ablend_crop_e, 0);
2212 hantro_reg_write(vpu, &av1_pp_format_customer1_e, 0);
2213 hantro_reg_write(vpu, &av1_pp_crop_exist, 0);
2214 hantro_reg_write(vpu, &av1_pp_up_level, 0);
2215 hantro_reg_write(vpu, &av1_pp_down_level, 0);
2216 hantro_reg_write(vpu, &av1_pp_exist, 0);
2217
2218 hantro_write_addr(vpu, AV1_PP_OUT_LU, dst_dma);
2219 hantro_write_addr(vpu, AV1_PP_OUT_CH, dst_dma + chroma_offset);
2220 }
2221
/* Turn off the post-processor output path for the next decode run. */
static void rockchip_vpu981_postproc_disable(struct hantro_ctx *ctx)
{
	struct hantro_dev *vpu = ctx->dev;

	/* disable post processor */
	hantro_reg_write(vpu, &av1_pp_out_e, 0);
}
2229
/* Post-processor hooks invoked by the common Hantro postproc core. */
const struct hantro_postproc_ops rockchip_vpu981_postproc_ops = {
	.enable = rockchip_vpu981_postproc_enable,
	.disable = rockchip_vpu981_postproc_disable,
};
2234