xref: /aosp_15_r20/external/libdav1d/src/internal.h (revision c09093415860a1c2373dacd84c4fde00c507cdfd)
1 /*
2  * Copyright © 2018-2021, VideoLAN and dav1d authors
3  * Copyright © 2018, Two Orioles, LLC
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  *    list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright notice,
13  *    this list of conditions and the following disclaimer in the documentation
14  *    and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #ifndef DAV1D_SRC_INTERNAL_H
29 #define DAV1D_SRC_INTERNAL_H
30 
31 #include <stdatomic.h>
32 
33 #include "dav1d/data.h"
34 
/* Forward declarations of the core decoder state types whose full
 * definitions appear further down in this file. They sit ahead of the
 * "src/..." includes below, presumably so that those headers can name the
 * types (by pointer) before the definitions are seen -- confirm against
 * the included headers. */
35 typedef struct Dav1dFrameContext Dav1dFrameContext;
36 typedef struct Dav1dTileState Dav1dTileState;
37 typedef struct Dav1dTaskContext Dav1dTaskContext;
38 typedef struct Dav1dTask Dav1dTask;
39 
40 #include "common/attributes.h"
41 
42 #include "src/cdef.h"
43 #include "src/cdf.h"
44 #include "src/data.h"
45 #include "src/env.h"
46 #include "src/filmgrain.h"
47 #include "src/intra_edge.h"
48 #include "src/ipred.h"
49 #include "src/itx.h"
50 #include "src/levels.h"
51 #include "src/lf_mask.h"
52 #include "src/loopfilter.h"
53 #include "src/looprestoration.h"
54 #include "src/mc.h"
55 #include "src/msac.h"
56 #include "src/pal.h"
57 #include "src/picture.h"
58 #include "src/recon.h"
59 #include "src/refmvs.h"
60 #include "src/thread.h"
61 
/* Aggregate of the per-module DSP contexts, one member per module header
 * included above (film grain, intra prediction, motion compensation,
 * inverse transform, loop filter, CDEF, loop restoration).
 * Dav1dContext holds an array of three of these, one per supported bit
 * depth, and Dav1dFrameContext holds a const pointer to one. */
62 typedef struct Dav1dDSPContext {
63     Dav1dFilmGrainDSPContext fg;
64     Dav1dIntraPredDSPContext ipred;
65     Dav1dMCDSPContext mc;
66     Dav1dInvTxfmDSPContext itx;
67     Dav1dLoopFilterDSPContext lf;
68     Dav1dCdefDSPContext cdef;
69     Dav1dLoopRestorationDSPContext lr;
70 } Dav1dDSPContext;
71 
/* A data buffer together with a start/end pair. This is the element type
 * of the `tile` arrays in Dav1dContext and Dav1dFrameContext.
 * NOTE(review): start/end presumably give the first and last tile index
 * covered by `data` -- confirm against the code that fills these in. */
72 struct Dav1dTileGroup {
73     Dav1dData data;
74     int start, end;
75 };
76 
/* Kinds of work a task can represent; stored in Dav1dTask::type and in
 * Dav1dContext::task_thread.delayed_fg.type.
 * The enumerators have no explicit values, so their numeric values follow
 * declaration order (starting at 0).
 * NOTE(review): check whether any code compares task types by order
 * before inserting or reordering entries. */
77 enum TaskType {
78     DAV1D_TASK_TYPE_INIT,
79     DAV1D_TASK_TYPE_INIT_CDF,
80     DAV1D_TASK_TYPE_TILE_ENTROPY,
81     DAV1D_TASK_TYPE_ENTROPY_PROGRESS,
82     DAV1D_TASK_TYPE_TILE_RECONSTRUCTION,
83     DAV1D_TASK_TYPE_DEBLOCK_COLS,
84     DAV1D_TASK_TYPE_DEBLOCK_ROWS,
85     DAV1D_TASK_TYPE_CDEF,
86     DAV1D_TASK_TYPE_SUPER_RESOLUTION,
87     DAV1D_TASK_TYPE_LOOP_RESTORATION,
88     DAV1D_TASK_TYPE_RECONSTRUCTION_PROGRESS,
89     DAV1D_TASK_TYPE_FG_PREP,
90     DAV1D_TASK_TYPE_FG_APPLY,
91 };
92 
/* Top-level decoder state. Holds the arrays of frame contexts (fc/n_fc)
 * and task contexts (tc/n_tc), the headers, metadata and tile data cached
 * while a frame is being assembled, the output picture state, the shared
 * task-threading state, the eight reference and CDF slots, the DSP
 * contexts, and assorted option/status fields. */
93 struct Dav1dContext {
94     Dav1dFrameContext *fc;
95     unsigned n_fc;
96 
97     Dav1dTaskContext *tc;
98     unsigned n_tc;
99 
100     // cache of OBUs that make up a single frame before we submit them
101     // to a frame worker to be decoded
102     struct Dav1dTileGroup *tile;
103     int n_tile_data_alloc;
104     int n_tile_data;
105     int n_tiles;
106     Dav1dMemPool *seq_hdr_pool;
107     Dav1dRef *seq_hdr_ref;
108     Dav1dSequenceHeader *seq_hdr;
109     Dav1dMemPool *frame_hdr_pool;
110     Dav1dRef *frame_hdr_ref;
111     Dav1dFrameHeader *frame_hdr;
112 
        // optional metadata; each pointer is paired with a Dav1dRef
113     Dav1dRef *content_light_ref;
114     Dav1dContentLightLevel *content_light;
115     Dav1dRef *mastering_display_ref;
116     Dav1dMasteringDisplay *mastering_display;
117     Dav1dRef *itut_t35_ref;
118     Dav1dITUTT35 *itut_t35;
119     int n_itut_t35;
120 
121     // decoded output picture queue
122     Dav1dData in;
123     Dav1dThreadPicture out, cache;
124     // flush is a pointer (the original note called it "dummy"; presumably it
125     // points at flush_mem -- confirm) to prevent compiler errors about atomic_load() not taking const arguments
126     atomic_int flush_mem, *flush;
127     struct {
128         Dav1dThreadPicture *out_delayed;
129         unsigned next;
130     } frame_thread;
131 
132     // task threading (refer to tc[] for per-thread state)
133     struct TaskThreadData {
134         pthread_mutex_t lock;
135         pthread_cond_t cond;
136         atomic_uint first;
137         unsigned cur;
138         // This is used for delayed reset of the task cur pointer when
139         // such operation is needed but the thread doesn't enter a critical
140         // section (typically when executing the next sbrow task locklessly).
141         // See src/thread_task.c:reset_task_cur().
142         atomic_uint reset_task_cur;
143         atomic_int cond_signaled;
            // state for film grain applied as a delayed task
            // (NOTE(review): inferred from the member name and the FG_* task types)
144         struct {
145             int exec, finished;
146             pthread_cond_t cond;
147             const Dav1dPicture *in;
148             Dav1dPicture *out;
149             enum TaskType type;
150             atomic_int progress[2]; /* [0]=started, [1]=completed */
                // the 8 bpc and 16 bpc tables below overlap in memory
151             union {
152                 struct {
153                     ALIGN(int8_t grain_lut_8bpc[3][GRAIN_HEIGHT + 1][GRAIN_WIDTH], 16);
154                     ALIGN(uint8_t scaling_8bpc[3][256], 64);
155                 };
156                 struct {
157                     ALIGN(int16_t grain_lut_16bpc[3][GRAIN_HEIGHT + 1][GRAIN_WIDTH], 16);
158                     ALIGN(uint8_t scaling_16bpc[3][4096], 64);
159                 };
160             };
161         } delayed_fg;
162         int inited;
163     } task_thread;
164 
165     // reference/entropy state
166     Dav1dMemPool *segmap_pool;
167     Dav1dMemPool *refmvs_pool;
168     struct {
169         Dav1dThreadPicture p;
170         Dav1dRef *segmap;
171         Dav1dRef *refmvs;
172         unsigned refpoc[7];
173     } refs[8];
174     Dav1dMemPool *cdf_pool;
175     CdfThreadContext cdf[8];
176 
177     Dav1dDSPContext dsp[3 /* 8, 10, 12 bits/component */];
178     Dav1dPalDSPContext pal_dsp;
179     Dav1dRefmvsDSPContext refmvs_dsp;
180 
181     Dav1dPicAllocator allocator;
182     int apply_grain;
183     int operating_point;
184     unsigned operating_point_idc;
185     int all_layers;
186     int max_spatial_id;
187     unsigned frame_size_limit;
188     int strict_std_compliance;
189     int output_invisible_frames;
190     enum Dav1dInloopFilterType inloop_filters;
191     enum Dav1dDecodeFrameType decode_frame_type;
192     int drain;
193     enum PictureFlags frame_flags;
194     enum Dav1dEventFlags event_flags;
195     Dav1dDataProps cached_error_props;
196     int cached_error;
197 
198     Dav1dLogger logger;
199 
200     Dav1dMemPool *picture_pool;
201     Dav1dMemPool *pic_ctx_pool;
202 };
203 
/* One schedulable unit of work: which frame thread it belongs to, what
 * kind of work it is, which superblock row it covers, and the progress
 * values it depends on. Tasks are chained through `next` while they sit
 * in a task queue. */
204 struct Dav1dTask {
205     unsigned frame_idx;         // frame thread id
206     enum TaskType type;         // task work
207     int sby;                    // sbrow
208 
209     // task dependencies
210     int recon_progress, deblock_progress;
211     int deps_skip;
212     struct Dav1dTask *next; // only used in task queue
213 };
214 
/* Per-frame decoding state; Dav1dContext::fc points to an array of these.
 * Groups the headers and reference pictures used by the frame, its tile
 * data and tile states, bit-depth-specific function pointers (bd_fn),
 * frame geometry, frame-threading buffers, loop-filter state and the
 * frame's task queue. */
215 struct Dav1dFrameContext {
216     Dav1dRef *seq_hdr_ref;
217     Dav1dSequenceHeader *seq_hdr;
218     Dav1dRef *frame_hdr_ref;
219     Dav1dFrameHeader *frame_hdr;
220     Dav1dThreadPicture refp[7];
221     Dav1dPicture cur; // during block coding / reconstruction
222     Dav1dThreadPicture sr_cur; // after super-resolution upscaling
223     Dav1dRef *mvs_ref;
224     refmvs_temporal_block *mvs, *ref_mvs[7];
225     Dav1dRef *ref_mvs_ref[7];
226     Dav1dRef *cur_segmap_ref, *prev_segmap_ref;
227     uint8_t *cur_segmap;
228     const uint8_t *prev_segmap;
229     unsigned refpoc[7], refrefpoc[7][7];
230     uint8_t gmv_warp_allowed[7];
231     CdfThreadContext in_cdf, out_cdf;
232     struct Dav1dTileGroup *tile;
233     int n_tile_data_alloc;
234     int n_tile_data;
235 
236     // for scalable references
237     struct ScalableMotionParams {
238         int scale; // if no scaling, this is 0
239         int step;
240     } svc[7][2 /* x, y */];
241     int resize_step[2 /* y, uv */], resize_start[2 /* y, uv */];
242 
243     const Dav1dContext *c;
244     Dav1dTileState *ts;
245     int n_ts;
246     const Dav1dDSPContext *dsp;
        // function pointers for reconstruction, filtering and palette
        // handling (NOTE(review): presumably selected per bit depth, going
        // by the "bd_fn" name -- confirm where these are assigned)
247     struct {
248         recon_b_intra_fn recon_b_intra;
249         recon_b_inter_fn recon_b_inter;
250         filter_sbrow_fn filter_sbrow;
251         filter_sbrow_fn filter_sbrow_deblock_cols;
252         filter_sbrow_fn filter_sbrow_deblock_rows;
253         void (*filter_sbrow_cdef)(Dav1dTaskContext *tc, int sby);
254         filter_sbrow_fn filter_sbrow_resize;
255         filter_sbrow_fn filter_sbrow_lr;
256         backup_ipred_edge_fn backup_ipred_edge;
257         read_coef_blocks_fn read_coef_blocks;
258         copy_pal_block_fn copy_pal_block_y;
259         copy_pal_block_fn copy_pal_block_uv;
260         read_pal_plane_fn read_pal_plane;
261         read_pal_uv_fn read_pal_uv;
262     } bd_fn;
263 
264     int ipred_edge_sz;
265     pixel *ipred_edge[3];
266     ptrdiff_t b4_stride;
267     int w4, h4, bw, bh, sb128w, sb128h, sbh, sb_shift, sb_step, sr_sb128w;
268     uint16_t dq[DAV1D_MAX_SEGMENTS][3 /* plane */][2 /* dc/ac */];
269     const uint8_t *qm[N_RECT_TX_SIZES][3 /* plane */];
270     BlockContext *a;
271     int a_sz /* w*tile_rows */;
272     refmvs_frame rf;
273     uint8_t jnt_weights[7][7];
274     int bitdepth_max;
275 
276     struct {
277         int next_tile_row[2 /* 0: reconstruction, 1: entropy */];
278         atomic_int entropy_progress;
279         atomic_int deblock_progress; // in sby units
280         atomic_uint *frame_progress, *copy_lpf_progress;
281         // indexed using t->by * f->b4_stride + t->bx
282         Av1Block *b;
283         int16_t *cbi; /* bits 0-4: txtp, bits 5-15: eob */
284         // indexed using (t->by >> 1) * (f->b4_stride >> 1) + (t->bx >> 1)
285         pixel (*pal)[3 /* plane */][8 /* idx */];
286         // iterated over inside tile state
287         uint8_t *pal_idx;
288         coef *cf;
289         int prog_sz;
290         int cbi_sz, pal_sz, pal_idx_sz, cf_sz;
291         // start offsets per tile
292         unsigned *tile_start_off;
293     } frame_thread;
294 
295     // loopfilter
296     struct {
297         uint8_t (*level)[4];
298         Av1Filter *mask;
299         Av1Restoration *lr_mask;
300         int mask_sz /* w*h */, lr_mask_sz;
301         int cdef_buf_plane_sz[2]; /* stride*sbh*4 */
302         int cdef_buf_sbh;
303         int lr_buf_plane_sz[2]; /* (stride*sbh*4) << sb128 if n_tc > 1, else stride*4 */
304         int re_sz /* h */;
305         ALIGN(Av1FilterLUT lim_lut, 16);
306         ALIGN(uint8_t lvl[8 /* seg_id */][4 /* dir */][8 /* ref */][2 /* is_gmv */], 16);
307         int last_sharpness;
308         uint8_t *tx_lpf_right_edge[2];
309         uint8_t *cdef_line_buf, *lr_line_buf;
310         pixel *cdef_line[2 /* pre, post */][3 /* plane */];
311         pixel *cdef_lpf_line[3 /* plane */];
312         pixel *lr_lpf_line[3 /* plane */];
313 
314         // in-loop filter per-frame state keeping
315         uint8_t *start_of_tile_row;
316         int start_of_tile_row_sz;
317         int need_cdef_lpf_copy;
318         pixel *p[3], *sr_p[3];
319         int restore_planes; // enum LrRestorePlanes
320     } lf;
321 
        // per-frame task scheduling state
322     struct {
323         pthread_mutex_t lock;
324         pthread_cond_t cond;
325         struct TaskThreadData *ttd;
326         struct Dav1dTask *tasks, *tile_tasks[2], init_task;
327         int num_tasks, num_tile_tasks;
328         atomic_int init_done;
329         atomic_int done[2];
330         int retval;
331         int update_set; // whether we need to update CDF reference
332         atomic_int error;
333         atomic_int task_counter;
334         struct Dav1dTask *task_head, *task_tail;
335         // Points to the task directly before the cur pointer in the queue.
336         // This cur pointer is theoretical here, we actually keep track of the
337         // "prev_t" variable. This is needed to not lose the tasks in
338         // [head;cur-1] when picking one for execution.
339         struct Dav1dTask *task_cur_prev;
340         struct { // async task insertion
341             atomic_int merge;
342             pthread_mutex_t lock;
343             Dav1dTask *head, *tail;
344         } pending_tasks;
345     } task_thread;
346 
347     // threading (refer to tc[] for per-thread things)
348     struct FrameTileThreadData {
349         int (*lowest_pixel_mem)[7][2];
350         int lowest_pixel_mem_sz;
351     } tile_thread;
352 };
353 
/* Per-tile decoding state; Dav1dFrameContext::ts points to an array of
 * n_ts of these. Holds the tile's CDF and MSAC (entropy decoder) contexts,
 * its position and extent, per-pass progress counters and buffer
 * pointers, and the tile's current quantizer, loop-filter and
 * loop-restoration state. */
354 struct Dav1dTileState {
355     CdfContext cdf;
356     MsacContext msac;
357 
358     struct {
359         int col_start, col_end, row_start, row_end; // in 4px units
360         int col, row; // in tile units
361     } tiling;
362 
363     // in sby units, TILE_ERROR after a decoding error
364     atomic_int progress[2 /* 0: reconstruction, 1: entropy */];
365     struct {
366         uint8_t *pal_idx;
367         int16_t *cbi;
368         coef *cf;
369     } frame_thread[2 /* 0: reconstruction, 1: entropy */];
370 
371     // in fullpel units, [0] = Y, [1] = UV, used for progress requirements
372     // each entry is one tile-sbrow; middle index is refidx
373     int (*lowest_pixel)[7][2];
374 
        // dq has the same element shape as dqmem and as
        // Dav1dFrameContext::dq (NOTE(review): presumably it points at one
        // of the two -- confirm where it is assigned)
375     uint16_t dqmem[DAV1D_MAX_SEGMENTS][3 /* plane */][2 /* dc/ac */];
376     const uint16_t (*dq)[3][2];
377     int last_qidx;
378 
        // four int8_t values that can also be accessed as one 32-bit word
379     union {
380         int8_t i8[4];
381         uint32_t u32;
382     } last_delta_lf;
383     ALIGN(uint8_t lflvlmem[8 /* seg_id */][4 /* dir */][8 /* ref */][2 /* is_gmv */], 16);
384     const uint8_t (*lflvl)[4][8][2];
385 
386     Av1RestorationUnit *lr_ref[3];
387 };
388 
/* Per-worker state; Dav1dContext::tc points to an array of n_tc of these,
 * and comments elsewhere in this file refer to tc[] for per-thread data.
 * Holds pointers to the decoder, frame and tile state being worked on,
 * the current block position, coefficient and palette buffers, a large
 * scratch area, and the worker's thread bookkeeping. */
389 struct Dav1dTaskContext {
390     const Dav1dContext *c;
391     const Dav1dFrameContext *f;
392     Dav1dTileState *ts;
393     int bx, by;
394     BlockContext l, *a;
395     refmvs_tile rt;
        // coefficient buffer; the 8 bpc and 16 bpc views share storage
396     ALIGN(union, 64) {
397         int16_t cf_8bpc [32 * 32];
398         int32_t cf_16bpc[32 * 32];
399     };
400     union {
401         uint8_t  al_pal_8bpc [2 /* a/l */][32 /* bx/y4 */][3 /* plane */][8 /* palette_idx */];
402         uint16_t al_pal_16bpc[2 /* a/l */][32 /* bx/y4 */][3 /* plane */][8 /* palette_idx */];
403     };
404     uint8_t pal_sz_uv[2 /* a/l */][32 /* bx4/by4 */];
        // scratch memory: the two anonymous structs below overlap, so the
        // buffers of the first cannot be live at the same time as those of
        // the second
405     ALIGN(union, 64) {
406         struct {
407             union {
408                 uint8_t  lap_8bpc [128 * 32];
409                 uint16_t lap_16bpc[128 * 32];
410                 struct {
411                     int16_t compinter[2][128 * 128];
412                     uint8_t seg_mask[128 * 128];
413                 };
414             };
415             union {
416                 // stride=192 for non-SVC, or 320 for SVC
417                 uint8_t  emu_edge_8bpc [320 * (256 + 7)];
418                 uint16_t emu_edge_16bpc[320 * (256 + 7)];
419             };
420         };
421         struct {
422             union {
423                 uint8_t levels[32 * 34];
424                 struct {
425                     uint8_t pal_order[64][8];
426                     uint8_t pal_ctx[64];
427                 };
428             };
429             union {
430                 int16_t ac[32 * 32]; // intra-only
431                 uint8_t txtp_map[32 * 32]; // inter-only
432             };
433             uint8_t pal_idx_y[32 * 64];
434             uint8_t pal_idx_uv[64 * 64]; /* also used as pre-pack scratch buffer */
435             union {
436                 struct {
437                     uint8_t interintra_8bpc[64 * 64];
438                     uint8_t edge_8bpc[257];
439                     ALIGN(uint8_t pal_8bpc[3 /* plane */][8 /* palette_idx */], 8);
440                 };
441                 struct {
442                     uint16_t interintra_16bpc[64 * 64];
443                     uint16_t edge_16bpc[257];
444                     ALIGN(uint16_t pal_16bpc[3 /* plane */][8 /* palette_idx */], 16);
445                 };
446             };
447         };
448     } scratch;
449 
450     Dav1dWarpedMotionParams warpmv;
451     Av1Filter *lf_mask;
452     int top_pre_cdef_toggle;
453     int8_t *cur_sb_cdef_idx_ptr;
454     // for chroma sub8x8, we need to know the filter for all 4 subblocks in
455     // a 4x4 area, but the top/left one can go out of cache already, so this
456     // keeps it accessible
457     enum Filter2d tl_4x4_filter;
458 
459     struct {
460         int pass;
461     } frame_thread;
        // worker-thread bookkeeping: the thread's own data plus pointers
        // to the TaskThreadData and FrameTileThreadData it works with
462     struct {
463         struct thread_data td;
464         struct TaskThreadData *ttd;
465         struct FrameTileThreadData *fttd;
466         int flushed;
467         int die;
468     } task_thread;
469 };
470 
471 #endif /* DAV1D_SRC_INTERNAL_H */
472