xref: /aosp_15_r20/external/libaom/av1/common/thread_common.h (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #ifndef AOM_AV1_COMMON_THREAD_COMMON_H_
13 #define AOM_AV1_COMMON_THREAD_COMMON_H_
14 
15 #include "config/aom_config.h"
16 
17 #include "av1/common/av1_loopfilter.h"
18 #include "av1/common/cdef.h"
19 #include "aom_util/aom_pthread.h"
20 #include "aom_util/aom_thread.h"
21 
22 #ifdef __cplusplus
23 extern "C" {
24 #endif
25 
26 struct AV1Common;
27 
// One loop-filter job, as stored in AV1LfSync::job_queue. enqueue_lf_jobs()
// fills in every field; get_lf_job_info() hands entries out to workers.
28 typedef struct AV1LfMTInfo {
// First mi row of the job; jobs are spaced num_mis_in_lpf_unit_height apart.
29   int mi_row;
// Plane index in [0, MAX_MB_PLANE).
30   int plane;
// Filtering pass, 0 or 1. NOTE(review): the queueing code launches "vertical"
// jobs first and iterates dir from 0, so 0 is presumably vertical - confirm.
31   int dir;
// Copy of the lpf_opt_level the job was queued with.
32   int lpf_opt_level;
33 } AV1LfMTInfo;
34 
35 // Loopfilter row synchronization
// Shared between the loop-filter workers: per-plane progress tracking plus the
// job queue that enqueue_lf_jobs() fills and get_lf_job_info() drains.
36 typedef struct AV1LfSyncData {
37 #if CONFIG_MULTITHREAD
// Per-plane mutex / condition-variable pointers. NOTE(review): presumably
// arrays with one entry per row, set up by av1_loop_filter_alloc() - confirm.
38   pthread_mutex_t *mutex_[MAX_MB_PLANE];
39   pthread_cond_t *cond_[MAX_MB_PLANE];
40 #endif
41   // Allocate memory to store the loop-filtered superblock index in each row.
// loop_filter_frame_mt_init() resets every entry of each plane to -1.
42   int *cur_sb_col[MAX_MB_PLANE];
43   // The optimal sync_range for different resolution and platform should be
44   // determined by testing. Currently, it is chosen to be a power-of-2 number.
// A value of 0 makes loop_filter_frame_mt_init() (re)allocate this structure.
45   int sync_range;
// Row count this structure was sized for; a mismatch with the frame's
// superblock-row count triggers reallocation in loop_filter_frame_mt_init().
46   int rows;
47 
48   // Row-based parallel loopfilter data
49   LFWorkerData *lfdata;
// Worker count this structure was sized for; a larger request triggers
// reallocation in loop_filter_frame_mt_init().
50   int num_workers;
51 
52 #if CONFIG_MULTITHREAD
// Held by get_lf_job_info() while it reads lf_mt_exit and advances
// jobs_dequeued.
53   pthread_mutex_t *job_mutex;
54 #endif
// Job array written by enqueue_lf_jobs(); capacity is decided by the
// allocator, which is not visible in this header.
55   AV1LfMTInfo *job_queue;
// Number of valid entries in job_queue.
56   int jobs_enqueued;
// Number of entries already handed out; also the index of the next job.
57   int jobs_dequeued;
58 
59   // Initialized to false, set to true by the worker thread that encounters an
60   // error in order to abort the processing of other worker threads.
61   bool lf_mt_exit;
62 } AV1LfSync;
63 
// One loop-restoration job, as stored in AV1LrSync::job_queue.
// NOTE(review): the code that fills and consumes these fields is not part of
// this header; the per-field notes below are read off the names only and need
// confirming against the loop-restoration implementation.
64 typedef struct AV1LrMTInfo {
// Presumably the vertical range [v_start, v_end) covered by the job.
65   int v_start;
66   int v_end;
// Presumably the index of the restoration-unit row.
67   int lr_unit_row;
// Plane index the job applies to.
68   int plane;
// Presumably selects how the job synchronizes with neighbouring rows.
69   int sync_mode;
// Presumably the vertical range to copy back after filtering.
70   int v_copy_start;
71   int v_copy_end;
72 } AV1LrMTInfo;
73 
// Per-worker data for multi-threaded loop restoration; AV1LrSync::lrworkerdata
// points at these.
74 typedef struct LoopRestorationWorkerData {
// Scratch buffer of 32-bit values. NOTE(review): size and owner are not
// visible in this header.
75   int32_t *rst_tmpbuf;
// Opaque pointers; the concrete types are known only to the loop-restoration
// implementation.
76   void *rlbs;
77   void *lr_ctxt;
// Flag with the same name as the do_extend_border argument of
// av1_loop_restoration_filter_frame_mt(). NOTE(review): presumably a copy of
// it - confirm.
78   int do_extend_border;
// Per-worker error state.
79   struct aom_internal_error_info error_info;
80 } LRWorkerData;
81 
82 // Looprestoration row synchronization
// Loop-restoration counterpart of AV1LfSync: per-plane progress tracking plus
// a job queue of AV1LrMTInfo entries.
83 typedef struct AV1LrSyncData {
84 #if CONFIG_MULTITHREAD
// Per-plane mutex / condition-variable pointers. NOTE(review): presumably
// arrays allocated by av1_loop_restoration_alloc() - confirm.
85   pthread_mutex_t *mutex_[MAX_MB_PLANE];
86   pthread_cond_t *cond_[MAX_MB_PLANE];
87 #endif
88   // Allocate memory to store the loop-restoration block index in each row.
89   int *cur_sb_col[MAX_MB_PLANE];
90   // The optimal sync_range for different resolution and platform should be
91   // determined by testing. Currently, it is chosen to be a power-of-2 number.
92   int sync_range;
93   int rows;
94   int num_planes;
95 
96   int num_workers;
97 
98 #if CONFIG_MULTITHREAD
// NOTE(review): presumably guards the job queue counters below, mirroring
// AV1LfSync::job_mutex - confirm.
99   pthread_mutex_t *job_mutex;
100 #endif
101   // Row-based parallel loop-restoration data (one LRWorkerData per worker)
102   LRWorkerData *lrworkerdata;
103 
// Queue of loop-restoration jobs and its enqueue / dequeue counters.
104   AV1LrMTInfo *job_queue;
105   int jobs_enqueued;
106   int jobs_dequeued;
107   // Initialized to false, set to true by the worker thread that encounters
108   // an error in order to abort the processing of other worker threads.
109   bool lr_mt_exit;
110 } AV1LrSync;
111 
// Per-worker data for multi-threaded CDEF; av1_cdef_frame_mt() takes a pointer
// to these.
112 typedef struct AV1CdefWorker {
// Codec-wide state and the macroblock descriptor used by this worker.
113   AV1_COMMON *cm;
114   MACROBLOCKD *xd;
// Per-plane 16-bit buffers plus one source buffer. NOTE(review): sizes and
// ownership are decided by the CDEF implementation, not visible here.
115   uint16_t *colbuf[MAX_MB_PLANE];
116   uint16_t *srcbuf;
117   uint16_t *linebuf[MAX_MB_PLANE];
// Row-initialisation callback; av1_cdef_frame_mt() accepts a callback of the
// same type.
118   cdef_init_fb_row_t cdef_init_fb_row_fn;
// Flag with the same name as the do_extend_border argument of
// av1_cdef_frame_mt(). NOTE(review): presumably a copy of it - confirm.
119   int do_extend_border;
// Per-worker error state.
120   struct aom_internal_error_info error_info;
121 } AV1CdefWorkerData;
122 
// Per-row synchronization record for multi-threaded CDEF; AV1CdefSync holds a
// pointer to these in cdef_row_mt.
123 typedef struct AV1CdefRowSync {
124 #if CONFIG_MULTITHREAD
// Mutex / condition variable pair associated with this row.
125   pthread_mutex_t *row_mutex_;
126   pthread_cond_t *row_cond_;
127 #endif  // CONFIG_MULTITHREAD
// Completion flag for the row. NOTE(review): presumably non-zero once the row
// has been processed; the writer is not visible in this header.
128   int is_row_done;
129 } AV1CdefRowSync;
130 
131 // Data related to CDEF search multi-thread synchronization.
// Allocated and released through av1_alloc_cdef_sync() / av1_free_cdef_sync().
132 typedef struct AV1CdefSyncData {
133 #if CONFIG_MULTITHREAD
134   // Mutex lock used while dispatching jobs.
135   pthread_mutex_t *mutex_;
136 #endif  // CONFIG_MULTITHREAD
137   // Data related to CDEF row mt sync information
138   AV1CdefRowSync *cdef_row_mt;
139   // Flag to indicate all blocks are processed and end of frame is reached
140   int end_of_frame;
141   // Row index in units of 64x64 block
142   int fbr;
143   // Column index in units of 64x64 block
144   int fbc;
145   // Initialized to false, set to true by the worker thread that encounters
146   // an error in order to abort the processing of other worker threads.
147   bool cdef_mt_exit;
148 } AV1CdefSync;
149 
// CDEF multi-thread interface. Only the declarations live in this header; the
// notes below describe what the signatures show and hedge everything else.
//
// Frame-level multi-threaded CDEF entry point: takes the per-worker data, the
// worker array, the shared sync object and the row-initialisation callback.
150 void av1_cdef_frame_mt(AV1_COMMON *const cm, MACROBLOCKD *const xd,
151                        AV1CdefWorkerData *const cdef_worker,
152                        AVxWorker *const workers, AV1CdefSync *const cdef_sync,
153                        int num_workers, cdef_init_fb_row_t cdef_init_fb_row_fn,
154                        int do_extend_border);
// Row initialisation for filter-block row `fbr`. NOTE(review): presumably the
// multi-thread variant passed as cdef_init_fb_row_fn - confirm at call sites.
155 void av1_cdef_init_fb_row_mt(const AV1_COMMON *const cm,
156                              const MACROBLOCKD *const xd,
157                              CdefBlockInfo *const fb_info,
158                              uint16_t **const linebuf, uint16_t *const src,
159                              struct AV1CdefSyncData *const cdef_sync, int fbr);
// Copy helpers from an 8-bit-typed source pointer into a 16-bit destination.
// NOTE(review): presumably the first dispatches to the lowbd / highbd variant
// based on `cm` - confirm in the implementation.
160 void av1_cdef_copy_sb8_16(const AV1_COMMON *const cm, uint16_t *const dst,
161                           int dstride, const uint8_t *src, int src_voffset,
162                           int src_hoffset, int sstride, int vsize, int hsize);
163 void av1_cdef_copy_sb8_16_lowbd(uint16_t *const dst, int dstride,
164                                 const uint8_t *src, int src_voffset,
165                                 int src_hoffset, int sstride, int vsize,
166                                 int hsize);
167 #if CONFIG_AV1_HIGHBITDEPTH
168 void av1_cdef_copy_sb8_16_highbd(uint16_t *const dst, int dstride,
169                                  const uint8_t *src, int src_voffset,
170                                  int src_hoffset, int sstride, int vsize,
171                                  int hsize);
172 #endif  // CONFIG_AV1_HIGHBITDEPTH
// Allocation / release of the members of an AV1CdefSync.
173 void av1_alloc_cdef_sync(AV1_COMMON *const cm, AV1CdefSync *cdef_sync,
174                          int num_workers);
175 void av1_free_cdef_sync(AV1CdefSync *cdef_sync);
176 
177 // Deallocate loopfilter synchronization related mutex and data.
178 void av1_loop_filter_dealloc(AV1LfSync *lf_sync);
// Counterpart allocator. loop_filter_frame_mt_init() calls dealloc followed by
// alloc whenever the existing AV1LfSync does not fit the current frame or
// worker count.
179 void av1_loop_filter_alloc(AV1LfSync *lf_sync, AV1_COMMON *cm, int rows,
180                            int width, int num_workers);
181 
// NOTE(review): presumably marks the vertical filtering pass as complete for
// every row in lf_sync - confirm in the implementation.
182 void av1_set_vert_loop_filter_done(AV1_COMMON *cm, AV1LfSync *lf_sync,
183                                    int num_mis_in_lpf_unit_height_log2);
184 
// Frame-level multi-threaded loop-filter entry point for planes
// [plane_start, plane_end).
185 void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm,
186                               struct macroblockd *xd, int plane_start,
187                               int plane_end, int partial_frame,
188                               AVxWorker *workers, int num_workers,
189                               AV1LfSync *lf_sync, int lpf_opt_level);
190 
// Loop-restoration multi-thread interface. It is declared only when the build
// is not realtime-only or when the decoder is enabled.
191 #if !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
// Frame-level multi-threaded loop-restoration entry point. lr_ctxt is an
// opaque context pointer whose type is known to the implementation.
192 void av1_loop_restoration_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
193                                           struct AV1Common *cm,
194                                           int optimized_lr, AVxWorker *workers,
195                                           int num_workers, AV1LrSync *lr_sync,
196                                           void *lr_ctxt, int do_extend_border);
// Release / allocate the members of an AV1LrSync.
197 void av1_loop_restoration_dealloc(AV1LrSync *lr_sync);
198 void av1_loop_restoration_alloc(AV1LrSync *lr_sync, AV1_COMMON *cm,
199                                 int num_workers, int num_rows_lr,
200                                 int num_planes, int width);
201 #endif  // !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
202 
// NOTE(review): presumably returns an additional superblock delay required
// when intra block copy is in use - confirm in the implementation.
203 int av1_get_intrabc_extra_top_right_sb_delay(const AV1_COMMON *cm);
204 
// Runs one loop-filter job. mi_row, plane, dir and lpf_opt_level have the same
// names as the fields of AV1LfMTInfo. params_buf and tx_buf are caller-supplied
// scratch buffers; errors are reported through error_info.
205 void av1_thread_loop_filter_rows(
206     const YV12_BUFFER_CONFIG *const frame_buffer, AV1_COMMON *const cm,
207     struct macroblockd_plane *planes, MACROBLOCKD *xd, int mi_row, int plane,
208     int dir, int lpf_opt_level, AV1LfSync *const lf_sync,
209     struct aom_internal_error_info *error_info,
210     AV1_DEBLOCKING_PARAMETERS *params_buf, TX_SIZE *tx_buf, int mib_size_log2);
211 
skip_loop_filter_plane(const int planes_to_lf[MAX_MB_PLANE],int plane,int lpf_opt_level)212 static AOM_FORCE_INLINE bool skip_loop_filter_plane(
213     const int planes_to_lf[MAX_MB_PLANE], int plane, int lpf_opt_level) {
214   // If LPF_PICK_METHOD is LPF_PICK_FROM_Q, we have the option to filter both
215   // chroma planes together
216   if (lpf_opt_level == 2) {
217     if (plane == AOM_PLANE_Y) {
218       return !planes_to_lf[plane];
219     }
220     if (plane == AOM_PLANE_U) {
221       // U and V are handled together
222       return !planes_to_lf[1] && !planes_to_lf[2];
223     }
224     assert(plane == AOM_PLANE_V);
225     if (plane == AOM_PLANE_V) {
226       // V is handled when u is filtered
227       return true;
228     }
229   }
230 
231   // Normal operation mode
232   return !planes_to_lf[plane];
233 }
234 
// Rebuilds the loop-filter job queue of `lf_sync` from scratch.
//
// lf_sync                      queue owner; jobs_enqueued and jobs_dequeued
//                              are reset before any job is written.
// start, stop                  mi-row range [start, stop) to cover.
// planes_to_lf                 per-plane "filter this plane" flags.
// lpf_opt_level                forwarded to skip_loop_filter_plane() and
//                              recorded in every job.
// num_mis_in_lpf_unit_height   step, in mi units, between consecutive jobs.
//
// The capacity of lf_sync->job_queue is not checked here; it is up to the
// allocator to size it for the frame.
static inline void enqueue_lf_jobs(AV1LfSync *lf_sync, int start, int stop,
                                   const int planes_to_lf[MAX_MB_PLANE],
                                   int lpf_opt_level,
                                   int num_mis_in_lpf_unit_height) {
  AV1LfMTInfo *const queue = lf_sync->job_queue;

  lf_sync->jobs_dequeued = 0;
  lf_sync->jobs_enqueued = 0;

  // Launch all vertical jobs first, as they are blocking the horizontal ones.
  // Within one direction, launch the top row jobs for all planes first, in
  // case the output can be partially reconstructed row by row.
  for (int dir = 0; dir < 2; ++dir) {
    for (int row = start; row < stop; row += num_mis_in_lpf_unit_height) {
      for (int plane = 0; plane < MAX_MB_PLANE; ++plane) {
        // NOTE(review): the second test is not redundant when
        // lpf_opt_level == 2. It stops a U job from being queued when only
        // planes_to_lf[2] is set - confirm that this is intended.
        if (skip_loop_filter_plane(planes_to_lf, plane, lpf_opt_level) ||
            !planes_to_lf[plane]) {
          continue;
        }

        AV1LfMTInfo *const job = &queue[lf_sync->jobs_enqueued++];
        job->mi_row = row;
        job->plane = plane;
        job->dir = dir;
        job->lpf_opt_level = lpf_opt_level;
      }
    }
  }
}
264 
// Prepares `lf_sync` for multi-threaded loop filtering of one frame.
//
// cm                                 supplies mi_params.mi_rows and width.
// start_mi_row, end_mi_row           mi-row range [start, end) to queue.
// planes_to_lf                       per-plane "filter this plane" flags.
// num_workers                        number of workers about to be used.
// lf_sync                            sync object to (re)initialise.
// lpf_opt_level                      recorded in every queued job.
// num_mis_in_lpf_unit_height_log2    log2 of the LPF unit height in mi units.
//
// The sync object is reallocated when it was never set up (sync_range == 0),
// was sized for a different row count, or was sized for fewer workers.
static inline void loop_filter_frame_mt_init(
    AV1_COMMON *cm, int start_mi_row, int end_mi_row,
    const int planes_to_lf[MAX_MB_PLANE], int num_workers, AV1LfSync *lf_sync,
    int lpf_opt_level, int num_mis_in_lpf_unit_height_log2) {
  // Number of superblock rows.
  const int num_sb_rows =
      CEIL_POWER_OF_TWO(cm->mi_params.mi_rows, num_mis_in_lpf_unit_height_log2);

  const int needs_realloc = !lf_sync->sync_range ||
                            lf_sync->rows != num_sb_rows ||
                            lf_sync->num_workers < num_workers;
  if (needs_realloc) {
    av1_loop_filter_dealloc(lf_sync);
    av1_loop_filter_alloc(lf_sync, cm, num_sb_rows, cm->width, num_workers);
  }

  // Clear any abort request left over from a previous frame.
  lf_sync->lf_mt_exit = false;

  // Setting every byte to 0xFF leaves each int entry equal to -1, i.e. no
  // superblock column has been filtered yet in any row.
  for (int plane = 0; plane < MAX_MB_PLANE; ++plane) {
    int *const sb_col = lf_sync->cur_sb_col[plane];
    memset(sb_col, -1, sizeof(*sb_col) * num_sb_rows);
  }

  const int lpf_unit_height = 1 << num_mis_in_lpf_unit_height_log2;
  enqueue_lf_jobs(lf_sync, start_mi_row, end_mi_row, planes_to_lf,
                  lpf_opt_level, lpf_unit_height);
}
289 
get_lf_job_info(AV1LfSync * lf_sync)290 static inline AV1LfMTInfo *get_lf_job_info(AV1LfSync *lf_sync) {
291   AV1LfMTInfo *cur_job_info = NULL;
292 
293 #if CONFIG_MULTITHREAD
294   pthread_mutex_lock(lf_sync->job_mutex);
295 
296   if (!lf_sync->lf_mt_exit && lf_sync->jobs_dequeued < lf_sync->jobs_enqueued) {
297     cur_job_info = lf_sync->job_queue + lf_sync->jobs_dequeued;
298     lf_sync->jobs_dequeued++;
299   }
300 
301   pthread_mutex_unlock(lf_sync->job_mutex);
302 #else
303   (void)lf_sync;
304 #endif
305 
306   return cur_job_info;
307 }
308 
// Points a worker's LFWorkerData at the current frame.
//
// lf_data        worker data to update.
// frame_buffer   frame to be filtered; stored by pointer.
// cm, xd         codec state and macroblock descriptor; stored by pointer.
//
// For every plane the destination buffer description and the chroma
// subsampling factors are copied out of xd->plane, so the worker keeps its
// own copy of them.
static inline void loop_filter_data_reset(LFWorkerData *lf_data,
                                          YV12_BUFFER_CONFIG *frame_buffer,
                                          struct AV1Common *cm,
                                          MACROBLOCKD *xd) {
  lf_data->xd = xd;
  lf_data->cm = cm;
  lf_data->frame_buffer = frame_buffer;

  const struct macroblockd_plane *const src_planes = xd->plane;
  for (int plane = 0; plane < MAX_MB_PLANE; ++plane) {
    lf_data->planes[plane].subsampling_x = src_planes[plane].subsampling_x;
    lf_data->planes[plane].subsampling_y = src_planes[plane].subsampling_y;
    memcpy(&lf_data->planes[plane].dst, &src_planes[plane].dst,
           sizeof(lf_data->planes[plane].dst));
  }
}
323 
set_planes_to_loop_filter(const struct loopfilter * lf,int planes_to_lf[MAX_MB_PLANE],int plane_start,int plane_end)324 static inline void set_planes_to_loop_filter(const struct loopfilter *lf,
325                                              int planes_to_lf[MAX_MB_PLANE],
326                                              int plane_start, int plane_end) {
327   // For each luma and chroma plane, whether to filter it or not.
328   planes_to_lf[0] = (lf->filter_level[0] || lf->filter_level[1]) &&
329                     plane_start <= 0 && 0 < plane_end;
330   planes_to_lf[1] = lf->filter_level_u && plane_start <= 1 && 1 < plane_end;
331   planes_to_lf[2] = lf->filter_level_v && plane_start <= 2 && 2 < plane_end;
332 }
333 
// Computes planes_to_lf via set_planes_to_loop_filter() and reports whether
// any loop filtering is needed at all.
//
// Returns 0 when luma lies in [plane_start, plane_end) but is not to be
// filtered (chroma is then left alone as well), or when no plane is flagged.
// Returns 1 otherwise.
static inline int check_planes_to_loop_filter(const struct loopfilter *lf,
                                              int planes_to_lf[MAX_MB_PLANE],
                                              int plane_start, int plane_end) {
  set_planes_to_loop_filter(lf, planes_to_lf, plane_start, plane_end);

  if (!planes_to_lf[0]) {
    // If the luma plane is purposely not filtered, neither are the chroma
    // planes.
    const int luma_requested = plane_start <= 0 && 0 < plane_end;
    if (luma_requested) return 0;

    // Early exit: nothing is flagged for filtering.
    if (!planes_to_lf[1] && !planes_to_lf[2]) return 0;
  }
  return 1;
}
345 
346 #ifdef __cplusplus
347 }  // extern "C"
348 #endif
349 
350 #endif  // AOM_AV1_COMMON_THREAD_COMMON_H_
351