xref: /aosp_15_r20/external/libvpx/vp9/encoder/vp9_multi_thread.c (revision fb1b10ab9aebc7c7068eedab379b749d7e3900be)
1 /*
2  *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <assert.h>
12 
13 #include "vpx_util/vpx_pthread.h"
14 #include "vp9/encoder/vp9_encoder.h"
15 #include "vp9/encoder/vp9_ethread.h"
16 #include "vp9/encoder/vp9_multi_thread.h"
17 #include "vp9/encoder/vp9_temporal_filter.h"
18 
// Pops and returns the next job from the given tile's job queue, or NULL if
// the queue is empty. When CONFIG_MULTITHREAD is enabled, queue access is
// serialized with the tile's job mutex.
void *vp9_enc_grp_get_next_job(MultiThreadHandle *multi_thread_ctxt,
                               int tile_id) {
  RowMTInfo *const row_mt_info =
      (RowMTInfo *)(&multi_thread_ctxt->row_mt_info[tile_id]);
  JobQueueHandle *const queue_hdl =
      (JobQueueHandle *)&row_mt_info->job_queue_hdl;
  JobNode *job_info = NULL;

#if CONFIG_MULTITHREAD
  // Lock the mutex for queue access.
  pthread_mutex_lock(&row_mt_info->job_mutex);
#endif
  if (queue_hdl->next != NULL) {
    JobQueue *const head = (JobQueue *)queue_hdl->next;
    job_info = &head->job_info;
    // Advance the queue head past the job being handed out.
    queue_hdl->next = head->next;
    queue_hdl->num_jobs_acquired++;
  }
#if CONFIG_MULTITHREAD
  pthread_mutex_unlock(&row_mt_info->job_mutex);
#endif

  return job_info;
}
54 
// Ensures the tile's per-row rd-threshold table is allocated and large
// enough for the current frame size, (re)initializing every entry to
// RD_THRESH_INIT_FACT whenever a (re)allocation happens.
void vp9_row_mt_alloc_rd_thresh(VP9_COMP *const cpi,
                                TileDataEnc *const this_tile) {
  VP9_COMMON *const cm = &cpi->common;
  const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
  const int num_entries = sb_rows * BLOCK_SIZES * MAX_MODES;
  int idx;

  if (this_tile->row_base_thresh_freq_fact != NULL) {
    // Existing table already covers this many SB rows; keep it as is.
    if (sb_rows <= this_tile->sb_rows) return;
    vpx_free(this_tile->row_base_thresh_freq_fact);
    this_tile->row_base_thresh_freq_fact = NULL;
  }
  CHECK_MEM_ERROR(
      &cm->error, this_tile->row_base_thresh_freq_fact,
      (int *)vpx_calloc(num_entries,
                        sizeof(*(this_tile->row_base_thresh_freq_fact))));
  for (idx = 0; idx < num_entries; idx++)
    this_tile->row_base_thresh_freq_fact[idx] = RD_THRESH_INIT_FACT;
  this_tile->sb_rows = sb_rows;
}
76 
// Allocates all memory needed for row-based multi-threading: the shared job
// queue buffer, one job mutex per tile column, and the row-sync structures
// (owned by tile row 0 and shared by every other tile row).
void vp9_row_mt_mem_alloc(VP9_COMP *cpi) {
  struct VP9Common *cm = &cpi->common;
  MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;
  const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
  // Size for the largest of all row_mt stages: the first pass works on
  // 16x16 blocks (mb_rows), encoding on SB rows.
  const int jobs_per_tile_col = VPXMAX(cm->mb_rows, sb_rows);
  const int total_jobs = jobs_per_tile_col * tile_cols;
  int tile_row, tile_col;

  multi_thread_ctxt->allocated_tile_cols = tile_cols;
  multi_thread_ctxt->allocated_tile_rows = tile_rows;
  multi_thread_ctxt->allocated_vert_unit_rows = jobs_per_tile_col;

  CHECK_MEM_ERROR(&cm->error, multi_thread_ctxt->job_queue,
                  (JobQueue *)vpx_memalign(32, total_jobs * sizeof(JobQueue)));

#if CONFIG_MULTITHREAD
  // One mutex per tile column guards that column's job queue.
  for (tile_col = 0; tile_col < tile_cols; tile_col++) {
    pthread_mutex_init(&multi_thread_ctxt->row_mt_info[tile_col].job_mutex,
                       NULL);
  }
#endif

  // Allocate row-sync memory for the tiles of the first tile row ...
  for (tile_col = 0; tile_col < tile_cols; tile_col++) {
    TileDataEnc *const this_tile = &cpi->tile_data[tile_col];
    vp9_row_mt_sync_mem_alloc(&this_tile->row_mt_sync, cm, jobs_per_tile_col);
  }

  // ... and share those sync structures with every tile row > 0.
  for (tile_row = 1; tile_row < tile_rows; tile_row++) {
    for (tile_col = 0; tile_col < tile_cols; tile_col++) {
      TileDataEnc *const this_tile =
          &cpi->tile_data[tile_row * tile_cols + tile_col];
      this_tile->row_mt_sync = cpi->tile_data[tile_col].row_mt_sync;
    }
  }

  // Record the number of vertical SB units in each tile row.
  for (tile_row = 0; tile_row < tile_rows; tile_row++) {
    TileDataEnc *const this_tile = &cpi->tile_data[tile_row * tile_cols];
    multi_thread_ctxt->num_tile_vert_sbs[tile_row] =
        get_num_vert_units(this_tile->tile_info, MI_BLOCK_SIZE_LOG2);
  }
}
130 
// Releases everything allocated by vp9_row_mt_mem_alloc() and
// vp9_row_mt_alloc_rd_thresh(), then clears the bookkeeping counters so a
// later re-allocation starts from a clean state.
void vp9_row_mt_mem_dealloc(VP9_COMP *cpi) {
  MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
  int tile_col;
#if CONFIG_MULTITHREAD
  int tile_row;
#endif

  // Deallocate the job queue. vpx_free() accepts NULL, so no guard needed.
  vpx_free(multi_thread_ctxt->job_queue);
  multi_thread_ctxt->job_queue = NULL;

#if CONFIG_MULTITHREAD
  // Destroy the per-tile-column job mutexes.
  for (tile_col = 0; tile_col < multi_thread_ctxt->allocated_tile_cols;
       tile_col++) {
    RowMTInfo *row_mt_info = &multi_thread_ctxt->row_mt_info[tile_col];
    pthread_mutex_destroy(&row_mt_info->job_mutex);
  }
#endif

  // Free row based multi-threading sync memory. Only tile row 0 owns the
  // buffers; rows > 0 share pointers assigned in vp9_row_mt_mem_alloc(), so
  // freeing row 0 releases everything.
  for (tile_col = 0; tile_col < multi_thread_ctxt->allocated_tile_cols;
       tile_col++) {
    TileDataEnc *this_tile = &cpi->tile_data[tile_col];
    vp9_row_mt_sync_mem_dealloc(&this_tile->row_mt_sync);
  }

#if CONFIG_MULTITHREAD
  // Release the per-tile rd-threshold tables (allocated lazily by
  // vp9_row_mt_alloc_rd_thresh(); may still be NULL, which is fine).
  for (tile_row = 0; tile_row < multi_thread_ctxt->allocated_tile_rows;
       tile_row++) {
    for (tile_col = 0; tile_col < multi_thread_ctxt->allocated_tile_cols;
         tile_col++) {
      TileDataEnc *this_tile =
          &cpi->tile_data[tile_row * multi_thread_ctxt->allocated_tile_cols +
                          tile_col];
      vpx_free(this_tile->row_base_thresh_freq_fact);
      this_tile->row_base_thresh_freq_fact = NULL;
    }
  }
#endif

  multi_thread_ctxt->allocated_tile_cols = 0;
  multi_thread_ctxt->allocated_tile_rows = 0;
  multi_thread_ctxt->allocated_vert_unit_rows = 0;
}
180 
// Per-frame reset of row-mt state for each tile column: marks every row's
// sync column as "not started" and clears the first-pass accumulators.
void vp9_multi_thread_tile_init(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
  // First pass works on 16x16 MB rows; later passes on SB rows.
  const int jobs_per_tile_col = cpi->oxcf.pass == 1 ? cm->mb_rows : sb_rows;
  int tile_col;

  for (tile_col = 0; tile_col < tile_cols; tile_col++) {
    TileDataEnc *const this_tile = &cpi->tile_data[tile_col];

    // Initialize cur_col to -1 ("no column encoded yet") for all rows.
    memset(this_tile->row_mt_sync.cur_col, -1,
           sizeof(*this_tile->row_mt_sync.cur_col) * jobs_per_tile_col);
    vp9_zero(this_tile->fp_data);
    this_tile->fp_data.image_data_start_row = INVALID_ROW;
  }
}
198 
// Maps each worker thread to a tile column in round-robin order, so the
// workers are spread evenly across the available tile columns.
void vp9_assign_tile_to_thread(MultiThreadHandle *multi_thread_ctxt,
                               int tile_cols, int num_workers) {
  int worker;

  for (worker = 0; worker < num_workers; worker++)
    multi_thread_ctxt->thread_id_to_tile_id[worker] = worker % tile_cols;
}
210 
// Returns the number of jobs not yet acquired from the given tile's queue.
// The read of num_jobs_acquired is guarded by the tile's job mutex when
// CONFIG_MULTITHREAD is enabled.
int vp9_get_job_queue_status(MultiThreadHandle *multi_thread_ctxt,
                             int cur_tile_id) {
  RowMTInfo *const row_mt_info = &multi_thread_ctxt->row_mt_info[cur_tile_id];
  JobQueueHandle *const queue_hdl = &row_mt_info->job_queue_hdl;
  int num_jobs_remaining;

#if CONFIG_MULTITHREAD
  pthread_mutex_lock(&row_mt_info->job_mutex);
#endif
  num_jobs_remaining =
      multi_thread_ctxt->jobs_per_tile_col - queue_hdl->num_jobs_acquired;
#if CONFIG_MULTITHREAD
  pthread_mutex_unlock(&row_mt_info->job_mutex);
#endif

  return num_jobs_remaining;
}
237 
// Builds the singly linked job queues for the given job type: one queue per
// tile column, laid out back-to-back inside the pre-allocated job_queue
// buffer, and resets each tile's queue handle so workers can start pulling
// jobs via vp9_enc_grp_get_next_job(). Also clears every worker's per-tile
// completion bookkeeping.
void vp9_prepare_job_queue(VP9_COMP *cpi, JOB_TYPE job_type) {
  VP9_COMMON *const cm = &cpi->common;
  MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
  JobQueue *job_queue = multi_thread_ctxt->job_queue;
  const int tile_cols = 1 << cm->log2_tile_cols;
  int job_row_num, jobs_per_tile, jobs_per_tile_col = 0, total_jobs;
  const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
  int tile_col, i;

  // Jobs per tile column depend on the unit each stage works on: SB rows
  // for encoding, 16x16 MB rows for the first pass, and temporal-filter
  // unit rows for ARNR.
  switch (job_type) {
    case ENCODE_JOB: jobs_per_tile_col = sb_rows; break;
    case FIRST_PASS_JOB: jobs_per_tile_col = cm->mb_rows; break;
    case ARNR_JOB:
      jobs_per_tile_col = ((cm->mi_rows + TF_ROUND) >> TF_SHIFT);
      break;
    default: assert(0);
  }

  total_jobs = jobs_per_tile_col * tile_cols;

  multi_thread_ctxt->jobs_per_tile_col = jobs_per_tile_col;
  // memset the entire job queue buffer to zero
  memset(job_queue, 0, total_jobs * sizeof(JobQueue));

  // Job queue preparation
  for (tile_col = 0; tile_col < tile_cols; tile_col++) {
    RowMTInfo *tile_ctxt = &multi_thread_ctxt->row_mt_info[tile_col];
    JobQueue *job_queue_curr, *job_queue_temp;
    int tile_row = 0;

    // Point the tile's handle at the first job of its segment and reset the
    // acquired-job counter.
    tile_ctxt->job_queue_hdl.next = (void *)job_queue;
    tile_ctxt->job_queue_hdl.num_jobs_acquired = 0;

    // job_queue_curr and job_queue_temp walk the buffer in lock-step;
    // job_queue_temp + 1 is the address of the next contiguous entry.
    job_queue_curr = job_queue;
    job_queue_temp = job_queue;

    // loop over all the vertical rows
    for (job_row_num = 0, jobs_per_tile = 0; job_row_num < jobs_per_tile_col;
         job_row_num++, jobs_per_tile++) {
      job_queue_curr->job_info.vert_unit_row_num = job_row_num;
      job_queue_curr->job_info.tile_col_id = tile_col;
      job_queue_curr->job_info.tile_row_id = tile_row;
      // Link this job to the next contiguous entry in the buffer.
      job_queue_curr->next = (void *)(job_queue_temp + 1);
      job_queue_curr = ++job_queue_temp;

      if (ENCODE_JOB == job_type) {
        // Once all SB rows of the current tile row are assigned, move to the
        // next tile row; -1 makes jobs_per_tile restart at 0 after the loop
        // increment.
        if (jobs_per_tile >=
            multi_thread_ctxt->num_tile_vert_sbs[tile_row] - 1) {
          tile_row++;
          jobs_per_tile = -1;
        }
      }
    }

    // Set the last pointer to NULL
    job_queue_curr += -1;
    job_queue_curr->next = (void *)NULL;

    // Move to the next tile
    job_queue += jobs_per_tile_col;
  }

  // Reset each worker's id and per-tile completion status.
  for (i = 0; i < cpi->num_workers; i++) {
    EncWorkerData *thread_data;
    thread_data = &cpi->tile_thr_data[i];
    thread_data->thread_id = i;

    for (tile_col = 0; tile_col < tile_cols; tile_col++)
      thread_data->tile_completion_status[tile_col] = 0;
  }
}
309 
// Marks the current tile as done and scans the remaining tiles for pending
// work. Returns 1 when every tile is complete; otherwise stores the tile
// with the most jobs remaining (the least processed one) into *cur_tile_id
// and returns 0.
int vp9_get_tiles_proc_status(MultiThreadHandle *multi_thread_ctxt,
                              int *tile_completion_status, int *cur_tile_id,
                              int tile_cols) {
  int tile_col;
  int busiest_tile = -1;  // tile with the most jobs still pending
  int busiest_jobs = 0;

  // Mark the current tile complete so the scan below skips it.
  tile_completion_status[*cur_tile_id] = 1;

  for (tile_col = 0; tile_col < tile_cols; tile_col++) {
    int jobs_left;
    if (tile_completion_status[tile_col] != 0) continue;
    jobs_left = vp9_get_job_queue_status(multi_thread_ctxt, tile_col);
    // Remember completion so future switches skip this tile without
    // re-querying its queue.
    if (jobs_left == 0) tile_completion_status[tile_col] = 1;
    if (jobs_left > busiest_jobs) {
      busiest_jobs = jobs_left;
      busiest_tile = tile_col;
    }
  }

  if (busiest_tile == -1) return 1;  // all tiles fully processed

  // Continue with the least processed tile.
  *cur_tile_id = busiest_tile;
  return 0;
}
343