/*
 * Copyright (c) 2022, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#ifndef AOM_AV1_ENCODER_NONRD_OPT_H_
#define AOM_AV1_ENCODER_NONRD_OPT_H_

#include "av1/encoder/rdopt_utils.h"
#include "av1/encoder/rdopt.h"

#define RTC_INTER_MODES (4)
#define RTC_INTRA_MODES (4)
#define RTC_MODES (AOMMAX(RTC_INTER_MODES, RTC_INTRA_MODES))
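// Scales an RD cost by 7/8 (7 * rdcost >> 3); used to bias RD cost
// comparisons in the non-RD mode search.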
#define CALC_BIASED_RDCOST(rdcost) (7 * (rdcost) >> 3)
#define NUM_COMP_INTER_MODES_RT (6)
#define NUM_INTER_MODES 12
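// For blocks larger than 32x32 (unless the transform mode is ONLY_4X4), the
// transform size is capped to TX_SIZE_FOR_BSIZE_GT32 (16x16).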
#define CAP_TX_SIZE_FOR_BSIZE_GT32(tx_mode_search_type, bsize) \
  (((tx_mode_search_type) != ONLY_4X4 && (bsize) > BLOCK_32X32) ? true : false)
#define TX_SIZE_FOR_BSIZE_GT32 (TX_16X16)
#define FILTER_SEARCH_SIZE 2
#if !CONFIG_REALTIME_ONLY
#define MOTION_MODE_SEARCH_SIZE 2
#endif

extern int g_pick_inter_mode_cnt;
/*!\cond */
typedef struct {
  uint8_t *data;
  int stride;
  int in_use;
} PRED_BUFFER;

typedef struct {
  PRED_BUFFER *best_pred;
  PREDICTION_MODE best_mode;
  TX_SIZE best_tx_size;
  TX_TYPE tx_type;
  MV_REFERENCE_FRAME best_ref_frame;
  MV_REFERENCE_FRAME best_second_ref_frame;
  uint8_t best_mode_skip_txfm;
  uint8_t best_mode_initial_skip_flag;
  int_interpfilters best_pred_filter;
  MOTION_MODE best_motion_mode;
  WarpedMotionParams wm_params;
  int num_proj_ref;
  PALETTE_MODE_INFO pmi;
  int64_t best_sse;
} BEST_PICKMODE;

typedef struct {
  MV_REFERENCE_FRAME ref_frame;
  PREDICTION_MODE pred_mode;
} REF_MODE;

typedef struct {
  MV_REFERENCE_FRAME ref_frame[2];
  PREDICTION_MODE pred_mode;
} COMP_REF_MODE;

struct estimate_block_intra_args {
  AV1_COMP *cpi;
  MACROBLOCK *x;
  PREDICTION_MODE mode;
  int skippable;
  RD_STATS *rdc;
  unsigned int best_sad;
  bool prune_mode_based_on_sad;
};
/*!\endcond */
/*!\brief Structure to store parameters and statistics used in non-rd inter mode
 * evaluation.
 */
typedef struct {
  //! Structure to hold best inter mode data
  BEST_PICKMODE best_pickmode;
  //! Structure to hold the RD cost of the current mode
  RD_STATS this_rdc;
  //! RD cost of the best mode found so far
  RD_STATS best_rdc;
  //! Distortion of chroma planes for all modes and reference frames
  int64_t uv_dist[RTC_INTER_MODES][REF_FRAMES];
  //! Buffer to hold predicted block for all reference frames and planes
  struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
  //! Array to hold variance of all modes and reference frames
  unsigned int vars[RTC_INTER_MODES][REF_FRAMES];
  //! Array to hold ref cost of single reference mode for all ref frames
  unsigned int ref_costs_single[REF_FRAMES];
  //! Array to hold motion vector for all modes and reference frames
  int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES];
  //! Array to hold best mv for all modes and reference frames
  int_mv frame_mv_best[MB_MODE_COUNT][REF_FRAMES];
  //! Array to hold inter mode cost of single ref mode for all ref frames
  int single_inter_mode_costs[RTC_INTER_MODES][REF_FRAMES];
  //! Array to hold use reference frame mask for each reference frame
  int use_ref_frame_mask[REF_FRAMES];
  //! Array to hold flags of evaluated modes for each reference frame
  uint8_t mode_checked[MB_MODE_COUNT][REF_FRAMES];
  //! Array to hold flag indicating if scaled reference frame is used.
  bool use_scaled_ref_frame[REF_FRAMES];
} InterModeSearchStateNonrd;

static const uint8_t b_width_log2_lookup[BLOCK_SIZES] = { 0, 0, 1, 1, 1, 2,
                                                          2, 2, 3, 3, 3, 4,
                                                          4, 4, 5, 5 };
static const uint8_t b_height_log2_lookup[BLOCK_SIZES] = { 0, 1, 0, 1, 2, 1,
                                                           2, 3, 2, 3, 4, 3,
                                                           4, 5, 4, 5 };

static const PREDICTION_MODE intra_mode_list[] = { DC_PRED, V_PRED, H_PRED,
                                                   SMOOTH_PRED };

static const PREDICTION_MODE inter_mode_list[] = { NEARESTMV, NEARMV, GLOBALMV,
                                                   NEWMV };

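// Maps (reference frame, mode) to the corresponding THR_MODES index. Row 0
// (INTRA_FRAME) holds the intra modes; the remaining rows hold the inter
// modes for each single reference frame.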
static const THR_MODES mode_idx[REF_FRAMES][RTC_MODES] = {
  { THR_DC, THR_V_PRED, THR_H_PRED, THR_SMOOTH },
  { THR_NEARESTMV, THR_NEARMV, THR_GLOBALMV, THR_NEWMV },
  { THR_NEARESTL2, THR_NEARL2, THR_GLOBALL2, THR_NEWL2 },
  { THR_NEARESTL3, THR_NEARL3, THR_GLOBALL3, THR_NEWL3 },
  { THR_NEARESTG, THR_NEARG, THR_GLOBALG, THR_NEWG },
  { THR_NEARESTB, THR_NEARB, THR_GLOBALB, THR_NEWB },
  { THR_NEARESTA2, THR_NEARA2, THR_GLOBALA2, THR_NEWA2 },
  { THR_NEARESTA, THR_NEARA, THR_GLOBALA, THR_NEWA },
};

// GLOBALMV in the set below is in fact ZEROMV as we don't do global ME in RT
// mode
static const REF_MODE ref_mode_set[NUM_INTER_MODES] = {
  { LAST_FRAME, NEARESTMV }, { LAST_FRAME, NEARMV },
  { LAST_FRAME, GLOBALMV }, { LAST_FRAME, NEWMV },
  { GOLDEN_FRAME, NEARESTMV }, { GOLDEN_FRAME, NEARMV },
  { GOLDEN_FRAME, GLOBALMV }, { GOLDEN_FRAME, NEWMV },
  { ALTREF_FRAME, NEARESTMV }, { ALTREF_FRAME, NEARMV },
  { ALTREF_FRAME, GLOBALMV }, { ALTREF_FRAME, NEWMV },
};

static const COMP_REF_MODE comp_ref_mode_set[NUM_COMP_INTER_MODES_RT] = {
  { { LAST_FRAME, GOLDEN_FRAME }, GLOBAL_GLOBALMV },
  { { LAST_FRAME, GOLDEN_FRAME }, NEAREST_NEARESTMV },
  { { LAST_FRAME, LAST2_FRAME }, GLOBAL_GLOBALMV },
  { { LAST_FRAME, LAST2_FRAME }, NEAREST_NEARESTMV },
  { { LAST_FRAME, ALTREF_FRAME }, GLOBAL_GLOBALMV },
  { { LAST_FRAME, ALTREF_FRAME }, NEAREST_NEARESTMV },
};

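// Interpolation filter pairs evaluated during the interpolation filter search.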
static const int_interpfilters filters_ref_set[9] = {
  [0].as_filters = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR },
  [1].as_filters = { EIGHTTAP_SMOOTH, EIGHTTAP_SMOOTH },
  [2].as_filters = { EIGHTTAP_REGULAR, EIGHTTAP_SMOOTH },
  [3].as_filters = { EIGHTTAP_SMOOTH, EIGHTTAP_REGULAR },
  [4].as_filters = { MULTITAP_SHARP, MULTITAP_SHARP },
  [5].as_filters = { EIGHTTAP_REGULAR, MULTITAP_SHARP },
  [6].as_filters = { MULTITAP_SHARP, EIGHTTAP_REGULAR },
  [7].as_filters = { EIGHTTAP_SMOOTH, MULTITAP_SHARP },
  [8].as_filters = { MULTITAP_SHARP, EIGHTTAP_SMOOTH }
};

enum {
  // INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV),
  INTER_NEAREST = (1 << NEARESTMV),
  INTER_NEAREST_NEW = (1 << NEARESTMV) | (1 << NEWMV),
  INTER_NEAREST_NEAR = (1 << NEARESTMV) | (1 << NEARMV),
  INTER_NEAR_NEW = (1 << NEARMV) | (1 << NEWMV),
};

// The original scan order (default_scan_8x8) is modified according to the
// extra transpose in the Hadamard C implementations, i.e.,
// aom_hadamard_lp_8x8_c and aom_hadamard_8x8_c.
DECLARE_ALIGNED(16, static const int16_t, default_scan_8x8_transpose[64]) = {
  0, 8, 1, 2, 9, 16, 24, 17, 10, 3, 4, 11, 18, 25, 32, 40,
  33, 26, 19, 12, 5, 6, 13, 20, 27, 34, 41, 48, 56, 49, 42, 35,
  28, 21, 14, 7, 15, 22, 29, 36, 43, 50, 57, 58, 51, 44, 37, 30,
  23, 31, 38, 45, 52, 59, 60, 53, 46, 39, 47, 54, 61, 62, 55, 63
};

// The original scan order (av1_default_iscan_8x8) is modified to match the
// Hadamard AVX2 implementations, i.e., aom_hadamard_lp_8x8_avx2 and
// aom_hadamard_8x8_avx2. The Hadamard AVX2 implementations reorder the
// coefficients, so the normal scan order is no longer guaranteed to scan low
// coefficients first; the scan order is therefore modified accordingly.
// Note that this one has to be used together with default_scan_8x8_transpose.
DECLARE_ALIGNED(16, static const int16_t,
                av1_default_iscan_8x8_transpose[64]) = {
  0, 2, 3, 9, 10, 20, 21, 35, 1, 4, 8, 11, 19, 22, 34, 36,
  5, 7, 12, 18, 23, 33, 37, 48, 6, 13, 17, 24, 32, 38, 47, 49,
  14, 16, 25, 31, 39, 46, 50, 57, 15, 26, 30, 40, 45, 51, 56, 58,
  27, 29, 41, 44, 52, 55, 59, 62, 28, 42, 43, 53, 54, 60, 61, 63
};

// The original scan order (default_scan_16x16) is modified according to the
// extra transpose in the Hadamard C implementation in the LP case, i.e.,
// aom_hadamard_lp_16x16_c.
DECLARE_ALIGNED(16, static const int16_t,
                default_scan_lp_16x16_transpose[256]) = {
  0, 8, 2, 4, 10, 16, 24, 18, 12, 6, 64, 14, 20, 26, 32,
  40, 34, 28, 22, 72, 66, 68, 74, 80, 30, 36, 42, 48, 56, 50,
  44, 38, 88, 82, 76, 70, 128, 78, 84, 90, 96, 46, 52, 58, 1,
  9, 3, 60, 54, 104, 98, 92, 86, 136, 130, 132, 138, 144, 94, 100,
  106, 112, 62, 5, 11, 17, 25, 19, 13, 7, 120, 114, 108, 102, 152,
  146, 140, 134, 192, 142, 148, 154, 160, 110, 116, 122, 65, 15, 21, 27,
  33, 41, 35, 29, 23, 73, 67, 124, 118, 168, 162, 156, 150, 200, 194,
  196, 202, 208, 158, 164, 170, 176, 126, 69, 75, 81, 31, 37, 43, 49,
  57, 51, 45, 39, 89, 83, 77, 71, 184, 178, 172, 166, 216, 210, 204,
  198, 206, 212, 218, 224, 174, 180, 186, 129, 79, 85, 91, 97, 47, 53,
  59, 61, 55, 105, 99, 93, 87, 137, 131, 188, 182, 232, 226, 220, 214,
  222, 228, 234, 240, 190, 133, 139, 145, 95, 101, 107, 113, 63, 121, 115,
  109, 103, 153, 147, 141, 135, 248, 242, 236, 230, 238, 244, 250, 193, 143,
  149, 155, 161, 111, 117, 123, 125, 119, 169, 163, 157, 151, 201, 195, 252,
  246, 254, 197, 203, 209, 159, 165, 171, 177, 127, 185, 179, 173, 167, 217,
  211, 205, 199, 207, 213, 219, 225, 175, 181, 187, 189, 183, 233, 227, 221,
  215, 223, 229, 235, 241, 191, 249, 243, 237, 231, 239, 245, 251, 253, 247,
  255
};

#if CONFIG_AV1_HIGHBITDEPTH
// The original scan order (default_scan_16x16) is modified according to the
// extra shift in the Hadamard C implementation in the FP case, i.e.,
// aom_hadamard_16x16_c. Note that the 16x16 LP and FP Hadamard transforms
// generate different outputs, so we handle them separately.
DECLARE_ALIGNED(16, static const int16_t,
                default_scan_fp_16x16_transpose[256]) = {
  0, 4, 2, 8, 6, 16, 20, 18, 12, 10, 64, 14, 24, 22, 32,
  36, 34, 28, 26, 68, 66, 72, 70, 80, 30, 40, 38, 48, 52, 50,
  44, 42, 84, 82, 76, 74, 128, 78, 88, 86, 96, 46, 56, 54, 1,
  5, 3, 60, 58, 100, 98, 92, 90, 132, 130, 136, 134, 144, 94, 104,
  102, 112, 62, 9, 7, 17, 21, 19, 13, 11, 116, 114, 108, 106, 148,
  146, 140, 138, 192, 142, 152, 150, 160, 110, 120, 118, 65, 15, 25, 23,
  33, 37, 35, 29, 27, 69, 67, 124, 122, 164, 162, 156, 154, 196, 194,
  200, 198, 208, 158, 168, 166, 176, 126, 73, 71, 81, 31, 41, 39, 49,
  53, 51, 45, 43, 85, 83, 77, 75, 180, 178, 172, 170, 212, 210, 204,
  202, 206, 216, 214, 224, 174, 184, 182, 129, 79, 89, 87, 97, 47, 57,
  55, 61, 59, 101, 99, 93, 91, 133, 131, 188, 186, 228, 226, 220, 218,
  222, 232, 230, 240, 190, 137, 135, 145, 95, 105, 103, 113, 63, 117, 115,
  109, 107, 149, 147, 141, 139, 244, 242, 236, 234, 238, 248, 246, 193, 143,
  153, 151, 161, 111, 121, 119, 125, 123, 165, 163, 157, 155, 197, 195, 252,
  250, 254, 201, 199, 209, 159, 169, 167, 177, 127, 181, 179, 173, 171, 213,
  211, 205, 203, 207, 217, 215, 225, 175, 185, 183, 189, 187, 229, 227, 221,
  219, 223, 233, 231, 241, 191, 245, 243, 237, 235, 239, 249, 247, 253, 251,
  255
};
#endif

// The original scan order (av1_default_iscan_16x16) is modified to match the
// Hadamard AVX2 implementation, i.e., aom_hadamard_lp_16x16_avx2.
// The Hadamard AVX2 implementation reorders the coefficients, so the normal
// scan order is no longer guaranteed to scan low coefficients first; the scan
// order is therefore modified accordingly. Note that this one has to be used
// together with default_scan_lp_16x16_transpose.
DECLARE_ALIGNED(16, static const int16_t,
                av1_default_iscan_lp_16x16_transpose[256]) = {
  0, 44, 2, 46, 3, 63, 9, 69, 1, 45, 4, 64, 8, 68, 11,
  87, 5, 65, 7, 67, 12, 88, 18, 94, 6, 66, 13, 89, 17, 93,
  24, 116, 14, 90, 16, 92, 25, 117, 31, 123, 15, 91, 26, 118, 30,
  122, 41, 148, 27, 119, 29, 121, 42, 149, 48, 152, 28, 120, 43, 150,
  47, 151, 62, 177, 10, 86, 20, 96, 21, 113, 35, 127, 19, 95, 22,
  114, 34, 126, 37, 144, 23, 115, 33, 125, 38, 145, 52, 156, 32, 124,
  39, 146, 51, 155, 58, 173, 40, 147, 50, 154, 59, 174, 73, 181, 49,
  153, 60, 175, 72, 180, 83, 198, 61, 176, 71, 179, 84, 199, 98, 202,
  70, 178, 85, 200, 97, 201, 112, 219, 36, 143, 54, 158, 55, 170, 77,
  185, 53, 157, 56, 171, 76, 184, 79, 194, 57, 172, 75, 183, 80, 195,
  102, 206, 74, 182, 81, 196, 101, 205, 108, 215, 82, 197, 100, 204, 109,
  216, 131, 223, 99, 203, 110, 217, 130, 222, 140, 232, 111, 218, 129, 221,
  141, 233, 160, 236, 128, 220, 142, 234, 159, 235, 169, 245, 78, 193, 104,
  208, 105, 212, 135, 227, 103, 207, 106, 213, 134, 226, 136, 228, 107, 214,
  133, 225, 137, 229, 164, 240, 132, 224, 138, 230, 163, 239, 165, 241, 139,
  231, 162, 238, 166, 242, 189, 249, 161, 237, 167, 243, 188, 248, 190, 250,
  168, 244, 187, 247, 191, 251, 210, 254, 186, 246, 192, 252, 209, 253, 211,
  255
};

#if CONFIG_AV1_HIGHBITDEPTH
// The original scan order (av1_default_iscan_16x16) is modified to match the
// Hadamard AVX2 implementation, i.e., aom_hadamard_16x16_avx2.
// The Hadamard AVX2 implementation reorders the coefficients, so the normal
// scan order is no longer guaranteed to scan low coefficients first; the scan
// order is therefore modified accordingly. Note that this one has to be used
// together with default_scan_fp_16x16_transpose.
DECLARE_ALIGNED(16, static const int16_t,
                av1_default_iscan_fp_16x16_transpose[256]) = {
  0, 44, 2, 46, 1, 45, 4, 64, 3, 63, 9, 69, 8, 68, 11,
  87, 5, 65, 7, 67, 6, 66, 13, 89, 12, 88, 18, 94, 17, 93,
  24, 116, 14, 90, 16, 92, 15, 91, 26, 118, 25, 117, 31, 123, 30,
  122, 41, 148, 27, 119, 29, 121, 28, 120, 43, 150, 42, 149, 48, 152,
  47, 151, 62, 177, 10, 86, 20, 96, 19, 95, 22, 114, 21, 113, 35,
  127, 34, 126, 37, 144, 23, 115, 33, 125, 32, 124, 39, 146, 38, 145,
  52, 156, 51, 155, 58, 173, 40, 147, 50, 154, 49, 153, 60, 175, 59,
  174, 73, 181, 72, 180, 83, 198, 61, 176, 71, 179, 70, 178, 85, 200,
  84, 199, 98, 202, 97, 201, 112, 219, 36, 143, 54, 158, 53, 157, 56,
  171, 55, 170, 77, 185, 76, 184, 79, 194, 57, 172, 75, 183, 74, 182,
  81, 196, 80, 195, 102, 206, 101, 205, 108, 215, 82, 197, 100, 204, 99,
  203, 110, 217, 109, 216, 131, 223, 130, 222, 140, 232, 111, 218, 129, 221,
  128, 220, 142, 234, 141, 233, 160, 236, 159, 235, 169, 245, 78, 193, 104,
  208, 103, 207, 106, 213, 105, 212, 135, 227, 134, 226, 136, 228, 107, 214,
  133, 225, 132, 224, 138, 230, 137, 229, 164, 240, 163, 239, 165, 241, 139,
  231, 162, 238, 161, 237, 167, 243, 166, 242, 189, 249, 188, 248, 190, 250,
  168, 244, 187, 247, 186, 246, 192, 252, 191, 251, 210, 254, 209, 253, 211,
  255
};
#endif

// For entropy coding, IDTX shares the scan orders of the other 2D transforms,
// but the fastest way to calculate the IDTX transform (i.e., no transposes)
// results in coefficients that are a transposition of the entropy coding
// versions. These tables are used as a substitute for the scan order for the
// faster version of IDTX.

// Must be used together with av1_fast_idtx_iscan_4x4
DECLARE_ALIGNED(16, static const int16_t,
                av1_fast_idtx_scan_4x4[16]) = { 0, 1, 4, 8, 5, 2, 3, 6,
                                                9, 12, 13, 10, 7, 11, 14, 15 };

// Must be used together with av1_fast_idtx_scan_4x4
DECLARE_ALIGNED(16, static const int16_t,
                av1_fast_idtx_iscan_4x4[16]) = { 0, 1, 5, 6, 2, 4, 7, 12,
                                                 3, 8, 11, 13, 9, 10, 14, 15 };

static const SCAN_ORDER av1_fast_idtx_scan_order_4x4 = {
  av1_fast_idtx_scan_4x4, av1_fast_idtx_iscan_4x4
};

// Must be used together with av1_fast_idtx_iscan_8x8
DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_scan_8x8[64]) = {
  0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5,
  12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28,
  35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
  58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63
};

// Must be used together with av1_fast_idtx_scan_8x8
DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_iscan_8x8[64]) = {
  0, 1, 5, 6, 14, 15, 27, 28, 2, 4, 7, 13, 16, 26, 29, 42,
  3, 8, 12, 17, 25, 30, 41, 43, 9, 11, 18, 24, 31, 40, 44, 53,
  10, 19, 23, 32, 39, 45, 52, 54, 20, 22, 33, 38, 46, 51, 55, 60,
  21, 34, 37, 47, 50, 56, 59, 61, 35, 36, 48, 49, 57, 58, 62, 63
};

static const SCAN_ORDER av1_fast_idtx_scan_order_8x8 = {
  av1_fast_idtx_scan_8x8, av1_fast_idtx_iscan_8x8
};

// Must be used together with av1_fast_idtx_iscan_16x16
DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_scan_16x16[256]) = {
  0, 1, 16, 32, 17, 2, 3, 18, 33, 48, 64, 49, 34, 19, 4,
  5, 20, 35, 50, 65, 80, 96, 81, 66, 51, 36, 21, 6, 7, 22,
  37, 52, 67, 82, 97, 112, 128, 113, 98, 83, 68, 53, 38, 23, 8,
  9, 24, 39, 54, 69, 84, 99, 114, 129, 144, 160, 145, 130, 115, 100,
  85, 70, 55, 40, 25, 10, 11, 26, 41, 56, 71, 86, 101, 116, 131,
  146, 161, 176, 192, 177, 162, 147, 132, 117, 102, 87, 72, 57, 42, 27,
  12, 13, 28, 43, 58, 73, 88, 103, 118, 133, 148, 163, 178, 193, 208,
  224, 209, 194, 179, 164, 149, 134, 119, 104, 89, 74, 59, 44, 29, 14,
  15, 30, 45, 60, 75, 90, 105, 120, 135, 150, 165, 180, 195, 210, 225,
  240, 241, 226, 211, 196, 181, 166, 151, 136, 121, 106, 91, 76, 61, 46,
  31, 47, 62, 77, 92, 107, 122, 137, 152, 167, 182, 197, 212, 227, 242,
  243, 228, 213, 198, 183, 168, 153, 138, 123, 108, 93, 78, 63, 79, 94,
  109, 124, 139, 154, 169, 184, 199, 214, 229, 244, 245, 230, 215, 200, 185,
  170, 155, 140, 125, 110, 95, 111, 126, 141, 156, 171, 186, 201, 216, 231,
  246, 247, 232, 217, 202, 187, 172, 157, 142, 127, 143, 158, 173, 188, 203,
  218, 233, 248, 249, 234, 219, 204, 189, 174, 159, 175, 190, 205, 220, 235,
  250, 251, 236, 221, 206, 191, 207, 222, 237, 252, 253, 238, 223, 239, 254,
  255
};

// Must be used together with av1_fast_idtx_scan_16x16
DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_iscan_16x16[256]) = {
  0, 1, 5, 6, 14, 15, 27, 28, 44, 45, 65, 66, 90, 91, 119,
  120, 2, 4, 7, 13, 16, 26, 29, 43, 46, 64, 67, 89, 92, 118,
  121, 150, 3, 8, 12, 17, 25, 30, 42, 47, 63, 68, 88, 93, 117,
  122, 149, 151, 9, 11, 18, 24, 31, 41, 48, 62, 69, 87, 94, 116,
  123, 148, 152, 177, 10, 19, 23, 32, 40, 49, 61, 70, 86, 95, 115,
  124, 147, 153, 176, 178, 20, 22, 33, 39, 50, 60, 71, 85, 96, 114,
  125, 146, 154, 175, 179, 200, 21, 34, 38, 51, 59, 72, 84, 97, 113,
  126, 145, 155, 174, 180, 199, 201, 35, 37, 52, 58, 73, 83, 98, 112,
  127, 144, 156, 173, 181, 198, 202, 219, 36, 53, 57, 74, 82, 99, 111,
  128, 143, 157, 172, 182, 197, 203, 218, 220, 54, 56, 75, 81, 100, 110,
  129, 142, 158, 171, 183, 196, 204, 217, 221, 234, 55, 76, 80, 101, 109,
  130, 141, 159, 170, 184, 195, 205, 216, 222, 233, 235, 77, 79, 102, 108,
  131, 140, 160, 169, 185, 194, 206, 215, 223, 232, 236, 245, 78, 103, 107,
  132, 139, 161, 168, 186, 193, 207, 214, 224, 231, 237, 244, 246, 104, 106,
  133, 138, 162, 167, 187, 192, 208, 213, 225, 230, 238, 243, 247, 252, 105,
  134, 137, 163, 166, 188, 191, 209, 212, 226, 229, 239, 242, 248, 251, 253,
  135, 136, 164, 165, 189, 190, 210, 211, 227, 228, 240, 241, 249, 250, 254,
  255
};

// Indicates the blocks for which RD model should be based on special logic
static inline int get_model_rd_flag(const AV1_COMP *cpi, const MACROBLOCKD *xd,
                                    BLOCK_SIZE bsize) {
  const AV1_COMMON *const cm = &cpi->common;
  const int large_block = bsize >= BLOCK_32X32;
  // Only enable for low bitdepth to mitigate issue: b/303023614.
  return cpi->oxcf.rc_cfg.mode == AOM_CBR && large_block &&
         !cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id) &&
         cm->quant_params.base_qindex && !cpi->oxcf.use_highbitdepth;
}
/*!\brief Finds predicted motion vectors for a block.
 *
 * \ingroup nonrd_mode_search
 * \callgraph
 * \callergraph
 * Finds predicted motion vectors for a block from a certain reference frame.
 * First, it fills the reference MV stack, then picks the best MVs from the
 * stack and predicts the final MV for the block for each mode.
 * \param[in]    cpi                      Top-level encoder structure
 * \param[in]    x                        Pointer to structure holding all the
 *                                        data for the current macroblock
 * \param[in]    ref_frame                Reference frame for which to find
 *                                        ref MVs
 * \param[out]   frame_mv                 Predicted MVs for a block
 * \param[in]    yv12_mb                  Buffer to hold predicted block
 * \param[in]    bsize                    Current block size
 * \param[in]    force_skip_low_temp_var  Flag indicating possible mode search
 *                                        prune for low temporal variance block
 * \param[in]    skip_pred_mv             Flag indicating to skip av1_mv_pred
 * \param[out]   use_scaled_ref_frame     Flag to indicate if scaled reference
 *                                        frame is used.
 *
 * \remark Nothing is returned. Instead, predicted MVs are placed into the
 * \c frame_mv array, and use_scaled_ref_frame is set.
 */
static inline void find_predictors(
    AV1_COMP *cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
    int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES],
    struct buf_2d yv12_mb[8][MAX_MB_PLANE], BLOCK_SIZE bsize,
    int force_skip_low_temp_var, int skip_pred_mv, bool *use_scaled_ref_frame) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const YV12_BUFFER_CONFIG *ref = get_ref_frame_yv12_buf(cm, ref_frame);
  const bool ref_is_scaled =
      ref->y_crop_height != cm->height || ref->y_crop_width != cm->width;
  const YV12_BUFFER_CONFIG *scaled_ref =
      av1_get_scaled_ref_frame(cpi, ref_frame);
  const YV12_BUFFER_CONFIG *yv12 =
      ref_is_scaled && scaled_ref ? scaled_ref : ref;
  const int num_planes = av1_num_planes(cm);
  x->pred_mv_sad[ref_frame] = INT_MAX;
  x->pred_mv0_sad[ref_frame] = INT_MAX;
  x->pred_mv1_sad[ref_frame] = INT_MAX;
  frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
  // TODO(kyslov) this needs various further optimizations. to be continued..
  assert(yv12 != NULL);
  if (yv12 != NULL) {
    struct scale_factors *const sf =
        scaled_ref ? NULL : get_ref_scale_factors(cm, ref_frame);
    av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
    av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                     xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                     mbmi_ext->mode_context);
    // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
    // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
    av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
    av1_find_best_ref_mvs_from_stack(
        cm->features.allow_high_precision_mv, mbmi_ext, ref_frame,
        &frame_mv[NEARESTMV][ref_frame], &frame_mv[NEARMV][ref_frame], 0);
    frame_mv[GLOBALMV][ref_frame] = mbmi_ext->global_mvs[ref_frame];
    // Early exit for non-LAST frame if force_skip_low_temp_var is set.
    if (!ref_is_scaled && bsize >= BLOCK_8X8 && !skip_pred_mv &&
        !(force_skip_low_temp_var && ref_frame != LAST_FRAME)) {
      av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame,
                  bsize);
    }
  }
  if (cm->features.switchable_motion_mode) {
    av1_count_overlappable_neighbors(cm, xd);
  }
  mbmi->num_proj_ref = 1;
  *use_scaled_ref_frame = ref_is_scaled && scaled_ref;
}

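// Initializes the mode info fields of mbmi to the defaults used by the non-RD
// pick mode path for the given prediction mode and reference frame pair.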
static inline void init_mbmi_nonrd(MB_MODE_INFO *mbmi,
                                   PREDICTION_MODE pred_mode,
                                   MV_REFERENCE_FRAME ref_frame0,
                                   MV_REFERENCE_FRAME ref_frame1,
                                   const AV1_COMMON *cm) {
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  mbmi->ref_mv_idx = 0;
  mbmi->mode = pred_mode;
  mbmi->uv_mode = UV_DC_PRED;
  mbmi->ref_frame[0] = ref_frame0;
  mbmi->ref_frame[1] = ref_frame1;
  pmi->palette_size[PLANE_TYPE_Y] = 0;
  pmi->palette_size[PLANE_TYPE_UV] = 0;
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
  mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->num_proj_ref = 1;
  mbmi->interintra_mode = 0;
  set_default_interp_filters(mbmi, cm->features.interp_filter);
}

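// Resets the block intra estimation arguments to their defaults (DC_PRED,
// skippable, no RD stats, maximum best SAD).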
static inline void init_estimate_block_intra_args(
    struct estimate_block_intra_args *args, AV1_COMP *cpi, MACROBLOCK *x) {
  args->cpi = cpi;
  args->x = x;
  args->mode = DC_PRED;
  args->skippable = 1;
  args->rdc = 0;
  args->best_sad = UINT_MAX;
  args->prune_mode_based_on_sad = false;
}

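// Returns the index of the first unused buffer in p (marking it as in use),
// or -1 if all len buffers are already in use.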
static inline int get_pred_buffer(PRED_BUFFER *p, int len) {
  for (int buf_idx = 0; buf_idx < len; buf_idx++) {
    if (!p[buf_idx].in_use) {
      p[buf_idx].in_use = 1;
      return buf_idx;
    }
  }
  return -1;
}

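// Marks a prediction buffer as no longer in use.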
static inline void free_pred_buffer(PRED_BUFFER *p) {
  if (p != NULL) p->in_use = 0;
}

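// Takes a snapshot of the coding context (mode info, skip flags and, with
// CONFIG_INTERNAL_STATS, the best mode index) so it can be restored if this
// mode is ultimately selected for encoding.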
#if CONFIG_INTERNAL_STATS
static inline void store_coding_context_nonrd(MACROBLOCK *x,
                                              PICK_MODE_CONTEXT *ctx,
                                              int mode_index) {
#else
static inline void store_coding_context_nonrd(MACROBLOCK *x,
                                              PICK_MODE_CONTEXT *ctx) {
#endif // CONFIG_INTERNAL_STATS
  MACROBLOCKD *const xd = &x->e_mbd;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;

  // Take a snapshot of the coding context so it can be
  // restored if we decide to encode this way
  ctx->rd_stats.skip_txfm = txfm_info->skip_txfm;
  ctx->skippable = txfm_info->skip_txfm;
#if CONFIG_INTERNAL_STATS
  ctx->best_mode_index = mode_index;
#endif // CONFIG_INTERNAL_STATS
  ctx->mic = *xd->mi[0];
  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
}

void av1_block_yrd(MACROBLOCK *x, RD_STATS *this_rdc, int *skippable,
                   BLOCK_SIZE bsize, TX_SIZE tx_size);

void av1_block_yrd_idtx(MACROBLOCK *x, const uint8_t *const pred_buf,
                        int pred_stride, RD_STATS *this_rdc, int *skippable,
                        BLOCK_SIZE bsize, TX_SIZE tx_size);

int64_t av1_model_rd_for_sb_uv(AV1_COMP *cpi, BLOCK_SIZE plane_bsize,
                               MACROBLOCK *x, MACROBLOCKD *xd,
                               RD_STATS *this_rdc, int start_plane,
                               int stop_plane);

void av1_estimate_block_intra(int plane, int block, int row, int col,
                              BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
                              void *arg);

void av1_estimate_intra_mode(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
                             int best_early_term, unsigned int ref_cost_intra,
                             int reuse_prediction, struct buf_2d *orig_dst,
                             PRED_BUFFER *tmp_buffers,
                             PRED_BUFFER **this_mode_pred, RD_STATS *best_rdc,
                             BEST_PICKMODE *best_pickmode,
                             PICK_MODE_CONTEXT *ctx);

#endif // AOM_AV1_ENCODER_NONRD_OPT_H_