xref: /aosp_15_r20/external/libaom/av1/encoder/nonrd_opt.h (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
1 /*
2  * Copyright (c) 2022, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #ifndef AOM_AV1_ENCODER_NONRD_OPT_H_
13 #define AOM_AV1_ENCODER_NONRD_OPT_H_
14 
15 #include "av1/encoder/rdopt_utils.h"
16 #include "av1/encoder/rdopt.h"
17 
// Number of single-reference inter modes per reference frame in the real-time
// (RTC) search; sizes the first dimension of the per-mode arrays in
// InterModeSearchStateNonrd and matches the entry count of inter_mode_list.
#define RTC_INTER_MODES (4)
// Number of intra modes in the RTC search; matches the entry count of
// intra_mode_list.
#define RTC_INTRA_MODES (4)
// Row width of mode_idx: large enough for either mode family.
#define RTC_MODES (AOMMAX(RTC_INTER_MODES, RTC_INTRA_MODES))
// Scales an RD cost by 7/8 using integer arithmetic. The expansion is fully
// parenthesized so that it composes safely with neighbouring operators:
// without the outer parentheses `a + CALC_BIASED_RDCOST(x)` would parse as
// `(a + 7 * x) >> 3` because `>>` binds more loosely than `+`.
#define CALC_BIASED_RDCOST(rdcost) ((7 * (rdcost)) >> 3)
// Number of entries in comp_ref_mode_set.
#define NUM_COMP_INTER_MODES_RT (6)
// Number of entries in ref_mode_set.
#define NUM_INTER_MODES 12
// True when the transform size should be capped for blocks larger than 32x32,
// i.e. whenever the transform mode search type is not ONLY_4X4.
#define CAP_TX_SIZE_FOR_BSIZE_GT32(tx_mode_search_type, bsize) \
  (((tx_mode_search_type) != ONLY_4X4 && (bsize) > BLOCK_32X32) ? true : false)
// Transform size paired with CAP_TX_SIZE_FOR_BSIZE_GT32.
#define TX_SIZE_FOR_BSIZE_GT32 (TX_16X16)
#define FILTER_SEARCH_SIZE 2
#if !CONFIG_REALTIME_ONLY
#define MOTION_MODE_SEARCH_SIZE 2
#endif

// Declared here only; the definition lives in another translation unit.
extern int g_pick_inter_mode_cnt;
33 /*!\cond */
// One slot of a prediction-buffer pool. get_pred_buffer() claims a slot by
// setting in_use, and free_pred_buffer() releases it by clearing in_use.
typedef struct {
  uint8_t *data;  // Buffer storage.
  int stride;     // Stride of data (units not visible here -- confirm).
  int in_use;     // Non-zero while the slot is claimed.
} PRED_BUFFER;
39 
40 typedef struct {
41   PRED_BUFFER *best_pred;
42   PREDICTION_MODE best_mode;
43   TX_SIZE best_tx_size;
44   TX_TYPE tx_type;
45   MV_REFERENCE_FRAME best_ref_frame;
46   MV_REFERENCE_FRAME best_second_ref_frame;
47   uint8_t best_mode_skip_txfm;
48   uint8_t best_mode_initial_skip_flag;
49   int_interpfilters best_pred_filter;
50   MOTION_MODE best_motion_mode;
51   WarpedMotionParams wm_params;
52   int num_proj_ref;
53   PALETTE_MODE_INFO pmi;
54   int64_t best_sse;
55 } BEST_PICKMODE;
56 
57 typedef struct {
58   MV_REFERENCE_FRAME ref_frame;
59   PREDICTION_MODE pred_mode;
60 } REF_MODE;
61 
62 typedef struct {
63   MV_REFERENCE_FRAME ref_frame[2];
64   PREDICTION_MODE pred_mode;
65 } COMP_REF_MODE;
66 
67 struct estimate_block_intra_args {
68   AV1_COMP *cpi;
69   MACROBLOCK *x;
70   PREDICTION_MODE mode;
71   int skippable;
72   RD_STATS *rdc;
73   unsigned int best_sad;
74   bool prune_mode_based_on_sad;
75 };
76 /*!\endcond */
77 
78 /*!\brief Structure to store parameters and statistics used in non-rd inter mode
79  * evaluation.
80  */
81 typedef struct {
82   //! Structure to hold best inter mode data
83   BEST_PICKMODE best_pickmode;
84   //! Structure to RD cost of current mode
85   RD_STATS this_rdc;
86   //! Pointer to the RD Cost for the best mode found so far
87   RD_STATS best_rdc;
88   //! Distortion of chroma planes for all modes and reference frames
89   int64_t uv_dist[RTC_INTER_MODES][REF_FRAMES];
90   //! Buffer to hold predicted block for all reference frames and planes
91   struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
92   //! Array to hold variance of all modes and reference frames
93   unsigned int vars[RTC_INTER_MODES][REF_FRAMES];
94   //! Array to hold ref cost of single reference mode for all ref frames
95   unsigned int ref_costs_single[REF_FRAMES];
96   //! Array to hold motion vector for all modes and reference frames
97   int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES];
98   //! Array to hold best mv for all modes and reference frames
99   int_mv frame_mv_best[MB_MODE_COUNT][REF_FRAMES];
100   //! Array to hold inter mode cost of single ref mode for all ref frames
101   int single_inter_mode_costs[RTC_INTER_MODES][REF_FRAMES];
102   //! Array to hold use reference frame mask for each reference frame
103   int use_ref_frame_mask[REF_FRAMES];
104   //! Array to hold flags of evaluated modes for each reference frame
105   uint8_t mode_checked[MB_MODE_COUNT][REF_FRAMES];
106   //! Array to hold flag indicating if scaled reference frame is used.
107   bool use_scaled_ref_frame[REF_FRAMES];
108 } InterModeSearchStateNonrd;
109 
110 static const uint8_t b_width_log2_lookup[BLOCK_SIZES] = { 0, 0, 1, 1, 1, 2,
111                                                           2, 2, 3, 3, 3, 4,
112                                                           4, 4, 5, 5 };
113 static const uint8_t b_height_log2_lookup[BLOCK_SIZES] = { 0, 1, 0, 1, 2, 1,
114                                                            2, 3, 2, 3, 4, 3,
115                                                            4, 5, 4, 5 };
116 
117 static const PREDICTION_MODE intra_mode_list[] = { DC_PRED, V_PRED, H_PRED,
118                                                    SMOOTH_PRED };
119 
120 static const PREDICTION_MODE inter_mode_list[] = { NEARESTMV, NEARMV, GLOBALMV,
121                                                    NEWMV };
122 
123 static const THR_MODES mode_idx[REF_FRAMES][RTC_MODES] = {
124   { THR_DC, THR_V_PRED, THR_H_PRED, THR_SMOOTH },
125   { THR_NEARESTMV, THR_NEARMV, THR_GLOBALMV, THR_NEWMV },
126   { THR_NEARESTL2, THR_NEARL2, THR_GLOBALL2, THR_NEWL2 },
127   { THR_NEARESTL3, THR_NEARL3, THR_GLOBALL3, THR_NEWL3 },
128   { THR_NEARESTG, THR_NEARG, THR_GLOBALG, THR_NEWG },
129   { THR_NEARESTB, THR_NEARB, THR_GLOBALB, THR_NEWB },
130   { THR_NEARESTA2, THR_NEARA2, THR_GLOBALA2, THR_NEWA2 },
131   { THR_NEARESTA, THR_NEARA, THR_GLOBALA, THR_NEWA },
132 };
133 
134 // GLOBALMV in the set below is in fact ZEROMV as we don't do global ME in RT
135 // mode
136 static const REF_MODE ref_mode_set[NUM_INTER_MODES] = {
137   { LAST_FRAME, NEARESTMV },   { LAST_FRAME, NEARMV },
138   { LAST_FRAME, GLOBALMV },    { LAST_FRAME, NEWMV },
139   { GOLDEN_FRAME, NEARESTMV }, { GOLDEN_FRAME, NEARMV },
140   { GOLDEN_FRAME, GLOBALMV },  { GOLDEN_FRAME, NEWMV },
141   { ALTREF_FRAME, NEARESTMV }, { ALTREF_FRAME, NEARMV },
142   { ALTREF_FRAME, GLOBALMV },  { ALTREF_FRAME, NEWMV },
143 };
144 
145 static const COMP_REF_MODE comp_ref_mode_set[NUM_COMP_INTER_MODES_RT] = {
146   { { LAST_FRAME, GOLDEN_FRAME }, GLOBAL_GLOBALMV },
147   { { LAST_FRAME, GOLDEN_FRAME }, NEAREST_NEARESTMV },
148   { { LAST_FRAME, LAST2_FRAME }, GLOBAL_GLOBALMV },
149   { { LAST_FRAME, LAST2_FRAME }, NEAREST_NEARESTMV },
150   { { LAST_FRAME, ALTREF_FRAME }, GLOBAL_GLOBALMV },
151   { { LAST_FRAME, ALTREF_FRAME }, NEAREST_NEARESTMV },
152 };
153 
154 static const int_interpfilters filters_ref_set[9] = {
155   [0].as_filters = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR },
156   [1].as_filters = { EIGHTTAP_SMOOTH, EIGHTTAP_SMOOTH },
157   [2].as_filters = { EIGHTTAP_REGULAR, EIGHTTAP_SMOOTH },
158   [3].as_filters = { EIGHTTAP_SMOOTH, EIGHTTAP_REGULAR },
159   [4].as_filters = { MULTITAP_SHARP, MULTITAP_SHARP },
160   [5].as_filters = { EIGHTTAP_REGULAR, MULTITAP_SHARP },
161   [6].as_filters = { MULTITAP_SHARP, EIGHTTAP_REGULAR },
162   [7].as_filters = { EIGHTTAP_SMOOTH, MULTITAP_SHARP },
163   [8].as_filters = { MULTITAP_SHARP, EIGHTTAP_SMOOTH }
164 };
165 
166 enum {
167   //  INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV),
168   INTER_NEAREST = (1 << NEARESTMV),
169   INTER_NEAREST_NEW = (1 << NEARESTMV) | (1 << NEWMV),
170   INTER_NEAREST_NEAR = (1 << NEARESTMV) | (1 << NEARMV),
171   INTER_NEAR_NEW = (1 << NEARMV) | (1 << NEWMV),
172 };
173 
174 // The original scan order (default_scan_8x8) is modified according to the extra
175 // transpose in hadamard c implementation, i.e., aom_hadamard_lp_8x8_c and
176 // aom_hadamard_8x8_c.
177 DECLARE_ALIGNED(16, static const int16_t, default_scan_8x8_transpose[64]) = {
178   0,  8,  1,  2,  9,  16, 24, 17, 10, 3,  4,  11, 18, 25, 32, 40,
179   33, 26, 19, 12, 5,  6,  13, 20, 27, 34, 41, 48, 56, 49, 42, 35,
180   28, 21, 14, 7,  15, 22, 29, 36, 43, 50, 57, 58, 51, 44, 37, 30,
181   23, 31, 38, 45, 52, 59, 60, 53, 46, 39, 47, 54, 61, 62, 55, 63
182 };
183 
184 // The original scan order (av1_default_iscan_8x8) is modified to match
185 // hadamard AVX2 implementation, i.e., aom_hadamard_lp_8x8_avx2 and
186 // aom_hadamard_8x8_avx2. Since hadamard AVX2 implementation will modify the
187 // order of coefficients, such that the normal scan order is no longer
188 // guaranteed to scan low coefficients first, therefore we modify the scan order
189 // accordingly.
190 // Note that this one has to be used together with default_scan_8x8_transpose.
191 DECLARE_ALIGNED(16, static const int16_t,
192                 av1_default_iscan_8x8_transpose[64]) = {
193   0,  2,  3,  9,  10, 20, 21, 35, 1,  4,  8,  11, 19, 22, 34, 36,
194   5,  7,  12, 18, 23, 33, 37, 48, 6,  13, 17, 24, 32, 38, 47, 49,
195   14, 16, 25, 31, 39, 46, 50, 57, 15, 26, 30, 40, 45, 51, 56, 58,
196   27, 29, 41, 44, 52, 55, 59, 62, 28, 42, 43, 53, 54, 60, 61, 63
197 };
198 
199 // The original scan order (default_scan_16x16) is modified according to the
200 // extra transpose in hadamard c implementation in lp case, i.e.,
201 // aom_hadamard_lp_16x16_c.
202 DECLARE_ALIGNED(16, static const int16_t,
203                 default_scan_lp_16x16_transpose[256]) = {
204   0,   8,   2,   4,   10,  16,  24,  18,  12,  6,   64,  14,  20,  26,  32,
205   40,  34,  28,  22,  72,  66,  68,  74,  80,  30,  36,  42,  48,  56,  50,
206   44,  38,  88,  82,  76,  70,  128, 78,  84,  90,  96,  46,  52,  58,  1,
207   9,   3,   60,  54,  104, 98,  92,  86,  136, 130, 132, 138, 144, 94,  100,
208   106, 112, 62,  5,   11,  17,  25,  19,  13,  7,   120, 114, 108, 102, 152,
209   146, 140, 134, 192, 142, 148, 154, 160, 110, 116, 122, 65,  15,  21,  27,
210   33,  41,  35,  29,  23,  73,  67,  124, 118, 168, 162, 156, 150, 200, 194,
211   196, 202, 208, 158, 164, 170, 176, 126, 69,  75,  81,  31,  37,  43,  49,
212   57,  51,  45,  39,  89,  83,  77,  71,  184, 178, 172, 166, 216, 210, 204,
213   198, 206, 212, 218, 224, 174, 180, 186, 129, 79,  85,  91,  97,  47,  53,
214   59,  61,  55,  105, 99,  93,  87,  137, 131, 188, 182, 232, 226, 220, 214,
215   222, 228, 234, 240, 190, 133, 139, 145, 95,  101, 107, 113, 63,  121, 115,
216   109, 103, 153, 147, 141, 135, 248, 242, 236, 230, 238, 244, 250, 193, 143,
217   149, 155, 161, 111, 117, 123, 125, 119, 169, 163, 157, 151, 201, 195, 252,
218   246, 254, 197, 203, 209, 159, 165, 171, 177, 127, 185, 179, 173, 167, 217,
219   211, 205, 199, 207, 213, 219, 225, 175, 181, 187, 189, 183, 233, 227, 221,
220   215, 223, 229, 235, 241, 191, 249, 243, 237, 231, 239, 245, 251, 253, 247,
221   255
222 };
223 
#if CONFIG_AV1_HIGHBITDEPTH
// Scan order for 16x16 blocks in the full-precision (fp) case. It is the
// original order (default_scan_16x16) adjusted for the extra shift in the C
// Hadamard implementation, i.e. aom_hadamard_16x16_c. The 16x16 lp and fp
// Hadamard transforms generate different outputs, so each has its own table.
DECLARE_ALIGNED(16, static const int16_t,
                default_scan_fp_16x16_transpose[256]) = {
  0,   4,   2,   8,   6,   16,  20,  18,  12,  10,  64,  14,  24,  22,  32,
  36,  34,  28,  26,  68,  66,  72,  70,  80,  30,  40,  38,  48,  52,  50,
  44,  42,  84,  82,  76,  74,  128, 78,  88,  86,  96,  46,  56,  54,  1,
  5,   3,   60,  58,  100, 98,  92,  90,  132, 130, 136, 134, 144, 94,  104,
  102, 112, 62,  9,   7,   17,  21,  19,  13,  11,  116, 114, 108, 106, 148,
  146, 140, 138, 192, 142, 152, 150, 160, 110, 120, 118, 65,  15,  25,  23,
  33,  37,  35,  29,  27,  69,  67,  124, 122, 164, 162, 156, 154, 196, 194,
  200, 198, 208, 158, 168, 166, 176, 126, 73,  71,  81,  31,  41,  39,  49,
  53,  51,  45,  43,  85,  83,  77,  75,  180, 178, 172, 170, 212, 210, 204,
  202, 206, 216, 214, 224, 174, 184, 182, 129, 79,  89,  87,  97,  47,  57,
  55,  61,  59,  101, 99,  93,  91,  133, 131, 188, 186, 228, 226, 220, 218,
  222, 232, 230, 240, 190, 137, 135, 145, 95,  105, 103, 113, 63,  117, 115,
  109, 107, 149, 147, 141, 139, 244, 242, 236, 234, 238, 248, 246, 193, 143,
  153, 151, 161, 111, 121, 119, 125, 123, 165, 163, 157, 155, 197, 195, 252,
  250, 254, 201, 199, 209, 159, 169, 167, 177, 127, 181, 179, 173, 171, 213,
  211, 205, 203, 207, 217, 215, 225, 175, 185, 183, 189, 187, 229, 227, 221,
  219, 223, 233, 231, 241, 191, 245, 243, 237, 235, 239, 249, 247, 253, 251,
  255
};
#endif
251 
252 // The original scan order (av1_default_iscan_16x16) is modified to match
253 // hadamard AVX2 implementation, i.e., aom_hadamard_lp_16x16_avx2.
254 // Since hadamard AVX2 implementation will modify the order of coefficients,
255 // such that the normal scan order is no longer guaranteed to scan low
256 // coefficients first, therefore we modify the scan order accordingly. Note that
257 // this one has to be used together with default_scan_lp_16x16_transpose.
258 DECLARE_ALIGNED(16, static const int16_t,
259                 av1_default_iscan_lp_16x16_transpose[256]) = {
260   0,   44,  2,   46,  3,   63,  9,   69,  1,   45,  4,   64,  8,   68,  11,
261   87,  5,   65,  7,   67,  12,  88,  18,  94,  6,   66,  13,  89,  17,  93,
262   24,  116, 14,  90,  16,  92,  25,  117, 31,  123, 15,  91,  26,  118, 30,
263   122, 41,  148, 27,  119, 29,  121, 42,  149, 48,  152, 28,  120, 43,  150,
264   47,  151, 62,  177, 10,  86,  20,  96,  21,  113, 35,  127, 19,  95,  22,
265   114, 34,  126, 37,  144, 23,  115, 33,  125, 38,  145, 52,  156, 32,  124,
266   39,  146, 51,  155, 58,  173, 40,  147, 50,  154, 59,  174, 73,  181, 49,
267   153, 60,  175, 72,  180, 83,  198, 61,  176, 71,  179, 84,  199, 98,  202,
268   70,  178, 85,  200, 97,  201, 112, 219, 36,  143, 54,  158, 55,  170, 77,
269   185, 53,  157, 56,  171, 76,  184, 79,  194, 57,  172, 75,  183, 80,  195,
270   102, 206, 74,  182, 81,  196, 101, 205, 108, 215, 82,  197, 100, 204, 109,
271   216, 131, 223, 99,  203, 110, 217, 130, 222, 140, 232, 111, 218, 129, 221,
272   141, 233, 160, 236, 128, 220, 142, 234, 159, 235, 169, 245, 78,  193, 104,
273   208, 105, 212, 135, 227, 103, 207, 106, 213, 134, 226, 136, 228, 107, 214,
274   133, 225, 137, 229, 164, 240, 132, 224, 138, 230, 163, 239, 165, 241, 139,
275   231, 162, 238, 166, 242, 189, 249, 161, 237, 167, 243, 188, 248, 190, 250,
276   168, 244, 187, 247, 191, 251, 210, 254, 186, 246, 192, 252, 209, 253, 211,
277   255
278 };
279 
#if CONFIG_AV1_HIGHBITDEPTH
// Inverse scan for 16x16 blocks in the full-precision (fp) case. It is the
// original table (av1_default_iscan_16x16) adjusted to match the AVX2 Hadamard
// implementation, i.e. aom_hadamard_16x16_avx2. That implementation changes
// the order of the coefficients, so the normal scan order is no longer
// guaranteed to visit the low coefficients first; the order is adjusted here
// accordingly.
// Must be used together with default_scan_fp_16x16_transpose.
DECLARE_ALIGNED(16, static const int16_t,
                av1_default_iscan_fp_16x16_transpose[256]) = {
  0,   44,  2,   46,  1,   45,  4,   64,  3,   63,  9,   69,  8,   68,  11,
  87,  5,   65,  7,   67,  6,   66,  13,  89,  12,  88,  18,  94,  17,  93,
  24,  116, 14,  90,  16,  92,  15,  91,  26,  118, 25,  117, 31,  123, 30,
  122, 41,  148, 27,  119, 29,  121, 28,  120, 43,  150, 42,  149, 48,  152,
  47,  151, 62,  177, 10,  86,  20,  96,  19,  95,  22,  114, 21,  113, 35,
  127, 34,  126, 37,  144, 23,  115, 33,  125, 32,  124, 39,  146, 38,  145,
  52,  156, 51,  155, 58,  173, 40,  147, 50,  154, 49,  153, 60,  175, 59,
  174, 73,  181, 72,  180, 83,  198, 61,  176, 71,  179, 70,  178, 85,  200,
  84,  199, 98,  202, 97,  201, 112, 219, 36,  143, 54,  158, 53,  157, 56,
  171, 55,  170, 77,  185, 76,  184, 79,  194, 57,  172, 75,  183, 74,  182,
  81,  196, 80,  195, 102, 206, 101, 205, 108, 215, 82,  197, 100, 204, 99,
  203, 110, 217, 109, 216, 131, 223, 130, 222, 140, 232, 111, 218, 129, 221,
  128, 220, 142, 234, 141, 233, 160, 236, 159, 235, 169, 245, 78,  193, 104,
  208, 103, 207, 106, 213, 105, 212, 135, 227, 134, 226, 136, 228, 107, 214,
  133, 225, 132, 224, 138, 230, 137, 229, 164, 240, 163, 239, 165, 241, 139,
  231, 162, 238, 161, 237, 167, 243, 166, 242, 189, 249, 188, 248, 190, 250,
  168, 244, 187, 247, 186, 246, 192, 252, 191, 251, 210, 254, 209, 253, 211,
  255
};
#endif
309 
310 // For entropy coding, IDTX shares the scan orders of the other 2D-transforms,
311 // but the fastest way to calculate the IDTX transform (i.e. no transposes)
312 // results in coefficients that are a transposition of the entropy coding
313 // versions. These tables are used as substitute for the scan order for the
314 // faster version of IDTX.
315 
316 // Must be used together with av1_fast_idtx_iscan_4x4
317 DECLARE_ALIGNED(16, static const int16_t,
318                 av1_fast_idtx_scan_4x4[16]) = { 0, 1,  4,  8,  5, 2,  3,  6,
319                                                 9, 12, 13, 10, 7, 11, 14, 15 };
320 
321 // Must be used together with av1_fast_idtx_scan_4x4
322 DECLARE_ALIGNED(16, static const int16_t,
323                 av1_fast_idtx_iscan_4x4[16]) = { 0, 1, 5,  6,  2, 4,  7,  12,
324                                                  3, 8, 11, 13, 9, 10, 14, 15 };
325 
326 static const SCAN_ORDER av1_fast_idtx_scan_order_4x4 = {
327   av1_fast_idtx_scan_4x4, av1_fast_idtx_iscan_4x4
328 };
329 
330 // Must be used together with av1_fast_idtx_iscan_8x8
331 DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_scan_8x8[64]) = {
332   0,  1,  8,  16, 9,  2,  3,  10, 17, 24, 32, 25, 18, 11, 4,  5,
333   12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6,  7,  14, 21, 28,
334   35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
335   58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63
336 };
337 
338 // Must be used together with av1_fast_idtx_scan_8x8
339 DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_iscan_8x8[64]) = {
340   0,  1,  5,  6,  14, 15, 27, 28, 2,  4,  7,  13, 16, 26, 29, 42,
341   3,  8,  12, 17, 25, 30, 41, 43, 9,  11, 18, 24, 31, 40, 44, 53,
342   10, 19, 23, 32, 39, 45, 52, 54, 20, 22, 33, 38, 46, 51, 55, 60,
343   21, 34, 37, 47, 50, 56, 59, 61, 35, 36, 48, 49, 57, 58, 62, 63
344 };
345 
346 static const SCAN_ORDER av1_fast_idtx_scan_order_8x8 = {
347   av1_fast_idtx_scan_8x8, av1_fast_idtx_iscan_8x8
348 };
349 
350 // Must be used together with av1_fast_idtx_iscan_16x16
351 DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_scan_16x16[256]) = {
352   0,   1,   16,  32,  17,  2,   3,   18,  33,  48,  64,  49,  34,  19,  4,
353   5,   20,  35,  50,  65,  80,  96,  81,  66,  51,  36,  21,  6,   7,   22,
354   37,  52,  67,  82,  97,  112, 128, 113, 98,  83,  68,  53,  38,  23,  8,
355   9,   24,  39,  54,  69,  84,  99,  114, 129, 144, 160, 145, 130, 115, 100,
356   85,  70,  55,  40,  25,  10,  11,  26,  41,  56,  71,  86,  101, 116, 131,
357   146, 161, 176, 192, 177, 162, 147, 132, 117, 102, 87,  72,  57,  42,  27,
358   12,  13,  28,  43,  58,  73,  88,  103, 118, 133, 148, 163, 178, 193, 208,
359   224, 209, 194, 179, 164, 149, 134, 119, 104, 89,  74,  59,  44,  29,  14,
360   15,  30,  45,  60,  75,  90,  105, 120, 135, 150, 165, 180, 195, 210, 225,
361   240, 241, 226, 211, 196, 181, 166, 151, 136, 121, 106, 91,  76,  61,  46,
362   31,  47,  62,  77,  92,  107, 122, 137, 152, 167, 182, 197, 212, 227, 242,
363   243, 228, 213, 198, 183, 168, 153, 138, 123, 108, 93,  78,  63,  79,  94,
364   109, 124, 139, 154, 169, 184, 199, 214, 229, 244, 245, 230, 215, 200, 185,
365   170, 155, 140, 125, 110, 95,  111, 126, 141, 156, 171, 186, 201, 216, 231,
366   246, 247, 232, 217, 202, 187, 172, 157, 142, 127, 143, 158, 173, 188, 203,
367   218, 233, 248, 249, 234, 219, 204, 189, 174, 159, 175, 190, 205, 220, 235,
368   250, 251, 236, 221, 206, 191, 207, 222, 237, 252, 253, 238, 223, 239, 254,
369   255
370 };
371 
372 // Must be used together with av1_fast_idtx_scan_16x16
373 DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_iscan_16x16[256]) = {
374   0,   1,   5,   6,   14,  15,  27,  28,  44,  45,  65,  66,  90,  91,  119,
375   120, 2,   4,   7,   13,  16,  26,  29,  43,  46,  64,  67,  89,  92,  118,
376   121, 150, 3,   8,   12,  17,  25,  30,  42,  47,  63,  68,  88,  93,  117,
377   122, 149, 151, 9,   11,  18,  24,  31,  41,  48,  62,  69,  87,  94,  116,
378   123, 148, 152, 177, 10,  19,  23,  32,  40,  49,  61,  70,  86,  95,  115,
379   124, 147, 153, 176, 178, 20,  22,  33,  39,  50,  60,  71,  85,  96,  114,
380   125, 146, 154, 175, 179, 200, 21,  34,  38,  51,  59,  72,  84,  97,  113,
381   126, 145, 155, 174, 180, 199, 201, 35,  37,  52,  58,  73,  83,  98,  112,
382   127, 144, 156, 173, 181, 198, 202, 219, 36,  53,  57,  74,  82,  99,  111,
383   128, 143, 157, 172, 182, 197, 203, 218, 220, 54,  56,  75,  81,  100, 110,
384   129, 142, 158, 171, 183, 196, 204, 217, 221, 234, 55,  76,  80,  101, 109,
385   130, 141, 159, 170, 184, 195, 205, 216, 222, 233, 235, 77,  79,  102, 108,
386   131, 140, 160, 169, 185, 194, 206, 215, 223, 232, 236, 245, 78,  103, 107,
387   132, 139, 161, 168, 186, 193, 207, 214, 224, 231, 237, 244, 246, 104, 106,
388   133, 138, 162, 167, 187, 192, 208, 213, 225, 230, 238, 243, 247, 252, 105,
389   134, 137, 163, 166, 188, 191, 209, 212, 226, 229, 239, 242, 248, 251, 253,
390   135, 136, 164, 165, 189, 190, 210, 211, 227, 228, 240, 241, 249, 250, 254,
391   255
392 };
393 
394 // Indicates the blocks for which RD model should be based on special logic
get_model_rd_flag(const AV1_COMP * cpi,const MACROBLOCKD * xd,BLOCK_SIZE bsize)395 static inline int get_model_rd_flag(const AV1_COMP *cpi, const MACROBLOCKD *xd,
396                                     BLOCK_SIZE bsize) {
397   const AV1_COMMON *const cm = &cpi->common;
398   const int large_block = bsize >= BLOCK_32X32;
399   // Only enable for low bitdepth to mitigate issue: b/303023614.
400   return cpi->oxcf.rc_cfg.mode == AOM_CBR && large_block &&
401          !cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id) &&
402          cm->quant_params.base_qindex && !cpi->oxcf.use_highbitdepth;
403 }
404 /*!\brief Finds predicted motion vectors for a block.
405  *
406  * \ingroup nonrd_mode_search
407  * \callgraph
408  * \callergraph
409  * Finds predicted motion vectors for a block from a certain reference frame.
410  * First, it fills reference MV stack, then picks the test from the stack and
411  * predicts the final MV for a block for each mode.
412  * \param[in]    cpi                      Top-level encoder structure
413  * \param[in]    x                        Pointer to structure holding all the
414  *                                        data for the current macroblock
415  * \param[in]    ref_frame                Reference frame for which to find
416  *                                        ref MVs
417  * \param[out]   frame_mv                 Predicted MVs for a block
418  * \param[in]    yv12_mb                  Buffer to hold predicted block
419  * \param[in]    bsize                    Current block size
420  * \param[in]    force_skip_low_temp_var  Flag indicating possible mode search
421  *                                        prune for low temporal variance block
422  * \param[in]    skip_pred_mv             Flag indicating to skip av1_mv_pred
423  * \param[out]   use_scaled_ref_frame     Flag to indicate if scaled reference
424  *                                        frame is used.
425  *
426  * \remark Nothing is returned. Instead, predicted MVs are placed into
427  * \c frame_mv array, and use_scaled_ref_frame is set.
428  */
find_predictors(AV1_COMP * cpi,MACROBLOCK * x,MV_REFERENCE_FRAME ref_frame,int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES],struct buf_2d yv12_mb[8][MAX_MB_PLANE],BLOCK_SIZE bsize,int force_skip_low_temp_var,int skip_pred_mv,bool * use_scaled_ref_frame)429 static inline void find_predictors(
430     AV1_COMP *cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
431     int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES],
432     struct buf_2d yv12_mb[8][MAX_MB_PLANE], BLOCK_SIZE bsize,
433     int force_skip_low_temp_var, int skip_pred_mv, bool *use_scaled_ref_frame) {
434   AV1_COMMON *const cm = &cpi->common;
435   MACROBLOCKD *const xd = &x->e_mbd;
436   MB_MODE_INFO *const mbmi = xd->mi[0];
437   MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
438   const YV12_BUFFER_CONFIG *ref = get_ref_frame_yv12_buf(cm, ref_frame);
439   const bool ref_is_scaled =
440       ref->y_crop_height != cm->height || ref->y_crop_width != cm->width;
441   const YV12_BUFFER_CONFIG *scaled_ref =
442       av1_get_scaled_ref_frame(cpi, ref_frame);
443   const YV12_BUFFER_CONFIG *yv12 =
444       ref_is_scaled && scaled_ref ? scaled_ref : ref;
445   const int num_planes = av1_num_planes(cm);
446   x->pred_mv_sad[ref_frame] = INT_MAX;
447   x->pred_mv0_sad[ref_frame] = INT_MAX;
448   x->pred_mv1_sad[ref_frame] = INT_MAX;
449   frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
450   // TODO(kyslov) this needs various further optimizations. to be continued..
451   assert(yv12 != NULL);
452   if (yv12 != NULL) {
453     struct scale_factors *const sf =
454         scaled_ref ? NULL : get_ref_scale_factors(cm, ref_frame);
455     av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
456     av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
457                      xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
458                      mbmi_ext->mode_context);
459     // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
460     // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
461     av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
462     av1_find_best_ref_mvs_from_stack(
463         cm->features.allow_high_precision_mv, mbmi_ext, ref_frame,
464         &frame_mv[NEARESTMV][ref_frame], &frame_mv[NEARMV][ref_frame], 0);
465     frame_mv[GLOBALMV][ref_frame] = mbmi_ext->global_mvs[ref_frame];
466     // Early exit for non-LAST frame if force_skip_low_temp_var is set.
467     if (!ref_is_scaled && bsize >= BLOCK_8X8 && !skip_pred_mv &&
468         !(force_skip_low_temp_var && ref_frame != LAST_FRAME)) {
469       av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame,
470                   bsize);
471     }
472   }
473   if (cm->features.switchable_motion_mode) {
474     av1_count_overlappable_neighbors(cm, xd);
475   }
476   mbmi->num_proj_ref = 1;
477   *use_scaled_ref_frame = ref_is_scaled && scaled_ref;
478 }
479 
// Resets the mode info of a block for evaluating one non-RD candidate: stores
// the requested prediction mode and reference frames, clears the motion
// vectors, palette sizes, filter-intra and inter-intra settings, selects
// SIMPLE_TRANSLATION, and applies the frame-level interpolation filter.
static inline void init_mbmi_nonrd(MB_MODE_INFO *mbmi,
                                   PREDICTION_MODE pred_mode,
                                   MV_REFERENCE_FRAME ref_frame0,
                                   MV_REFERENCE_FRAME ref_frame1,
                                   const AV1_COMMON *cm) {
  // Candidate being evaluated.
  mbmi->mode = pred_mode;
  mbmi->uv_mode = UV_DC_PRED;
  mbmi->ref_frame[0] = ref_frame0;
  mbmi->ref_frame[1] = ref_frame1;
  mbmi->ref_mv_idx = 0;

  // Motion defaults.
  mbmi->mv[1].as_int = 0;
  mbmi->mv[0].as_int = 0;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->num_proj_ref = 1;
  mbmi->interintra_mode = 0;

  // Palette and filter-intra are off.
  mbmi->palette_mode_info.palette_size[PLANE_TYPE_Y] = 0;
  mbmi->palette_mode_info.palette_size[PLANE_TYPE_UV] = 0;
  mbmi->filter_intra_mode_info.use_filter_intra = 0;

  set_default_interp_filters(mbmi, cm->features.interp_filter);
}
500 
// Initializes every member of an intra estimation argument bundle: cpi and x
// come from the parameters, the mode starts at DC_PRED, the block is assumed
// skippable, no RD stats are attached, the best SAD starts at UINT_MAX and
// SAD-based mode pruning is off.
static inline void init_estimate_block_intra_args(
    struct estimate_block_intra_args *args, AV1_COMP *cpi, MACROBLOCK *x) {
  args->cpi = cpi;
  args->x = x;
  args->mode = DC_PRED;
  args->skippable = 1;
  // rdc is a pointer member; clear it with NULL rather than the integer 0.
  args->rdc = NULL;
  args->best_sad = UINT_MAX;
  args->prune_mode_based_on_sad = false;
}
511 
// Claims the first unused slot among the `len` buffers starting at `p`.
// Marks that slot as in use and returns its index, or returns -1 when every
// slot is already taken (or `len` is not positive).
static inline int get_pred_buffer(PRED_BUFFER *p, int len) {
  int slot = 0;
  while (slot < len && p[slot].in_use) ++slot;
  if (slot >= len) return -1;
  p[slot].in_use = 1;
  return slot;
}
521 
// Releases a slot previously claimed with get_pred_buffer(). A NULL pointer is
// accepted and ignored.
static inline void free_pred_buffer(PRED_BUFFER *p) {
  if (p == NULL) return;
  p->in_use = 0;
}
525 
526 #if CONFIG_INTERNAL_STATS
store_coding_context_nonrd(MACROBLOCK * x,PICK_MODE_CONTEXT * ctx,int mode_index)527 static inline void store_coding_context_nonrd(MACROBLOCK *x,
528                                               PICK_MODE_CONTEXT *ctx,
529                                               int mode_index) {
530 #else
531 static inline void store_coding_context_nonrd(MACROBLOCK *x,
532                                               PICK_MODE_CONTEXT *ctx) {
533 #endif  // CONFIG_INTERNAL_STATS
534   MACROBLOCKD *const xd = &x->e_mbd;
535   TxfmSearchInfo *txfm_info = &x->txfm_search_info;
536 
537   // Take a snapshot of the coding context so it can be
538   // restored if we decide to encode this way
539   ctx->rd_stats.skip_txfm = txfm_info->skip_txfm;
540 
541   ctx->skippable = txfm_info->skip_txfm;
542 #if CONFIG_INTERNAL_STATS
543   ctx->best_mode_index = mode_index;
544 #endif  // CONFIG_INTERNAL_STATS
545   ctx->mic = *xd->mi[0];
546   ctx->skippable = txfm_info->skip_txfm;
547   av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
548                                       av1_ref_frame_type(xd->mi[0]->ref_frame));
549 }
550 
551 void av1_block_yrd(MACROBLOCK *x, RD_STATS *this_rdc, int *skippable,
552                    BLOCK_SIZE bsize, TX_SIZE tx_size);
553 
554 void av1_block_yrd_idtx(MACROBLOCK *x, const uint8_t *const pred_buf,
555                         int pred_stride, RD_STATS *this_rdc, int *skippable,
556                         BLOCK_SIZE bsize, TX_SIZE tx_size);
557 
558 int64_t av1_model_rd_for_sb_uv(AV1_COMP *cpi, BLOCK_SIZE plane_bsize,
559                                MACROBLOCK *x, MACROBLOCKD *xd,
560                                RD_STATS *this_rdc, int start_plane,
561                                int stop_plane);
562 
563 void av1_estimate_block_intra(int plane, int block, int row, int col,
564                               BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
565                               void *arg);
566 
567 void av1_estimate_intra_mode(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
568                              int best_early_term, unsigned int ref_cost_intra,
569                              int reuse_prediction, struct buf_2d *orig_dst,
570                              PRED_BUFFER *tmp_buffers,
571                              PRED_BUFFER **this_mode_pred, RD_STATS *best_rdc,
572                              BEST_PICKMODE *best_pickmode,
573                              PICK_MODE_CONTEXT *ctx);
574 
575 #endif  // AOM_AV1_ENCODER_NONRD_OPT_H_
576