/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <assert.h>
#include <stdio.h>
#include <limits.h>

#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"
#include "config/aom_scale_rtcd.h"

#include "aom/aom_integer.h"
#include "aom_dsp/blend.h"
#include "aom_ports/aom_once.h"

#include "av1/common/av1_common_int.h"
#include "av1/common/blockd.h"
#include "av1/common/mvref_common.h"
#include "av1/common/obmc.h"
#include "av1/common/reconinter.h"
#include "av1/common/reconintra.h"

// Determine whether to build a warped prediction for this block and, if so,
// which warp parameters to use.
static int allow_warp(const MB_MODE_INFO *const mbmi,
                      const WarpTypesAllowed *const warp_types,
                      const WarpedMotionParams *const gm_params,
                      int build_for_obmc, const struct scale_factors *const sf,
                      WarpedMotionParams *final_warp_params) {
  // Note: As per the spec, we must test the fixed point scales here, which
  // are at a higher precision (1 << 14) than the xs and ys in subpel_params
  // (which have 1 << 10 precision).
  if (av1_is_scaled(sf)) return 0;

  if (final_warp_params != NULL) *final_warp_params = default_warp_params;

  if (build_for_obmc) return 0;

  if (warp_types->local_warp_allowed && !mbmi->wm_params.invalid) {
    if (final_warp_params != NULL)
      memcpy(final_warp_params, &mbmi->wm_params, sizeof(*final_warp_params));
    return 1;
  } else if (warp_types->global_warp_allowed && !gm_params->invalid) {
    if (final_warp_params != NULL)
      memcpy(final_warp_params, gm_params, sizeof(*final_warp_params));
    return 1;
  }

  return 0;
}
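
// Decision summary for allow_warp(), in the order of the checks above:
//   scaled reference         -> no warp (translation only)
//   OBMC prediction build    -> no warp
//   valid local warp model   -> warp with mbmi->wm_params
//   valid global motion      -> warp with gm_params
//   otherwise                -> no warp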

void av1_init_warp_params(InterPredParams *inter_pred_params,
                          const WarpTypesAllowed *warp_types, int ref,
                          const MACROBLOCKD *xd, const MB_MODE_INFO *mi) {
  if (inter_pred_params->block_height < 8 || inter_pred_params->block_width < 8)
    return;

  if (xd->cur_frame_force_integer_mv) return;

  if (allow_warp(mi, warp_types, &xd->global_motion[mi->ref_frame[ref]], 0,
                 inter_pred_params->scale_factors,
                 &inter_pred_params->warp_params)) {
#if CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER
    aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_FEATURE,
                       "Warped motion is disabled in realtime only build.");
#endif  // CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER
    inter_pred_params->mode = WARP_PRED;
  }
}

void av1_make_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
                              int dst_stride,
                              InterPredParams *inter_pred_params,
                              const SubpelParams *subpel_params) {
  assert(IMPLIES(inter_pred_params->conv_params.is_compound,
                 inter_pred_params->conv_params.dst != NULL));

  if (inter_pred_params->mode == TRANSLATION_PRED) {
#if CONFIG_AV1_HIGHBITDEPTH
    if (inter_pred_params->use_hbd_buf) {
      highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
                             inter_pred_params->block_width,
                             inter_pred_params->block_height,
                             &inter_pred_params->conv_params,
                             inter_pred_params->interp_filter_params,
                             inter_pred_params->bit_depth);
    } else {
      inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
                      inter_pred_params->block_width,
                      inter_pred_params->block_height,
                      &inter_pred_params->conv_params,
                      inter_pred_params->interp_filter_params);
    }
#else
    inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
                    inter_pred_params->block_width,
                    inter_pred_params->block_height,
                    &inter_pred_params->conv_params,
                    inter_pred_params->interp_filter_params);
#endif
  }
#if !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
  // TODO(jingning): av1_warp_plane() can be further cleaned up.
  else if (inter_pred_params->mode == WARP_PRED) {
    av1_warp_plane(
        &inter_pred_params->warp_params, inter_pred_params->use_hbd_buf,
        inter_pred_params->bit_depth, inter_pred_params->ref_frame_buf.buf0,
        inter_pred_params->ref_frame_buf.width,
        inter_pred_params->ref_frame_buf.height,
        inter_pred_params->ref_frame_buf.stride, dst,
        inter_pred_params->pix_col, inter_pred_params->pix_row,
        inter_pred_params->block_width, inter_pred_params->block_height,
        dst_stride, inter_pred_params->subsampling_x,
        inter_pred_params->subsampling_y, &inter_pred_params->conv_params);
  }
#endif  // !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
  else {
    assert(0 && "Unsupported inter_pred_params->mode");
  }
}
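
// Hedged sketch of the expected call pattern (the setup helpers are declared
// in reconinter.h; exact arguments vary by caller):
//
//   InterPredParams params;
//   av1_init_inter_params(&params, ...);            // TRANSLATION_PRED setup
//   av1_init_warp_params(&params, &warp_types, ref, xd, mi);  // may switch
//                                                             // to WARP_PRED
//   av1_make_inter_predictor(src, src_stride, dst, dst_stride, &params,
//                            &subpel_params);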

static const uint8_t wedge_master_oblique_odd[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  6,  18,
  37, 53, 60, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
static const uint8_t wedge_master_oblique_even[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  4,  11, 27,
  46, 58, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
static const uint8_t wedge_master_vertical[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  7,  21,
  43, 57, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
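
// Each master row above is a 64-entry ramp from 0 up to
// AOM_BLEND_A64_MAX_ALPHA (64), i.e. a soft edge through the middle of the
// 64x64 master mask. The odd/even oblique rows are offset by half a sample so
// that interleaving them in init_wedge_master_masks() produces an edge with a
// 2:1 slope.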

static inline void shift_copy(const uint8_t *src, uint8_t *dst, int shift,
                              int width) {
  if (shift >= 0) {
    memcpy(dst + shift, src, width - shift);
    memset(dst, src[0], shift);
  } else {
    shift = -shift;
    memcpy(dst, src + shift, width - shift);
    memset(dst + width - shift, src[width - 1], shift);
  }
}
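
// Example: with src = { a, b, c, d } and width = 4,
//   shift_copy(src, dst, 2, 4)  -> dst = { a, a, a, b }
//   shift_copy(src, dst, -2, 4) -> dst = { c, d, d, d }
// i.e. the row is shifted and the vacated samples are padded with the nearest
// edge value.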

/* clang-format off */
DECLARE_ALIGNED(16, static uint8_t,
                wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]) = {
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, },
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
};
/* clang-format on */

// [negative][direction]
DECLARE_ALIGNED(
    16, static uint8_t,
    wedge_mask_obl[2][WEDGE_DIRECTIONS][MASK_MASTER_SIZE * MASK_MASTER_SIZE]);

// 4 * MAX_WEDGE_SQUARE is an easy-to-compute and fairly tight upper bound on
// the sum of all mask sizes up to and including MAX_WEDGE_SQUARE.
DECLARE_ALIGNED(16, static uint8_t,
                wedge_mask_buf[2 * MAX_WEDGE_TYPES * 4 * MAX_WEDGE_SQUARE]);
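
// Sanity check of that bound against the wedge-enabled sizes in
// av1_wedge_params_lookup below: per (wedge type, sign) pair the masks occupy
//   8x8 + 8x16 + 16x8 + 16x16 + 16x32 + 32x16 + 32x32 + 8x32 + 32x8
//   = 64 + 128 + 128 + 256 + 512 + 512 + 1024 + 256 + 256 = 3136 bytes,
// which is indeed <= 4 * MAX_WEDGE_SQUARE = 4096.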

DECLARE_ALIGNED(16, static uint8_t,
                smooth_interintra_mask_buf[INTERINTRA_MODES][BLOCK_SIZES_ALL]
                                          [MAX_WEDGE_SQUARE]);

static wedge_masks_type wedge_masks[BLOCK_SIZES_ALL][2];

static const wedge_code_type wedge_codebook_16_hgtw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

static const wedge_code_type wedge_codebook_16_hltw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_VERTICAL, 6, 4 },   { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

static const wedge_code_type wedge_codebook_16_heqw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

const wedge_params_type av1_wedge_params_lookup[BLOCK_SIZES_ALL] = {
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8],
    wedge_masks[BLOCK_8X8] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16],
    wedge_masks[BLOCK_8X16] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8],
    wedge_masks[BLOCK_16X8] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16],
    wedge_masks[BLOCK_16X16] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32],
    wedge_masks[BLOCK_16X32] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16],
    wedge_masks[BLOCK_32X16] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32],
    wedge_masks[BLOCK_32X32] },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32],
    wedge_masks[BLOCK_8X32] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8],
    wedge_masks[BLOCK_32X8] },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
};
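
// Wedge compound is enabled only for block sizes whose width and height both
// lie in [8, 32] samples, matching the rows above with a non-NULL codebook.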

static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
                                             BLOCK_SIZE sb_type) {
  const uint8_t *master;
  const int bh = block_size_high[sb_type];
  const int bw = block_size_wide[sb_type];
  const wedge_code_type *a =
      av1_wedge_params_lookup[sb_type].codebook + wedge_index;
  int woff, hoff;
  const uint8_t wsignflip =
      av1_wedge_params_lookup[sb_type].signflip[wedge_index];

  assert(wedge_index >= 0 && wedge_index < get_wedge_types_lookup(sb_type));
  woff = (a->x_offset * bw) >> 3;
  hoff = (a->y_offset * bh) >> 3;
  master = wedge_mask_obl[neg ^ wsignflip][a->direction] +
           MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
           MASK_MASTER_SIZE / 2 - woff;
  return master;
}
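
// Example: for BLOCK_8X8 and a codebook entry with offsets (4, 4),
// woff = hoff = (4 * 8) >> 3 = 4, so the returned pointer is an 8x8 window
// whose top-left sits at row/column MASK_MASTER_SIZE / 2 - 4 = 28 of the
// 64x64 master mask, to be read with stride MASK_MASTER_STRIDE.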

const uint8_t *av1_get_compound_type_mask(
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type) {
  (void)sb_type;
  switch (comp_data->type) {
    case COMPOUND_WEDGE:
      return av1_get_contiguous_soft_mask(comp_data->wedge_index,
                                          comp_data->wedge_sign, sb_type);
    default: return comp_data->seg_mask;
  }
}

static inline void diffwtd_mask_d16(uint8_t *mask, int which_inverse,
                                    int mask_base, const CONV_BUF_TYPE *src0,
                                    int src0_stride, const CONV_BUF_TYPE *src1,
                                    int src1_stride, int h, int w,
                                    ConvolveParams *conv_params, int bd) {
  int round =
      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
  int i, j, m, diff;
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      diff = abs(src0[i * src0_stride + j] - src1[i * src1_stride + j]);
      diff = ROUND_POWER_OF_TWO(diff, round);
      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
      mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
    }
  }
}
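
// Example (DIFFWTD_38): identical pixels give m = clamp(38, 0, 64), i.e. a
// 38:26 blend in favor of src0, and m grows by 1 per DIFF_FACTOR (16) units
// of rounded absolute difference until it saturates at
// AOM_BLEND_A64_MAX_ALPHA (64), where src0 fully wins.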

void av1_build_compound_diffwtd_mask_d16_c(
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
    ConvolveParams *conv_params, int bd) {
  switch (mask_type) {
    case DIFFWTD_38:
      diffwtd_mask_d16(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w,
                       conv_params, bd);
      break;
    case DIFFWTD_38_INV:
      diffwtd_mask_d16(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w,
                       conv_params, bd);
      break;
    default: assert(0);
  }
}

static inline void diffwtd_mask(uint8_t *mask, int which_inverse, int mask_base,
                                const uint8_t *src0, int src0_stride,
                                const uint8_t *src1, int src1_stride, int h,
                                int w) {
  int i, j, m, diff;
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      diff =
          abs((int)src0[i * src0_stride + j] - (int)src1[i * src1_stride + j]);
      m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
      mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
    }
  }
}

void av1_build_compound_diffwtd_mask_c(uint8_t *mask,
                                       DIFFWTD_MASK_TYPE mask_type,
                                       const uint8_t *src0, int src0_stride,
                                       const uint8_t *src1, int src1_stride,
                                       int h, int w) {
  switch (mask_type) {
    case DIFFWTD_38:
      diffwtd_mask(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w);
      break;
    case DIFFWTD_38_INV:
      diffwtd_mask(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w);
      break;
    default: assert(0);
  }
}

#if CONFIG_AV1_HIGHBITDEPTH
static AOM_FORCE_INLINE void diffwtd_mask_highbd(
    uint8_t *mask, int which_inverse, int mask_base, const uint16_t *src0,
    int src0_stride, const uint16_t *src1, int src1_stride, int h, int w,
    const unsigned int bd) {
  assert(bd >= 8);
  if (bd == 8) {
    if (which_inverse) {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    } else {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    }
  } else {
    const unsigned int bd_shift = bd - 8;
    if (which_inverse) {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff =
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    } else {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff =
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    }
  }
}

void av1_build_compound_diffwtd_mask_highbd_c(
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0,
    int src0_stride, const uint8_t *src1, int src1_stride, int h, int w,
    int bd) {
  switch (mask_type) {
    case DIFFWTD_38:
      diffwtd_mask_highbd(mask, 0, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                          CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
      break;
    case DIFFWTD_38_INV:
      diffwtd_mask_highbd(mask, 1, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                          CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
      break;
    default: assert(0);
  }
}
#endif  // CONFIG_AV1_HIGHBITDEPTH

static inline void init_wedge_master_masks(void) {
  int i, j;
  const int w = MASK_MASTER_SIZE;
  const int h = MASK_MASTER_SIZE;
  const int stride = MASK_MASTER_STRIDE;
  // Note: index [0] stores the masters, and [1] its complement.
  // Generate prototype by shifting the masters
  int shift = h / 4;
  for (i = 0; i < h; i += 2) {
    shift_copy(wedge_master_oblique_even,
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride], shift,
               MASK_MASTER_SIZE);
    shift--;
    shift_copy(wedge_master_oblique_odd,
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][(i + 1) * stride], shift,
               MASK_MASTER_SIZE);
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][i * stride],
           wedge_master_vertical,
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][(i + 1) * stride],
           wedge_master_vertical,
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
  }

  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int msk = wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j];
      wedge_mask_obl[0][WEDGE_OBLIQUE27][j * stride + i] = msk;
      wedge_mask_obl[0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
          wedge_mask_obl[0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
              (1 << WEDGE_WEIGHT_BITS) - msk;
      wedge_mask_obl[1][WEDGE_OBLIQUE63][i * stride + j] =
          wedge_mask_obl[1][WEDGE_OBLIQUE27][j * stride + i] =
              (1 << WEDGE_WEIGHT_BITS) - msk;
      wedge_mask_obl[1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
          wedge_mask_obl[1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = msk;
      const int mskx = wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j];
      wedge_mask_obl[0][WEDGE_HORIZONTAL][j * stride + i] = mskx;
      wedge_mask_obl[1][WEDGE_VERTICAL][i * stride + j] =
          wedge_mask_obl[1][WEDGE_HORIZONTAL][j * stride + i] =
              (1 << WEDGE_WEIGHT_BITS) - mskx;
    }
  }
}
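
// The second loop derives the remaining directions from the WEDGE_OBLIQUE63
// master by symmetry: a transpose gives OBLIQUE27, a horizontal flip plus the
// complement against (1 << WEDGE_WEIGHT_BITS) == 64 gives OBLIQUE117 and
// OBLIQUE153, and the same transpose maps VERTICAL to HORIZONTAL.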

static inline void init_wedge_masks(void) {
  uint8_t *dst = wedge_mask_buf;
  BLOCK_SIZE bsize;
  memset(wedge_masks, 0, sizeof(wedge_masks));
  for (bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; ++bsize) {
    const wedge_params_type *wedge_params = &av1_wedge_params_lookup[bsize];
    const int wtypes = wedge_params->wedge_types;
    if (wtypes == 0) continue;
    const uint8_t *mask;
    const int bw = block_size_wide[bsize];
    const int bh = block_size_high[bsize];
    int w;
    for (w = 0; w < wtypes; ++w) {
      mask = get_wedge_mask_inplace(w, 0, bsize);
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw /* dst_stride */, bw,
                        bh);
      wedge_params->masks[0][w] = dst;
      dst += bw * bh;

      mask = get_wedge_mask_inplace(w, 1, bsize);
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw /* dst_stride */, bw,
                        bh);
      wedge_params->masks[1][w] = dst;
      dst += bw * bh;
    }
    assert(sizeof(wedge_mask_buf) >= (size_t)(dst - wedge_mask_buf));
  }
}
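
// aom_convolve_copy() is used here purely as an optimized 2D copy: it packs
// each bw x bh window of the master mask into wedge_mask_buf so that later
// blends can read the mask with a contiguous stride of bw.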

/* clang-format off */
static const uint8_t ii_weights1d[MAX_SB_SIZE] = {
  60, 58, 56, 54, 52, 50, 48, 47, 45, 44, 42, 41, 39, 38, 37, 35, 34, 33, 32,
  31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 20, 19, 19, 18, 18, 17, 16,
  16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10,  9,  9,  9,  8,
  8,  8,  8,  7,  7,  7,  7,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  4,  4,
  4,  4,  4,  4,  4,  4,  3,  3,  3,  3,  3,  3,  3,  3,  3,  2,  2,  2,  2,
  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,  1,  1,  1,
  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1
};
static uint8_t ii_size_scales[BLOCK_SIZES_ALL] = {
    32, 16, 16, 16, 8, 8, 8, 4,
    4,  4,  2,  2,  2, 1, 1, 1,
    8,  8,  4,  4,  2, 2
};
/* clang-format on */
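
// ii_weights1d is a decaying blend-weight ramp over MAX_SB_SIZE (128)
// positions; ii_size_scales stretches it across the block. Example: BLOCK_8X8
// has scale 16, so its rows sample ii_weights1d[0], [16], ..., [112] and
// traverse the full ramp in 8 steps.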

static inline void build_smooth_interintra_mask(uint8_t *mask, int stride,
                                                BLOCK_SIZE plane_bsize,
                                                INTERINTRA_MODE mode) {
  int i, j;
  const int bw = block_size_wide[plane_bsize];
  const int bh = block_size_high[plane_bsize];
  const int size_scale = ii_size_scales[plane_bsize];

  switch (mode) {
    case II_V_PRED:
      for (i = 0; i < bh; ++i) {
        memset(mask, ii_weights1d[i * size_scale], bw * sizeof(mask[0]));
        mask += stride;
      }
      break;

    case II_H_PRED:
      for (i = 0; i < bh; ++i) {
        for (j = 0; j < bw; ++j) mask[j] = ii_weights1d[j * size_scale];
        mask += stride;
      }
      break;

    case II_SMOOTH_PRED:
      for (i = 0; i < bh; ++i) {
        for (j = 0; j < bw; ++j)
          mask[j] = ii_weights1d[(i < j ? i : j) * size_scale];
        mask += stride;
      }
      break;

    case II_DC_PRED:
    default:
      for (i = 0; i < bh; ++i) {
        memset(mask, 32, bw * sizeof(mask[0]));
        mask += stride;
      }
      break;
  }
}

static inline void init_smooth_interintra_masks(void) {
  for (int m = 0; m < INTERINTRA_MODES; ++m) {
    for (int bs = 0; bs < BLOCK_SIZES_ALL; ++bs) {
      const int bw = block_size_wide[bs];
      const int bh = block_size_high[bs];
      if (bw > MAX_WEDGE_SIZE || bh > MAX_WEDGE_SIZE) continue;
      build_smooth_interintra_mask(smooth_interintra_mask_buf[m][bs], bw, bs,
                                   m);
    }
  }
}

// Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0
static void init_all_wedge_masks(void) {
  init_wedge_master_masks();
  init_wedge_masks();
  init_smooth_interintra_masks();
}

void av1_init_wedge_masks(void) { aom_once(init_all_wedge_masks); }
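
// aom_once() guarantees the mask tables are built exactly once, even when
// several encoder or decoder instances initialize concurrently.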

static inline void build_masked_compound_no_round(
    uint8_t *dst, int dst_stride, const CONV_BUF_TYPE *src0, int src0_stride,
    const CONV_BUF_TYPE *src1, int src1_stride,
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
    int w, InterPredParams *inter_pred_params) {
  const int ssy = inter_pred_params->subsampling_y;
  const int ssx = inter_pred_params->subsampling_x;
  const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
  const int mask_stride = block_size_wide[sb_type];
#if CONFIG_AV1_HIGHBITDEPTH
  if (inter_pred_params->use_hbd_buf) {
    aom_highbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
                                  src1_stride, mask, mask_stride, w, h, ssx,
                                  ssy, &inter_pred_params->conv_params,
                                  inter_pred_params->bit_depth);
  } else {
    aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
                                 src1_stride, mask, mask_stride, w, h, ssx, ssy,
                                 &inter_pred_params->conv_params);
  }
#else
  aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
                               src1_stride, mask, mask_stride, w, h, ssx, ssy,
                               &inter_pred_params->conv_params);
#endif
}
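
// "no_round" refers to the inputs: src0/src1 are CONV_BUF_TYPE intermediates
// that still carry the extra convolution precision, and the *_d16 blend
// kernels perform the final rounding described by conv_params while blending.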

void av1_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
                                     uint8_t *dst, int dst_stride,
                                     InterPredParams *inter_pred_params,
                                     const SubpelParams *subpel_params) {
  const INTERINTER_COMPOUND_DATA *comp_data = &inter_pred_params->mask_comp;
  BLOCK_SIZE sb_type = inter_pred_params->sb_type;

  // We're going to call av1_make_inter_predictor to generate a prediction in
  // a temporary buffer, then blend that temporary buffer with the prediction
  // from the other reference.
  DECLARE_ALIGNED(32, uint8_t, tmp_buf[2 * MAX_SB_SQUARE]);
  uint8_t *tmp_dst =
      inter_pred_params->use_hbd_buf ? CONVERT_TO_BYTEPTR(tmp_buf) : tmp_buf;

  const int tmp_buf_stride = MAX_SB_SIZE;
  CONV_BUF_TYPE *org_dst = inter_pred_params->conv_params.dst;
  int org_dst_stride = inter_pred_params->conv_params.dst_stride;
  CONV_BUF_TYPE *tmp_buf16 = (CONV_BUF_TYPE *)tmp_buf;
  inter_pred_params->conv_params.dst = tmp_buf16;
  inter_pred_params->conv_params.dst_stride = tmp_buf_stride;
  assert(inter_pred_params->conv_params.do_average == 0);

  // This generates a prediction in tmp_buf for the second reference.
  av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE,
                           inter_pred_params, subpel_params);

  if (!inter_pred_params->conv_params.plane &&
      comp_data->type == COMPOUND_DIFFWTD) {
    av1_build_compound_diffwtd_mask_d16(
        comp_data->seg_mask, comp_data->mask_type, org_dst, org_dst_stride,
        tmp_buf16, tmp_buf_stride, inter_pred_params->block_height,
        inter_pred_params->block_width, &inter_pred_params->conv_params,
        inter_pred_params->bit_depth);
  }
  build_masked_compound_no_round(
      dst, dst_stride, org_dst, org_dst_stride, tmp_buf16, tmp_buf_stride,
      comp_data, sb_type, inter_pred_params->block_height,
      inter_pred_params->block_width, inter_pred_params);
}
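
// Note the asymmetry: the caller has already rendered the first reference's
// prediction into conv_params.dst (saved as org_dst above); this function
// adds the second reference's prediction in tmp_buf16 and blends the two into
// dst. The DIFFWTD mask is derived from the luma plane only (plane == 0) and
// reused for chroma.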

void av1_dist_wtd_comp_weight_assign(const AV1_COMMON *cm,
                                     const MB_MODE_INFO *mbmi, int *fwd_offset,
                                     int *bck_offset,
                                     int *use_dist_wtd_comp_avg,
                                     int is_compound) {
  assert(fwd_offset != NULL && bck_offset != NULL);
  if (!is_compound || mbmi->compound_idx) {
    *fwd_offset = 8;
    *bck_offset = 8;
    *use_dist_wtd_comp_avg = 0;
    return;
  }

  *use_dist_wtd_comp_avg = 1;
  const RefCntBuffer *const bck_buf = get_ref_frame_buf(cm, mbmi->ref_frame[0]);
  const RefCntBuffer *const fwd_buf = get_ref_frame_buf(cm, mbmi->ref_frame[1]);
  const int cur_frame_index = cm->cur_frame->order_hint;
  int bck_frame_index = 0, fwd_frame_index = 0;

  if (bck_buf != NULL) bck_frame_index = bck_buf->order_hint;
  if (fwd_buf != NULL) fwd_frame_index = fwd_buf->order_hint;

  int d0 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info,
                                       fwd_frame_index, cur_frame_index)),
                 0, MAX_FRAME_DISTANCE);
  int d1 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info,
                                       cur_frame_index, bck_frame_index)),
                 0, MAX_FRAME_DISTANCE);

  const int order = d0 <= d1;

  if (d0 == 0 || d1 == 0) {
    *fwd_offset = quant_dist_lookup_table[3][order];
    *bck_offset = quant_dist_lookup_table[3][1 - order];
    return;
  }

  int i;
  for (i = 0; i < 3; ++i) {
    int c0 = quant_dist_weight[i][order];
    int c1 = quant_dist_weight[i][!order];
    int d0_c0 = d0 * c0;
    int d1_c1 = d1 * c1;
    if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break;
  }

  *fwd_offset = quant_dist_lookup_table[i][order];
  *bck_offset = quant_dist_lookup_table[i][1 - order];
}
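
// The (fwd_offset, bck_offset) pair is read from quant_dist_lookup_table
// (declared in reconinter.h); each pair sums to 16 == (1 << DIST_PRECISION_BITS),
// so the distance-weighted compound average downstream computes roughly
//   (fwd_offset * pred0 + bck_offset * pred1) >> DIST_PRECISION_BITS,
// assigning the larger weight to the temporally closer reference.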

void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize,
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
                          const int plane_start, const int plane_end) {
  // We use AOMMIN(plane_end, MAX_MB_PLANE) instead of plane_end to quiet
  // the static analysis warnings.
  for (int i = plane_start; i < AOMMIN(plane_end, MAX_MB_PLANE); ++i) {
    struct macroblockd_plane *const pd = &planes[i];
    const int is_uv = i > 0;
    setup_pred_plane(&pd->dst, bsize, src->buffers[i], src->crop_widths[is_uv],
                     src->crop_heights[is_uv], src->strides[is_uv], mi_row,
                     mi_col, NULL, pd->subsampling_x, pd->subsampling_y);
  }
}

void av1_setup_pre_planes(MACROBLOCKD *xd, int idx,
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
                          const struct scale_factors *sf,
                          const int num_planes) {
  if (src != NULL) {
    // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
    // the static analysis warnings.
    for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); ++i) {
      struct macroblockd_plane *const pd = &xd->plane[i];
      const int is_uv = i > 0;
      setup_pred_plane(&pd->pre[idx], xd->mi[0]->bsize, src->buffers[i],
                       src->crop_widths[is_uv], src->crop_heights[is_uv],
                       src->strides[is_uv], mi_row, mi_col, sf,
                       pd->subsampling_x, pd->subsampling_y);
    }
  }
}

// obmc_mask_N[overlap_position]
static const uint8_t obmc_mask_1[1] = { 64 };
DECLARE_ALIGNED(2, static const uint8_t, obmc_mask_2[2]) = { 45, 64 };

DECLARE_ALIGNED(4, static const uint8_t, obmc_mask_4[4]) = { 39, 50, 59, 64 };

static const uint8_t obmc_mask_8[8] = { 36, 42, 48, 53, 57, 61, 64, 64 };

static const uint8_t obmc_mask_16[16] = { 34, 37, 40, 43, 46, 49, 52, 54,
                                          56, 58, 60, 61, 64, 64, 64, 64 };

static const uint8_t obmc_mask_32[32] = { 33, 35, 36, 38, 40, 41, 43, 44,
                                          45, 47, 48, 50, 51, 52, 53, 55,
                                          56, 57, 58, 59, 60, 60, 61, 62,
                                          64, 64, 64, 64, 64, 64, 64, 64 };

static const uint8_t obmc_mask_64[64] = {
  33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44,
  45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56,
  56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62,
  62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};

const uint8_t *av1_get_obmc_mask(int length) {
  switch (length) {
    case 1: return obmc_mask_1;
    case 2: return obmc_mask_2;
    case 4: return obmc_mask_4;
    case 8: return obmc_mask_8;
    case 16: return obmc_mask_16;
    case 32: return obmc_mask_32;
    case 64: return obmc_mask_64;
    default: assert(0); return NULL;
  }
}
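
// In the OBMC blends below, the mask weights the current block's own
// prediction: an entry of 64 keeps it unchanged, while the ~33-45 values
// nearest the shared edge mix in roughly half of the neighbor's prediction,
// fading the neighbor out across the overlap region.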

static inline void increment_int_ptr(MACROBLOCKD *xd, int rel_mi_row,
                                     int rel_mi_col, uint8_t op_mi_size,
                                     int dir, MB_MODE_INFO *mi, void *fun_ctxt,
                                     const int num_planes) {
  (void)xd;
  (void)rel_mi_row;
  (void)rel_mi_col;
  (void)op_mi_size;
  (void)dir;
  (void)mi;
  ++*(uint8_t *)fun_ctxt;
  (void)num_planes;
}

void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd) {
  MB_MODE_INFO *mbmi = xd->mi[0];

  mbmi->overlappable_neighbors = 0;

  if (!is_motion_variation_allowed_bsize(mbmi->bsize)) return;

  foreach_overlappable_nb_above(cm, xd, INT_MAX, increment_int_ptr,
                                &mbmi->overlappable_neighbors);
  if (mbmi->overlappable_neighbors) return;
  foreach_overlappable_nb_left(cm, xd, INT_MAX, increment_int_ptr,
                               &mbmi->overlappable_neighbors);
}
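
// Only the presence of at least one overlappable neighbor matters to the
// motion-mode decision, hence the early return once the above scan finds one;
// the left column is scanned only when the above row contributes nothing.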

// HW does not support < 4x4 prediction. To limit the bandwidth requirement,
// if the block size of the current plane is smaller than 8x8, always only
// blend with the left neighbor(s), i.e. skip blending with the above side.
#define DISABLE_CHROMA_U8X8_OBMC 0  // 0: one-sided obmc; 1: disable

int av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize,
                               const struct macroblockd_plane *pd, int dir) {
  assert(is_motion_variation_allowed_bsize(bsize));

  const BLOCK_SIZE bsize_plane =
      get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
  switch (bsize_plane) {
#if DISABLE_CHROMA_U8X8_OBMC
    case BLOCK_4X4:
    case BLOCK_8X4:
    case BLOCK_4X8: return 1;
#else
    case BLOCK_4X4:
    case BLOCK_8X4:
    case BLOCK_4X8: return dir == 0;
#endif
    default: return 0;
  }
}
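
// dir semantics (see the callers below): dir == 0 is the above overlap and
// dir == 1 the left overlap; returning 1 skips that blend. With the default
// one-sided setting (DISABLE_CHROMA_U8X8_OBMC == 0), sub-8x8 plane sizes skip
// only the above blend.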

#if CONFIG_AV1_DECODER
static void modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
  mbmi->ref_frame[1] = NONE_FRAME;
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
}
#endif  // CONFIG_AV1_DECODER

struct obmc_inter_pred_ctxt {
  uint8_t **adjacent;
  int *adjacent_stride;
};

static inline void build_obmc_inter_pred_above(
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
    int dir, MB_MODE_INFO *above_mi, void *fun_ctxt, const int num_planes) {
  (void)above_mi;
  (void)rel_mi_row;
  (void)dir;
  struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
  const int overlap =
      AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;

  for (int plane = 0; plane < num_planes; ++plane) {
    const struct macroblockd_plane *pd = &xd->plane[plane];
    const int bw = (op_mi_size * MI_SIZE) >> pd->subsampling_x;
    const int bh = overlap >> pd->subsampling_y;
    const int plane_col = (rel_mi_col * MI_SIZE) >> pd->subsampling_x;

    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;

    const int dst_stride = pd->dst.stride;
    uint8_t *const dst = &pd->dst.buf[plane_col];
    const int tmp_stride = ctxt->adjacent_stride[plane];
    const uint8_t *const tmp = &ctxt->adjacent[plane][plane_col];
    const uint8_t *const mask = av1_get_obmc_mask(bh);
#if CONFIG_AV1_HIGHBITDEPTH
    const int is_hbd = is_cur_buf_hbd(xd);
    if (is_hbd)
      aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
                                 tmp_stride, mask, bw, bh, xd->bd);
    else
      aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
                          mask, bw, bh);
#else
    aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
                        bw, bh);
#endif
  }
}

static inline void build_obmc_inter_pred_left(
    MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
    int dir, MB_MODE_INFO *left_mi, void *fun_ctxt, const int num_planes) {
  (void)left_mi;
  (void)rel_mi_col;
  (void)dir;
  struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
  const int overlap =
      AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;

  for (int plane = 0; plane < num_planes; ++plane) {
    const struct macroblockd_plane *pd = &xd->plane[plane];
    const int bw = overlap >> pd->subsampling_x;
    const int bh = (op_mi_size * MI_SIZE) >> pd->subsampling_y;
    const int plane_row = (rel_mi_row * MI_SIZE) >> pd->subsampling_y;

    if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;

    const int dst_stride = pd->dst.stride;
    uint8_t *const dst = &pd->dst.buf[plane_row * dst_stride];
    const int tmp_stride = ctxt->adjacent_stride[plane];
    const uint8_t *const tmp = &ctxt->adjacent[plane][plane_row * tmp_stride];
    const uint8_t *const mask = av1_get_obmc_mask(bw);

#if CONFIG_AV1_HIGHBITDEPTH
    const int is_hbd = is_cur_buf_hbd(xd);
    if (is_hbd)
      aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
                                 tmp_stride, mask, bw, bh, xd->bd);
    else
      aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
                          mask, bw, bh);
#else
    aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
                        bw, bh);
#endif
  }
}

// This function combines motion compensated predictions that are generated by
// top/left neighboring blocks' inter predictors with the regular inter
// prediction. We assume the original prediction (bmc) is stored in
// xd->plane[].dst.buf.
void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                     uint8_t *above[MAX_MB_PLANE],
                                     int above_stride[MAX_MB_PLANE],
                                     uint8_t *left[MAX_MB_PLANE],
                                     int left_stride[MAX_MB_PLANE]) {
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;

  // handle above row
  struct obmc_inter_pred_ctxt ctxt_above = { above, above_stride };
  foreach_overlappable_nb_above(cm, xd,
                                max_neighbor_obmc[mi_size_wide_log2[bsize]],
                                build_obmc_inter_pred_above, &ctxt_above);

  // handle left column
  struct obmc_inter_pred_ctxt ctxt_left = { left, left_stride };
  foreach_overlappable_nb_left(cm, xd,
                               max_neighbor_obmc[mi_size_high_log2[bsize]],
                               build_obmc_inter_pred_left, &ctxt_left);
}
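
// Hedged usage sketch (the real callers live in the encoder and decoder; the
// names here follow them loosely): render the above/left neighbors'
// predictions into two scratch buffers, then blend them over the base
// prediction already sitting in xd->plane[].dst.buf:
//
//   uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE];
//   int stride1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
//   int stride2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
//   av1_setup_obmc_dst_bufs(xd, dst_buf1, dst_buf2);
//   // ... build the above/left predictions into dst_buf1 / dst_buf2 ...
//   av1_build_obmc_inter_prediction(cm, xd, dst_buf1, stride1, dst_buf2,
//                                   stride2);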

void av1_setup_obmc_dst_bufs(MACROBLOCKD *xd, uint8_t **dst_buf1,
                             uint8_t **dst_buf2) {
  if (is_cur_buf_hbd(xd)) {
    int len = sizeof(uint16_t);
    dst_buf1[0] = CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0]);
    dst_buf1[1] =
        CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * len);
    dst_buf1[2] =
        CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * 2 * len);
    dst_buf2[0] = CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1]);
    dst_buf2[1] =
        CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * len);
    dst_buf2[2] =
        CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * 2 * len);
  } else {
    dst_buf1[0] = xd->tmp_obmc_bufs[0];
    dst_buf1[1] = xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE;
    dst_buf1[2] = xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * 2;
    dst_buf2[0] = xd->tmp_obmc_bufs[1];
    dst_buf2[1] = xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE;
    dst_buf2[2] = xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * 2;
  }
}

#if CONFIG_AV1_DECODER
void av1_setup_build_prediction_by_above_pred(
    MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width,
    MB_MODE_INFO *above_mbmi, struct build_prediction_ctxt *ctxt,
    const int num_planes) {
  const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->bsize);
  const int above_mi_col = xd->mi_col + rel_mi_col;

  modify_neighbor_predictor_for_obmc(above_mbmi);

  for (int j = 0; j < num_planes; ++j) {
    struct macroblockd_plane *const pd = &xd->plane[j];
    setup_pred_plane(&pd->dst, a_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
                     ctxt->tmp_height[j], ctxt->tmp_stride[j], 0, rel_mi_col,
                     NULL, pd->subsampling_x, pd->subsampling_y);
  }

  const int num_refs = 1 + has_second_ref(above_mbmi);

  for (int ref = 0; ref < num_refs; ++ref) {
    const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];

    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
    const struct scale_factors *const sf =
        get_ref_scale_factors_const(ctxt->cm, frame);
    xd->block_ref_scale_factors[ref] = sf;
    if (!av1_is_valid_scale(sf))
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
                         "Reference frame has invalid dimensions");
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, xd->mi_row, above_mi_col, sf,
                         num_planes);
  }

  xd->mb_to_left_edge = 8 * MI_SIZE * (-above_mi_col);
  xd->mb_to_right_edge =
      ctxt->mb_to_far_edge +
      (xd->width - rel_mi_col - above_mi_width) * MI_SIZE * 8;
}

void av1_setup_build_prediction_by_left_pred(MACROBLOCKD *xd, int rel_mi_row,
                                             uint8_t left_mi_height,
                                             MB_MODE_INFO *left_mbmi,
                                             struct build_prediction_ctxt *ctxt,
                                             const int num_planes) {
  const BLOCK_SIZE l_bsize = AOMMAX(BLOCK_8X8, left_mbmi->bsize);
  const int left_mi_row = xd->mi_row + rel_mi_row;

  modify_neighbor_predictor_for_obmc(left_mbmi);

  for (int j = 0; j < num_planes; ++j) {
    struct macroblockd_plane *const pd = &xd->plane[j];
    setup_pred_plane(&pd->dst, l_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
                     ctxt->tmp_height[j], ctxt->tmp_stride[j], rel_mi_row, 0,
                     NULL, pd->subsampling_x, pd->subsampling_y);
  }

  const int num_refs = 1 + has_second_ref(left_mbmi);

  for (int ref = 0; ref < num_refs; ++ref) {
    const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];

    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
    const struct scale_factors *const ref_scale_factors =
        get_ref_scale_factors_const(ctxt->cm, frame);

    xd->block_ref_scale_factors[ref] = ref_scale_factors;
    if (!av1_is_valid_scale(ref_scale_factors))
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
                         "Reference frame has invalid dimensions");
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, left_mi_row, xd->mi_col,
                         ref_scale_factors, num_planes);
  }

  xd->mb_to_top_edge = GET_MV_SUBPEL(MI_SIZE * (-left_mi_row));
  xd->mb_to_bottom_edge =
      ctxt->mb_to_far_edge +
      GET_MV_SUBPEL((xd->height - rel_mi_row - left_mi_height) * MI_SIZE);
}
#endif  // CONFIG_AV1_DECODER

static inline void combine_interintra(
    INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
    int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
    uint8_t *comppred, int compstride, const uint8_t *interpred,
    int interstride, const uint8_t *intrapred, int intrastride) {
  const int bw = block_size_wide[plane_bsize];
  const int bh = block_size_high[plane_bsize];

  if (use_wedge_interintra) {
    if (av1_is_wedge_used(bsize)) {
      const uint8_t *mask =
          av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
      const int subw = 2 * mi_size_wide[bsize] == bw;
      const int subh = 2 * mi_size_high[bsize] == bh;
      aom_blend_a64_mask(comppred, compstride, intrapred, intrastride,
                         interpred, interstride, mask, block_size_wide[bsize],
                         bw, bh, subw, subh);
    }
    return;
  }

  const uint8_t *mask = smooth_interintra_mask_buf[mode][plane_bsize];
  aom_blend_a64_mask(comppred, compstride, intrapred, intrastride, interpred,
                     interstride, mask, bw, bw, bh, 0, 0);
}
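
// For wedge interintra the mask is stored at the luma bsize resolution, so
// subw/subh tell the blend kernel to subsample it when the plane is
// half-sized; for the smooth modes the mask is built per plane size and used
// directly.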

#if CONFIG_AV1_HIGHBITDEPTH
static inline void combine_interintra_highbd(
    INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
    int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
    uint8_t *comppred8, int compstride, const uint8_t *interpred8,
    int interstride, const uint8_t *intrapred8, int intrastride, int bd) {
  const int bw = block_size_wide[plane_bsize];
  const int bh = block_size_high[plane_bsize];

  if (use_wedge_interintra) {
    if (av1_is_wedge_used(bsize)) {
      const uint8_t *mask =
          av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
      const int subh = 2 * mi_size_high[bsize] == bh;
      const int subw = 2 * mi_size_wide[bsize] == bw;
      aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
                                interpred8, interstride, mask,
                                block_size_wide[bsize], bw, bh, subw, subh, bd);
    }
    return;
  }

  uint8_t mask[MAX_SB_SQUARE];
  build_smooth_interintra_mask(mask, bw, plane_bsize, mode);
  aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
                            interpred8, interstride, mask, bw, bw, bh, 0, 0,
                            bd);
}
#endif

void av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm,
                                               MACROBLOCKD *xd,
                                               BLOCK_SIZE bsize, int plane,
                                               const BUFFER_SET *ctx,
                                               uint8_t *dst, int dst_stride) {
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ssx = xd->plane[plane].subsampling_x;
  const int ssy = xd->plane[plane].subsampling_y;
  BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
  PREDICTION_MODE mode = interintra_to_intra_mode[xd->mi[0]->interintra_mode];
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_Y] == 0);
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_UV] == 0);
  assert(xd->mi[0]->filter_intra_mode_info.use_filter_intra == 0);
  assert(xd->mi[0]->use_intrabc == 0);
  const SequenceHeader *seq_params = cm->seq_params;

  av1_predict_intra_block(xd, seq_params->sb_size,
                          seq_params->enable_intra_edge_filter, pd->width,
                          pd->height, max_txsize_rect_lookup[plane_bsize], mode,
                          0, 0, FILTER_INTRA_MODES, ctx->plane[plane],
                          ctx->stride[plane], dst, dst_stride, 0, 0, plane);
}

void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
                            const uint8_t *inter_pred, int inter_stride,
                            const uint8_t *intra_pred, int intra_stride) {
  const int ssx = xd->plane[plane].subsampling_x;
  const int ssy = xd->plane[plane].subsampling_y;
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
#if CONFIG_AV1_HIGHBITDEPTH
  if (is_cur_buf_hbd(xd)) {
    combine_interintra_highbd(
        xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra,
        xd->mi[0]->interintra_wedge_index, INTERINTRA_WEDGE_SIGN, bsize,
        plane_bsize, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride,
        inter_pred, inter_stride, intra_pred, intra_stride, xd->bd);
    return;
  }
#endif
  combine_interintra(
      xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra,
      xd->mi[0]->interintra_wedge_index, INTERINTRA_WEDGE_SIGN, bsize,
      plane_bsize, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride,
      inter_pred, inter_stride, intra_pred, intra_stride);
}

// build interintra_predictors for one plane
void av1_build_interintra_predictor(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                    uint8_t *pred, int stride,
                                    const BUFFER_SET *ctx, int plane,
                                    BLOCK_SIZE bsize) {
  assert(bsize < BLOCK_SIZES_ALL);
  if (is_cur_buf_hbd(xd)) {
    DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]);
    av1_build_intra_predictors_for_interintra(
        cm, xd, bsize, plane, ctx, CONVERT_TO_BYTEPTR(intrapredictor),
        MAX_SB_SIZE);
    av1_combine_interintra(xd, bsize, plane, pred, stride,
                           CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE);
  } else {
    DECLARE_ALIGNED(16, uint8_t, intrapredictor[MAX_SB_SQUARE]);
    av1_build_intra_predictors_for_interintra(cm, xd, bsize, plane, ctx,
                                              intrapredictor, MAX_SB_SIZE);
    av1_combine_interintra(xd, bsize, plane, pred, stride, intrapredictor,
                           MAX_SB_SIZE);
  }
}
1184