1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <assert.h>
13 #include <stdio.h>
14 #include <limits.h>
15
16 #include "config/aom_config.h"
17 #include "config/aom_dsp_rtcd.h"
18 #include "config/aom_scale_rtcd.h"
19
20 #include "aom/aom_integer.h"
21 #include "aom_dsp/blend.h"
22 #include "aom_ports/aom_once.h"
23
24 #include "av1/common/av1_common_int.h"
25 #include "av1/common/blockd.h"
26 #include "av1/common/mvref_common.h"
27 #include "av1/common/obmc.h"
28 #include "av1/common/reconinter.h"
29 #include "av1/common/reconintra.h"
30
31 // This function will determine whether or not to create a warped
32 // prediction.
allow_warp(const MB_MODE_INFO * const mbmi,const WarpTypesAllowed * const warp_types,const WarpedMotionParams * const gm_params,int build_for_obmc,const struct scale_factors * const sf,WarpedMotionParams * final_warp_params)33 static int allow_warp(const MB_MODE_INFO *const mbmi,
34 const WarpTypesAllowed *const warp_types,
35 const WarpedMotionParams *const gm_params,
36 int build_for_obmc, const struct scale_factors *const sf,
37 WarpedMotionParams *final_warp_params) {
38 // Note: As per the spec, we must test the fixed point scales here, which are
39 // at a higher precision (1 << 14) than the xs and ys in subpel_params (that
40 // have 1 << 10 precision).
41 if (av1_is_scaled(sf)) return 0;
42
43 if (final_warp_params != NULL) *final_warp_params = default_warp_params;
44
45 if (build_for_obmc) return 0;
46
47 if (warp_types->local_warp_allowed && !mbmi->wm_params.invalid) {
48 if (final_warp_params != NULL)
49 memcpy(final_warp_params, &mbmi->wm_params, sizeof(*final_warp_params));
50 return 1;
51 } else if (warp_types->global_warp_allowed && !gm_params->invalid) {
52 if (final_warp_params != NULL)
53 memcpy(final_warp_params, gm_params, sizeof(*final_warp_params));
54 return 1;
55 }
56
57 return 0;
58 }
59
// If warped motion applies to this block, fill in
// inter_pred_params->warp_params and switch the prediction mode to WARP_PRED;
// otherwise leave inter_pred_params unchanged.
void av1_init_warp_params(InterPredParams *inter_pred_params,
                          const WarpTypesAllowed *warp_types, int ref,
                          const MACROBLOCKD *xd, const MB_MODE_INFO *mi) {
  // Warped prediction is only considered for blocks at least 8x8.
  if (inter_pred_params->block_height < 8 || inter_pred_params->block_width < 8)
    return;

  // Frames forced to integer MV cannot use warp models.
  if (xd->cur_frame_force_integer_mv) return;

  if (allow_warp(mi, warp_types, &xd->global_motion[mi->ref_frame[ref]], 0,
                 inter_pred_params->scale_factors,
                 &inter_pred_params->warp_params)) {
#if CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER
    // Realtime-only builds compile without the warp prediction path, so a
    // stream that requires it is rejected here.
    aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_FEATURE,
                       "Warped motion is disabled in realtime only build.");
#endif  // CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER
    inter_pred_params->mode = WARP_PRED;
  }
}
78
// Produce one inter prediction of block_width x block_height samples into
// dst. Dispatches on inter_pred_params->mode: TRANSLATION_PRED runs the
// sub-pel convolution path (high bit-depth variant when use_hbd_buf is set),
// WARP_PRED applies the affine warp model via av1_warp_plane(). For compound
// predictions conv_params.dst must already be set (see the assert).
void av1_make_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
                              int dst_stride,
                              InterPredParams *inter_pred_params,
                              const SubpelParams *subpel_params) {
  assert(IMPLIES(inter_pred_params->conv_params.is_compound,
                 inter_pred_params->conv_params.dst != NULL));

  if (inter_pred_params->mode == TRANSLATION_PRED) {
#if CONFIG_AV1_HIGHBITDEPTH
    if (inter_pred_params->use_hbd_buf) {
      highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
                             inter_pred_params->block_width,
                             inter_pred_params->block_height,
                             &inter_pred_params->conv_params,
                             inter_pred_params->interp_filter_params,
                             inter_pred_params->bit_depth);
    } else {
      inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
                      inter_pred_params->block_width,
                      inter_pred_params->block_height,
                      &inter_pred_params->conv_params,
                      inter_pred_params->interp_filter_params);
    }
#else
    inter_predictor(src, src_stride, dst, dst_stride, subpel_params,
                    inter_pred_params->block_width,
                    inter_pred_params->block_height,
                    &inter_pred_params->conv_params,
                    inter_pred_params->interp_filter_params);
#endif
  }
#if !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
  // TODO(jingning): av1_warp_plane() can be further cleaned up.
  else if (inter_pred_params->mode == WARP_PRED) {
    av1_warp_plane(
        &inter_pred_params->warp_params, inter_pred_params->use_hbd_buf,
        inter_pred_params->bit_depth, inter_pred_params->ref_frame_buf.buf0,
        inter_pred_params->ref_frame_buf.width,
        inter_pred_params->ref_frame_buf.height,
        inter_pred_params->ref_frame_buf.stride, dst,
        inter_pred_params->pix_col, inter_pred_params->pix_row,
        inter_pred_params->block_width, inter_pred_params->block_height,
        dst_stride, inter_pred_params->subsampling_x,
        inter_pred_params->subsampling_y, &inter_pred_params->conv_params);
  }
#endif  // !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
  else {
    assert(0 && "Unsupported inter_pred_params->mode");
  }
}
129
// 1-D master profiles used to build the wedge masks: 64-entry ramps from 0
// up to 64 (the maximum wedge weight, 1 << WEDGE_WEIGHT_BITS). The odd/even
// oblique pair provides alternating rows of the shifted oblique boundary in
// init_wedge_master_masks(); the vertical profile is repeated on every row.
static const uint8_t wedge_master_oblique_odd[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  6,  18,
  37, 53, 60, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
static const uint8_t wedge_master_oblique_even[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  4,  11, 27,
  46, 58, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
static const uint8_t wedge_master_vertical[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  7,  21,
  43, 57, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
148
// Copy `width` bytes of `src` into `dst`, displaced horizontally by `shift`
// positions (positive shifts right, negative shifts left). The positions
// vacated by the shift are filled by replicating the nearest edge byte of
// `src`.
static inline void shift_copy(const uint8_t *src, uint8_t *dst, int shift,
                              int width) {
  if (shift < 0) {
    const int amount = -shift;
    memcpy(dst, src + amount, width - amount);
    memset(dst + width - amount, src[width - 1], amount);
  } else {
    memcpy(dst + shift, src, width - shift);
    memset(dst, src[0], shift);
  }
}
160
161 /* clang-format off */
// Per-block-size, per-wedge-type sign flips applied when reading the master
// masks (XORed with the requested sign in get_wedge_mask_inplace). Rows
// marked "not used" correspond to block sizes without wedge support.
DECLARE_ALIGNED(16, static uint8_t,
                wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]) = {
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, },
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
};
187 /* clang-format on */
188
// Master wedge masks, [negative][direction]: index [0] holds the masks and
// [1] their complements. Filled in by init_wedge_master_masks().
DECLARE_ALIGNED(
    16, static uint8_t,
    wedge_mask_obl[2][WEDGE_DIRECTIONS][MASK_MASTER_SIZE * MASK_MASTER_SIZE]);

// 4 * MAX_WEDGE_SQUARE is an easy to compute and fairly tight upper bound
// on the sum of all mask sizes up to an including MAX_WEDGE_SQUARE.
DECLARE_ALIGNED(16, static uint8_t,
                wedge_mask_buf[2 * MAX_WEDGE_TYPES * 4 * MAX_WEDGE_SQUARE]);

// Precomputed smooth interintra masks per mode and block size; filled in by
// init_smooth_interintra_masks().
DECLARE_ALIGNED(16, static uint8_t,
                smooth_interintra_mask_buf[INTERINTRA_MODES][BLOCK_SIZES_ALL]
                                          [MAX_WEDGE_SQUARE]);

// Pointers into wedge_mask_buf, populated by init_wedge_masks().
static wedge_masks_type wedge_masks[BLOCK_SIZES_ALL][2];
204
// Wedge codebooks: 16 (direction, x_offset, y_offset) entries each, with
// offsets in eighths of the block dimensions (see get_wedge_mask_inplace).
// Separate codebooks for blocks taller than wide (hgtw), wider than tall
// (hltw), and square (heqw).
static const wedge_code_type wedge_codebook_16_hgtw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

static const wedge_code_type wedge_codebook_16_hltw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_VERTICAL, 6, 4 },   { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

static const wedge_code_type wedge_codebook_16_heqw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};
237
// Per-block-size wedge parameters: wedge-type count, codebook, sign-flip
// table, and the storage for precomputed masks. Block sizes with a zero
// count ({ 0, NULL, NULL, NULL }) do not support wedge compound prediction.
const wedge_params_type av1_wedge_params_lookup[BLOCK_SIZES_ALL] = {
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8],
    wedge_masks[BLOCK_8X8] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16],
    wedge_masks[BLOCK_8X16] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8],
    wedge_masks[BLOCK_16X8] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16],
    wedge_masks[BLOCK_16X16] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32],
    wedge_masks[BLOCK_16X32] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16],
    wedge_masks[BLOCK_32X16] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32],
    wedge_masks[BLOCK_32X32] },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32],
    wedge_masks[BLOCK_8X32] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8],
    wedge_masks[BLOCK_32X8] },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
};
271
// Return a pointer into the master wedge mask for (wedge_index, neg,
// sb_type). The returned region is read with stride MASK_MASTER_STRIDE and
// is centered on the master mask, offset by the codebook's x/y offsets
// (expressed in eighths of the block dimensions).
static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
                                             BLOCK_SIZE sb_type) {
  const int bh = block_size_high[sb_type];
  const int bw = block_size_wide[sb_type];
  const wedge_code_type *const code =
      av1_wedge_params_lookup[sb_type].codebook + wedge_index;
  const uint8_t flip = av1_wedge_params_lookup[sb_type].signflip[wedge_index];

  assert(wedge_index >= 0 && wedge_index < get_wedge_types_lookup(sb_type));
  const int woff = (code->x_offset * bw) >> 3;
  const int hoff = (code->y_offset * bh) >> 3;
  return wedge_mask_obl[neg ^ flip][code->direction] +
         MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
         MASK_MASTER_SIZE / 2 - woff;
}
291
// Return the blending mask for a compound block: the precomputed contiguous
// wedge mask for COMPOUND_WEDGE, otherwise the per-block seg_mask.
const uint8_t *av1_get_compound_type_mask(
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type) {
  (void)sb_type;
  if (comp_data->type == COMPOUND_WEDGE) {
    return av1_get_contiguous_soft_mask(comp_data->wedge_index,
                                        comp_data->wedge_sign, sb_type);
  }
  return comp_data->seg_mask;
}
302
// Build a difference-weighted mask from two intermediate (pre-rounding)
// predictions. Each mask value starts at mask_base, grows with the absolute
// sample difference (normalized back to 8-bit range via `round`), and is
// clamped to [0, AOM_BLEND_A64_MAX_ALPHA]; which_inverse selects the
// complemented mask.
static inline void diffwtd_mask_d16(uint8_t *mask, int which_inverse,
                                    int mask_base, const CONV_BUF_TYPE *src0,
                                    int src0_stride, const CONV_BUF_TYPE *src1,
                                    int src1_stride, int h, int w,
                                    ConvolveParams *conv_params, int bd) {
  const int round =
      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
  for (int r = 0; r < h; ++r) {
    for (int c = 0; c < w; ++c) {
      int diff = abs(src0[r * src0_stride + c] - src1[r * src1_stride + c]);
      diff = ROUND_POWER_OF_TWO(diff, round);
      const int m =
          clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
      mask[r * w + c] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
    }
  }
}
320
// C reference for building a DIFFWTD mask from two pre-rounding predictions.
// DIFFWTD_38 produces the direct mask with base 38; DIFFWTD_38_INV its
// complement.
void av1_build_compound_diffwtd_mask_d16_c(
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
    ConvolveParams *conv_params, int bd) {
  if (mask_type == DIFFWTD_38) {
    diffwtd_mask_d16(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w,
                     conv_params, bd);
  } else if (mask_type == DIFFWTD_38_INV) {
    diffwtd_mask_d16(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w,
                     conv_params, bd);
  } else {
    assert(0);
  }
}
337
diffwtd_mask(uint8_t * mask,int which_inverse,int mask_base,const uint8_t * src0,int src0_stride,const uint8_t * src1,int src1_stride,int h,int w)338 static inline void diffwtd_mask(uint8_t *mask, int which_inverse, int mask_base,
339 const uint8_t *src0, int src0_stride,
340 const uint8_t *src1, int src1_stride, int h,
341 int w) {
342 int i, j, m, diff;
343 for (i = 0; i < h; ++i) {
344 for (j = 0; j < w; ++j) {
345 diff =
346 abs((int)src0[i * src0_stride + j] - (int)src1[i * src1_stride + j]);
347 m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
348 mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
349 }
350 }
351 }
352
// C reference for building a DIFFWTD mask from two 8-bit predictions.
// DIFFWTD_38 produces the direct mask with base 38; DIFFWTD_38_INV its
// complement.
void av1_build_compound_diffwtd_mask_c(uint8_t *mask,
                                       DIFFWTD_MASK_TYPE mask_type,
                                       const uint8_t *src0, int src0_stride,
                                       const uint8_t *src1, int src1_stride,
                                       int h, int w) {
  if (mask_type == DIFFWTD_38) {
    diffwtd_mask(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w);
  } else if (mask_type == DIFFWTD_38_INV) {
    diffwtd_mask(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w);
  } else {
    assert(0);
  }
}
368
369 #if CONFIG_AV1_HIGHBITDEPTH
// High bit-depth variant of diffwtd_mask: build a difference-weighted mask
// from two uint16_t predictions. Differences are shifted down by (bd - 8) so
// the mask scale matches the 8-bit path. The four near-identical loops
// deliberately hoist the (bd == 8) and which_inverse tests out of the
// per-pixel inner loop.
static AOM_FORCE_INLINE void diffwtd_mask_highbd(
    uint8_t *mask, int which_inverse, int mask_base, const uint16_t *src0,
    int src0_stride, const uint16_t *src1, int src1_stride, int h, int w,
    const unsigned int bd) {
  assert(bd >= 8);
  if (bd == 8) {
    // 8-bit content in 16-bit buffers: no down-shift of the difference.
    if (which_inverse) {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    } else {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    }
  } else {
    // Normalize the difference to 8-bit range before weighting.
    const unsigned int bd_shift = bd - 8;
    if (which_inverse) {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff =
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    } else {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff =
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    }
  }
}
432
// C reference for building a DIFFWTD mask from two high bit-depth
// predictions (passed as CONVERT_TO_SHORTPTR-compatible byte pointers).
void av1_build_compound_diffwtd_mask_highbd_c(
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0,
    int src0_stride, const uint8_t *src1, int src1_stride, int h, int w,
    int bd) {
  const uint16_t *const s0 = CONVERT_TO_SHORTPTR(src0);
  const uint16_t *const s1 = CONVERT_TO_SHORTPTR(src1);
  if (mask_type == DIFFWTD_38) {
    diffwtd_mask_highbd(mask, 0, 38, s0, src0_stride, s1, src1_stride, h, w,
                        bd);
  } else if (mask_type == DIFFWTD_38_INV) {
    diffwtd_mask_highbd(mask, 1, 38, s0, src0_stride, s1, src1_stride, h, w,
                        bd);
  } else {
    assert(0);
  }
}
449 #endif // CONFIG_AV1_HIGHBITDEPTH
450
// Fill in the master wedge masks in wedge_mask_obl for every direction.
static inline void init_wedge_master_masks(void) {
  int i, j;
  const int w = MASK_MASTER_SIZE;
  const int h = MASK_MASTER_SIZE;
  const int stride = MASK_MASTER_STRIDE;
  // Note: index [0] stores the masters, and [1] its complement.
  // Generate prototype by shifting the masters
  int shift = h / 4;
  for (i = 0; i < h; i += 2) {
    // Each pair of rows shifts the oblique profile by one, producing the
    // oblique-63 boundary; odd rows use the half-offset "odd" profile.
    shift_copy(wedge_master_oblique_even,
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride], shift,
               MASK_MASTER_SIZE);
    shift--;
    shift_copy(wedge_master_oblique_odd,
               &wedge_mask_obl[0][WEDGE_OBLIQUE63][(i + 1) * stride], shift,
               MASK_MASTER_SIZE);
    // The vertical mask is the same 1-D profile repeated on every row.
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][i * stride],
           wedge_master_vertical,
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
    memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][(i + 1) * stride],
           wedge_master_vertical,
           MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
  }

  // Derive the remaining directions and the complements: oblique-27 and
  // horizontal by transposition, oblique-117/153 by mirroring, and the [1]
  // plane as (1 << WEDGE_WEIGHT_BITS) - value.
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int msk = wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j];
      wedge_mask_obl[0][WEDGE_OBLIQUE27][j * stride + i] = msk;
      wedge_mask_obl[0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
          wedge_mask_obl[0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
              (1 << WEDGE_WEIGHT_BITS) - msk;
      wedge_mask_obl[1][WEDGE_OBLIQUE63][i * stride + j] =
          wedge_mask_obl[1][WEDGE_OBLIQUE27][j * stride + i] =
              (1 << WEDGE_WEIGHT_BITS) - msk;
      wedge_mask_obl[1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
          wedge_mask_obl[1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = msk;
      const int mskx = wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j];
      wedge_mask_obl[0][WEDGE_HORIZONTAL][j * stride + i] = mskx;
      wedge_mask_obl[1][WEDGE_VERTICAL][i * stride + j] =
          wedge_mask_obl[1][WEDGE_HORIZONTAL][j * stride + i] =
              (1 << WEDGE_WEIGHT_BITS) - mskx;
    }
  }
}
495
// Cut each block size's wedge masks out of the master masks and pack them
// contiguously into wedge_mask_buf, recording the resulting pointers in
// wedge_masks (reachable through av1_wedge_params_lookup).
static inline void init_wedge_masks(void) {
  uint8_t *dst = wedge_mask_buf;
  BLOCK_SIZE bsize;
  memset(wedge_masks, 0, sizeof(wedge_masks));
  for (bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; ++bsize) {
    const wedge_params_type *wedge_params = &av1_wedge_params_lookup[bsize];
    const int wtypes = wedge_params->wedge_types;
    if (wtypes == 0) continue;  // block size has no wedge support
    const uint8_t *mask;
    const int bw = block_size_wide[bsize];
    const int bh = block_size_high[bsize];
    int w;
    for (w = 0; w < wtypes; ++w) {
      // Sign-0 mask.
      mask = get_wedge_mask_inplace(w, 0, bsize);
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw /* dst_stride */, bw,
                        bh);
      wedge_params->masks[0][w] = dst;
      dst += bw * bh;

      // Sign-1 (complement) mask.
      mask = get_wedge_mask_inplace(w, 1, bsize);
      aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw /* dst_stride */, bw,
                        bh);
      wedge_params->masks[1][w] = dst;
      dst += bw * bh;
    }
    assert(sizeof(wedge_mask_buf) >= (size_t)(dst - wedge_mask_buf));
  }
}
524
525 /* clang-format off */
526 static const uint8_t ii_weights1d[MAX_SB_SIZE] = {
527 60, 58, 56, 54, 52, 50, 48, 47, 45, 44, 42, 41, 39, 38, 37, 35, 34, 33, 32,
528 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 20, 19, 19, 18, 18, 17, 16,
529 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10, 9, 9, 9, 8,
530 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 4, 4,
531 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2,
532 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
533 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
534 };
535 static uint8_t ii_size_scales[BLOCK_SIZES_ALL] = {
536 32, 16, 16, 16, 8, 8, 8, 4,
537 4, 4, 2, 2, 2, 1, 1, 1,
538 8, 8, 4, 4, 2, 2
539 };
540 /* clang-format on */
541
// Fill `mask` (bw x bh at `stride`) with the smooth interintra blending
// weights for `mode`: a vertical ramp for II_V_PRED, a horizontal ramp for
// II_H_PRED, a 2-D min(row, col) ramp for II_SMOOTH_PRED, and a flat 32 for
// II_DC_PRED (and any other mode).
static inline void build_smooth_interintra_mask(uint8_t *mask, int stride,
                                                BLOCK_SIZE plane_bsize,
                                                INTERINTRA_MODE mode) {
  const int bw = block_size_wide[plane_bsize];
  const int bh = block_size_high[plane_bsize];
  const int scale = ii_size_scales[plane_bsize];

  if (mode == II_V_PRED) {
    for (int r = 0; r < bh; ++r, mask += stride)
      memset(mask, ii_weights1d[r * scale], bw * sizeof(mask[0]));
  } else if (mode == II_H_PRED) {
    for (int r = 0; r < bh; ++r, mask += stride)
      for (int c = 0; c < bw; ++c) mask[c] = ii_weights1d[c * scale];
  } else if (mode == II_SMOOTH_PRED) {
    for (int r = 0; r < bh; ++r, mask += stride)
      for (int c = 0; c < bw; ++c)
        mask[c] = ii_weights1d[(r < c ? r : c) * scale];
  } else {
    // II_DC_PRED and any unrecognized mode: uniform mid weight.
    for (int r = 0; r < bh; ++r, mask += stride)
      memset(mask, 32, bw * sizeof(mask[0]));
  }
}
582
init_smooth_interintra_masks(void)583 static inline void init_smooth_interintra_masks(void) {
584 for (int m = 0; m < INTERINTRA_MODES; ++m) {
585 for (int bs = 0; bs < BLOCK_SIZES_ALL; ++bs) {
586 const int bw = block_size_wide[bs];
587 const int bh = block_size_high[bs];
588 if (bw > MAX_WEDGE_SIZE || bh > MAX_WEDGE_SIZE) continue;
589 build_smooth_interintra_mask(smooth_interintra_mask_buf[m][bs], bw, bs,
590 m);
591 }
592 }
593 }
594
595 // Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0
// One-shot initializer for all static mask tables; run exactly once via
// aom_once() from av1_init_wedge_masks().
static void init_all_wedge_masks(void) {
  init_wedge_master_masks();
  init_wedge_masks();
  init_smooth_interintra_masks();
}
601
// Public entry point: initializes the wedge/interintra mask tables exactly
// once, safely under concurrent callers (aom_once).
void av1_init_wedge_masks(void) { aom_once(init_all_wedge_masks); }
603
// Blend the two intermediate (pre-rounding, CONV_BUF_TYPE) predictions
// src0/src1 into dst using the block's compound mask, honoring the plane's
// chroma subsampling.
static inline void build_masked_compound_no_round(
    uint8_t *dst, int dst_stride, const CONV_BUF_TYPE *src0, int src0_stride,
    const CONV_BUF_TYPE *src1, int src1_stride,
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
    int w, InterPredParams *inter_pred_params) {
  const int ssy = inter_pred_params->subsampling_y;
  const int ssx = inter_pred_params->subsampling_x;
  const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
  // Masks are stored contiguously at the full (luma) block width.
  const int mask_stride = block_size_wide[sb_type];
#if CONFIG_AV1_HIGHBITDEPTH
  if (inter_pred_params->use_hbd_buf) {
    aom_highbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
                                  src1_stride, mask, mask_stride, w, h, ssx,
                                  ssy, &inter_pred_params->conv_params,
                                  inter_pred_params->bit_depth);
  } else {
    aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
                                 src1_stride, mask, mask_stride, w, h, ssx, ssy,
                                 &inter_pred_params->conv_params);
  }
#else
  aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
                               src1_stride, mask, mask_stride, w, h, ssx, ssy,
                               &inter_pred_params->conv_params);
#endif
}
630
// Generate the second reference's prediction for a masked compound block
// into a temporary buffer, derive the DIFFWTD mask if needed (from the luma
// plane only), then blend the two references' predictions into dst.
// Precondition: conv_params.dst already holds the first reference's
// intermediate prediction.
void av1_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
                                     uint8_t *dst, int dst_stride,
                                     InterPredParams *inter_pred_params,
                                     const SubpelParams *subpel_params) {
  const INTERINTER_COMPOUND_DATA *comp_data = &inter_pred_params->mask_comp;
  BLOCK_SIZE sb_type = inter_pred_params->sb_type;

  // We're going to call av1_make_inter_predictor to generate a prediction into
  // a temporary buffer, then will blend that temporary buffer with that from
  // the other reference.
  DECLARE_ALIGNED(32, uint8_t, tmp_buf[2 * MAX_SB_SQUARE]);
  uint8_t *tmp_dst =
      inter_pred_params->use_hbd_buf ? CONVERT_TO_BYTEPTR(tmp_buf) : tmp_buf;

  const int tmp_buf_stride = MAX_SB_SIZE;
  // Redirect the convolve output to tmp_buf, remembering the original
  // destination (the first reference's prediction).
  CONV_BUF_TYPE *org_dst = inter_pred_params->conv_params.dst;
  int org_dst_stride = inter_pred_params->conv_params.dst_stride;
  CONV_BUF_TYPE *tmp_buf16 = (CONV_BUF_TYPE *)tmp_buf;
  inter_pred_params->conv_params.dst = tmp_buf16;
  inter_pred_params->conv_params.dst_stride = tmp_buf_stride;
  assert(inter_pred_params->conv_params.do_average == 0);

  // This will generate a prediction in tmp_buf for the second reference
  av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE,
                           inter_pred_params, subpel_params);

  // The DIFFWTD seg_mask is computed once, on plane 0 only, and reused for
  // the chroma planes.
  if (!inter_pred_params->conv_params.plane &&
      comp_data->type == COMPOUND_DIFFWTD) {
    av1_build_compound_diffwtd_mask_d16(
        comp_data->seg_mask, comp_data->mask_type, org_dst, org_dst_stride,
        tmp_buf16, tmp_buf_stride, inter_pred_params->block_height,
        inter_pred_params->block_width, &inter_pred_params->conv_params,
        inter_pred_params->bit_depth);
  }
  build_masked_compound_no_round(
      dst, dst_stride, org_dst, org_dst_stride, tmp_buf16, tmp_buf_stride,
      comp_data, sb_type, inter_pred_params->block_height,
      inter_pred_params->block_width, inter_pred_params);
}
670
// Assign the distance-weighted compound averaging weights for mbmi. Falls
// back to equal weights (8, 8) when the block is not compound or uses
// compound_idx (simple average); otherwise selects a weight pair from
// quant_dist_lookup_table based on the relative temporal distances of the
// two references.
void av1_dist_wtd_comp_weight_assign(const AV1_COMMON *cm,
                                     const MB_MODE_INFO *mbmi, int *fwd_offset,
                                     int *bck_offset,
                                     int *use_dist_wtd_comp_avg,
                                     int is_compound) {
  assert(fwd_offset != NULL && bck_offset != NULL);
  if (!is_compound || mbmi->compound_idx) {
    *fwd_offset = 8;
    *bck_offset = 8;
    *use_dist_wtd_comp_avg = 0;
    return;
  }

  *use_dist_wtd_comp_avg = 1;
  const RefCntBuffer *const bck_buf = get_ref_frame_buf(cm, mbmi->ref_frame[0]);
  const RefCntBuffer *const fwd_buf = get_ref_frame_buf(cm, mbmi->ref_frame[1]);
  const int cur_frame_index = cm->cur_frame->order_hint;
  int bck_frame_index = 0, fwd_frame_index = 0;

  if (bck_buf != NULL) bck_frame_index = bck_buf->order_hint;
  if (fwd_buf != NULL) fwd_frame_index = fwd_buf->order_hint;

  // Temporal distances from the current frame to each reference, clamped to
  // MAX_FRAME_DISTANCE.
  int d0 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info,
                                       fwd_frame_index, cur_frame_index)),
                 0, MAX_FRAME_DISTANCE);
  int d1 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info,
                                       cur_frame_index, bck_frame_index)),
                 0, MAX_FRAME_DISTANCE);

  const int order = d0 <= d1;  // which reference is (weakly) nearer

  if (d0 == 0 || d1 == 0) {
    // Degenerate distance: use the last (most skewed) table row.
    *fwd_offset = quant_dist_lookup_table[3][order];
    *bck_offset = quant_dist_lookup_table[3][1 - order];
    return;
  }

  // Find the first quantized weight row whose ratio crosses d0/d1; when no
  // row matches, the loop exits with i == 3 (last row).
  int i;
  for (i = 0; i < 3; ++i) {
    int c0 = quant_dist_weight[i][order];
    int c1 = quant_dist_weight[i][!order];
    int d0_c0 = d0 * c0;
    int d1_c1 = d1 * c1;
    if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break;
  }

  *fwd_offset = quant_dist_lookup_table[i][order];
  *bck_offset = quant_dist_lookup_table[i][1 - order];
}
720
// Point each macroblockd_plane's dst buffer at the (mi_row, mi_col) block of
// the corresponding plane of src, for planes [plane_start, plane_end).
void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize,
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
                          const int plane_start, const int plane_end) {
  // We use AOMMIN(plane_end, MAX_MB_PLANE) instead of plane_end to quiet
  // the static analysis warnings.
  for (int i = plane_start; i < AOMMIN(plane_end, MAX_MB_PLANE); ++i) {
    struct macroblockd_plane *const pd = &planes[i];
    const int is_uv = i > 0;  // planes 1+ use the chroma dimensions/strides
    setup_pred_plane(&pd->dst, bsize, src->buffers[i], src->crop_widths[is_uv],
                     src->crop_heights[is_uv], src->strides[is_uv], mi_row,
                     mi_col, NULL, pd->subsampling_x, pd->subsampling_y);
  }
}
734
// Point each plane's pre[idx] prediction buffer at the (mi_row, mi_col)
// block of the corresponding plane of src, applying the given scale factors.
// A NULL src is ignored.
void av1_setup_pre_planes(MACROBLOCKD *xd, int idx,
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
                          const struct scale_factors *sf,
                          const int num_planes) {
  if (src == NULL) return;
  // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
  // the static analysis warnings.
  const int planes = AOMMIN(num_planes, MAX_MB_PLANE);
  for (int plane = 0; plane < planes; ++plane) {
    struct macroblockd_plane *const pd = &xd->plane[plane];
    const int is_uv = plane > 0;  // planes 1+ use the chroma geometry
    setup_pred_plane(&pd->pre[idx], xd->mi[0]->bsize, src->buffers[plane],
                     src->crop_widths[is_uv], src->crop_heights[is_uv],
                     src->strides[is_uv], mi_row, mi_col, sf,
                     pd->subsampling_x, pd->subsampling_y);
  }
}
752
// obmc_mask_N[overlap_position]
// OBMC blending weights for an overlap region N samples deep; values ramp
// up to 64 with distance from the neighboring block's edge.
static const uint8_t obmc_mask_1[1] = { 64 };
DECLARE_ALIGNED(2, static const uint8_t, obmc_mask_2[2]) = { 45, 64 };

DECLARE_ALIGNED(4, static const uint8_t, obmc_mask_4[4]) = { 39, 50, 59, 64 };

static const uint8_t obmc_mask_8[8] = { 36, 42, 48, 53, 57, 61, 64, 64 };

static const uint8_t obmc_mask_16[16] = { 34, 37, 40, 43, 46, 49, 52, 54,
                                          56, 58, 60, 61, 64, 64, 64, 64 };

static const uint8_t obmc_mask_32[32] = { 33, 35, 36, 38, 40, 41, 43, 44,
                                          45, 47, 48, 50, 51, 52, 53, 55,
                                          56, 57, 58, 59, 60, 60, 61, 62,
                                          64, 64, 64, 64, 64, 64, 64, 64 };

static const uint8_t obmc_mask_64[64] = {
  33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44,
  45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56,
  56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62,
  62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
775
av1_get_obmc_mask(int length)776 const uint8_t *av1_get_obmc_mask(int length) {
777 switch (length) {
778 case 1: return obmc_mask_1;
779 case 2: return obmc_mask_2;
780 case 4: return obmc_mask_4;
781 case 8: return obmc_mask_8;
782 case 16: return obmc_mask_16;
783 case 32: return obmc_mask_32;
784 case 64: return obmc_mask_64;
785 default: assert(0); return NULL;
786 }
787 }
788
increment_int_ptr(MACROBLOCKD * xd,int rel_mi_row,int rel_mi_col,uint8_t op_mi_size,int dir,MB_MODE_INFO * mi,void * fun_ctxt,const int num_planes)789 static inline void increment_int_ptr(MACROBLOCKD *xd, int rel_mi_row,
790 int rel_mi_col, uint8_t op_mi_size,
791 int dir, MB_MODE_INFO *mi, void *fun_ctxt,
792 const int num_planes) {
793 (void)xd;
794 (void)rel_mi_row;
795 (void)rel_mi_col;
796 (void)op_mi_size;
797 (void)dir;
798 (void)mi;
799 ++*(uint8_t *)fun_ctxt;
800 (void)num_planes;
801 }
802
av1_count_overlappable_neighbors(const AV1_COMMON * cm,MACROBLOCKD * xd)803 void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd) {
804 MB_MODE_INFO *mbmi = xd->mi[0];
805
806 mbmi->overlappable_neighbors = 0;
807
808 if (!is_motion_variation_allowed_bsize(mbmi->bsize)) return;
809
810 foreach_overlappable_nb_above(cm, xd, INT_MAX, increment_int_ptr,
811 &mbmi->overlappable_neighbors);
812 if (mbmi->overlappable_neighbors) return;
813 foreach_overlappable_nb_left(cm, xd, INT_MAX, increment_int_ptr,
814 &mbmi->overlappable_neighbors);
815 }
816
817 // HW does not support < 4x4 prediction. To limit the bandwidth requirement, if
818 // block-size of current plane is smaller than 8x8, always only blend with the
819 // left neighbor(s) (skip blending with the above side).
820 #define DISABLE_CHROMA_U8X8_OBMC 0 // 0: one-sided obmc; 1: disable
821
av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize,const struct macroblockd_plane * pd,int dir)822 int av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize,
823 const struct macroblockd_plane *pd, int dir) {
824 assert(is_motion_variation_allowed_bsize(bsize));
825
826 const BLOCK_SIZE bsize_plane =
827 get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
828 switch (bsize_plane) {
829 #if DISABLE_CHROMA_U8X8_OBMC
830 case BLOCK_4X4:
831 case BLOCK_8X4:
832 case BLOCK_4X8: return 1;
833 #else
834 case BLOCK_4X4:
835 case BLOCK_8X4:
836 case BLOCK_4X8: return dir == 0;
837 #endif
838 default: return 0;
839 }
840 }
841
842 #if CONFIG_AV1_DECODER
modify_neighbor_predictor_for_obmc(MB_MODE_INFO * mbmi)843 static void modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
844 mbmi->ref_frame[1] = NONE_FRAME;
845 mbmi->interinter_comp.type = COMPOUND_AVERAGE;
846 }
847 #endif // CONFIG_AV1_DECODER
848
// Context handed to the OBMC blend visitors: per-plane pointers to the
// neighbor-motion-based temporary predictions and their strides.
struct obmc_inter_pred_ctxt {
  uint8_t **adjacent;     // per-plane neighbor prediction buffers
  int *adjacent_stride;   // per-plane strides for `adjacent`
};
853
build_obmc_inter_pred_above(MACROBLOCKD * xd,int rel_mi_row,int rel_mi_col,uint8_t op_mi_size,int dir,MB_MODE_INFO * above_mi,void * fun_ctxt,const int num_planes)854 static inline void build_obmc_inter_pred_above(
855 MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
856 int dir, MB_MODE_INFO *above_mi, void *fun_ctxt, const int num_planes) {
857 (void)above_mi;
858 (void)rel_mi_row;
859 (void)dir;
860 struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
861 const BLOCK_SIZE bsize = xd->mi[0]->bsize;
862 const int overlap =
863 AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
864
865 for (int plane = 0; plane < num_planes; ++plane) {
866 const struct macroblockd_plane *pd = &xd->plane[plane];
867 const int bw = (op_mi_size * MI_SIZE) >> pd->subsampling_x;
868 const int bh = overlap >> pd->subsampling_y;
869 const int plane_col = (rel_mi_col * MI_SIZE) >> pd->subsampling_x;
870
871 if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;
872
873 const int dst_stride = pd->dst.stride;
874 uint8_t *const dst = &pd->dst.buf[plane_col];
875 const int tmp_stride = ctxt->adjacent_stride[plane];
876 const uint8_t *const tmp = &ctxt->adjacent[plane][plane_col];
877 const uint8_t *const mask = av1_get_obmc_mask(bh);
878 #if CONFIG_AV1_HIGHBITDEPTH
879 const int is_hbd = is_cur_buf_hbd(xd);
880 if (is_hbd)
881 aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
882 tmp_stride, mask, bw, bh, xd->bd);
883 else
884 aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
885 mask, bw, bh);
886 #else
887 aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
888 bw, bh);
889 #endif
890 }
891 }
892
build_obmc_inter_pred_left(MACROBLOCKD * xd,int rel_mi_row,int rel_mi_col,uint8_t op_mi_size,int dir,MB_MODE_INFO * left_mi,void * fun_ctxt,const int num_planes)893 static inline void build_obmc_inter_pred_left(
894 MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
895 int dir, MB_MODE_INFO *left_mi, void *fun_ctxt, const int num_planes) {
896 (void)left_mi;
897 (void)rel_mi_col;
898 (void)dir;
899 struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
900 const BLOCK_SIZE bsize = xd->mi[0]->bsize;
901 const int overlap =
902 AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
903
904 for (int plane = 0; plane < num_planes; ++plane) {
905 const struct macroblockd_plane *pd = &xd->plane[plane];
906 const int bw = overlap >> pd->subsampling_x;
907 const int bh = (op_mi_size * MI_SIZE) >> pd->subsampling_y;
908 const int plane_row = (rel_mi_row * MI_SIZE) >> pd->subsampling_y;
909
910 if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;
911
912 const int dst_stride = pd->dst.stride;
913 uint8_t *const dst = &pd->dst.buf[plane_row * dst_stride];
914 const int tmp_stride = ctxt->adjacent_stride[plane];
915 const uint8_t *const tmp = &ctxt->adjacent[plane][plane_row * tmp_stride];
916 const uint8_t *const mask = av1_get_obmc_mask(bw);
917
918 #if CONFIG_AV1_HIGHBITDEPTH
919 const int is_hbd = is_cur_buf_hbd(xd);
920 if (is_hbd)
921 aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
922 tmp_stride, mask, bw, bh, xd->bd);
923 else
924 aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
925 mask, bw, bh);
926 #else
927 aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
928 bw, bh);
929 #endif
930 }
931 }
932
933 // This function combines motion compensated predictions that are generated by
934 // top/left neighboring blocks' inter predictors with the regular inter
935 // prediction. We assume the original prediction (bmc) is stored in
936 // xd->plane[].dst.buf
av1_build_obmc_inter_prediction(const AV1_COMMON * cm,MACROBLOCKD * xd,uint8_t * above[MAX_MB_PLANE],int above_stride[MAX_MB_PLANE],uint8_t * left[MAX_MB_PLANE],int left_stride[MAX_MB_PLANE])937 void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
938 uint8_t *above[MAX_MB_PLANE],
939 int above_stride[MAX_MB_PLANE],
940 uint8_t *left[MAX_MB_PLANE],
941 int left_stride[MAX_MB_PLANE]) {
942 const BLOCK_SIZE bsize = xd->mi[0]->bsize;
943
944 // handle above row
945 struct obmc_inter_pred_ctxt ctxt_above = { above, above_stride };
946 foreach_overlappable_nb_above(cm, xd,
947 max_neighbor_obmc[mi_size_wide_log2[bsize]],
948 build_obmc_inter_pred_above, &ctxt_above);
949
950 // handle left column
951 struct obmc_inter_pred_ctxt ctxt_left = { left, left_stride };
952 foreach_overlappable_nb_left(cm, xd,
953 max_neighbor_obmc[mi_size_high_log2[bsize]],
954 build_obmc_inter_pred_left, &ctxt_left);
955 }
956
av1_setup_obmc_dst_bufs(MACROBLOCKD * xd,uint8_t ** dst_buf1,uint8_t ** dst_buf2)957 void av1_setup_obmc_dst_bufs(MACROBLOCKD *xd, uint8_t **dst_buf1,
958 uint8_t **dst_buf2) {
959 if (is_cur_buf_hbd(xd)) {
960 int len = sizeof(uint16_t);
961 dst_buf1[0] = CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0]);
962 dst_buf1[1] =
963 CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * len);
964 dst_buf1[2] =
965 CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * 2 * len);
966 dst_buf2[0] = CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1]);
967 dst_buf2[1] =
968 CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * len);
969 dst_buf2[2] =
970 CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * 2 * len);
971 } else {
972 dst_buf1[0] = xd->tmp_obmc_bufs[0];
973 dst_buf1[1] = xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE;
974 dst_buf1[2] = xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * 2;
975 dst_buf2[0] = xd->tmp_obmc_bufs[1];
976 dst_buf2[1] = xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE;
977 dst_buf2[2] = xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * 2;
978 }
979 }
980
981 #if CONFIG_AV1_DECODER
av1_setup_build_prediction_by_above_pred(MACROBLOCKD * xd,int rel_mi_col,uint8_t above_mi_width,MB_MODE_INFO * above_mbmi,struct build_prediction_ctxt * ctxt,const int num_planes)982 void av1_setup_build_prediction_by_above_pred(
983 MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width,
984 MB_MODE_INFO *above_mbmi, struct build_prediction_ctxt *ctxt,
985 const int num_planes) {
986 const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->bsize);
987 const int above_mi_col = xd->mi_col + rel_mi_col;
988
989 modify_neighbor_predictor_for_obmc(above_mbmi);
990
991 for (int j = 0; j < num_planes; ++j) {
992 struct macroblockd_plane *const pd = &xd->plane[j];
993 setup_pred_plane(&pd->dst, a_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
994 ctxt->tmp_height[j], ctxt->tmp_stride[j], 0, rel_mi_col,
995 NULL, pd->subsampling_x, pd->subsampling_y);
996 }
997
998 const int num_refs = 1 + has_second_ref(above_mbmi);
999
1000 for (int ref = 0; ref < num_refs; ++ref) {
1001 const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];
1002
1003 const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
1004 const struct scale_factors *const sf =
1005 get_ref_scale_factors_const(ctxt->cm, frame);
1006 xd->block_ref_scale_factors[ref] = sf;
1007 if ((!av1_is_valid_scale(sf)))
1008 aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
1009 "Reference frame has invalid dimensions");
1010 av1_setup_pre_planes(xd, ref, &ref_buf->buf, xd->mi_row, above_mi_col, sf,
1011 num_planes);
1012 }
1013
1014 xd->mb_to_left_edge = 8 * MI_SIZE * (-above_mi_col);
1015 xd->mb_to_right_edge =
1016 ctxt->mb_to_far_edge +
1017 (xd->width - rel_mi_col - above_mi_width) * MI_SIZE * 8;
1018 }
1019
av1_setup_build_prediction_by_left_pred(MACROBLOCKD * xd,int rel_mi_row,uint8_t left_mi_height,MB_MODE_INFO * left_mbmi,struct build_prediction_ctxt * ctxt,const int num_planes)1020 void av1_setup_build_prediction_by_left_pred(MACROBLOCKD *xd, int rel_mi_row,
1021 uint8_t left_mi_height,
1022 MB_MODE_INFO *left_mbmi,
1023 struct build_prediction_ctxt *ctxt,
1024 const int num_planes) {
1025 const BLOCK_SIZE l_bsize = AOMMAX(BLOCK_8X8, left_mbmi->bsize);
1026 const int left_mi_row = xd->mi_row + rel_mi_row;
1027
1028 modify_neighbor_predictor_for_obmc(left_mbmi);
1029
1030 for (int j = 0; j < num_planes; ++j) {
1031 struct macroblockd_plane *const pd = &xd->plane[j];
1032 setup_pred_plane(&pd->dst, l_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
1033 ctxt->tmp_height[j], ctxt->tmp_stride[j], rel_mi_row, 0,
1034 NULL, pd->subsampling_x, pd->subsampling_y);
1035 }
1036
1037 const int num_refs = 1 + has_second_ref(left_mbmi);
1038
1039 for (int ref = 0; ref < num_refs; ++ref) {
1040 const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];
1041
1042 const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
1043 const struct scale_factors *const ref_scale_factors =
1044 get_ref_scale_factors_const(ctxt->cm, frame);
1045
1046 xd->block_ref_scale_factors[ref] = ref_scale_factors;
1047 if ((!av1_is_valid_scale(ref_scale_factors)))
1048 aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
1049 "Reference frame has invalid dimensions");
1050 av1_setup_pre_planes(xd, ref, &ref_buf->buf, left_mi_row, xd->mi_col,
1051 ref_scale_factors, num_planes);
1052 }
1053
1054 xd->mb_to_top_edge = GET_MV_SUBPEL(MI_SIZE * (-left_mi_row));
1055 xd->mb_to_bottom_edge =
1056 ctxt->mb_to_far_edge +
1057 GET_MV_SUBPEL((xd->height - rel_mi_row - left_mi_height) * MI_SIZE);
1058 }
1059 #endif // CONFIG_AV1_DECODER
1060
combine_interintra(INTERINTRA_MODE mode,int8_t use_wedge_interintra,int8_t wedge_index,int8_t wedge_sign,BLOCK_SIZE bsize,BLOCK_SIZE plane_bsize,uint8_t * comppred,int compstride,const uint8_t * interpred,int interstride,const uint8_t * intrapred,int intrastride)1061 static inline void combine_interintra(
1062 INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
1063 int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
1064 uint8_t *comppred, int compstride, const uint8_t *interpred,
1065 int interstride, const uint8_t *intrapred, int intrastride) {
1066 const int bw = block_size_wide[plane_bsize];
1067 const int bh = block_size_high[plane_bsize];
1068
1069 if (use_wedge_interintra) {
1070 if (av1_is_wedge_used(bsize)) {
1071 const uint8_t *mask =
1072 av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
1073 const int subw = 2 * mi_size_wide[bsize] == bw;
1074 const int subh = 2 * mi_size_high[bsize] == bh;
1075 aom_blend_a64_mask(comppred, compstride, intrapred, intrastride,
1076 interpred, interstride, mask, block_size_wide[bsize],
1077 bw, bh, subw, subh);
1078 }
1079 return;
1080 }
1081
1082 const uint8_t *mask = smooth_interintra_mask_buf[mode][plane_bsize];
1083 aom_blend_a64_mask(comppred, compstride, intrapred, intrastride, interpred,
1084 interstride, mask, bw, bw, bh, 0, 0);
1085 }
1086
1087 #if CONFIG_AV1_HIGHBITDEPTH
combine_interintra_highbd(INTERINTRA_MODE mode,int8_t use_wedge_interintra,int8_t wedge_index,int8_t wedge_sign,BLOCK_SIZE bsize,BLOCK_SIZE plane_bsize,uint8_t * comppred8,int compstride,const uint8_t * interpred8,int interstride,const uint8_t * intrapred8,int intrastride,int bd)1088 static inline void combine_interintra_highbd(
1089 INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
1090 int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
1091 uint8_t *comppred8, int compstride, const uint8_t *interpred8,
1092 int interstride, const uint8_t *intrapred8, int intrastride, int bd) {
1093 const int bw = block_size_wide[plane_bsize];
1094 const int bh = block_size_high[plane_bsize];
1095
1096 if (use_wedge_interintra) {
1097 if (av1_is_wedge_used(bsize)) {
1098 const uint8_t *mask =
1099 av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
1100 const int subh = 2 * mi_size_high[bsize] == bh;
1101 const int subw = 2 * mi_size_wide[bsize] == bw;
1102 aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
1103 interpred8, interstride, mask,
1104 block_size_wide[bsize], bw, bh, subw, subh, bd);
1105 }
1106 return;
1107 }
1108
1109 uint8_t mask[MAX_SB_SQUARE];
1110 build_smooth_interintra_mask(mask, bw, plane_bsize, mode);
1111 aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
1112 interpred8, interstride, mask, bw, bw, bh, 0, 0,
1113 bd);
1114 }
1115 #endif
1116
av1_build_intra_predictors_for_interintra(const AV1_COMMON * cm,MACROBLOCKD * xd,BLOCK_SIZE bsize,int plane,const BUFFER_SET * ctx,uint8_t * dst,int dst_stride)1117 void av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm,
1118 MACROBLOCKD *xd,
1119 BLOCK_SIZE bsize, int plane,
1120 const BUFFER_SET *ctx,
1121 uint8_t *dst, int dst_stride) {
1122 struct macroblockd_plane *const pd = &xd->plane[plane];
1123 const int ssx = xd->plane[plane].subsampling_x;
1124 const int ssy = xd->plane[plane].subsampling_y;
1125 BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
1126 PREDICTION_MODE mode = interintra_to_intra_mode[xd->mi[0]->interintra_mode];
1127 assert(xd->mi[0]->angle_delta[PLANE_TYPE_Y] == 0);
1128 assert(xd->mi[0]->angle_delta[PLANE_TYPE_UV] == 0);
1129 assert(xd->mi[0]->filter_intra_mode_info.use_filter_intra == 0);
1130 assert(xd->mi[0]->use_intrabc == 0);
1131 const SequenceHeader *seq_params = cm->seq_params;
1132
1133 av1_predict_intra_block(xd, seq_params->sb_size,
1134 seq_params->enable_intra_edge_filter, pd->width,
1135 pd->height, max_txsize_rect_lookup[plane_bsize], mode,
1136 0, 0, FILTER_INTRA_MODES, ctx->plane[plane],
1137 ctx->stride[plane], dst, dst_stride, 0, 0, plane);
1138 }
1139
av1_combine_interintra(MACROBLOCKD * xd,BLOCK_SIZE bsize,int plane,const uint8_t * inter_pred,int inter_stride,const uint8_t * intra_pred,int intra_stride)1140 void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
1141 const uint8_t *inter_pred, int inter_stride,
1142 const uint8_t *intra_pred, int intra_stride) {
1143 const int ssx = xd->plane[plane].subsampling_x;
1144 const int ssy = xd->plane[plane].subsampling_y;
1145 const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
1146 #if CONFIG_AV1_HIGHBITDEPTH
1147 if (is_cur_buf_hbd(xd)) {
1148 combine_interintra_highbd(
1149 xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra,
1150 xd->mi[0]->interintra_wedge_index, INTERINTRA_WEDGE_SIGN, bsize,
1151 plane_bsize, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride,
1152 inter_pred, inter_stride, intra_pred, intra_stride, xd->bd);
1153 return;
1154 }
1155 #endif
1156 combine_interintra(
1157 xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra,
1158 xd->mi[0]->interintra_wedge_index, INTERINTRA_WEDGE_SIGN, bsize,
1159 plane_bsize, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride,
1160 inter_pred, inter_stride, intra_pred, intra_stride);
1161 }
1162
1163 // build interintra_predictors for one plane
av1_build_interintra_predictor(const AV1_COMMON * cm,MACROBLOCKD * xd,uint8_t * pred,int stride,const BUFFER_SET * ctx,int plane,BLOCK_SIZE bsize)1164 void av1_build_interintra_predictor(const AV1_COMMON *cm, MACROBLOCKD *xd,
1165 uint8_t *pred, int stride,
1166 const BUFFER_SET *ctx, int plane,
1167 BLOCK_SIZE bsize) {
1168 assert(bsize < BLOCK_SIZES_ALL);
1169 if (is_cur_buf_hbd(xd)) {
1170 DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]);
1171 av1_build_intra_predictors_for_interintra(
1172 cm, xd, bsize, plane, ctx, CONVERT_TO_BYTEPTR(intrapredictor),
1173 MAX_SB_SIZE);
1174 av1_combine_interintra(xd, bsize, plane, pred, stride,
1175 CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE);
1176 } else {
1177 DECLARE_ALIGNED(16, uint8_t, intrapredictor[MAX_SB_SQUARE]);
1178 av1_build_intra_predictors_for_interintra(cm, xd, bsize, plane, ctx,
1179 intrapredictor, MAX_SB_SIZE);
1180 av1_combine_interintra(xd, bsize, plane, pred, stride, intrapredictor,
1181 MAX_SB_SIZE);
1182 }
1183 }
1184