xref: /aosp_15_r20/external/libaom/av1/encoder/encodemv.c (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <math.h>
13 
14 #include "av1/common/common.h"
15 #include "av1/common/entropymode.h"
16 
17 #include "av1/encoder/cost.h"
18 #include "av1/encoder/encodemv.h"
19 
20 #include "aom_dsp/aom_dsp_common.h"
21 #include "aom_ports/bitops.h"
22 
// Updates the CDFs in `mvcomp` for one non-zero motion vector difference
// component `comp`. The symbols and the order in which their CDFs are updated
// mirror what encode_mv_component() writes for the same inputs.
//
//   comp      - signed component value; must be non-zero.
//   mvcomp    - per-component CDF set whose entries are passed to update_cdf().
//   precision - selects whether the fractional and high precision CDFs are
//               touched.
static void update_mv_component_stats(int comp, nmv_component *mvcomp,
                                      MvSubpelPrecision precision) {
  assert(comp != 0);

  const int is_negative = comp < 0;
  const int magnitude = is_negative ? -comp : comp;

  // av1_get_mv_class() returns the class and stores the offset within it.
  int class_offset;
  const int mv_class = av1_get_mv_class(magnitude - 1, &class_offset);
  const int is_class0 = (mv_class == MV_CLASS_0);

  // Split the offset: lowest bit is the high precision bit, the next two bits
  // are the fractional data and everything above is the integer data.
  const int int_part = class_offset >> 3;
  const int frac_part = (class_offset >> 1) & 3;
  const int hp_bit = class_offset & 1;

  // Sign, then class.
  update_cdf(mvcomp->sign_cdf, is_negative, 2);
  update_cdf(mvcomp->classes_cdf, mv_class, MV_CLASSES);

  // Integer data: a single symbol for class 0, otherwise one binary symbol
  // per bit, least significant bit first.
  if (is_class0) {
    update_cdf(mvcomp->class0_cdf, int_part, CLASS0_SIZE);
  } else {
    const int num_bits = mv_class + CLASS0_BITS - 1;
    int bit = 0;
    while (bit < num_bits) {
      update_cdf(mvcomp->bits_cdf[bit], (int_part >> bit) & 1, 2);
      ++bit;
    }
  }

  // Fractional data; class 0 uses a CDF selected by the integer data.
  if (precision > MV_SUBPEL_NONE) {
    if (is_class0) {
      update_cdf(mvcomp->class0_fp_cdf[int_part], frac_part, MV_FP_SIZE);
    } else {
      update_cdf(mvcomp->fp_cdf, frac_part, MV_FP_SIZE);
    }
  }

  // High precision bit.
  if (precision > MV_SUBPEL_LOW_PRECISION) {
    if (is_class0) {
      update_cdf(mvcomp->class0_hp_cdf, hp_bit, 2);
    } else {
      update_cdf(mvcomp->hp_cdf, hp_bit, 2);
    }
  }
}
62 
// Updates the motion vector CDFs in `mvctx` for the difference between `mv`
// and its reference `ref`: first the joint CDF, then the row component
// (comps[0]) and/or the column component (comps[1]) as indicated by the joint
// type.
void av1_update_mv_stats(const MV *mv, const MV *ref, nmv_context *mvctx,
                         MvSubpelPrecision precision) {
  const MV delta = { mv->row - ref->row, mv->col - ref->col };
  const MV_JOINT_TYPE joint = av1_get_mv_joint(&delta);

  update_cdf(mvctx->joints_cdf, joint, MV_JOINTS);

  // Row (vertical) component.
  if (mv_joint_vertical(joint)) {
    update_mv_component_stats(delta.row, &mvctx->comps[0], precision);
  }

  // Column (horizontal) component.
  if (mv_joint_horizontal(joint)) {
    update_mv_component_stats(delta.col, &mvctx->comps[1], precision);
  }
}
76 
// Writes one non-zero motion vector difference component `comp` to `w` using
// the CDFs in `mvcomp`. Symbols are emitted in this order: sign, class,
// integer data, then (depending on `precision`) fractional data and the high
// precision bit.
static void encode_mv_component(aom_writer *w, int comp, nmv_component *mvcomp,
                                MvSubpelPrecision precision) {
  assert(comp != 0);

  const int is_negative = comp < 0;
  const int magnitude = is_negative ? -comp : comp;

  // av1_get_mv_class() returns the class and stores the offset within it.
  int class_offset;
  const int mv_class = av1_get_mv_class(magnitude - 1, &class_offset);
  const int is_class0 = (mv_class == MV_CLASS_0);

  // Split the offset: lowest bit is the high precision bit, the next two bits
  // are the fractional data and everything above is the integer data.
  const int int_part = class_offset >> 3;
  const int frac_part = (class_offset >> 1) & 3;
  const int hp_bit = class_offset & 1;

  // Sign, then class.
  aom_write_symbol(w, is_negative, mvcomp->sign_cdf, 2);
  aom_write_symbol(w, mv_class, mvcomp->classes_cdf, MV_CLASSES);

  // Integer data: a single symbol for class 0, otherwise one binary symbol
  // per bit, least significant bit first.
  if (is_class0) {
    aom_write_symbol(w, int_part, mvcomp->class0_cdf, CLASS0_SIZE);
  } else {
    const int num_bits = mv_class + CLASS0_BITS - 1;
    int bit = 0;
    while (bit < num_bits) {
      aom_write_symbol(w, (int_part >> bit) & 1, mvcomp->bits_cdf[bit], 2);
      ++bit;
    }
  }

  // Fractional data; class 0 uses a CDF selected by the integer data.
  if (precision > MV_SUBPEL_NONE) {
    if (is_class0) {
      aom_write_symbol(w, frac_part, mvcomp->class0_fp_cdf[int_part],
                       MV_FP_SIZE);
    } else {
      aom_write_symbol(w, frac_part, mvcomp->fp_cdf, MV_FP_SIZE);
    }
  }

  // High precision bit.
  if (precision > MV_SUBPEL_LOW_PRECISION) {
    if (is_class0) {
      aom_write_symbol(w, hp_bit, mvcomp->class0_hp_cdf, 2);
    } else {
      aom_write_symbol(w, hp_bit, mvcomp->hp_cdf, 2);
    }
  }
}
117 
118 /* TODO([email protected]): This function writes MV_VALS ints or 128 KiB. This
119  *   is more than most L1D caches and is a significant chunk of L2. Write
120  *   SIMD that uses streaming writes to avoid loading all of that into L1, or
121  *   just don't update the larger component costs every time this called
122  *   (or both).
123  */
av1_build_nmv_component_cost_table(int * mvcost,const nmv_component * const mvcomp,MvSubpelPrecision precision)124 void av1_build_nmv_component_cost_table(int *mvcost,
125                                         const nmv_component *const mvcomp,
126                                         MvSubpelPrecision precision) {
127   int i, j, v, o, mantissa;
128   int sign_cost[2], class_cost[MV_CLASSES], class0_cost[CLASS0_SIZE];
129   int bits_cost[MV_OFFSET_BITS][2];
130   int class0_fp_cost[CLASS0_SIZE][MV_FP_SIZE] = { 0 },
131       fp_cost[MV_FP_SIZE] = { 0 };
132   int class0_hp_cost[2] = { 0 }, hp_cost[2] = { 0 };
133 
134   av1_cost_tokens_from_cdf(sign_cost, mvcomp->sign_cdf, NULL);
135   av1_cost_tokens_from_cdf(class_cost, mvcomp->classes_cdf, NULL);
136   av1_cost_tokens_from_cdf(class0_cost, mvcomp->class0_cdf, NULL);
137   for (i = 0; i < MV_OFFSET_BITS; ++i) {
138     av1_cost_tokens_from_cdf(bits_cost[i], mvcomp->bits_cdf[i], NULL);
139   }
140 
141   if (precision > MV_SUBPEL_NONE) {
142     for (i = 0; i < CLASS0_SIZE; ++i)
143       av1_cost_tokens_from_cdf(class0_fp_cost[i], mvcomp->class0_fp_cdf[i],
144                                NULL);
145     av1_cost_tokens_from_cdf(fp_cost, mvcomp->fp_cdf, NULL);
146   }
147 
148   if (precision > MV_SUBPEL_LOW_PRECISION) {
149     av1_cost_tokens_from_cdf(class0_hp_cost, mvcomp->class0_hp_cdf, NULL);
150     av1_cost_tokens_from_cdf(hp_cost, mvcomp->hp_cdf, NULL);
151   }
152 
153   // Instead of accumulating the cost of each vector component's bits
154   //   individually, compute the costs based on smaller vectors. Costs for
155   //   [2^exp, 2 * 2^exp - 1] are calculated based on [0, 2^exp - 1]
156   //   respectively. Offsets are maintained to swap both 1) class costs when
157   //   treated as a complete vector component with the highest set bit when
158   //   treated as a mantissa (significand) and 2) leading zeros to account for
159   //   the current exponent.
160 
161   // Cost offsets
162   int cost_swap[MV_OFFSET_BITS] = { 0 };
163   // Delta to convert positive vector to negative vector costs
164   int negate_sign = sign_cost[1] - sign_cost[0];
165 
166   // Initialize with offsets to swap the class costs with the costs of the
167   //   highest set bit.
168   for (i = 1; i < MV_OFFSET_BITS; ++i) {
169     cost_swap[i] = bits_cost[i - 1][1];
170     if (i > CLASS0_BITS) cost_swap[i] -= class_cost[i - CLASS0_BITS];
171   }
172 
173   // Seed the fractional costs onto the output (overwritten latter).
174   for (o = 0; o < MV_FP_SIZE; ++o) {
175     int hp;
176     for (hp = 0; hp < 2; ++hp) {
177       v = 2 * o + hp + 1;
178       mvcost[v] = fp_cost[o] + hp_cost[hp] + sign_cost[0];
179     }
180   }
181 
182   mvcost[0] = 0;
183   // Fill the costs for each exponent's vectors, using the costs set in the
184   //   previous exponents.
185   for (i = 0; i < MV_OFFSET_BITS; ++i) {
186     const int exponent = (2 * MV_FP_SIZE) << i;
187 
188     int class = 0;
189     if (i >= CLASS0_BITS) {
190       class = class_cost[i - CLASS0_BITS + 1];
191     }
192 
193     // Iterate through mantissas, keeping track of the location
194     //   of the highest set bit for the mantissa.
195     // To be clear: in the outer loop, the position of the highest set bit
196     //   (exponent) is tracked and, in this loop, the highest set bit of the
197     //   mantissa is tracked.
198     mantissa = 0;
199     for (j = 0; j <= i; ++j) {
200       for (; mantissa < (2 * MV_FP_SIZE) << j; ++mantissa) {
201         int cost = mvcost[mantissa + 1] + class + cost_swap[j];
202         v = exponent + mantissa + 1;
203         mvcost[v] = cost;
204         mvcost[-v] = cost + negate_sign;
205       }
206       cost_swap[j] += bits_cost[i][0];
207     }
208   }
209 
210   // Special case to avoid buffer overrun
211   {
212     int exponent = (2 * MV_FP_SIZE) << MV_OFFSET_BITS;
213     int class = class_cost[MV_CLASSES - 1];
214     mantissa = 0;
215     for (j = 0; j < MV_OFFSET_BITS; ++j) {
216       for (; mantissa < (2 * MV_FP_SIZE) << j; ++mantissa) {
217         int cost = mvcost[mantissa + 1] + class + cost_swap[j];
218         v = exponent + mantissa + 1;
219         mvcost[v] = cost;
220         mvcost[-v] = cost + negate_sign;
221       }
222     }
223     // At this point: mantissa = exponent >> 1
224 
225     // Manually calculate the final cost offset
226     int cost_swap_hi =
227         bits_cost[MV_OFFSET_BITS - 1][1] - class_cost[MV_CLASSES - 2];
228     for (; mantissa < exponent - 1; ++mantissa) {
229       int cost = mvcost[mantissa + 1] + class + cost_swap_hi;
230       v = exponent + mantissa + 1;
231       mvcost[v] = cost;
232       mvcost[-v] = cost + negate_sign;
233     }
234   }
235 
236   // Fill costs for class0 vectors, overwriting previous placeholder values
237   //   used for calculating the costs of the larger vectors.
238   for (i = 0; i < CLASS0_SIZE; ++i) {
239     const int top = i * 2 * MV_FP_SIZE;
240     for (o = 0; o < MV_FP_SIZE; ++o) {
241       int hp;
242       int cost = class0_fp_cost[i][o] + class_cost[0] + class0_cost[i];
243       for (hp = 0; hp < 2; ++hp) {
244         v = top + 2 * o + hp + 1;
245         mvcost[v] = cost + class0_hp_cost[hp] + sign_cost[0];
246         mvcost[-v] = cost + class0_hp_cost[hp] + sign_cost[1];
247       }
248     }
249   }
250 }
251 
// Writes the difference between `mv` and its reference `ref` to `w` using the
// CDFs in `mvctx`: the joint type first, then the row and/or column component.
// `usehp` is forwarded as the precision argument of encode_mv_component()
// unless the current frame forces integer MVs, in which case MV_SUBPEL_NONE is
// used. When auto_mv_step_size is enabled, td->max_mv_magnitude is raised to
// cover this vector.
void av1_encode_mv(AV1_COMP *cpi, aom_writer *w, ThreadData *td, const MV *mv,
                   const MV *ref, nmv_context *mvctx, int usehp) {
  const MV delta = { mv->row - ref->row, mv->col - ref->col };
  const MV_JOINT_TYPE joint = av1_get_mv_joint(&delta);
  // If the mv_diff is zero, then we should have used near or nearest instead.
  assert(joint != MV_JOINT_ZERO);

  const int precision = cpi->common.features.cur_frame_force_integer_mv
                            ? MV_SUBPEL_NONE
                            : usehp;

  aom_write_symbol(w, joint, mvctx->joints_cdf, MV_JOINTS);

  if (mv_joint_vertical(joint)) {
    encode_mv_component(w, delta.row, &mvctx->comps[0], precision);
  }

  if (mv_joint_horizontal(joint)) {
    encode_mv_component(w, delta.col, &mvctx->comps[1], precision);
  }

  // If auto_mv_step_size is enabled then keep track of the largest
  // motion vector component used.
  if (cpi->sf.mv_sf.auto_mv_step_size) {
    const int abs_row = abs(mv->row);
    const int abs_col = abs(mv->col);
    const int largest = AOMMAX(abs_row, abs_col) >> 3;
    td->max_mv_magnitude = AOMMAX(largest, td->max_mv_magnitude);
  }
}
275 
// Writes the difference between the displacement vector `mv` and its
// reference `ref` to `w` using the CDFs in `mvctx`. Components are always
// coded with MV_SUBPEL_NONE precision.
void av1_encode_dv(aom_writer *w, const MV *mv, const MV *ref,
                   nmv_context *mvctx) {
  // DV and ref DV should not have sub-pel: the low three bits of every
  // component must be zero.
  assert((mv->col & 7) == 0);
  assert((mv->row & 7) == 0);
  assert((ref->col & 7) == 0);
  assert((ref->row & 7) == 0);

  const MV delta = { mv->row - ref->row, mv->col - ref->col };
  const MV_JOINT_TYPE joint = av1_get_mv_joint(&delta);

  aom_write_symbol(w, joint, mvctx->joints_cdf, MV_JOINTS);

  if (mv_joint_vertical(joint)) {
    encode_mv_component(w, delta.row, &mvctx->comps[0], MV_SUBPEL_NONE);
  }

  if (mv_joint_horizontal(joint)) {
    encode_mv_component(w, delta.col, &mvctx->comps[1], MV_SUBPEL_NONE);
  }
}
293 
// Fills `mvjoint` with the costs derived from ctx->joints_cdf and builds the
// per-component cost tables mvcost[0] and mvcost[1] from ctx->comps[0] and
// ctx->comps[1] respectively.
void av1_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
                              const nmv_context *ctx,
                              MvSubpelPrecision precision) {
  av1_cost_tokens_from_cdf(mvjoint, ctx->joints_cdf, NULL);

  for (int comp = 0; comp < 2; ++comp) {
    av1_build_nmv_component_cost_table(mvcost[comp], &ctx->comps[comp],
                                       precision);
  }
}
301 
av1_get_ref_mv_from_stack(int ref_idx,const MV_REFERENCE_FRAME * ref_frame,int ref_mv_idx,const MB_MODE_INFO_EXT * mbmi_ext)302 int_mv av1_get_ref_mv_from_stack(int ref_idx,
303                                  const MV_REFERENCE_FRAME *ref_frame,
304                                  int ref_mv_idx,
305                                  const MB_MODE_INFO_EXT *mbmi_ext) {
306   const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
307   const CANDIDATE_MV *curr_ref_mv_stack =
308       mbmi_ext->ref_mv_stack[ref_frame_type];
309 
310   if (ref_frame[1] > INTRA_FRAME) {
311     assert(ref_idx == 0 || ref_idx == 1);
312     return ref_idx ? curr_ref_mv_stack[ref_mv_idx].comp_mv
313                    : curr_ref_mv_stack[ref_mv_idx].this_mv;
314   }
315 
316   assert(ref_idx == 0);
317   return ref_mv_idx < mbmi_ext->ref_mv_count[ref_frame_type]
318              ? curr_ref_mv_stack[ref_mv_idx].this_mv
319              : mbmi_ext->global_mvs[ref_frame_type];
320 }
321 
av1_get_ref_mv(const MACROBLOCK * x,int ref_idx)322 int_mv av1_get_ref_mv(const MACROBLOCK *x, int ref_idx) {
323   const MACROBLOCKD *xd = &x->e_mbd;
324   const MB_MODE_INFO *mbmi = xd->mi[0];
325   int ref_mv_idx = mbmi->ref_mv_idx;
326   if (mbmi->mode == NEAR_NEWMV || mbmi->mode == NEW_NEARMV) {
327     assert(has_second_ref(mbmi));
328     ref_mv_idx += 1;
329   }
330   return av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame, ref_mv_idx,
331                                    &x->mbmi_ext);
332 }
333 
// Fetches stack entries 0 and 1 for the single reference `ref_frame` into
// *nearest_mv and *near_mv, passing each through lower_mv_precision() with
// `allow_hp` and `is_integer`.
void av1_find_best_ref_mvs_from_stack(int allow_hp,
                                      const MB_MODE_INFO_EXT *mbmi_ext,
                                      MV_REFERENCE_FRAME ref_frame,
                                      int_mv *nearest_mv, int_mv *near_mv,
                                      int is_integer) {
  // Single-reference lookup: second slot is NONE_FRAME, reference index is 0.
  MV_REFERENCE_FRAME single_ref[2] = { ref_frame, NONE_FRAME };

  *nearest_mv = av1_get_ref_mv_from_stack(0, single_ref, 0, mbmi_ext);
  lower_mv_precision(&nearest_mv->as_mv, allow_hp, is_integer);

  *near_mv = av1_get_ref_mv_from_stack(0, single_ref, 1, mbmi_ext);
  lower_mv_precision(&near_mv->as_mv, allow_hp, is_integer);
}
346