xref: /aosp_15_r20/external/libaom/aom_dsp/flow_estimation/ransac.c (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <memory.h>
13 #include <math.h>
14 #include <time.h>
15 #include <stdio.h>
16 #include <stdbool.h>
17 #include <string.h>
18 #include <assert.h>
19 
20 #include "aom_dsp/flow_estimation/ransac.h"
21 #include "aom_dsp/mathutils.h"
22 #include "aom_mem/aom_mem.h"
23 
24 // TODO(rachelbarker): Remove dependence on code in av1/encoder/
25 #include "av1/encoder/random.h"
26 
27 #define MAX_MINPTS 4
28 #define MINPTS_MULTIPLIER 5
29 
30 #define INLIER_THRESHOLD 1.25
31 #define INLIER_THRESHOLD_SQUARED (INLIER_THRESHOLD * INLIER_THRESHOLD)
32 
33 // Number of initial models to generate
34 #define NUM_TRIALS 20
35 
36 // Number of times to refine the best model found
37 #define NUM_REFINES 5
38 
39 // Flag to enable functions for finding TRANSLATION type models.
40 //
41 // These modes are not considered currently due to a spec bug (see comments
42 // in gm_get_motion_vector() in av1/common/mv.h). Thus we don't need to compile
43 // the corresponding search functions, but it is nice to keep the source around
44 // but disabled, for completeness.
45 #define ALLOW_TRANSLATION_MODELS 0
46 
47 typedef struct {
48   int num_inliers;
49   double sse;  // Sum of squared errors of inliers
50   int *inlier_indices;
51 } RANSAC_MOTION;
52 
53 ////////////////////////////////////////////////////////////////////////////////
54 // ransac
55 typedef bool (*FindTransformationFunc)(const Correspondence *points,
56                                        const int *indices, int num_indices,
57                                        double *params);
58 typedef void (*ScoreModelFunc)(const double *mat, const Correspondence *points,
59                                int num_points, RANSAC_MOTION *model);
60 
61 // vtable-like structure which stores all of the information needed by RANSAC
62 // for a particular model type
63 typedef struct {
64   FindTransformationFunc find_transformation;
65   ScoreModelFunc score_model;
66 
67   // The minimum number of points which can be passed to find_transformation
68   // to generate a model.
69   //
70   // This should be set as small as possible. This is due to an observation
71   // from section 4 of "Optimal Ransac" by A. Hast, J. Nysjö and
72   // A. Marchetti (https://dspace5.zcu.cz/bitstream/11025/6869/1/Hast.pdf):
73   // using the minimum possible number of points in the initial model maximizes
74   // the chance that all of the selected points are inliers.
75   //
76   // That paper proposes a method which can deal with models which are
77   // contaminated by outliers, which helps in cases where the inlier fraction
78   // is low. However, for our purposes, global motion only gives significant
79   // gains when the inlier fraction is high.
80   //
81   // So we do not use the method from this paper, but we do find that
82   // minimizing the number of points used for initial model fitting helps
83   // make the best use of the limited number of models we consider.
84   int minpts;
85 } RansacModelInfo;
86 
87 #if ALLOW_TRANSLATION_MODELS
score_translation(const double * mat,const Correspondence * points,int num_points,RANSAC_MOTION * model)88 static void score_translation(const double *mat, const Correspondence *points,
89                               int num_points, RANSAC_MOTION *model) {
90   model->num_inliers = 0;
91   model->sse = 0.0;
92 
93   for (int i = 0; i < num_points; ++i) {
94     const double x1 = points[i].x;
95     const double y1 = points[i].y;
96     const double x2 = points[i].rx;
97     const double y2 = points[i].ry;
98 
99     const double proj_x = x1 + mat[0];
100     const double proj_y = y1 + mat[1];
101 
102     const double dx = proj_x - x2;
103     const double dy = proj_y - y2;
104     const double sse = dx * dx + dy * dy;
105 
106     if (sse < INLIER_THRESHOLD_SQUARED) {
107       model->inlier_indices[model->num_inliers++] = i;
108       model->sse += sse;
109     }
110   }
111 }
112 #endif  // ALLOW_TRANSLATION_MODELS
113 
score_affine(const double * mat,const Correspondence * points,int num_points,RANSAC_MOTION * model)114 static void score_affine(const double *mat, const Correspondence *points,
115                          int num_points, RANSAC_MOTION *model) {
116   model->num_inliers = 0;
117   model->sse = 0.0;
118 
119   for (int i = 0; i < num_points; ++i) {
120     const double x1 = points[i].x;
121     const double y1 = points[i].y;
122     const double x2 = points[i].rx;
123     const double y2 = points[i].ry;
124 
125     const double proj_x = mat[2] * x1 + mat[3] * y1 + mat[0];
126     const double proj_y = mat[4] * x1 + mat[5] * y1 + mat[1];
127 
128     const double dx = proj_x - x2;
129     const double dy = proj_y - y2;
130     const double sse = dx * dx + dy * dy;
131 
132     if (sse < INLIER_THRESHOLD_SQUARED) {
133       model->inlier_indices[model->num_inliers++] = i;
134       model->sse += sse;
135     }
136   }
137 }
138 
139 #if ALLOW_TRANSLATION_MODELS
find_translation(const Correspondence * points,const int * indices,int num_indices,double * params)140 static bool find_translation(const Correspondence *points, const int *indices,
141                              int num_indices, double *params) {
142   double sumx = 0;
143   double sumy = 0;
144 
145   for (int i = 0; i < num_indices; ++i) {
146     int index = indices[i];
147     const double sx = points[index].x;
148     const double sy = points[index].y;
149     const double dx = points[index].rx;
150     const double dy = points[index].ry;
151 
152     sumx += dx - sx;
153     sumy += dy - sy;
154   }
155 
156   params[0] = sumx / np;
157   params[1] = sumy / np;
158   params[2] = 1;
159   params[3] = 0;
160   params[4] = 0;
161   params[5] = 1;
162   return true;
163 }
164 #endif  // ALLOW_TRANSLATION_MODELS
165 
find_rotzoom(const Correspondence * points,const int * indices,int num_indices,double * params)166 static bool find_rotzoom(const Correspondence *points, const int *indices,
167                          int num_indices, double *params) {
168   const int n = 4;    // Size of least-squares problem
169   double mat[4 * 4];  // Accumulator for A'A
170   double y[4];        // Accumulator for A'b
171   double a[4];        // Single row of A
172   double b;           // Single element of b
173 
174   least_squares_init(mat, y, n);
175   for (int i = 0; i < num_indices; ++i) {
176     int index = indices[i];
177     const double sx = points[index].x;
178     const double sy = points[index].y;
179     const double dx = points[index].rx;
180     const double dy = points[index].ry;
181 
182     a[0] = 1;
183     a[1] = 0;
184     a[2] = sx;
185     a[3] = sy;
186     b = dx;
187     least_squares_accumulate(mat, y, a, b, n);
188 
189     a[0] = 0;
190     a[1] = 1;
191     a[2] = sy;
192     a[3] = -sx;
193     b = dy;
194     least_squares_accumulate(mat, y, a, b, n);
195   }
196 
197   // Fill in params[0] .. params[3] with output model
198   if (!least_squares_solve(mat, y, params, n)) {
199     return false;
200   }
201 
202   // Fill in remaining parameters
203   params[4] = -params[3];
204   params[5] = params[2];
205 
206   return true;
207 }
208 
find_affine(const Correspondence * points,const int * indices,int num_indices,double * params)209 static bool find_affine(const Correspondence *points, const int *indices,
210                         int num_indices, double *params) {
211   // Note: The least squares problem for affine models is 6-dimensional,
212   // but it splits into two independent 3-dimensional subproblems.
213   // Solving these two subproblems separately and recombining at the end
214   // results in less total computation than solving the 6-dimensional
215   // problem directly.
216   //
217   // The two subproblems correspond to all the parameters which contribute
218   // to the x output of the model, and all the parameters which contribute
219   // to the y output, respectively.
220 
221   const int n = 3;       // Size of each least-squares problem
222   double mat[2][3 * 3];  // Accumulator for A'A
223   double y[2][3];        // Accumulator for A'b
224   double x[2][3];        // Output vector
225   double a[2][3];        // Single row of A
226   double b[2];           // Single element of b
227 
228   least_squares_init(mat[0], y[0], n);
229   least_squares_init(mat[1], y[1], n);
230   for (int i = 0; i < num_indices; ++i) {
231     int index = indices[i];
232     const double sx = points[index].x;
233     const double sy = points[index].y;
234     const double dx = points[index].rx;
235     const double dy = points[index].ry;
236 
237     a[0][0] = 1;
238     a[0][1] = sx;
239     a[0][2] = sy;
240     b[0] = dx;
241     least_squares_accumulate(mat[0], y[0], a[0], b[0], n);
242 
243     a[1][0] = 1;
244     a[1][1] = sx;
245     a[1][2] = sy;
246     b[1] = dy;
247     least_squares_accumulate(mat[1], y[1], a[1], b[1], n);
248   }
249 
250   if (!least_squares_solve(mat[0], y[0], x[0], n)) {
251     return false;
252   }
253   if (!least_squares_solve(mat[1], y[1], x[1], n)) {
254     return false;
255   }
256 
257   // Rearrange least squares result to form output model
258   params[0] = x[0][0];
259   params[1] = x[1][0];
260   params[2] = x[0][1];
261   params[3] = x[0][2];
262   params[4] = x[1][1];
263   params[5] = x[1][2];
264 
265   return true;
266 }
267 
268 // Return -1 if 'a' is a better motion, 1 if 'b' is better, 0 otherwise.
compare_motions(const void * arg_a,const void * arg_b)269 static int compare_motions(const void *arg_a, const void *arg_b) {
270   const RANSAC_MOTION *motion_a = (RANSAC_MOTION *)arg_a;
271   const RANSAC_MOTION *motion_b = (RANSAC_MOTION *)arg_b;
272 
273   if (motion_a->num_inliers > motion_b->num_inliers) return -1;
274   if (motion_a->num_inliers < motion_b->num_inliers) return 1;
275   if (motion_a->sse < motion_b->sse) return -1;
276   if (motion_a->sse > motion_b->sse) return 1;
277   return 0;
278 }
279 
is_better_motion(const RANSAC_MOTION * motion_a,const RANSAC_MOTION * motion_b)280 static bool is_better_motion(const RANSAC_MOTION *motion_a,
281                              const RANSAC_MOTION *motion_b) {
282   return compare_motions(motion_a, motion_b) < 0;
283 }
284 
285 // Returns true on success, false on error
ransac_internal(const Correspondence * matched_points,int npoints,MotionModel * motion_models,int num_desired_motions,const RansacModelInfo * model_info,bool * mem_alloc_failed)286 static bool ransac_internal(const Correspondence *matched_points, int npoints,
287                             MotionModel *motion_models, int num_desired_motions,
288                             const RansacModelInfo *model_info,
289                             bool *mem_alloc_failed) {
290   assert(npoints >= 0);
291   int i = 0;
292   int minpts = model_info->minpts;
293   bool ret_val = true;
294 
295   unsigned int seed = (unsigned int)npoints;
296 
297   int indices[MAX_MINPTS] = { 0 };
298 
299   // Store information for the num_desired_motions best transformations found
300   // and the worst motion among them, as well as the motion currently under
301   // consideration.
302   RANSAC_MOTION *motions, *worst_kept_motion = NULL;
303   RANSAC_MOTION current_motion;
304 
305   // Store the parameters and the indices of the inlier points for the motion
306   // currently under consideration.
307   double params_this_motion[MAX_PARAMDIM];
308 
309   // Initialize output models, as a fallback in case we can't find a model
310   for (i = 0; i < num_desired_motions; i++) {
311     memcpy(motion_models[i].params, kIdentityParams,
312            MAX_PARAMDIM * sizeof(*(motion_models[i].params)));
313     motion_models[i].num_inliers = 0;
314   }
315 
316   if (npoints < minpts * MINPTS_MULTIPLIER || npoints == 0) {
317     return false;
318   }
319 
320   int min_inliers = AOMMAX((int)(MIN_INLIER_PROB * npoints), minpts);
321 
322   motions =
323       (RANSAC_MOTION *)aom_calloc(num_desired_motions, sizeof(RANSAC_MOTION));
324 
325   // Allocate one large buffer which will be carved up to store the inlier
326   // indices for the current motion plus the num_desired_motions many
327   // output models
328   // This allows us to keep the allocation/deallocation logic simple, without
329   // having to (for example) check that `motions` is non-null before allocating
330   // the inlier arrays
331   int *inlier_buffer = (int *)aom_malloc(sizeof(*inlier_buffer) * npoints *
332                                          (num_desired_motions + 1));
333 
334   if (!(motions && inlier_buffer)) {
335     ret_val = false;
336     *mem_alloc_failed = true;
337     goto finish_ransac;
338   }
339 
340   // Once all our allocations are known-good, we can fill in our structures
341   worst_kept_motion = motions;
342 
343   for (i = 0; i < num_desired_motions; ++i) {
344     motions[i].inlier_indices = inlier_buffer + i * npoints;
345   }
346   memset(&current_motion, 0, sizeof(current_motion));
347   current_motion.inlier_indices = inlier_buffer + num_desired_motions * npoints;
348 
349   for (int trial_count = 0; trial_count < NUM_TRIALS; trial_count++) {
350     lcg_pick(npoints, minpts, indices, &seed);
351 
352     if (!model_info->find_transformation(matched_points, indices, minpts,
353                                          params_this_motion)) {
354       continue;
355     }
356 
357     model_info->score_model(params_this_motion, matched_points, npoints,
358                             &current_motion);
359 
360     if (current_motion.num_inliers < min_inliers) {
361       // Reject models with too few inliers
362       continue;
363     }
364 
365     if (is_better_motion(&current_motion, worst_kept_motion)) {
366       // This motion is better than the worst currently kept motion. Remember
367       // the inlier points and sse. The parameters for each kept motion
368       // will be recomputed later using only the inliers.
369       worst_kept_motion->num_inliers = current_motion.num_inliers;
370       worst_kept_motion->sse = current_motion.sse;
371 
372       // Rather than copying the (potentially many) inlier indices from
373       // current_motion.inlier_indices to worst_kept_motion->inlier_indices,
374       // we can swap the underlying pointers.
375       //
376       // This is okay because the next time current_motion.inlier_indices
377       // is used will be in the next trial, where we ignore its previous
378       // contents anyway. And both arrays will be deallocated together at the
379       // end of this function, so there are no lifetime issues.
380       int *tmp = worst_kept_motion->inlier_indices;
381       worst_kept_motion->inlier_indices = current_motion.inlier_indices;
382       current_motion.inlier_indices = tmp;
383 
384       // Determine the new worst kept motion and its num_inliers and sse.
385       for (i = 0; i < num_desired_motions; ++i) {
386         if (is_better_motion(worst_kept_motion, &motions[i])) {
387           worst_kept_motion = &motions[i];
388         }
389       }
390     }
391   }
392 
393   // Sort the motions, best first.
394   qsort(motions, num_desired_motions, sizeof(RANSAC_MOTION), compare_motions);
395 
396   // Refine each of the best N models using iterative estimation.
397   //
398   // The idea here is loosely based on the iterative method from
399   // "Locally Optimized RANSAC" by O. Chum, J. Matas and Josef Kittler:
400   // https://cmp.felk.cvut.cz/ftp/articles/matas/chum-dagm03.pdf
401   //
402   // However, we implement a simpler version than their proposal, and simply
403   // refit the model repeatedly until the number of inliers stops increasing,
404   // with a cap on the number of iterations to defend against edge cases which
405   // only improve very slowly.
406   for (i = 0; i < num_desired_motions; ++i) {
407     if (motions[i].num_inliers <= 0) {
408       // Output model has already been initialized to the identity model,
409       // so just skip setup
410       continue;
411     }
412 
413     bool bad_model = false;
414     for (int refine_count = 0; refine_count < NUM_REFINES; refine_count++) {
415       int num_inliers = motions[i].num_inliers;
416       assert(num_inliers >= min_inliers);
417 
418       if (!model_info->find_transformation(matched_points,
419                                            motions[i].inlier_indices,
420                                            num_inliers, params_this_motion)) {
421         // In the unlikely event that this model fitting fails, we don't have a
422         // good fallback. So leave this model set to the identity model
423         bad_model = true;
424         break;
425       }
426 
427       // Score the newly generated model
428       model_info->score_model(params_this_motion, matched_points, npoints,
429                               &current_motion);
430 
431       // At this point, there are three possibilities:
432       // 1) If we found more inliers, keep refining.
433       // 2) If we found the same number of inliers but a lower SSE, we want to
434       //    keep the new model, but further refinement is unlikely to gain much.
435       //    So commit to this new model
436       // 3) It is possible, but very unlikely, that the new model will have
437       //    fewer inliers. If it does happen, we probably just lost a few
438       //    borderline inliers. So treat the same as case (2).
439       if (current_motion.num_inliers > motions[i].num_inliers) {
440         motions[i].num_inliers = current_motion.num_inliers;
441         motions[i].sse = current_motion.sse;
442         int *tmp = motions[i].inlier_indices;
443         motions[i].inlier_indices = current_motion.inlier_indices;
444         current_motion.inlier_indices = tmp;
445       } else {
446         // Refined model is no better, so stop
447         // This shouldn't be significantly worse than the previous model,
448         // so it's fine to use the parameters in params_this_motion.
449         // This saves us from having to cache the previous iteration's params.
450         break;
451       }
452     }
453 
454     if (bad_model) continue;
455 
456     // Fill in output struct
457     memcpy(motion_models[i].params, params_this_motion,
458            MAX_PARAMDIM * sizeof(*motion_models[i].params));
459     for (int j = 0; j < motions[i].num_inliers; j++) {
460       int index = motions[i].inlier_indices[j];
461       const Correspondence *corr = &matched_points[index];
462       motion_models[i].inliers[2 * j + 0] = (int)rint(corr->x);
463       motion_models[i].inliers[2 * j + 1] = (int)rint(corr->y);
464     }
465     motion_models[i].num_inliers = motions[i].num_inliers;
466   }
467 
468 finish_ransac:
469   aom_free(inlier_buffer);
470   aom_free(motions);
471 
472   return ret_val;
473 }
474 
475 static const RansacModelInfo ransac_model_info[TRANS_TYPES] = {
476   // IDENTITY
477   { NULL, NULL, 0 },
478 // TRANSLATION
479 #if ALLOW_TRANSLATION_MODELS
480   { find_translation, score_translation, 1 },
481 #else
482   { NULL, NULL, 0 },
483 #endif
484   // ROTZOOM
485   { find_rotzoom, score_affine, 2 },
486   // AFFINE
487   { find_affine, score_affine, 3 },
488 };
489 
490 // Returns true on success, false on error
ransac(const Correspondence * matched_points,int npoints,TransformationType type,MotionModel * motion_models,int num_desired_motions,bool * mem_alloc_failed)491 bool ransac(const Correspondence *matched_points, int npoints,
492             TransformationType type, MotionModel *motion_models,
493             int num_desired_motions, bool *mem_alloc_failed) {
494 #if ALLOW_TRANSLATION_MODELS
495   assert(type > IDENTITY && type < TRANS_TYPES);
496 #else
497   assert(type > TRANSLATION && type < TRANS_TYPES);
498 #endif  // ALLOW_TRANSLATION_MODELS
499 
500   return ransac_internal(matched_points, npoints, motion_models,
501                          num_desired_motions, &ransac_model_info[type],
502                          mem_alloc_failed);
503 }
504