1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <memory.h>
13 #include <math.h>
14 #include <time.h>
15 #include <stdio.h>
16 #include <stdbool.h>
17 #include <string.h>
18 #include <assert.h>
19
20 #include "aom_dsp/flow_estimation/ransac.h"
21 #include "aom_dsp/mathutils.h"
22 #include "aom_mem/aom_mem.h"
23
24 // TODO(rachelbarker): Remove dependence on code in av1/encoder/
25 #include "av1/encoder/random.h"
26
27 #define MAX_MINPTS 4
28 #define MINPTS_MULTIPLIER 5
29
30 #define INLIER_THRESHOLD 1.25
31 #define INLIER_THRESHOLD_SQUARED (INLIER_THRESHOLD * INLIER_THRESHOLD)
32
33 // Number of initial models to generate
34 #define NUM_TRIALS 20
35
36 // Number of times to refine the best model found
37 #define NUM_REFINES 5
38
39 // Flag to enable functions for finding TRANSLATION type models.
40 //
41 // These modes are not considered currently due to a spec bug (see comments
42 // in gm_get_motion_vector() in av1/common/mv.h). Thus we don't need to compile
43 // the corresponding search functions, but it is nice to keep the source around
44 // but disabled, for completeness.
45 #define ALLOW_TRANSLATION_MODELS 0
46
47 typedef struct {
48 int num_inliers;
49 double sse; // Sum of squared errors of inliers
50 int *inlier_indices;
51 } RANSAC_MOTION;
52
53 ////////////////////////////////////////////////////////////////////////////////
54 // ransac
55 typedef bool (*FindTransformationFunc)(const Correspondence *points,
56 const int *indices, int num_indices,
57 double *params);
58 typedef void (*ScoreModelFunc)(const double *mat, const Correspondence *points,
59 int num_points, RANSAC_MOTION *model);
60
61 // vtable-like structure which stores all of the information needed by RANSAC
62 // for a particular model type
63 typedef struct {
64 FindTransformationFunc find_transformation;
65 ScoreModelFunc score_model;
66
67 // The minimum number of points which can be passed to find_transformation
68 // to generate a model.
69 //
70 // This should be set as small as possible. This is due to an observation
71 // from section 4 of "Optimal Ransac" by A. Hast, J. Nysjö and
72 // A. Marchetti (https://dspace5.zcu.cz/bitstream/11025/6869/1/Hast.pdf):
73 // using the minimum possible number of points in the initial model maximizes
74 // the chance that all of the selected points are inliers.
75 //
76 // That paper proposes a method which can deal with models which are
77 // contaminated by outliers, which helps in cases where the inlier fraction
78 // is low. However, for our purposes, global motion only gives significant
79 // gains when the inlier fraction is high.
80 //
81 // So we do not use the method from this paper, but we do find that
82 // minimizing the number of points used for initial model fitting helps
83 // make the best use of the limited number of models we consider.
84 int minpts;
85 } RansacModelInfo;
86
87 #if ALLOW_TRANSLATION_MODELS
score_translation(const double * mat,const Correspondence * points,int num_points,RANSAC_MOTION * model)88 static void score_translation(const double *mat, const Correspondence *points,
89 int num_points, RANSAC_MOTION *model) {
90 model->num_inliers = 0;
91 model->sse = 0.0;
92
93 for (int i = 0; i < num_points; ++i) {
94 const double x1 = points[i].x;
95 const double y1 = points[i].y;
96 const double x2 = points[i].rx;
97 const double y2 = points[i].ry;
98
99 const double proj_x = x1 + mat[0];
100 const double proj_y = y1 + mat[1];
101
102 const double dx = proj_x - x2;
103 const double dy = proj_y - y2;
104 const double sse = dx * dx + dy * dy;
105
106 if (sse < INLIER_THRESHOLD_SQUARED) {
107 model->inlier_indices[model->num_inliers++] = i;
108 model->sse += sse;
109 }
110 }
111 }
112 #endif // ALLOW_TRANSLATION_MODELS
113
score_affine(const double * mat,const Correspondence * points,int num_points,RANSAC_MOTION * model)114 static void score_affine(const double *mat, const Correspondence *points,
115 int num_points, RANSAC_MOTION *model) {
116 model->num_inliers = 0;
117 model->sse = 0.0;
118
119 for (int i = 0; i < num_points; ++i) {
120 const double x1 = points[i].x;
121 const double y1 = points[i].y;
122 const double x2 = points[i].rx;
123 const double y2 = points[i].ry;
124
125 const double proj_x = mat[2] * x1 + mat[3] * y1 + mat[0];
126 const double proj_y = mat[4] * x1 + mat[5] * y1 + mat[1];
127
128 const double dx = proj_x - x2;
129 const double dy = proj_y - y2;
130 const double sse = dx * dx + dy * dy;
131
132 if (sse < INLIER_THRESHOLD_SQUARED) {
133 model->inlier_indices[model->num_inliers++] = i;
134 model->sse += sse;
135 }
136 }
137 }
138
139 #if ALLOW_TRANSLATION_MODELS
find_translation(const Correspondence * points,const int * indices,int num_indices,double * params)140 static bool find_translation(const Correspondence *points, const int *indices,
141 int num_indices, double *params) {
142 double sumx = 0;
143 double sumy = 0;
144
145 for (int i = 0; i < num_indices; ++i) {
146 int index = indices[i];
147 const double sx = points[index].x;
148 const double sy = points[index].y;
149 const double dx = points[index].rx;
150 const double dy = points[index].ry;
151
152 sumx += dx - sx;
153 sumy += dy - sy;
154 }
155
156 params[0] = sumx / np;
157 params[1] = sumy / np;
158 params[2] = 1;
159 params[3] = 0;
160 params[4] = 0;
161 params[5] = 1;
162 return true;
163 }
164 #endif // ALLOW_TRANSLATION_MODELS
165
find_rotzoom(const Correspondence * points,const int * indices,int num_indices,double * params)166 static bool find_rotzoom(const Correspondence *points, const int *indices,
167 int num_indices, double *params) {
168 const int n = 4; // Size of least-squares problem
169 double mat[4 * 4]; // Accumulator for A'A
170 double y[4]; // Accumulator for A'b
171 double a[4]; // Single row of A
172 double b; // Single element of b
173
174 least_squares_init(mat, y, n);
175 for (int i = 0; i < num_indices; ++i) {
176 int index = indices[i];
177 const double sx = points[index].x;
178 const double sy = points[index].y;
179 const double dx = points[index].rx;
180 const double dy = points[index].ry;
181
182 a[0] = 1;
183 a[1] = 0;
184 a[2] = sx;
185 a[3] = sy;
186 b = dx;
187 least_squares_accumulate(mat, y, a, b, n);
188
189 a[0] = 0;
190 a[1] = 1;
191 a[2] = sy;
192 a[3] = -sx;
193 b = dy;
194 least_squares_accumulate(mat, y, a, b, n);
195 }
196
197 // Fill in params[0] .. params[3] with output model
198 if (!least_squares_solve(mat, y, params, n)) {
199 return false;
200 }
201
202 // Fill in remaining parameters
203 params[4] = -params[3];
204 params[5] = params[2];
205
206 return true;
207 }
208
find_affine(const Correspondence * points,const int * indices,int num_indices,double * params)209 static bool find_affine(const Correspondence *points, const int *indices,
210 int num_indices, double *params) {
211 // Note: The least squares problem for affine models is 6-dimensional,
212 // but it splits into two independent 3-dimensional subproblems.
213 // Solving these two subproblems separately and recombining at the end
214 // results in less total computation than solving the 6-dimensional
215 // problem directly.
216 //
217 // The two subproblems correspond to all the parameters which contribute
218 // to the x output of the model, and all the parameters which contribute
219 // to the y output, respectively.
220
221 const int n = 3; // Size of each least-squares problem
222 double mat[2][3 * 3]; // Accumulator for A'A
223 double y[2][3]; // Accumulator for A'b
224 double x[2][3]; // Output vector
225 double a[2][3]; // Single row of A
226 double b[2]; // Single element of b
227
228 least_squares_init(mat[0], y[0], n);
229 least_squares_init(mat[1], y[1], n);
230 for (int i = 0; i < num_indices; ++i) {
231 int index = indices[i];
232 const double sx = points[index].x;
233 const double sy = points[index].y;
234 const double dx = points[index].rx;
235 const double dy = points[index].ry;
236
237 a[0][0] = 1;
238 a[0][1] = sx;
239 a[0][2] = sy;
240 b[0] = dx;
241 least_squares_accumulate(mat[0], y[0], a[0], b[0], n);
242
243 a[1][0] = 1;
244 a[1][1] = sx;
245 a[1][2] = sy;
246 b[1] = dy;
247 least_squares_accumulate(mat[1], y[1], a[1], b[1], n);
248 }
249
250 if (!least_squares_solve(mat[0], y[0], x[0], n)) {
251 return false;
252 }
253 if (!least_squares_solve(mat[1], y[1], x[1], n)) {
254 return false;
255 }
256
257 // Rearrange least squares result to form output model
258 params[0] = x[0][0];
259 params[1] = x[1][0];
260 params[2] = x[0][1];
261 params[3] = x[0][2];
262 params[4] = x[1][1];
263 params[5] = x[1][2];
264
265 return true;
266 }
267
268 // Return -1 if 'a' is a better motion, 1 if 'b' is better, 0 otherwise.
compare_motions(const void * arg_a,const void * arg_b)269 static int compare_motions(const void *arg_a, const void *arg_b) {
270 const RANSAC_MOTION *motion_a = (RANSAC_MOTION *)arg_a;
271 const RANSAC_MOTION *motion_b = (RANSAC_MOTION *)arg_b;
272
273 if (motion_a->num_inliers > motion_b->num_inliers) return -1;
274 if (motion_a->num_inliers < motion_b->num_inliers) return 1;
275 if (motion_a->sse < motion_b->sse) return -1;
276 if (motion_a->sse > motion_b->sse) return 1;
277 return 0;
278 }
279
is_better_motion(const RANSAC_MOTION * motion_a,const RANSAC_MOTION * motion_b)280 static bool is_better_motion(const RANSAC_MOTION *motion_a,
281 const RANSAC_MOTION *motion_b) {
282 return compare_motions(motion_a, motion_b) < 0;
283 }
284
285 // Returns true on success, false on error
ransac_internal(const Correspondence * matched_points,int npoints,MotionModel * motion_models,int num_desired_motions,const RansacModelInfo * model_info,bool * mem_alloc_failed)286 static bool ransac_internal(const Correspondence *matched_points, int npoints,
287 MotionModel *motion_models, int num_desired_motions,
288 const RansacModelInfo *model_info,
289 bool *mem_alloc_failed) {
290 assert(npoints >= 0);
291 int i = 0;
292 int minpts = model_info->minpts;
293 bool ret_val = true;
294
295 unsigned int seed = (unsigned int)npoints;
296
297 int indices[MAX_MINPTS] = { 0 };
298
299 // Store information for the num_desired_motions best transformations found
300 // and the worst motion among them, as well as the motion currently under
301 // consideration.
302 RANSAC_MOTION *motions, *worst_kept_motion = NULL;
303 RANSAC_MOTION current_motion;
304
305 // Store the parameters and the indices of the inlier points for the motion
306 // currently under consideration.
307 double params_this_motion[MAX_PARAMDIM];
308
309 // Initialize output models, as a fallback in case we can't find a model
310 for (i = 0; i < num_desired_motions; i++) {
311 memcpy(motion_models[i].params, kIdentityParams,
312 MAX_PARAMDIM * sizeof(*(motion_models[i].params)));
313 motion_models[i].num_inliers = 0;
314 }
315
316 if (npoints < minpts * MINPTS_MULTIPLIER || npoints == 0) {
317 return false;
318 }
319
320 int min_inliers = AOMMAX((int)(MIN_INLIER_PROB * npoints), minpts);
321
322 motions =
323 (RANSAC_MOTION *)aom_calloc(num_desired_motions, sizeof(RANSAC_MOTION));
324
325 // Allocate one large buffer which will be carved up to store the inlier
326 // indices for the current motion plus the num_desired_motions many
327 // output models
328 // This allows us to keep the allocation/deallocation logic simple, without
329 // having to (for example) check that `motions` is non-null before allocating
330 // the inlier arrays
331 int *inlier_buffer = (int *)aom_malloc(sizeof(*inlier_buffer) * npoints *
332 (num_desired_motions + 1));
333
334 if (!(motions && inlier_buffer)) {
335 ret_val = false;
336 *mem_alloc_failed = true;
337 goto finish_ransac;
338 }
339
340 // Once all our allocations are known-good, we can fill in our structures
341 worst_kept_motion = motions;
342
343 for (i = 0; i < num_desired_motions; ++i) {
344 motions[i].inlier_indices = inlier_buffer + i * npoints;
345 }
346 memset(¤t_motion, 0, sizeof(current_motion));
347 current_motion.inlier_indices = inlier_buffer + num_desired_motions * npoints;
348
349 for (int trial_count = 0; trial_count < NUM_TRIALS; trial_count++) {
350 lcg_pick(npoints, minpts, indices, &seed);
351
352 if (!model_info->find_transformation(matched_points, indices, minpts,
353 params_this_motion)) {
354 continue;
355 }
356
357 model_info->score_model(params_this_motion, matched_points, npoints,
358 ¤t_motion);
359
360 if (current_motion.num_inliers < min_inliers) {
361 // Reject models with too few inliers
362 continue;
363 }
364
365 if (is_better_motion(¤t_motion, worst_kept_motion)) {
366 // This motion is better than the worst currently kept motion. Remember
367 // the inlier points and sse. The parameters for each kept motion
368 // will be recomputed later using only the inliers.
369 worst_kept_motion->num_inliers = current_motion.num_inliers;
370 worst_kept_motion->sse = current_motion.sse;
371
372 // Rather than copying the (potentially many) inlier indices from
373 // current_motion.inlier_indices to worst_kept_motion->inlier_indices,
374 // we can swap the underlying pointers.
375 //
376 // This is okay because the next time current_motion.inlier_indices
377 // is used will be in the next trial, where we ignore its previous
378 // contents anyway. And both arrays will be deallocated together at the
379 // end of this function, so there are no lifetime issues.
380 int *tmp = worst_kept_motion->inlier_indices;
381 worst_kept_motion->inlier_indices = current_motion.inlier_indices;
382 current_motion.inlier_indices = tmp;
383
384 // Determine the new worst kept motion and its num_inliers and sse.
385 for (i = 0; i < num_desired_motions; ++i) {
386 if (is_better_motion(worst_kept_motion, &motions[i])) {
387 worst_kept_motion = &motions[i];
388 }
389 }
390 }
391 }
392
393 // Sort the motions, best first.
394 qsort(motions, num_desired_motions, sizeof(RANSAC_MOTION), compare_motions);
395
396 // Refine each of the best N models using iterative estimation.
397 //
398 // The idea here is loosely based on the iterative method from
399 // "Locally Optimized RANSAC" by O. Chum, J. Matas and Josef Kittler:
400 // https://cmp.felk.cvut.cz/ftp/articles/matas/chum-dagm03.pdf
401 //
402 // However, we implement a simpler version than their proposal, and simply
403 // refit the model repeatedly until the number of inliers stops increasing,
404 // with a cap on the number of iterations to defend against edge cases which
405 // only improve very slowly.
406 for (i = 0; i < num_desired_motions; ++i) {
407 if (motions[i].num_inliers <= 0) {
408 // Output model has already been initialized to the identity model,
409 // so just skip setup
410 continue;
411 }
412
413 bool bad_model = false;
414 for (int refine_count = 0; refine_count < NUM_REFINES; refine_count++) {
415 int num_inliers = motions[i].num_inliers;
416 assert(num_inliers >= min_inliers);
417
418 if (!model_info->find_transformation(matched_points,
419 motions[i].inlier_indices,
420 num_inliers, params_this_motion)) {
421 // In the unlikely event that this model fitting fails, we don't have a
422 // good fallback. So leave this model set to the identity model
423 bad_model = true;
424 break;
425 }
426
427 // Score the newly generated model
428 model_info->score_model(params_this_motion, matched_points, npoints,
429 ¤t_motion);
430
431 // At this point, there are three possibilities:
432 // 1) If we found more inliers, keep refining.
433 // 2) If we found the same number of inliers but a lower SSE, we want to
434 // keep the new model, but further refinement is unlikely to gain much.
435 // So commit to this new model
436 // 3) It is possible, but very unlikely, that the new model will have
437 // fewer inliers. If it does happen, we probably just lost a few
438 // borderline inliers. So treat the same as case (2).
439 if (current_motion.num_inliers > motions[i].num_inliers) {
440 motions[i].num_inliers = current_motion.num_inliers;
441 motions[i].sse = current_motion.sse;
442 int *tmp = motions[i].inlier_indices;
443 motions[i].inlier_indices = current_motion.inlier_indices;
444 current_motion.inlier_indices = tmp;
445 } else {
446 // Refined model is no better, so stop
447 // This shouldn't be significantly worse than the previous model,
448 // so it's fine to use the parameters in params_this_motion.
449 // This saves us from having to cache the previous iteration's params.
450 break;
451 }
452 }
453
454 if (bad_model) continue;
455
456 // Fill in output struct
457 memcpy(motion_models[i].params, params_this_motion,
458 MAX_PARAMDIM * sizeof(*motion_models[i].params));
459 for (int j = 0; j < motions[i].num_inliers; j++) {
460 int index = motions[i].inlier_indices[j];
461 const Correspondence *corr = &matched_points[index];
462 motion_models[i].inliers[2 * j + 0] = (int)rint(corr->x);
463 motion_models[i].inliers[2 * j + 1] = (int)rint(corr->y);
464 }
465 motion_models[i].num_inliers = motions[i].num_inliers;
466 }
467
468 finish_ransac:
469 aom_free(inlier_buffer);
470 aom_free(motions);
471
472 return ret_val;
473 }
474
475 static const RansacModelInfo ransac_model_info[TRANS_TYPES] = {
476 // IDENTITY
477 { NULL, NULL, 0 },
478 // TRANSLATION
479 #if ALLOW_TRANSLATION_MODELS
480 { find_translation, score_translation, 1 },
481 #else
482 { NULL, NULL, 0 },
483 #endif
484 // ROTZOOM
485 { find_rotzoom, score_affine, 2 },
486 // AFFINE
487 { find_affine, score_affine, 3 },
488 };
489
490 // Returns true on success, false on error
ransac(const Correspondence * matched_points,int npoints,TransformationType type,MotionModel * motion_models,int num_desired_motions,bool * mem_alloc_failed)491 bool ransac(const Correspondence *matched_points, int npoints,
492 TransformationType type, MotionModel *motion_models,
493 int num_desired_motions, bool *mem_alloc_failed) {
494 #if ALLOW_TRANSLATION_MODELS
495 assert(type > IDENTITY && type < TRANS_TYPES);
496 #else
497 assert(type > TRANSLATION && type < TRANS_TYPES);
498 #endif // ALLOW_TRANSLATION_MODELS
499
500 return ransac_internal(matched_points, npoints, motion_models,
501 num_desired_motions, &ransac_model_info[type],
502 mem_alloc_failed);
503 }
504