xref: /aosp_15_r20/external/libaom/av1/encoder/saliency_map.c (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
1 /*
2  * Copyright (c) 2023, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 #include <assert.h>
12 #include <float.h>
13 #include <string.h>
14 
15 #include "av1/encoder/encoder.h"
16 #include "av1/encoder/encoder_utils.h"
17 #include "av1/encoder/firstpass.h"
18 #include "av1/encoder/rdopt.h"
19 #include "av1/encoder/saliency_map.h"
20 
21 // The Gabor filter is generated by setting the parameters as:
22 // ksize = 9
23 // sigma = 1
24 // theta = y*np.pi/4, where y /in {0, 1, 2, 3}, i.e., 0, 45, 90, 135 degree
25 // lambda1 = 1
26 // gamma=0.8
27 // phi =0
28 static const double kGaborFilter[4][9][9] = {  // [angle: 0, 45, 90, 135
29                                                // degree][ksize][ksize]
30   { { 2.0047323e-06, 6.6387620e-05, 8.0876675e-04, 3.6246411e-03, 5.9760227e-03,
31       3.6246411e-03, 8.0876675e-04, 6.6387620e-05, 2.0047323e-06 },
32     { 1.8831115e-05, 6.2360091e-04, 7.5970138e-03, 3.4047455e-02, 5.6134764e-02,
33       3.4047455e-02, 7.5970138e-03, 6.2360091e-04, 1.8831115e-05 },
34     { 9.3271126e-05, 3.0887155e-03, 3.7628256e-02, 1.6863814e-01, 2.7803731e-01,
35       1.6863814e-01, 3.7628256e-02, 3.0887155e-03, 9.3271126e-05 },
36     { 2.4359586e-04, 8.0667874e-03, 9.8273583e-02, 4.4043165e-01, 7.2614902e-01,
37       4.4043165e-01, 9.8273583e-02, 8.0667874e-03, 2.4359586e-04 },
38     { 3.3546262e-04, 1.1108996e-02, 1.3533528e-01, 6.0653067e-01, 1.0000000e+00,
39       6.0653067e-01, 1.3533528e-01, 1.1108996e-02, 3.3546262e-04 },
40     { 2.4359586e-04, 8.0667874e-03, 9.8273583e-02, 4.4043165e-01, 7.2614902e-01,
41       4.4043165e-01, 9.8273583e-02, 8.0667874e-03, 2.4359586e-04 },
42     { 9.3271126e-05, 3.0887155e-03, 3.7628256e-02, 1.6863814e-01, 2.7803731e-01,
43       1.6863814e-01, 3.7628256e-02, 3.0887155e-03, 9.3271126e-05 },
44     { 1.8831115e-05, 6.2360091e-04, 7.5970138e-03, 3.4047455e-02, 5.6134764e-02,
45       3.4047455e-02, 7.5970138e-03, 6.2360091e-04, 1.8831115e-05 },
46     { 2.0047323e-06, 6.6387620e-05, 8.0876675e-04, 3.6246411e-03, 5.9760227e-03,
47       3.6246411e-03, 8.0876675e-04, 6.6387620e-05, 2.0047323e-06 } },
48 
49   { { -6.2165498e-08, 3.8760313e-06, 3.0079011e-06, -4.4602581e-04,
50       6.6981313e-04, 1.3962291e-03, -9.9486928e-04, -8.1631159e-05,
51       3.5712848e-05 },
52     { 3.8760313e-06, 5.7044272e-06, -1.6041942e-03, 4.5687673e-03,
53       1.8061366e-02, -2.4406660e-02, -3.7979286e-03, 3.1511115e-03,
54       -8.1631159e-05 },
55     { 3.0079011e-06, -1.6041942e-03, 8.6645801e-03, 6.4960226e-02,
56       -1.6647682e-01, -4.9129307e-02, 7.7304743e-02, -3.7979286e-03,
57       -9.9486928e-04 },
58     { -4.4602581e-04, 4.5687673e-03, 6.4960226e-02, -3.1572008e-01,
59       -1.7670043e-01, 5.2729243e-01, -4.9129307e-02, -2.4406660e-02,
60       1.3962291e-03 },
61     { 6.6981313e-04, 1.8061366e-02, -1.6647682e-01, -1.7670043e-01,
62       1.0000000e+00, -1.7670043e-01, -1.6647682e-01, 1.8061366e-02,
63       6.6981313e-04 },
64     { 1.3962291e-03, -2.4406660e-02, -4.9129307e-02, 5.2729243e-01,
65       -1.7670043e-01, -3.1572008e-01, 6.4960226e-02, 4.5687673e-03,
66       -4.4602581e-04 },
67     { -9.9486928e-04, -3.7979286e-03, 7.7304743e-02, -4.9129307e-02,
68       -1.6647682e-01, 6.4960226e-02, 8.6645801e-03, -1.6041942e-03,
69       3.0079011e-06 },
70     { -8.1631159e-05, 3.1511115e-03, -3.7979286e-03, -2.4406660e-02,
71       1.8061366e-02, 4.5687673e-03, -1.6041942e-03, 5.7044272e-06,
72       3.8760313e-06 },
73     { 3.5712848e-05, -8.1631159e-05, -9.9486928e-04, 1.3962291e-03,
74       6.6981313e-04, -4.4602581e-04, 3.0079011e-06, 3.8760313e-06,
75       -6.2165498e-08 } },
76 
77   { { 2.0047323e-06, 1.8831115e-05, 9.3271126e-05, 2.4359586e-04, 3.3546262e-04,
78       2.4359586e-04, 9.3271126e-05, 1.8831115e-05, 2.0047323e-06 },
79     { 6.6387620e-05, 6.2360091e-04, 3.0887155e-03, 8.0667874e-03, 1.1108996e-02,
80       8.0667874e-03, 3.0887155e-03, 6.2360091e-04, 6.6387620e-05 },
81     { 8.0876675e-04, 7.5970138e-03, 3.7628256e-02, 9.8273583e-02, 1.3533528e-01,
82       9.8273583e-02, 3.7628256e-02, 7.5970138e-03, 8.0876675e-04 },
83     { 3.6246411e-03, 3.4047455e-02, 1.6863814e-01, 4.4043165e-01, 6.0653067e-01,
84       4.4043165e-01, 1.6863814e-01, 3.4047455e-02, 3.6246411e-03 },
85     { 5.9760227e-03, 5.6134764e-02, 2.7803731e-01, 7.2614902e-01, 1.0000000e+00,
86       7.2614902e-01, 2.7803731e-01, 5.6134764e-02, 5.9760227e-03 },
87     { 3.6246411e-03, 3.4047455e-02, 1.6863814e-01, 4.4043165e-01, 6.0653067e-01,
88       4.4043165e-01, 1.6863814e-01, 3.4047455e-02, 3.6246411e-03 },
89     { 8.0876675e-04, 7.5970138e-03, 3.7628256e-02, 9.8273583e-02, 1.3533528e-01,
90       9.8273583e-02, 3.7628256e-02, 7.5970138e-03, 8.0876675e-04 },
91     { 6.6387620e-05, 6.2360091e-04, 3.0887155e-03, 8.0667874e-03, 1.1108996e-02,
92       8.0667874e-03, 3.0887155e-03, 6.2360091e-04, 6.6387620e-05 },
93     { 2.0047323e-06, 1.8831115e-05, 9.3271126e-05, 2.4359586e-04, 3.3546262e-04,
94       2.4359586e-04, 9.3271126e-05, 1.8831115e-05, 2.0047323e-06 } },
95 
96   { { 3.5712848e-05, -8.1631159e-05, -9.9486928e-04, 1.3962291e-03,
97       6.6981313e-04, -4.4602581e-04, 3.0079011e-06, 3.8760313e-06,
98       -6.2165498e-08 },
99     { -8.1631159e-05, 3.1511115e-03, -3.7979286e-03, -2.4406660e-02,
100       1.8061366e-02, 4.5687673e-03, -1.6041942e-03, 5.7044272e-06,
101       3.8760313e-06 },
102     { -9.9486928e-04, -3.7979286e-03, 7.7304743e-02, -4.9129307e-02,
103       -1.6647682e-01, 6.4960226e-02, 8.6645801e-03, -1.6041942e-03,
104       3.0079011e-06 },
105     { 1.3962291e-03, -2.4406660e-02, -4.9129307e-02, 5.2729243e-01,
106       -1.7670043e-01, -3.1572008e-01, 6.4960226e-02, 4.5687673e-03,
107       -4.4602581e-04 },
108     { 6.6981313e-04, 1.8061366e-02, -1.6647682e-01, -1.7670043e-01,
109       1.0000000e+00, -1.7670043e-01, -1.6647682e-01, 1.8061366e-02,
110       6.6981313e-04 },
111     { -4.4602581e-04, 4.5687673e-03, 6.4960226e-02, -3.1572008e-01,
112       -1.7670043e-01, 5.2729243e-01, -4.9129307e-02, -2.4406660e-02,
113       1.3962291e-03 },
114     { 3.0079011e-06, -1.6041942e-03, 8.6645801e-03, 6.4960226e-02,
115       -1.6647682e-01, -4.9129307e-02, 7.7304743e-02, -3.7979286e-03,
116       -9.9486928e-04 },
117     { 3.8760313e-06, 5.7044272e-06, -1.6041942e-03, 4.5687673e-03,
118       1.8061366e-02, -2.4406660e-02, -3.7979286e-03, 3.1511115e-03,
119       -8.1631159e-05 },
120     { -6.2165498e-08, 3.8760313e-06, 3.0079011e-06, -4.4602581e-04,
121       6.6981313e-04, 1.3962291e-03, -9.9486928e-04, -8.1631159e-05,
122       3.5712848e-05 } }
123 };
124 
125 // This function is to extract red/green/blue channels, and calculate intensity
126 // = (r+g+b)/3. Note that it only handles 8bits case now.
127 // TODO(linzhen): add high bitdepth support.
get_color_intensity(const YV12_BUFFER_CONFIG * src,int subsampling_x,int subsampling_y,double * cr,double * cg,double * cb,double * intensity)128 static void get_color_intensity(const YV12_BUFFER_CONFIG *src,
129                                 int subsampling_x, int subsampling_y,
130                                 double *cr, double *cg, double *cb,
131                                 double *intensity) {
132   const uint8_t *y = src->buffers[0];
133   const uint8_t *u = src->buffers[1];
134   const uint8_t *v = src->buffers[2];
135 
136   const int y_height = src->crop_heights[0];
137   const int y_width = src->crop_widths[0];
138   const int y_stride = src->strides[0];
139   const int c_stride = src->strides[1];
140 
141   for (int i = 0; i < y_height; ++i) {
142     for (int j = 0; j < y_width; ++j) {
143       cr[i * y_width + j] =
144           fclamp((double)y[i * y_stride + j] +
145                      1.370 * (double)(v[(i >> subsampling_y) * c_stride +
146                                         (j >> subsampling_x)] -
147                                       128),
148                  0, 255);
149       cg[i * y_width + j] =
150           fclamp((double)y[i * y_stride + j] -
151                      0.698 * (double)(u[(i >> subsampling_y) * c_stride +
152                                         (j >> subsampling_x)] -
153                                       128) -
154                      0.337 * (double)(v[(i >> subsampling_y) * c_stride +
155                                         (j >> subsampling_x)] -
156                                       128),
157                  0, 255);
158       cb[i * y_width + j] =
159           fclamp((double)y[i * y_stride + j] +
160                      1.732 * (double)(u[(i >> subsampling_y) * c_stride +
161                                         (j >> subsampling_x)] -
162                                       128),
163                  0, 255);
164 
165       intensity[i * y_width + j] =
166           (cr[i * y_width + j] + cg[i * y_width + j] + cb[i * y_width + j]) /
167           3.0;
168       assert(intensity[i * y_width + j] >= 0 &&
169              intensity[i * y_width + j] <= 255);
170 
171       intensity[i * y_width + j] /= 256;
172       cr[i * y_width + j] /= 256;
173       cg[i * y_width + j] /= 256;
174       cb[i * y_width + j] /= 256;
175     }
176   }
177 }
178 
convolve_map(const double * filter,const double * map,const int size)179 static inline double convolve_map(const double *filter, const double *map,
180                                   const int size) {
181   double result = 0;
182   for (int i = 0; i < size; ++i) {
183     result += filter[i] * map[i];  // symmetric filter is used
184   }
185   return result;
186 }
187 
188 // This function is to decimate the map by half, and apply Gaussian filter on
189 // top of the downsampled map.
decimate_map(const double * map,int height,int width,int stride,double * downsampled_map)190 static inline void decimate_map(const double *map, int height, int width,
191                                 int stride, double *downsampled_map) {
192   const int new_width = width / 2;
193   const int window_size = 5;
194   const double gaussian_filter[25] = {
195     1. / 256, 1.0 / 64, 3. / 128, 1. / 64,  1. / 256, 1. / 64, 1. / 16,
196     3. / 32,  1. / 16,  1. / 64,  3. / 128, 3. / 32,  9. / 64, 3. / 32,
197     3. / 128, 1. / 64,  1. / 16,  3. / 32,  1. / 16,  1. / 64, 1. / 256,
198     1. / 64,  3. / 128, 1. / 64,  1. / 256
199   };
200 
201   double map_region[25];
202   for (int y = 0; y < height - 1; y += 2) {
203     for (int x = 0; x < width - 1; x += 2) {
204       int i = 0;
205       for (int yy = y - window_size / 2; yy <= y + window_size / 2; ++yy) {
206         for (int xx = x - window_size / 2; xx <= x + window_size / 2; ++xx) {
207           int yvalue = clamp(yy, 0, height - 1);
208           int xvalue = clamp(xx, 0, width - 1);
209           map_region[i++] = map[yvalue * stride + xvalue];
210         }
211       }
212       downsampled_map[(y / 2) * new_width + (x / 2)] =
213           convolve_map(gaussian_filter, map_region, window_size * window_size);
214     }
215   }
216 }
217 
218 // This function is to upscale the map from in_level size to out_level size.
219 // Note that the map at "level-1" will upscale the map at "level" by x2.
upscale_map(const double * input,int in_level,int out_level,int height[9],int width[9],double * output)220 static inline int upscale_map(const double *input, int in_level, int out_level,
221                               int height[9], int width[9], double *output) {
222   for (int level = in_level; level > out_level; level--) {
223     const int cur_width = width[level];
224     const int cur_height = height[level];
225     const int cur_stride = width[level];
226 
227     double *original = (level == in_level) ? (double *)input : output;
228 
229     assert(level > 0);
230 
231     const int h_upscale = height[level - 1];
232     const int w_upscale = width[level - 1];
233     const int s_upscale = width[level - 1];
234 
235     double *upscale = aom_malloc(h_upscale * w_upscale * sizeof(*upscale));
236 
237     if (!upscale) {
238       return 0;
239     }
240 
241     for (int i = 0; i < h_upscale; ++i) {
242       for (int j = 0; j < w_upscale; ++j) {
243         const int ii = clamp((i >> 1), 0, cur_height - 1);
244         const int jj = clamp((j >> 1), 0, cur_width - 1);
245         upscale[j + i * s_upscale] = (double)original[jj + ii * cur_stride];
246       }
247     }
248     memcpy(output, upscale, h_upscale * w_upscale * sizeof(double));
249     aom_free(upscale);
250   }
251 
252   return 1;
253 }
254 
255 // This function calculates the differences between a fine scale c and a
256 // coarser scale s yielding the feature maps. c \in {2, 3, 4}, and s = c +
257 // delta, where delta \in {3, 4}.
center_surround_diff(const double * input[9],int height[9],int width[9],saliency_feature_map * output[6])258 static int center_surround_diff(const double *input[9], int height[9],
259                                 int width[9], saliency_feature_map *output[6]) {
260   int j = 0;
261   for (int k = 2; k < 5; ++k) {
262     int cur_height = height[k];
263     int cur_width = width[k];
264 
265     if (upscale_map(input[k + 3], k + 3, k, height, width, output[j]->buf) ==
266         0) {
267       return 0;
268     }
269 
270     for (int r = 0; r < cur_height; ++r) {
271       for (int c = 0; c < cur_width; ++c) {
272         output[j]->buf[r * cur_width + c] =
273             fabs((double)(input[k][r * cur_width + c] -
274                           output[j]->buf[r * cur_width + c]));
275       }
276     }
277 
278     if (upscale_map(input[k + 4], k + 4, k, height, width,
279                     output[j + 1]->buf) == 0) {
280       return 0;
281     }
282 
283     for (int r = 0; r < cur_height; ++r) {
284       for (int c = 0; c < cur_width; ++c) {
285         output[j + 1]->buf[r * cur_width + c] =
286             fabs(input[k][r * cur_width + c] -
287                  output[j + 1]->buf[r * cur_width + c]);
288       }
289     }
290 
291     j += 2;
292   }
293   return 1;
294 }
295 
296 // For color channels, the differences is calculated based on "color
297 // double-opponency". For example, the RG feature map is constructed between a
298 // fine scale c of R-G component and a coarser scale s of G-R component.
center_surround_diff_rgb(const double * input_1[9],const double * input_2[9],int height[9],int width[9],saliency_feature_map * output[6])299 static int center_surround_diff_rgb(const double *input_1[9],
300                                     const double *input_2[9], int height[9],
301                                     int width[9],
302                                     saliency_feature_map *output[6]) {
303   int j = 0;
304   for (int k = 2; k < 5; ++k) {
305     int cur_height = height[k];
306     int cur_width = width[k];
307 
308     if (upscale_map(input_2[k + 3], k + 3, k, height, width, output[j]->buf) ==
309         0) {
310       return 0;
311     }
312 
313     for (int r = 0; r < cur_height; ++r) {
314       for (int c = 0; c < cur_width; ++c) {
315         output[j]->buf[r * cur_width + c] =
316             fabs((double)(input_1[k][r * cur_width + c] -
317                           output[j]->buf[r * cur_width + c]));
318       }
319     }
320 
321     if (upscale_map(input_2[k + 4], k + 4, k, height, width,
322                     output[j + 1]->buf) == 0) {
323       return 0;
324     }
325 
326     for (int r = 0; r < cur_height; ++r) {
327       for (int c = 0; c < cur_width; ++c) {
328         output[j + 1]->buf[r * cur_width + c] =
329             fabs(input_1[k][r * cur_width + c] -
330                  output[j + 1]->buf[r * cur_width + c]);
331       }
332     }
333 
334     j += 2;
335   }
336   return 1;
337 }
338 
339 // This function is to generate Gaussian pyramid images with indexes from 0 to
340 // 8, and construct the feature maps from calculating the center-surround
341 // differences.
gaussian_pyramid(const double * src,int width[9],int height[9],saliency_feature_map * dst[6])342 static int gaussian_pyramid(const double *src, int width[9], int height[9],
343                             saliency_feature_map *dst[6]) {
344   double *gaussian_map[9];  // scale = 9
345   gaussian_map[0] =
346       (double *)aom_malloc(width[0] * height[0] * sizeof(*gaussian_map[0]));
347   if (!gaussian_map[0]) {
348     return 0;
349   }
350 
351   memcpy(gaussian_map[0], src, width[0] * height[0] * sizeof(double));
352 
353   for (int i = 1; i < 9; ++i) {
354     int stride = width[i - 1];
355     int new_width = width[i];
356     int new_height = height[i];
357 
358     gaussian_map[i] =
359         (double *)aom_malloc(new_width * new_height * sizeof(*gaussian_map[i]));
360 
361     if (!gaussian_map[i]) {
362       for (int l = 0; l < i; ++l) {
363         aom_free(gaussian_map[l]);
364       }
365       return 0;
366     }
367 
368     memset(gaussian_map[i], 0, new_width * new_height * sizeof(double));
369 
370     decimate_map(gaussian_map[i - 1], height[i - 1], width[i - 1], stride,
371                  gaussian_map[i]);
372   }
373 
374   if (center_surround_diff((const double **)gaussian_map, height, width, dst) ==
375       0) {
376     for (int l = 0; l < 9; ++l) {
377       aom_free(gaussian_map[l]);
378     }
379     return 0;
380   }
381 
382   for (int i = 0; i < 9; ++i) {
383     aom_free(gaussian_map[i]);
384   }
385   return 1;
386 }
387 
gaussian_pyramid_rgb(double * src_1,double * src_2,int width[9],int height[9],saliency_feature_map * dst[6])388 static int gaussian_pyramid_rgb(double *src_1, double *src_2, int width[9],
389                                 int height[9], saliency_feature_map *dst[6]) {
390   double *gaussian_map[2][9];  // scale = 9
391   double *src[2];
392 
393   src[0] = src_1;
394   src[1] = src_2;
395 
396   for (int k = 0; k < 2; ++k) {
397     gaussian_map[k][0] = (double *)aom_malloc(width[0] * height[0] *
398                                               sizeof(*gaussian_map[k][0]));
399     if (!gaussian_map[k][0]) {
400       for (int l = 0; l < k; ++l) {
401         aom_free(gaussian_map[l][0]);
402       }
403       return 0;
404     }
405     memcpy(gaussian_map[k][0], src[k], width[0] * height[0] * sizeof(double));
406 
407     for (int i = 1; i < 9; ++i) {
408       int stride = width[i - 1];
409       int new_width = width[i];
410       int new_height = height[i];
411 
412       gaussian_map[k][i] = (double *)aom_malloc(new_width * new_height *
413                                                 sizeof(*gaussian_map[k][i]));
414       if (!gaussian_map[k][i]) {
415         for (int l = 0; l < k; ++l) {
416           aom_free(gaussian_map[l][i]);
417         }
418         return 0;
419       }
420       memset(gaussian_map[k][i], 0, new_width * new_height * sizeof(double));
421       decimate_map(gaussian_map[k][i - 1], height[i - 1], width[i - 1], stride,
422                    gaussian_map[k][i]);
423     }
424   }
425 
426   if (center_surround_diff_rgb((const double **)gaussian_map[0],
427                                (const double **)gaussian_map[1], height, width,
428                                dst) == 0) {
429     for (int l = 0; l < 2; ++l) {
430       for (int i = 0; i < 9; ++i) {
431         aom_free(gaussian_map[l][i]);
432       }
433     }
434     return 0;
435   }
436 
437   for (int l = 0; l < 2; ++l) {
438     for (int i = 0; i < 9; ++i) {
439       aom_free(gaussian_map[l][i]);
440     }
441   }
442   return 1;
443 }
444 
get_feature_map_intensity(double * intensity,int width[9],int height[9],saliency_feature_map * i_map[6])445 static int get_feature_map_intensity(double *intensity, int width[9],
446                                      int height[9],
447                                      saliency_feature_map *i_map[6]) {
448   if (gaussian_pyramid(intensity, width, height, i_map) == 0) {
449     return 0;
450   }
451   return 1;
452 }
453 
get_feature_map_rgb(double * cr,double * cg,double * cb,int width[9],int height[9],saliency_feature_map * rg_map[6],saliency_feature_map * by_map[6])454 static int get_feature_map_rgb(double *cr, double *cg, double *cb, int width[9],
455                                int height[9], saliency_feature_map *rg_map[6],
456                                saliency_feature_map *by_map[6]) {
457   double *rg_mat = aom_malloc(height[0] * width[0] * sizeof(*rg_mat));
458   double *by_mat = aom_malloc(height[0] * width[0] * sizeof(*by_mat));
459   double *gr_mat = aom_malloc(height[0] * width[0] * sizeof(*gr_mat));
460   double *yb_mat = aom_malloc(height[0] * width[0] * sizeof(*yb_mat));
461 
462   if (!rg_mat || !by_mat || !gr_mat || !yb_mat) {
463     aom_free(rg_mat);
464     aom_free(by_mat);
465     aom_free(gr_mat);
466     aom_free(yb_mat);
467     return 0;
468   }
469 
470   double r, g, b, y;
471   for (int i = 0; i < height[0]; ++i) {
472     for (int j = 0; j < width[0]; ++j) {
473       r = AOMMAX(0, cr[i * width[0] + j] -
474                         (cg[i * width[0] + j] + cb[i * width[0] + j]) / 2);
475       g = AOMMAX(0, cg[i * width[0] + j] -
476                         (cr[i * width[0] + j] + cb[i * width[0] + j]) / 2);
477       b = AOMMAX(0, cb[i * width[0] + j] -
478                         (cr[i * width[0] + j] + cg[i * width[0] + j]) / 2);
479       y = AOMMAX(0, (cr[i * width[0] + j] + cg[i * width[0] + j]) / 2 -
480                         fabs(cr[i * width[0] + j] - cg[i * width[0] + j]) / 2 -
481                         cb[i * width[0] + j]);
482 
483       rg_mat[i * width[0] + j] = r - g;
484       by_mat[i * width[0] + j] = b - y;
485       gr_mat[i * width[0] + j] = g - r;
486       yb_mat[i * width[0] + j] = y - b;
487     }
488   }
489 
490   if (gaussian_pyramid_rgb(rg_mat, gr_mat, width, height, rg_map) == 0 ||
491       gaussian_pyramid_rgb(by_mat, yb_mat, width, height, by_map) == 0) {
492     aom_free(rg_mat);
493     aom_free(by_mat);
494     aom_free(gr_mat);
495     aom_free(yb_mat);
496     return 0;
497   }
498 
499   aom_free(rg_mat);
500   aom_free(by_mat);
501   aom_free(gr_mat);
502   aom_free(yb_mat);
503   return 1;
504 }
505 
filter2d(const double * input,const double kernel[9][9],int width,int height,double * output)506 static inline void filter2d(const double *input, const double kernel[9][9],
507                             int width, int height, double *output) {
508   const int window_size = 9;
509   double map_section[81];
510   for (int y = 0; y <= height - 1; ++y) {
511     for (int x = 0; x <= width - 1; ++x) {
512       int i = 0;
513       for (int yy = y - window_size / 2; yy <= y + window_size / 2; ++yy) {
514         for (int xx = x - window_size / 2; xx <= x + window_size / 2; ++xx) {
515           int yvalue = clamp(yy, 0, height - 1);
516           int xvalue = clamp(xx, 0, width - 1);
517           map_section[i++] = input[yvalue * width + xvalue];
518         }
519       }
520 
521       output[y * width + x] = 0;
522       for (int k = 0; k < window_size; ++k) {
523         for (int l = 0; l < window_size; ++l) {
524           output[y * width + x] +=
525               kernel[k][l] * map_section[k * window_size + l];
526         }
527       }
528     }
529   }
530 }
531 
get_feature_map_orientation(const double * intensity,int width[9],int height[9],saliency_feature_map * dst[24])532 static int get_feature_map_orientation(const double *intensity, int width[9],
533                                        int height[9],
534                                        saliency_feature_map *dst[24]) {
535   double *gaussian_map[9];
536 
537   gaussian_map[0] =
538       (double *)aom_malloc(width[0] * height[0] * sizeof(*gaussian_map[0]));
539   if (!gaussian_map[0]) {
540     return 0;
541   }
542   memcpy(gaussian_map[0], intensity, width[0] * height[0] * sizeof(double));
543 
544   for (int i = 1; i < 9; ++i) {
545     int stride = width[i - 1];
546     int new_width = width[i];
547     int new_height = height[i];
548 
549     gaussian_map[i] =
550         (double *)aom_malloc(new_width * new_height * sizeof(*gaussian_map[i]));
551     if (!gaussian_map[i]) {
552       for (int l = 0; l < i; ++l) {
553         aom_free(gaussian_map[l]);
554       }
555       return 0;
556     }
557     memset(gaussian_map[i], 0, new_width * new_height * sizeof(double));
558     decimate_map(gaussian_map[i - 1], height[i - 1], width[i - 1], stride,
559                  gaussian_map[i]);
560   }
561 
562   double *tempGaborOutput[4][9];  //[angle: 0, 45, 90, 135 degree][filter_size]
563 
564   for (int i = 2; i < 9; ++i) {
565     const int cur_height = height[i];
566     const int cur_width = width[i];
567     for (int j = 0; j < 4; ++j) {
568       tempGaborOutput[j][i] = (double *)aom_malloc(
569           cur_height * cur_width * sizeof(*tempGaborOutput[j][i]));
570       if (!tempGaborOutput[j][i]) {
571         for (int l = 0; l < 9; ++l) {
572           aom_free(gaussian_map[l]);
573         }
574         for (int h = 0; h < 4; ++h) {
575           for (int g = 2; g < 9; ++g) {
576             aom_free(tempGaborOutput[h][g]);
577           }
578         }
579         return 0;
580       }
581       filter2d(gaussian_map[i], kGaborFilter[j], cur_width, cur_height,
582                tempGaborOutput[j][i]);
583     }
584   }
585 
586   for (int i = 0; i < 9; ++i) {
587     aom_free(gaussian_map[i]);
588   }
589 
590   saliency_feature_map
591       *tmp[4][6];  //[angle: 0, 45, 90, 135 degree][filter_size]
592 
593   for (int i = 0; i < 6; ++i) {
594     for (int j = 0; j < 4; ++j) {
595       tmp[j][i] = dst[j * 6 + i];
596     }
597   }
598 
599   for (int j = 0; j < 4; ++j) {
600     if (center_surround_diff((const double **)tempGaborOutput[j], height, width,
601                              tmp[j]) == 0) {
602       for (int h = 0; h < 4; ++h) {
603         for (int g = 2; g < 9; ++g) {
604           aom_free(tempGaborOutput[h][g]);
605         }
606       }
607       return 0;
608     }
609   }
610 
611   for (int i = 2; i < 9; ++i) {
612     for (int j = 0; j < 4; ++j) {
613       aom_free(tempGaborOutput[j][i]);
614     }
615   }
616 
617   return 1;
618 }
619 
find_min_max(const saliency_feature_map * input,double * max_value,double * min_value)620 static inline void find_min_max(const saliency_feature_map *input,
621                                 double *max_value, double *min_value) {
622   assert(input && input->buf);
623   *min_value = DBL_MAX;
624   *max_value = 0.0;
625 
626   for (int i = 0; i < input->height; ++i) {
627     for (int j = 0; j < input->width; ++j) {
628       assert(input->buf[i * input->width + j] >= 0.0);
629       *min_value = fmin(input->buf[i * input->width + j], *min_value);
630       *max_value = fmax(input->buf[i * input->width + j], *max_value);
631     }
632   }
633 }
634 
average_local_max(const saliency_feature_map * input,int stepsize)635 static inline double average_local_max(const saliency_feature_map *input,
636                                        int stepsize) {
637   int numlocal = 0;
638   double lmaxmean = 0, lmax = 0, dummy = 0;
639   saliency_feature_map local_map;
640   local_map.height = stepsize;
641   local_map.width = stepsize;
642   local_map.buf =
643       (double *)aom_malloc(stepsize * stepsize * sizeof(*local_map.buf));
644 
645   if (!local_map.buf) {
646     return -1;
647   }
648 
649   for (int y = 0; y < input->height - stepsize; y += stepsize) {
650     for (int x = 0; x < input->width - stepsize; x += stepsize) {
651       for (int i = 0; i < stepsize; ++i) {
652         for (int j = 0; j < stepsize; ++j) {
653           local_map.buf[i * stepsize + j] =
654               input->buf[(y + i) * input->width + x + j];
655         }
656       }
657 
658       find_min_max(&local_map, &lmax, &dummy);
659       lmaxmean += lmax;
660       numlocal++;
661     }
662   }
663 
664   aom_free(local_map.buf);
665 
666   return lmaxmean / numlocal;
667 }
668 
669 // Linear normalization the values in the map to [0,1].
minmax_normalize(saliency_feature_map * input)670 static void minmax_normalize(saliency_feature_map *input) {
671   double max_value, min_value;
672   find_min_max(input, &max_value, &min_value);
673 
674   for (int i = 0; i < input->height; ++i) {
675     for (int j = 0; j < input->width; ++j) {
676       if (max_value != min_value) {
677         input->buf[i * input->width + j] =
678             input->buf[i * input->width + j] / (max_value - min_value) +
679             min_value / (min_value - max_value);
680       } else {
681         input->buf[i * input->width + j] -= min_value;
682       }
683     }
684   }
685 }
686 
687 // This function is to promote meaningful “activation spots” in the map and
688 // ignores homogeneous areas.
nomalization_operator(saliency_feature_map * input,int stepsize)689 static int nomalization_operator(saliency_feature_map *input, int stepsize) {
690   minmax_normalize(input);
691   double lmaxmean = average_local_max(input, stepsize);
692   if (lmaxmean < 0) {
693     return 0;
694   }
695   double normCoeff = (1 - lmaxmean) * (1 - lmaxmean);
696 
697   for (int i = 0; i < input->height; ++i) {
698     for (int j = 0; j < input->width; ++j) {
699       input->buf[i * input->width + j] *= normCoeff;
700     }
701   }
702 
703   return 1;
704 }
705 
706 // Normalize the values in feature maps to [0,1], and then upscale all maps to
707 // the original frame size.
normalize_fm(saliency_feature_map * input[6],int width[9],int height[9],int num_fm,saliency_feature_map * output[6])708 static int normalize_fm(saliency_feature_map *input[6], int width[9],
709                         int height[9], int num_fm,
710                         saliency_feature_map *output[6]) {
711   // Feature maps (FM) are generated by function "center_surround_diff()". The
712   // difference is between a fine scale c and a coarser scale s, where c \in {2,
713   // 3, 4}, and s = c + delta, where delta \in {3, 4}, and the FM size is scale
714   // c. Specifically, i=0: c=2 and s=5, i=1: c=2 and s=6, i=2: c=3 and s=6, i=3:
715   // c=3 and s=7, i=4: c=4 and s=7, i=5: c=4 and s=8.
716   for (int i = 0; i < num_fm; ++i) {
717     if (nomalization_operator(input[i], 8) == 0) {
718       return 0;
719     }
720 
721     // Upscale FM to original frame size
722     if (upscale_map(input[i]->buf, (i / 2) + 2, 0, height, width,
723                     output[i]->buf) == 0) {
724       return 0;
725     }
726   }
727   return 1;
728 }
729 
730 // Combine feature maps with the same category (intensity, color, or
731 // orientation) into one conspicuity map.
normalized_map(saliency_feature_map * input[6],int width[9],int height[9],saliency_feature_map * output)732 static int normalized_map(saliency_feature_map *input[6], int width[9],
733                           int height[9], saliency_feature_map *output) {
734   int num_fm = 6;
735 
736   saliency_feature_map *n_input[6];
737   for (int i = 0; i < 6; ++i) {
738     n_input[i] = (saliency_feature_map *)aom_malloc(sizeof(*n_input[i]));
739     if (!n_input[i]) {
740       return 0;
741     }
742     n_input[i]->buf =
743         (double *)aom_malloc(width[0] * height[0] * sizeof(*n_input[i]->buf));
744     if (!n_input[i]->buf) {
745       aom_free(n_input[i]);
746       return 0;
747     }
748     n_input[i]->height = height[0];
749     n_input[i]->width = width[0];
750   }
751 
752   if (normalize_fm(input, width, height, num_fm, n_input) == 0) {
753     for (int i = 0; i < num_fm; ++i) {
754       aom_free(n_input[i]->buf);
755       aom_free(n_input[i]);
756     }
757     return 0;
758   }
759 
760   // Add up all normalized feature maps with the same category into one map.
761   for (int i = 0; i < num_fm; ++i) {
762     for (int r = 0; r < height[0]; ++r) {
763       for (int c = 0; c < width[0]; ++c) {
764         output->buf[r * width[0] + c] += n_input[i]->buf[r * width[0] + c];
765       }
766     }
767   }
768 
769   for (int i = 0; i < num_fm; ++i) {
770     aom_free(n_input[i]->buf);
771     aom_free(n_input[i]);
772   }
773 
774   nomalization_operator(output, 8);
775   return 1;
776 }
777 
normalized_map_rgb(saliency_feature_map * rg_map[6],saliency_feature_map * by_map[6],int width[9],int height[9],saliency_feature_map * output)778 static int normalized_map_rgb(saliency_feature_map *rg_map[6],
779                               saliency_feature_map *by_map[6], int width[9],
780                               int height[9], saliency_feature_map *output) {
781   saliency_feature_map *color_cm[2];  // 0: color_cm_rg, 1: color_cm_by
782   for (int i = 0; i < 2; ++i) {
783     color_cm[i] = aom_malloc(sizeof(*color_cm[i]));
784     if (!color_cm[i]) {
785       return 0;
786     }
787     color_cm[i]->buf =
788         (double *)aom_malloc(width[0] * height[0] * sizeof(*color_cm[i]->buf));
789     if (!color_cm[i]->buf) {
790       for (int l = 0; l < i; ++l) {
791         aom_free(color_cm[l]->buf);
792       }
793       aom_free(color_cm[i]);
794       return 0;
795     }
796 
797     color_cm[i]->width = width[0];
798     color_cm[i]->height = height[0];
799     memset(color_cm[i]->buf, 0,
800            width[0] * height[0] * sizeof(*color_cm[i]->buf));
801   }
802 
803   if (normalized_map(rg_map, width, height, color_cm[0]) == 0 ||
804       normalized_map(by_map, width, height, color_cm[1]) == 0) {
805     for (int i = 0; i < 2; ++i) {
806       aom_free(color_cm[i]->buf);
807       aom_free(color_cm[i]);
808     }
809     return 0;
810   }
811 
812   for (int r = 0; r < height[0]; ++r) {
813     for (int c = 0; c < width[0]; ++c) {
814       output->buf[r * width[0] + c] = color_cm[0]->buf[r * width[0] + c] +
815                                       color_cm[1]->buf[r * width[0] + c];
816     }
817   }
818 
819   for (int i = 0; i < 2; ++i) {
820     aom_free(color_cm[i]->buf);
821     aom_free(color_cm[i]);
822   }
823 
824   nomalization_operator(output, 8);
825   return 1;
826 }
827 
normalized_map_orientation(saliency_feature_map * orientation_map[24],int width[9],int height[9],saliency_feature_map * output)828 static int normalized_map_orientation(saliency_feature_map *orientation_map[24],
829                                       int width[9], int height[9],
830                                       saliency_feature_map *output) {
831   int num_fms_per_angle = 6;
832 
833   saliency_feature_map *ofm[4][6];
834   for (int i = 0; i < num_fms_per_angle; ++i) {
835     for (int j = 0; j < 4; ++j) {
836       ofm[j][i] = orientation_map[j * num_fms_per_angle + i];
837     }
838   }
839 
840   // extract conspicuity map for each angle
841   saliency_feature_map *nofm = aom_malloc(sizeof(*nofm));
842   if (!nofm) {
843     return 0;
844   }
845   nofm->buf = (double *)aom_malloc(width[0] * height[0] * sizeof(*nofm->buf));
846   if (!nofm->buf) {
847     aom_free(nofm);
848     return 0;
849   }
850   nofm->height = height[0];
851   nofm->width = width[0];
852 
853   for (int i = 0; i < 4; ++i) {
854     memset(nofm->buf, 0, width[0] * height[0] * sizeof(*nofm->buf));
855     if (normalized_map(ofm[i], width, height, nofm) == 0) {
856       aom_free(nofm->buf);
857       aom_free(nofm);
858       return 0;
859     }
860 
861     for (int r = 0; r < height[0]; ++r) {
862       for (int c = 0; c < width[0]; ++c) {
863         output->buf[r * width[0] + c] += nofm->buf[r * width[0] + c];
864       }
865     }
866   }
867 
868   aom_free(nofm->buf);
869   aom_free(nofm);
870 
871   nomalization_operator(output, 8);
872   return 1;
873 }
874 
875 // Set pixel level saliency mask based on Itti-Koch algorithm
av1_set_saliency_map(AV1_COMP * cpi)876 int av1_set_saliency_map(AV1_COMP *cpi) {
877   AV1_COMMON *const cm = &cpi->common;
878 
879   int frm_width = cm->width;
880   int frm_height = cm->height;
881 
882   int pyr_height[9];
883   int pyr_width[9];
884 
885   pyr_height[0] = frm_height;
886   pyr_width[0] = frm_width;
887 
888   for (int i = 1; i < 9; ++i) {
889     pyr_width[i] = pyr_width[i - 1] / 2;
890     pyr_height[i] = pyr_height[i - 1] / 2;
891   }
892 
893   double *cr = aom_malloc(frm_width * frm_height * sizeof(*cr));
894   double *cg = aom_malloc(frm_width * frm_height * sizeof(*cg));
895   double *cb = aom_malloc(frm_width * frm_height * sizeof(*cb));
896   double *intensity = aom_malloc(frm_width * frm_height * sizeof(*intensity));
897 
898   if (!cr || !cg || !cb || !intensity) {
899     aom_free(cr);
900     aom_free(cg);
901     aom_free(cb);
902     aom_free(intensity);
903     return 0;
904   }
905 
906   // Extract red / green / blue channels and intensity component
907   get_color_intensity(cpi->source, cm->seq_params->subsampling_x,
908                       cm->seq_params->subsampling_y, cr, cg, cb, intensity);
909 
910   // Feature Map Extraction
911   // intensity map
912   saliency_feature_map *i_map[6];
913   for (int i = 0; i < 6; ++i) {
914     int cur_height = pyr_height[(i / 2) + 2];
915     int cur_width = pyr_width[(i / 2) + 2];
916 
917     i_map[i] = (saliency_feature_map *)aom_malloc(sizeof(*i_map[i]));
918     if (!i_map[i]) {
919       aom_free(cr);
920       aom_free(cg);
921       aom_free(cb);
922       aom_free(intensity);
923       for (int l = 0; l < i; ++l) {
924         aom_free(i_map[l]);
925       }
926       return 0;
927     }
928     i_map[i]->buf =
929         (double *)aom_malloc(cur_height * cur_width * sizeof(*i_map[i]->buf));
930     if (!i_map[i]->buf) {
931       aom_free(cr);
932       aom_free(cg);
933       aom_free(cb);
934       aom_free(intensity);
935       for (int l = 0; l < i; ++l) {
936         aom_free(i_map[l]->buf);
937         aom_free(i_map[l]);
938       }
939       return 0;
940     }
941     i_map[i]->height = cur_height;
942     i_map[i]->width = cur_width;
943   }
944 
945   if (get_feature_map_intensity(intensity, pyr_width, pyr_height, i_map) == 0) {
946     aom_free(cr);
947     aom_free(cg);
948     aom_free(cb);
949     aom_free(intensity);
950     for (int l = 0; l < 6; ++l) {
951       aom_free(i_map[l]->buf);
952       aom_free(i_map[l]);
953     }
954     return 0;
955   }
956 
957   // RGB map
958   saliency_feature_map *rg_map[6], *by_map[6];
959   for (int i = 0; i < 6; ++i) {
960     int cur_height = pyr_height[(i / 2) + 2];
961     int cur_width = pyr_width[(i / 2) + 2];
962     rg_map[i] = (saliency_feature_map *)aom_malloc(sizeof(*rg_map[i]));
963     by_map[i] = (saliency_feature_map *)aom_malloc(sizeof(*by_map[i]));
964     if (!rg_map[i] || !by_map[i]) {
965       aom_free(cr);
966       aom_free(cg);
967       aom_free(cb);
968       aom_free(intensity);
969       for (int l = 0; l < 6; ++l) {
970         aom_free(i_map[l]->buf);
971         aom_free(i_map[l]);
972         aom_free(rg_map[l]);
973         aom_free(by_map[l]);
974       }
975       return 0;
976     }
977     rg_map[i]->buf =
978         (double *)aom_malloc(cur_height * cur_width * sizeof(*rg_map[i]->buf));
979     by_map[i]->buf =
980         (double *)aom_malloc(cur_height * cur_width * sizeof(*by_map[i]->buf));
981     if (!by_map[i]->buf || !rg_map[i]->buf) {
982       aom_free(cr);
983       aom_free(cg);
984       aom_free(cb);
985       aom_free(intensity);
986       for (int l = 0; l < 6; ++l) {
987         aom_free(i_map[l]->buf);
988         aom_free(i_map[l]);
989       }
990       for (int l = 0; l < i; ++l) {
991         aom_free(rg_map[l]->buf);
992         aom_free(by_map[l]->buf);
993         aom_free(rg_map[l]);
994         aom_free(by_map[l]);
995       }
996       return 0;
997     }
998     rg_map[i]->height = cur_height;
999     rg_map[i]->width = cur_width;
1000     by_map[i]->height = cur_height;
1001     by_map[i]->width = cur_width;
1002   }
1003 
1004   if (get_feature_map_rgb(cr, cg, cb, pyr_width, pyr_height, rg_map, by_map) ==
1005       0) {
1006     aom_free(cr);
1007     aom_free(cg);
1008     aom_free(cb);
1009     aom_free(intensity);
1010     for (int l = 0; l < 6; ++l) {
1011       aom_free(i_map[l]->buf);
1012       aom_free(rg_map[l]->buf);
1013       aom_free(by_map[l]->buf);
1014       aom_free(i_map[l]);
1015       aom_free(rg_map[l]);
1016       aom_free(by_map[l]);
1017     }
1018     return 0;
1019   }
1020 
1021   // Orientation map
1022   saliency_feature_map *orientation_map[24];
1023   for (int i = 0; i < 24; ++i) {
1024     int cur_height = pyr_height[((i % 6) / 2) + 2];
1025     int cur_width = pyr_width[((i % 6) / 2) + 2];
1026 
1027     orientation_map[i] =
1028         (saliency_feature_map *)aom_malloc(sizeof(*orientation_map[i]));
1029     if (!orientation_map[i]) {
1030       aom_free(cr);
1031       aom_free(cg);
1032       aom_free(cb);
1033       aom_free(intensity);
1034       for (int l = 0; l < 6; ++l) {
1035         aom_free(i_map[l]->buf);
1036         aom_free(rg_map[l]->buf);
1037         aom_free(by_map[l]->buf);
1038         aom_free(i_map[l]);
1039         aom_free(rg_map[l]);
1040         aom_free(by_map[l]);
1041       }
1042       for (int h = 0; h < i; ++h) {
1043         aom_free(orientation_map[h]);
1044       }
1045       return 0;
1046     }
1047 
1048     orientation_map[i]->buf = (double *)aom_malloc(
1049         cur_height * cur_width * sizeof(*orientation_map[i]->buf));
1050     if (!orientation_map[i]->buf) {
1051       aom_free(cr);
1052       aom_free(cg);
1053       aom_free(cb);
1054       aom_free(intensity);
1055       for (int l = 0; l < 6; ++l) {
1056         aom_free(i_map[l]->buf);
1057         aom_free(rg_map[l]->buf);
1058         aom_free(by_map[l]->buf);
1059         aom_free(i_map[l]);
1060         aom_free(rg_map[l]);
1061         aom_free(by_map[l]);
1062       }
1063 
1064       for (int h = 0; h < i; ++h) {
1065         aom_free(orientation_map[h]->buf);
1066         aom_free(orientation_map[h]->buf);
1067         aom_free(orientation_map[h]);
1068         aom_free(orientation_map[h]);
1069       }
1070       return 0;
1071     }
1072 
1073     orientation_map[i]->height = cur_height;
1074     orientation_map[i]->width = cur_width;
1075   }
1076 
1077   if (get_feature_map_orientation(intensity, pyr_width, pyr_height,
1078                                   orientation_map) == 0) {
1079     aom_free(cr);
1080     aom_free(cg);
1081     aom_free(cb);
1082     aom_free(intensity);
1083     for (int l = 0; l < 6; ++l) {
1084       aom_free(i_map[l]->buf);
1085       aom_free(rg_map[l]->buf);
1086       aom_free(by_map[l]->buf);
1087       aom_free(i_map[l]);
1088       aom_free(rg_map[l]);
1089       aom_free(by_map[l]);
1090     }
1091     for (int h = 0; h < 24; ++h) {
1092       aom_free(orientation_map[h]->buf);
1093       aom_free(orientation_map[h]);
1094     }
1095     return 0;
1096   }
1097 
1098   aom_free(cr);
1099   aom_free(cg);
1100   aom_free(cb);
1101   aom_free(intensity);
1102 
1103   saliency_feature_map
1104       *normalized_maps[3];  // 0: intensity, 1: color, 2: orientation
1105 
1106   for (int i = 0; i < 3; ++i) {
1107     normalized_maps[i] = aom_malloc(sizeof(*normalized_maps[i]));
1108     if (!normalized_maps[i]) {
1109       for (int l = 0; l < 6; ++l) {
1110         aom_free(i_map[l]->buf);
1111         aom_free(rg_map[l]->buf);
1112         aom_free(by_map[l]->buf);
1113         aom_free(i_map[l]);
1114         aom_free(rg_map[l]);
1115         aom_free(by_map[l]);
1116       }
1117 
1118       for (int h = 0; h < 24; ++h) {
1119         aom_free(orientation_map[h]->buf);
1120         aom_free(orientation_map[h]);
1121       }
1122 
1123       for (int l = 0; l < i; ++l) {
1124         aom_free(normalized_maps[l]);
1125       }
1126       return 0;
1127     }
1128     normalized_maps[i]->buf = (double *)aom_malloc(
1129         frm_width * frm_height * sizeof(*normalized_maps[i]->buf));
1130     if (!normalized_maps[i]->buf) {
1131       for (int l = 0; l < 6; ++l) {
1132         aom_free(i_map[l]->buf);
1133         aom_free(rg_map[l]->buf);
1134         aom_free(by_map[l]->buf);
1135         aom_free(i_map[l]);
1136         aom_free(rg_map[l]);
1137         aom_free(by_map[l]);
1138       }
1139       for (int h = 0; h < 24; ++h) {
1140         aom_free(orientation_map[h]->buf);
1141         aom_free(orientation_map[h]);
1142       }
1143       for (int l = 0; l < i; ++l) {
1144         aom_free(normalized_maps[l]->buf);
1145         aom_free(normalized_maps[l]);
1146       }
1147       return 0;
1148     }
1149     normalized_maps[i]->width = frm_width;
1150     normalized_maps[i]->height = frm_height;
1151     memset(normalized_maps[i]->buf, 0,
1152            frm_width * frm_height * sizeof(*normalized_maps[i]->buf));
1153   }
1154 
1155   // Conspicuity map generation
1156   if (normalized_map(i_map, pyr_width, pyr_height, normalized_maps[0]) == 0 ||
1157       normalized_map_rgb(rg_map, by_map, pyr_width, pyr_height,
1158                          normalized_maps[1]) == 0 ||
1159       normalized_map_orientation(orientation_map, pyr_width, pyr_height,
1160                                  normalized_maps[2]) == 0) {
1161     for (int i = 0; i < 6; ++i) {
1162       aom_free(i_map[i]->buf);
1163       aom_free(rg_map[i]->buf);
1164       aom_free(by_map[i]->buf);
1165       aom_free(i_map[i]);
1166       aom_free(rg_map[i]);
1167       aom_free(by_map[i]);
1168     }
1169 
1170     for (int i = 0; i < 24; ++i) {
1171       aom_free(orientation_map[i]->buf);
1172       aom_free(orientation_map[i]);
1173     }
1174 
1175     for (int i = 0; i < 3; ++i) {
1176       aom_free(normalized_maps[i]->buf);
1177       aom_free(normalized_maps[i]);
1178     }
1179     return 0;
1180   }
1181 
1182   for (int i = 0; i < 6; ++i) {
1183     aom_free(i_map[i]->buf);
1184     aom_free(rg_map[i]->buf);
1185     aom_free(by_map[i]->buf);
1186     aom_free(i_map[i]);
1187     aom_free(rg_map[i]);
1188     aom_free(by_map[i]);
1189   }
1190 
1191   for (int i = 0; i < 24; ++i) {
1192     aom_free(orientation_map[i]->buf);
1193     aom_free(orientation_map[i]);
1194   }
1195 
1196   // Pixel level saliency map
1197   saliency_feature_map *combined_saliency_map =
1198       aom_malloc(sizeof(*combined_saliency_map));
1199   if (!combined_saliency_map) {
1200     for (int i = 0; i < 3; ++i) {
1201       aom_free(normalized_maps[i]->buf);
1202       aom_free(normalized_maps[i]);
1203     }
1204     return 0;
1205   }
1206 
1207   combined_saliency_map->buf = (double *)aom_malloc(
1208       frm_width * frm_height * sizeof(*combined_saliency_map->buf));
1209   if (!combined_saliency_map->buf) {
1210     for (int i = 0; i < 3; ++i) {
1211       aom_free(normalized_maps[i]->buf);
1212       aom_free(normalized_maps[i]);
1213     }
1214 
1215     aom_free(combined_saliency_map);
1216     return 0;
1217   }
1218   combined_saliency_map->height = frm_height;
1219   combined_saliency_map->width = frm_width;
1220 
1221   double w_intensity, w_color, w_orient;
1222 
1223   w_intensity = w_color = w_orient = (double)1 / 3;
1224 
1225   for (int r = 0; r < frm_height; ++r) {
1226     for (int c = 0; c < frm_width; ++c) {
1227       combined_saliency_map->buf[r * frm_width + c] =
1228           (w_intensity * normalized_maps[0]->buf[r * frm_width + c] +
1229            w_color * normalized_maps[1]->buf[r * frm_width + c] +
1230            w_orient * normalized_maps[2]->buf[r * frm_width + c]);
1231     }
1232   }
1233 
1234   for (int r = 0; r < frm_height; ++r) {
1235     for (int c = 0; c < frm_width; ++c) {
1236       int index = r * frm_width + c;
1237       cpi->saliency_map[index] =
1238           (uint8_t)(combined_saliency_map->buf[index] * 255);
1239     }
1240   }
1241 
1242   for (int i = 0; i < 3; ++i) {
1243     aom_free(normalized_maps[i]->buf);
1244     aom_free(normalized_maps[i]);
1245   }
1246 
1247   aom_free(combined_saliency_map->buf);
1248   aom_free(combined_saliency_map);
1249 
1250   return 1;
1251 }
1252 
1253 // Set superblock level saliency mask for rdmult scaling
av1_setup_sm_rdmult_scaling_factor(AV1_COMP * cpi,double motion_ratio)1254 int av1_setup_sm_rdmult_scaling_factor(AV1_COMP *cpi, double motion_ratio) {
1255   AV1_COMMON *cm = &cpi->common;
1256 
1257   saliency_feature_map *sb_saliency_map =
1258       aom_malloc(sizeof(saliency_feature_map));
1259 
1260   if (sb_saliency_map == NULL) {
1261     return 0;
1262   }
1263 
1264   const BLOCK_SIZE bsize = cm->seq_params->sb_size;
1265   const int num_mi_w = mi_size_wide[bsize];
1266   const int num_mi_h = mi_size_high[bsize];
1267   const int block_width = block_size_wide[bsize];
1268   const int block_height = block_size_high[bsize];
1269   const int num_sb_cols = (cm->mi_params.mi_cols + num_mi_w - 1) / num_mi_w;
1270   const int num_sb_rows = (cm->mi_params.mi_rows + num_mi_h - 1) / num_mi_h;
1271 
1272   sb_saliency_map->height = num_sb_rows;
1273   sb_saliency_map->width = num_sb_cols;
1274   sb_saliency_map->buf = (double *)aom_malloc(num_sb_rows * num_sb_cols *
1275                                               sizeof(*sb_saliency_map->buf));
1276 
1277   if (sb_saliency_map->buf == NULL) {
1278     aom_free(sb_saliency_map);
1279     return 0;
1280   }
1281 
1282   for (int row = 0; row < num_sb_rows; ++row) {
1283     for (int col = 0; col < num_sb_cols; ++col) {
1284       const int index = row * num_sb_cols + col;
1285       double total_pixel = 0;
1286       double total_weight = 0;
1287 
1288       for (int i = 0; i < block_height; i++) {
1289         for (int j = 0; j < block_width; j++) {
1290           if ((row * block_height + i) >= cpi->common.height ||
1291               (col * block_width + j) >= cpi->common.width)
1292             continue;
1293           total_pixel++;
1294           total_weight +=
1295               cpi->saliency_map[(row * block_height + i) * cpi->common.width +
1296                                 col * block_width + j];
1297         }
1298       }
1299 
1300       assert(total_pixel > 0);
1301 
1302       // Calculate the superblock level saliency map from pixel level saliency
1303       // map
1304       sb_saliency_map->buf[index] = total_weight / total_pixel;
1305 
1306       // Further lower the superblock saliency score for boundary superblocks.
1307       if (row < 1 || row > num_sb_rows - 2 || col < 1 ||
1308           col > num_sb_cols - 2) {
1309         sb_saliency_map->buf[index] /= 5;
1310       }
1311     }
1312   }
1313 
1314   // superblock level saliency map finalization
1315   minmax_normalize(sb_saliency_map);
1316 
1317   double log_sum = 0.0;
1318   double sum = 0.0;
1319   int block_count = 0;
1320 
1321   // Calculate the average superblock sm_scaling_factor for a frame, to be used
1322   // for clamping later.
1323   for (int row = 0; row < num_sb_rows; ++row) {
1324     for (int col = 0; col < num_sb_cols; ++col) {
1325       const int index = row * num_sb_cols + col;
1326       const double saliency = sb_saliency_map->buf[index];
1327 
1328       cpi->sm_scaling_factor[index] = 1 - saliency;
1329       sum += cpi->sm_scaling_factor[index];
1330       block_count++;
1331     }
1332   }
1333   assert(block_count > 0);
1334   sum /= block_count;
1335 
1336   // Calculate the geometric mean of superblock sm_scaling_factor for a frame,
1337   // to be used for normalization.
1338   for (int row = 0; row < num_sb_rows; ++row) {
1339     for (int col = 0; col < num_sb_cols; ++col) {
1340       const int index = row * num_sb_cols + col;
1341       log_sum += log(fmax(cpi->sm_scaling_factor[index], 0.001));
1342       cpi->sm_scaling_factor[index] =
1343           fmax(cpi->sm_scaling_factor[index], 0.8 * sum);
1344     }
1345   }
1346 
1347   log_sum = exp(log_sum / block_count);
1348 
1349   // Normalize the sm_scaling_factor by geometric mean.
1350   for (int row = 0; row < num_sb_rows; ++row) {
1351     for (int col = 0; col < num_sb_cols; ++col) {
1352       const int index = row * num_sb_cols + col;
1353       assert(log_sum > 0);
1354       cpi->sm_scaling_factor[index] /= log_sum;
1355 
1356       // Modulate the sm_scaling_factor by frame basis motion factor
1357       cpi->sm_scaling_factor[index] =
1358           cpi->sm_scaling_factor[index] * motion_ratio;
1359     }
1360   }
1361 
1362   aom_free(sb_saliency_map->buf);
1363   aom_free(sb_saliency_map);
1364   return 1;
1365 }
1366 
1367 // av1_setup_motion_ratio() is only enabled when CONFIG_REALTIME_ONLY is 0,
1368 // because the computations need to access the first pass stats which are
1369 // only available when CONFIG_REALTIME_ONLY is equal to 0.
1370 #if !CONFIG_REALTIME_ONLY
1371 // Set motion_ratio that reflects the motion quantities between two consecutive
1372 // frames. Motion_ratio will be used to set up saliency_map based rdmult scaling
1373 // factor, i.e., the less the motion quantities are, the more bits will be spent
1374 // on this frame, and vice versa.
av1_setup_motion_ratio(AV1_COMP * cpi)1375 double av1_setup_motion_ratio(AV1_COMP *cpi) {
1376   AV1_COMMON *cm = &cpi->common;
1377   int frames_since_key =
1378       cm->current_frame.display_order_hint - cpi->rc.frames_since_key;
1379   const FIRSTPASS_STATS *cur_stats = av1_firstpass_info_peek(
1380       &cpi->ppi->twopass.firstpass_info, frames_since_key);
1381   assert(cur_stats != NULL);
1382   assert(cpi->ppi->twopass.firstpass_info.total_stats.count > 0);
1383 
1384   const double avg_intra_error =
1385       exp(cpi->ppi->twopass.firstpass_info.total_stats.log_intra_error /
1386           cpi->ppi->twopass.firstpass_info.total_stats.count);
1387   const double avg_inter_error =
1388       exp(cpi->ppi->twopass.firstpass_info.total_stats.log_coded_error /
1389           cpi->ppi->twopass.firstpass_info.total_stats.count);
1390 
1391   double inter_error = cur_stats->coded_error;
1392   double error_stdev = 0;
1393   const double avg_error =
1394       cpi->ppi->twopass.firstpass_info.total_stats.intra_error /
1395       cpi->ppi->twopass.firstpass_info.total_stats.count;
1396   for (int i = 0; i < cpi->ppi->twopass.firstpass_info.total_stats.count; i++) {
1397     const FIRSTPASS_STATS *stats =
1398         &cpi->ppi->twopass.firstpass_info.stats_buf[i];
1399     error_stdev +=
1400         (stats->intra_error - avg_error) * (stats->intra_error - avg_error);
1401   }
1402   error_stdev =
1403       sqrt(error_stdev / cpi->ppi->twopass.firstpass_info.total_stats.count);
1404 
1405   double motion_ratio = 1;
1406   if (error_stdev / fmax(avg_intra_error, 1) > 0.1) {
1407     motion_ratio = inter_error / fmax(1, avg_inter_error);
1408     motion_ratio = AOMMIN(motion_ratio, 1.5);
1409     motion_ratio = AOMMAX(motion_ratio, 0.8);
1410   }
1411 
1412   return motion_ratio;
1413 }
1414 #endif  // !CONFIG_REALTIME_ONLY
1415