1 /*
2 * Copyright (c) 2023, Alliance for Open Media. All rights reserved.
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11 #include <assert.h>
12 #include <float.h>
13 #include <string.h>
14
15 #include "av1/encoder/encoder.h"
16 #include "av1/encoder/encoder_utils.h"
17 #include "av1/encoder/firstpass.h"
18 #include "av1/encoder/rdopt.h"
19 #include "av1/encoder/saliency_map.h"
20
21 // The Gabor filter is generated by setting the parameters as:
22 // ksize = 9
23 // sigma = 1
24 // theta = y*np.pi/4, where y /in {0, 1, 2, 3}, i.e., 0, 45, 90, 135 degree
25 // lambda1 = 1
26 // gamma=0.8
27 // phi =0
28 static const double kGaborFilter[4][9][9] = { // [angle: 0, 45, 90, 135
29 // degree][ksize][ksize]
30 { { 2.0047323e-06, 6.6387620e-05, 8.0876675e-04, 3.6246411e-03, 5.9760227e-03,
31 3.6246411e-03, 8.0876675e-04, 6.6387620e-05, 2.0047323e-06 },
32 { 1.8831115e-05, 6.2360091e-04, 7.5970138e-03, 3.4047455e-02, 5.6134764e-02,
33 3.4047455e-02, 7.5970138e-03, 6.2360091e-04, 1.8831115e-05 },
34 { 9.3271126e-05, 3.0887155e-03, 3.7628256e-02, 1.6863814e-01, 2.7803731e-01,
35 1.6863814e-01, 3.7628256e-02, 3.0887155e-03, 9.3271126e-05 },
36 { 2.4359586e-04, 8.0667874e-03, 9.8273583e-02, 4.4043165e-01, 7.2614902e-01,
37 4.4043165e-01, 9.8273583e-02, 8.0667874e-03, 2.4359586e-04 },
38 { 3.3546262e-04, 1.1108996e-02, 1.3533528e-01, 6.0653067e-01, 1.0000000e+00,
39 6.0653067e-01, 1.3533528e-01, 1.1108996e-02, 3.3546262e-04 },
40 { 2.4359586e-04, 8.0667874e-03, 9.8273583e-02, 4.4043165e-01, 7.2614902e-01,
41 4.4043165e-01, 9.8273583e-02, 8.0667874e-03, 2.4359586e-04 },
42 { 9.3271126e-05, 3.0887155e-03, 3.7628256e-02, 1.6863814e-01, 2.7803731e-01,
43 1.6863814e-01, 3.7628256e-02, 3.0887155e-03, 9.3271126e-05 },
44 { 1.8831115e-05, 6.2360091e-04, 7.5970138e-03, 3.4047455e-02, 5.6134764e-02,
45 3.4047455e-02, 7.5970138e-03, 6.2360091e-04, 1.8831115e-05 },
46 { 2.0047323e-06, 6.6387620e-05, 8.0876675e-04, 3.6246411e-03, 5.9760227e-03,
47 3.6246411e-03, 8.0876675e-04, 6.6387620e-05, 2.0047323e-06 } },
48
49 { { -6.2165498e-08, 3.8760313e-06, 3.0079011e-06, -4.4602581e-04,
50 6.6981313e-04, 1.3962291e-03, -9.9486928e-04, -8.1631159e-05,
51 3.5712848e-05 },
52 { 3.8760313e-06, 5.7044272e-06, -1.6041942e-03, 4.5687673e-03,
53 1.8061366e-02, -2.4406660e-02, -3.7979286e-03, 3.1511115e-03,
54 -8.1631159e-05 },
55 { 3.0079011e-06, -1.6041942e-03, 8.6645801e-03, 6.4960226e-02,
56 -1.6647682e-01, -4.9129307e-02, 7.7304743e-02, -3.7979286e-03,
57 -9.9486928e-04 },
58 { -4.4602581e-04, 4.5687673e-03, 6.4960226e-02, -3.1572008e-01,
59 -1.7670043e-01, 5.2729243e-01, -4.9129307e-02, -2.4406660e-02,
60 1.3962291e-03 },
61 { 6.6981313e-04, 1.8061366e-02, -1.6647682e-01, -1.7670043e-01,
62 1.0000000e+00, -1.7670043e-01, -1.6647682e-01, 1.8061366e-02,
63 6.6981313e-04 },
64 { 1.3962291e-03, -2.4406660e-02, -4.9129307e-02, 5.2729243e-01,
65 -1.7670043e-01, -3.1572008e-01, 6.4960226e-02, 4.5687673e-03,
66 -4.4602581e-04 },
67 { -9.9486928e-04, -3.7979286e-03, 7.7304743e-02, -4.9129307e-02,
68 -1.6647682e-01, 6.4960226e-02, 8.6645801e-03, -1.6041942e-03,
69 3.0079011e-06 },
70 { -8.1631159e-05, 3.1511115e-03, -3.7979286e-03, -2.4406660e-02,
71 1.8061366e-02, 4.5687673e-03, -1.6041942e-03, 5.7044272e-06,
72 3.8760313e-06 },
73 { 3.5712848e-05, -8.1631159e-05, -9.9486928e-04, 1.3962291e-03,
74 6.6981313e-04, -4.4602581e-04, 3.0079011e-06, 3.8760313e-06,
75 -6.2165498e-08 } },
76
77 { { 2.0047323e-06, 1.8831115e-05, 9.3271126e-05, 2.4359586e-04, 3.3546262e-04,
78 2.4359586e-04, 9.3271126e-05, 1.8831115e-05, 2.0047323e-06 },
79 { 6.6387620e-05, 6.2360091e-04, 3.0887155e-03, 8.0667874e-03, 1.1108996e-02,
80 8.0667874e-03, 3.0887155e-03, 6.2360091e-04, 6.6387620e-05 },
81 { 8.0876675e-04, 7.5970138e-03, 3.7628256e-02, 9.8273583e-02, 1.3533528e-01,
82 9.8273583e-02, 3.7628256e-02, 7.5970138e-03, 8.0876675e-04 },
83 { 3.6246411e-03, 3.4047455e-02, 1.6863814e-01, 4.4043165e-01, 6.0653067e-01,
84 4.4043165e-01, 1.6863814e-01, 3.4047455e-02, 3.6246411e-03 },
85 { 5.9760227e-03, 5.6134764e-02, 2.7803731e-01, 7.2614902e-01, 1.0000000e+00,
86 7.2614902e-01, 2.7803731e-01, 5.6134764e-02, 5.9760227e-03 },
87 { 3.6246411e-03, 3.4047455e-02, 1.6863814e-01, 4.4043165e-01, 6.0653067e-01,
88 4.4043165e-01, 1.6863814e-01, 3.4047455e-02, 3.6246411e-03 },
89 { 8.0876675e-04, 7.5970138e-03, 3.7628256e-02, 9.8273583e-02, 1.3533528e-01,
90 9.8273583e-02, 3.7628256e-02, 7.5970138e-03, 8.0876675e-04 },
91 { 6.6387620e-05, 6.2360091e-04, 3.0887155e-03, 8.0667874e-03, 1.1108996e-02,
92 8.0667874e-03, 3.0887155e-03, 6.2360091e-04, 6.6387620e-05 },
93 { 2.0047323e-06, 1.8831115e-05, 9.3271126e-05, 2.4359586e-04, 3.3546262e-04,
94 2.4359586e-04, 9.3271126e-05, 1.8831115e-05, 2.0047323e-06 } },
95
96 { { 3.5712848e-05, -8.1631159e-05, -9.9486928e-04, 1.3962291e-03,
97 6.6981313e-04, -4.4602581e-04, 3.0079011e-06, 3.8760313e-06,
98 -6.2165498e-08 },
99 { -8.1631159e-05, 3.1511115e-03, -3.7979286e-03, -2.4406660e-02,
100 1.8061366e-02, 4.5687673e-03, -1.6041942e-03, 5.7044272e-06,
101 3.8760313e-06 },
102 { -9.9486928e-04, -3.7979286e-03, 7.7304743e-02, -4.9129307e-02,
103 -1.6647682e-01, 6.4960226e-02, 8.6645801e-03, -1.6041942e-03,
104 3.0079011e-06 },
105 { 1.3962291e-03, -2.4406660e-02, -4.9129307e-02, 5.2729243e-01,
106 -1.7670043e-01, -3.1572008e-01, 6.4960226e-02, 4.5687673e-03,
107 -4.4602581e-04 },
108 { 6.6981313e-04, 1.8061366e-02, -1.6647682e-01, -1.7670043e-01,
109 1.0000000e+00, -1.7670043e-01, -1.6647682e-01, 1.8061366e-02,
110 6.6981313e-04 },
111 { -4.4602581e-04, 4.5687673e-03, 6.4960226e-02, -3.1572008e-01,
112 -1.7670043e-01, 5.2729243e-01, -4.9129307e-02, -2.4406660e-02,
113 1.3962291e-03 },
114 { 3.0079011e-06, -1.6041942e-03, 8.6645801e-03, 6.4960226e-02,
115 -1.6647682e-01, -4.9129307e-02, 7.7304743e-02, -3.7979286e-03,
116 -9.9486928e-04 },
117 { 3.8760313e-06, 5.7044272e-06, -1.6041942e-03, 4.5687673e-03,
118 1.8061366e-02, -2.4406660e-02, -3.7979286e-03, 3.1511115e-03,
119 -8.1631159e-05 },
120 { -6.2165498e-08, 3.8760313e-06, 3.0079011e-06, -4.4602581e-04,
121 6.6981313e-04, 1.3962291e-03, -9.9486928e-04, -8.1631159e-05,
122 3.5712848e-05 } }
123 };
124
125 // This function is to extract red/green/blue channels, and calculate intensity
126 // = (r+g+b)/3. Note that it only handles 8bits case now.
127 // TODO(linzhen): add high bitdepth support.
get_color_intensity(const YV12_BUFFER_CONFIG * src,int subsampling_x,int subsampling_y,double * cr,double * cg,double * cb,double * intensity)128 static void get_color_intensity(const YV12_BUFFER_CONFIG *src,
129 int subsampling_x, int subsampling_y,
130 double *cr, double *cg, double *cb,
131 double *intensity) {
132 const uint8_t *y = src->buffers[0];
133 const uint8_t *u = src->buffers[1];
134 const uint8_t *v = src->buffers[2];
135
136 const int y_height = src->crop_heights[0];
137 const int y_width = src->crop_widths[0];
138 const int y_stride = src->strides[0];
139 const int c_stride = src->strides[1];
140
141 for (int i = 0; i < y_height; ++i) {
142 for (int j = 0; j < y_width; ++j) {
143 cr[i * y_width + j] =
144 fclamp((double)y[i * y_stride + j] +
145 1.370 * (double)(v[(i >> subsampling_y) * c_stride +
146 (j >> subsampling_x)] -
147 128),
148 0, 255);
149 cg[i * y_width + j] =
150 fclamp((double)y[i * y_stride + j] -
151 0.698 * (double)(u[(i >> subsampling_y) * c_stride +
152 (j >> subsampling_x)] -
153 128) -
154 0.337 * (double)(v[(i >> subsampling_y) * c_stride +
155 (j >> subsampling_x)] -
156 128),
157 0, 255);
158 cb[i * y_width + j] =
159 fclamp((double)y[i * y_stride + j] +
160 1.732 * (double)(u[(i >> subsampling_y) * c_stride +
161 (j >> subsampling_x)] -
162 128),
163 0, 255);
164
165 intensity[i * y_width + j] =
166 (cr[i * y_width + j] + cg[i * y_width + j] + cb[i * y_width + j]) /
167 3.0;
168 assert(intensity[i * y_width + j] >= 0 &&
169 intensity[i * y_width + j] <= 255);
170
171 intensity[i * y_width + j] /= 256;
172 cr[i * y_width + j] /= 256;
173 cg[i * y_width + j] /= 256;
174 cb[i * y_width + j] /= 256;
175 }
176 }
177 }
178
convolve_map(const double * filter,const double * map,const int size)179 static inline double convolve_map(const double *filter, const double *map,
180 const int size) {
181 double result = 0;
182 for (int i = 0; i < size; ++i) {
183 result += filter[i] * map[i]; // symmetric filter is used
184 }
185 return result;
186 }
187
188 // This function is to decimate the map by half, and apply Gaussian filter on
189 // top of the downsampled map.
decimate_map(const double * map,int height,int width,int stride,double * downsampled_map)190 static inline void decimate_map(const double *map, int height, int width,
191 int stride, double *downsampled_map) {
192 const int new_width = width / 2;
193 const int window_size = 5;
194 const double gaussian_filter[25] = {
195 1. / 256, 1.0 / 64, 3. / 128, 1. / 64, 1. / 256, 1. / 64, 1. / 16,
196 3. / 32, 1. / 16, 1. / 64, 3. / 128, 3. / 32, 9. / 64, 3. / 32,
197 3. / 128, 1. / 64, 1. / 16, 3. / 32, 1. / 16, 1. / 64, 1. / 256,
198 1. / 64, 3. / 128, 1. / 64, 1. / 256
199 };
200
201 double map_region[25];
202 for (int y = 0; y < height - 1; y += 2) {
203 for (int x = 0; x < width - 1; x += 2) {
204 int i = 0;
205 for (int yy = y - window_size / 2; yy <= y + window_size / 2; ++yy) {
206 for (int xx = x - window_size / 2; xx <= x + window_size / 2; ++xx) {
207 int yvalue = clamp(yy, 0, height - 1);
208 int xvalue = clamp(xx, 0, width - 1);
209 map_region[i++] = map[yvalue * stride + xvalue];
210 }
211 }
212 downsampled_map[(y / 2) * new_width + (x / 2)] =
213 convolve_map(gaussian_filter, map_region, window_size * window_size);
214 }
215 }
216 }
217
218 // This function is to upscale the map from in_level size to out_level size.
219 // Note that the map at "level-1" will upscale the map at "level" by x2.
upscale_map(const double * input,int in_level,int out_level,int height[9],int width[9],double * output)220 static inline int upscale_map(const double *input, int in_level, int out_level,
221 int height[9], int width[9], double *output) {
222 for (int level = in_level; level > out_level; level--) {
223 const int cur_width = width[level];
224 const int cur_height = height[level];
225 const int cur_stride = width[level];
226
227 double *original = (level == in_level) ? (double *)input : output;
228
229 assert(level > 0);
230
231 const int h_upscale = height[level - 1];
232 const int w_upscale = width[level - 1];
233 const int s_upscale = width[level - 1];
234
235 double *upscale = aom_malloc(h_upscale * w_upscale * sizeof(*upscale));
236
237 if (!upscale) {
238 return 0;
239 }
240
241 for (int i = 0; i < h_upscale; ++i) {
242 for (int j = 0; j < w_upscale; ++j) {
243 const int ii = clamp((i >> 1), 0, cur_height - 1);
244 const int jj = clamp((j >> 1), 0, cur_width - 1);
245 upscale[j + i * s_upscale] = (double)original[jj + ii * cur_stride];
246 }
247 }
248 memcpy(output, upscale, h_upscale * w_upscale * sizeof(double));
249 aom_free(upscale);
250 }
251
252 return 1;
253 }
254
255 // This function calculates the differences between a fine scale c and a
256 // coarser scale s yielding the feature maps. c \in {2, 3, 4}, and s = c +
257 // delta, where delta \in {3, 4}.
center_surround_diff(const double * input[9],int height[9],int width[9],saliency_feature_map * output[6])258 static int center_surround_diff(const double *input[9], int height[9],
259 int width[9], saliency_feature_map *output[6]) {
260 int j = 0;
261 for (int k = 2; k < 5; ++k) {
262 int cur_height = height[k];
263 int cur_width = width[k];
264
265 if (upscale_map(input[k + 3], k + 3, k, height, width, output[j]->buf) ==
266 0) {
267 return 0;
268 }
269
270 for (int r = 0; r < cur_height; ++r) {
271 for (int c = 0; c < cur_width; ++c) {
272 output[j]->buf[r * cur_width + c] =
273 fabs((double)(input[k][r * cur_width + c] -
274 output[j]->buf[r * cur_width + c]));
275 }
276 }
277
278 if (upscale_map(input[k + 4], k + 4, k, height, width,
279 output[j + 1]->buf) == 0) {
280 return 0;
281 }
282
283 for (int r = 0; r < cur_height; ++r) {
284 for (int c = 0; c < cur_width; ++c) {
285 output[j + 1]->buf[r * cur_width + c] =
286 fabs(input[k][r * cur_width + c] -
287 output[j + 1]->buf[r * cur_width + c]);
288 }
289 }
290
291 j += 2;
292 }
293 return 1;
294 }
295
296 // For color channels, the differences is calculated based on "color
297 // double-opponency". For example, the RG feature map is constructed between a
298 // fine scale c of R-G component and a coarser scale s of G-R component.
center_surround_diff_rgb(const double * input_1[9],const double * input_2[9],int height[9],int width[9],saliency_feature_map * output[6])299 static int center_surround_diff_rgb(const double *input_1[9],
300 const double *input_2[9], int height[9],
301 int width[9],
302 saliency_feature_map *output[6]) {
303 int j = 0;
304 for (int k = 2; k < 5; ++k) {
305 int cur_height = height[k];
306 int cur_width = width[k];
307
308 if (upscale_map(input_2[k + 3], k + 3, k, height, width, output[j]->buf) ==
309 0) {
310 return 0;
311 }
312
313 for (int r = 0; r < cur_height; ++r) {
314 for (int c = 0; c < cur_width; ++c) {
315 output[j]->buf[r * cur_width + c] =
316 fabs((double)(input_1[k][r * cur_width + c] -
317 output[j]->buf[r * cur_width + c]));
318 }
319 }
320
321 if (upscale_map(input_2[k + 4], k + 4, k, height, width,
322 output[j + 1]->buf) == 0) {
323 return 0;
324 }
325
326 for (int r = 0; r < cur_height; ++r) {
327 for (int c = 0; c < cur_width; ++c) {
328 output[j + 1]->buf[r * cur_width + c] =
329 fabs(input_1[k][r * cur_width + c] -
330 output[j + 1]->buf[r * cur_width + c]);
331 }
332 }
333
334 j += 2;
335 }
336 return 1;
337 }
338
339 // This function is to generate Gaussian pyramid images with indexes from 0 to
340 // 8, and construct the feature maps from calculating the center-surround
341 // differences.
gaussian_pyramid(const double * src,int width[9],int height[9],saliency_feature_map * dst[6])342 static int gaussian_pyramid(const double *src, int width[9], int height[9],
343 saliency_feature_map *dst[6]) {
344 double *gaussian_map[9]; // scale = 9
345 gaussian_map[0] =
346 (double *)aom_malloc(width[0] * height[0] * sizeof(*gaussian_map[0]));
347 if (!gaussian_map[0]) {
348 return 0;
349 }
350
351 memcpy(gaussian_map[0], src, width[0] * height[0] * sizeof(double));
352
353 for (int i = 1; i < 9; ++i) {
354 int stride = width[i - 1];
355 int new_width = width[i];
356 int new_height = height[i];
357
358 gaussian_map[i] =
359 (double *)aom_malloc(new_width * new_height * sizeof(*gaussian_map[i]));
360
361 if (!gaussian_map[i]) {
362 for (int l = 0; l < i; ++l) {
363 aom_free(gaussian_map[l]);
364 }
365 return 0;
366 }
367
368 memset(gaussian_map[i], 0, new_width * new_height * sizeof(double));
369
370 decimate_map(gaussian_map[i - 1], height[i - 1], width[i - 1], stride,
371 gaussian_map[i]);
372 }
373
374 if (center_surround_diff((const double **)gaussian_map, height, width, dst) ==
375 0) {
376 for (int l = 0; l < 9; ++l) {
377 aom_free(gaussian_map[l]);
378 }
379 return 0;
380 }
381
382 for (int i = 0; i < 9; ++i) {
383 aom_free(gaussian_map[i]);
384 }
385 return 1;
386 }
387
gaussian_pyramid_rgb(double * src_1,double * src_2,int width[9],int height[9],saliency_feature_map * dst[6])388 static int gaussian_pyramid_rgb(double *src_1, double *src_2, int width[9],
389 int height[9], saliency_feature_map *dst[6]) {
390 double *gaussian_map[2][9]; // scale = 9
391 double *src[2];
392
393 src[0] = src_1;
394 src[1] = src_2;
395
396 for (int k = 0; k < 2; ++k) {
397 gaussian_map[k][0] = (double *)aom_malloc(width[0] * height[0] *
398 sizeof(*gaussian_map[k][0]));
399 if (!gaussian_map[k][0]) {
400 for (int l = 0; l < k; ++l) {
401 aom_free(gaussian_map[l][0]);
402 }
403 return 0;
404 }
405 memcpy(gaussian_map[k][0], src[k], width[0] * height[0] * sizeof(double));
406
407 for (int i = 1; i < 9; ++i) {
408 int stride = width[i - 1];
409 int new_width = width[i];
410 int new_height = height[i];
411
412 gaussian_map[k][i] = (double *)aom_malloc(new_width * new_height *
413 sizeof(*gaussian_map[k][i]));
414 if (!gaussian_map[k][i]) {
415 for (int l = 0; l < k; ++l) {
416 aom_free(gaussian_map[l][i]);
417 }
418 return 0;
419 }
420 memset(gaussian_map[k][i], 0, new_width * new_height * sizeof(double));
421 decimate_map(gaussian_map[k][i - 1], height[i - 1], width[i - 1], stride,
422 gaussian_map[k][i]);
423 }
424 }
425
426 if (center_surround_diff_rgb((const double **)gaussian_map[0],
427 (const double **)gaussian_map[1], height, width,
428 dst) == 0) {
429 for (int l = 0; l < 2; ++l) {
430 for (int i = 0; i < 9; ++i) {
431 aom_free(gaussian_map[l][i]);
432 }
433 }
434 return 0;
435 }
436
437 for (int l = 0; l < 2; ++l) {
438 for (int i = 0; i < 9; ++i) {
439 aom_free(gaussian_map[l][i]);
440 }
441 }
442 return 1;
443 }
444
get_feature_map_intensity(double * intensity,int width[9],int height[9],saliency_feature_map * i_map[6])445 static int get_feature_map_intensity(double *intensity, int width[9],
446 int height[9],
447 saliency_feature_map *i_map[6]) {
448 if (gaussian_pyramid(intensity, width, height, i_map) == 0) {
449 return 0;
450 }
451 return 1;
452 }
453
get_feature_map_rgb(double * cr,double * cg,double * cb,int width[9],int height[9],saliency_feature_map * rg_map[6],saliency_feature_map * by_map[6])454 static int get_feature_map_rgb(double *cr, double *cg, double *cb, int width[9],
455 int height[9], saliency_feature_map *rg_map[6],
456 saliency_feature_map *by_map[6]) {
457 double *rg_mat = aom_malloc(height[0] * width[0] * sizeof(*rg_mat));
458 double *by_mat = aom_malloc(height[0] * width[0] * sizeof(*by_mat));
459 double *gr_mat = aom_malloc(height[0] * width[0] * sizeof(*gr_mat));
460 double *yb_mat = aom_malloc(height[0] * width[0] * sizeof(*yb_mat));
461
462 if (!rg_mat || !by_mat || !gr_mat || !yb_mat) {
463 aom_free(rg_mat);
464 aom_free(by_mat);
465 aom_free(gr_mat);
466 aom_free(yb_mat);
467 return 0;
468 }
469
470 double r, g, b, y;
471 for (int i = 0; i < height[0]; ++i) {
472 for (int j = 0; j < width[0]; ++j) {
473 r = AOMMAX(0, cr[i * width[0] + j] -
474 (cg[i * width[0] + j] + cb[i * width[0] + j]) / 2);
475 g = AOMMAX(0, cg[i * width[0] + j] -
476 (cr[i * width[0] + j] + cb[i * width[0] + j]) / 2);
477 b = AOMMAX(0, cb[i * width[0] + j] -
478 (cr[i * width[0] + j] + cg[i * width[0] + j]) / 2);
479 y = AOMMAX(0, (cr[i * width[0] + j] + cg[i * width[0] + j]) / 2 -
480 fabs(cr[i * width[0] + j] - cg[i * width[0] + j]) / 2 -
481 cb[i * width[0] + j]);
482
483 rg_mat[i * width[0] + j] = r - g;
484 by_mat[i * width[0] + j] = b - y;
485 gr_mat[i * width[0] + j] = g - r;
486 yb_mat[i * width[0] + j] = y - b;
487 }
488 }
489
490 if (gaussian_pyramid_rgb(rg_mat, gr_mat, width, height, rg_map) == 0 ||
491 gaussian_pyramid_rgb(by_mat, yb_mat, width, height, by_map) == 0) {
492 aom_free(rg_mat);
493 aom_free(by_mat);
494 aom_free(gr_mat);
495 aom_free(yb_mat);
496 return 0;
497 }
498
499 aom_free(rg_mat);
500 aom_free(by_mat);
501 aom_free(gr_mat);
502 aom_free(yb_mat);
503 return 1;
504 }
505
filter2d(const double * input,const double kernel[9][9],int width,int height,double * output)506 static inline void filter2d(const double *input, const double kernel[9][9],
507 int width, int height, double *output) {
508 const int window_size = 9;
509 double map_section[81];
510 for (int y = 0; y <= height - 1; ++y) {
511 for (int x = 0; x <= width - 1; ++x) {
512 int i = 0;
513 for (int yy = y - window_size / 2; yy <= y + window_size / 2; ++yy) {
514 for (int xx = x - window_size / 2; xx <= x + window_size / 2; ++xx) {
515 int yvalue = clamp(yy, 0, height - 1);
516 int xvalue = clamp(xx, 0, width - 1);
517 map_section[i++] = input[yvalue * width + xvalue];
518 }
519 }
520
521 output[y * width + x] = 0;
522 for (int k = 0; k < window_size; ++k) {
523 for (int l = 0; l < window_size; ++l) {
524 output[y * width + x] +=
525 kernel[k][l] * map_section[k * window_size + l];
526 }
527 }
528 }
529 }
530 }
531
get_feature_map_orientation(const double * intensity,int width[9],int height[9],saliency_feature_map * dst[24])532 static int get_feature_map_orientation(const double *intensity, int width[9],
533 int height[9],
534 saliency_feature_map *dst[24]) {
535 double *gaussian_map[9];
536
537 gaussian_map[0] =
538 (double *)aom_malloc(width[0] * height[0] * sizeof(*gaussian_map[0]));
539 if (!gaussian_map[0]) {
540 return 0;
541 }
542 memcpy(gaussian_map[0], intensity, width[0] * height[0] * sizeof(double));
543
544 for (int i = 1; i < 9; ++i) {
545 int stride = width[i - 1];
546 int new_width = width[i];
547 int new_height = height[i];
548
549 gaussian_map[i] =
550 (double *)aom_malloc(new_width * new_height * sizeof(*gaussian_map[i]));
551 if (!gaussian_map[i]) {
552 for (int l = 0; l < i; ++l) {
553 aom_free(gaussian_map[l]);
554 }
555 return 0;
556 }
557 memset(gaussian_map[i], 0, new_width * new_height * sizeof(double));
558 decimate_map(gaussian_map[i - 1], height[i - 1], width[i - 1], stride,
559 gaussian_map[i]);
560 }
561
562 double *tempGaborOutput[4][9]; //[angle: 0, 45, 90, 135 degree][filter_size]
563
564 for (int i = 2; i < 9; ++i) {
565 const int cur_height = height[i];
566 const int cur_width = width[i];
567 for (int j = 0; j < 4; ++j) {
568 tempGaborOutput[j][i] = (double *)aom_malloc(
569 cur_height * cur_width * sizeof(*tempGaborOutput[j][i]));
570 if (!tempGaborOutput[j][i]) {
571 for (int l = 0; l < 9; ++l) {
572 aom_free(gaussian_map[l]);
573 }
574 for (int h = 0; h < 4; ++h) {
575 for (int g = 2; g < 9; ++g) {
576 aom_free(tempGaborOutput[h][g]);
577 }
578 }
579 return 0;
580 }
581 filter2d(gaussian_map[i], kGaborFilter[j], cur_width, cur_height,
582 tempGaborOutput[j][i]);
583 }
584 }
585
586 for (int i = 0; i < 9; ++i) {
587 aom_free(gaussian_map[i]);
588 }
589
590 saliency_feature_map
591 *tmp[4][6]; //[angle: 0, 45, 90, 135 degree][filter_size]
592
593 for (int i = 0; i < 6; ++i) {
594 for (int j = 0; j < 4; ++j) {
595 tmp[j][i] = dst[j * 6 + i];
596 }
597 }
598
599 for (int j = 0; j < 4; ++j) {
600 if (center_surround_diff((const double **)tempGaborOutput[j], height, width,
601 tmp[j]) == 0) {
602 for (int h = 0; h < 4; ++h) {
603 for (int g = 2; g < 9; ++g) {
604 aom_free(tempGaborOutput[h][g]);
605 }
606 }
607 return 0;
608 }
609 }
610
611 for (int i = 2; i < 9; ++i) {
612 for (int j = 0; j < 4; ++j) {
613 aom_free(tempGaborOutput[j][i]);
614 }
615 }
616
617 return 1;
618 }
619
find_min_max(const saliency_feature_map * input,double * max_value,double * min_value)620 static inline void find_min_max(const saliency_feature_map *input,
621 double *max_value, double *min_value) {
622 assert(input && input->buf);
623 *min_value = DBL_MAX;
624 *max_value = 0.0;
625
626 for (int i = 0; i < input->height; ++i) {
627 for (int j = 0; j < input->width; ++j) {
628 assert(input->buf[i * input->width + j] >= 0.0);
629 *min_value = fmin(input->buf[i * input->width + j], *min_value);
630 *max_value = fmax(input->buf[i * input->width + j], *max_value);
631 }
632 }
633 }
634
average_local_max(const saliency_feature_map * input,int stepsize)635 static inline double average_local_max(const saliency_feature_map *input,
636 int stepsize) {
637 int numlocal = 0;
638 double lmaxmean = 0, lmax = 0, dummy = 0;
639 saliency_feature_map local_map;
640 local_map.height = stepsize;
641 local_map.width = stepsize;
642 local_map.buf =
643 (double *)aom_malloc(stepsize * stepsize * sizeof(*local_map.buf));
644
645 if (!local_map.buf) {
646 return -1;
647 }
648
649 for (int y = 0; y < input->height - stepsize; y += stepsize) {
650 for (int x = 0; x < input->width - stepsize; x += stepsize) {
651 for (int i = 0; i < stepsize; ++i) {
652 for (int j = 0; j < stepsize; ++j) {
653 local_map.buf[i * stepsize + j] =
654 input->buf[(y + i) * input->width + x + j];
655 }
656 }
657
658 find_min_max(&local_map, &lmax, &dummy);
659 lmaxmean += lmax;
660 numlocal++;
661 }
662 }
663
664 aom_free(local_map.buf);
665
666 return lmaxmean / numlocal;
667 }
668
669 // Linear normalization the values in the map to [0,1].
minmax_normalize(saliency_feature_map * input)670 static void minmax_normalize(saliency_feature_map *input) {
671 double max_value, min_value;
672 find_min_max(input, &max_value, &min_value);
673
674 for (int i = 0; i < input->height; ++i) {
675 for (int j = 0; j < input->width; ++j) {
676 if (max_value != min_value) {
677 input->buf[i * input->width + j] =
678 input->buf[i * input->width + j] / (max_value - min_value) +
679 min_value / (min_value - max_value);
680 } else {
681 input->buf[i * input->width + j] -= min_value;
682 }
683 }
684 }
685 }
686
687 // This function is to promote meaningful “activation spots” in the map and
688 // ignores homogeneous areas.
nomalization_operator(saliency_feature_map * input,int stepsize)689 static int nomalization_operator(saliency_feature_map *input, int stepsize) {
690 minmax_normalize(input);
691 double lmaxmean = average_local_max(input, stepsize);
692 if (lmaxmean < 0) {
693 return 0;
694 }
695 double normCoeff = (1 - lmaxmean) * (1 - lmaxmean);
696
697 for (int i = 0; i < input->height; ++i) {
698 for (int j = 0; j < input->width; ++j) {
699 input->buf[i * input->width + j] *= normCoeff;
700 }
701 }
702
703 return 1;
704 }
705
706 // Normalize the values in feature maps to [0,1], and then upscale all maps to
707 // the original frame size.
normalize_fm(saliency_feature_map * input[6],int width[9],int height[9],int num_fm,saliency_feature_map * output[6])708 static int normalize_fm(saliency_feature_map *input[6], int width[9],
709 int height[9], int num_fm,
710 saliency_feature_map *output[6]) {
711 // Feature maps (FM) are generated by function "center_surround_diff()". The
712 // difference is between a fine scale c and a coarser scale s, where c \in {2,
713 // 3, 4}, and s = c + delta, where delta \in {3, 4}, and the FM size is scale
714 // c. Specifically, i=0: c=2 and s=5, i=1: c=2 and s=6, i=2: c=3 and s=6, i=3:
715 // c=3 and s=7, i=4: c=4 and s=7, i=5: c=4 and s=8.
716 for (int i = 0; i < num_fm; ++i) {
717 if (nomalization_operator(input[i], 8) == 0) {
718 return 0;
719 }
720
721 // Upscale FM to original frame size
722 if (upscale_map(input[i]->buf, (i / 2) + 2, 0, height, width,
723 output[i]->buf) == 0) {
724 return 0;
725 }
726 }
727 return 1;
728 }
729
730 // Combine feature maps with the same category (intensity, color, or
731 // orientation) into one conspicuity map.
normalized_map(saliency_feature_map * input[6],int width[9],int height[9],saliency_feature_map * output)732 static int normalized_map(saliency_feature_map *input[6], int width[9],
733 int height[9], saliency_feature_map *output) {
734 int num_fm = 6;
735
736 saliency_feature_map *n_input[6];
737 for (int i = 0; i < 6; ++i) {
738 n_input[i] = (saliency_feature_map *)aom_malloc(sizeof(*n_input[i]));
739 if (!n_input[i]) {
740 return 0;
741 }
742 n_input[i]->buf =
743 (double *)aom_malloc(width[0] * height[0] * sizeof(*n_input[i]->buf));
744 if (!n_input[i]->buf) {
745 aom_free(n_input[i]);
746 return 0;
747 }
748 n_input[i]->height = height[0];
749 n_input[i]->width = width[0];
750 }
751
752 if (normalize_fm(input, width, height, num_fm, n_input) == 0) {
753 for (int i = 0; i < num_fm; ++i) {
754 aom_free(n_input[i]->buf);
755 aom_free(n_input[i]);
756 }
757 return 0;
758 }
759
760 // Add up all normalized feature maps with the same category into one map.
761 for (int i = 0; i < num_fm; ++i) {
762 for (int r = 0; r < height[0]; ++r) {
763 for (int c = 0; c < width[0]; ++c) {
764 output->buf[r * width[0] + c] += n_input[i]->buf[r * width[0] + c];
765 }
766 }
767 }
768
769 for (int i = 0; i < num_fm; ++i) {
770 aom_free(n_input[i]->buf);
771 aom_free(n_input[i]);
772 }
773
774 nomalization_operator(output, 8);
775 return 1;
776 }
777
normalized_map_rgb(saliency_feature_map * rg_map[6],saliency_feature_map * by_map[6],int width[9],int height[9],saliency_feature_map * output)778 static int normalized_map_rgb(saliency_feature_map *rg_map[6],
779 saliency_feature_map *by_map[6], int width[9],
780 int height[9], saliency_feature_map *output) {
781 saliency_feature_map *color_cm[2]; // 0: color_cm_rg, 1: color_cm_by
782 for (int i = 0; i < 2; ++i) {
783 color_cm[i] = aom_malloc(sizeof(*color_cm[i]));
784 if (!color_cm[i]) {
785 return 0;
786 }
787 color_cm[i]->buf =
788 (double *)aom_malloc(width[0] * height[0] * sizeof(*color_cm[i]->buf));
789 if (!color_cm[i]->buf) {
790 for (int l = 0; l < i; ++l) {
791 aom_free(color_cm[l]->buf);
792 }
793 aom_free(color_cm[i]);
794 return 0;
795 }
796
797 color_cm[i]->width = width[0];
798 color_cm[i]->height = height[0];
799 memset(color_cm[i]->buf, 0,
800 width[0] * height[0] * sizeof(*color_cm[i]->buf));
801 }
802
803 if (normalized_map(rg_map, width, height, color_cm[0]) == 0 ||
804 normalized_map(by_map, width, height, color_cm[1]) == 0) {
805 for (int i = 0; i < 2; ++i) {
806 aom_free(color_cm[i]->buf);
807 aom_free(color_cm[i]);
808 }
809 return 0;
810 }
811
812 for (int r = 0; r < height[0]; ++r) {
813 for (int c = 0; c < width[0]; ++c) {
814 output->buf[r * width[0] + c] = color_cm[0]->buf[r * width[0] + c] +
815 color_cm[1]->buf[r * width[0] + c];
816 }
817 }
818
819 for (int i = 0; i < 2; ++i) {
820 aom_free(color_cm[i]->buf);
821 aom_free(color_cm[i]);
822 }
823
824 nomalization_operator(output, 8);
825 return 1;
826 }
827
normalized_map_orientation(saliency_feature_map * orientation_map[24],int width[9],int height[9],saliency_feature_map * output)828 static int normalized_map_orientation(saliency_feature_map *orientation_map[24],
829 int width[9], int height[9],
830 saliency_feature_map *output) {
831 int num_fms_per_angle = 6;
832
833 saliency_feature_map *ofm[4][6];
834 for (int i = 0; i < num_fms_per_angle; ++i) {
835 for (int j = 0; j < 4; ++j) {
836 ofm[j][i] = orientation_map[j * num_fms_per_angle + i];
837 }
838 }
839
840 // extract conspicuity map for each angle
841 saliency_feature_map *nofm = aom_malloc(sizeof(*nofm));
842 if (!nofm) {
843 return 0;
844 }
845 nofm->buf = (double *)aom_malloc(width[0] * height[0] * sizeof(*nofm->buf));
846 if (!nofm->buf) {
847 aom_free(nofm);
848 return 0;
849 }
850 nofm->height = height[0];
851 nofm->width = width[0];
852
853 for (int i = 0; i < 4; ++i) {
854 memset(nofm->buf, 0, width[0] * height[0] * sizeof(*nofm->buf));
855 if (normalized_map(ofm[i], width, height, nofm) == 0) {
856 aom_free(nofm->buf);
857 aom_free(nofm);
858 return 0;
859 }
860
861 for (int r = 0; r < height[0]; ++r) {
862 for (int c = 0; c < width[0]; ++c) {
863 output->buf[r * width[0] + c] += nofm->buf[r * width[0] + c];
864 }
865 }
866 }
867
868 aom_free(nofm->buf);
869 aom_free(nofm);
870
871 nomalization_operator(output, 8);
872 return 1;
873 }
874
875 // Set pixel level saliency mask based on Itti-Koch algorithm
av1_set_saliency_map(AV1_COMP * cpi)876 int av1_set_saliency_map(AV1_COMP *cpi) {
877 AV1_COMMON *const cm = &cpi->common;
878
879 int frm_width = cm->width;
880 int frm_height = cm->height;
881
882 int pyr_height[9];
883 int pyr_width[9];
884
885 pyr_height[0] = frm_height;
886 pyr_width[0] = frm_width;
887
888 for (int i = 1; i < 9; ++i) {
889 pyr_width[i] = pyr_width[i - 1] / 2;
890 pyr_height[i] = pyr_height[i - 1] / 2;
891 }
892
893 double *cr = aom_malloc(frm_width * frm_height * sizeof(*cr));
894 double *cg = aom_malloc(frm_width * frm_height * sizeof(*cg));
895 double *cb = aom_malloc(frm_width * frm_height * sizeof(*cb));
896 double *intensity = aom_malloc(frm_width * frm_height * sizeof(*intensity));
897
898 if (!cr || !cg || !cb || !intensity) {
899 aom_free(cr);
900 aom_free(cg);
901 aom_free(cb);
902 aom_free(intensity);
903 return 0;
904 }
905
906 // Extract red / green / blue channels and intensity component
907 get_color_intensity(cpi->source, cm->seq_params->subsampling_x,
908 cm->seq_params->subsampling_y, cr, cg, cb, intensity);
909
910 // Feature Map Extraction
911 // intensity map
912 saliency_feature_map *i_map[6];
913 for (int i = 0; i < 6; ++i) {
914 int cur_height = pyr_height[(i / 2) + 2];
915 int cur_width = pyr_width[(i / 2) + 2];
916
917 i_map[i] = (saliency_feature_map *)aom_malloc(sizeof(*i_map[i]));
918 if (!i_map[i]) {
919 aom_free(cr);
920 aom_free(cg);
921 aom_free(cb);
922 aom_free(intensity);
923 for (int l = 0; l < i; ++l) {
924 aom_free(i_map[l]);
925 }
926 return 0;
927 }
928 i_map[i]->buf =
929 (double *)aom_malloc(cur_height * cur_width * sizeof(*i_map[i]->buf));
930 if (!i_map[i]->buf) {
931 aom_free(cr);
932 aom_free(cg);
933 aom_free(cb);
934 aom_free(intensity);
935 for (int l = 0; l < i; ++l) {
936 aom_free(i_map[l]->buf);
937 aom_free(i_map[l]);
938 }
939 return 0;
940 }
941 i_map[i]->height = cur_height;
942 i_map[i]->width = cur_width;
943 }
944
945 if (get_feature_map_intensity(intensity, pyr_width, pyr_height, i_map) == 0) {
946 aom_free(cr);
947 aom_free(cg);
948 aom_free(cb);
949 aom_free(intensity);
950 for (int l = 0; l < 6; ++l) {
951 aom_free(i_map[l]->buf);
952 aom_free(i_map[l]);
953 }
954 return 0;
955 }
956
957 // RGB map
958 saliency_feature_map *rg_map[6], *by_map[6];
959 for (int i = 0; i < 6; ++i) {
960 int cur_height = pyr_height[(i / 2) + 2];
961 int cur_width = pyr_width[(i / 2) + 2];
962 rg_map[i] = (saliency_feature_map *)aom_malloc(sizeof(*rg_map[i]));
963 by_map[i] = (saliency_feature_map *)aom_malloc(sizeof(*by_map[i]));
964 if (!rg_map[i] || !by_map[i]) {
965 aom_free(cr);
966 aom_free(cg);
967 aom_free(cb);
968 aom_free(intensity);
969 for (int l = 0; l < 6; ++l) {
970 aom_free(i_map[l]->buf);
971 aom_free(i_map[l]);
972 aom_free(rg_map[l]);
973 aom_free(by_map[l]);
974 }
975 return 0;
976 }
977 rg_map[i]->buf =
978 (double *)aom_malloc(cur_height * cur_width * sizeof(*rg_map[i]->buf));
979 by_map[i]->buf =
980 (double *)aom_malloc(cur_height * cur_width * sizeof(*by_map[i]->buf));
981 if (!by_map[i]->buf || !rg_map[i]->buf) {
982 aom_free(cr);
983 aom_free(cg);
984 aom_free(cb);
985 aom_free(intensity);
986 for (int l = 0; l < 6; ++l) {
987 aom_free(i_map[l]->buf);
988 aom_free(i_map[l]);
989 }
990 for (int l = 0; l < i; ++l) {
991 aom_free(rg_map[l]->buf);
992 aom_free(by_map[l]->buf);
993 aom_free(rg_map[l]);
994 aom_free(by_map[l]);
995 }
996 return 0;
997 }
998 rg_map[i]->height = cur_height;
999 rg_map[i]->width = cur_width;
1000 by_map[i]->height = cur_height;
1001 by_map[i]->width = cur_width;
1002 }
1003
1004 if (get_feature_map_rgb(cr, cg, cb, pyr_width, pyr_height, rg_map, by_map) ==
1005 0) {
1006 aom_free(cr);
1007 aom_free(cg);
1008 aom_free(cb);
1009 aom_free(intensity);
1010 for (int l = 0; l < 6; ++l) {
1011 aom_free(i_map[l]->buf);
1012 aom_free(rg_map[l]->buf);
1013 aom_free(by_map[l]->buf);
1014 aom_free(i_map[l]);
1015 aom_free(rg_map[l]);
1016 aom_free(by_map[l]);
1017 }
1018 return 0;
1019 }
1020
1021 // Orientation map
1022 saliency_feature_map *orientation_map[24];
1023 for (int i = 0; i < 24; ++i) {
1024 int cur_height = pyr_height[((i % 6) / 2) + 2];
1025 int cur_width = pyr_width[((i % 6) / 2) + 2];
1026
1027 orientation_map[i] =
1028 (saliency_feature_map *)aom_malloc(sizeof(*orientation_map[i]));
1029 if (!orientation_map[i]) {
1030 aom_free(cr);
1031 aom_free(cg);
1032 aom_free(cb);
1033 aom_free(intensity);
1034 for (int l = 0; l < 6; ++l) {
1035 aom_free(i_map[l]->buf);
1036 aom_free(rg_map[l]->buf);
1037 aom_free(by_map[l]->buf);
1038 aom_free(i_map[l]);
1039 aom_free(rg_map[l]);
1040 aom_free(by_map[l]);
1041 }
1042 for (int h = 0; h < i; ++h) {
1043 aom_free(orientation_map[h]);
1044 }
1045 return 0;
1046 }
1047
1048 orientation_map[i]->buf = (double *)aom_malloc(
1049 cur_height * cur_width * sizeof(*orientation_map[i]->buf));
1050 if (!orientation_map[i]->buf) {
1051 aom_free(cr);
1052 aom_free(cg);
1053 aom_free(cb);
1054 aom_free(intensity);
1055 for (int l = 0; l < 6; ++l) {
1056 aom_free(i_map[l]->buf);
1057 aom_free(rg_map[l]->buf);
1058 aom_free(by_map[l]->buf);
1059 aom_free(i_map[l]);
1060 aom_free(rg_map[l]);
1061 aom_free(by_map[l]);
1062 }
1063
1064 for (int h = 0; h < i; ++h) {
1065 aom_free(orientation_map[h]->buf);
1066 aom_free(orientation_map[h]->buf);
1067 aom_free(orientation_map[h]);
1068 aom_free(orientation_map[h]);
1069 }
1070 return 0;
1071 }
1072
1073 orientation_map[i]->height = cur_height;
1074 orientation_map[i]->width = cur_width;
1075 }
1076
1077 if (get_feature_map_orientation(intensity, pyr_width, pyr_height,
1078 orientation_map) == 0) {
1079 aom_free(cr);
1080 aom_free(cg);
1081 aom_free(cb);
1082 aom_free(intensity);
1083 for (int l = 0; l < 6; ++l) {
1084 aom_free(i_map[l]->buf);
1085 aom_free(rg_map[l]->buf);
1086 aom_free(by_map[l]->buf);
1087 aom_free(i_map[l]);
1088 aom_free(rg_map[l]);
1089 aom_free(by_map[l]);
1090 }
1091 for (int h = 0; h < 24; ++h) {
1092 aom_free(orientation_map[h]->buf);
1093 aom_free(orientation_map[h]);
1094 }
1095 return 0;
1096 }
1097
1098 aom_free(cr);
1099 aom_free(cg);
1100 aom_free(cb);
1101 aom_free(intensity);
1102
1103 saliency_feature_map
1104 *normalized_maps[3]; // 0: intensity, 1: color, 2: orientation
1105
1106 for (int i = 0; i < 3; ++i) {
1107 normalized_maps[i] = aom_malloc(sizeof(*normalized_maps[i]));
1108 if (!normalized_maps[i]) {
1109 for (int l = 0; l < 6; ++l) {
1110 aom_free(i_map[l]->buf);
1111 aom_free(rg_map[l]->buf);
1112 aom_free(by_map[l]->buf);
1113 aom_free(i_map[l]);
1114 aom_free(rg_map[l]);
1115 aom_free(by_map[l]);
1116 }
1117
1118 for (int h = 0; h < 24; ++h) {
1119 aom_free(orientation_map[h]->buf);
1120 aom_free(orientation_map[h]);
1121 }
1122
1123 for (int l = 0; l < i; ++l) {
1124 aom_free(normalized_maps[l]);
1125 }
1126 return 0;
1127 }
1128 normalized_maps[i]->buf = (double *)aom_malloc(
1129 frm_width * frm_height * sizeof(*normalized_maps[i]->buf));
1130 if (!normalized_maps[i]->buf) {
1131 for (int l = 0; l < 6; ++l) {
1132 aom_free(i_map[l]->buf);
1133 aom_free(rg_map[l]->buf);
1134 aom_free(by_map[l]->buf);
1135 aom_free(i_map[l]);
1136 aom_free(rg_map[l]);
1137 aom_free(by_map[l]);
1138 }
1139 for (int h = 0; h < 24; ++h) {
1140 aom_free(orientation_map[h]->buf);
1141 aom_free(orientation_map[h]);
1142 }
1143 for (int l = 0; l < i; ++l) {
1144 aom_free(normalized_maps[l]->buf);
1145 aom_free(normalized_maps[l]);
1146 }
1147 return 0;
1148 }
1149 normalized_maps[i]->width = frm_width;
1150 normalized_maps[i]->height = frm_height;
1151 memset(normalized_maps[i]->buf, 0,
1152 frm_width * frm_height * sizeof(*normalized_maps[i]->buf));
1153 }
1154
1155 // Conspicuity map generation
1156 if (normalized_map(i_map, pyr_width, pyr_height, normalized_maps[0]) == 0 ||
1157 normalized_map_rgb(rg_map, by_map, pyr_width, pyr_height,
1158 normalized_maps[1]) == 0 ||
1159 normalized_map_orientation(orientation_map, pyr_width, pyr_height,
1160 normalized_maps[2]) == 0) {
1161 for (int i = 0; i < 6; ++i) {
1162 aom_free(i_map[i]->buf);
1163 aom_free(rg_map[i]->buf);
1164 aom_free(by_map[i]->buf);
1165 aom_free(i_map[i]);
1166 aom_free(rg_map[i]);
1167 aom_free(by_map[i]);
1168 }
1169
1170 for (int i = 0; i < 24; ++i) {
1171 aom_free(orientation_map[i]->buf);
1172 aom_free(orientation_map[i]);
1173 }
1174
1175 for (int i = 0; i < 3; ++i) {
1176 aom_free(normalized_maps[i]->buf);
1177 aom_free(normalized_maps[i]);
1178 }
1179 return 0;
1180 }
1181
1182 for (int i = 0; i < 6; ++i) {
1183 aom_free(i_map[i]->buf);
1184 aom_free(rg_map[i]->buf);
1185 aom_free(by_map[i]->buf);
1186 aom_free(i_map[i]);
1187 aom_free(rg_map[i]);
1188 aom_free(by_map[i]);
1189 }
1190
1191 for (int i = 0; i < 24; ++i) {
1192 aom_free(orientation_map[i]->buf);
1193 aom_free(orientation_map[i]);
1194 }
1195
1196 // Pixel level saliency map
1197 saliency_feature_map *combined_saliency_map =
1198 aom_malloc(sizeof(*combined_saliency_map));
1199 if (!combined_saliency_map) {
1200 for (int i = 0; i < 3; ++i) {
1201 aom_free(normalized_maps[i]->buf);
1202 aom_free(normalized_maps[i]);
1203 }
1204 return 0;
1205 }
1206
1207 combined_saliency_map->buf = (double *)aom_malloc(
1208 frm_width * frm_height * sizeof(*combined_saliency_map->buf));
1209 if (!combined_saliency_map->buf) {
1210 for (int i = 0; i < 3; ++i) {
1211 aom_free(normalized_maps[i]->buf);
1212 aom_free(normalized_maps[i]);
1213 }
1214
1215 aom_free(combined_saliency_map);
1216 return 0;
1217 }
1218 combined_saliency_map->height = frm_height;
1219 combined_saliency_map->width = frm_width;
1220
1221 double w_intensity, w_color, w_orient;
1222
1223 w_intensity = w_color = w_orient = (double)1 / 3;
1224
1225 for (int r = 0; r < frm_height; ++r) {
1226 for (int c = 0; c < frm_width; ++c) {
1227 combined_saliency_map->buf[r * frm_width + c] =
1228 (w_intensity * normalized_maps[0]->buf[r * frm_width + c] +
1229 w_color * normalized_maps[1]->buf[r * frm_width + c] +
1230 w_orient * normalized_maps[2]->buf[r * frm_width + c]);
1231 }
1232 }
1233
1234 for (int r = 0; r < frm_height; ++r) {
1235 for (int c = 0; c < frm_width; ++c) {
1236 int index = r * frm_width + c;
1237 cpi->saliency_map[index] =
1238 (uint8_t)(combined_saliency_map->buf[index] * 255);
1239 }
1240 }
1241
1242 for (int i = 0; i < 3; ++i) {
1243 aom_free(normalized_maps[i]->buf);
1244 aom_free(normalized_maps[i]);
1245 }
1246
1247 aom_free(combined_saliency_map->buf);
1248 aom_free(combined_saliency_map);
1249
1250 return 1;
1251 }
1252
1253 // Set superblock level saliency mask for rdmult scaling
av1_setup_sm_rdmult_scaling_factor(AV1_COMP * cpi,double motion_ratio)1254 int av1_setup_sm_rdmult_scaling_factor(AV1_COMP *cpi, double motion_ratio) {
1255 AV1_COMMON *cm = &cpi->common;
1256
1257 saliency_feature_map *sb_saliency_map =
1258 aom_malloc(sizeof(saliency_feature_map));
1259
1260 if (sb_saliency_map == NULL) {
1261 return 0;
1262 }
1263
1264 const BLOCK_SIZE bsize = cm->seq_params->sb_size;
1265 const int num_mi_w = mi_size_wide[bsize];
1266 const int num_mi_h = mi_size_high[bsize];
1267 const int block_width = block_size_wide[bsize];
1268 const int block_height = block_size_high[bsize];
1269 const int num_sb_cols = (cm->mi_params.mi_cols + num_mi_w - 1) / num_mi_w;
1270 const int num_sb_rows = (cm->mi_params.mi_rows + num_mi_h - 1) / num_mi_h;
1271
1272 sb_saliency_map->height = num_sb_rows;
1273 sb_saliency_map->width = num_sb_cols;
1274 sb_saliency_map->buf = (double *)aom_malloc(num_sb_rows * num_sb_cols *
1275 sizeof(*sb_saliency_map->buf));
1276
1277 if (sb_saliency_map->buf == NULL) {
1278 aom_free(sb_saliency_map);
1279 return 0;
1280 }
1281
1282 for (int row = 0; row < num_sb_rows; ++row) {
1283 for (int col = 0; col < num_sb_cols; ++col) {
1284 const int index = row * num_sb_cols + col;
1285 double total_pixel = 0;
1286 double total_weight = 0;
1287
1288 for (int i = 0; i < block_height; i++) {
1289 for (int j = 0; j < block_width; j++) {
1290 if ((row * block_height + i) >= cpi->common.height ||
1291 (col * block_width + j) >= cpi->common.width)
1292 continue;
1293 total_pixel++;
1294 total_weight +=
1295 cpi->saliency_map[(row * block_height + i) * cpi->common.width +
1296 col * block_width + j];
1297 }
1298 }
1299
1300 assert(total_pixel > 0);
1301
1302 // Calculate the superblock level saliency map from pixel level saliency
1303 // map
1304 sb_saliency_map->buf[index] = total_weight / total_pixel;
1305
1306 // Further lower the superblock saliency score for boundary superblocks.
1307 if (row < 1 || row > num_sb_rows - 2 || col < 1 ||
1308 col > num_sb_cols - 2) {
1309 sb_saliency_map->buf[index] /= 5;
1310 }
1311 }
1312 }
1313
1314 // superblock level saliency map finalization
1315 minmax_normalize(sb_saliency_map);
1316
1317 double log_sum = 0.0;
1318 double sum = 0.0;
1319 int block_count = 0;
1320
1321 // Calculate the average superblock sm_scaling_factor for a frame, to be used
1322 // for clamping later.
1323 for (int row = 0; row < num_sb_rows; ++row) {
1324 for (int col = 0; col < num_sb_cols; ++col) {
1325 const int index = row * num_sb_cols + col;
1326 const double saliency = sb_saliency_map->buf[index];
1327
1328 cpi->sm_scaling_factor[index] = 1 - saliency;
1329 sum += cpi->sm_scaling_factor[index];
1330 block_count++;
1331 }
1332 }
1333 assert(block_count > 0);
1334 sum /= block_count;
1335
1336 // Calculate the geometric mean of superblock sm_scaling_factor for a frame,
1337 // to be used for normalization.
1338 for (int row = 0; row < num_sb_rows; ++row) {
1339 for (int col = 0; col < num_sb_cols; ++col) {
1340 const int index = row * num_sb_cols + col;
1341 log_sum += log(fmax(cpi->sm_scaling_factor[index], 0.001));
1342 cpi->sm_scaling_factor[index] =
1343 fmax(cpi->sm_scaling_factor[index], 0.8 * sum);
1344 }
1345 }
1346
1347 log_sum = exp(log_sum / block_count);
1348
1349 // Normalize the sm_scaling_factor by geometric mean.
1350 for (int row = 0; row < num_sb_rows; ++row) {
1351 for (int col = 0; col < num_sb_cols; ++col) {
1352 const int index = row * num_sb_cols + col;
1353 assert(log_sum > 0);
1354 cpi->sm_scaling_factor[index] /= log_sum;
1355
1356 // Modulate the sm_scaling_factor by frame basis motion factor
1357 cpi->sm_scaling_factor[index] =
1358 cpi->sm_scaling_factor[index] * motion_ratio;
1359 }
1360 }
1361
1362 aom_free(sb_saliency_map->buf);
1363 aom_free(sb_saliency_map);
1364 return 1;
1365 }
1366
1367 // av1_setup_motion_ratio() is only enabled when CONFIG_REALTIME_ONLY is 0,
1368 // because the computations need to access the first pass stats which are
1369 // only available when CONFIG_REALTIME_ONLY is equal to 0.
1370 #if !CONFIG_REALTIME_ONLY
1371 // Set motion_ratio that reflects the motion quantities between two consecutive
1372 // frames. Motion_ratio will be used to set up saliency_map based rdmult scaling
1373 // factor, i.e., the less the motion quantities are, the more bits will be spent
1374 // on this frame, and vice versa.
av1_setup_motion_ratio(AV1_COMP * cpi)1375 double av1_setup_motion_ratio(AV1_COMP *cpi) {
1376 AV1_COMMON *cm = &cpi->common;
1377 int frames_since_key =
1378 cm->current_frame.display_order_hint - cpi->rc.frames_since_key;
1379 const FIRSTPASS_STATS *cur_stats = av1_firstpass_info_peek(
1380 &cpi->ppi->twopass.firstpass_info, frames_since_key);
1381 assert(cur_stats != NULL);
1382 assert(cpi->ppi->twopass.firstpass_info.total_stats.count > 0);
1383
1384 const double avg_intra_error =
1385 exp(cpi->ppi->twopass.firstpass_info.total_stats.log_intra_error /
1386 cpi->ppi->twopass.firstpass_info.total_stats.count);
1387 const double avg_inter_error =
1388 exp(cpi->ppi->twopass.firstpass_info.total_stats.log_coded_error /
1389 cpi->ppi->twopass.firstpass_info.total_stats.count);
1390
1391 double inter_error = cur_stats->coded_error;
1392 double error_stdev = 0;
1393 const double avg_error =
1394 cpi->ppi->twopass.firstpass_info.total_stats.intra_error /
1395 cpi->ppi->twopass.firstpass_info.total_stats.count;
1396 for (int i = 0; i < cpi->ppi->twopass.firstpass_info.total_stats.count; i++) {
1397 const FIRSTPASS_STATS *stats =
1398 &cpi->ppi->twopass.firstpass_info.stats_buf[i];
1399 error_stdev +=
1400 (stats->intra_error - avg_error) * (stats->intra_error - avg_error);
1401 }
1402 error_stdev =
1403 sqrt(error_stdev / cpi->ppi->twopass.firstpass_info.total_stats.count);
1404
1405 double motion_ratio = 1;
1406 if (error_stdev / fmax(avg_intra_error, 1) > 0.1) {
1407 motion_ratio = inter_error / fmax(1, avg_inter_error);
1408 motion_ratio = AOMMIN(motion_ratio, 1.5);
1409 motion_ratio = AOMMAX(motion_ratio, 0.8);
1410 }
1411
1412 return motion_ratio;
1413 }
1414 #endif // !CONFIG_REALTIME_ONLY
1415