1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <math.h>
13 #include <stdlib.h>
14
15 #include "av1/common/pred_common.h"
16
17 #include "av1/encoder/block.h"
18 #include "av1/encoder/cost.h"
19 #include "av1/encoder/encoder.h"
20 #include "av1/encoder/intra_mode_search.h"
21 #include "av1/encoder/intra_mode_search_utils.h"
22 #include "av1/encoder/palette.h"
23 #include "av1/encoder/random.h"
24 #include "av1/encoder/rdopt_utils.h"
25 #include "av1/encoder/tx_search.h"
26
27 #define AV1_K_MEANS_DIM 1
28 #include "av1/encoder/k_means_template.h"
29 #undef AV1_K_MEANS_DIM
30 #define AV1_K_MEANS_DIM 2
31 #include "av1/encoder/k_means_template.h"
32 #undef AV1_K_MEANS_DIM
33
// qsort() comparator ordering int16_t values in ascending order. Both operands
// are promoted to int before the subtraction, so the difference cannot
// overflow.
static int int16_comparer(const void *a, const void *b) {
  const int16_t lhs = *(const int16_t *)a;
  const int16_t rhs = *(const int16_t *)b;
  return (int)lhs - (int)rhs;
}

/*!\brief Removes duplicated centroid indices.
 *
 * \ingroup palette_mode_search
 * \param[in,out] centroids     A list of centroids index. Sorted in ascending
 *                              order in place; the unique values are packed at
 *                              the front of the array.
 * \param[in]     num_centroids Number of centroids.
 *
 * \return Returns the number of unique centroids and saves the unique centroids
 * in beginning of the centroids array. Returns 0 when \c num_centroids is not
 * positive.
 *
 * \attention The centroids should be rounded to integers before calling this
 * method.
 */
static int remove_duplicates(int16_t *centroids, int num_centroids) {
  // An empty list has no unique entries. Bail out before qsort() so that a
  // non-positive count is never converted to a (huge) size_t.
  if (num_centroids <= 0) return 0;

  qsort(centroids, (size_t)num_centroids, sizeof(*centroids), int16_comparer);

  // After sorting, duplicates are adjacent: keep the first value of each run.
  int num_unique = 1;
  for (int i = 1; i < num_centroids; ++i) {
    if (centroids[i] != centroids[i - 1]) {  // found a new unique centroid
      centroids[num_unique++] = centroids[i];
    }
  }
  return num_unique;
}
63
delta_encode_cost(const int * colors,int num,int bit_depth,int min_val)64 static int delta_encode_cost(const int *colors, int num, int bit_depth,
65 int min_val) {
66 if (num <= 0) return 0;
67 int bits_cost = bit_depth;
68 if (num == 1) return bits_cost;
69 bits_cost += 2;
70 int max_delta = 0;
71 int deltas[PALETTE_MAX_SIZE];
72 const int min_bits = bit_depth - 3;
73 for (int i = 1; i < num; ++i) {
74 const int delta = colors[i] - colors[i - 1];
75 deltas[i - 1] = delta;
76 assert(delta >= min_val);
77 if (delta > max_delta) max_delta = delta;
78 }
79 int bits_per_delta = AOMMAX(av1_ceil_log2(max_delta + 1 - min_val), min_bits);
80 assert(bits_per_delta <= bit_depth);
81 int range = (1 << bit_depth) - colors[0] - min_val;
82 for (int i = 0; i < num - 1; ++i) {
83 bits_cost += bits_per_delta;
84 range -= deltas[i];
85 bits_per_delta = AOMMIN(bits_per_delta, av1_ceil_log2(range));
86 }
87 return bits_cost;
88 }
89
av1_index_color_cache(const uint16_t * color_cache,int n_cache,const uint16_t * colors,int n_colors,uint8_t * cache_color_found,int * out_cache_colors)90 int av1_index_color_cache(const uint16_t *color_cache, int n_cache,
91 const uint16_t *colors, int n_colors,
92 uint8_t *cache_color_found, int *out_cache_colors) {
93 if (n_cache <= 0) {
94 for (int i = 0; i < n_colors; ++i) out_cache_colors[i] = colors[i];
95 return n_colors;
96 }
97 memset(cache_color_found, 0, n_cache * sizeof(*cache_color_found));
98 int n_in_cache = 0;
99 int in_cache_flags[PALETTE_MAX_SIZE];
100 memset(in_cache_flags, 0, sizeof(in_cache_flags));
101 for (int i = 0; i < n_cache && n_in_cache < n_colors; ++i) {
102 for (int j = 0; j < n_colors; ++j) {
103 if (colors[j] == color_cache[i]) {
104 in_cache_flags[j] = 1;
105 cache_color_found[i] = 1;
106 ++n_in_cache;
107 break;
108 }
109 }
110 }
111 int j = 0;
112 for (int i = 0; i < n_colors; ++i)
113 if (!in_cache_flags[i]) out_cache_colors[j++] = colors[i];
114 assert(j == n_colors - n_in_cache);
115 return j;
116 }
117
av1_get_palette_delta_bits_v(const PALETTE_MODE_INFO * const pmi,int bit_depth,int * zero_count,int * min_bits)118 int av1_get_palette_delta_bits_v(const PALETTE_MODE_INFO *const pmi,
119 int bit_depth, int *zero_count,
120 int *min_bits) {
121 const int n = pmi->palette_size[1];
122 const int max_val = 1 << bit_depth;
123 int max_d = 0;
124 *min_bits = bit_depth - 4;
125 *zero_count = 0;
126 for (int i = 1; i < n; ++i) {
127 const int delta = pmi->palette_colors[2 * PALETTE_MAX_SIZE + i] -
128 pmi->palette_colors[2 * PALETTE_MAX_SIZE + i - 1];
129 const int v = abs(delta);
130 const int d = AOMMIN(v, max_val - v);
131 if (d > max_d) max_d = d;
132 if (d == 0) ++(*zero_count);
133 }
134 return AOMMAX(av1_ceil_log2(max_d + 1), *min_bits);
135 }
136
av1_palette_color_cost_y(const PALETTE_MODE_INFO * const pmi,const uint16_t * color_cache,int n_cache,int bit_depth)137 int av1_palette_color_cost_y(const PALETTE_MODE_INFO *const pmi,
138 const uint16_t *color_cache, int n_cache,
139 int bit_depth) {
140 const int n = pmi->palette_size[0];
141 int out_cache_colors[PALETTE_MAX_SIZE];
142 uint8_t cache_color_found[2 * PALETTE_MAX_SIZE];
143 const int n_out_cache =
144 av1_index_color_cache(color_cache, n_cache, pmi->palette_colors, n,
145 cache_color_found, out_cache_colors);
146 const int total_bits =
147 n_cache + delta_encode_cost(out_cache_colors, n_out_cache, bit_depth, 1);
148 return av1_cost_literal(total_bits);
149 }
150
av1_palette_color_cost_uv(const PALETTE_MODE_INFO * const pmi,const uint16_t * color_cache,int n_cache,int bit_depth)151 int av1_palette_color_cost_uv(const PALETTE_MODE_INFO *const pmi,
152 const uint16_t *color_cache, int n_cache,
153 int bit_depth) {
154 const int n = pmi->palette_size[1];
155 int total_bits = 0;
156 // U channel palette color cost.
157 int out_cache_colors[PALETTE_MAX_SIZE];
158 uint8_t cache_color_found[2 * PALETTE_MAX_SIZE];
159 const int n_out_cache = av1_index_color_cache(
160 color_cache, n_cache, pmi->palette_colors + PALETTE_MAX_SIZE, n,
161 cache_color_found, out_cache_colors);
162 total_bits +=
163 n_cache + delta_encode_cost(out_cache_colors, n_out_cache, bit_depth, 0);
164
165 // V channel palette color cost.
166 int zero_count = 0, min_bits_v = 0;
167 const int bits_v =
168 av1_get_palette_delta_bits_v(pmi, bit_depth, &zero_count, &min_bits_v);
169 const int bits_using_delta =
170 2 + bit_depth + (bits_v + 1) * (n - 1) - zero_count;
171 const int bits_using_raw = bit_depth * n;
172 total_bits += 1 + AOMMIN(bits_using_delta, bits_using_raw);
173 return av1_cost_literal(total_bits);
174 }
175
// Grows 'color_map' in place from an 'orig_width x orig_height' layout to a
// 'new_width x new_height' layout. The added columns replicate the last valid
// column of each row and the added rows replicate the last valid row.
static inline void extend_palette_color_map(uint8_t *const color_map,
                                            int orig_width, int orig_height,
                                            int new_width, int new_height) {
  assert(new_width >= orig_width);
  assert(new_height >= orig_height);
  const int extra_cols = new_width - orig_width;
  if (extra_cols == 0 && new_height == orig_height) return;

  // Re-stride bottom-up so that a row is never overwritten before it has been
  // moved to its new position.
  for (int row = orig_height; row-- > 0;) {
    uint8_t *const dst_row = color_map + row * new_width;
    memmove(dst_row, color_map + row * orig_width, orig_width);
    // Pad to the right with the row's last valid entry.
    memset(dst_row + orig_width, dst_row[orig_width - 1], extra_cols);
  }

  // Pad at the bottom with copies of the last valid (already widened) row.
  for (int row = orig_height; row < new_height; ++row) {
    const uint8_t *const last_row = color_map + (orig_height - 1) * new_width;
    memcpy(color_map + row * new_width, last_row, new_width);
  }
}
199
// Bias toward using colors in the cache: every visited centroid that lies
// within 4 << (bit_depth - 8) of its nearest cache color is replaced by that
// cache color. Centroids are visited at indices 0, stride, 2 * stride, ...
// for 'n_colors' entries. On equal distance the earliest cache entry wins.
// TODO(huisu): Try other schemes to improve compression.
static inline void optimize_palette_colors(uint16_t *color_cache, int n_cache,
                                           int n_colors, int stride,
                                           int16_t *centroids, int bit_depth) {
  if (n_cache <= 0) return;

  const int snap_limit = 4 << (bit_depth - 8);
  const int end = n_colors * stride;
  for (int pos = 0; pos < end; pos += stride) {
    const int value = (int)centroids[pos];
    int nearest = 0;
    int nearest_dist = abs(value - (int)color_cache[0]);
    for (int c = 1; c < n_cache; ++c) {
      const int dist = abs(value - (int)color_cache[c]);
      if (dist >= nearest_dist) continue;
      nearest_dist = dist;
      nearest = c;
    }
    if (nearest_dist <= snap_limit) centroids[pos] = color_cache[nearest];
  }
}
220
/*!\brief Calculate the luma palette cost from a given color palette
 *
 * \ingroup palette_mode_search
 * \callergraph
 * Given the base colors as specified in centroids[], calculate the RD cost
 * of palette mode. If that cost is lower than \c *best_rd, the best-mode
 * outputs are updated.
 *
 * \param[in]     cpi            Top-level encoder structure.
 * \param[in,out] x              Macroblock being coded; its luma
 *                               color_index_map is overwritten.
 * \param[in,out] mbmi           Mode info that receives the candidate palette
 *                               (palette_colors and palette_size[0]).
 * \param[in]     bsize          Block size.
 * \param[in]     dc_mode_cost   Forwarded to intra_mode_info_cost_y().
 * \param[in]     data           Sample values passed to av1_calc_indices()
 *                               (rows * cols entries).
 * \param[in,out] centroids      Candidate colors. Modified in place: snapped
 *                               to nearby cache colors, then sorted and
 *                               de-duplicated.
 * \param[in]     n              Number of entries in \c centroids.
 * \param[in]     color_cache    Palette color cache.
 * \param[in]     n_cache        Number of entries in \c color_cache.
 * \param[in]     do_header_rd_based_gating  If true, the header-only RD cost
 *                               is checked against \c *best_rd before the
 *                               transform search is run.
 * \param[out]    best_mbmi      Receives a copy of \c *mbmi on improvement.
 * \param[out]    best_palette_color_map  Receives the color index map on
 *                               improvement.
 * \param[in,out] best_rd        Best RD cost so far; lowered on improvement.
 * \param[out]    rate, rate_tokenonly, distortion, skippable  Optional
 *                               (may be NULL); written on improvement.
 * \param[out]    beat_best_rd   Optional; set to 1 on improvement.
 * \param[in]     ctx            Supplies num_4x4_blk for the buffer copies.
 * \param[out]    blk_skip       Receives the blk_skip flags on improvement.
 * \param[out]    tx_type_map    Receives the tx type map on improvement.
 * \param[out]    beat_best_palette_rd  Optional; set to 1 on improvement.
 * \param[out]    do_header_rd_based_breakout  Reset to false on entry when
 *                               non-NULL; set to true when the header gating
 *                               rejects this palette. Must be non-NULL when
 *                               \c do_header_rd_based_gating is true.
 * \param[in]     discount_color_cost  Forwarded to intra_mode_info_cost_y().
 */
static inline void palette_rd_y(
    const AV1_COMP *const cpi, MACROBLOCK *x, MB_MODE_INFO *mbmi,
    BLOCK_SIZE bsize, int dc_mode_cost, const int16_t *data, int16_t *centroids,
    int n, uint16_t *color_cache, int n_cache, bool do_header_rd_based_gating,
    MB_MODE_INFO *best_mbmi, uint8_t *best_palette_color_map, int64_t *best_rd,
    int *rate, int *rate_tokenonly, int64_t *distortion, uint8_t *skippable,
    int *beat_best_rd, PICK_MODE_CONTEXT *ctx, uint8_t *blk_skip,
    uint8_t *tx_type_map, int *beat_best_palette_rd,
    bool *do_header_rd_based_breakout, int discount_color_cost) {
  if (do_header_rd_based_breakout != NULL) *do_header_rd_based_breakout = false;
  // Pull centroids onto nearby cache colors first; this can create duplicates,
  // which are removed right after.
  optimize_palette_colors(color_cache, n_cache, n, 1, centroids,
                          cpi->common.seq_params->bit_depth);
  const int num_unique_colors = remove_duplicates(centroids, n);
  if (num_unique_colors < PALETTE_MIN_SIZE) {
    // Too few unique colors to create a palette. And DC_PRED will work
    // well for that case anyway. So skip.
    return;
  }
  // Store the (clipped) unique colors as the luma palette of this block.
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  if (cpi->common.seq_params->use_highbitdepth) {
    for (int i = 0; i < num_unique_colors; ++i) {
      pmi->palette_colors[i] = clip_pixel_highbd(
          (int)centroids[i], cpi->common.seq_params->bit_depth);
    }
  } else {
    for (int i = 0; i < num_unique_colors; ++i) {
      pmi->palette_colors[i] = clip_pixel(centroids[i]);
    }
  }
  pmi->palette_size[0] = num_unique_colors;
  MACROBLOCKD *const xd = &x->e_mbd;
  uint8_t *const color_map = xd->plane[0].color_index_map;
  int block_width, block_height, rows, cols;
  av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows,
                           &cols);
  // Build the color index map for the rows x cols samples, then pad it out to
  // the full block_width x block_height.
  av1_calc_indices(data, centroids, color_map, rows * cols, num_unique_colors,
                   1);
  extend_palette_color_map(color_map, cols, rows, block_width, block_height);

  RD_STATS tokenonly_rd_stats;
  int this_rate;

  if (do_header_rd_based_gating) {
    assert(do_header_rd_based_breakout != NULL);
    // Cost of the mode/palette header alone, evaluated with zero distortion.
    const int palette_mode_rate = intra_mode_info_cost_y(
        cpi, x, mbmi, bsize, dc_mode_cost, discount_color_cost);
    const int64_t header_rd = RDCOST(x->rdmult, palette_mode_rate, 0);
    // Less aggressive pruning when prune_luma_palette_size_search_level == 1.
    const int header_rd_shift =
        (cpi->sf.intra_sf.prune_luma_palette_size_search_level == 1) ? 1 : 0;
    // Terminate further palette_size search, if the header cost corresponding
    // to lower palette_size is more than *best_rd << header_rd_shift. This
    // logic is implemented with a right shift in the LHS to prevent a possible
    // overflow with the left shift in RHS.
    if ((header_rd >> header_rd_shift) > *best_rd) {
      *do_header_rd_based_breakout = true;
      return;
    }
    av1_pick_uniform_tx_size_type_yrd(cpi, x, &tokenonly_rd_stats, bsize,
                                      *best_rd);
    // A rate of INT_MAX marks the transform search result as unusable.
    if (tokenonly_rd_stats.rate == INT_MAX) return;
    this_rate = tokenonly_rd_stats.rate + palette_mode_rate;
  } else {
    av1_pick_uniform_tx_size_type_yrd(cpi, x, &tokenonly_rd_stats, bsize,
                                      *best_rd);
    // A rate of INT_MAX marks the transform search result as unusable.
    if (tokenonly_rd_stats.rate == INT_MAX) return;
    this_rate = tokenonly_rd_stats.rate +
                intra_mode_info_cost_y(cpi, x, mbmi, bsize, dc_mode_cost,
                                       discount_color_cost);
  }

  int64_t this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
  // The tx size cost is removed from the token-only rate only after this_rd
  // has been computed, so this_rd and this_rate still include it while the
  // reported rate_tokenonly does not.
  if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(mbmi->bsize)) {
    tokenonly_rd_stats.rate -= tx_size_cost(x, bsize, mbmi->tx_size);
  }
  // Collect mode stats for multiwinner mode processing
  const int txfm_search_done = 1;
  store_winner_mode_stats(
      &cpi->common, x, mbmi, NULL, NULL, NULL, THR_DC, color_map, bsize,
      this_rd, cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
  if (this_rd < *best_rd) {
    *best_rd = this_rd;
    // Setting beat_best_rd flag because current mode rd is better than best_rd.
    // This flag need to be updated only for palette evaluation in key frames
    if (beat_best_rd) *beat_best_rd = 1;
    // Snapshot everything needed to restore this candidate later.
    memcpy(best_palette_color_map, color_map,
           block_width * block_height * sizeof(color_map[0]));
    *best_mbmi = *mbmi;
    memcpy(blk_skip, x->txfm_search_info.blk_skip,
           sizeof(x->txfm_search_info.blk_skip[0]) * ctx->num_4x4_blk);
    av1_copy_array(tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
    if (rate) *rate = this_rate;
    if (rate_tokenonly) *rate_tokenonly = tokenonly_rd_stats.rate;
    if (distortion) *distortion = tokenonly_rd_stats.dist;
    if (skippable) *skippable = tokenonly_rd_stats.skip_txfm;
    if (beat_best_palette_rd) *beat_best_palette_rd = 1;
  }
}
326
// Returns nonzero once 'curr_idx' has reached or passed 'end_idx' when moving
// in the direction given by the sign of 'step_size' (which must be nonzero).
static inline int is_iter_over(int curr_idx, int end_idx, int step_size) {
  assert(step_size != 0);
  if (step_size > 0) return curr_idx >= end_idx;
  return curr_idx <= end_idx;
}
331
332 // Performs count-based palette search with number of colors in interval
333 // [start_n, end_n) with step size step_size. If step_size < 0, then end_n can
334 // be less than start_n. Saves the last numbers searched in last_n_searched and
335 // returns the best number of colors found.
perform_top_color_palette_search(const AV1_COMP * const cpi,MACROBLOCK * x,MB_MODE_INFO * mbmi,BLOCK_SIZE bsize,int dc_mode_cost,const int16_t * data,int16_t * top_colors,int start_n,int end_n,int step_size,bool do_header_rd_based_gating,int * last_n_searched,uint16_t * color_cache,int n_cache,MB_MODE_INFO * best_mbmi,uint8_t * best_palette_color_map,int64_t * best_rd,int * rate,int * rate_tokenonly,int64_t * distortion,uint8_t * skippable,int * beat_best_rd,PICK_MODE_CONTEXT * ctx,uint8_t * best_blk_skip,uint8_t * tx_type_map,int discount_color_cost)336 static inline int perform_top_color_palette_search(
337 const AV1_COMP *const cpi, MACROBLOCK *x, MB_MODE_INFO *mbmi,
338 BLOCK_SIZE bsize, int dc_mode_cost, const int16_t *data,
339 int16_t *top_colors, int start_n, int end_n, int step_size,
340 bool do_header_rd_based_gating, int *last_n_searched, uint16_t *color_cache,
341 int n_cache, MB_MODE_INFO *best_mbmi, uint8_t *best_palette_color_map,
342 int64_t *best_rd, int *rate, int *rate_tokenonly, int64_t *distortion,
343 uint8_t *skippable, int *beat_best_rd, PICK_MODE_CONTEXT *ctx,
344 uint8_t *best_blk_skip, uint8_t *tx_type_map, int discount_color_cost) {
345 int16_t centroids[PALETTE_MAX_SIZE];
346 int n = start_n;
347 int top_color_winner = end_n;
348 /* clang-format off */
349 assert(IMPLIES(step_size < 0, start_n > end_n));
350 /* clang-format on */
351 assert(IMPLIES(step_size > 0, start_n < end_n));
352 while (!is_iter_over(n, end_n, step_size)) {
353 int beat_best_palette_rd = 0;
354 bool do_header_rd_based_breakout = false;
355 memcpy(centroids, top_colors, n * sizeof(top_colors[0]));
356 palette_rd_y(cpi, x, mbmi, bsize, dc_mode_cost, data, centroids, n,
357 color_cache, n_cache, do_header_rd_based_gating, best_mbmi,
358 best_palette_color_map, best_rd, rate, rate_tokenonly,
359 distortion, skippable, beat_best_rd, ctx, best_blk_skip,
360 tx_type_map, &beat_best_palette_rd,
361 &do_header_rd_based_breakout, discount_color_cost);
362 *last_n_searched = n;
363 if (do_header_rd_based_breakout) {
364 // Terminate palette_size search by setting last_n_searched to end_n.
365 *last_n_searched = end_n;
366 break;
367 }
368 if (beat_best_palette_rd) {
369 top_color_winner = n;
370 } else if (cpi->sf.intra_sf.prune_palette_search_level == 2) {
371 // At search level 2, we return immediately if we don't see an improvement
372 return top_color_winner;
373 }
374 n += step_size;
375 }
376 return top_color_winner;
377 }
378
379 // Performs k-means based palette search with number of colors in interval
380 // [start_n, end_n) with step size step_size. If step_size < 0, then end_n can
381 // be less than start_n. Saves the last numbers searched in last_n_searched and
382 // returns the best number of colors found.
perform_k_means_palette_search(const AV1_COMP * const cpi,MACROBLOCK * x,MB_MODE_INFO * mbmi,BLOCK_SIZE bsize,int dc_mode_cost,const int16_t * data,int lower_bound,int upper_bound,int start_n,int end_n,int step_size,bool do_header_rd_based_gating,int * last_n_searched,uint16_t * color_cache,int n_cache,MB_MODE_INFO * best_mbmi,uint8_t * best_palette_color_map,int64_t * best_rd,int * rate,int * rate_tokenonly,int64_t * distortion,uint8_t * skippable,int * beat_best_rd,PICK_MODE_CONTEXT * ctx,uint8_t * best_blk_skip,uint8_t * tx_type_map,uint8_t * color_map,int data_points,int discount_color_cost)383 static inline int perform_k_means_palette_search(
384 const AV1_COMP *const cpi, MACROBLOCK *x, MB_MODE_INFO *mbmi,
385 BLOCK_SIZE bsize, int dc_mode_cost, const int16_t *data, int lower_bound,
386 int upper_bound, int start_n, int end_n, int step_size,
387 bool do_header_rd_based_gating, int *last_n_searched, uint16_t *color_cache,
388 int n_cache, MB_MODE_INFO *best_mbmi, uint8_t *best_palette_color_map,
389 int64_t *best_rd, int *rate, int *rate_tokenonly, int64_t *distortion,
390 uint8_t *skippable, int *beat_best_rd, PICK_MODE_CONTEXT *ctx,
391 uint8_t *best_blk_skip, uint8_t *tx_type_map, uint8_t *color_map,
392 int data_points, int discount_color_cost) {
393 int16_t centroids[PALETTE_MAX_SIZE];
394 const int max_itr = 50;
395 int n = start_n;
396 int top_color_winner = end_n;
397 /* clang-format off */
398 assert(IMPLIES(step_size < 0, start_n > end_n));
399 /* clang-format on */
400 assert(IMPLIES(step_size > 0, start_n < end_n));
401 while (!is_iter_over(n, end_n, step_size)) {
402 int beat_best_palette_rd = 0;
403 bool do_header_rd_based_breakout = false;
404 for (int i = 0; i < n; ++i) {
405 centroids[i] =
406 lower_bound + (2 * i + 1) * (upper_bound - lower_bound) / n / 2;
407 }
408 av1_k_means(data, centroids, color_map, data_points, n, 1, max_itr);
409 palette_rd_y(cpi, x, mbmi, bsize, dc_mode_cost, data, centroids, n,
410 color_cache, n_cache, do_header_rd_based_gating, best_mbmi,
411 best_palette_color_map, best_rd, rate, rate_tokenonly,
412 distortion, skippable, beat_best_rd, ctx, best_blk_skip,
413 tx_type_map, &beat_best_palette_rd,
414 &do_header_rd_based_breakout, discount_color_cost);
415 *last_n_searched = n;
416 if (do_header_rd_based_breakout) {
417 // Terminate palette_size search by setting last_n_searched to end_n.
418 *last_n_searched = end_n;
419 break;
420 }
421 if (beat_best_palette_rd) {
422 top_color_winner = n;
423 } else if (cpi->sf.intra_sf.prune_palette_search_level == 2) {
424 // At search level 2, we return immediately if we don't see an improvement
425 return top_color_winner;
426 }
427 n += step_size;
428 }
429 return top_color_winner;
430 }
431
432 // Sets the parameters to search the current number of colors +- 1
set_stage2_params(int * min_n,int * max_n,int * step_size,int winner,int end_n)433 static inline void set_stage2_params(int *min_n, int *max_n, int *step_size,
434 int winner, int end_n) {
435 // Set min to winner - 1 unless we are already at the border, then we set it
436 // to winner + 1
437 *min_n = (winner == PALETTE_MIN_SIZE) ? (PALETTE_MIN_SIZE + 1)
438 : AOMMAX(winner - 1, PALETTE_MIN_SIZE);
439 // Set max to winner + 1 unless we are already at the border, then we set it
440 // to winner - 1
441 *max_n =
442 (winner == end_n) ? (winner - 1) : AOMMIN(winner + 1, PALETTE_MAX_SIZE);
443
444 // Set the step size to max_n - min_n so we only search those two values.
445 // If max_n == min_n, then set step_size to 1 to avoid infinite loop later.
446 *step_size = AOMMAX(1, *max_n - *min_n);
447 }
448
// Copies the rows x cols source samples into 'data' (tightly packed, 'cols'
// entries per row) as int16_t and reports the smallest and largest sample in
// *lower_bound and *upper_bound. When 'is_high_bitdepth' is set, 'src' is
// converted with CONVERT_TO_SHORTPTR() and read as 16-bit samples; otherwise
// it is read as 8-bit samples. 'src_stride' is the distance between source
// rows, in samples.
static inline void fill_data_and_get_bounds(const uint8_t *src,
                                            const int src_stride,
                                            const int rows, const int cols,
                                            const int is_high_bitdepth,
                                            int16_t *data, int *lower_bound,
                                            int *upper_bound) {
  int lo, hi;
  if (is_high_bitdepth) {
    const uint16_t *row = CONVERT_TO_SHORTPTR(src);
    lo = hi = row[0];
    for (int r = 0; r < rows; ++r, row += src_stride, data += cols) {
      for (int c = 0; c < cols; ++c) {
        const int sample = row[c];
        data[c] = (int16_t)sample;
        if (sample < lo) lo = sample;
        if (sample > hi) hi = sample;
      }
    }
  } else {
    // low bit depth
    const uint8_t *row = src;
    lo = hi = row[0];
    for (int r = 0; r < rows; ++r, row += src_stride, data += cols) {
      for (int c = 0; c < cols; ++c) {
        const int sample = row[c];
        data[c] = (int16_t)sample;
        if (sample < lo) lo = sample;
        if (sample > hi) hi = sample;
      }
    }
  }
  *lower_bound = lo;
  *upper_bound = hi;
}
484
/*! \brief Colors are sorted by their count: the higher the better.
 */
struct ColorCount {
  //! Color index in the histogram.
  int index;
  //! Histogram count.
  int count;
};

// qsort() comparator for struct ColorCount. Orders by descending count and
// breaks ties by ascending index. Returns 0 for elements that are equal in
// both fields, as the qsort() contract requires a consistent ordering (an
// element must not compare as greater than itself).
static int color_count_comp(const void *c1, const void *c2) {
  const struct ColorCount *const lhs = (const struct ColorCount *)c1;
  const struct ColorCount *const rhs = (const struct ColorCount *)c2;
  if (lhs->count != rhs->count) return (lhs->count > rhs->count) ? -1 : 1;
  if (lhs->index != rhs->index) return (lhs->index < rhs->index) ? -1 : 1;
  return 0;
}
502
find_top_colors(const int * const count_buf,int bit_depth,int n_colors,int16_t * top_colors)503 static void find_top_colors(const int *const count_buf, int bit_depth,
504 int n_colors, int16_t *top_colors) {
505 // Top color array, serving as a priority queue if more than n_colors are
506 // found.
507 struct ColorCount top_color_counts[PALETTE_MAX_SIZE] = { { 0 } };
508 int n_color_count = 0;
509 for (int i = 0; i < (1 << bit_depth); ++i) {
510 if (count_buf[i] > 0) {
511 if (n_color_count < n_colors) {
512 // Keep adding to the top colors.
513 top_color_counts[n_color_count].index = i;
514 top_color_counts[n_color_count].count = count_buf[i];
515 ++n_color_count;
516 if (n_color_count == n_colors) {
517 qsort(top_color_counts, n_colors, sizeof(top_color_counts[0]),
518 color_count_comp);
519 }
520 } else {
521 // Check the worst in the sorted top.
522 if (count_buf[i] > top_color_counts[n_colors - 1].count) {
523 int j = n_colors - 1;
524 // Move up to the best one.
525 while (j >= 1 && count_buf[i] > top_color_counts[j - 1].count) --j;
526 memmove(top_color_counts + j + 1, top_color_counts + j,
527 (n_colors - j - 1) * sizeof(top_color_counts[0]));
528 top_color_counts[j].index = i;
529 top_color_counts[j].count = count_buf[i];
530 }
531 }
532 }
533 }
534 assert(n_color_count == n_colors);
535
536 for (int i = 0; i < n_colors; ++i) {
537 top_colors[i] = top_color_counts[i].index;
538 }
539 }
540
av1_rd_pick_palette_intra_sby(const AV1_COMP * cpi,MACROBLOCK * x,BLOCK_SIZE bsize,int dc_mode_cost,MB_MODE_INFO * best_mbmi,uint8_t * best_palette_color_map,int64_t * best_rd,int * rate,int * rate_tokenonly,int64_t * distortion,uint8_t * skippable,int * beat_best_rd,PICK_MODE_CONTEXT * ctx,uint8_t * best_blk_skip,uint8_t * tx_type_map)541 void av1_rd_pick_palette_intra_sby(
542 const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int dc_mode_cost,
543 MB_MODE_INFO *best_mbmi, uint8_t *best_palette_color_map, int64_t *best_rd,
544 int *rate, int *rate_tokenonly, int64_t *distortion, uint8_t *skippable,
545 int *beat_best_rd, PICK_MODE_CONTEXT *ctx, uint8_t *best_blk_skip,
546 uint8_t *tx_type_map) {
547 MACROBLOCKD *const xd = &x->e_mbd;
548 MB_MODE_INFO *const mbmi = xd->mi[0];
549 assert(!is_inter_block(mbmi));
550 assert(av1_allow_palette(cpi->common.features.allow_screen_content_tools,
551 bsize));
552 assert(PALETTE_MAX_SIZE == 8);
553 assert(PALETTE_MIN_SIZE == 2);
554
555 const int src_stride = x->plane[0].src.stride;
556 const uint8_t *const src = x->plane[0].src.buf;
557 int block_width, block_height, rows, cols;
558 av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows,
559 &cols);
560 const SequenceHeader *const seq_params = cpi->common.seq_params;
561 const int is_hbd = seq_params->use_highbitdepth;
562 const int bit_depth = seq_params->bit_depth;
563 const int discount_color_cost = cpi->sf.rt_sf.use_nonrd_pick_mode;
564 int unused;
565
566 int count_buf[1 << 12]; // Maximum (1 << 12) color levels.
567 int colors, colors_threshold = 0;
568 if (is_hbd) {
569 int count_buf_8bit[1 << 8]; // Maximum (1 << 8) bins for hbd path.
570 av1_count_colors_highbd(src, src_stride, rows, cols, bit_depth, count_buf,
571 count_buf_8bit, &colors_threshold, &colors);
572 } else {
573 av1_count_colors(src, src_stride, rows, cols, count_buf, &colors);
574 colors_threshold = colors;
575 }
576
577 uint8_t *const color_map = xd->plane[0].color_index_map;
578 int color_thresh_palette = x->color_palette_thresh;
579 // Allow for larger color_threshold for palette search, based on color,
580 // scene_change, and block source variance.
581 // Since palette is Y based, only allow larger threshold if block
582 // color_dist is below threshold.
583 if (cpi->sf.rt_sf.use_nonrd_pick_mode &&
584 cpi->sf.rt_sf.increase_color_thresh_palette && cpi->rc.high_source_sad &&
585 x->source_variance > 50) {
586 int64_t norm_color_dist = 0;
587 if (x->color_sensitivity[0] || x->color_sensitivity[1]) {
588 norm_color_dist = x->min_dist_inter_uv >>
589 (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]);
590 if (x->color_sensitivity[0] && x->color_sensitivity[1])
591 norm_color_dist = norm_color_dist >> 1;
592 }
593 if (norm_color_dist < 8000) color_thresh_palette += 20;
594 }
595 if (colors_threshold > 1 && colors_threshold <= color_thresh_palette) {
596 int16_t *const data = x->palette_buffer->kmeans_data_buf;
597 int16_t centroids[PALETTE_MAX_SIZE];
598 int lower_bound, upper_bound;
599 fill_data_and_get_bounds(src, src_stride, rows, cols, is_hbd, data,
600 &lower_bound, &upper_bound);
601
602 mbmi->mode = DC_PRED;
603 mbmi->filter_intra_mode_info.use_filter_intra = 0;
604
605 uint16_t color_cache[2 * PALETTE_MAX_SIZE];
606 const int n_cache = av1_get_palette_cache(xd, 0, color_cache);
607
608 // Find the dominant colors, stored in top_colors[].
609 int16_t top_colors[PALETTE_MAX_SIZE] = { 0 };
610 find_top_colors(count_buf, bit_depth, AOMMIN(colors, PALETTE_MAX_SIZE),
611 top_colors);
612
613 // The following are the approaches used for header rdcost based gating
614 // for early termination for different values of prune_palette_search_level.
615 // 0: Pruning based on header rdcost for ascending order palette_size
616 // search.
617 // 1: When colors > PALETTE_MIN_SIZE, enabled only for coarse palette_size
618 // search and for finer search do_header_rd_based_gating parameter is
619 // explicitly passed as 'false'.
620 // 2: Enabled only for ascending order palette_size search and for
621 // descending order search do_header_rd_based_gating parameter is explicitly
622 // passed as 'false'.
623 const bool do_header_rd_based_gating =
624 cpi->sf.intra_sf.prune_luma_palette_size_search_level != 0;
625
626 // TODO([email protected]): Try to avoid duplicate computation in cases
627 // where the dominant colors and the k-means results are similar.
628 if ((cpi->sf.intra_sf.prune_palette_search_level == 1) &&
629 (colors > PALETTE_MIN_SIZE)) {
630 // Start index and step size below are chosen to evaluate unique
631 // candidates in neighbor search, in case a winner candidate is found in
632 // coarse search. Example,
633 // 1) 8 colors (end_n = 8): 2,3,4,5,6,7,8. start_n is chosen as 2 and step
634 // size is chosen as 3. Therefore, coarse search will evaluate 2, 5 and 8.
635 // If winner is found at 5, then 4 and 6 are evaluated. Similarly, for 2
636 // (3) and 8 (7).
637 // 2) 7 colors (end_n = 7): 2,3,4,5,6,7. If start_n is chosen as 2 (same
638 // as for 8 colors) then step size should also be 2, to cover all
639 // candidates. Coarse search will evaluate 2, 4 and 6. If winner is either
640 // 2 or 4, 3 will be evaluated. Instead, if start_n=3 and step_size=3,
641 // coarse search will evaluate 3 and 6. For the winner, unique neighbors
642 // (3: 2,4 or 6: 5,7) would be evaluated.
643
644 // Start index for coarse palette search for dominant colors and k-means
645 const uint8_t start_n_lookup_table[PALETTE_MAX_SIZE + 1] = { 0, 0, 0,
646 3, 3, 2,
647 3, 3, 2 };
648 // Step size for coarse palette search for dominant colors and k-means
649 const uint8_t step_size_lookup_table[PALETTE_MAX_SIZE + 1] = { 0, 0, 0,
650 3, 3, 3,
651 3, 3, 3 };
652
653 // Choose the start index and step size for coarse search based on number
654 // of colors
655 const int max_n = AOMMIN(colors, PALETTE_MAX_SIZE);
656 const int min_n = start_n_lookup_table[max_n];
657 const int step_size = step_size_lookup_table[max_n];
658 assert(min_n >= PALETTE_MIN_SIZE);
659 // Perform top color coarse palette search to find the winner candidate
660 const int top_color_winner = perform_top_color_palette_search(
661 cpi, x, mbmi, bsize, dc_mode_cost, data, top_colors, min_n, max_n + 1,
662 step_size, do_header_rd_based_gating, &unused, color_cache, n_cache,
663 best_mbmi, best_palette_color_map, best_rd, rate, rate_tokenonly,
664 distortion, skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map,
665 discount_color_cost);
666 // Evaluate neighbors for the winner color (if winner is found) in the
667 // above coarse search for dominant colors
668 if (top_color_winner <= max_n) {
669 int stage2_min_n, stage2_max_n, stage2_step_size;
670 set_stage2_params(&stage2_min_n, &stage2_max_n, &stage2_step_size,
671 top_color_winner, max_n);
672 // perform finer search for the winner candidate
673 perform_top_color_palette_search(
674 cpi, x, mbmi, bsize, dc_mode_cost, data, top_colors, stage2_min_n,
675 stage2_max_n + 1, stage2_step_size,
676 /*do_header_rd_based_gating=*/false, &unused, color_cache, n_cache,
677 best_mbmi, best_palette_color_map, best_rd, rate, rate_tokenonly,
678 distortion, skippable, beat_best_rd, ctx, best_blk_skip,
679 tx_type_map, discount_color_cost);
680 }
681 // K-means clustering.
682 // Perform k-means coarse palette search to find the winner candidate
683 const int k_means_winner = perform_k_means_palette_search(
684 cpi, x, mbmi, bsize, dc_mode_cost, data, lower_bound, upper_bound,
685 min_n, max_n + 1, step_size, do_header_rd_based_gating, &unused,
686 color_cache, n_cache, best_mbmi, best_palette_color_map, best_rd,
687 rate, rate_tokenonly, distortion, skippable, beat_best_rd, ctx,
688 best_blk_skip, tx_type_map, color_map, rows * cols,
689 discount_color_cost);
690 // Evaluate neighbors for the winner color (if winner is found) in the
691 // above coarse search for k-means
692 if (k_means_winner <= max_n) {
693 int start_n_stage2, end_n_stage2, step_size_stage2;
694 set_stage2_params(&start_n_stage2, &end_n_stage2, &step_size_stage2,
695 k_means_winner, max_n);
696 // perform finer search for the winner candidate
697 perform_k_means_palette_search(
698 cpi, x, mbmi, bsize, dc_mode_cost, data, lower_bound, upper_bound,
699 start_n_stage2, end_n_stage2 + 1, step_size_stage2,
700 /*do_header_rd_based_gating=*/false, &unused, color_cache, n_cache,
701 best_mbmi, best_palette_color_map, best_rd, rate, rate_tokenonly,
702 distortion, skippable, beat_best_rd, ctx, best_blk_skip,
703 tx_type_map, color_map, rows * cols, discount_color_cost);
704 }
705 } else {
706 const int max_n = AOMMIN(colors, PALETTE_MAX_SIZE),
707 min_n = PALETTE_MIN_SIZE;
708 // Perform top color palette search in ascending order
709 int last_n_searched = min_n;
710 perform_top_color_palette_search(
711 cpi, x, mbmi, bsize, dc_mode_cost, data, top_colors, min_n, max_n + 1,
712 1, do_header_rd_based_gating, &last_n_searched, color_cache, n_cache,
713 best_mbmi, best_palette_color_map, best_rd, rate, rate_tokenonly,
714 distortion, skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map,
715 discount_color_cost);
716 if (last_n_searched < max_n) {
717 // Search in descending order until we get to the previous best
718 perform_top_color_palette_search(
719 cpi, x, mbmi, bsize, dc_mode_cost, data, top_colors, max_n,
720 last_n_searched, -1, /*do_header_rd_based_gating=*/false, &unused,
721 color_cache, n_cache, best_mbmi, best_palette_color_map, best_rd,
722 rate, rate_tokenonly, distortion, skippable, beat_best_rd, ctx,
723 best_blk_skip, tx_type_map, discount_color_cost);
724 }
725 // K-means clustering.
726 if (colors == PALETTE_MIN_SIZE) {
727 // Special case: These colors automatically become the centroids.
728 assert(colors == 2);
729 centroids[0] = lower_bound;
730 centroids[1] = upper_bound;
731 palette_rd_y(cpi, x, mbmi, bsize, dc_mode_cost, data, centroids, colors,
732 color_cache, n_cache, /*do_header_rd_based_gating=*/false,
733 best_mbmi, best_palette_color_map, best_rd, rate,
734 rate_tokenonly, distortion, skippable, beat_best_rd, ctx,
735 best_blk_skip, tx_type_map, NULL, NULL,
736 discount_color_cost);
737 } else {
738 // Perform k-means palette search in ascending order
739 last_n_searched = min_n;
740 perform_k_means_palette_search(
741 cpi, x, mbmi, bsize, dc_mode_cost, data, lower_bound, upper_bound,
742 min_n, max_n + 1, 1, do_header_rd_based_gating, &last_n_searched,
743 color_cache, n_cache, best_mbmi, best_palette_color_map, best_rd,
744 rate, rate_tokenonly, distortion, skippable, beat_best_rd, ctx,
745 best_blk_skip, tx_type_map, color_map, rows * cols,
746 discount_color_cost);
747 if (last_n_searched < max_n) {
748 // Search in descending order until we get to the previous best
749 perform_k_means_palette_search(
750 cpi, x, mbmi, bsize, dc_mode_cost, data, lower_bound, upper_bound,
751 max_n, last_n_searched, -1, /*do_header_rd_based_gating=*/false,
752 &unused, color_cache, n_cache, best_mbmi, best_palette_color_map,
753 best_rd, rate, rate_tokenonly, distortion, skippable,
754 beat_best_rd, ctx, best_blk_skip, tx_type_map, color_map,
755 rows * cols, discount_color_cost);
756 }
757 }
758 }
759 }
760
761 if (best_mbmi->palette_mode_info.palette_size[0] > 0) {
762 memcpy(color_map, best_palette_color_map,
763 block_width * block_height * sizeof(best_palette_color_map[0]));
764 // Gather the stats to determine whether to use screen content tools in
765 // function av1_determine_sc_tools_with_encoding().
766 x->palette_pixels += (block_width * block_height);
767 }
768 *mbmi = *best_mbmi;
769 }
770
/*!\brief Rate-distortion search over chroma palette sizes for an intra block.
 *
 * Gathers the (U, V) sample pairs of the current block and, for every palette
 * size n from PALETTE_MIN_SIZE up to the smaller of the block's color count
 * and PALETTE_MAX_SIZE, clusters them with av1_k_means(), derives the palette
 * and color index map, and evaluates the resulting RD cost. The search is
 * only attempted when the larger of the two per-plane color thresholds lies
 * in the range (1, 64].
 *
 * \param[in]     cpi                     Top-level encoder structure.
 * \param[in,out] x                       Macroblock being coded. Its current
 *                                        mode info (uv_mode, chroma palette)
 *                                        and chroma color index map are
 *                                        overwritten while candidates are
 *                                        evaluated.
 * \param[in]     dc_mode_cost            Forwarded to
 *                                        intra_mode_info_cost_uv().
 * \param[in,out] best_palette_color_map  Receives the color index map of each
 *                                        new best candidate.
 * \param[in,out] best_mbmi               Receives a copy of the mode info of
 *                                        each new best candidate.
 * \param[in,out] best_rd                 Best RD cost so far; lowered when a
 *                                        candidate beats it.
 * \param[out]    rate                    Total rate of the best candidate.
 * \param[out]    rate_tokenonly          Token-only rate of the best
 *                                        candidate.
 * \param[out]    distortion              Distortion of the best candidate.
 * \param[out]    skippable               skip_txfm flag of the best candidate.
 *
 * \remark The output arguments are written only when some candidate improves
 * on the incoming *best_rd. On exit, if best_mbmi carries a non-empty chroma
 * palette, its color map is copied back into the block's chroma color index
 * map; the current mode info itself is not reset to the winner here.
 */
void av1_rd_pick_palette_intra_sbuv(const AV1_COMP *cpi, MACROBLOCK *x,
                                    int dc_mode_cost,
                                    uint8_t *best_palette_color_map,
                                    MB_MODE_INFO *const best_mbmi,
                                    int64_t *best_rd, int *rate,
                                    int *rate_tokenonly, int64_t *distortion,
                                    uint8_t *skippable) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  assert(!is_inter_block(mbmi));
  assert(av1_allow_palette(cpi->common.features.allow_screen_content_tools,
                           mbmi->bsize));
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  const BLOCK_SIZE bsize = mbmi->bsize;
  const SequenceHeader *const seq_params = cpi->common.seq_params;
  const int is_hbd = seq_params->use_highbitdepth;

  const int src_stride = x->plane[1].src.stride;
  const uint8_t *const src_u = x->plane[1].src.buf;
  const uint8_t *const src_v = x->plane[2].src.buf;
  uint8_t *const color_map = xd->plane[1].color_index_map;

  int plane_block_width, plane_block_height, rows, cols;
  av1_get_block_dimensions(bsize, 1, xd, &plane_block_width,
                           &plane_block_height, &rows, &cols);
  // Byte size of one full chroma color index map for this block.
  const size_t color_map_bytes = plane_block_width * plane_block_height *
                                 sizeof(best_palette_color_map[0]);

  mbmi->uv_mode = UV_DC_PRED;

  // Count the distinct colors in each chroma plane. The high bit-depth
  // counter reports a separate threshold value; for 8-bit input the
  // threshold is simply the color count.
  int colors_u, colors_v;
  int colors_threshold_u = 0, colors_threshold_v = 0;
  if (is_hbd) {
    int count_buf[1 << 12];      // Maximum (1 << 12) color levels.
    int count_buf_8bit[1 << 8];  // Maximum (1 << 8) bins for hbd path.
    av1_count_colors_highbd(src_u, src_stride, rows, cols,
                            seq_params->bit_depth, count_buf, count_buf_8bit,
                            &colors_threshold_u, &colors_u);
    av1_count_colors_highbd(src_v, src_stride, rows, cols,
                            seq_params->bit_depth, count_buf, count_buf_8bit,
                            &colors_threshold_v, &colors_v);
  } else {
    int count_buf[1 << 8];
    av1_count_colors(src_u, src_stride, rows, cols, count_buf, &colors_u);
    av1_count_colors(src_v, src_stride, rows, cols, count_buf, &colors_v);
    colors_threshold_u = colors_u;
    colors_threshold_v = colors_v;
  }

  uint16_t color_cache[2 * PALETTE_MAX_SIZE];
  const int n_cache = av1_get_palette_cache(xd, 1, color_cache);

  int colors_threshold = colors_threshold_v;
  if (colors_threshold_u > colors_threshold_v) {
    colors_threshold = colors_threshold_u;
  }

  if (colors_threshold > 1 && colors_threshold <= 64) {
    const int max_itr = 50;
    int16_t *const data = x->palette_buffer->kmeans_data_buf;
    int16_t centroids[2 * PALETTE_MAX_SIZE];
    RD_STATS tokenonly_rd_stats;

    const uint16_t *const src_u16 = CONVERT_TO_SHORTPTR(src_u);
    const uint16_t *const src_v16 = CONVERT_TO_SHORTPTR(src_v);

    // Seed the per-plane value range with the first sample.
    int lb_u = is_hbd ? src_u16[0] : src_u[0];
    int lb_v = is_hbd ? src_v16[0] : src_v[0];
    int ub_u = lb_u;
    int ub_v = lb_v;

    // Interleave the samples as (U, V) pairs in raster order while tracking
    // the minimum and maximum of each plane.
    int16_t *pair = data;
    for (int row = 0; row < rows; ++row) {
      const int row_offset = row * src_stride;
      for (int col = 0; col < cols; ++col) {
        const int pos = row_offset + col;
        const int val_u = is_hbd ? src_u16[pos] : src_u[pos];
        const int val_v = is_hbd ? src_v16[pos] : src_v[pos];
        pair[0] = val_u;
        pair[1] = val_v;
        pair += 2;

        if (val_u < lb_u) {
          lb_u = val_u;
        } else if (val_u > ub_u) {
          ub_u = val_u;
        }
        if (val_v < lb_v) {
          lb_v = val_v;
        } else if (val_v > ub_v) {
          ub_v = val_v;
        }
      }
    }

    const int range_u = ub_u - lb_u;
    const int range_v = ub_v - lb_v;
    const int num_pixels = rows * cols;

    int max_colors = colors_u > colors_v ? colors_u : colors_v;
    if (max_colors > PALETTE_MAX_SIZE) max_colors = PALETTE_MAX_SIZE;

    for (int n = PALETTE_MIN_SIZE; n <= max_colors; ++n) {
      // Initial centroids: midpoints of n equal slices of each plane's range.
      for (int k = 0; k < n; ++k) {
        const int odd = 2 * k + 1;
        centroids[2 * k] = lb_u + odd * range_u / n / 2;
        centroids[2 * k + 1] = lb_v + odd * range_v / n / 2;
      }
      av1_k_means(data, centroids, color_map, num_pixels, n, 2, max_itr);
      optimize_palette_colors(color_cache, n_cache, n, 2, centroids,
                              seq_params->bit_depth);

      // Selection sort of the (U, V) pairs by ascending U value.
      for (int dst = 0; dst < n - 1; ++dst) {
        int smallest = dst;
        for (int k = dst + 1; k < n; ++k) {
          if (centroids[2 * k] < centroids[2 * smallest]) smallest = k;
        }
        if (smallest != dst) {
          const int16_t saved_u = centroids[2 * dst];
          const int16_t saved_v = centroids[2 * dst + 1];
          centroids[2 * dst] = centroids[2 * smallest];
          centroids[2 * dst + 1] = centroids[2 * smallest + 1];
          centroids[2 * smallest] = saved_u;
          centroids[2 * smallest + 1] = saved_v;
        }
      }

      // The centroids moved, so the index map has to be rebuilt.
      av1_calc_indices(data, centroids, color_map, num_pixels, n, 2);
      extend_palette_color_map(color_map, cols, rows, plane_block_width,
                               plane_block_height);

      // Store the clipped palette: U entries start at PALETTE_MAX_SIZE and
      // V entries at 2 * PALETTE_MAX_SIZE.
      pmi->palette_size[1] = n;
      for (int k = 0; k < n; ++k) {
        const int color_u = centroids[2 * k];
        const int color_v = centroids[2 * k + 1];
        if (is_hbd) {
          pmi->palette_colors[PALETTE_MAX_SIZE + k] =
              clip_pixel_highbd(color_u, seq_params->bit_depth);
          pmi->palette_colors[2 * PALETTE_MAX_SIZE + k] =
              clip_pixel_highbd(color_v, seq_params->bit_depth);
        } else {
          pmi->palette_colors[PALETTE_MAX_SIZE + k] = clip_pixel(color_u);
          pmi->palette_colors[2 * PALETTE_MAX_SIZE + k] = clip_pixel(color_v);
        }
      }

      const int early_term =
          cpi->sf.intra_sf.early_term_chroma_palette_size_search;
      int palette_mode_rate = 0;
      if (early_term) {
        palette_mode_rate =
            intra_mode_info_cost_uv(cpi, x, mbmi, bsize, dc_mode_cost);
        const int64_t header_rd = RDCOST(x->rdmult, palette_mode_rate, 0);
        // Terminate further palette_size search, if header cost corresponding
        // to lower palette_size is more than the best_rd.
        if (header_rd >= *best_rd) break;
      }

      av1_txfm_uvrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
      if (tokenonly_rd_stats.rate == INT_MAX) continue;

      // Without early termination the header cost is only computed once the
      // transform search has produced a valid rate.
      if (!early_term) {
        palette_mode_rate =
            intra_mode_info_cost_uv(cpi, x, mbmi, bsize, dc_mode_cost);
      }
      const int this_rate = tokenonly_rd_stats.rate + palette_mode_rate;
      const int64_t this_rd =
          RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);

      if (this_rd < *best_rd) {
        *best_rd = this_rd;
        *best_mbmi = *mbmi;
        memcpy(best_palette_color_map, color_map, color_map_bytes);
        *rate = this_rate;
        *distortion = tokenonly_rd_stats.dist;
        *rate_tokenonly = tokenonly_rd_stats.rate;
        *skippable = tokenonly_rd_stats.skip_txfm;
      }
    }
  }

  // Leave the block's color index map holding the best palette's map.
  if (best_mbmi->palette_mode_info.palette_size[1] > 0) {
    memcpy(color_map, best_palette_color_map, color_map_bytes);
  }
}
945
/*!\brief Rebuilds the chroma color index map from the stored chroma palette.
 *
 * Re-reads the U and V source samples of the current block as interleaved
 * (U, V) pairs, loads the chroma palette held in the block's mode info as
 * 2-D centroids, and calls av1_calc_indices() to fill the chroma color index
 * map, which is then passed to extend_palette_color_map().
 *
 * \param[in]     cpi  Top-level encoder structure; only the sequence-level
 *                     high bit-depth flag is read here.
 * \param[in,out] x    Macroblock whose chroma color index map is rewritten.
 */
void av1_restore_uv_color_map(const AV1_COMP *cpi, MACROBLOCK *x) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  const BLOCK_SIZE bsize = mbmi->bsize;

  const int src_stride = x->plane[1].src.stride;
  const uint8_t *const src_u = x->plane[1].src.buf;
  const uint8_t *const src_v = x->plane[2].src.buf;
  const uint16_t *const src_u16 = CONVERT_TO_SHORTPTR(src_u);
  const uint16_t *const src_v16 = CONVERT_TO_SHORTPTR(src_v);
  const int is_hbd = cpi->common.seq_params->use_highbitdepth;

  int16_t *const data = x->palette_buffer->kmeans_data_buf;
  uint8_t *const color_map = xd->plane[1].color_index_map;

  int plane_block_width, plane_block_height, rows, cols;
  av1_get_block_dimensions(bsize, 1, xd, &plane_block_width,
                           &plane_block_height, &rows, &cols);

  // Interleave the source samples as (U, V) pairs in raster order.
  int16_t *pair = data;
  for (int row = 0; row < rows; ++row) {
    const int row_offset = row * src_stride;
    for (int col = 0; col < cols; ++col) {
      const int pos = row_offset + col;
      if (is_hbd) {
        pair[0] = src_u16[pos];
        pair[1] = src_v16[pos];
      } else {
        pair[0] = src_u[pos];
        pair[1] = src_v[pos];
      }
      pair += 2;
    }
  }

  // Load the stored palette as (U, V) centroids: U entries start at
  // PALETTE_MAX_SIZE and V entries at 2 * PALETTE_MAX_SIZE.
  const int num_colors = pmi->palette_size[1];
  int16_t centroids[2 * PALETTE_MAX_SIZE];
  for (int k = 0; k < num_colors; ++k) {
    centroids[2 * k] = pmi->palette_colors[PALETTE_MAX_SIZE + k];
    centroids[2 * k + 1] = pmi->palette_colors[2 * PALETTE_MAX_SIZE + k];
  }

  av1_calc_indices(data, centroids, color_map, rows * cols, num_colors, 2);
  extend_palette_color_map(color_map, cols, rows, plane_block_width,
                           plane_block_height);
}
987