/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#ifndef AOM_AV1_COMMON_CFL_H_
#define AOM_AV1_COMMON_CFL_H_

#include "av1/common/av1_common_int.h"
#include "av1/common/blockd.h"

// Can we use CfL for the current block?
static inline CFL_ALLOWED_TYPE is_cfl_allowed(const MACROBLOCKD *xd) {
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const BLOCK_SIZE bsize = mbmi->bsize;
  assert(bsize < BLOCK_SIZES_ALL);
  if (xd->lossless[mbmi->segment_id]) {
    // In lossless, CfL is available when the partition size is equal to the
    // transform size.
    const int ssx = xd->plane[AOM_PLANE_U].subsampling_x;
    const int ssy = xd->plane[AOM_PLANE_U].subsampling_y;
    const int plane_bsize = get_plane_block_size(bsize, ssx, ssy);
    return (CFL_ALLOWED_TYPE)(plane_bsize == BLOCK_4X4);
  }
  // Spec: CfL is available for luma partitions less than or equal to 32x32.
  return (CFL_ALLOWED_TYPE)(block_size_wide[bsize] <= 32 &&
                            block_size_high[bsize] <= 32);
}
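// For example, with 4:2:0 subsampling an 8x8 lossless luma partition maps to
// a 4x4 chroma plane block, so the lossless branch above keeps CfL available;
// larger partitions map to chroma blocks bigger than 4x4 and are excluded.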

// Do we need to save the luma pixels from the current block,
// for a possible future CfL prediction?
static inline CFL_ALLOWED_TYPE store_cfl_required(const AV1_COMMON *cm,
                                                  const MACROBLOCKD *xd) {
  const MB_MODE_INFO *mbmi = xd->mi[0];

  if (cm->seq_params->monochrome) return CFL_DISALLOWED;

  if (!xd->is_chroma_ref) {
    // For non-chroma-reference blocks, we should always store the luma pixels,
    // in case the corresponding chroma-reference block uses CfL.
    // Note that this can only happen for block sizes which are <8 on
    // their shortest side, as otherwise they would be chroma reference
    // blocks.
    return CFL_ALLOWED;
  }

  // If this block has chroma information, we know whether we're
  // actually going to perform a CfL prediction.
  return (CFL_ALLOWED_TYPE)(!is_inter_block(mbmi) &&
                            mbmi->uv_mode == UV_CFL_PRED);
}

static inline int get_scaled_luma_q0(int alpha_q3, int16_t pred_buf_q3) {
  int scaled_luma_q6 = alpha_q3 * pred_buf_q3;
  return ROUND_POWER_OF_TWO_SIGNED(scaled_luma_q6, 6);
}
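// Worked example (illustrative values only): alpha and the luma AC
// contribution are both in Q3, so their product is in Q6. With alpha_q3 = 2
// (0.25) and pred_buf_q3 = 40 (5.0), the product is 80 in Q6 and
// ROUND_POWER_OF_TWO_SIGNED(80, 6) returns 1, i.e. 1.25 rounded back to
// integer precision.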

static inline CFL_PRED_TYPE get_cfl_pred_type(int plane) {
  assert(plane > 0);
  return (CFL_PRED_TYPE)(plane - 1);
}

static inline void clear_cfl_dc_pred_cache_flags(CFL_CTX *cfl) {
  cfl->use_dc_pred_cache = false;
  cfl->dc_pred_is_cached[CFL_PRED_U] = false;
  cfl->dc_pred_is_cached[CFL_PRED_V] = false;
}

void av1_cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
                           TX_SIZE tx_size, int plane);

void cfl_store_block(MACROBLOCKD *const xd, BLOCK_SIZE bsize, TX_SIZE tx_size);

void cfl_store_tx(MACROBLOCKD *const xd, int row, int col, TX_SIZE tx_size,
                  BLOCK_SIZE bsize);

void cfl_store_dc_pred(MACROBLOCKD *const xd, const uint8_t *input,
                       CFL_PRED_TYPE pred_plane, int width);

void cfl_load_dc_pred(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
                      TX_SIZE tx_size, CFL_PRED_TYPE pred_plane);

// Allows the CFL_SUBSAMPLE macro below to switch the input pointer type
// depending on the bitdepth.
#define CFL_lbd_TYPE uint8_t *cfl_type
#define CFL_hbd_TYPE uint16_t *cfl_type

// Declare a size-specific wrapper for the size-generic function. The compiler
// will inline the size-generic function here; because the size is then a
// compile-time constant, loop unrolling and other constant-propagation
// optimizations become possible.
#define CFL_SUBSAMPLE(arch, sub, bd, width, height)                       \
  void cfl_subsample_##bd##_##sub##_##width##x##height##_##arch(          \
      const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3);      \
  void cfl_subsample_##bd##_##sub##_##width##x##height##_##arch(          \
      const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) {     \
    cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride,    \
                                               output_q3, width, height); \
  }
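
// For reference, a sketch of what one instantiation expands to (the "ssse3"
// arch suffix is only illustrative here):
//
//   CFL_SUBSAMPLE(ssse3, 420, lbd, 4, 4)
//
// becomes, roughly:
//
//   void cfl_subsample_lbd_420_4x4_ssse3(const uint8_t *cfl_type,
//                                        int input_stride,
//                                        uint16_t *output_q3) {
//     cfl_luma_subsampling_420_lbd_ssse3(cfl_type, input_stride, output_q3,
//                                        4, 4);
//   }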

// Declare size-specific wrappers for all valid CfL sizes.
#define CFL_SUBSAMPLE_FUNCTIONS(arch, sub, bd)                            \
  CFL_SUBSAMPLE(arch, sub, bd, 4, 4)                                      \
  CFL_SUBSAMPLE(arch, sub, bd, 8, 8)                                      \
  CFL_SUBSAMPLE(arch, sub, bd, 16, 16)                                    \
  CFL_SUBSAMPLE(arch, sub, bd, 32, 32)                                    \
  CFL_SUBSAMPLE(arch, sub, bd, 4, 8)                                      \
  CFL_SUBSAMPLE(arch, sub, bd, 8, 4)                                      \
  CFL_SUBSAMPLE(arch, sub, bd, 8, 16)                                     \
  CFL_SUBSAMPLE(arch, sub, bd, 16, 8)                                     \
  CFL_SUBSAMPLE(arch, sub, bd, 16, 32)                                    \
  CFL_SUBSAMPLE(arch, sub, bd, 32, 16)                                    \
  CFL_SUBSAMPLE(arch, sub, bd, 4, 16)                                     \
  CFL_SUBSAMPLE(arch, sub, bd, 16, 4)                                     \
  CFL_SUBSAMPLE(arch, sub, bd, 8, 32)                                     \
  CFL_SUBSAMPLE(arch, sub, bd, 32, 8)                                     \
  cfl_subsample_##bd##_fn cfl_get_luma_subsampling_##sub##_##bd##_##arch( \
      TX_SIZE tx_size) {                                                  \
    CFL_SUBSAMPLE_FUNCTION_ARRAY(arch, sub, bd)                           \
    return subfn_##sub[tx_size];                                          \
  }

// Declare an architecture-specific array of function pointers for
// size-specific wrappers.
#define CFL_SUBSAMPLE_FUNCTION_ARRAY(arch, sub, bd)                       \
  static const cfl_subsample_##bd##_fn subfn_##sub[TX_SIZES_ALL] = {      \
    cfl_subsample_##bd##_##sub##_4x4_##arch,   /* 4x4 */                  \
    cfl_subsample_##bd##_##sub##_8x8_##arch,   /* 8x8 */                  \
    cfl_subsample_##bd##_##sub##_16x16_##arch, /* 16x16 */                \
    cfl_subsample_##bd##_##sub##_32x32_##arch, /* 32x32 */                \
    NULL,                                      /* 64x64 (invalid CFL size) */ \
    cfl_subsample_##bd##_##sub##_4x8_##arch,   /* 4x8 */                  \
    cfl_subsample_##bd##_##sub##_8x4_##arch,   /* 8x4 */                  \
    cfl_subsample_##bd##_##sub##_8x16_##arch,  /* 8x16 */                 \
    cfl_subsample_##bd##_##sub##_16x8_##arch,  /* 16x8 */                 \
    cfl_subsample_##bd##_##sub##_16x32_##arch, /* 16x32 */                \
    cfl_subsample_##bd##_##sub##_32x16_##arch, /* 32x16 */                \
    NULL,                                      /* 32x64 (invalid CFL size) */ \
    NULL,                                      /* 64x32 (invalid CFL size) */ \
    cfl_subsample_##bd##_##sub##_4x16_##arch,  /* 4x16 */                 \
    cfl_subsample_##bd##_##sub##_16x4_##arch,  /* 16x4 */                 \
    cfl_subsample_##bd##_##sub##_8x32_##arch,  /* 8x32 */                 \
    cfl_subsample_##bd##_##sub##_32x8_##arch,  /* 32x8 */                 \
    NULL,                                      /* 16x64 (invalid CFL size) */ \
    NULL,                                      /* 64x16 (invalid CFL size) */ \
  };

// The RTCD script does not support passing in an array, so we wrap it in this
// function.
#if CONFIG_AV1_HIGHBITDEPTH
#define CFL_GET_SUBSAMPLE_FUNCTION(arch)  \
  CFL_SUBSAMPLE_FUNCTIONS(arch, 420, lbd) \
  CFL_SUBSAMPLE_FUNCTIONS(arch, 422, lbd) \
  CFL_SUBSAMPLE_FUNCTIONS(arch, 444, lbd) \
  CFL_SUBSAMPLE_FUNCTIONS(arch, 420, hbd) \
  CFL_SUBSAMPLE_FUNCTIONS(arch, 422, hbd) \
  CFL_SUBSAMPLE_FUNCTIONS(arch, 444, hbd)
#else
#define CFL_GET_SUBSAMPLE_FUNCTION(arch)  \
  CFL_SUBSAMPLE_FUNCTIONS(arch, 420, lbd) \
  CFL_SUBSAMPLE_FUNCTIONS(arch, 422, lbd) \
  CFL_SUBSAMPLE_FUNCTIONS(arch, 444, lbd)
#endif
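
// Usage sketch (not part of this header): an architecture-specific
// implementation file would typically instantiate all of the wrappers and
// getters in one shot, e.g.
//
//   CFL_GET_SUBSAMPLE_FUNCTION(ssse3)
//
// which defines cfl_get_luma_subsampling_420_lbd_ssse3() and the other
// getters; the "ssse3" suffix here is only an illustrative example.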

// Declare a size-specific wrapper for the size-generic function. The compiler
// will inline the size-generic function here; because the size is then a
// compile-time constant, loop unrolling and other constant-propagation
// optimizations become possible.
#define CFL_SUB_AVG_X(arch, width, height, round_offset, num_pel_log2)       \
  void cfl_subtract_average_##width##x##height##_##arch(const uint16_t *src, \
                                                        int16_t *dst);       \
  void cfl_subtract_average_##width##x##height##_##arch(const uint16_t *src, \
                                                        int16_t *dst) {      \
    subtract_average_##arch(src, dst, width, height, round_offset,           \
                            num_pel_log2);                                   \
  }
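
// Note on the parameters below: for each width x height, round_offset is half
// the pixel count and num_pel_log2 is log2 of the pixel count (e.g. 8x16 has
// 128 pixels, so round_offset = 64 and num_pel_log2 = 7), so that
// subtract_average_##arch can compute a rounded mean with a shift.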

// Declare size-specific wrappers for all valid CfL sizes.
#define CFL_SUB_AVG_FN(arch)                                              \
  CFL_SUB_AVG_X(arch, 4, 4, 8, 4)                                         \
  CFL_SUB_AVG_X(arch, 4, 8, 16, 5)                                        \
  CFL_SUB_AVG_X(arch, 4, 16, 32, 6)                                       \
  CFL_SUB_AVG_X(arch, 8, 4, 16, 5)                                        \
  CFL_SUB_AVG_X(arch, 8, 8, 32, 6)                                        \
  CFL_SUB_AVG_X(arch, 8, 16, 64, 7)                                       \
  CFL_SUB_AVG_X(arch, 8, 32, 128, 8)                                      \
  CFL_SUB_AVG_X(arch, 16, 4, 32, 6)                                       \
  CFL_SUB_AVG_X(arch, 16, 8, 64, 7)                                       \
  CFL_SUB_AVG_X(arch, 16, 16, 128, 8)                                     \
  CFL_SUB_AVG_X(arch, 16, 32, 256, 9)                                     \
  CFL_SUB_AVG_X(arch, 32, 8, 128, 8)                                      \
  CFL_SUB_AVG_X(arch, 32, 16, 256, 9)                                     \
  CFL_SUB_AVG_X(arch, 32, 32, 512, 10)                                    \
  cfl_subtract_average_fn cfl_get_subtract_average_fn_##arch(             \
      TX_SIZE tx_size) {                                                  \
    static const cfl_subtract_average_fn sub_avg[TX_SIZES_ALL] = {        \
      cfl_subtract_average_4x4_##arch,   /* 4x4 */                        \
      cfl_subtract_average_8x8_##arch,   /* 8x8 */                        \
      cfl_subtract_average_16x16_##arch, /* 16x16 */                      \
      cfl_subtract_average_32x32_##arch, /* 32x32 */                      \
      NULL,                              /* 64x64 (invalid CFL size) */   \
      cfl_subtract_average_4x8_##arch,   /* 4x8 */                        \
      cfl_subtract_average_8x4_##arch,   /* 8x4 */                        \
      cfl_subtract_average_8x16_##arch,  /* 8x16 */                       \
      cfl_subtract_average_16x8_##arch,  /* 16x8 */                       \
      cfl_subtract_average_16x32_##arch, /* 16x32 */                      \
      cfl_subtract_average_32x16_##arch, /* 32x16 */                      \
      NULL,                              /* 32x64 (invalid CFL size) */   \
      NULL,                              /* 64x32 (invalid CFL size) */   \
      cfl_subtract_average_4x16_##arch,  /* 4x16 */                       \
      cfl_subtract_average_16x4_##arch,  /* 16x4 */                       \
      cfl_subtract_average_8x32_##arch,  /* 8x32 */                       \
      cfl_subtract_average_32x8_##arch,  /* 32x8 */                       \
      NULL,                              /* 16x64 (invalid CFL size) */   \
      NULL,                              /* 64x16 (invalid CFL size) */   \
    };                                                                    \
    /* Modulo TX_SIZES_ALL to ensure that an attacker won't be able to */ \
    /* index the function pointer array out of bounds. */                 \
    return sub_avg[tx_size % TX_SIZES_ALL];                               \
  }

#define CFL_PREDICT_lbd(arch, width, height)                              \
  void cfl_predict_lbd_##width##x##height##_##arch(                       \
      const int16_t *pred_buf_q3, uint8_t *dst, int dst_stride,           \
      int alpha_q3);                                                      \
  void cfl_predict_lbd_##width##x##height##_##arch(                       \
      const int16_t *pred_buf_q3, uint8_t *dst, int dst_stride,           \
      int alpha_q3) {                                                     \
    cfl_predict_lbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, width, \
                           height);                                       \
  }
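
// For reference, a sketch of one instantiation (the "ssse3" arch suffix is
// only illustrative):
//
//   CFL_PREDICT_lbd(ssse3, 8, 8)
//
// declares and defines, roughly:
//
//   void cfl_predict_lbd_8x8_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
//                                  int dst_stride, int alpha_q3) {
//     cfl_predict_lbd_ssse3(pred_buf_q3, dst, dst_stride, alpha_q3, 8, 8);
//   }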

#if CONFIG_AV1_HIGHBITDEPTH
#define CFL_PREDICT_hbd(arch, width, height)                                  \
  void cfl_predict_hbd_##width##x##height##_##arch(                           \
      const int16_t *pred_buf_q3, uint16_t *dst, int dst_stride,              \
      int alpha_q3, int bd);                                                  \
  void cfl_predict_hbd_##width##x##height##_##arch(                           \
      const int16_t *pred_buf_q3, uint16_t *dst, int dst_stride,              \
      int alpha_q3, int bd) {                                                 \
    cfl_predict_hbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, bd, width, \
                           height);                                           \
  }
#endif

// This wrapper exists because clang-format does not like calling macros with
// lowercase letters.
#define CFL_PREDICT_X(arch, width, height, bd) \
  CFL_PREDICT_##bd(arch, width, height)

#define CFL_PREDICT_FN(arch, bd)                                            \
  CFL_PREDICT_X(arch, 4, 4, bd)                                             \
  CFL_PREDICT_X(arch, 4, 8, bd)                                             \
  CFL_PREDICT_X(arch, 4, 16, bd)                                            \
  CFL_PREDICT_X(arch, 8, 4, bd)                                             \
  CFL_PREDICT_X(arch, 8, 8, bd)                                             \
  CFL_PREDICT_X(arch, 8, 16, bd)                                            \
  CFL_PREDICT_X(arch, 8, 32, bd)                                            \
  CFL_PREDICT_X(arch, 16, 4, bd)                                            \
  CFL_PREDICT_X(arch, 16, 8, bd)                                            \
  CFL_PREDICT_X(arch, 16, 16, bd)                                           \
  CFL_PREDICT_X(arch, 16, 32, bd)                                           \
  CFL_PREDICT_X(arch, 32, 8, bd)                                            \
  CFL_PREDICT_X(arch, 32, 16, bd)                                           \
  CFL_PREDICT_X(arch, 32, 32, bd)                                           \
  cfl_predict_##bd##_fn cfl_get_predict_##bd##_fn_##arch(TX_SIZE tx_size) { \
    static const cfl_predict_##bd##_fn pred[TX_SIZES_ALL] = {               \
      cfl_predict_##bd##_4x4_##arch,   /* 4x4 */                            \
      cfl_predict_##bd##_8x8_##arch,   /* 8x8 */                            \
      cfl_predict_##bd##_16x16_##arch, /* 16x16 */                          \
      cfl_predict_##bd##_32x32_##arch, /* 32x32 */                          \
      NULL,                            /* 64x64 (invalid CFL size) */       \
      cfl_predict_##bd##_4x8_##arch,   /* 4x8 */                            \
      cfl_predict_##bd##_8x4_##arch,   /* 8x4 */                            \
      cfl_predict_##bd##_8x16_##arch,  /* 8x16 */                           \
      cfl_predict_##bd##_16x8_##arch,  /* 16x8 */                           \
      cfl_predict_##bd##_16x32_##arch, /* 16x32 */                          \
      cfl_predict_##bd##_32x16_##arch, /* 32x16 */                          \
      NULL,                            /* 32x64 (invalid CFL size) */       \
      NULL,                            /* 64x32 (invalid CFL size) */       \
      cfl_predict_##bd##_4x16_##arch,  /* 4x16 */                           \
      cfl_predict_##bd##_16x4_##arch,  /* 16x4 */                           \
      cfl_predict_##bd##_8x32_##arch,  /* 8x32 */                           \
      cfl_predict_##bd##_32x8_##arch,  /* 32x8 */                           \
      NULL,                            /* 16x64 (invalid CFL size) */       \
      NULL,                            /* 64x16 (invalid CFL size) */       \
    };                                                                      \
    /* Modulo TX_SIZES_ALL to ensure that an attacker won't be able to */   \
    /* index the function pointer array out of bounds. */                   \
    return pred[tx_size % TX_SIZES_ALL];                                    \
  }
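
// Usage sketch (not part of this header): an architecture-specific
// implementation file would typically instantiate the wrappers and the getter
// with a single line such as CFL_PREDICT_FN(ssse3, lbd); the "ssse3" suffix
// is only an illustrative example, and the resulting
// cfl_get_predict_lbd_fn_ssse3() is what the RTCD dispatch layer would call.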

#endif  // AOM_AV1_COMMON_CFL_H_