/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#ifndef AOM_AV1_COMMON_CFL_H_
#define AOM_AV1_COMMON_CFL_H_

#include "av1/common/av1_common_int.h"
#include "av1/common/blockd.h"

// Can we use CfL for the current block?
static inline CFL_ALLOWED_TYPE is_cfl_allowed(const MACROBLOCKD *xd) {
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const BLOCK_SIZE bsize = mbmi->bsize;
  assert(bsize < BLOCK_SIZES_ALL);
  if (xd->lossless[mbmi->segment_id]) {
    // In lossless, CfL is available when the partition size is equal to the
    // transform size.
    const int ssx = xd->plane[AOM_PLANE_U].subsampling_x;
    const int ssy = xd->plane[AOM_PLANE_U].subsampling_y;
    const int plane_bsize = get_plane_block_size(bsize, ssx, ssy);
    return (CFL_ALLOWED_TYPE)(plane_bsize == BLOCK_4X4);
  }
  // Spec: CfL is available to luma partitions less than or equal to 32x32.
  return (CFL_ALLOWED_TYPE)(block_size_wide[bsize] <= 32 &&
                            block_size_high[bsize] <= 32);
}
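// For example, BLOCK_16X16 and BLOCK_32X32 luma partitions allow CfL, while
// BLOCK_64X64 and BLOCK_32X64 do not. In lossless mode with 4:2:0
// subsampling, an 8x8 luma partition maps to a BLOCK_4X4 chroma plane block,
// so CfL is allowed there.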

// Do we need to save the luma pixels from the current block,
// for a possible future CfL prediction?
static inline CFL_ALLOWED_TYPE store_cfl_required(const AV1_COMMON *cm,
                                                  const MACROBLOCKD *xd) {
  const MB_MODE_INFO *mbmi = xd->mi[0];

  if (cm->seq_params->monochrome) return CFL_DISALLOWED;

  if (!xd->is_chroma_ref) {
    // For non-chroma-reference blocks, we should always store the luma
    // pixels in case the corresponding chroma-reference block uses CfL.
    // Note that this can only happen for block sizes that are less than
    // 8 pixels on their shorter side, as otherwise they would be chroma
    // reference blocks themselves.
    return CFL_ALLOWED;
  }

  // If this block has chroma information, we know whether we're actually
  // going to perform a CfL prediction.
  return (CFL_ALLOWED_TYPE)(!is_inter_block(mbmi) &&
                            mbmi->uv_mode == UV_CFL_PRED);
}
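// For example, with 4:2:0 subsampling, an 8x8 luma area split into 4x4
// blocks carries its chroma information with the last 4x4 block only; the
// earlier 4x4 blocks are not chroma references, so their luma pixels are
// stored unconditionally in case that chroma-reference block picks CfL.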

static inline int get_scaled_luma_q0(int alpha_q3, int16_t pred_buf_q3) {
  int scaled_luma_q6 = alpha_q3 * pred_buf_q3;
  return ROUND_POWER_OF_TWO_SIGNED(scaled_luma_q6, 6);
}
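// For example, alpha_q3 = 12 encodes alpha = 12 / 8 = 1.5 and
// pred_buf_q3 = 40 encodes an AC luma value of 40 / 8 = 5. Their product,
// 480, is in Q6 (3 + 3 fractional bits), so the rounded shift by 6 yields
// (480 + 32) >> 6 = 8, i.e. 1.5 * 5 = 7.5 rounded to 8.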

static inline CFL_PRED_TYPE get_cfl_pred_type(int plane) {
  assert(plane > 0);
  return (CFL_PRED_TYPE)(plane - 1);
}

static inline void clear_cfl_dc_pred_cache_flags(CFL_CTX *cfl) {
  cfl->use_dc_pred_cache = false;
  cfl->dc_pred_is_cached[CFL_PRED_U] = false;
  cfl->dc_pred_is_cached[CFL_PRED_V] = false;
}

void av1_cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
                           TX_SIZE tx_size, int plane);

void cfl_store_block(MACROBLOCKD *const xd, BLOCK_SIZE bsize, TX_SIZE tx_size);

void cfl_store_tx(MACROBLOCKD *const xd, int row, int col, TX_SIZE tx_size,
                  BLOCK_SIZE bsize);

void cfl_store_dc_pred(MACROBLOCKD *const xd, const uint8_t *input,
                       CFL_PRED_TYPE pred_plane, int width);

void cfl_load_dc_pred(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
                      TX_SIZE tx_size, CFL_PRED_TYPE pred_plane);
// Allows the CFL_SUBSAMPLE macro to switch types depending on the bitdepth.
#define CFL_lbd_TYPE uint8_t *cfl_type
#define CFL_hbd_TYPE uint16_t *cfl_type

// Declare a size-specific wrapper for the size-generic function. The compiler
// will inline the size-generic function here; since the size is then a
// compile-time constant, this enables loop unrolling and other
// constant-propagation optimizations.
#define CFL_SUBSAMPLE(arch, sub, bd, width, height)                       \
  void cfl_subsample_##bd##_##sub##_##width##x##height##_##arch(          \
      const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3);      \
  void cfl_subsample_##bd##_##sub##_##width##x##height##_##arch(          \
      const CFL_##bd##_TYPE, int input_stride, uint16_t *output_q3) {     \
    cfl_luma_subsampling_##sub##_##bd##_##arch(cfl_type, input_stride,    \
                                               output_q3, width, height); \
  }
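// For example (roughly, after expansion of the CFL_*_TYPE helpers and
// omitting the duplicate declaration), CFL_SUBSAMPLE(ssse3, 420, lbd, 4, 4)
// produces:
//
//   void cfl_subsample_lbd_420_4x4_ssse3(const uint8_t *cfl_type,
//                                        int input_stride,
//                                        uint16_t *output_q3) {
//     cfl_luma_subsampling_420_lbd_ssse3(cfl_type, input_stride, output_q3,
//                                        4, 4);
//   }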

// Declare size-specific wrappers for all valid CfL sizes.
#define CFL_SUBSAMPLE_FUNCTIONS(arch, sub, bd)                            \
  CFL_SUBSAMPLE(arch, sub, bd, 4, 4)                                      \
  CFL_SUBSAMPLE(arch, sub, bd, 8, 8)                                      \
  CFL_SUBSAMPLE(arch, sub, bd, 16, 16)                                    \
  CFL_SUBSAMPLE(arch, sub, bd, 32, 32)                                    \
  CFL_SUBSAMPLE(arch, sub, bd, 4, 8)                                      \
  CFL_SUBSAMPLE(arch, sub, bd, 8, 4)                                      \
  CFL_SUBSAMPLE(arch, sub, bd, 8, 16)                                     \
  CFL_SUBSAMPLE(arch, sub, bd, 16, 8)                                     \
  CFL_SUBSAMPLE(arch, sub, bd, 16, 32)                                    \
  CFL_SUBSAMPLE(arch, sub, bd, 32, 16)                                    \
  CFL_SUBSAMPLE(arch, sub, bd, 4, 16)                                     \
  CFL_SUBSAMPLE(arch, sub, bd, 16, 4)                                     \
  CFL_SUBSAMPLE(arch, sub, bd, 8, 32)                                     \
  CFL_SUBSAMPLE(arch, sub, bd, 32, 8)                                     \
  cfl_subsample_##bd##_fn cfl_get_luma_subsampling_##sub##_##bd##_##arch( \
      TX_SIZE tx_size) {                                                  \
    CFL_SUBSAMPLE_FUNCTION_ARRAY(arch, sub, bd)                           \
    return subfn_##sub[tx_size];                                          \
  }

// Declare an architecture-specific array of function pointers for the
// size-specific wrappers.
#define CFL_SUBSAMPLE_FUNCTION_ARRAY(arch, sub, bd)                           \
  static const cfl_subsample_##bd##_fn subfn_##sub[TX_SIZES_ALL] = {          \
    cfl_subsample_##bd##_##sub##_4x4_##arch,   /* 4x4 */                      \
    cfl_subsample_##bd##_##sub##_8x8_##arch,   /* 8x8 */                      \
    cfl_subsample_##bd##_##sub##_16x16_##arch, /* 16x16 */                    \
    cfl_subsample_##bd##_##sub##_32x32_##arch, /* 32x32 */                    \
    NULL,                                      /* 64x64 (invalid CFL size) */ \
    cfl_subsample_##bd##_##sub##_4x8_##arch,   /* 4x8 */                      \
    cfl_subsample_##bd##_##sub##_8x4_##arch,   /* 8x4 */                      \
    cfl_subsample_##bd##_##sub##_8x16_##arch,  /* 8x16 */                     \
    cfl_subsample_##bd##_##sub##_16x8_##arch,  /* 16x8 */                     \
    cfl_subsample_##bd##_##sub##_16x32_##arch, /* 16x32 */                    \
    cfl_subsample_##bd##_##sub##_32x16_##arch, /* 32x16 */                    \
    NULL,                                      /* 32x64 (invalid CFL size) */ \
    NULL,                                      /* 64x32 (invalid CFL size) */ \
    cfl_subsample_##bd##_##sub##_4x16_##arch,  /* 4x16  */                    \
    cfl_subsample_##bd##_##sub##_16x4_##arch,  /* 16x4  */                    \
    cfl_subsample_##bd##_##sub##_8x32_##arch,  /* 8x32  */                    \
    cfl_subsample_##bd##_##sub##_32x8_##arch,  /* 32x8  */                    \
    NULL,                                      /* 16x64 (invalid CFL size) */ \
    NULL,                                      /* 64x16 (invalid CFL size) */ \
  };

// The RTCD script does not support passing in an array, so we wrap the
// lookup in a function.
#if CONFIG_AV1_HIGHBITDEPTH
#define CFL_GET_SUBSAMPLE_FUNCTION(arch)  \
  CFL_SUBSAMPLE_FUNCTIONS(arch, 420, lbd) \
  CFL_SUBSAMPLE_FUNCTIONS(arch, 422, lbd) \
  CFL_SUBSAMPLE_FUNCTIONS(arch, 444, lbd) \
  CFL_SUBSAMPLE_FUNCTIONS(arch, 420, hbd) \
  CFL_SUBSAMPLE_FUNCTIONS(arch, 422, hbd) \
  CFL_SUBSAMPLE_FUNCTIONS(arch, 444, hbd)
#else
#define CFL_GET_SUBSAMPLE_FUNCTION(arch)  \
  CFL_SUBSAMPLE_FUNCTIONS(arch, 420, lbd) \
  CFL_SUBSAMPLE_FUNCTIONS(arch, 422, lbd) \
  CFL_SUBSAMPLE_FUNCTIONS(arch, 444, lbd)
#endif
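// An architecture-specific implementation file is expected to define the
// size-generic cfl_luma_subsampling_{420,422,444}_{lbd,hbd}_##arch helpers
// and then invoke CFL_GET_SUBSAMPLE_FUNCTION(arch) once, emitting all the
// wrappers above plus the cfl_get_luma_subsampling_*_##arch getters that
// the RTCD tables reference.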

// Declare a size-specific wrapper for the size-generic function. The compiler
// will inline the size-generic function here; since the size is then a
// compile-time constant, this enables loop unrolling and other
// constant-propagation optimizations.
#define CFL_SUB_AVG_X(arch, width, height, round_offset, num_pel_log2)       \
  void cfl_subtract_average_##width##x##height##_##arch(const uint16_t *src, \
                                                        int16_t *dst);       \
  void cfl_subtract_average_##width##x##height##_##arch(const uint16_t *src, \
                                                        int16_t *dst) {      \
    subtract_average_##arch(src, dst, width, height, round_offset,           \
                            num_pel_log2);                                   \
  }
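// In the instantiations below, num_pel_log2 = log2(width * height) and
// round_offset = (width * height) / 2, i.e. the rounding term applied when
// the pixel sum is shifted down by num_pel_log2 to form the average. For
// example, 16x16 covers 256 pixels, so num_pel_log2 = 8 and
// round_offset = 128.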

// Declare size-specific wrappers for all valid CfL sizes.
#define CFL_SUB_AVG_FN(arch)                                              \
  CFL_SUB_AVG_X(arch, 4, 4, 8, 4)                                         \
  CFL_SUB_AVG_X(arch, 4, 8, 16, 5)                                        \
  CFL_SUB_AVG_X(arch, 4, 16, 32, 6)                                       \
  CFL_SUB_AVG_X(arch, 8, 4, 16, 5)                                        \
  CFL_SUB_AVG_X(arch, 8, 8, 32, 6)                                        \
  CFL_SUB_AVG_X(arch, 8, 16, 64, 7)                                       \
  CFL_SUB_AVG_X(arch, 8, 32, 128, 8)                                      \
  CFL_SUB_AVG_X(arch, 16, 4, 32, 6)                                       \
  CFL_SUB_AVG_X(arch, 16, 8, 64, 7)                                       \
  CFL_SUB_AVG_X(arch, 16, 16, 128, 8)                                     \
  CFL_SUB_AVG_X(arch, 16, 32, 256, 9)                                     \
  CFL_SUB_AVG_X(arch, 32, 8, 128, 8)                                      \
  CFL_SUB_AVG_X(arch, 32, 16, 256, 9)                                     \
  CFL_SUB_AVG_X(arch, 32, 32, 512, 10)                                    \
  cfl_subtract_average_fn cfl_get_subtract_average_fn_##arch(             \
      TX_SIZE tx_size) {                                                  \
    static const cfl_subtract_average_fn sub_avg[TX_SIZES_ALL] = {        \
      cfl_subtract_average_4x4_##arch,   /* 4x4 */                        \
      cfl_subtract_average_8x8_##arch,   /* 8x8 */                        \
      cfl_subtract_average_16x16_##arch, /* 16x16 */                      \
      cfl_subtract_average_32x32_##arch, /* 32x32 */                      \
      NULL,                              /* 64x64 (invalid CFL size) */   \
      cfl_subtract_average_4x8_##arch,   /* 4x8 */                        \
      cfl_subtract_average_8x4_##arch,   /* 8x4 */                        \
      cfl_subtract_average_8x16_##arch,  /* 8x16 */                       \
      cfl_subtract_average_16x8_##arch,  /* 16x8 */                       \
      cfl_subtract_average_16x32_##arch, /* 16x32 */                      \
      cfl_subtract_average_32x16_##arch, /* 32x16 */                      \
      NULL,                              /* 32x64 (invalid CFL size) */   \
      NULL,                              /* 64x32 (invalid CFL size) */   \
      cfl_subtract_average_4x16_##arch,  /* 4x16 */                       \
      cfl_subtract_average_16x4_##arch,  /* 16x4 */                       \
      cfl_subtract_average_8x32_##arch,  /* 8x32 */                       \
      cfl_subtract_average_32x8_##arch,  /* 32x8 */                       \
      NULL,                              /* 16x64 (invalid CFL size) */   \
      NULL,                              /* 64x16 (invalid CFL size) */   \
    };                                                                    \
    /* Modulo TX_SIZES_ALL to ensure that an attacker won't be able to */ \
    /* index the function pointer array out of bounds. */                 \
    return sub_avg[tx_size % TX_SIZES_ALL];                               \
  }

#define CFL_PREDICT_lbd(arch, width, height)                                   \
  void cfl_predict_lbd_##width##x##height##_##arch(                            \
      const int16_t *pred_buf_q3, uint8_t *dst, int dst_stride, int alpha_q3); \
  void cfl_predict_lbd_##width##x##height##_##arch(                            \
      const int16_t *pred_buf_q3, uint8_t *dst, int dst_stride,                \
      int alpha_q3) {                                                          \
    cfl_predict_lbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, width,      \
                           height);                                            \
  }

#if CONFIG_AV1_HIGHBITDEPTH
#define CFL_PREDICT_hbd(arch, width, height)                                   \
  void cfl_predict_hbd_##width##x##height##_##arch(                            \
      const int16_t *pred_buf_q3, uint16_t *dst, int dst_stride, int alpha_q3, \
      int bd);                                                                 \
  void cfl_predict_hbd_##width##x##height##_##arch(                            \
      const int16_t *pred_buf_q3, uint16_t *dst, int dst_stride, int alpha_q3, \
      int bd) {                                                                \
    cfl_predict_hbd_##arch(pred_buf_q3, dst, dst_stride, alpha_q3, bd, width,  \
                           height);                                            \
  }
#endif

// This wrapper exists because clang-format does not like calling macros with
// lowercase letters.
#define CFL_PREDICT_X(arch, width, height, bd) \
  CFL_PREDICT_##bd(arch, width, height)

#define CFL_PREDICT_FN(arch, bd)                                            \
  CFL_PREDICT_X(arch, 4, 4, bd)                                             \
  CFL_PREDICT_X(arch, 4, 8, bd)                                             \
  CFL_PREDICT_X(arch, 4, 16, bd)                                            \
  CFL_PREDICT_X(arch, 8, 4, bd)                                             \
  CFL_PREDICT_X(arch, 8, 8, bd)                                             \
  CFL_PREDICT_X(arch, 8, 16, bd)                                            \
  CFL_PREDICT_X(arch, 8, 32, bd)                                            \
  CFL_PREDICT_X(arch, 16, 4, bd)                                            \
  CFL_PREDICT_X(arch, 16, 8, bd)                                            \
  CFL_PREDICT_X(arch, 16, 16, bd)                                           \
  CFL_PREDICT_X(arch, 16, 32, bd)                                           \
  CFL_PREDICT_X(arch, 32, 8, bd)                                            \
  CFL_PREDICT_X(arch, 32, 16, bd)                                           \
  CFL_PREDICT_X(arch, 32, 32, bd)                                           \
  cfl_predict_##bd##_fn cfl_get_predict_##bd##_fn_##arch(TX_SIZE tx_size) { \
    static const cfl_predict_##bd##_fn pred[TX_SIZES_ALL] = {               \
      cfl_predict_##bd##_4x4_##arch,   /* 4x4 */                            \
      cfl_predict_##bd##_8x8_##arch,   /* 8x8 */                            \
      cfl_predict_##bd##_16x16_##arch, /* 16x16 */                          \
      cfl_predict_##bd##_32x32_##arch, /* 32x32 */                          \
      NULL,                            /* 64x64 (invalid CFL size) */       \
      cfl_predict_##bd##_4x8_##arch,   /* 4x8 */                            \
      cfl_predict_##bd##_8x4_##arch,   /* 8x4 */                            \
      cfl_predict_##bd##_8x16_##arch,  /* 8x16 */                           \
      cfl_predict_##bd##_16x8_##arch,  /* 16x8 */                           \
      cfl_predict_##bd##_16x32_##arch, /* 16x32 */                          \
      cfl_predict_##bd##_32x16_##arch, /* 32x16 */                          \
      NULL,                            /* 32x64 (invalid CFL size) */       \
      NULL,                            /* 64x32 (invalid CFL size) */       \
      cfl_predict_##bd##_4x16_##arch,  /* 4x16  */                          \
      cfl_predict_##bd##_16x4_##arch,  /* 16x4  */                          \
      cfl_predict_##bd##_8x32_##arch,  /* 8x32  */                          \
      cfl_predict_##bd##_32x8_##arch,  /* 32x8  */                          \
      NULL,                            /* 16x64 (invalid CFL size) */       \
      NULL,                            /* 64x16 (invalid CFL size) */       \
    };                                                                      \
    /* Modulo TX_SIZES_ALL to ensure that an attacker won't be able to */   \
    /* index the function pointer array out of bounds. */                   \
    return pred[tx_size % TX_SIZES_ALL];                                    \
  }
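// Taken together, an architecture-specific implementation file can
// instantiate the full CfL kernel set with, for example (sketch):
//
//   CFL_GET_SUBSAMPLE_FUNCTION(ssse3)
//   CFL_SUB_AVG_FN(ssse3)
//   CFL_PREDICT_FN(ssse3, lbd)
//
// after defining the corresponding size-generic helpers; the emitted
// cfl_get_*_##arch getters are then wired up through the RTCD tables.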

#endif  // AOM_AV1_COMMON_CFL_H_