xref: /aosp_15_r20/external/libaom/av1/common/reconintra.c (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <assert.h>
13 #include <math.h>
14 
15 #include "config/aom_config.h"
16 #include "config/aom_dsp_rtcd.h"
17 #include "config/av1_rtcd.h"
18 
19 #include "aom_dsp/aom_dsp_common.h"
20 #include "aom_mem/aom_mem.h"
21 #include "aom_ports/aom_once.h"
22 #include "aom_ports/mem.h"
23 #include "av1/common/av1_common_int.h"
24 #include "av1/common/cfl.h"
25 #include "av1/common/reconintra.h"
26 
27 enum {
28   NEED_LEFT = 1 << 1,
29   NEED_ABOVE = 1 << 2,
30   NEED_ABOVERIGHT = 1 << 3,
31   NEED_ABOVELEFT = 1 << 4,
32   NEED_BOTTOMLEFT = 1 << 5,
33 };
34 
35 #define INTRA_EDGE_FILT 3
36 #define INTRA_EDGE_TAPS 5
37 #define MAX_UPSAMPLE_SZ 16
38 #define NUM_INTRA_NEIGHBOUR_PIXELS (MAX_TX_SIZE * 2 + 32)
39 
40 static const uint8_t extend_modes[INTRA_MODES] = {
41   NEED_ABOVE | NEED_LEFT,                   // DC
42   NEED_ABOVE,                               // V
43   NEED_LEFT,                                // H
44   NEED_ABOVE | NEED_ABOVERIGHT,             // D45
45   NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D135
46   NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D113
47   NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // D157
48   NEED_LEFT | NEED_BOTTOMLEFT,              // D203
49   NEED_ABOVE | NEED_ABOVERIGHT,             // D67
50   NEED_LEFT | NEED_ABOVE,                   // SMOOTH
51   NEED_LEFT | NEED_ABOVE,                   // SMOOTH_V
52   NEED_LEFT | NEED_ABOVE,                   // SMOOTH_H
53   NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // PAETH
54 };
55 
56 // Tables to store if the top-right reference pixels are available. The flags
57 // are represented with bits, packed into 8-bit integers. E.g., for the 32x32
58 // blocks in a 128x128 superblock, the index of the "o" block is 10 (in raster
59 // order), so its flag is stored at the 3rd bit of the 2nd entry in the table,
60 // i.e. (table[10 / 8] >> (10 % 8)) & 1.
61 //       . . . .
62 //       . . . .
63 //       . . o .
64 //       . . . .
65 static uint8_t has_tr_4x4[128] = {
66   255, 255, 255, 255, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
67   127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
68   255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
69   127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
70   255, 255, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
71   127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
72   255, 127, 255, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
73   127, 127, 127, 127, 85, 85, 85, 85, 119, 119, 119, 119, 85, 85, 85, 85,
74 };
75 static uint8_t has_tr_4x8[64] = {
76   255, 255, 255, 255, 119, 119, 119, 119, 127, 127, 127, 127, 119,
77   119, 119, 119, 255, 127, 255, 127, 119, 119, 119, 119, 127, 127,
78   127, 127, 119, 119, 119, 119, 255, 255, 255, 127, 119, 119, 119,
79   119, 127, 127, 127, 127, 119, 119, 119, 119, 255, 127, 255, 127,
80   119, 119, 119, 119, 127, 127, 127, 127, 119, 119, 119, 119,
81 };
82 static uint8_t has_tr_8x4[64] = {
83   255, 255, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
84   127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
85   255, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
86   127, 127, 0, 0, 85, 85, 0, 0, 119, 119, 0, 0, 85, 85, 0, 0,
87 };
88 static uint8_t has_tr_8x8[32] = {
89   255, 255, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
90   255, 127, 85, 85, 119, 119, 85, 85, 127, 127, 85, 85, 119, 119, 85, 85,
91 };
92 static uint8_t has_tr_8x16[16] = {
93   255, 255, 119, 119, 127, 127, 119, 119,
94   255, 127, 119, 119, 127, 127, 119, 119,
95 };
96 static uint8_t has_tr_16x8[16] = {
97   255, 0, 85, 0, 119, 0, 85, 0, 127, 0, 85, 0, 119, 0, 85, 0,
98 };
99 static uint8_t has_tr_16x16[8] = {
100   255, 85, 119, 85, 127, 85, 119, 85,
101 };
102 static uint8_t has_tr_16x32[4] = { 255, 119, 127, 119 };
103 static uint8_t has_tr_32x16[4] = { 15, 5, 7, 5 };
104 static uint8_t has_tr_32x32[2] = { 95, 87 };
105 static uint8_t has_tr_32x64[1] = { 127 };
106 static uint8_t has_tr_64x32[1] = { 19 };
107 static uint8_t has_tr_64x64[1] = { 7 };
108 static uint8_t has_tr_64x128[1] = { 3 };
109 static uint8_t has_tr_128x64[1] = { 1 };
110 static uint8_t has_tr_128x128[1] = { 1 };
111 static uint8_t has_tr_4x16[32] = {
112   255, 255, 255, 255, 127, 127, 127, 127, 255, 127, 255,
113   127, 127, 127, 127, 127, 255, 255, 255, 127, 127, 127,
114   127, 127, 255, 127, 255, 127, 127, 127, 127, 127,
115 };
116 static uint8_t has_tr_16x4[32] = {
117   255, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
118   127, 0, 0, 0, 85, 0, 0, 0, 119, 0, 0, 0, 85, 0, 0, 0,
119 };
120 static uint8_t has_tr_8x32[8] = {
121   255, 255, 127, 127, 255, 127, 127, 127,
122 };
123 static uint8_t has_tr_32x8[8] = {
124   15, 0, 5, 0, 7, 0, 5, 0,
125 };
126 static uint8_t has_tr_16x64[2] = { 255, 127 };
127 static uint8_t has_tr_64x16[2] = { 3, 1 };
128 
129 static const uint8_t *const has_tr_tables[BLOCK_SIZES_ALL] = {
130   // 4X4
131   has_tr_4x4,
132   // 4X8,       8X4,            8X8
133   has_tr_4x8, has_tr_8x4, has_tr_8x8,
134   // 8X16,      16X8,           16X16
135   has_tr_8x16, has_tr_16x8, has_tr_16x16,
136   // 16X32,     32X16,          32X32
137   has_tr_16x32, has_tr_32x16, has_tr_32x32,
138   // 32X64,     64X32,          64X64
139   has_tr_32x64, has_tr_64x32, has_tr_64x64,
140   // 64x128,    128x64,         128x128
141   has_tr_64x128, has_tr_128x64, has_tr_128x128,
142   // 4x16,      16x4,            8x32
143   has_tr_4x16, has_tr_16x4, has_tr_8x32,
144   // 32x8,      16x64,           64x16
145   has_tr_32x8, has_tr_16x64, has_tr_64x16
146 };
147 
148 static uint8_t has_tr_vert_8x8[32] = {
149   255, 255, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
150   255, 127, 0, 0, 119, 119, 0, 0, 127, 127, 0, 0, 119, 119, 0, 0,
151 };
152 static uint8_t has_tr_vert_16x16[8] = {
153   255, 0, 119, 0, 127, 0, 119, 0,
154 };
155 static uint8_t has_tr_vert_32x32[2] = { 15, 7 };
156 static uint8_t has_tr_vert_64x64[1] = { 3 };
157 
158 // The _vert_* tables are like the ordinary tables above, but describe the
159 // order we visit square blocks when doing a PARTITION_VERT_A or
160 // PARTITION_VERT_B. This is the same order as normal except for on the last
161 // split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
162 // as a pair of squares, which means that these tables work correctly for both
163 // mixed vertical partition types.
164 //
165 // There are tables for each of the square sizes. Vertical rectangles (like
166 // BLOCK_16X32) use their respective "non-vert" table
167 static const uint8_t *const has_tr_vert_tables[BLOCK_SIZES] = {
168   // 4X4
169   NULL,
170   // 4X8,      8X4,         8X8
171   has_tr_4x8, NULL, has_tr_vert_8x8,
172   // 8X16,     16X8,        16X16
173   has_tr_8x16, NULL, has_tr_vert_16x16,
174   // 16X32,    32X16,       32X32
175   has_tr_16x32, NULL, has_tr_vert_32x32,
176   // 32X64,    64X32,       64X64
177   has_tr_32x64, NULL, has_tr_vert_64x64,
178   // 64x128,   128x64,      128x128
179   has_tr_64x128, NULL, has_tr_128x128
180 };
181 
182 static const uint8_t *get_has_tr_table(PARTITION_TYPE partition,
183                                        BLOCK_SIZE bsize) {
184   const uint8_t *ret = NULL;
185   // If this is a mixed vertical partition, look up bsize in orders_vert.
186   if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) {
187     assert(bsize < BLOCK_SIZES);
188     ret = has_tr_vert_tables[bsize];
189   } else {
190     ret = has_tr_tables[bsize];
191   }
192   assert(ret);
193   return ret;
194 }
195 
196 static int has_top_right(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
197                          int mi_col, int top_available, int right_available,
198                          PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
199                          int col_off, int ss_x, int ss_y) {
200   if (!top_available || !right_available) return 0;
201 
202   const int bw_unit = mi_size_wide[bsize];
203   const int plane_bw_unit = AOMMAX(bw_unit >> ss_x, 1);
204   const int top_right_count_unit = tx_size_wide_unit[txsz];
205 
206   if (row_off > 0) {  // Just need to check if enough pixels on the right.
207     if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64]) {
208       // Special case: For 128x128 blocks, the transform unit whose
209       // top-right corner is at the center of the block does in fact have
210       // pixels available at its top-right corner.
211       if (row_off == mi_size_high[BLOCK_64X64] >> ss_y &&
212           col_off + top_right_count_unit == mi_size_wide[BLOCK_64X64] >> ss_x) {
213         return 1;
214       }
215       const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
216       const int col_off_64 = col_off % plane_bw_unit_64;
217       return col_off_64 + top_right_count_unit < plane_bw_unit_64;
218     }
219     return col_off + top_right_count_unit < plane_bw_unit;
220   } else {
221     // All top-right pixels are in the block above, which is already available.
222     if (col_off + top_right_count_unit < plane_bw_unit) return 1;
223 
224     const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
225     const int bh_in_mi_log2 = mi_size_high_log2[bsize];
226     const int sb_mi_size = mi_size_high[sb_size];
227     const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
228     const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;
229 
230     // Top row of superblock: so top-right pixels are in the top and/or
231     // top-right superblocks, both of which are already available.
232     if (blk_row_in_sb == 0) return 1;
233 
234     // Rightmost column of superblock (and not the top row): so top-right pixels
235     // fall in the right superblock, which is not available yet.
236     if (((blk_col_in_sb + 1) << bw_in_mi_log2) >= sb_mi_size) {
237       return 0;
238     }
239 
240     // General case (neither top row nor rightmost column): check if the
241     // top-right block is coded before the current block.
242     const int this_blk_index =
243         ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) +
244         blk_col_in_sb + 0;
245     const int idx1 = this_blk_index / 8;
246     const int idx2 = this_blk_index % 8;
247     const uint8_t *has_tr_table = get_has_tr_table(partition, bsize);
248     return (has_tr_table[idx1] >> idx2) & 1;
249   }
250 }
251 
252 // Similar to the has_tr_* tables, but store if the bottom-left reference
253 // pixels are available.
254 static uint8_t has_bl_4x4[128] = {
255   84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85,
256   85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,  0,  84, 85, 85, 85, 16, 17,
257   17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84,
258   85, 85, 85, 0,  0,  0,  0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85,
259   0,  1,  1,  1,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  1,
260   0,  84, 85, 85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  1,  1,  1,  84, 85,
261   85, 85, 16, 17, 17, 17, 84, 85, 85, 85, 0,  0,  0,  0,
262 };
263 static uint8_t has_bl_4x8[64] = {
264   16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
265   16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
266   16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 1, 0,
267   16, 17, 17, 17, 0, 1, 1, 1, 16, 17, 17, 17, 0, 0, 0, 0,
268 };
269 static uint8_t has_bl_8x4[64] = {
270   254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
271   254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
272   254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 1,
273   254, 255, 84, 85, 254, 255, 16, 17, 254, 255, 84, 85, 254, 255, 0, 0,
274 };
275 static uint8_t has_bl_8x8[32] = {
276   84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
277   84, 85, 16, 17, 84, 85, 0, 1, 84, 85, 16, 17, 84, 85, 0, 0,
278 };
279 static uint8_t has_bl_8x16[16] = {
280   16, 17, 0, 1, 16, 17, 0, 0, 16, 17, 0, 1, 16, 17, 0, 0,
281 };
282 static uint8_t has_bl_16x8[16] = {
283   254, 84, 254, 16, 254, 84, 254, 0, 254, 84, 254, 16, 254, 84, 254, 0,
284 };
285 static uint8_t has_bl_16x16[8] = {
286   84, 16, 84, 0, 84, 16, 84, 0,
287 };
288 static uint8_t has_bl_16x32[4] = { 16, 0, 16, 0 };
289 static uint8_t has_bl_32x16[4] = { 78, 14, 78, 14 };
290 static uint8_t has_bl_32x32[2] = { 4, 4 };
291 static uint8_t has_bl_32x64[1] = { 0 };
292 static uint8_t has_bl_64x32[1] = { 34 };
293 static uint8_t has_bl_64x64[1] = { 0 };
294 static uint8_t has_bl_64x128[1] = { 0 };
295 static uint8_t has_bl_128x64[1] = { 0 };
296 static uint8_t has_bl_128x128[1] = { 0 };
297 static uint8_t has_bl_4x16[32] = {
298   0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
299   0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
300 };
301 static uint8_t has_bl_16x4[32] = {
302   254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
303   254, 254, 254, 84, 254, 254, 254, 16, 254, 254, 254, 84, 254, 254, 254, 0,
304 };
305 static uint8_t has_bl_8x32[8] = {
306   0, 1, 0, 0, 0, 1, 0, 0,
307 };
308 static uint8_t has_bl_32x8[8] = {
309   238, 78, 238, 14, 238, 78, 238, 14,
310 };
311 static uint8_t has_bl_16x64[2] = { 0, 0 };
312 static uint8_t has_bl_64x16[2] = { 42, 42 };
313 
314 static const uint8_t *const has_bl_tables[BLOCK_SIZES_ALL] = {
315   // 4X4
316   has_bl_4x4,
317   // 4X8,         8X4,         8X8
318   has_bl_4x8, has_bl_8x4, has_bl_8x8,
319   // 8X16,        16X8,        16X16
320   has_bl_8x16, has_bl_16x8, has_bl_16x16,
321   // 16X32,       32X16,       32X32
322   has_bl_16x32, has_bl_32x16, has_bl_32x32,
323   // 32X64,       64X32,       64X64
324   has_bl_32x64, has_bl_64x32, has_bl_64x64,
325   // 64x128,      128x64,      128x128
326   has_bl_64x128, has_bl_128x64, has_bl_128x128,
327   // 4x16,        16x4,        8x32
328   has_bl_4x16, has_bl_16x4, has_bl_8x32,
329   // 32x8,        16x64,       64x16
330   has_bl_32x8, has_bl_16x64, has_bl_64x16
331 };
332 
333 static uint8_t has_bl_vert_8x8[32] = {
334   254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
335   254, 255, 16, 17, 254, 255, 0, 1, 254, 255, 16, 17, 254, 255, 0, 0,
336 };
337 static uint8_t has_bl_vert_16x16[8] = {
338   254, 16, 254, 0, 254, 16, 254, 0,
339 };
340 static uint8_t has_bl_vert_32x32[2] = { 14, 14 };
341 static uint8_t has_bl_vert_64x64[1] = { 2 };
342 
343 // The _vert_* tables are like the ordinary tables above, but describe the
344 // order we visit square blocks when doing a PARTITION_VERT_A or
345 // PARTITION_VERT_B. This is the same order as normal except for on the last
346 // split where we go vertically (TL, BL, TR, BR). We treat the rectangular block
347 // as a pair of squares, which means that these tables work correctly for both
348 // mixed vertical partition types.
349 //
350 // There are tables for each of the square sizes. Vertical rectangles (like
351 // BLOCK_16X32) use their respective "non-vert" table
352 static const uint8_t *const has_bl_vert_tables[BLOCK_SIZES] = {
353   // 4X4
354   NULL,
355   // 4X8,     8X4,         8X8
356   has_bl_4x8, NULL, has_bl_vert_8x8,
357   // 8X16,    16X8,        16X16
358   has_bl_8x16, NULL, has_bl_vert_16x16,
359   // 16X32,   32X16,       32X32
360   has_bl_16x32, NULL, has_bl_vert_32x32,
361   // 32X64,   64X32,       64X64
362   has_bl_32x64, NULL, has_bl_vert_64x64,
363   // 64x128,  128x64,      128x128
364   has_bl_64x128, NULL, has_bl_128x128
365 };
366 
367 static const uint8_t *get_has_bl_table(PARTITION_TYPE partition,
368                                        BLOCK_SIZE bsize) {
369   const uint8_t *ret = NULL;
370   // If this is a mixed vertical partition, look up bsize in orders_vert.
371   if (partition == PARTITION_VERT_A || partition == PARTITION_VERT_B) {
372     assert(bsize < BLOCK_SIZES);
373     ret = has_bl_vert_tables[bsize];
374   } else {
375     ret = has_bl_tables[bsize];
376   }
377   assert(ret);
378   return ret;
379 }
380 
381 static int has_bottom_left(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
382                            int mi_col, int bottom_available, int left_available,
383                            PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
384                            int col_off, int ss_x, int ss_y) {
385   if (!bottom_available || !left_available) return 0;
386 
387   // Special case for 128x* blocks, when col_off is half the block width.
388   // This is needed because 128x* superblocks are divided into 64x* blocks in
389   // raster order
390   if (block_size_wide[bsize] > block_size_wide[BLOCK_64X64] && col_off > 0) {
391     const int plane_bw_unit_64 = mi_size_wide[BLOCK_64X64] >> ss_x;
392     const int col_off_64 = col_off % plane_bw_unit_64;
393     if (col_off_64 == 0) {
394       // We are at the left edge of top-right or bottom-right 64x* block.
395       const int plane_bh_unit_64 = mi_size_high[BLOCK_64X64] >> ss_y;
396       const int row_off_64 = row_off % plane_bh_unit_64;
397       const int plane_bh_unit =
398           AOMMIN(mi_size_high[bsize] >> ss_y, plane_bh_unit_64);
399       // Check if all bottom-left pixels are in the left 64x* block (which is
400       // already coded).
401       return row_off_64 + tx_size_high_unit[txsz] < plane_bh_unit;
402     }
403   }
404 
405   if (col_off > 0) {
406     // Bottom-left pixels are in the bottom-left block, which is not available.
407     return 0;
408   } else {
409     const int bh_unit = mi_size_high[bsize];
410     const int plane_bh_unit = AOMMAX(bh_unit >> ss_y, 1);
411     const int bottom_left_count_unit = tx_size_high_unit[txsz];
412 
413     // All bottom-left pixels are in the left block, which is already available.
414     if (row_off + bottom_left_count_unit < plane_bh_unit) return 1;
415 
416     const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
417     const int bh_in_mi_log2 = mi_size_high_log2[bsize];
418     const int sb_mi_size = mi_size_high[sb_size];
419     const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
420     const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;
421 
422     // Leftmost column of superblock: so bottom-left pixels may be in the left
423     // and/or bottom-left superblocks. But only the left superblock is
424     // available, so check if all required pixels fall in that superblock.
425     if (blk_col_in_sb == 0) {
426       const int blk_start_row_off =
427           blk_row_in_sb << (bh_in_mi_log2 + MI_SIZE_LOG2 - MI_SIZE_LOG2) >>
428           ss_y;
429       const int row_off_in_sb = blk_start_row_off + row_off;
430       const int sb_height_unit = sb_mi_size >> ss_y;
431       return row_off_in_sb + bottom_left_count_unit < sb_height_unit;
432     }
433 
434     // Bottom row of superblock (and not the leftmost column): so bottom-left
435     // pixels fall in the bottom superblock, which is not available yet.
436     if (((blk_row_in_sb + 1) << bh_in_mi_log2) >= sb_mi_size) return 0;
437 
438     // General case (neither leftmost column nor bottom row): check if the
439     // bottom-left block is coded before the current block.
440     const int this_blk_index =
441         ((blk_row_in_sb + 0) << (MAX_MIB_SIZE_LOG2 - bw_in_mi_log2)) +
442         blk_col_in_sb + 0;
443     const int idx1 = this_blk_index / 8;
444     const int idx2 = this_blk_index % 8;
445     const uint8_t *has_bl_table = get_has_bl_table(partition, bsize);
446     return (has_bl_table[idx1] >> idx2) & 1;
447   }
448 }
449 
450 typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
451                               const uint8_t *above, const uint8_t *left);
452 
453 static intra_pred_fn pred[INTRA_MODES][TX_SIZES_ALL];
454 static intra_pred_fn dc_pred[2][2][TX_SIZES_ALL];
455 
456 #if CONFIG_AV1_HIGHBITDEPTH
457 typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride,
458                                    const uint16_t *above, const uint16_t *left,
459                                    int bd);
460 static intra_high_pred_fn pred_high[INTRA_MODES][TX_SIZES_ALL];
461 static intra_high_pred_fn dc_pred_high[2][2][TX_SIZES_ALL];
462 #endif
463 
464 static void init_intra_predictors_internal(void) {
465   assert(NELEMENTS(mode_to_angle_map) == INTRA_MODES);
466 
467 #if CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER
468 #define INIT_RECTANGULAR(p, type)             \
469   p[TX_4X8] = aom_##type##_predictor_4x8;     \
470   p[TX_8X4] = aom_##type##_predictor_8x4;     \
471   p[TX_8X16] = aom_##type##_predictor_8x16;   \
472   p[TX_16X8] = aom_##type##_predictor_16x8;   \
473   p[TX_16X32] = aom_##type##_predictor_16x32; \
474   p[TX_32X16] = aom_##type##_predictor_32x16; \
475   p[TX_32X64] = aom_##type##_predictor_32x64; \
476   p[TX_64X32] = aom_##type##_predictor_64x32;
477 #else
478 #define INIT_RECTANGULAR(p, type)             \
479   p[TX_4X8] = aom_##type##_predictor_4x8;     \
480   p[TX_8X4] = aom_##type##_predictor_8x4;     \
481   p[TX_8X16] = aom_##type##_predictor_8x16;   \
482   p[TX_16X8] = aom_##type##_predictor_16x8;   \
483   p[TX_16X32] = aom_##type##_predictor_16x32; \
484   p[TX_32X16] = aom_##type##_predictor_32x16; \
485   p[TX_32X64] = aom_##type##_predictor_32x64; \
486   p[TX_64X32] = aom_##type##_predictor_64x32; \
487   p[TX_4X16] = aom_##type##_predictor_4x16;   \
488   p[TX_16X4] = aom_##type##_predictor_16x4;   \
489   p[TX_8X32] = aom_##type##_predictor_8x32;   \
490   p[TX_32X8] = aom_##type##_predictor_32x8;   \
491   p[TX_16X64] = aom_##type##_predictor_16x64; \
492   p[TX_64X16] = aom_##type##_predictor_64x16;
493 #endif  // CONFIG_REALTIME_ONLY && !CONFIG_AV1_DECODER
494 
495 #define INIT_NO_4X4(p, type)                  \
496   p[TX_8X8] = aom_##type##_predictor_8x8;     \
497   p[TX_16X16] = aom_##type##_predictor_16x16; \
498   p[TX_32X32] = aom_##type##_predictor_32x32; \
499   p[TX_64X64] = aom_##type##_predictor_64x64; \
500   INIT_RECTANGULAR(p, type)
501 
502 #define INIT_ALL_SIZES(p, type)           \
503   p[TX_4X4] = aom_##type##_predictor_4x4; \
504   INIT_NO_4X4(p, type)
505 
506   INIT_ALL_SIZES(pred[V_PRED], v)
507   INIT_ALL_SIZES(pred[H_PRED], h)
508   INIT_ALL_SIZES(pred[PAETH_PRED], paeth)
509   INIT_ALL_SIZES(pred[SMOOTH_PRED], smooth)
510   INIT_ALL_SIZES(pred[SMOOTH_V_PRED], smooth_v)
511   INIT_ALL_SIZES(pred[SMOOTH_H_PRED], smooth_h)
512   INIT_ALL_SIZES(dc_pred[0][0], dc_128)
513   INIT_ALL_SIZES(dc_pred[0][1], dc_top)
514   INIT_ALL_SIZES(dc_pred[1][0], dc_left)
515   INIT_ALL_SIZES(dc_pred[1][1], dc)
516 #if CONFIG_AV1_HIGHBITDEPTH
517   INIT_ALL_SIZES(pred_high[V_PRED], highbd_v)
518   INIT_ALL_SIZES(pred_high[H_PRED], highbd_h)
519   INIT_ALL_SIZES(pred_high[PAETH_PRED], highbd_paeth)
520   INIT_ALL_SIZES(pred_high[SMOOTH_PRED], highbd_smooth)
521   INIT_ALL_SIZES(pred_high[SMOOTH_V_PRED], highbd_smooth_v)
522   INIT_ALL_SIZES(pred_high[SMOOTH_H_PRED], highbd_smooth_h)
523   INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128)
524   INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top)
525   INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left)
526   INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc)
527 #endif
528 #undef intra_pred_allsizes
529 }
530 
531 // Directional prediction, zone 1: 0 < angle < 90
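// Implementation note: dx is the per-row horizontal step in 1/64-pel units
// (6 fractional bits, reduced to 5 when the edge is upsampled). For each row,
// base = x >> frac_bits indexes above[], shift reduces the remaining fraction
// to a 5-bit weight, and each pixel is the 2-tap blend
//   (above[base] * (32 - shift) + above[base + 1] * shift + 16) >> 5.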
532 void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
533                             const uint8_t *above, const uint8_t *left,
534                             int upsample_above, int dx, int dy) {
535   int r, c, x, base, shift, val;
536 
537   (void)left;
538   (void)dy;
539   assert(dy == 1);
540   assert(dx > 0);
541 
542   const int max_base_x = ((bw + bh) - 1) << upsample_above;
543   const int frac_bits = 6 - upsample_above;
544   const int base_inc = 1 << upsample_above;
545   x = dx;
546   for (r = 0; r < bh; ++r, dst += stride, x += dx) {
547     base = x >> frac_bits;
548     shift = ((x << upsample_above) & 0x3F) >> 1;
549 
550     if (base >= max_base_x) {
551       for (int i = r; i < bh; ++i) {
552         memset(dst, above[max_base_x], bw * sizeof(dst[0]));
553         dst += stride;
554       }
555       return;
556     }
557 
558     for (c = 0; c < bw; ++c, base += base_inc) {
559       if (base < max_base_x) {
560         val = above[base] * (32 - shift) + above[base + 1] * shift;
561         dst[c] = ROUND_POWER_OF_TWO(val, 5);
562       } else {
563         dst[c] = above[max_base_x];
564       }
565     }
566   }
567 }
568 
569 // Directional prediction, zone 2: 90 < angle < 180
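// Implementation note: each output pixel is projected onto both edges. If the
// projected x coordinate is at or beyond min_base_x it is interpolated from
// above[]; otherwise the pixel is re-projected onto the left edge and
// interpolated from left[], using the same 5-bit 2-tap blend as zone 1.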
570 void av1_dr_prediction_z2_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
571                             const uint8_t *above, const uint8_t *left,
572                             int upsample_above, int upsample_left, int dx,
573                             int dy) {
574   assert(dx > 0);
575   assert(dy > 0);
576 
577   const int min_base_x = -(1 << upsample_above);
578   const int min_base_y = -(1 << upsample_left);
579   (void)min_base_y;
580   const int frac_bits_x = 6 - upsample_above;
581   const int frac_bits_y = 6 - upsample_left;
582 
583   for (int r = 0; r < bh; ++r) {
584     for (int c = 0; c < bw; ++c) {
585       int val;
586       int y = r + 1;
587       int x = (c << 6) - y * dx;
588       const int base_x = x >> frac_bits_x;
589       if (base_x >= min_base_x) {
590         const int shift = ((x * (1 << upsample_above)) & 0x3F) >> 1;
591         val = above[base_x] * (32 - shift) + above[base_x + 1] * shift;
592         val = ROUND_POWER_OF_TWO(val, 5);
593       } else {
594         x = c + 1;
595         y = (r << 6) - x * dy;
596         const int base_y = y >> frac_bits_y;
597         assert(base_y >= min_base_y);
598         const int shift = ((y * (1 << upsample_left)) & 0x3F) >> 1;
599         val = left[base_y] * (32 - shift) + left[base_y + 1] * shift;
600         val = ROUND_POWER_OF_TWO(val, 5);
601       }
602       dst[c] = val;
603     }
604     dst += stride;
605   }
606 }
607 
608 // Directional prediction, zone 3: 180 < angle < 270
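// Implementation note: zone 3 mirrors zone 1 with the edges swapped. The walk
// is column-major down left[] (y advances by dy per column), and positions at
// or beyond max_base_y are filled with the sample at left[max_base_y].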
609 void av1_dr_prediction_z3_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
610                             const uint8_t *above, const uint8_t *left,
611                             int upsample_left, int dx, int dy) {
612   int r, c, y, base, shift, val;
613 
614   (void)above;
615   (void)dx;
616 
617   assert(dx == 1);
618   assert(dy > 0);
619 
620   const int max_base_y = (bw + bh - 1) << upsample_left;
621   const int frac_bits = 6 - upsample_left;
622   const int base_inc = 1 << upsample_left;
623   y = dy;
624   for (c = 0; c < bw; ++c, y += dy) {
625     base = y >> frac_bits;
626     shift = ((y << upsample_left) & 0x3F) >> 1;
627 
628     for (r = 0; r < bh; ++r, base += base_inc) {
629       if (base < max_base_y) {
630         val = left[base] * (32 - shift) + left[base + 1] * shift;
631         dst[r * stride + c] = ROUND_POWER_OF_TWO(val, 5);
632       } else {
633         for (; r < bh; ++r) dst[r * stride + c] = left[max_base_y];
634         break;
635       }
636     }
637   }
638 }
639 
640 static void dr_predictor(uint8_t *dst, ptrdiff_t stride, TX_SIZE tx_size,
641                          const uint8_t *above, const uint8_t *left,
642                          int upsample_above, int upsample_left, int angle) {
643   const int dx = av1_get_dx(angle);
644   const int dy = av1_get_dy(angle);
645   const int bw = tx_size_wide[tx_size];
646   const int bh = tx_size_high[tx_size];
647   assert(angle > 0 && angle < 270);
648 
649   if (angle > 0 && angle < 90) {
650     av1_dr_prediction_z1(dst, stride, bw, bh, above, left, upsample_above, dx,
651                          dy);
652   } else if (angle > 90 && angle < 180) {
653     av1_dr_prediction_z2(dst, stride, bw, bh, above, left, upsample_above,
654                          upsample_left, dx, dy);
655   } else if (angle > 180 && angle < 270) {
656     av1_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left, dx,
657                          dy);
658   } else if (angle == 90) {
659     pred[V_PRED][tx_size](dst, stride, above, left);
660   } else if (angle == 180) {
661     pred[H_PRED][tx_size](dst, stride, above, left);
662   }
663 }
664 
665 #if CONFIG_AV1_HIGHBITDEPTH
666 // Directional prediction, zone 1: 0 < angle < 90
667 void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw,
668                                    int bh, const uint16_t *above,
669                                    const uint16_t *left, int upsample_above,
670                                    int dx, int dy, int bd) {
671   int r, c, x, base, shift, val;
672 
673   (void)left;
674   (void)dy;
675   (void)bd;
676   assert(dy == 1);
677   assert(dx > 0);
678 
679   const int max_base_x = ((bw + bh) - 1) << upsample_above;
680   const int frac_bits = 6 - upsample_above;
681   const int base_inc = 1 << upsample_above;
682   x = dx;
683   for (r = 0; r < bh; ++r, dst += stride, x += dx) {
684     base = x >> frac_bits;
685     shift = ((x << upsample_above) & 0x3F) >> 1;
686 
687     if (base >= max_base_x) {
688       for (int i = r; i < bh; ++i) {
689         aom_memset16(dst, above[max_base_x], bw);
690         dst += stride;
691       }
692       return;
693     }
694 
695     for (c = 0; c < bw; ++c, base += base_inc) {
696       if (base < max_base_x) {
697         val = above[base] * (32 - shift) + above[base + 1] * shift;
698         dst[c] = ROUND_POWER_OF_TWO(val, 5);
699       } else {
700         dst[c] = above[max_base_x];
701       }
702     }
703   }
704 }
705 
706 // Directional prediction, zone 2: 90 < angle < 180
707 void av1_highbd_dr_prediction_z2_c(uint16_t *dst, ptrdiff_t stride, int bw,
708                                    int bh, const uint16_t *above,
709                                    const uint16_t *left, int upsample_above,
710                                    int upsample_left, int dx, int dy, int bd) {
711   (void)bd;
712   assert(dx > 0);
713   assert(dy > 0);
714 
715   const int min_base_x = -(1 << upsample_above);
716   const int min_base_y = -(1 << upsample_left);
717   (void)min_base_y;
718   const int frac_bits_x = 6 - upsample_above;
719   const int frac_bits_y = 6 - upsample_left;
720 
721   for (int r = 0; r < bh; ++r) {
722     for (int c = 0; c < bw; ++c) {
723       int val;
724       int y = r + 1;
725       int x = (c << 6) - y * dx;
726       const int base_x = x >> frac_bits_x;
727       if (base_x >= min_base_x) {
728         const int shift = ((x * (1 << upsample_above)) & 0x3F) >> 1;
729         val = above[base_x] * (32 - shift) + above[base_x + 1] * shift;
730         val = ROUND_POWER_OF_TWO(val, 5);
731       } else {
732         x = c + 1;
733         y = (r << 6) - x * dy;
734         const int base_y = y >> frac_bits_y;
735         assert(base_y >= min_base_y);
736         const int shift = ((y * (1 << upsample_left)) & 0x3F) >> 1;
737         val = left[base_y] * (32 - shift) + left[base_y + 1] * shift;
738         val = ROUND_POWER_OF_TWO(val, 5);
739       }
740       dst[c] = val;
741     }
742     dst += stride;
743   }
744 }
745 
746 // Directional prediction, zone 3: 180 < angle < 270
747 void av1_highbd_dr_prediction_z3_c(uint16_t *dst, ptrdiff_t stride, int bw,
748                                    int bh, const uint16_t *above,
749                                    const uint16_t *left, int upsample_left,
750                                    int dx, int dy, int bd) {
751   int r, c, y, base, shift, val;
752 
753   (void)above;
754   (void)dx;
755   (void)bd;
756   assert(dx == 1);
757   assert(dy > 0);
758 
759   const int max_base_y = (bw + bh - 1) << upsample_left;
760   const int frac_bits = 6 - upsample_left;
761   const int base_inc = 1 << upsample_left;
762   y = dy;
763   for (c = 0; c < bw; ++c, y += dy) {
764     base = y >> frac_bits;
765     shift = ((y << upsample_left) & 0x3F) >> 1;
766 
767     for (r = 0; r < bh; ++r, base += base_inc) {
768       if (base < max_base_y) {
769         val = left[base] * (32 - shift) + left[base + 1] * shift;
770         dst[r * stride + c] = ROUND_POWER_OF_TWO(val, 5);
771       } else {
772         for (; r < bh; ++r) dst[r * stride + c] = left[max_base_y];
773         break;
774       }
775     }
776   }
777 }
778 
779 static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride,
780                                 TX_SIZE tx_size, const uint16_t *above,
781                                 const uint16_t *left, int upsample_above,
782                                 int upsample_left, int angle, int bd) {
783   const int dx = av1_get_dx(angle);
784   const int dy = av1_get_dy(angle);
785   const int bw = tx_size_wide[tx_size];
786   const int bh = tx_size_high[tx_size];
787   assert(angle > 0 && angle < 270);
788 
789   if (angle > 0 && angle < 90) {
790     av1_highbd_dr_prediction_z1(dst, stride, bw, bh, above, left,
791                                 upsample_above, dx, dy, bd);
792   } else if (angle > 90 && angle < 180) {
793     av1_highbd_dr_prediction_z2(dst, stride, bw, bh, above, left,
794                                 upsample_above, upsample_left, dx, dy, bd);
795   } else if (angle > 180 && angle < 270) {
796     av1_highbd_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left,
797                                 dx, dy, bd);
798   } else if (angle == 90) {
799     pred_high[V_PRED][tx_size](dst, stride, above, left, bd);
800   } else if (angle == 180) {
801     pred_high[H_PRED][tx_size](dst, stride, above, left, bd);
802   }
803 }
804 #endif  // CONFIG_AV1_HIGHBITDEPTH
805 
806 DECLARE_ALIGNED(16, const int8_t,
807                 av1_filter_intra_taps[FILTER_INTRA_MODES][8][8]) = {
808   {
809       { -6, 10, 0, 0, 0, 12, 0, 0 },
810       { -5, 2, 10, 0, 0, 9, 0, 0 },
811       { -3, 1, 1, 10, 0, 7, 0, 0 },
812       { -3, 1, 1, 2, 10, 5, 0, 0 },
813       { -4, 6, 0, 0, 0, 2, 12, 0 },
814       { -3, 2, 6, 0, 0, 2, 9, 0 },
815       { -3, 2, 2, 6, 0, 2, 7, 0 },
816       { -3, 1, 2, 2, 6, 3, 5, 0 },
817   },
818   {
819       { -10, 16, 0, 0, 0, 10, 0, 0 },
820       { -6, 0, 16, 0, 0, 6, 0, 0 },
821       { -4, 0, 0, 16, 0, 4, 0, 0 },
822       { -2, 0, 0, 0, 16, 2, 0, 0 },
823       { -10, 16, 0, 0, 0, 0, 10, 0 },
824       { -6, 0, 16, 0, 0, 0, 6, 0 },
825       { -4, 0, 0, 16, 0, 0, 4, 0 },
826       { -2, 0, 0, 0, 16, 0, 2, 0 },
827   },
828   {
829       { -8, 8, 0, 0, 0, 16, 0, 0 },
830       { -8, 0, 8, 0, 0, 16, 0, 0 },
831       { -8, 0, 0, 8, 0, 16, 0, 0 },
832       { -8, 0, 0, 0, 8, 16, 0, 0 },
833       { -4, 4, 0, 0, 0, 0, 16, 0 },
834       { -4, 0, 4, 0, 0, 0, 16, 0 },
835       { -4, 0, 0, 4, 0, 0, 16, 0 },
836       { -4, 0, 0, 0, 4, 0, 16, 0 },
837   },
838   {
839       { -2, 8, 0, 0, 0, 10, 0, 0 },
840       { -1, 3, 8, 0, 0, 6, 0, 0 },
841       { -1, 2, 3, 8, 0, 4, 0, 0 },
842       { 0, 1, 2, 3, 8, 2, 0, 0 },
843       { -1, 4, 0, 0, 0, 3, 10, 0 },
844       { -1, 3, 4, 0, 0, 4, 6, 0 },
845       { -1, 2, 3, 4, 0, 4, 4, 0 },
846       { -1, 2, 2, 3, 4, 3, 3, 0 },
847   },
848   {
849       { -12, 14, 0, 0, 0, 14, 0, 0 },
850       { -10, 0, 14, 0, 0, 12, 0, 0 },
851       { -9, 0, 0, 14, 0, 11, 0, 0 },
852       { -8, 0, 0, 0, 14, 10, 0, 0 },
853       { -10, 12, 0, 0, 0, 0, 14, 0 },
854       { -9, 1, 12, 0, 0, 0, 12, 0 },
855       { -8, 0, 0, 12, 0, 1, 11, 0 },
856       { -7, 0, 0, 1, 12, 1, 9, 0 },
857   },
858 };
859 
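// In av1_filter_intra_taps[mode][k][0..7], tap 0 weights the above-left
// neighbour, taps 1-4 the four above neighbours, taps 5-6 the two left
// neighbours of a 4x2 output patch, and tap 7 is unused padding. The
// predictors below fill each 4x2 patch one pixel per k (row k >> 2,
// column k & 3) from those seven neighbours.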
860 void av1_filter_intra_predictor_c(uint8_t *dst, ptrdiff_t stride,
861                                   TX_SIZE tx_size, const uint8_t *above,
862                                   const uint8_t *left, int mode) {
863   int r, c;
864   uint8_t buffer[33][33];
865   const int bw = tx_size_wide[tx_size];
866   const int bh = tx_size_high[tx_size];
867 
868   assert(bw <= 32 && bh <= 32);
869 
870   for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r];
871   memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(uint8_t));
872 
873   for (r = 1; r < bh + 1; r += 2)
874     for (c = 1; c < bw + 1; c += 4) {
875       const uint8_t p0 = buffer[r - 1][c - 1];
876       const uint8_t p1 = buffer[r - 1][c];
877       const uint8_t p2 = buffer[r - 1][c + 1];
878       const uint8_t p3 = buffer[r - 1][c + 2];
879       const uint8_t p4 = buffer[r - 1][c + 3];
880       const uint8_t p5 = buffer[r][c - 1];
881       const uint8_t p6 = buffer[r + 1][c - 1];
882       for (int k = 0; k < 8; ++k) {
883         int r_offset = k >> 2;
884         int c_offset = k & 0x03;
885         int pr = av1_filter_intra_taps[mode][k][0] * p0 +
886                  av1_filter_intra_taps[mode][k][1] * p1 +
887                  av1_filter_intra_taps[mode][k][2] * p2 +
888                  av1_filter_intra_taps[mode][k][3] * p3 +
889                  av1_filter_intra_taps[mode][k][4] * p4 +
890                  av1_filter_intra_taps[mode][k][5] * p5 +
891                  av1_filter_intra_taps[mode][k][6] * p6;
892         // Section 7.11.2.3 specifies the right-hand side of the assignment as
893         //   Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ).
894         // Since Clip1() clips a negative value to 0, it is safe to replace
895         // Round2Signed() with Round2().
896         buffer[r + r_offset][c + c_offset] =
897             clip_pixel(ROUND_POWER_OF_TWO(pr, FILTER_INTRA_SCALE_BITS));
898       }
899     }
900 
901   for (r = 0; r < bh; ++r) {
902     memcpy(dst, &buffer[r + 1][1], bw * sizeof(uint8_t));
903     dst += stride;
904   }
905 }
906 
907 #if CONFIG_AV1_HIGHBITDEPTH
908 static void highbd_filter_intra_predictor(uint16_t *dst, ptrdiff_t stride,
909                                           TX_SIZE tx_size,
910                                           const uint16_t *above,
911                                           const uint16_t *left, int mode,
912                                           int bd) {
913   int r, c;
914   uint16_t buffer[33][33];
915   const int bw = tx_size_wide[tx_size];
916   const int bh = tx_size_high[tx_size];
917 
918   assert(bw <= 32 && bh <= 32);
919 
920   for (r = 0; r < bh; ++r) buffer[r + 1][0] = left[r];
921   memcpy(buffer[0], &above[-1], (bw + 1) * sizeof(buffer[0][0]));
922 
923   for (r = 1; r < bh + 1; r += 2)
924     for (c = 1; c < bw + 1; c += 4) {
925       const uint16_t p0 = buffer[r - 1][c - 1];
926       const uint16_t p1 = buffer[r - 1][c];
927       const uint16_t p2 = buffer[r - 1][c + 1];
928       const uint16_t p3 = buffer[r - 1][c + 2];
929       const uint16_t p4 = buffer[r - 1][c + 3];
930       const uint16_t p5 = buffer[r][c - 1];
931       const uint16_t p6 = buffer[r + 1][c - 1];
932       for (int k = 0; k < 8; ++k) {
933         int r_offset = k >> 2;
934         int c_offset = k & 0x03;
935         int pr = av1_filter_intra_taps[mode][k][0] * p0 +
936                  av1_filter_intra_taps[mode][k][1] * p1 +
937                  av1_filter_intra_taps[mode][k][2] * p2 +
938                  av1_filter_intra_taps[mode][k][3] * p3 +
939                  av1_filter_intra_taps[mode][k][4] * p4 +
940                  av1_filter_intra_taps[mode][k][5] * p5 +
941                  av1_filter_intra_taps[mode][k][6] * p6;
942         // Section 7.11.2.3 specifies the right-hand side of the assignment as
943         //   Clip1( Round2Signed( pr, INTRA_FILTER_SCALE_BITS ) ).
944         // Since Clip1() clips a negative value to 0, it is safe to replace
945         // Round2Signed() with Round2().
946         buffer[r + r_offset][c + c_offset] = clip_pixel_highbd(
947             ROUND_POWER_OF_TWO(pr, FILTER_INTRA_SCALE_BITS), bd);
948       }
949     }
950 
951   for (r = 0; r < bh; ++r) {
952     memcpy(dst, &buffer[r + 1][1], bw * sizeof(dst[0]));
953     dst += stride;
954   }
955 }
956 #endif  // CONFIG_AV1_HIGHBITDEPTH
957 
958 static int is_smooth(const MB_MODE_INFO *mbmi, int plane) {
959   if (plane == 0) {
960     const PREDICTION_MODE mode = mbmi->mode;
961     return (mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
962             mode == SMOOTH_H_PRED);
963   } else {
964     // uv_mode is not set for inter blocks, so need to explicitly
965     // detect that case.
966     if (is_inter_block(mbmi)) return 0;
967 
968     const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
969     return (uv_mode == UV_SMOOTH_PRED || uv_mode == UV_SMOOTH_V_PRED ||
970             uv_mode == UV_SMOOTH_H_PRED);
971   }
972 }
973 
974 static int get_intra_edge_filter_type(const MACROBLOCKD *xd, int plane) {
975   const MB_MODE_INFO *above;
976   const MB_MODE_INFO *left;
977 
978   if (plane == 0) {
979     above = xd->above_mbmi;
980     left = xd->left_mbmi;
981   } else {
982     above = xd->chroma_above_mbmi;
983     left = xd->chroma_left_mbmi;
984   }
985 
986   return (above && is_smooth(above, plane)) || (left && is_smooth(left, plane));
987 }
988 
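// Selects the intra edge filter strength (0..3) from the block dimensions
// (bs0 + bs1, in pixels), the angle delta from the nearest pure vertical or
// horizontal direction, and type, which is 1 when a neighbouring block uses a
// SMOOTH* mode (see get_intra_edge_filter_type() above). Larger blocks and
// larger deltas get stronger filtering.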
989 static int intra_edge_filter_strength(int bs0, int bs1, int delta, int type) {
990   const int d = abs(delta);
991   int strength = 0;
992 
993   const int blk_wh = bs0 + bs1;
994   if (type == 0) {
995     if (blk_wh <= 8) {
996       if (d >= 56) strength = 1;
997     } else if (blk_wh <= 12) {
998       if (d >= 40) strength = 1;
999     } else if (blk_wh <= 16) {
1000       if (d >= 40) strength = 1;
1001     } else if (blk_wh <= 24) {
1002       if (d >= 8) strength = 1;
1003       if (d >= 16) strength = 2;
1004       if (d >= 32) strength = 3;
1005     } else if (blk_wh <= 32) {
1006       if (d >= 1) strength = 1;
1007       if (d >= 4) strength = 2;
1008       if (d >= 32) strength = 3;
1009     } else {
1010       if (d >= 1) strength = 3;
1011     }
1012   } else {
1013     if (blk_wh <= 8) {
1014       if (d >= 40) strength = 1;
1015       if (d >= 64) strength = 2;
1016     } else if (blk_wh <= 16) {
1017       if (d >= 20) strength = 1;
1018       if (d >= 48) strength = 2;
1019     } else if (blk_wh <= 24) {
1020       if (d >= 4) strength = 3;
1021     } else {
1022       if (d >= 1) strength = 3;
1023     }
1024   }
1025   return strength;
1026 }
1027 
1028 void av1_filter_intra_edge_c(uint8_t *p, int sz, int strength) {
1029   if (!strength) return;
1030 
1031   const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1032                                                          { 0, 5, 6, 5, 0 },
1033                                                          { 2, 4, 4, 4, 2 } };
1034   const int filt = strength - 1;
1035   uint8_t edge[129];
1036 
1037   memcpy(edge, p, sz * sizeof(*p));
1038   for (int i = 1; i < sz; i++) {
1039     int s = 0;
1040     for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1041       int k = i - 2 + j;
1042       k = (k < 0) ? 0 : k;
1043       k = (k > sz - 1) ? sz - 1 : k;
1044       s += edge[k] * kernel[filt][j];
1045     }
1046     s = (s + 8) >> 4;
1047     p[i] = s;
1048   }
1049 }
1050 
1051 static void filter_intra_edge_corner(uint8_t *p_above, uint8_t *p_left) {
1052   const int kernel[3] = { 5, 6, 5 };
1053 
1054   int s = (p_left[0] * kernel[0]) + (p_above[-1] * kernel[1]) +
1055           (p_above[0] * kernel[2]);
1056   s = (s + 8) >> 4;
1057   p_above[-1] = s;
1058   p_left[-1] = s;
1059 }
1060 
1061 void av1_upsample_intra_edge_c(uint8_t *p, int sz) {
1062   // interpolate half-sample positions
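  // After upsampling, p[-2 .. 2*sz - 2] holds the edge at half-sample spacing:
  // even indices keep the original samples and odd indices are the 4-tap
  // (-1, 9, 9, -1)/16 interpolated values, clipped to the pixel range.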
1063   assert(sz <= MAX_UPSAMPLE_SZ);
1064 
1065   uint8_t in[MAX_UPSAMPLE_SZ + 3];
1066   // copy p[-1..(sz-1)] and extend first and last samples
1067   in[0] = p[-1];
1068   in[1] = p[-1];
1069   for (int i = 0; i < sz; i++) {
1070     in[i + 2] = p[i];
1071   }
1072   in[sz + 2] = p[sz - 1];
1073 
1074   // interpolate half-sample edge positions
1075   p[-2] = in[0];
1076   for (int i = 0; i < sz; i++) {
1077     int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1078     s = clip_pixel((s + 8) >> 4);
1079     p[2 * i - 1] = s;
1080     p[2 * i] = in[i + 2];
1081   }
1082 }
1083 
1084 static void build_directional_and_filter_intra_predictors(
1085     const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride,
1086     PREDICTION_MODE mode, int p_angle, FILTER_INTRA_MODE filter_intra_mode,
1087     TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
1088     int n_left_px, int n_bottomleft_px, int intra_edge_filter_type) {
1089   int i;
1090   const uint8_t *above_ref = ref - ref_stride;
1091   const uint8_t *left_ref = ref - 1;
1092   DECLARE_ALIGNED(16, uint8_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1093   DECLARE_ALIGNED(16, uint8_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1094   uint8_t *const above_row = above_data + 16;
1095   uint8_t *const left_col = left_data + 16;
1096   const int txwpx = tx_size_wide[tx_size];
1097   const int txhpx = tx_size_high[tx_size];
1098   int need_left = extend_modes[mode] & NEED_LEFT;
1099   int need_above = extend_modes[mode] & NEED_ABOVE;
1100   int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1101   const int is_dr_mode = av1_is_directional_mode(mode);
1102   const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
1103   assert(use_filter_intra || is_dr_mode);
1104   // The left_data, above_data buffers must be initialized to fix intermittent
1105   // valgrind errors. Uninitialized reads in intra pred modules (e.g. width = 4
1106   // path in av1_dr_prediction_z1_avx2()) from left_data, above_data are seen to
1107   // be the potential reason for this issue.
1108   memset(left_data, 129, NUM_INTRA_NEIGHBOUR_PIXELS);
1109   memset(above_data, 127, NUM_INTRA_NEIGHBOUR_PIXELS);
1110 
1111   // The default values if ref pixels are not available:
1112   // 128 127 127 .. 127 127 127 127 127 127
1113   // 129  A   B  ..  Y   Z
1114   // 129  C   D  ..  W   X
1115   // 129  E   F  ..  U   V
1116   // 129  G   H  ..  S   T   T   T   T   T
1117   // ..
1118 
1119   if (is_dr_mode) {
1120     if (p_angle <= 90)
1121       need_above = 1, need_left = 0, need_above_left = 1;
1122     else if (p_angle < 180)
1123       need_above = 1, need_left = 1, need_above_left = 1;
1124     else
1125       need_above = 0, need_left = 1, need_above_left = 1;
1126   }
1127   if (use_filter_intra) need_left = need_above = need_above_left = 1;
1128 
1129   assert(n_top_px >= 0);
1130   assert(n_topright_px >= -1);
1131   assert(n_left_px >= 0);
1132   assert(n_bottomleft_px >= -1);
1133 
1134   if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1135     int val;
1136     if (need_left) {
1137       val = (n_top_px > 0) ? above_ref[0] : 129;
1138     } else {
1139       val = (n_left_px > 0) ? left_ref[0] : 127;
1140     }
1141     for (i = 0; i < txhpx; ++i) {
1142       memset(dst, val, txwpx);
1143       dst += dst_stride;
1144     }
1145     return;
1146   }
1147 
1148   // NEED_LEFT
1149   if (need_left) {
1150     const int num_left_pixels_needed =
1151         txhpx + (n_bottomleft_px >= 0 ? txwpx : 0);
1152     i = 0;
1153     if (n_left_px > 0) {
1154       for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1155       if (n_bottomleft_px > 0) {
1156         assert(i == txhpx);
1157         for (; i < txhpx + n_bottomleft_px; i++)
1158           left_col[i] = left_ref[i * ref_stride];
1159       }
1160       if (i < num_left_pixels_needed)
1161         memset(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
1162     } else if (n_top_px > 0) {
1163       memset(left_col, above_ref[0], num_left_pixels_needed);
1164     }
1165   }
1166 
1167   // NEED_ABOVE
1168   if (need_above) {
1169     const int num_top_pixels_needed = txwpx + (n_topright_px >= 0 ? txhpx : 0);
1170     if (n_top_px > 0) {
1171       memcpy(above_row, above_ref, n_top_px);
1172       i = n_top_px;
1173       if (n_topright_px > 0) {
1174         assert(n_top_px == txwpx);
1175         memcpy(above_row + txwpx, above_ref + txwpx, n_topright_px);
1176         i += n_topright_px;
1177       }
1178       if (i < num_top_pixels_needed)
1179         memset(&above_row[i], above_row[i - 1], num_top_pixels_needed - i);
1180     } else if (n_left_px > 0) {
1181       memset(above_row, left_ref[0], num_top_pixels_needed);
1182     }
1183   }
1184 
1185   if (need_above_left) {
1186     if (n_top_px > 0 && n_left_px > 0) {
1187       above_row[-1] = above_ref[-1];
1188     } else if (n_top_px > 0) {
1189       above_row[-1] = above_ref[0];
1190     } else if (n_left_px > 0) {
1191       above_row[-1] = left_ref[0];
1192     } else {
1193       above_row[-1] = 128;
1194     }
1195     left_col[-1] = above_row[-1];
1196   }
1197 
1198   if (use_filter_intra) {
1199     av1_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
1200                                filter_intra_mode);
1201     return;
1202   }
1203 
1204   assert(is_dr_mode);
1205   int upsample_above = 0;
1206   int upsample_left = 0;
1207   if (!disable_edge_filter) {
1208     const int need_right = p_angle < 90;
1209     const int need_bottom = p_angle > 180;
1210     if (p_angle != 90 && p_angle != 180) {
1211       assert(need_above_left);
1212       const int ab_le = 1;
1213       if (need_above && need_left && (txwpx + txhpx >= 24)) {
1214         filter_intra_edge_corner(above_row, left_col);
1215       }
1216       if (need_above && n_top_px > 0) {
1217         const int strength = intra_edge_filter_strength(
1218             txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
1219         const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
1220         av1_filter_intra_edge(above_row - ab_le, n_px, strength);
1221       }
1222       if (need_left && n_left_px > 0) {
1223         const int strength = intra_edge_filter_strength(
1224             txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
1225         const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
1226         av1_filter_intra_edge(left_col - ab_le, n_px, strength);
1227       }
1228     }
1229     upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
1230                                                  intra_edge_filter_type);
1231     if (need_above && upsample_above) {
1232       const int n_px = txwpx + (need_right ? txhpx : 0);
1233       av1_upsample_intra_edge(above_row, n_px);
1234     }
1235     upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
1236                                                 intra_edge_filter_type);
1237     if (need_left && upsample_left) {
1238       const int n_px = txhpx + (need_bottom ? txwpx : 0);
1239       av1_upsample_intra_edge(left_col, n_px);
1240     }
1241   }
1242   dr_predictor(dst, dst_stride, tx_size, above_row, left_col, upsample_above,
1243                upsample_left, p_angle);
1244 }
1245 
1246 // This function generates the pred data of a given block for non-directional
1247 // intra prediction modes (i.e., DC, SMOOTH, SMOOTH_H, SMOOTH_V and PAETH).
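// Unlike the directional/filter-intra path above, these modes never use the
// top-right or bottom-left extensions, so at most txwpx above and txhpx left
// neighbour pixels are gathered, with the usual defaults (127 above, 129 left,
// 128 for the corner) when references are unavailable.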
1248 static void build_non_directional_intra_predictors(
1249     const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride,
1250     PREDICTION_MODE mode, TX_SIZE tx_size, int n_top_px, int n_left_px) {
1251   const uint8_t *above_ref = ref - ref_stride;
1252   const uint8_t *left_ref = ref - 1;
1253   const int txwpx = tx_size_wide[tx_size];
1254   const int txhpx = tx_size_high[tx_size];
1255   const int need_left = extend_modes[mode] & NEED_LEFT;
1256   const int need_above = extend_modes[mode] & NEED_ABOVE;
1257   const int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1258   int i = 0;
1259   assert(n_top_px >= 0);
1260   assert(n_left_px >= 0);
1261   assert(mode == DC_PRED || mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
1262          mode == SMOOTH_H_PRED || mode == PAETH_PRED);
1263 
1264   if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1265     int val = 0;
1266     if (need_left) {
1267       val = (n_top_px > 0) ? above_ref[0] : 129;
1268     } else {
1269       val = (n_left_px > 0) ? left_ref[0] : 127;
1270     }
1271     for (i = 0; i < txhpx; ++i) {
1272       memset(dst, val, txwpx);
1273       dst += dst_stride;
1274     }
1275     return;
1276   }
1277 
1278   DECLARE_ALIGNED(16, uint8_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1279   DECLARE_ALIGNED(16, uint8_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1280   uint8_t *const above_row = above_data + 16;
1281   uint8_t *const left_col = left_data + 16;
1282 
1283   if (need_left) {
1284     memset(left_data, 129, NUM_INTRA_NEIGHBOUR_PIXELS);
1285     if (n_left_px > 0) {
1286       for (i = 0; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1287       if (i < txhpx) memset(&left_col[i], left_col[i - 1], txhpx - i);
1288     } else if (n_top_px > 0) {
1289       memset(left_col, above_ref[0], txhpx);
1290     }
1291   }
1292 
1293   if (need_above) {
1294     memset(above_data, 127, NUM_INTRA_NEIGHBOUR_PIXELS);
1295     if (n_top_px > 0) {
1296       memcpy(above_row, above_ref, n_top_px);
1297       i = n_top_px;
1298       if (i < txwpx) memset(&above_row[i], above_row[i - 1], txwpx - i);
1299     } else if (n_left_px > 0) {
1300       memset(above_row, left_ref[0], txwpx);
1301     }
1302   }
1303 
1304   if (need_above_left) {
1305     if (n_top_px > 0 && n_left_px > 0) {
1306       above_row[-1] = above_ref[-1];
1307     } else if (n_top_px > 0) {
1308       above_row[-1] = above_ref[0];
1309     } else if (n_left_px > 0) {
1310       above_row[-1] = left_ref[0];
1311     } else {
1312       above_row[-1] = 128;
1313     }
1314     left_col[-1] = above_row[-1];
1315   }
1316 
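  // The dc_pred[][][] table is indexed as [left available][top available]
  // [tx_size], so the call below dispatches to the DC variant that matches the
  // neighbours actually present (128-fill, top-only, left-only, or both).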
1317   if (mode == DC_PRED) {
1318     dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride, above_row,
1319                                                   left_col);
1320   } else {
1321     pred[mode][tx_size](dst, dst_stride, above_row, left_col);
1322   }
1323 }
1324 
1325 #if CONFIG_AV1_HIGHBITDEPTH
1326 void av1_highbd_filter_intra_edge_c(uint16_t *p, int sz, int strength) {
1327   if (!strength) return;
1328 
1329   const int kernel[INTRA_EDGE_FILT][INTRA_EDGE_TAPS] = { { 0, 4, 8, 4, 0 },
1330                                                          { 0, 5, 6, 5, 0 },
1331                                                          { 2, 4, 4, 4, 2 } };
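  // Each kernel's taps sum to 16, so the weighted sum below is normalized with
  // a rounded shift of 4. The first sample, p[0], is left unfiltered since the
  // loop starts at i = 1.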
1332   const int filt = strength - 1;
1333   uint16_t edge[129];
1334 
1335   memcpy(edge, p, sz * sizeof(*p));
1336   for (int i = 1; i < sz; i++) {
1337     int s = 0;
1338     for (int j = 0; j < INTRA_EDGE_TAPS; j++) {
1339       int k = i - 2 + j;
1340       k = (k < 0) ? 0 : k;
1341       k = (k > sz - 1) ? sz - 1 : k;
1342       s += edge[k] * kernel[filt][j];
1343     }
1344     s = (s + 8) >> 4;
1345     p[i] = s;
1346   }
1347 }
1348 
1349 static void highbd_filter_intra_edge_corner(uint16_t *p_above,
1350                                             uint16_t *p_left) {
1351   const int kernel[3] = { 5, 6, 5 };
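  // The {5, 6, 5} taps sum to 16; the smoothed corner value is written back to
  // both edge buffers so the above and left edges share the same corner sample.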
1352 
1353   int s = (p_left[0] * kernel[0]) + (p_above[-1] * kernel[1]) +
1354           (p_above[0] * kernel[2]);
1355   s = (s + 8) >> 4;
1356   p_above[-1] = s;
1357   p_left[-1] = s;
1358 }
1359 
1360 void av1_highbd_upsample_intra_edge_c(uint16_t *p, int sz, int bd) {
1361   // interpolate half-sample positions
1362   assert(sz <= MAX_UPSAMPLE_SZ);
1363 
1364   uint16_t in[MAX_UPSAMPLE_SZ + 3];
1365   // copy p[-1..(sz-1)] and extend first and last samples
1366   in[0] = p[-1];
1367   in[1] = p[-1];
1368   for (int i = 0; i < sz; i++) {
1369     in[i + 2] = p[i];
1370   }
1371   in[sz + 2] = p[sz - 1];
1372 
1373   // interpolate half-sample edge positions
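  // A 4-tap (-1, 9, 9, -1) filter produces the half-sample values, interleaved
  // with the original samples: p[2 * i] keeps the original sample and
  // p[2 * i - 1] receives the interpolated one.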
1374   p[-2] = in[0];
1375   for (int i = 0; i < sz; i++) {
1376     int s = -in[i] + (9 * in[i + 1]) + (9 * in[i + 2]) - in[i + 3];
1377     s = (s + 8) >> 4;
1378     s = clip_pixel_highbd(s, bd);
1379     p[2 * i - 1] = s;
1380     p[2 * i] = in[i + 2];
1381   }
1382 }
1383 
1384 static void highbd_build_directional_and_filter_intra_predictors(
1385     const uint8_t *ref8, int ref_stride, uint8_t *dst8, int dst_stride,
1386     PREDICTION_MODE mode, int p_angle, FILTER_INTRA_MODE filter_intra_mode,
1387     TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
1388     int n_left_px, int n_bottomleft_px, int intra_edge_filter_type,
1389     int bit_depth) {
1390   int i;
1391   uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
1392   const uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8);
1393   DECLARE_ALIGNED(16, uint16_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1394   DECLARE_ALIGNED(16, uint16_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1395   uint16_t *const above_row = above_data + 16;
1396   uint16_t *const left_col = left_data + 16;
1397   const int txwpx = tx_size_wide[tx_size];
1398   const int txhpx = tx_size_high[tx_size];
1399   int need_left = extend_modes[mode] & NEED_LEFT;
1400   int need_above = extend_modes[mode] & NEED_ABOVE;
1401   int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1402   const uint16_t *above_ref = ref - ref_stride;
1403   const uint16_t *left_ref = ref - 1;
1404   const int is_dr_mode = av1_is_directional_mode(mode);
1405   const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
1406   assert(use_filter_intra || is_dr_mode);
1407   const int base = 128 << (bit_depth - 8);
1408   // The left_data and above_data buffers must be fully initialized to avoid
1409   // some intermittent valgrind errors. Uninitialized reads in intra pred
1410   // modules (e.g. the width = 4 path in av1_highbd_dr_prediction_z2_avx2())
1411   // from left_data and above_data appear to be the cause of this issue.
1412   aom_memset16(left_data, base + 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1413   aom_memset16(above_data, base - 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1414 
1415   // The default values if ref pixels are not available:
1416   // base   base-1 base-1 .. base-1 base-1 base-1 base-1 base-1 base-1
1417   // base+1   A      B  ..     Y      Z
1418   // base+1   C      D  ..     W      X
1419   // base+1   E      F  ..     U      V
1420   // base+1   G      H  ..     S      T      T      T      T      T
1421 
1422   if (is_dr_mode) {
1423     if (p_angle <= 90)
1424       need_above = 1, need_left = 0, need_above_left = 1;
1425     else if (p_angle < 180)
1426       need_above = 1, need_left = 1, need_above_left = 1;
1427     else
1428       need_above = 0, need_left = 1, need_above_left = 1;
1429   }
1430   if (use_filter_intra) need_left = need_above = need_above_left = 1;
1431 
1432   assert(n_top_px >= 0);
1433   assert(n_topright_px >= -1);
1434   assert(n_left_px >= 0);
1435   assert(n_bottomleft_px >= -1);
1436 
1437   if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1438     int val;
1439     if (need_left) {
1440       val = (n_top_px > 0) ? above_ref[0] : base + 1;
1441     } else {
1442       val = (n_left_px > 0) ? left_ref[0] : base - 1;
1443     }
1444     for (i = 0; i < txhpx; ++i) {
1445       aom_memset16(dst, val, txwpx);
1446       dst += dst_stride;
1447     }
1448     return;
1449   }
1450 
1451   // NEED_LEFT
1452   if (need_left) {
1453     const int num_left_pixels_needed =
1454         txhpx + (n_bottomleft_px >= 0 ? txwpx : 0);
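    // n_bottomleft_px >= 0 means the bottom-left extension is needed for this
    // prediction, so reserve txwpx extra entries; entries that cannot be read
    // from the reference are padded with the last valid sample below.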
1455     i = 0;
1456     if (n_left_px > 0) {
1457       for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1458       if (n_bottomleft_px > 0) {
1459         assert(i == txhpx);
1460         for (; i < txhpx + n_bottomleft_px; i++)
1461           left_col[i] = left_ref[i * ref_stride];
1462       }
1463       if (i < num_left_pixels_needed)
1464         aom_memset16(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
1465     } else if (n_top_px > 0) {
1466       aom_memset16(left_col, above_ref[0], num_left_pixels_needed);
1467     }
1468   }
1469 
1470   // NEED_ABOVE
1471   if (need_above) {
1472     const int num_top_pixels_needed = txwpx + (n_topright_px >= 0 ? txhpx : 0);
1473     if (n_top_px > 0) {
1474       memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
1475       i = n_top_px;
1476       if (n_topright_px > 0) {
1477         assert(n_top_px == txwpx);
1478         memcpy(above_row + txwpx, above_ref + txwpx,
1479                n_topright_px * sizeof(above_ref[0]));
1480         i += n_topright_px;
1481       }
1482       if (i < num_top_pixels_needed)
1483         aom_memset16(&above_row[i], above_row[i - 1],
1484                      num_top_pixels_needed - i);
1485     } else if (n_left_px > 0) {
1486       aom_memset16(above_row, left_ref[0], num_top_pixels_needed);
1487     }
1488   }
1489 
1490   if (need_above_left) {
1491     if (n_top_px > 0 && n_left_px > 0) {
1492       above_row[-1] = above_ref[-1];
1493     } else if (n_top_px > 0) {
1494       above_row[-1] = above_ref[0];
1495     } else if (n_left_px > 0) {
1496       above_row[-1] = left_ref[0];
1497     } else {
1498       above_row[-1] = base;
1499     }
1500     left_col[-1] = above_row[-1];
1501   }
1502 
1503   if (use_filter_intra) {
1504     highbd_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
1505                                   filter_intra_mode, bit_depth);
1506     return;
1507   }
1508 
1509   assert(is_dr_mode);
1510   int upsample_above = 0;
1511   int upsample_left = 0;
1512   if (!disable_edge_filter) {
1513     const int need_right = p_angle < 90;
1514     const int need_bottom = p_angle > 180;
1515     if (p_angle != 90 && p_angle != 180) {
1516       assert(need_above_left);
1517       const int ab_le = 1;
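      // ab_le = 1 includes the above-left sample at the start of each filtered
      // edge. The corner itself is additionally smoothed, but only when both
      // edges are in use and width + height >= 24.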
1518       if (need_above && need_left && (txwpx + txhpx >= 24)) {
1519         highbd_filter_intra_edge_corner(above_row, left_col);
1520       }
1521       if (need_above && n_top_px > 0) {
1522         const int strength = intra_edge_filter_strength(
1523             txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
1524         const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
1525         av1_highbd_filter_intra_edge(above_row - ab_le, n_px, strength);
1526       }
1527       if (need_left && n_left_px > 0) {
1528         const int strength = intra_edge_filter_strength(
1529             txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
1530         const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
1531         av1_highbd_filter_intra_edge(left_col - ab_le, n_px, strength);
1532       }
1533     }
1534     upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
1535                                                  intra_edge_filter_type);
1536     if (need_above && upsample_above) {
1537       const int n_px = txwpx + (need_right ? txhpx : 0);
1538       av1_highbd_upsample_intra_edge(above_row, n_px, bit_depth);
1539     }
1540     upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
1541                                                 intra_edge_filter_type);
1542     if (need_left && upsample_left) {
1543       const int n_px = txhpx + (need_bottom ? txwpx : 0);
1544       av1_highbd_upsample_intra_edge(left_col, n_px, bit_depth);
1545     }
1546   }
1547   highbd_dr_predictor(dst, dst_stride, tx_size, above_row, left_col,
1548                       upsample_above, upsample_left, p_angle, bit_depth);
1549 }
1550 
1551 // For HBD encode/decode, this function generates the pred data of a given
1552 // block for non-directional intra prediction modes (i.e., DC, SMOOTH, SMOOTH_H,
1553 // SMOOTH_V and PAETH).
1554 static void highbd_build_non_directional_intra_predictors(
1555     const uint8_t *ref8, int ref_stride, uint8_t *dst8, int dst_stride,
1556     PREDICTION_MODE mode, TX_SIZE tx_size, int n_top_px, int n_left_px,
1557     int bit_depth) {
1558   int i = 0;
1559   uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
1560   const uint16_t *const ref = CONVERT_TO_SHORTPTR(ref8);
1561   const int txwpx = tx_size_wide[tx_size];
1562   const int txhpx = tx_size_high[tx_size];
1563   int need_left = extend_modes[mode] & NEED_LEFT;
1564   int need_above = extend_modes[mode] & NEED_ABOVE;
1565   int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
1566   const uint16_t *above_ref = ref - ref_stride;
1567   const uint16_t *left_ref = ref - 1;
1568   const int base = 128 << (bit_depth - 8);
1569 
1570   assert(n_top_px >= 0);
1571   assert(n_left_px >= 0);
1572   assert(mode == DC_PRED || mode == SMOOTH_PRED || mode == SMOOTH_V_PRED ||
1573          mode == SMOOTH_H_PRED || mode == PAETH_PRED);
1574 
1575   if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
1576     int val = 0;
1577     if (need_left) {
1578       val = (n_top_px > 0) ? above_ref[0] : base + 1;
1579     } else {
1580       val = (n_left_px > 0) ? left_ref[0] : base - 1;
1581     }
1582     for (i = 0; i < txhpx; ++i) {
1583       aom_memset16(dst, val, txwpx);
1584       dst += dst_stride;
1585     }
1586     return;
1587   }
1588 
1589   DECLARE_ALIGNED(16, uint16_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1590   DECLARE_ALIGNED(16, uint16_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
1591   uint16_t *const above_row = above_data + 16;
1592   uint16_t *const left_col = left_data + 16;
1593 
1594   if (need_left) {
1595     aom_memset16(left_data, base + 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1596     if (n_left_px > 0) {
1597       for (i = 0; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
1598       if (i < txhpx) aom_memset16(&left_col[i], left_col[i - 1], txhpx - i);
1599     } else if (n_top_px > 0) {
1600       aom_memset16(left_col, above_ref[0], txhpx);
1601     }
1602   }
1603 
1604   if (need_above) {
1605     aom_memset16(above_data, base - 1, NUM_INTRA_NEIGHBOUR_PIXELS);
1606     if (n_top_px > 0) {
1607       memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
1608       i = n_top_px;
1609       if (i < txwpx) aom_memset16(&above_row[i], above_row[i - 1], (txwpx - i));
1610     } else if (n_left_px > 0) {
1611       aom_memset16(above_row, left_ref[0], txwpx);
1612     }
1613   }
1614 
1615   if (need_above_left) {
1616     if (n_top_px > 0 && n_left_px > 0) {
1617       above_row[-1] = above_ref[-1];
1618     } else if (n_top_px > 0) {
1619       above_row[-1] = above_ref[0];
1620     } else if (n_left_px > 0) {
1621       above_row[-1] = left_ref[0];
1622     } else {
1623       above_row[-1] = base;
1624     }
1625     left_col[-1] = above_row[-1];
1626   }
1627 
1628   if (mode == DC_PRED) {
1629     dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size](
1630         dst, dst_stride, above_row, left_col, bit_depth);
1631   } else {
1632     pred_high[mode][tx_size](dst, dst_stride, above_row, left_col, bit_depth);
1633   }
1634 }
1635 #endif  // CONFIG_AV1_HIGHBITDEPTH
1636 
1637 static inline BLOCK_SIZE scale_chroma_bsize(BLOCK_SIZE bsize, int subsampling_x,
1638                                             int subsampling_y) {
1639   assert(subsampling_x >= 0 && subsampling_x < 2);
1640   assert(subsampling_y >= 0 && subsampling_y < 2);
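  // Promote luma block sizes whose subsampled chroma plane would be narrower
  // or shorter than 4 samples, e.g. BLOCK_4X4 with 4:2:0 subsampling
  // (subsampling_x == subsampling_y == 1) maps to BLOCK_8X8 so that the chroma
  // block is still 4x4.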
1641   BLOCK_SIZE bs = bsize;
1642   switch (bsize) {
1643     case BLOCK_4X4:
1644       if (subsampling_x == 1 && subsampling_y == 1)
1645         bs = BLOCK_8X8;
1646       else if (subsampling_x == 1)
1647         bs = BLOCK_8X4;
1648       else if (subsampling_y == 1)
1649         bs = BLOCK_4X8;
1650       break;
1651     case BLOCK_4X8:
1652       if (subsampling_x == 1 && subsampling_y == 1)
1653         bs = BLOCK_8X8;
1654       else if (subsampling_x == 1)
1655         bs = BLOCK_8X8;
1656       else if (subsampling_y == 1)
1657         bs = BLOCK_4X8;
1658       break;
1659     case BLOCK_8X4:
1660       if (subsampling_x == 1 && subsampling_y == 1)
1661         bs = BLOCK_8X8;
1662       else if (subsampling_x == 1)
1663         bs = BLOCK_8X4;
1664       else if (subsampling_y == 1)
1665         bs = BLOCK_8X8;
1666       break;
1667     case BLOCK_4X16:
1668       if (subsampling_x == 1 && subsampling_y == 1)
1669         bs = BLOCK_8X16;
1670       else if (subsampling_x == 1)
1671         bs = BLOCK_8X16;
1672       else if (subsampling_y == 1)
1673         bs = BLOCK_4X16;
1674       break;
1675     case BLOCK_16X4:
1676       if (subsampling_x == 1 && subsampling_y == 1)
1677         bs = BLOCK_16X8;
1678       else if (subsampling_x == 1)
1679         bs = BLOCK_16X4;
1680       else if (subsampling_y == 1)
1681         bs = BLOCK_16X8;
1682       break;
1683     default: break;
1684   }
1685   return bs;
1686 }
1687 
1688 void av1_predict_intra_block(const MACROBLOCKD *xd, BLOCK_SIZE sb_size,
1689                              int enable_intra_edge_filter, int wpx, int hpx,
1690                              TX_SIZE tx_size, PREDICTION_MODE mode,
1691                              int angle_delta, int use_palette,
1692                              FILTER_INTRA_MODE filter_intra_mode,
1693                              const uint8_t *ref, int ref_stride, uint8_t *dst,
1694                              int dst_stride, int col_off, int row_off,
1695                              int plane) {
1696   const MB_MODE_INFO *const mbmi = xd->mi[0];
1697   const int txwpx = tx_size_wide[tx_size];
1698   const int txhpx = tx_size_high[tx_size];
1699   const int x = col_off << MI_SIZE_LOG2;
1700   const int y = row_off << MI_SIZE_LOG2;
1701   const int is_hbd = is_cur_buf_hbd(xd);
1702 
1703   assert(mode < INTRA_MODES);
1704 
1705   if (use_palette) {
1706     int r, c;
1707     const uint8_t *const map = xd->plane[plane != 0].color_index_map +
1708                                xd->color_index_map_offset[plane != 0];
1709     const uint16_t *const palette =
1710         mbmi->palette_mode_info.palette_colors + plane * PALETTE_MAX_SIZE;
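    // Palette blocks bypass spatial prediction entirely: each pixel is looked
    // up in the per-plane palette through the colour index map, with wpx as
    // the row stride of the map.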
1711     if (is_hbd) {
1712       uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
1713       for (r = 0; r < txhpx; ++r) {
1714         for (c = 0; c < txwpx; ++c) {
1715           dst16[r * dst_stride + c] = palette[map[(r + y) * wpx + c + x]];
1716         }
1717       }
1718     } else {
1719       for (r = 0; r < txhpx; ++r) {
1720         for (c = 0; c < txwpx; ++c) {
1721           dst[r * dst_stride + c] =
1722               (uint8_t)palette[map[(r + y) * wpx + c + x]];
1723         }
1724       }
1725     }
1726     return;
1727   }
1728 
1729   const struct macroblockd_plane *const pd = &xd->plane[plane];
1730   const int ss_x = pd->subsampling_x;
1731   const int ss_y = pd->subsampling_y;
1732   const int have_top =
1733       row_off || (ss_y ? xd->chroma_up_available : xd->up_available);
1734   const int have_left =
1735       col_off || (ss_x ? xd->chroma_left_available : xd->left_available);
1736 
1737   // Distance from the right edge of this prediction block to the frame's
1738   // right edge
1739   const int xr = (xd->mb_to_right_edge >> (3 + ss_x)) + wpx - x - txwpx;
1740   // Distance from the bottom edge of this prediction block to the frame's
1741   // bottom edge
1742   const int yd = (xd->mb_to_bottom_edge >> (3 + ss_y)) + hpx - y - txhpx;
1743   const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
1744   const int is_dr_mode = av1_is_directional_mode(mode);
1745 
1746   // The computations in this function, as well as in build_intra_predictors(),
1747   // are generalized for all intra modes. Some of these operations are not
1748   // required for the non-directional intra modes (i.e., DC, SMOOTH, SMOOTH_H,
1749   // SMOOTH_V, and PAETH), which only need the left and top neighbors. Hence, a
1750   // separate function, build_non_directional_intra_predictors(), is used for
1751   // these modes to avoid redundant computations while generating pred data.
1752 
1753   const int n_top_px = have_top ? AOMMIN(txwpx, xr + txwpx) : 0;
1754   const int n_left_px = have_left ? AOMMIN(txhpx, yd + txhpx) : 0;
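  // xr and yd can be negative for blocks that straddle the right/bottom frame
  // edge, in which case the counts above are clamped to the pixels that
  // actually lie inside the frame.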
1755   if (!use_filter_intra && !is_dr_mode) {
1756 #if CONFIG_AV1_HIGHBITDEPTH
1757     if (is_hbd) {
1758       highbd_build_non_directional_intra_predictors(
1759           ref, ref_stride, dst, dst_stride, mode, tx_size, n_top_px, n_left_px,
1760           xd->bd);
1761       return;
1762     }
1763 #endif  // CONFIG_AV1_HIGHBITDEPTH
1764     build_non_directional_intra_predictors(ref, ref_stride, dst, dst_stride,
1765                                            mode, tx_size, n_top_px, n_left_px);
1766     return;
1767   }
1768 
1769   const int txw = tx_size_wide_unit[tx_size];
1770   const int txh = tx_size_high_unit[tx_size];
1771   const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
1772   const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);
1773   const int right_available =
1774       mi_col + ((col_off + txw) << ss_x) < xd->tile.mi_col_end;
1775   const int bottom_available =
1776       (yd > 0) && (mi_row + ((row_off + txh) << ss_y) < xd->tile.mi_row_end);
1777 
1778   const PARTITION_TYPE partition = mbmi->partition;
1779 
1780   BLOCK_SIZE bsize = mbmi->bsize;
1781   // Scale up so the chroma component block size is at least 4x4.
1782   if (ss_x || ss_y) {
1783     bsize = scale_chroma_bsize(bsize, ss_x, ss_y);
1784   }
1785 
1786   int p_angle = 0;
1787   int need_top_right = extend_modes[mode] & NEED_ABOVERIGHT;
1788   int need_bottom_left = extend_modes[mode] & NEED_BOTTOMLEFT;
1789 
1790   if (use_filter_intra) {
1791     need_top_right = 0;
1792     need_bottom_left = 0;
1793   }
1794   if (is_dr_mode) {
1795     p_angle = mode_to_angle_map[mode] + angle_delta;
1796     need_top_right = p_angle < 90;
1797     need_bottom_left = p_angle > 180;
1798   }
1799 
1800   // Possible states for have_top_right (TR) and have_bottom_left (BL):
1801   // -1 : TR and BL are not needed
1802   //  0 : TR and BL are needed but not available
1803   // > 0 : TR and BL are needed and pixels are available
1804   const int have_top_right =
1805       need_top_right ? has_top_right(sb_size, bsize, mi_row, mi_col, have_top,
1806                                      right_available, partition, tx_size,
1807                                      row_off, col_off, ss_x, ss_y)
1808                      : -1;
1809   const int have_bottom_left =
1810       need_bottom_left ? has_bottom_left(sb_size, bsize, mi_row, mi_col,
1811                                          bottom_available, have_left, partition,
1812                                          tx_size, row_off, col_off, ss_x, ss_y)
1813                        : -1;
1814 
1815   const int disable_edge_filter = !enable_intra_edge_filter;
1816   const int intra_edge_filter_type = get_intra_edge_filter_type(xd, plane);
1817   const int n_topright_px =
1818       have_top_right > 0 ? AOMMIN(txwpx, xr) : have_top_right;
1819   const int n_bottomleft_px =
1820       have_bottom_left > 0 ? AOMMIN(txhpx, yd) : have_bottom_left;
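  // have_top_right/have_bottom_left values of -1 (not needed) and 0 (needed
  // but unavailable) are passed through unchanged so the build functions can
  // distinguish the two cases; only a positive count is clamped to the frame.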
1821 #if CONFIG_AV1_HIGHBITDEPTH
1822   if (is_hbd) {
1823     highbd_build_directional_and_filter_intra_predictors(
1824         ref, ref_stride, dst, dst_stride, mode, p_angle, filter_intra_mode,
1825         tx_size, disable_edge_filter, n_top_px, n_topright_px, n_left_px,
1826         n_bottomleft_px, intra_edge_filter_type, xd->bd);
1827     return;
1828   }
1829 #endif
1830   build_directional_and_filter_intra_predictors(
1831       ref, ref_stride, dst, dst_stride, mode, p_angle, filter_intra_mode,
1832       tx_size, disable_edge_filter, n_top_px, n_topright_px, n_left_px,
1833       n_bottomleft_px, intra_edge_filter_type);
1834 }
1835 
1836 void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
1837                                     int plane, int blk_col, int blk_row,
1838                                     TX_SIZE tx_size) {
1839   const MB_MODE_INFO *const mbmi = xd->mi[0];
1840   struct macroblockd_plane *const pd = &xd->plane[plane];
1841   const int dst_stride = pd->dst.stride;
1842   uint8_t *dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << MI_SIZE_LOG2];
1843   const PREDICTION_MODE mode =
1844       (plane == AOM_PLANE_Y) ? mbmi->mode : get_uv_mode(mbmi->uv_mode);
1845   const int use_palette = mbmi->palette_mode_info.palette_size[plane != 0] > 0;
1846   const FILTER_INTRA_MODE filter_intra_mode =
1847       (plane == AOM_PLANE_Y && mbmi->filter_intra_mode_info.use_filter_intra)
1848           ? mbmi->filter_intra_mode_info.filter_intra_mode
1849           : FILTER_INTRA_MODES;
1850   const int angle_delta = mbmi->angle_delta[plane != AOM_PLANE_Y] * ANGLE_STEP;
1851   const SequenceHeader *seq_params = cm->seq_params;
1852 
1853 #if !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
1854   if (plane != AOM_PLANE_Y && mbmi->uv_mode == UV_CFL_PRED) {
1855 #if CONFIG_DEBUG
1856     assert(is_cfl_allowed(xd));
1857     const BLOCK_SIZE plane_bsize =
1858         get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
1859     (void)plane_bsize;
1860     assert(plane_bsize < BLOCK_SIZES_ALL);
1861     if (!xd->lossless[mbmi->segment_id]) {
1862       assert(blk_col == 0);
1863       assert(blk_row == 0);
1864       assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]);
1865       assert(block_size_high[plane_bsize] == tx_size_high[tx_size]);
1866     }
1867 #endif
1868     CFL_CTX *const cfl = &xd->cfl;
1869     CFL_PRED_TYPE pred_plane = get_cfl_pred_type(plane);
1870     if (!cfl->dc_pred_is_cached[pred_plane]) {
1871       av1_predict_intra_block(xd, seq_params->sb_size,
1872                               seq_params->enable_intra_edge_filter, pd->width,
1873                               pd->height, tx_size, mode, angle_delta,
1874                               use_palette, filter_intra_mode, dst, dst_stride,
1875                               dst, dst_stride, blk_col, blk_row, plane);
1876       if (cfl->use_dc_pred_cache) {
1877         cfl_store_dc_pred(xd, dst, pred_plane, tx_size_wide[tx_size]);
1878         cfl->dc_pred_is_cached[pred_plane] = true;
1879       }
1880     } else {
1881       cfl_load_dc_pred(xd, dst, dst_stride, tx_size, pred_plane);
1882     }
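    // The DC prediction now in dst serves as the base to which the CfL
    // predictor adds the scaled luma AC contribution.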
1883     av1_cfl_predict_block(xd, dst, dst_stride, tx_size, plane);
1884     return;
1885   }
1886 #endif  // !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
1887   av1_predict_intra_block(
1888       xd, seq_params->sb_size, seq_params->enable_intra_edge_filter, pd->width,
1889       pd->height, tx_size, mode, angle_delta, use_palette, filter_intra_mode,
1890       dst, dst_stride, dst, dst_stride, blk_col, blk_row, plane);
1891 }
1892 
1893 void av1_init_intra_predictors(void) {
1894   aom_once(init_intra_predictors_internal);
1895 }
1896