// xref: /aosp_15_r20/external/libaom/av1/common/av1_txfm.c (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
/*
 * Copyright (c) 2017, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
11 
#include <inttypes.h>

#include "config/aom_dsp_rtcd.h"
#include "config/av1_rtcd.h"

#include "av1/common/av1_txfm.h"
16 
// Fixed-point cosine table:
//   av1_cospi_arr_data[i][j] =
//       (int)round(cos(PI*j/128) * (1<<(cos_bit_min+i)));
// for j = 0..63. Each successive row doubles the scale; the j = 0 entries
// (1024, 2048, 4096, 8192) show the rows carry 10, 11, 12 and 13 fractional
// bits respectively.
const int32_t av1_cospi_arr_data[4][64] = {
  { 1024, 1024, 1023, 1021, 1019, 1016, 1013, 1009, 1004, 999, 993, 987, 980,
    972,  964,  955,  946,  936,  926,  915,  903,  891,  878, 865, 851, 837,
    822,  807,  792,  775,  759,  742,  724,  706,  688,  669, 650, 630, 610,
    590,  569,  548,  526,  505,  483,  460,  438,  415,  392, 369, 345, 321,
    297,  273,  249,  224,  200,  175,  150,  125,  100,  75,  50,  25 },
  { 2048, 2047, 2046, 2042, 2038, 2033, 2026, 2018, 2009, 1998, 1987,
    1974, 1960, 1945, 1928, 1911, 1892, 1872, 1851, 1829, 1806, 1782,
    1757, 1730, 1703, 1674, 1645, 1615, 1583, 1551, 1517, 1483, 1448,
    1412, 1375, 1338, 1299, 1260, 1220, 1179, 1138, 1096, 1053, 1009,
    965,  921,  876,  830,  784,  737,  690,  642,  595,  546,  498,
    449,  400,  350,  301,  251,  201,  151,  100,  50 },
  { 4096, 4095, 4091, 4085, 4076, 4065, 4052, 4036, 4017, 3996, 3973,
    3948, 3920, 3889, 3857, 3822, 3784, 3745, 3703, 3659, 3612, 3564,
    3513, 3461, 3406, 3349, 3290, 3229, 3166, 3102, 3035, 2967, 2896,
    2824, 2751, 2675, 2598, 2520, 2440, 2359, 2276, 2191, 2106, 2019,
    1931, 1842, 1751, 1660, 1567, 1474, 1380, 1285, 1189, 1092, 995,
    897,  799,  700,  601,  501,  401,  301,  201,  101 },
  { 8192, 8190, 8182, 8170, 8153, 8130, 8103, 8071, 8035, 7993, 7946,
    7895, 7839, 7779, 7713, 7643, 7568, 7489, 7405, 7317, 7225, 7128,
    7027, 6921, 6811, 6698, 6580, 6458, 6333, 6203, 6070, 5933, 5793,
    5649, 5501, 5351, 5197, 5040, 4880, 4717, 4551, 4383, 4212, 4038,
    3862, 3683, 3503, 3320, 3135, 2948, 2760, 2570, 2378, 2185, 1990,
    1795, 1598, 1401, 1202, 1003, 803,  603,  402,  201 }
};
43 
// Fixed-point sine table:
//   av1_sinpi_arr_data[i][j] =
//       (int)round((sqrt(2) * sin(j*Pi/9) * 2 / 3) * (1 << (cos_bit_min + i)))
// for j = 0..4, then adjusted so that in every row the elements j=1 and j=2
// sum exactly to element j=4.
const int32_t av1_sinpi_arr_data[4][5] = { { 0, 330, 621, 836, 951 },
                                           { 0, 660, 1241, 1672, 1901 },
                                           { 0, 1321, 2482, 3344, 3803 },
                                           { 0, 2642, 4964, 6689, 7606 } };
50 
51 // The reduced bit-width arrays are only used in the Arm Neon implementations
52 // in av1_fwd_txfm2d_neon.c for now.
53 #if HAVE_NEON
// Constants are stored in groups of four, where symmetrical constants in the
// cospi array are stored adjacent in memory, followed immediately by the same
// constants but negated, i.e.:
//   f(i,j) = (int)round(cos(PI*j/128) * (1<<(cos_bit_min+i))) << (3-i)
// and then in memory we store 4-tuples of constants together as:
//   f4(i,j) = [ f(i,j), f(i,64-j), -f(i,j), -f(i,64-j) ]
//
// Constants are stored in Q2.13 format, see:
// https://en.wikipedia.org/wiki/Q_(number_format)
//
// The order of the constants is such that increasing subdivisions of 64 store
// f4 tuples contiguously (32 tuples, i.e. 128 values, per row):
// av1_cospi_arr_q13_data[i] = {
//   f4(i,32),           // f(i,32) twice
//   f4(i,16),           // f(i,16) and f(i,48); f4(i,32) is already above.
//   f4(i,8), f4(i,24),  // f4(i,16) and f4(i,32) are already above.
//   f4(i,4), f4(i,12), f4(i,20), f4(i,28),
//   f4(i,2), f4(i,6), f4(i,10), f4(i,14), f4(i,18), ...
//   f4(i,1), f4(i,3), f4(i,5), f4(i,7), f4(i,9), f4(i,11), ...
// }
const int16_t av1_cospi_arr_q13_data[4][128] = {
  {
      5792,  5792,  -5792, -5792, 7568,  3136,  -7568, -3136, 8032,  1600,
      -8032, -1600, 6808,  4552,  -6808, -4552, 8152,  800,   -8152, -800,
      7840,  2376,  -7840, -2376, 7224,  3864,  -7224, -3864, 6336,  5200,
      -6336, -5200, 8184,  400,   -8184, -400,  8104,  1200,  -8104, -1200,
      7944,  1992,  -7944, -1992, 7712,  2760,  -7712, -2760, 7408,  3504,
      -7408, -3504, 7024,  4208,  -7024, -4208, 6576,  4880,  -6576, -4880,
      6072,  5504,  -6072, -5504, 8192,  200,   -8192, -200,  8168,  600,
      -8168, -600,  8128,  1000,  -8128, -1000, 8072,  1400,  -8072, -1400,
      7992,  1792,  -7992, -1792, 7896,  2184,  -7896, -2184, 7776,  2568,
      -7776, -2568, 7640,  2952,  -7640, -2952, 7488,  3320,  -7488, -3320,
      7320,  3680,  -7320, -3680, 7128,  4040,  -7128, -4040, 6920,  4384,
      -6920, -4384, 6696,  4720,  -6696, -4720, 6456,  5040,  -6456, -5040,
      6200,  5352,  -6200, -5352, 5936,  5648,  -5936, -5648,
  },
  {
      5792,  5792,  -5792, -5792, 7568,  3136,  -7568, -3136, 8036,  1600,
      -8036, -1600, 6812,  4552,  -6812, -4552, 8152,  804,   -8152, -804,
      7840,  2380,  -7840, -2380, 7224,  3860,  -7224, -3860, 6332,  5196,
      -6332, -5196, 8184,  400,   -8184, -400,  8104,  1204,  -8104, -1204,
      7948,  1992,  -7948, -1992, 7712,  2760,  -7712, -2760, 7404,  3504,
      -7404, -3504, 7028,  4212,  -7028, -4212, 6580,  4880,  -6580, -4880,
      6068,  5500,  -6068, -5500, 8188,  200,   -8188, -200,  8168,  604,
      -8168, -604,  8132,  1004,  -8132, -1004, 8072,  1400,  -8072, -1400,
      7992,  1796,  -7992, -1796, 7896,  2184,  -7896, -2184, 7780,  2568,
      -7780, -2568, 7644,  2948,  -7644, -2948, 7488,  3320,  -7488, -3320,
      7316,  3684,  -7316, -3684, 7128,  4036,  -7128, -4036, 6920,  4384,
      -6920, -4384, 6696,  4716,  -6696, -4716, 6460,  5040,  -6460, -5040,
      6204,  5352,  -6204, -5352, 5932,  5648,  -5932, -5648,
  },
  {
      5792,  5792,  -5792, -5792, 7568,  3134,  -7568, -3134, 8034,  1598,
      -8034, -1598, 6812,  4552,  -6812, -4552, 8152,  802,   -8152, -802,
      7840,  2378,  -7840, -2378, 7224,  3862,  -7224, -3862, 6332,  5196,
      -6332, -5196, 8182,  402,   -8182, -402,  8104,  1202,  -8104, -1202,
      7946,  1990,  -7946, -1990, 7714,  2760,  -7714, -2760, 7406,  3502,
      -7406, -3502, 7026,  4212,  -7026, -4212, 6580,  4880,  -6580, -4880,
      6070,  5502,  -6070, -5502, 8190,  202,   -8190, -202,  8170,  602,
      -8170, -602,  8130,  1002,  -8130, -1002, 8072,  1400,  -8072, -1400,
      7992,  1794,  -7992, -1794, 7896,  2184,  -7896, -2184, 7778,  2570,
      -7778, -2570, 7644,  2948,  -7644, -2948, 7490,  3320,  -7490, -3320,
      7318,  3684,  -7318, -3684, 7128,  4038,  -7128, -4038, 6922,  4382,
      -6922, -4382, 6698,  4718,  -6698, -4718, 6458,  5040,  -6458, -5040,
      6204,  5350,  -6204, -5350, 5934,  5648,  -5934, -5648,
  },
  {
      5793,  5793,  -5793, -5793, 7568,  3135,  -7568, -3135, 8035,  1598,
      -8035, -1598, 6811,  4551,  -6811, -4551, 8153,  803,   -8153, -803,
      7839,  2378,  -7839, -2378, 7225,  3862,  -7225, -3862, 6333,  5197,
      -6333, -5197, 8182,  402,   -8182, -402,  8103,  1202,  -8103, -1202,
      7946,  1990,  -7946, -1990, 7713,  2760,  -7713, -2760, 7405,  3503,
      -7405, -3503, 7027,  4212,  -7027, -4212, 6580,  4880,  -6580, -4880,
      6070,  5501,  -6070, -5501, 8190,  201,   -8190, -201,  8170,  603,
      -8170, -603,  8130,  1003,  -8130, -1003, 8071,  1401,  -8071, -1401,
      7993,  1795,  -7993, -1795, 7895,  2185,  -7895, -2185, 7779,  2570,
      -7779, -2570, 7643,  2948,  -7643, -2948, 7489,  3320,  -7489, -3320,
      7317,  3683,  -7317, -3683, 7128,  4038,  -7128, -4038, 6921,  4383,
      -6921, -4383, 6698,  4717,  -6698, -4717, 6458,  5040,  -6458, -5040,
      6203,  5351,  -6203, -5351, 5933,  5649,  -5933, -5649,
  }
};
136 
// Q13 variant of the sine table (the leading zero element is not stored):
//   av1_sinpi_arr_q13_data[i][j] =
//     round((sqrt2 * sin((j+1)*Pi/9) * 2/3) * (1 << (cos_bit_min + i))) << (3-i)
// adjusted so that in every row the elements j=0 and j=1 sum exactly to
// element j=3.
// See also: https://en.wikipedia.org/wiki/Q_(number_format)
const int16_t av1_sinpi_arr_q13_data[4][4] = { { 2640, 4968, 6688, 7608 },
                                               { 2640, 4964, 6688, 7604 },
                                               { 2642, 4964, 6688, 7606 },
                                               { 2642, 4964, 6689, 7606 } };
145 
// Constants are stored in pairs, where symmetrical constants in the
// cospi array are stored adjacent in memory, i.e.:
//   f(i,j) = (int)round(cos(PI*j/128) * (1<<(cos_bit_min+i)))
// and then in memory we store pairs of constants together as:
//   f2(i,j) = [ f(i,j), f(i,64-j) ]
// Each row holds f2(i,0), f2(i,1), ..., f2(i,32): 33 pairs, i.e. 66 values.
const int32_t av1_cospi_arr_s32_data[4][66] = {
  {
      1024, 0,    1024, 25,   1023, 50,   1021, 75,  1019, 100, 1016,
      125,  1013, 150,  1009, 175,  1004, 200,  999, 224,  993, 249,
      987,  273,  980,  297,  972,  321,  964,  345, 955,  369, 946,
      392,  936,  415,  926,  438,  915,  460,  903, 483,  891, 505,
      878,  526,  865,  548,  851,  569,  837,  590, 822,  610, 807,
      630,  792,  650,  775,  669,  759,  688,  742, 706,  724, 724,
  },
  {
      2048, 0,    2047, 50,   2046, 100,  2042, 151,  2038, 201,  2033,
      251,  2026, 301,  2018, 350,  2009, 400,  1998, 449,  1987, 498,
      1974, 546,  1960, 595,  1945, 642,  1928, 690,  1911, 737,  1892,
      784,  1872, 830,  1851, 876,  1829, 921,  1806, 965,  1782, 1009,
      1757, 1053, 1730, 1096, 1703, 1138, 1674, 1179, 1645, 1220, 1615,
      1260, 1583, 1299, 1551, 1338, 1517, 1375, 1483, 1412, 1448, 1448,
  },
  {
      4096, 0,    4095, 101,  4091, 201,  4085, 301,  4076, 401,  4065,
      501,  4052, 601,  4036, 700,  4017, 799,  3996, 897,  3973, 995,
      3948, 1092, 3920, 1189, 3889, 1285, 3857, 1380, 3822, 1474, 3784,
      1567, 3745, 1660, 3703, 1751, 3659, 1842, 3612, 1931, 3564, 2019,
      3513, 2106, 3461, 2191, 3406, 2276, 3349, 2359, 3290, 2440, 3229,
      2520, 3166, 2598, 3102, 2675, 3035, 2751, 2967, 2824, 2896, 2896,
  },
  {
      8192, 0,    8190, 201,  8182, 402,  8170, 603,  8153, 803,  8130,
      1003, 8103, 1202, 8071, 1401, 8035, 1598, 7993, 1795, 7946, 1990,
      7895, 2185, 7839, 2378, 7779, 2570, 7713, 2760, 7643, 2948, 7568,
      3135, 7489, 3320, 7405, 3503, 7317, 3683, 7225, 3862, 7128, 4038,
      7027, 4212, 6921, 4383, 6811, 4551, 6698, 4717, 6580, 4880, 6458,
      5040, 6333, 5197, 6203, 5351, 6070, 5501, 5933, 5649, 5793, 5793,
  }
};
185 
186 #endif  // HAVE_NEON
187 
// Applies a shift to every one of the `size` elements of `arr`, in place.
//   bit > 0 : each element becomes round_shift(element, bit).
//   bit < 0 : each element is multiplied by 2^(-bit) using 64-bit arithmetic
//             and the product is clamped to [INT32_MIN, INT32_MAX].
//   bit == 0: the array is left untouched.
void av1_round_shift_array_c(int32_t *arr, int size, int bit) {
  if (bit == 0) return;

  if (bit > 0) {
    for (int i = 0; i < size; i++) {
      arr[i] = round_shift(arr[i], bit);
    }
    return;
  }

  // Negative `bit` requests an up-shift. The multiply is done in int64_t so
  // the product can be saturated to the int32_t range instead of wrapping.
  const int64_t scale = (int64_t)1 << (-bit);
  for (int i = 0; i < size; i++) {
    arr[i] = (int32_t)clamp64(scale * arr[i], INT32_MIN, INT32_MAX);
  }
}
205 
206 const TXFM_TYPE av1_txfm_type_ls[5][TX_TYPES_1D] = {
207   { TXFM_TYPE_DCT4, TXFM_TYPE_ADST4, TXFM_TYPE_ADST4, TXFM_TYPE_IDENTITY4 },
208   { TXFM_TYPE_DCT8, TXFM_TYPE_ADST8, TXFM_TYPE_ADST8, TXFM_TYPE_IDENTITY8 },
209   { TXFM_TYPE_DCT16, TXFM_TYPE_ADST16, TXFM_TYPE_ADST16, TXFM_TYPE_IDENTITY16 },
210   { TXFM_TYPE_DCT32, TXFM_TYPE_INVALID, TXFM_TYPE_INVALID,
211     TXFM_TYPE_IDENTITY32 },
212   { TXFM_TYPE_DCT64, TXFM_TYPE_INVALID, TXFM_TYPE_INVALID, TXFM_TYPE_INVALID }
213 };
214 
215 const int8_t av1_txfm_stage_num_list[TXFM_TYPES] = {
216   4,   // TXFM_TYPE_DCT4
217   6,   // TXFM_TYPE_DCT8
218   8,   // TXFM_TYPE_DCT16
219   10,  // TXFM_TYPE_DCT32
220   12,  // TXFM_TYPE_DCT64
221   7,   // TXFM_TYPE_ADST4
222   8,   // TXFM_TYPE_ADST8
223   10,  // TXFM_TYPE_ADST16
224   1,   // TXFM_TYPE_IDENTITY4
225   1,   // TXFM_TYPE_IDENTITY8
226   1,   // TXFM_TYPE_IDENTITY16
227   1,   // TXFM_TYPE_IDENTITY32
228 };
229 
#if CONFIG_COEFFICIENT_RANGE_CHECKING
// Writes `label` followed by the `size` entries of `values` to stderr as a
// bracketed, comma-separated list terminated by a newline.
static void range_check_dump_array(const char *label, const int32_t *values,
                                   int32_t size) {
  fprintf(stderr, "%s[", label);
  for (int32_t j = 0; j < size; j++) {
    if (j > 0) fprintf(stderr, ", ");
    fprintf(stderr, "%" PRId32, values[j]);
  }
  fprintf(stderr, "]\n");
}
#endif  // CONFIG_COEFFICIENT_RANGE_CHECKING

// Debug aid. When CONFIG_COEFFICIENT_RANGE_CHECKING is enabled, checks that
// every one of the `size` values in `buf` fits in a signed `bit`-bit integer,
// i.e. lies in [-(2^(bit-1)), 2^(bit-1) - 1]. On a violation it dumps `size`,
// `stage`, the allowed range and the contents of both `input` and `buf` to
// stderr, then asserts. `input` and `stage` are only used for that report.
// When the option is disabled the function does nothing.
// NOTE(review): the range computation shifts by (bit - 1), so it assumes
// 1 <= bit <= 63 - confirm callers never pass values outside that range.
void av1_range_check_buf(int32_t stage, const int32_t *input,
                         const int32_t *buf, int32_t size, int8_t bit) {
#if CONFIG_COEFFICIENT_RANGE_CHECKING
  const int64_t max_value = (1LL << (bit - 1)) - 1;
  const int64_t min_value = -(1LL << (bit - 1));

  int in_range = 1;
  for (int i = 0; i < size; ++i) {
    if (buf[i] < min_value || buf[i] > max_value) {
      in_range = 0;
      break;  // One violation is enough to trigger the report.
    }
  }

  if (!in_range) {
    fprintf(stderr, "Error: coeffs contain out-of-range values\n");
    fprintf(stderr, "size: %" PRId32 "\n", size);
    fprintf(stderr, "stage: %" PRId32 "\n", stage);
    fprintf(stderr, "allowed range: [%" PRId64 ";%" PRId64 "]\n", min_value,
            max_value);

    range_check_dump_array("coeffs: ", input, size);
    range_check_dump_array("   buf: ", buf, size);
    fprintf(stderr, "\n");
  }

  assert(in_range);
#else
  (void)stage;
  (void)input;
  (void)buf;
  (void)size;
  (void)bit;
#endif
}
279