1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: AMD
23  *
24  */
25 
26 #include "dc.h"
27 #include "opp.h"
28 #include "color_gamma.h"
29 
/* When calculating LUT values the first region and at least one subsequent
 * region are calculated with full precision. These defines are a demarcation
 * of where the second region starts and ends.
 * These are hardcoded values to avoid recalculating them in loops.
 *
 * Both bounds are inclusive and are compared against
 * cal_buffer->buffer_index in translate_from_linear_space().
 */
#define PRECISE_LUT_REGION_START 224
#define PRECISE_LUT_REGION_END 239

/* X coordinates of the hardware LUT points, filled in by
 * setup_x_points_distribution(). The two entries at MAX_HW_POINTS and
 * MAX_HW_POINTS + 1 are both set to 128 there.
 */
static struct hw_x_point coordinates_x[MAX_HW_POINTS + 2];
39 
// Hardcoded table that depends on setup_x_points_distribution() and
// sdr_level=80. If the X points are changed, the PQ Y points will be
// misaligned and a new table will need to be generated; alternatively, use
// the old method that calls compute_pq().
// The last point is above the PQ formula range (0-125 in normalized FP16).
// The value for the last point (128) is chosen so that interpolation from
// 120 to 128 gives 1.0 for X = 125.0.
// The first few points are 0: the HW LUT is mirrored around zero, so making
// the first segment 0 to 0 effectively clips it, and these are very low PQ
// codes. The minimum nonzero value below (216825) is a little under 12-bit
// PQ code 1.
49 static const unsigned long long pq_divider = 1000000000;
50 static const unsigned long long pq_numerator[MAX_HW_POINTS + 1] = {
51 		0, 0, 0, 0, 216825, 222815,
52 		228691, 234460, 240128, 245702, 251187, 256587,
53 		261908, 267152, 272324, 277427, 282465, 292353,
54 		302011, 311456, 320704, 329768, 338661, 347394,
55 		355975, 364415, 372721, 380900, 388959, 396903,
56 		404739, 412471, 420104, 435089, 449727, 464042,
57 		478060, 491800, 505281, 518520, 531529, 544324,
58 		556916, 569316, 581533, 593576, 605454, 617175,
59 		628745, 651459, 673643, 695337, 716578, 737395,
60 		757817, 777869, 797572, 816947, 836012, 854782,
61 		873274, 891500, 909474, 927207, 944709, 979061,
62 		1012601, 1045391, 1077485, 1108931, 1139770, 1170042,
63 		1199778, 1229011, 1257767, 1286071, 1313948, 1341416,
64 		1368497, 1395207, 1421563, 1473272, 1523733, 1573041,
65 		1621279, 1668520, 1714828, 1760262, 1804874, 1848710,
66 		1891814, 1934223, 1975973, 2017096, 2057622, 2097578,
67 		2136989, 2214269, 2289629, 2363216, 2435157, 2505564,
68 		2574539, 2642169, 2708536, 2773711, 2837760, 2900742,
69 		2962712, 3023719, 3083810, 3143025, 3201405, 3315797,
70 		3427246, 3535974, 3642181, 3746038, 3847700, 3947305,
71 		4044975, 4140823, 4234949, 4327445, 4418394, 4507872,
72 		4595951, 4682694, 4768161, 4935487, 5098326, 5257022,
73 		5411878, 5563161, 5711107, 5855928, 5997812, 6136929,
74 		6273436, 6407471, 6539163, 6668629, 6795976, 6921304,
75 		7044703, 7286050, 7520623, 7748950, 7971492, 8188655,
76 		8400800, 8608247, 8811286, 9010175, 9205149, 9396421,
77 		9584186, 9768620, 9949889, 10128140, 10303513, 10646126,
78 		10978648, 11301874, 11616501, 11923142, 12222340, 12514578,
79 		12800290, 13079866, 13353659, 13621988, 13885144, 14143394,
80 		14396982, 14646132, 14891052, 15368951, 15832050, 16281537,
81 		16718448, 17143696, 17558086, 17962337, 18357092, 18742927,
82 		19120364, 19489877, 19851894, 20206810, 20554983, 20896745,
83 		21232399, 21886492, 22519276, 23132491, 23727656, 24306104,
84 		24869013, 25417430, 25952292, 26474438, 26984626, 27483542,
85 		27971811, 28450000, 28918632, 29378184, 29829095, 30706591,
86 		31554022, 32373894, 33168387, 33939412, 34688657, 35417620,
87 		36127636, 36819903, 37495502, 38155408, 38800507, 39431607,
88 		40049446, 40654702, 41247996, 42400951, 43512407, 44585892,
89 		45624474, 46630834, 47607339, 48556082, 49478931, 50377558,
90 		51253467, 52108015, 52942436, 53757848, 54555277, 55335659,
91 		56099856, 57582802, 59009766, 60385607, 61714540, 63000246,
92 		64245964, 65454559, 66628579, 67770304, 68881781, 69964856,
93 		71021203, 72052340, 73059655, 74044414, 75007782, 76874537,
94 		78667536, 80393312, 82057522, 83665098, 85220372, 86727167,
95 		88188883, 89608552, 90988895, 92332363, 93641173, 94917336,
96 		96162685, 97378894, 98567496, 100867409, 103072439, 105191162,
97 		107230989, 109198368, 111098951, 112937723, 114719105, 116447036,
98 		118125045, 119756307, 121343688, 122889787, 124396968, 125867388,
99 		127303021, 130077030, 132731849, 135278464, 137726346, 140083726,
100 		142357803, 144554913, 146680670, 148740067, 150737572, 152677197,
101 		154562560, 156396938, 158183306, 159924378, 161622632, 164899602,
102 		168030318, 171028513, 173906008, 176673051, 179338593, 181910502,
103 		184395731, 186800463, 189130216, 191389941, 193584098, 195716719,
104 		197791463, 199811660, 201780351, 205574133, 209192504, 212652233,
105 		215967720, 219151432, 222214238, 225165676, 228014163, 230767172,
106 		233431363, 236012706, 238516569, 240947800, 243310793, 245609544,
107 		247847696, 252155270, 256257056, 260173059, 263920427, 267513978,
108 		270966613, 274289634, 277493001, 280585542, 283575118, 286468763,
109 		289272796, 291992916, 294634284, 297201585, 299699091, 304500003,
110 		309064541, 313416043, 317574484, 321557096, 325378855, 329052864,
111 		332590655, 336002433, 339297275, 342483294, 345567766, 348557252,
112 		351457680, 354274432, 357012407, 362269536, 367260561, 372012143,
113 		376547060, 380884936, 385042798, 389035522, 392876185, 396576344,
114 		400146265, 403595112, 406931099, 410161619, 413293351, 416332348,
115 		419284117, 424945627, 430313203, 435416697, 440281572, 444929733,
116 		449380160, 453649415, 457752035, 461700854, 465507260, 469181407,
117 		472732388, 476168376, 479496748, 482724188, 485856764, 491858986,
118 		497542280, 502939446, 508078420, 512983199, 517674549, 522170569,
119 		526487126, 530638214, 534636233, 538492233, 542216094, 545816693,
120 		549302035, 552679362, 555955249, 562226134, 568156709, 573782374,
121 		579133244, 584235153, 589110430, 593778512, 598256421, 602559154,
122 		606699989, 610690741, 614541971, 618263157, 621862836, 625348729,
123 		628727839, 635190643, 641295921, 647081261, 652578597, 657815287,
124 		662814957, 667598146, 672182825, 676584810, 680818092, 684895111,
125 		688826974, 692623643, 696294085, 699846401, 703287935, 709864782,
126 		716071394, 721947076, 727525176, 732834238, 737898880, 742740485,
127 		747377745, 751827095, 756103063, 760218552, 764185078, 768012958,
128 		771711474, 775289005, 778753144, 785368225, 791604988, 797503949,
129 		803099452, 808420859, 813493471, 818339244, 822977353, 827424644,
130 		831695997, 835804619, 839762285, 843579541, 847265867, 850829815,
131 		854279128, 860861356, 867061719, 872921445, 878475444, 883753534,
132 		888781386, 893581259, 898172578, 902572393, 906795754, 910856010,
133 		914765057, 918533538, 922171018, 925686119, 929086644, 935571664,
134 		941675560, 947439782, 952899395, 958084324, 963020312, 967729662,
135 		972231821, 976543852, 980680801, 984656009, 988481353, 992167459,
136 		995723865, 999159168, 1002565681};
137 
138 // these are helpers for calculations to reduce stack usage
139 // do not depend on these being preserved across calls
140 
/* Helper to optimize gamma calculation; only used in
 * translate_from_linear_space, in particular to avoid the dc_fixpt_pow
 * function, which is very expensive.
 * The idea is that our regions for X points are exponential and currently
 * they all use the same number of points (NUM_PTS_IN_REGION), and in each
 * region every point is exactly 2x the one at the same index in the previous
 * region. In other words, X[i] = 2 * X[i-NUM_PTS_IN_REGION] for i>=16.
 * The other fact is that (2x)^gamma = 2^gamma * x^gamma.
 * So we compute and save x^gamma for the first 16 points (the first region),
 * and for every next region just multiply by 2^gamma, which can be computed
 * once, and save the result, so that we recursively compute all the values.
 */
152 
/*
 * Regamma coefficients are used for both regamma and degamma. Degamma
 * coefficients are calculated in our formula using the regamma coefficients.
 *
 * Each table has one column per predefined curve. build_coefficients() picks
 * the column from the transfer function type (the last column is the one
 * selected for TRANSFER_FUNCTION_GAMMA26) and divides the raw numerators:
 *   numerator01 / 10000000 -> a0
 *   numerator02 / 1000     -> a1
 *   numerator03 / 1000     -> a2
 *   numerator04 / 1000     -> a3
 *   numerator05 / 1000     -> user_gamma
 */
									 /*sRGB     709     2.2 2.4 P3*/
static const int32_t numerator01[] = { 31308,   180000, 0,  0,  0};
static const int32_t numerator02[] = { 12920,   4500,   0,  0,  0};
static const int32_t numerator03[] = { 55,      99,     0,  0,  0};
static const int32_t numerator04[] = { 55,      99,     0,  0,  0};
static const int32_t numerator05[] = { 2400,    2222,   2200, 2400, 2600};
163 
164 /* one-time setup of X points */
setup_x_points_distribution(void)165 void setup_x_points_distribution(void)
166 {
167 	struct fixed31_32 region_size = dc_fixpt_from_int(128);
168 	int32_t segment;
169 	uint32_t seg_offset;
170 	uint32_t index;
171 	struct fixed31_32 increment;
172 
173 	coordinates_x[MAX_HW_POINTS].x = region_size;
174 	coordinates_x[MAX_HW_POINTS + 1].x = region_size;
175 
176 	for (segment = 6; segment > (6 - NUM_REGIONS); segment--) {
177 		region_size = dc_fixpt_div_int(region_size, 2);
178 		increment = dc_fixpt_div_int(region_size,
179 						NUM_PTS_IN_REGION);
180 		seg_offset = (segment + (NUM_REGIONS - 7)) * NUM_PTS_IN_REGION;
181 		coordinates_x[seg_offset].x = region_size;
182 
183 		for (index = seg_offset + 1;
184 				index < seg_offset + NUM_PTS_IN_REGION;
185 				index++) {
186 			coordinates_x[index].x = dc_fixpt_add
187 					(coordinates_x[index-1].x, increment);
188 		}
189 	}
190 }
191 
log_x_points_distribution(struct dal_logger * logger)192 void log_x_points_distribution(struct dal_logger *logger)
193 {
194 	int i = 0;
195 
196 	if (logger != NULL) {
197 		LOG_GAMMA_WRITE("Log X Distribution\n");
198 
199 		for (i = 0; i < MAX_HW_POINTS; i++)
200 			LOG_GAMMA_WRITE("%llu\n", coordinates_x[i].x.value);
201 	}
202 }
203 
compute_pq(struct fixed31_32 in_x,struct fixed31_32 * out_y)204 static void compute_pq(struct fixed31_32 in_x, struct fixed31_32 *out_y)
205 {
206 	/* consts for PQ gamma formula. */
207 	const struct fixed31_32 m1 =
208 		dc_fixpt_from_fraction(159301758, 1000000000);
209 	const struct fixed31_32 m2 =
210 		dc_fixpt_from_fraction(7884375, 100000);
211 	const struct fixed31_32 c1 =
212 		dc_fixpt_from_fraction(8359375, 10000000);
213 	const struct fixed31_32 c2 =
214 		dc_fixpt_from_fraction(188515625, 10000000);
215 	const struct fixed31_32 c3 =
216 		dc_fixpt_from_fraction(186875, 10000);
217 
218 	struct fixed31_32 l_pow_m1;
219 	struct fixed31_32 base;
220 
221 	if (dc_fixpt_lt(in_x, dc_fixpt_zero))
222 		in_x = dc_fixpt_zero;
223 
224 	l_pow_m1 = dc_fixpt_pow(in_x, m1);
225 	base = dc_fixpt_div(
226 			dc_fixpt_add(c1,
227 					(dc_fixpt_mul(c2, l_pow_m1))),
228 			dc_fixpt_add(dc_fixpt_one,
229 					(dc_fixpt_mul(c3, l_pow_m1))));
230 	*out_y = dc_fixpt_pow(base, m2);
231 }
232 
compute_de_pq(struct fixed31_32 in_x,struct fixed31_32 * out_y)233 static void compute_de_pq(struct fixed31_32 in_x, struct fixed31_32 *out_y)
234 {
235 	/* consts for dePQ gamma formula. */
236 	const struct fixed31_32 m1 =
237 		dc_fixpt_from_fraction(159301758, 1000000000);
238 	const struct fixed31_32 m2 =
239 		dc_fixpt_from_fraction(7884375, 100000);
240 	const struct fixed31_32 c1 =
241 		dc_fixpt_from_fraction(8359375, 10000000);
242 	const struct fixed31_32 c2 =
243 		dc_fixpt_from_fraction(188515625, 10000000);
244 	const struct fixed31_32 c3 =
245 		dc_fixpt_from_fraction(186875, 10000);
246 
247 	struct fixed31_32 l_pow_m1;
248 	struct fixed31_32 base, div;
249 	struct fixed31_32 base2;
250 
251 
252 	if (dc_fixpt_lt(in_x, dc_fixpt_zero))
253 		in_x = dc_fixpt_zero;
254 
255 	l_pow_m1 = dc_fixpt_pow(in_x,
256 			dc_fixpt_div(dc_fixpt_one, m2));
257 	base = dc_fixpt_sub(l_pow_m1, c1);
258 
259 	div = dc_fixpt_sub(c2, dc_fixpt_mul(c3, l_pow_m1));
260 
261 	base2 = dc_fixpt_div(base, div);
262 	// avoid complex numbers
263 	if (dc_fixpt_lt(base2, dc_fixpt_zero))
264 		base2 = dc_fixpt_sub(dc_fixpt_zero, base2);
265 
266 
267 	*out_y = dc_fixpt_pow(base2, dc_fixpt_div(dc_fixpt_one, m1));
268 
269 }
270 
271 
272 /* de gamma, non-linear to linear */
compute_hlg_eotf(struct fixed31_32 in_x,struct fixed31_32 * out_y,uint32_t sdr_white_level,uint32_t max_luminance_nits)273 static void compute_hlg_eotf(struct fixed31_32 in_x,
274 		struct fixed31_32 *out_y,
275 		uint32_t sdr_white_level, uint32_t max_luminance_nits)
276 {
277 	struct fixed31_32 a;
278 	struct fixed31_32 b;
279 	struct fixed31_32 c;
280 	struct fixed31_32 threshold;
281 	struct fixed31_32 x;
282 
283 	struct fixed31_32 scaling_factor =
284 			dc_fixpt_from_fraction(max_luminance_nits, sdr_white_level);
285 	a = dc_fixpt_from_fraction(17883277, 100000000);
286 	b = dc_fixpt_from_fraction(28466892, 100000000);
287 	c = dc_fixpt_from_fraction(55991073, 100000000);
288 	threshold = dc_fixpt_from_fraction(1, 2);
289 
290 	if (dc_fixpt_lt(in_x, threshold)) {
291 		x = dc_fixpt_mul(in_x, in_x);
292 		x = dc_fixpt_div_int(x, 3);
293 	} else {
294 		x = dc_fixpt_sub(in_x, c);
295 		x = dc_fixpt_div(x, a);
296 		x = dc_fixpt_exp(x);
297 		x = dc_fixpt_add(x, b);
298 		x = dc_fixpt_div_int(x, 12);
299 	}
300 	*out_y = dc_fixpt_mul(x, scaling_factor);
301 
302 }
303 
304 /* re gamma, linear to non-linear */
compute_hlg_oetf(struct fixed31_32 in_x,struct fixed31_32 * out_y,uint32_t sdr_white_level,uint32_t max_luminance_nits)305 static void compute_hlg_oetf(struct fixed31_32 in_x, struct fixed31_32 *out_y,
306 		uint32_t sdr_white_level, uint32_t max_luminance_nits)
307 {
308 	struct fixed31_32 a;
309 	struct fixed31_32 b;
310 	struct fixed31_32 c;
311 	struct fixed31_32 threshold;
312 	struct fixed31_32 x;
313 
314 	struct fixed31_32 scaling_factor =
315 			dc_fixpt_from_fraction(sdr_white_level, max_luminance_nits);
316 	a = dc_fixpt_from_fraction(17883277, 100000000);
317 	b = dc_fixpt_from_fraction(28466892, 100000000);
318 	c = dc_fixpt_from_fraction(55991073, 100000000);
319 	threshold = dc_fixpt_from_fraction(1, 12);
320 	x = dc_fixpt_mul(in_x, scaling_factor);
321 
322 
323 	if (dc_fixpt_lt(x, threshold)) {
324 		x = dc_fixpt_mul(x, dc_fixpt_from_fraction(3, 1));
325 		*out_y = dc_fixpt_pow(x, dc_fixpt_half);
326 	} else {
327 		x = dc_fixpt_mul(x, dc_fixpt_from_fraction(12, 1));
328 		x = dc_fixpt_sub(x, b);
329 		x = dc_fixpt_log(x);
330 		x = dc_fixpt_mul(a, x);
331 		*out_y = dc_fixpt_add(x, c);
332 	}
333 }
334 
335 
336 /* one-time pre-compute PQ values - only for sdr_white_level 80 */
precompute_pq(void)337 void precompute_pq(void)
338 {
339 	int i;
340 	struct fixed31_32 *pq_table = mod_color_get_table(type_pq_table);
341 
342 	for (i = 0; i <= MAX_HW_POINTS; i++)
343 		pq_table[i] = dc_fixpt_from_fraction(pq_numerator[i], pq_divider);
344 
345 	/* below is old method that uses run-time calculation in fixed pt space */
346 	/* pow function has problems with arguments too small */
347 	/*
348 	struct fixed31_32 x;
349 	const struct hw_x_point *coord_x = coordinates_x + 32;
350 	struct fixed31_32 scaling_factor =
351 			dc_fixpt_from_fraction(80, 10000);
352 
353 	for (i = 0; i < 32; i++)
354 		pq_table[i] = dc_fixpt_zero;
355 
356 	for (i = 32; i <= MAX_HW_POINTS; i++) {
357 		x = dc_fixpt_mul(coord_x->x, scaling_factor);
358 		compute_pq(x, &pq_table[i]);
359 		++coord_x;
360 	}
361 	*/
362 }
363 
364 /* one-time pre-compute dePQ values - only for max pixel value 125 FP16 */
precompute_de_pq(void)365 void precompute_de_pq(void)
366 {
367 	int i;
368 	struct fixed31_32  y;
369 	uint32_t begin_index, end_index;
370 
371 	struct fixed31_32 scaling_factor = dc_fixpt_from_int(125);
372 	struct fixed31_32 *de_pq_table = mod_color_get_table(type_de_pq_table);
373 	/* X points is 2^-25 to 2^7
374 	 * De-gamma X is 2^-12 to 2^0 – we are skipping first -12-(-25) = 13 regions
375 	 */
376 	begin_index = 13 * NUM_PTS_IN_REGION;
377 	end_index = begin_index + 12 * NUM_PTS_IN_REGION;
378 
379 	for (i = 0; i <= begin_index; i++)
380 		de_pq_table[i] = dc_fixpt_zero;
381 
382 	for (; i <= end_index; i++) {
383 		compute_de_pq(coordinates_x[i].x, &y);
384 		de_pq_table[i] = dc_fixpt_mul(y, scaling_factor);
385 	}
386 
387 	for (; i <= MAX_HW_POINTS; i++)
388 		de_pq_table[i] = de_pq_table[i-1];
389 }
/* Three fixed-point divider values grouped together.
 * NOTE(review): no user of this struct is visible in this part of the file;
 * confirm the meaning of each divider against the code that fills it.
 */
struct dividers {
	struct fixed31_32 divider1;
	struct fixed31_32 divider2;
	struct fixed31_32 divider3;
};
395 
396 
build_coefficients(struct gamma_coefficients * coefficients,enum dc_transfer_func_predefined type)397 static bool build_coefficients(struct gamma_coefficients *coefficients,
398 		enum dc_transfer_func_predefined type)
399 {
400 
401 	uint32_t i = 0;
402 	uint32_t index = 0;
403 	bool ret = true;
404 
405 	if (type == TRANSFER_FUNCTION_SRGB)
406 		index = 0;
407 	else if (type == TRANSFER_FUNCTION_BT709)
408 		index = 1;
409 	else if (type == TRANSFER_FUNCTION_GAMMA22)
410 		index = 2;
411 	else if (type == TRANSFER_FUNCTION_GAMMA24)
412 		index = 3;
413 	else if (type == TRANSFER_FUNCTION_GAMMA26)
414 		index = 4;
415 	else {
416 		ret = false;
417 		goto release;
418 	}
419 
420 	do {
421 		coefficients->a0[i] = dc_fixpt_from_fraction(
422 			numerator01[index], 10000000);
423 		coefficients->a1[i] = dc_fixpt_from_fraction(
424 			numerator02[index], 1000);
425 		coefficients->a2[i] = dc_fixpt_from_fraction(
426 			numerator03[index], 1000);
427 		coefficients->a3[i] = dc_fixpt_from_fraction(
428 			numerator04[index], 1000);
429 		coefficients->user_gamma[i] = dc_fixpt_from_fraction(
430 			numerator05[index], 1000);
431 
432 		++i;
433 	} while (i != ARRAY_SIZE(coefficients->a0));
434 release:
435 	return ret;
436 }
437 
translate_from_linear_space(struct translate_from_linear_space_args * args)438 static struct fixed31_32 translate_from_linear_space(
439 		struct translate_from_linear_space_args *args)
440 {
441 	const struct fixed31_32 one = dc_fixpt_from_int(1);
442 
443 	struct fixed31_32 scratch_1, scratch_2;
444 	struct calculate_buffer *cal_buffer = args->cal_buffer;
445 
446 	if (dc_fixpt_le(one, args->arg))
447 		return one;
448 
449 	if (dc_fixpt_le(args->arg, dc_fixpt_neg(args->a0))) {
450 		scratch_1 = dc_fixpt_add(one, args->a3);
451 		scratch_2 = dc_fixpt_pow(
452 				dc_fixpt_neg(args->arg),
453 				dc_fixpt_recip(args->gamma));
454 		scratch_1 = dc_fixpt_mul(scratch_1, scratch_2);
455 		scratch_1 = dc_fixpt_sub(args->a2, scratch_1);
456 
457 		return scratch_1;
458 	} else if (dc_fixpt_le(args->a0, args->arg)) {
459 		if (cal_buffer->buffer_index == 0) {
460 			cal_buffer->gamma_of_2 = dc_fixpt_pow(dc_fixpt_from_int(2),
461 					dc_fixpt_recip(args->gamma));
462 		}
463 		scratch_1 = dc_fixpt_add(one, args->a3);
464 		/* In the first region (first 16 points) and in the
465 		 * region delimited by START/END we calculate with
466 		 * full precision to avoid error accumulation.
467 		 */
468 		if ((cal_buffer->buffer_index >= PRECISE_LUT_REGION_START &&
469 			cal_buffer->buffer_index <= PRECISE_LUT_REGION_END) ||
470 			(cal_buffer->buffer_index < 16))
471 			scratch_2 = dc_fixpt_pow(args->arg,
472 					dc_fixpt_recip(args->gamma));
473 		else
474 			scratch_2 = dc_fixpt_mul(cal_buffer->gamma_of_2,
475 					cal_buffer->buffer[cal_buffer->buffer_index%16]);
476 
477 		if (cal_buffer->buffer_index != -1) {
478 			cal_buffer->buffer[cal_buffer->buffer_index%16] = scratch_2;
479 			cal_buffer->buffer_index++;
480 		}
481 
482 		scratch_1 = dc_fixpt_mul(scratch_1, scratch_2);
483 		scratch_1 = dc_fixpt_sub(scratch_1, args->a2);
484 
485 		return scratch_1;
486 	} else
487 		return dc_fixpt_mul(args->arg, args->a1);
488 }
489 
490 
translate_from_linear_space_long(struct translate_from_linear_space_args * args)491 static struct fixed31_32 translate_from_linear_space_long(
492 		struct translate_from_linear_space_args *args)
493 {
494 	const struct fixed31_32 one = dc_fixpt_from_int(1);
495 
496 	if (dc_fixpt_lt(one, args->arg))
497 		return one;
498 
499 	if (dc_fixpt_le(args->arg, dc_fixpt_neg(args->a0)))
500 		return dc_fixpt_sub(
501 			args->a2,
502 			dc_fixpt_mul(
503 				dc_fixpt_add(
504 					one,
505 					args->a3),
506 				dc_fixpt_pow(
507 					dc_fixpt_neg(args->arg),
508 					dc_fixpt_recip(args->gamma))));
509 	else if (dc_fixpt_le(args->a0, args->arg))
510 		return dc_fixpt_sub(
511 			dc_fixpt_mul(
512 				dc_fixpt_add(
513 					one,
514 					args->a3),
515 				dc_fixpt_pow(
516 						args->arg,
517 					dc_fixpt_recip(args->gamma))),
518 					args->a2);
519 	else
520 		return dc_fixpt_mul(args->arg, args->a1);
521 }
522 
calculate_gamma22(struct fixed31_32 arg,bool use_eetf,struct calculate_buffer * cal_buffer)523 static struct fixed31_32 calculate_gamma22(struct fixed31_32 arg, bool use_eetf, struct calculate_buffer *cal_buffer)
524 {
525 	struct fixed31_32 gamma = dc_fixpt_from_fraction(22, 10);
526 	struct translate_from_linear_space_args scratch_gamma_args;
527 
528 	scratch_gamma_args.arg = arg;
529 	scratch_gamma_args.a0 = dc_fixpt_zero;
530 	scratch_gamma_args.a1 = dc_fixpt_zero;
531 	scratch_gamma_args.a2 = dc_fixpt_zero;
532 	scratch_gamma_args.a3 = dc_fixpt_zero;
533 	scratch_gamma_args.cal_buffer = cal_buffer;
534 	scratch_gamma_args.gamma = gamma;
535 
536 	if (use_eetf)
537 		return translate_from_linear_space_long(&scratch_gamma_args);
538 
539 	return translate_from_linear_space(&scratch_gamma_args);
540 }
541 
542 
translate_to_linear_space(struct fixed31_32 arg,struct fixed31_32 a0,struct fixed31_32 a1,struct fixed31_32 a2,struct fixed31_32 a3,struct fixed31_32 gamma)543 static struct fixed31_32 translate_to_linear_space(
544 	struct fixed31_32 arg,
545 	struct fixed31_32 a0,
546 	struct fixed31_32 a1,
547 	struct fixed31_32 a2,
548 	struct fixed31_32 a3,
549 	struct fixed31_32 gamma)
550 {
551 	struct fixed31_32 linear;
552 
553 	a0 = dc_fixpt_mul(a0, a1);
554 	if (dc_fixpt_le(arg, dc_fixpt_neg(a0)))
555 
556 		linear = dc_fixpt_neg(
557 				 dc_fixpt_pow(
558 				 dc_fixpt_div(
559 				 dc_fixpt_sub(a2, arg),
560 				 dc_fixpt_add(
561 				 dc_fixpt_one, a3)), gamma));
562 
563 	else if (dc_fixpt_le(dc_fixpt_neg(a0), arg) &&
564 			 dc_fixpt_le(arg, a0))
565 		linear = dc_fixpt_div(arg, a1);
566 	else
567 		linear =  dc_fixpt_pow(
568 					dc_fixpt_div(
569 					dc_fixpt_add(a2, arg),
570 					dc_fixpt_add(
571 					dc_fixpt_one, a3)), gamma);
572 
573 	return linear;
574 }
575 
translate_from_linear_space_ex(struct fixed31_32 arg,struct gamma_coefficients * coeff,uint32_t color_index,struct calculate_buffer * cal_buffer)576 static struct fixed31_32 translate_from_linear_space_ex(
577 	struct fixed31_32 arg,
578 	struct gamma_coefficients *coeff,
579 	uint32_t color_index,
580 	struct calculate_buffer *cal_buffer)
581 {
582 	struct translate_from_linear_space_args scratch_gamma_args;
583 
584 	scratch_gamma_args.arg = arg;
585 	scratch_gamma_args.a0 = coeff->a0[color_index];
586 	scratch_gamma_args.a1 = coeff->a1[color_index];
587 	scratch_gamma_args.a2 = coeff->a2[color_index];
588 	scratch_gamma_args.a3 = coeff->a3[color_index];
589 	scratch_gamma_args.gamma = coeff->user_gamma[color_index];
590 	scratch_gamma_args.cal_buffer = cal_buffer;
591 
592 	return translate_from_linear_space(&scratch_gamma_args);
593 }
594 
595 
translate_to_linear_space_ex(struct fixed31_32 arg,struct gamma_coefficients * coeff,uint32_t color_index)596 static inline struct fixed31_32 translate_to_linear_space_ex(
597 	struct fixed31_32 arg,
598 	struct gamma_coefficients *coeff,
599 	uint32_t color_index)
600 {
601 	return translate_to_linear_space(
602 		arg,
603 		coeff->a0[color_index],
604 		coeff->a1[color_index],
605 		coeff->a2[color_index],
606 		coeff->a3[color_index],
607 		coeff->user_gamma[color_index]);
608 }
609 
610 
find_software_points(const struct dc_gamma * ramp,const struct gamma_pixel * axis_x,struct fixed31_32 hw_point,enum channel_name channel,uint32_t * index_to_start,uint32_t * index_left,uint32_t * index_right,enum hw_point_position * pos)611 static bool find_software_points(
612 	const struct dc_gamma *ramp,
613 	const struct gamma_pixel *axis_x,
614 	struct fixed31_32 hw_point,
615 	enum channel_name channel,
616 	uint32_t *index_to_start,
617 	uint32_t *index_left,
618 	uint32_t *index_right,
619 	enum hw_point_position *pos)
620 {
621 	const uint32_t max_number = ramp->num_entries + 3;
622 
623 	struct fixed31_32 left, right;
624 
625 	uint32_t i = *index_to_start;
626 
627 	while (i < max_number) {
628 		if (channel == CHANNEL_NAME_RED) {
629 			left = axis_x[i].r;
630 
631 			if (i < max_number - 1)
632 				right = axis_x[i + 1].r;
633 			else
634 				right = axis_x[max_number - 1].r;
635 		} else if (channel == CHANNEL_NAME_GREEN) {
636 			left = axis_x[i].g;
637 
638 			if (i < max_number - 1)
639 				right = axis_x[i + 1].g;
640 			else
641 				right = axis_x[max_number - 1].g;
642 		} else {
643 			left = axis_x[i].b;
644 
645 			if (i < max_number - 1)
646 				right = axis_x[i + 1].b;
647 			else
648 				right = axis_x[max_number - 1].b;
649 		}
650 
651 		if (dc_fixpt_le(left, hw_point) &&
652 			dc_fixpt_le(hw_point, right)) {
653 			*index_to_start = i;
654 			*index_left = i;
655 
656 			if (i < max_number - 1)
657 				*index_right = i + 1;
658 			else
659 				*index_right = max_number - 1;
660 
661 			*pos = HW_POINT_POSITION_MIDDLE;
662 
663 			return true;
664 		} else if ((i == *index_to_start) &&
665 			dc_fixpt_le(hw_point, left)) {
666 			*index_to_start = i;
667 			*index_left = i;
668 			*index_right = i;
669 
670 			*pos = HW_POINT_POSITION_LEFT;
671 
672 			return true;
673 		} else if ((i == max_number - 1) &&
674 			dc_fixpt_le(right, hw_point)) {
675 			*index_to_start = i;
676 			*index_left = i;
677 			*index_right = i;
678 
679 			*pos = HW_POINT_POSITION_RIGHT;
680 
681 			return true;
682 		}
683 
684 		++i;
685 	}
686 
687 	return false;
688 }
689 
build_custom_gamma_mapping_coefficients_worker(const struct dc_gamma * ramp,struct pixel_gamma_point * coeff,const struct hw_x_point * coordinates_x,const struct gamma_pixel * axis_x,enum channel_name channel,uint32_t number_of_points)690 static bool build_custom_gamma_mapping_coefficients_worker(
691 	const struct dc_gamma *ramp,
692 	struct pixel_gamma_point *coeff,
693 	const struct hw_x_point *coordinates_x,
694 	const struct gamma_pixel *axis_x,
695 	enum channel_name channel,
696 	uint32_t number_of_points)
697 {
698 	uint32_t i = 0;
699 
700 	while (i <= number_of_points) {
701 		struct fixed31_32 coord_x;
702 
703 		uint32_t index_to_start = 0;
704 		uint32_t index_left = 0;
705 		uint32_t index_right = 0;
706 
707 		enum hw_point_position hw_pos;
708 
709 		struct gamma_point *point;
710 
711 		struct fixed31_32 left_pos;
712 		struct fixed31_32 right_pos;
713 
714 		if (channel == CHANNEL_NAME_RED)
715 			coord_x = coordinates_x[i].regamma_y_red;
716 		else if (channel == CHANNEL_NAME_GREEN)
717 			coord_x = coordinates_x[i].regamma_y_green;
718 		else
719 			coord_x = coordinates_x[i].regamma_y_blue;
720 
721 		if (!find_software_points(
722 			ramp, axis_x, coord_x, channel,
723 			&index_to_start, &index_left, &index_right, &hw_pos)) {
724 			BREAK_TO_DEBUGGER();
725 			return false;
726 		}
727 
728 		if (index_left >= ramp->num_entries + 3) {
729 			BREAK_TO_DEBUGGER();
730 			return false;
731 		}
732 
733 		if (index_right >= ramp->num_entries + 3) {
734 			BREAK_TO_DEBUGGER();
735 			return false;
736 		}
737 
738 		if (channel == CHANNEL_NAME_RED) {
739 			point = &coeff[i].r;
740 
741 			left_pos = axis_x[index_left].r;
742 			right_pos = axis_x[index_right].r;
743 		} else if (channel == CHANNEL_NAME_GREEN) {
744 			point = &coeff[i].g;
745 
746 			left_pos = axis_x[index_left].g;
747 			right_pos = axis_x[index_right].g;
748 		} else {
749 			point = &coeff[i].b;
750 
751 			left_pos = axis_x[index_left].b;
752 			right_pos = axis_x[index_right].b;
753 		}
754 
755 		if (hw_pos == HW_POINT_POSITION_MIDDLE)
756 			point->coeff = dc_fixpt_div(
757 				dc_fixpt_sub(
758 					coord_x,
759 					left_pos),
760 				dc_fixpt_sub(
761 					right_pos,
762 					left_pos));
763 		else if (hw_pos == HW_POINT_POSITION_LEFT)
764 			point->coeff = dc_fixpt_zero;
765 		else if (hw_pos == HW_POINT_POSITION_RIGHT)
766 			point->coeff = dc_fixpt_from_int(2);
767 		else {
768 			BREAK_TO_DEBUGGER();
769 			return false;
770 		}
771 
772 		point->left_index = index_left;
773 		point->right_index = index_right;
774 		point->pos = hw_pos;
775 
776 		++i;
777 	}
778 
779 	return true;
780 }
781 
calculate_mapped_value(struct pwl_float_data * rgb,const struct pixel_gamma_point * coeff,enum channel_name channel,uint32_t max_index)782 static struct fixed31_32 calculate_mapped_value(
783 	struct pwl_float_data *rgb,
784 	const struct pixel_gamma_point *coeff,
785 	enum channel_name channel,
786 	uint32_t max_index)
787 {
788 	const struct gamma_point *point;
789 
790 	struct fixed31_32 result;
791 
792 	if (channel == CHANNEL_NAME_RED)
793 		point = &coeff->r;
794 	else if (channel == CHANNEL_NAME_GREEN)
795 		point = &coeff->g;
796 	else
797 		point = &coeff->b;
798 
799 	if ((point->left_index < 0) || (point->left_index > max_index)) {
800 		BREAK_TO_DEBUGGER();
801 		return dc_fixpt_zero;
802 	}
803 
804 	if ((point->right_index < 0) || (point->right_index > max_index)) {
805 		BREAK_TO_DEBUGGER();
806 		return dc_fixpt_zero;
807 	}
808 
809 	if (point->pos == HW_POINT_POSITION_MIDDLE)
810 		if (channel == CHANNEL_NAME_RED)
811 			result = dc_fixpt_add(
812 				dc_fixpt_mul(
813 					point->coeff,
814 					dc_fixpt_sub(
815 						rgb[point->right_index].r,
816 						rgb[point->left_index].r)),
817 				rgb[point->left_index].r);
818 		else if (channel == CHANNEL_NAME_GREEN)
819 			result = dc_fixpt_add(
820 				dc_fixpt_mul(
821 					point->coeff,
822 					dc_fixpt_sub(
823 						rgb[point->right_index].g,
824 						rgb[point->left_index].g)),
825 				rgb[point->left_index].g);
826 		else
827 			result = dc_fixpt_add(
828 				dc_fixpt_mul(
829 					point->coeff,
830 					dc_fixpt_sub(
831 						rgb[point->right_index].b,
832 						rgb[point->left_index].b)),
833 				rgb[point->left_index].b);
834 	else if (point->pos == HW_POINT_POSITION_LEFT) {
835 		BREAK_TO_DEBUGGER();
836 		result = dc_fixpt_zero;
837 	} else {
838 		result = dc_fixpt_one;
839 	}
840 
841 	return result;
842 }
843 
build_pq(struct pwl_float_data_ex * rgb_regamma,uint32_t hw_points_num,const struct hw_x_point * coordinate_x,uint32_t sdr_white_level)844 static void build_pq(struct pwl_float_data_ex *rgb_regamma,
845 		uint32_t hw_points_num,
846 		const struct hw_x_point *coordinate_x,
847 		uint32_t sdr_white_level)
848 {
849 	uint32_t i, start_index;
850 
851 	struct pwl_float_data_ex *rgb = rgb_regamma;
852 	const struct hw_x_point *coord_x = coordinate_x;
853 	struct fixed31_32 x;
854 	struct fixed31_32 output;
855 	struct fixed31_32 scaling_factor =
856 			dc_fixpt_from_fraction(sdr_white_level, 10000);
857 	struct fixed31_32 *pq_table = mod_color_get_table(type_pq_table);
858 
859 	if (!mod_color_is_table_init(type_pq_table) && sdr_white_level == 80) {
860 		precompute_pq();
861 		mod_color_set_table_init_state(type_pq_table, true);
862 	}
863 
864 	/* TODO: start index is from segment 2^-24, skipping first segment
865 	 * due to x values too small for power calculations
866 	 */
867 	start_index = 32;
868 	rgb += start_index;
869 	coord_x += start_index;
870 
871 	for (i = start_index; i <= hw_points_num; i++) {
872 		/* Multiply 0.008 as regamma is 0-1 and FP16 input is 0-125.
873 		 * FP 1.0 = 80nits
874 		 */
875 		if (sdr_white_level == 80) {
876 			output = pq_table[i];
877 		} else {
878 			x = dc_fixpt_mul(coord_x->x, scaling_factor);
879 			compute_pq(x, &output);
880 		}
881 
882 		/* should really not happen? */
883 		if (dc_fixpt_lt(output, dc_fixpt_zero))
884 			output = dc_fixpt_zero;
885 
886 		rgb->r = output;
887 		rgb->g = output;
888 		rgb->b = output;
889 
890 		++coord_x;
891 		++rgb;
892 	}
893 }
894 
build_de_pq(struct pwl_float_data_ex * de_pq,uint32_t hw_points_num,const struct hw_x_point * coordinate_x)895 static void build_de_pq(struct pwl_float_data_ex *de_pq,
896 		uint32_t hw_points_num,
897 		const struct hw_x_point *coordinate_x)
898 {
899 	uint32_t i;
900 	struct fixed31_32 output;
901 	struct fixed31_32 *de_pq_table = mod_color_get_table(type_de_pq_table);
902 	struct fixed31_32 scaling_factor = dc_fixpt_from_int(125);
903 
904 	if (!mod_color_is_table_init(type_de_pq_table)) {
905 		precompute_de_pq();
906 		mod_color_set_table_init_state(type_de_pq_table, true);
907 	}
908 
909 
910 	for (i = 0; i <= hw_points_num; i++) {
911 		output = de_pq_table[i];
912 		/* should really not happen? */
913 		if (dc_fixpt_lt(output, dc_fixpt_zero))
914 			output = dc_fixpt_zero;
915 		else if (dc_fixpt_lt(scaling_factor, output))
916 			output = scaling_factor;
917 		de_pq[i].r = output;
918 		de_pq[i].g = output;
919 		de_pq[i].b = output;
920 	}
921 }
922 
build_regamma(struct pwl_float_data_ex * rgb_regamma,uint32_t hw_points_num,const struct hw_x_point * coordinate_x,enum dc_transfer_func_predefined type,struct calculate_buffer * cal_buffer)923 static bool build_regamma(struct pwl_float_data_ex *rgb_regamma,
924 		uint32_t hw_points_num,
925 		const struct hw_x_point *coordinate_x,
926 		enum dc_transfer_func_predefined type,
927 		struct calculate_buffer *cal_buffer)
928 {
929 	uint32_t i;
930 	bool ret = false;
931 
932 	struct gamma_coefficients *coeff;
933 	struct pwl_float_data_ex *rgb = rgb_regamma;
934 	const struct hw_x_point *coord_x = coordinate_x;
935 
936 	coeff = kvzalloc(sizeof(*coeff), GFP_KERNEL);
937 	if (!coeff)
938 		goto release;
939 
940 	if (!build_coefficients(coeff, type))
941 		goto release;
942 
943 	memset(cal_buffer->buffer, 0, NUM_PTS_IN_REGION * sizeof(struct fixed31_32));
944 	cal_buffer->buffer_index = 0; // see variable definition for more info
945 
946 	i = 0;
947 	while (i <= hw_points_num) {
948 		/* TODO use y vs r,g,b */
949 		rgb->r = translate_from_linear_space_ex(
950 			coord_x->x, coeff, 0, cal_buffer);
951 		rgb->g = rgb->r;
952 		rgb->b = rgb->r;
953 		++coord_x;
954 		++rgb;
955 		++i;
956 	}
957 	cal_buffer->buffer_index = -1;
958 	ret = true;
959 release:
960 	kvfree(coeff);
961 	return ret;
962 }
963 
hermite_spline_eetf(struct fixed31_32 input_x,struct fixed31_32 max_display,struct fixed31_32 min_display,struct fixed31_32 max_content,struct fixed31_32 * out_x)964 static void hermite_spline_eetf(struct fixed31_32 input_x,
965 				struct fixed31_32 max_display,
966 				struct fixed31_32 min_display,
967 				struct fixed31_32 max_content,
968 				struct fixed31_32 *out_x)
969 {
970 	struct fixed31_32 min_lum_pq;
971 	struct fixed31_32 max_lum_pq;
972 	struct fixed31_32 max_content_pq;
973 	struct fixed31_32 ks;
974 	struct fixed31_32 E1;
975 	struct fixed31_32 E2;
976 	struct fixed31_32 E3;
977 	struct fixed31_32 t;
978 	struct fixed31_32 t2;
979 	struct fixed31_32 t3;
980 	struct fixed31_32 two;
981 	struct fixed31_32 three;
982 	struct fixed31_32 temp1;
983 	struct fixed31_32 temp2;
984 	struct fixed31_32 a = dc_fixpt_from_fraction(15, 10);
985 	struct fixed31_32 b = dc_fixpt_from_fraction(5, 10);
986 	struct fixed31_32 epsilon = dc_fixpt_from_fraction(1, 1000000); // dc_fixpt_epsilon is a bit too small
987 
988 	if (dc_fixpt_eq(max_content, dc_fixpt_zero)) {
989 		*out_x = dc_fixpt_zero;
990 		return;
991 	}
992 
993 	compute_pq(input_x, &E1);
994 	compute_pq(dc_fixpt_div(min_display, max_content), &min_lum_pq);
995 	compute_pq(dc_fixpt_div(max_display, max_content), &max_lum_pq);
996 	compute_pq(dc_fixpt_one, &max_content_pq); // always 1? DAL2 code is weird
997 	a = dc_fixpt_div(dc_fixpt_add(dc_fixpt_one, b), max_content_pq); // (1+b)/maxContent
998 	ks = dc_fixpt_sub(dc_fixpt_mul(a, max_lum_pq), b); // a * max_lum_pq - b
999 
1000 	if (dc_fixpt_lt(E1, ks))
1001 		E2 = E1;
1002 	else if (dc_fixpt_le(ks, E1) && dc_fixpt_le(E1, dc_fixpt_one)) {
1003 		if (dc_fixpt_lt(epsilon, dc_fixpt_sub(dc_fixpt_one, ks)))
1004 			// t = (E1 - ks) / (1 - ks)
1005 			t = dc_fixpt_div(dc_fixpt_sub(E1, ks),
1006 					dc_fixpt_sub(dc_fixpt_one, ks));
1007 		else
1008 			t = dc_fixpt_zero;
1009 
1010 		two = dc_fixpt_from_int(2);
1011 		three = dc_fixpt_from_int(3);
1012 
1013 		t2 = dc_fixpt_mul(t, t);
1014 		t3 = dc_fixpt_mul(t2, t);
1015 		temp1 = dc_fixpt_mul(two, t3);
1016 		temp2 = dc_fixpt_mul(three, t2);
1017 
1018 		// (2t^3 - 3t^2 + 1) * ks
1019 		E2 = dc_fixpt_mul(ks, dc_fixpt_add(dc_fixpt_one,
1020 				dc_fixpt_sub(temp1, temp2)));
1021 
1022 		// (-2t^3 + 3t^2) * max_lum_pq
1023 		E2 = dc_fixpt_add(E2, dc_fixpt_mul(max_lum_pq,
1024 				dc_fixpt_sub(temp2, temp1)));
1025 
1026 		temp1 = dc_fixpt_mul(two, t2);
1027 		temp2 = dc_fixpt_sub(dc_fixpt_one, ks);
1028 
1029 		// (t^3 - 2t^2 + t) * (1-ks)
1030 		E2 = dc_fixpt_add(E2, dc_fixpt_mul(temp2,
1031 				dc_fixpt_add(t, dc_fixpt_sub(t3, temp1))));
1032 	} else
1033 		E2 = dc_fixpt_one;
1034 
1035 	temp1 = dc_fixpt_sub(dc_fixpt_one, E2);
1036 	temp2 = dc_fixpt_mul(temp1, temp1);
1037 	temp2 = dc_fixpt_mul(temp2, temp2);
1038 	// temp2 = (1-E2)^4
1039 
1040 	E3 =  dc_fixpt_add(E2, dc_fixpt_mul(min_lum_pq, temp2));
1041 	compute_de_pq(E3, out_x);
1042 
1043 	*out_x = dc_fixpt_div(*out_x, dc_fixpt_div(max_display, max_content));
1044 }
1045 
build_freesync_hdr(struct pwl_float_data_ex * rgb_regamma,uint32_t hw_points_num,const struct hw_x_point * coordinate_x,const struct hdr_tm_params * fs_params,struct calculate_buffer * cal_buffer)1046 static bool build_freesync_hdr(struct pwl_float_data_ex *rgb_regamma,
1047 		uint32_t hw_points_num,
1048 		const struct hw_x_point *coordinate_x,
1049 		const struct hdr_tm_params *fs_params,
1050 		struct calculate_buffer *cal_buffer)
1051 {
1052 	uint32_t i;
1053 	struct pwl_float_data_ex *rgb = rgb_regamma;
1054 	const struct hw_x_point *coord_x = coordinate_x;
1055 	const struct hw_x_point *prv_coord_x = coord_x;
1056 	struct fixed31_32 scaledX = dc_fixpt_zero;
1057 	struct fixed31_32 scaledX1 = dc_fixpt_zero;
1058 	struct fixed31_32 max_display;
1059 	struct fixed31_32 min_display;
1060 	struct fixed31_32 max_content;
1061 	struct fixed31_32 clip = dc_fixpt_one;
1062 	struct fixed31_32 output = dc_fixpt_zero;
1063 	bool use_eetf = false;
1064 	bool is_clipped = false;
1065 	struct fixed31_32 sdr_white_level;
1066 	struct fixed31_32 coordX_diff;
1067 	struct fixed31_32 out_dist_max;
1068 	struct fixed31_32 bright_norm;
1069 
1070 	if (fs_params->max_content == 0 ||
1071 			fs_params->max_display == 0)
1072 		return false;
1073 
1074 	max_display = dc_fixpt_from_int(fs_params->max_display);
1075 	min_display = dc_fixpt_from_fraction(fs_params->min_display, 10000);
1076 	max_content = dc_fixpt_from_int(fs_params->max_content);
1077 	sdr_white_level = dc_fixpt_from_int(fs_params->sdr_white_level);
1078 
1079 	if (fs_params->min_display > 1000) // cap at 0.1 at the bottom
1080 		min_display = dc_fixpt_from_fraction(1, 10);
1081 	if (fs_params->max_display < 100) // cap at 100 at the top
1082 		max_display = dc_fixpt_from_int(100);
1083 
1084 	// only max used, we don't adjust min luminance
1085 	if (fs_params->max_content > fs_params->max_display)
1086 		use_eetf = true;
1087 	else
1088 		max_content = max_display;
1089 
1090 	if (!use_eetf)
1091 		cal_buffer->buffer_index = 0; // see var definition for more info
1092 	rgb += 32; // first 32 points have problems with fixed point, too small
1093 	coord_x += 32;
1094 
1095 	for (i = 32; i <= hw_points_num; i++) {
1096 		if (!is_clipped) {
1097 			if (use_eetf) {
1098 				/* max content is equal 1 */
1099 				scaledX1 = dc_fixpt_div(coord_x->x,
1100 						dc_fixpt_div(max_content, sdr_white_level));
1101 				hermite_spline_eetf(scaledX1, max_display, min_display,
1102 						max_content, &scaledX);
1103 			} else
1104 				scaledX = dc_fixpt_div(coord_x->x,
1105 						dc_fixpt_div(max_display, sdr_white_level));
1106 
1107 			if (dc_fixpt_lt(scaledX, clip)) {
1108 				if (dc_fixpt_lt(scaledX, dc_fixpt_zero))
1109 					output = dc_fixpt_zero;
1110 				else
1111 					output = calculate_gamma22(scaledX, use_eetf, cal_buffer);
1112 
1113 				// Ensure output respects reasonable boundaries
1114 				output = dc_fixpt_clamp(output, dc_fixpt_zero, dc_fixpt_one);
1115 
1116 				rgb->r = output;
1117 				rgb->g = output;
1118 				rgb->b = output;
1119 			} else {
1120 				/* Here clipping happens for the first time */
1121 				is_clipped = true;
1122 
1123 				/* The next few lines implement the equation
1124 				 * output = prev_out +
1125 				 * (coord_x->x - prev_coord_x->x) *
1126 				 * (1.0 - prev_out) /
1127 				 * (maxDisp/sdr_white_level - prevCoordX)
1128 				 *
1129 				 * This equation interpolates the first point
1130 				 * after max_display/80 so that the slope from
1131 				 * hw_x_before_max and hw_x_after_max is such
1132 				 * that we hit Y=1.0 at max_display/80.
1133 				 */
1134 
1135 				coordX_diff = dc_fixpt_sub(coord_x->x, prv_coord_x->x);
1136 				out_dist_max = dc_fixpt_sub(dc_fixpt_one, output);
1137 				bright_norm = dc_fixpt_div(max_display, sdr_white_level);
1138 
1139 				output = dc_fixpt_add(
1140 					output, dc_fixpt_mul(
1141 						coordX_diff, dc_fixpt_div(
1142 							out_dist_max,
1143 							dc_fixpt_sub(bright_norm, prv_coord_x->x)
1144 						)
1145 					)
1146 				);
1147 
1148 				/* Relaxing the maximum boundary to 1.07 (instead of 1.0)
1149 				 * because the last point in the curve must be such that
1150 				 * the maximum display pixel brightness interpolates to
1151 				 * exactly 1.0. The worst case scenario was calculated
1152 				 * around 1.057, so the limit of 1.07 leaves some safety
1153 				 * margin.
1154 				 */
1155 				output = dc_fixpt_clamp(output, dc_fixpt_zero,
1156 					dc_fixpt_from_fraction(107, 100));
1157 
1158 				rgb->r = output;
1159 				rgb->g = output;
1160 				rgb->b = output;
1161 			}
1162 		} else {
1163 			/* Every other clipping after the first
1164 			 * one is dealt with here
1165 			 */
1166 			rgb->r = clip;
1167 			rgb->g = clip;
1168 			rgb->b = clip;
1169 		}
1170 
1171 		prv_coord_x = coord_x;
1172 		++coord_x;
1173 		++rgb;
1174 	}
1175 	cal_buffer->buffer_index = -1;
1176 
1177 	return true;
1178 }
1179 
build_degamma(struct pwl_float_data_ex * curve,uint32_t hw_points_num,const struct hw_x_point * coordinate_x,enum dc_transfer_func_predefined type)1180 static bool build_degamma(struct pwl_float_data_ex *curve,
1181 		uint32_t hw_points_num,
1182 		const struct hw_x_point *coordinate_x, enum dc_transfer_func_predefined type)
1183 {
1184 	uint32_t i;
1185 	struct gamma_coefficients coeff;
1186 	uint32_t begin_index, end_index;
1187 	bool ret = false;
1188 
1189 	if (!build_coefficients(&coeff, type))
1190 		goto release;
1191 
1192 	i = 0;
1193 
1194 	/* X points is 2^-25 to 2^7
1195 	 * De-gamma X is 2^-12 to 2^0 – we are skipping first -12-(-25) = 13 regions
1196 	 */
1197 	begin_index = 13 * NUM_PTS_IN_REGION;
1198 	end_index = begin_index + 12 * NUM_PTS_IN_REGION;
1199 
1200 	while (i != begin_index) {
1201 		curve[i].r = dc_fixpt_zero;
1202 		curve[i].g = dc_fixpt_zero;
1203 		curve[i].b = dc_fixpt_zero;
1204 		i++;
1205 	}
1206 
1207 	while (i != end_index) {
1208 		curve[i].r = translate_to_linear_space_ex(
1209 				coordinate_x[i].x, &coeff, 0);
1210 		curve[i].g = curve[i].r;
1211 		curve[i].b = curve[i].r;
1212 		i++;
1213 	}
1214 	while (i != hw_points_num + 1) {
1215 		curve[i].r = dc_fixpt_one;
1216 		curve[i].g = dc_fixpt_one;
1217 		curve[i].b = dc_fixpt_one;
1218 		i++;
1219 	}
1220 	ret = true;
1221 release:
1222 	return ret;
1223 }
1224 
1225 
1226 
1227 
1228 
build_hlg_degamma(struct pwl_float_data_ex * degamma,uint32_t hw_points_num,const struct hw_x_point * coordinate_x,uint32_t sdr_white_level,uint32_t max_luminance_nits)1229 static void build_hlg_degamma(struct pwl_float_data_ex *degamma,
1230 		uint32_t hw_points_num,
1231 		const struct hw_x_point *coordinate_x,
1232 		uint32_t sdr_white_level, uint32_t max_luminance_nits)
1233 {
1234 	uint32_t i;
1235 
1236 	struct pwl_float_data_ex *rgb = degamma;
1237 	const struct hw_x_point *coord_x = coordinate_x;
1238 
1239 	i = 0;
1240 	// check when i == 434
1241 	while (i != hw_points_num + 1) {
1242 		compute_hlg_eotf(coord_x->x, &rgb->r, sdr_white_level, max_luminance_nits);
1243 		rgb->g = rgb->r;
1244 		rgb->b = rgb->r;
1245 		++coord_x;
1246 		++rgb;
1247 		++i;
1248 	}
1249 }
1250 
1251 
build_hlg_regamma(struct pwl_float_data_ex * regamma,uint32_t hw_points_num,const struct hw_x_point * coordinate_x,uint32_t sdr_white_level,uint32_t max_luminance_nits)1252 static void build_hlg_regamma(struct pwl_float_data_ex *regamma,
1253 		uint32_t hw_points_num,
1254 		const struct hw_x_point *coordinate_x,
1255 		uint32_t sdr_white_level, uint32_t max_luminance_nits)
1256 {
1257 	uint32_t i;
1258 
1259 	struct pwl_float_data_ex *rgb = regamma;
1260 	const struct hw_x_point *coord_x = coordinate_x;
1261 
1262 	i = 0;
1263 
1264 	// when i == 471
1265 	while (i != hw_points_num + 1) {
1266 		compute_hlg_oetf(coord_x->x, &rgb->r, sdr_white_level, max_luminance_nits);
1267 		rgb->g = rgb->r;
1268 		rgb->b = rgb->r;
1269 		++coord_x;
1270 		++rgb;
1271 		++i;
1272 	}
1273 }
1274 
scale_gamma(struct pwl_float_data * pwl_rgb,const struct dc_gamma * ramp,struct dividers dividers)1275 static void scale_gamma(struct pwl_float_data *pwl_rgb,
1276 		const struct dc_gamma *ramp,
1277 		struct dividers dividers)
1278 {
1279 	const struct fixed31_32 max_driver = dc_fixpt_from_int(0xFFFF);
1280 	const struct fixed31_32 max_os = dc_fixpt_from_int(0xFF00);
1281 	struct fixed31_32 scaler = max_os;
1282 	uint32_t i;
1283 	struct pwl_float_data *rgb = pwl_rgb;
1284 	struct pwl_float_data *rgb_last = rgb + ramp->num_entries - 1;
1285 
1286 	i = 0;
1287 
1288 	do {
1289 		if (dc_fixpt_lt(max_os, ramp->entries.red[i]) ||
1290 			dc_fixpt_lt(max_os, ramp->entries.green[i]) ||
1291 			dc_fixpt_lt(max_os, ramp->entries.blue[i])) {
1292 			scaler = max_driver;
1293 			break;
1294 		}
1295 		++i;
1296 	} while (i != ramp->num_entries);
1297 
1298 	i = 0;
1299 
1300 	do {
1301 		rgb->r = dc_fixpt_div(
1302 			ramp->entries.red[i], scaler);
1303 		rgb->g = dc_fixpt_div(
1304 			ramp->entries.green[i], scaler);
1305 		rgb->b = dc_fixpt_div(
1306 			ramp->entries.blue[i], scaler);
1307 
1308 		++rgb;
1309 		++i;
1310 	} while (i != ramp->num_entries);
1311 
1312 	rgb->r = dc_fixpt_mul(rgb_last->r,
1313 			dividers.divider1);
1314 	rgb->g = dc_fixpt_mul(rgb_last->g,
1315 			dividers.divider1);
1316 	rgb->b = dc_fixpt_mul(rgb_last->b,
1317 			dividers.divider1);
1318 
1319 	++rgb;
1320 
1321 	rgb->r = dc_fixpt_mul(rgb_last->r,
1322 			dividers.divider2);
1323 	rgb->g = dc_fixpt_mul(rgb_last->g,
1324 			dividers.divider2);
1325 	rgb->b = dc_fixpt_mul(rgb_last->b,
1326 			dividers.divider2);
1327 
1328 	++rgb;
1329 
1330 	rgb->r = dc_fixpt_mul(rgb_last->r,
1331 			dividers.divider3);
1332 	rgb->g = dc_fixpt_mul(rgb_last->g,
1333 			dividers.divider3);
1334 	rgb->b = dc_fixpt_mul(rgb_last->b,
1335 			dividers.divider3);
1336 }
1337 
scale_gamma_dx(struct pwl_float_data * pwl_rgb,const struct dc_gamma * ramp,struct dividers dividers)1338 static void scale_gamma_dx(struct pwl_float_data *pwl_rgb,
1339 		const struct dc_gamma *ramp,
1340 		struct dividers dividers)
1341 {
1342 	uint32_t i;
1343 	struct fixed31_32 min = dc_fixpt_zero;
1344 	struct fixed31_32 max = dc_fixpt_one;
1345 
1346 	struct fixed31_32 delta = dc_fixpt_zero;
1347 	struct fixed31_32 offset = dc_fixpt_zero;
1348 
1349 	for (i = 0 ; i < ramp->num_entries; i++) {
1350 		if (dc_fixpt_lt(ramp->entries.red[i], min))
1351 			min = ramp->entries.red[i];
1352 
1353 		if (dc_fixpt_lt(ramp->entries.green[i], min))
1354 			min = ramp->entries.green[i];
1355 
1356 		if (dc_fixpt_lt(ramp->entries.blue[i], min))
1357 			min = ramp->entries.blue[i];
1358 
1359 		if (dc_fixpt_lt(max, ramp->entries.red[i]))
1360 			max = ramp->entries.red[i];
1361 
1362 		if (dc_fixpt_lt(max, ramp->entries.green[i]))
1363 			max = ramp->entries.green[i];
1364 
1365 		if (dc_fixpt_lt(max, ramp->entries.blue[i]))
1366 			max = ramp->entries.blue[i];
1367 	}
1368 
1369 	if (dc_fixpt_lt(min, dc_fixpt_zero))
1370 		delta = dc_fixpt_neg(min);
1371 
1372 	offset = dc_fixpt_add(min, max);
1373 
1374 	for (i = 0 ; i < ramp->num_entries; i++) {
1375 		pwl_rgb[i].r = dc_fixpt_div(
1376 			dc_fixpt_add(
1377 				ramp->entries.red[i], delta), offset);
1378 		pwl_rgb[i].g = dc_fixpt_div(
1379 			dc_fixpt_add(
1380 				ramp->entries.green[i], delta), offset);
1381 		pwl_rgb[i].b = dc_fixpt_div(
1382 			dc_fixpt_add(
1383 				ramp->entries.blue[i], delta), offset);
1384 
1385 	}
1386 
1387 	pwl_rgb[i].r =  dc_fixpt_sub(dc_fixpt_mul_int(
1388 				pwl_rgb[i-1].r, 2), pwl_rgb[i-2].r);
1389 	pwl_rgb[i].g =  dc_fixpt_sub(dc_fixpt_mul_int(
1390 				pwl_rgb[i-1].g, 2), pwl_rgb[i-2].g);
1391 	pwl_rgb[i].b =  dc_fixpt_sub(dc_fixpt_mul_int(
1392 				pwl_rgb[i-1].b, 2), pwl_rgb[i-2].b);
1393 	++i;
1394 	pwl_rgb[i].r =  dc_fixpt_sub(dc_fixpt_mul_int(
1395 				pwl_rgb[i-1].r, 2), pwl_rgb[i-2].r);
1396 	pwl_rgb[i].g =  dc_fixpt_sub(dc_fixpt_mul_int(
1397 				pwl_rgb[i-1].g, 2), pwl_rgb[i-2].g);
1398 	pwl_rgb[i].b =  dc_fixpt_sub(dc_fixpt_mul_int(
1399 				pwl_rgb[i-1].b, 2), pwl_rgb[i-2].b);
1400 }
1401 
1402 /*
1403  * RS3+ color transform DDI - 1D LUT adjustment is composed with regamma here
1404  * Input is evenly distributed in the output color space as specified in
1405  * SetTimings
1406  *
1407  * Interpolation details:
1408  * 1D LUT has 4096 values which give curve correction in 0-1 float range
1409  * for evenly spaced points in 0-1 range. lut1D[index] gives correction
1410  * for index/4095.
1411  * First we find index for which:
1412  *	index/4095 < regamma_y < (index+1)/4095 =>
1413  *	index < 4095*regamma_y < index + 1
 * norm_y = 4095*regamma_y, and index is norm_y rounded down to an integer
1415  * lut1 = lut1D[index], lut2 = lut1D[index+1]
1416  *
1417  * adjustedY is then linearly interpolating regamma Y between lut1 and lut2
1418  *
1419  * Custom degamma on Linux uses the same interpolation math, so is handled here
1420  */
apply_lut_1d(const struct dc_gamma * ramp,uint32_t num_hw_points,struct dc_transfer_func_distributed_points * tf_pts)1421 static void apply_lut_1d(
1422 		const struct dc_gamma *ramp,
1423 		uint32_t num_hw_points,
1424 		struct dc_transfer_func_distributed_points *tf_pts)
1425 {
1426 	int i = 0;
1427 	int color = 0;
1428 	struct fixed31_32 *regamma_y;
1429 	struct fixed31_32 norm_y;
1430 	struct fixed31_32 lut1;
1431 	struct fixed31_32 lut2;
1432 	const int max_lut_index = 4095;
1433 	const struct fixed31_32 penult_lut_index_f =
1434 			dc_fixpt_from_int(max_lut_index-1);
1435 	const struct fixed31_32 max_lut_index_f =
1436 			dc_fixpt_from_int(max_lut_index);
1437 	int32_t index = 0, index_next = 0;
1438 	struct fixed31_32 index_f;
1439 	struct fixed31_32 delta_lut;
1440 	struct fixed31_32 delta_index;
1441 
1442 	if (ramp->type != GAMMA_CS_TFM_1D && ramp->type != GAMMA_CUSTOM)
1443 		return; // this is not expected
1444 
1445 	for (i = 0; i < num_hw_points; i++) {
1446 		for (color = 0; color < 3; color++) {
1447 			if (color == 0)
1448 				regamma_y = &tf_pts->red[i];
1449 			else if (color == 1)
1450 				regamma_y = &tf_pts->green[i];
1451 			else
1452 				regamma_y = &tf_pts->blue[i];
1453 
1454 			norm_y = dc_fixpt_mul(max_lut_index_f,
1455 						   *regamma_y);
1456 			index = dc_fixpt_floor(norm_y);
1457 			index_f = dc_fixpt_from_int(index);
1458 
1459 			if (index < 0)
1460 				continue;
1461 
1462 			if (index <= max_lut_index)
1463 				index_next = (index == max_lut_index) ? index : index+1;
1464 			else {
1465 				/* Here we are dealing with the last point in the curve,
1466 				 * which in some cases might exceed the range given by
1467 				 * max_lut_index. So we interpolate the value using
1468 				 * max_lut_index and max_lut_index - 1.
1469 				 */
1470 				index = max_lut_index - 1;
1471 				index_next = max_lut_index;
1472 				index_f = penult_lut_index_f;
1473 			}
1474 
1475 			if (color == 0) {
1476 				lut1 = ramp->entries.red[index];
1477 				lut2 = ramp->entries.red[index_next];
1478 			} else if (color == 1) {
1479 				lut1 = ramp->entries.green[index];
1480 				lut2 = ramp->entries.green[index_next];
1481 			} else {
1482 				lut1 = ramp->entries.blue[index];
1483 				lut2 = ramp->entries.blue[index_next];
1484 			}
1485 
1486 			// we have everything now, so interpolate
1487 			delta_lut = dc_fixpt_sub(lut2, lut1);
1488 			delta_index = dc_fixpt_sub(norm_y, index_f);
1489 
1490 			*regamma_y = dc_fixpt_add(lut1,
1491 				dc_fixpt_mul(delta_index, delta_lut));
1492 		}
1493 	}
1494 }
1495 
build_evenly_distributed_points(struct gamma_pixel * points,uint32_t numberof_points,struct dividers dividers)1496 static void build_evenly_distributed_points(
1497 	struct gamma_pixel *points,
1498 	uint32_t numberof_points,
1499 	struct dividers dividers)
1500 {
1501 	struct gamma_pixel *p = points;
1502 	struct gamma_pixel *p_last;
1503 
1504 	uint32_t i = 0;
1505 
1506 	// This function should not gets called with 0 as a parameter
1507 	ASSERT(numberof_points > 0);
1508 	p_last = p + numberof_points - 1;
1509 
1510 	do {
1511 		struct fixed31_32 value = dc_fixpt_from_fraction(i,
1512 			numberof_points - 1);
1513 
1514 		p->r = value;
1515 		p->g = value;
1516 		p->b = value;
1517 
1518 		++p;
1519 		++i;
1520 	} while (i < numberof_points);
1521 
1522 	p->r = dc_fixpt_div(p_last->r, dividers.divider1);
1523 	p->g = dc_fixpt_div(p_last->g, dividers.divider1);
1524 	p->b = dc_fixpt_div(p_last->b, dividers.divider1);
1525 
1526 	++p;
1527 
1528 	p->r = dc_fixpt_div(p_last->r, dividers.divider2);
1529 	p->g = dc_fixpt_div(p_last->g, dividers.divider2);
1530 	p->b = dc_fixpt_div(p_last->b, dividers.divider2);
1531 
1532 	++p;
1533 
1534 	p->r = dc_fixpt_div(p_last->r, dividers.divider3);
1535 	p->g = dc_fixpt_div(p_last->g, dividers.divider3);
1536 	p->b = dc_fixpt_div(p_last->b, dividers.divider3);
1537 }
1538 
copy_rgb_regamma_to_coordinates_x(struct hw_x_point * coordinates_x,uint32_t hw_points_num,const struct pwl_float_data_ex * rgb_ex)1539 static inline void copy_rgb_regamma_to_coordinates_x(
1540 		struct hw_x_point *coordinates_x,
1541 		uint32_t hw_points_num,
1542 		const struct pwl_float_data_ex *rgb_ex)
1543 {
1544 	struct hw_x_point *coords = coordinates_x;
1545 	uint32_t i = 0;
1546 	const struct pwl_float_data_ex *rgb_regamma = rgb_ex;
1547 
1548 	while (i <= hw_points_num + 1) {
1549 		coords->regamma_y_red = rgb_regamma->r;
1550 		coords->regamma_y_green = rgb_regamma->g;
1551 		coords->regamma_y_blue = rgb_regamma->b;
1552 
1553 		++coords;
1554 		++rgb_regamma;
1555 		++i;
1556 	}
1557 }
1558 
calculate_interpolated_hardware_curve(const struct dc_gamma * ramp,struct pixel_gamma_point * coeff128,struct pwl_float_data * rgb_user,const struct hw_x_point * coordinates_x,const struct gamma_pixel * axis_x,uint32_t number_of_points,struct dc_transfer_func_distributed_points * tf_pts)1559 static bool calculate_interpolated_hardware_curve(
1560 	const struct dc_gamma *ramp,
1561 	struct pixel_gamma_point *coeff128,
1562 	struct pwl_float_data *rgb_user,
1563 	const struct hw_x_point *coordinates_x,
1564 	const struct gamma_pixel *axis_x,
1565 	uint32_t number_of_points,
1566 	struct dc_transfer_func_distributed_points *tf_pts)
1567 {
1568 
1569 	const struct pixel_gamma_point *coeff = coeff128;
1570 	uint32_t max_entries = 3 - 1;
1571 
1572 	uint32_t i = 0;
1573 
1574 	for (i = 0; i < 3; i++) {
1575 		if (!build_custom_gamma_mapping_coefficients_worker(
1576 				ramp, coeff128, coordinates_x, axis_x, i,
1577 				number_of_points))
1578 			return false;
1579 	}
1580 
1581 	i = 0;
1582 	max_entries += ramp->num_entries;
1583 
1584 	/* TODO: float point case */
1585 
1586 	while (i <= number_of_points) {
1587 		tf_pts->red[i] = calculate_mapped_value(
1588 			rgb_user, coeff, CHANNEL_NAME_RED, max_entries);
1589 		tf_pts->green[i] = calculate_mapped_value(
1590 			rgb_user, coeff, CHANNEL_NAME_GREEN, max_entries);
1591 		tf_pts->blue[i] = calculate_mapped_value(
1592 			rgb_user, coeff, CHANNEL_NAME_BLUE, max_entries);
1593 
1594 		++coeff;
1595 		++i;
1596 	}
1597 
1598 	return true;
1599 }
1600 
build_new_custom_resulted_curve(uint32_t hw_points_num,struct dc_transfer_func_distributed_points * tf_pts)1601 static void build_new_custom_resulted_curve(
1602 	uint32_t hw_points_num,
1603 	struct dc_transfer_func_distributed_points *tf_pts)
1604 {
1605 	uint32_t i = 0;
1606 
1607 	while (i != hw_points_num + 1) {
1608 		tf_pts->red[i] = dc_fixpt_clamp(
1609 			tf_pts->red[i], dc_fixpt_zero,
1610 			dc_fixpt_one);
1611 		tf_pts->green[i] = dc_fixpt_clamp(
1612 			tf_pts->green[i], dc_fixpt_zero,
1613 			dc_fixpt_one);
1614 		tf_pts->blue[i] = dc_fixpt_clamp(
1615 			tf_pts->blue[i], dc_fixpt_zero,
1616 			dc_fixpt_one);
1617 
1618 		++i;
1619 	}
1620 }
1621 
map_regamma_hw_to_x_user(const struct dc_gamma * ramp,struct pixel_gamma_point * coeff128,struct pwl_float_data * rgb_user,struct hw_x_point * coords_x,const struct gamma_pixel * axis_x,const struct pwl_float_data_ex * rgb_regamma,uint32_t hw_points_num,struct dc_transfer_func_distributed_points * tf_pts,bool map_user_ramp,bool do_clamping)1622 static bool map_regamma_hw_to_x_user(
1623 	const struct dc_gamma *ramp,
1624 	struct pixel_gamma_point *coeff128,
1625 	struct pwl_float_data *rgb_user,
1626 	struct hw_x_point *coords_x,
1627 	const struct gamma_pixel *axis_x,
1628 	const struct pwl_float_data_ex *rgb_regamma,
1629 	uint32_t hw_points_num,
1630 	struct dc_transfer_func_distributed_points *tf_pts,
1631 	bool map_user_ramp,
1632 	bool do_clamping)
1633 {
1634 	/* setup to spare calculated ideal regamma values */
1635 
1636 	int i = 0;
1637 	struct hw_x_point *coords = coords_x;
1638 	const struct pwl_float_data_ex *regamma = rgb_regamma;
1639 
1640 	if (ramp && map_user_ramp) {
1641 		copy_rgb_regamma_to_coordinates_x(coords,
1642 				hw_points_num,
1643 				rgb_regamma);
1644 
1645 		calculate_interpolated_hardware_curve(
1646 			ramp, coeff128, rgb_user, coords, axis_x,
1647 			hw_points_num, tf_pts);
1648 	} else {
1649 		/* just copy current rgb_regamma into  tf_pts */
1650 		while (i <= hw_points_num) {
1651 			tf_pts->red[i] = regamma->r;
1652 			tf_pts->green[i] = regamma->g;
1653 			tf_pts->blue[i] = regamma->b;
1654 
1655 			++regamma;
1656 			++i;
1657 		}
1658 	}
1659 
1660 	if (do_clamping) {
1661 		/* this should be named differently, all it does is clamp to 0-1 */
1662 		build_new_custom_resulted_curve(hw_points_num, tf_pts);
1663 	}
1664 
1665 	return true;
1666 }
1667 
/* Number of spare entries added to the element count when the rgb_user,
 * axis_x, curve and coeff working buffers are allocated below.
 */
#define _EXTRA_POINTS 3
1669 
mod_color_calculate_degamma_params(struct dc_color_caps * dc_caps,struct dc_transfer_func * input_tf,const struct dc_gamma * ramp,bool map_user_ramp)1670 bool mod_color_calculate_degamma_params(struct dc_color_caps *dc_caps,
1671 		struct dc_transfer_func *input_tf,
1672 		const struct dc_gamma *ramp, bool map_user_ramp)
1673 {
1674 	struct dc_transfer_func_distributed_points *tf_pts = &input_tf->tf_pts;
1675 	struct dividers dividers;
1676 	struct pwl_float_data *rgb_user = NULL;
1677 	struct pwl_float_data_ex *curve = NULL;
1678 	struct gamma_pixel *axis_x = NULL;
1679 	struct pixel_gamma_point *coeff = NULL;
1680 	enum dc_transfer_func_predefined tf;
1681 	uint32_t i;
1682 	bool ret = false;
1683 
1684 	if (input_tf->type == TF_TYPE_BYPASS)
1685 		return false;
1686 
1687 	/* we can use hardcoded curve for plain SRGB TF
1688 	 * If linear, it's bypass if no user ramp
1689 	 */
1690 	if (input_tf->type == TF_TYPE_PREDEFINED) {
1691 		if ((input_tf->tf == TRANSFER_FUNCTION_SRGB ||
1692 				input_tf->tf == TRANSFER_FUNCTION_LINEAR) &&
1693 				!map_user_ramp)
1694 			return true;
1695 
1696 		if (dc_caps != NULL &&
1697 			dc_caps->dpp.dcn_arch == 1) {
1698 
1699 			if (input_tf->tf == TRANSFER_FUNCTION_PQ &&
1700 					dc_caps->dpp.dgam_rom_caps.pq == 1)
1701 				return true;
1702 
1703 			if (input_tf->tf == TRANSFER_FUNCTION_GAMMA22 &&
1704 					dc_caps->dpp.dgam_rom_caps.gamma2_2 == 1)
1705 				return true;
1706 
1707 			// HLG OOTF not accounted for
1708 			if (input_tf->tf == TRANSFER_FUNCTION_HLG &&
1709 					dc_caps->dpp.dgam_rom_caps.hlg == 1)
1710 				return true;
1711 		}
1712 	}
1713 
1714 	input_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
1715 
1716 	if (map_user_ramp && ramp && ramp->type == GAMMA_RGB_256) {
1717 		rgb_user = kvcalloc(ramp->num_entries + _EXTRA_POINTS,
1718 				sizeof(*rgb_user),
1719 				GFP_KERNEL);
1720 		if (!rgb_user)
1721 			goto rgb_user_alloc_fail;
1722 
1723 		axis_x = kvcalloc(ramp->num_entries + _EXTRA_POINTS, sizeof(*axis_x),
1724 				GFP_KERNEL);
1725 		if (!axis_x)
1726 			goto axis_x_alloc_fail;
1727 
1728 		dividers.divider1 = dc_fixpt_from_fraction(3, 2);
1729 		dividers.divider2 = dc_fixpt_from_int(2);
1730 		dividers.divider3 = dc_fixpt_from_fraction(5, 2);
1731 
1732 		build_evenly_distributed_points(
1733 				axis_x,
1734 				ramp->num_entries,
1735 				dividers);
1736 
1737 		scale_gamma(rgb_user, ramp, dividers);
1738 	}
1739 
1740 	curve = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, sizeof(*curve),
1741 			GFP_KERNEL);
1742 	if (!curve)
1743 		goto curve_alloc_fail;
1744 
1745 	coeff = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, sizeof(*coeff),
1746 			GFP_KERNEL);
1747 	if (!coeff)
1748 		goto coeff_alloc_fail;
1749 
1750 	tf = input_tf->tf;
1751 
1752 	if (tf == TRANSFER_FUNCTION_PQ)
1753 		build_de_pq(curve,
1754 				MAX_HW_POINTS,
1755 				coordinates_x);
1756 	else if (tf == TRANSFER_FUNCTION_SRGB ||
1757 		tf == TRANSFER_FUNCTION_BT709 ||
1758 		tf == TRANSFER_FUNCTION_GAMMA22 ||
1759 		tf == TRANSFER_FUNCTION_GAMMA24 ||
1760 		tf == TRANSFER_FUNCTION_GAMMA26)
1761 		build_degamma(curve,
1762 				MAX_HW_POINTS,
1763 				coordinates_x,
1764 				tf);
1765 	else if (tf == TRANSFER_FUNCTION_HLG)
1766 		build_hlg_degamma(curve,
1767 				MAX_HW_POINTS,
1768 				coordinates_x,
1769 				80, 1000);
1770 	else if (tf == TRANSFER_FUNCTION_LINEAR) {
1771 		// just copy coordinates_x into curve
1772 		i = 0;
1773 		while (i != MAX_HW_POINTS + 1) {
1774 			curve[i].r = coordinates_x[i].x;
1775 			curve[i].g = curve[i].r;
1776 			curve[i].b = curve[i].r;
1777 			i++;
1778 		}
1779 	} else
1780 		goto invalid_tf_fail;
1781 
1782 	tf_pts->end_exponent = 0;
1783 	tf_pts->x_point_at_y1_red = 1;
1784 	tf_pts->x_point_at_y1_green = 1;
1785 	tf_pts->x_point_at_y1_blue = 1;
1786 
1787 	if (input_tf->tf == TRANSFER_FUNCTION_PQ) {
1788 		/* just copy current rgb_regamma into  tf_pts */
1789 		struct pwl_float_data_ex *curvePt = curve;
1790 		int i = 0;
1791 
1792 		while (i <= MAX_HW_POINTS) {
1793 			tf_pts->red[i]   = curvePt->r;
1794 			tf_pts->green[i] = curvePt->g;
1795 			tf_pts->blue[i]  = curvePt->b;
1796 			++curvePt;
1797 			++i;
1798 		}
1799 	} else {
1800 		// clamps to 0-1
1801 		map_regamma_hw_to_x_user(ramp, coeff, rgb_user,
1802 				coordinates_x, axis_x, curve,
1803 				MAX_HW_POINTS, tf_pts,
1804 				map_user_ramp && ramp && ramp->type == GAMMA_RGB_256,
1805 				true);
1806 	}
1807 
1808 
1809 
1810 	if (ramp && ramp->type == GAMMA_CUSTOM)
1811 		apply_lut_1d(ramp, MAX_HW_POINTS, tf_pts);
1812 
1813 	ret = true;
1814 
1815 invalid_tf_fail:
1816 	kvfree(coeff);
1817 coeff_alloc_fail:
1818 	kvfree(curve);
1819 curve_alloc_fail:
1820 	kvfree(axis_x);
1821 axis_x_alloc_fail:
1822 	kvfree(rgb_user);
1823 rgb_user_alloc_fail:
1824 
1825 	return ret;
1826 }
1827 
calculate_curve(enum dc_transfer_func_predefined trans,struct dc_transfer_func_distributed_points * points,struct pwl_float_data_ex * rgb_regamma,const struct hdr_tm_params * fs_params,uint32_t sdr_ref_white_level,struct calculate_buffer * cal_buffer)1828 static bool calculate_curve(enum dc_transfer_func_predefined trans,
1829 				struct dc_transfer_func_distributed_points *points,
1830 				struct pwl_float_data_ex *rgb_regamma,
1831 				const struct hdr_tm_params *fs_params,
1832 				uint32_t sdr_ref_white_level,
1833 				struct calculate_buffer *cal_buffer)
1834 {
1835 	uint32_t i;
1836 	bool ret = false;
1837 
1838 	if (trans == TRANSFER_FUNCTION_UNITY ||
1839 		trans == TRANSFER_FUNCTION_LINEAR) {
1840 		points->end_exponent = 0;
1841 		points->x_point_at_y1_red = 1;
1842 		points->x_point_at_y1_green = 1;
1843 		points->x_point_at_y1_blue = 1;
1844 
1845 		for (i = 0; i <= MAX_HW_POINTS ; i++) {
1846 			rgb_regamma[i].r = coordinates_x[i].x;
1847 			rgb_regamma[i].g = coordinates_x[i].x;
1848 			rgb_regamma[i].b = coordinates_x[i].x;
1849 		}
1850 
1851 		ret = true;
1852 	} else if (trans == TRANSFER_FUNCTION_PQ) {
1853 		points->end_exponent = 7;
1854 		points->x_point_at_y1_red = 125;
1855 		points->x_point_at_y1_green = 125;
1856 		points->x_point_at_y1_blue = 125;
1857 
1858 		build_pq(rgb_regamma,
1859 				MAX_HW_POINTS,
1860 				coordinates_x,
1861 				sdr_ref_white_level);
1862 
1863 		ret = true;
1864 	} else if (trans == TRANSFER_FUNCTION_GAMMA22 &&
1865 			fs_params != NULL && fs_params->skip_tm == 0) {
1866 		build_freesync_hdr(rgb_regamma,
1867 				MAX_HW_POINTS,
1868 				coordinates_x,
1869 				fs_params,
1870 				cal_buffer);
1871 
1872 		ret = true;
1873 	} else if (trans == TRANSFER_FUNCTION_HLG) {
1874 		points->end_exponent = 4;
1875 		points->x_point_at_y1_red = 12;
1876 		points->x_point_at_y1_green = 12;
1877 		points->x_point_at_y1_blue = 12;
1878 
1879 		build_hlg_regamma(rgb_regamma,
1880 				MAX_HW_POINTS,
1881 				coordinates_x,
1882 				80, 1000);
1883 
1884 		ret = true;
1885 	} else {
1886 		// trans == TRANSFER_FUNCTION_SRGB
1887 		// trans == TRANSFER_FUNCTION_BT709
1888 		// trans == TRANSFER_FUNCTION_GAMMA22
1889 		// trans == TRANSFER_FUNCTION_GAMMA24
1890 		// trans == TRANSFER_FUNCTION_GAMMA26
1891 		points->end_exponent = 0;
1892 		points->x_point_at_y1_red = 1;
1893 		points->x_point_at_y1_green = 1;
1894 		points->x_point_at_y1_blue = 1;
1895 
1896 		build_regamma(rgb_regamma,
1897 				MAX_HW_POINTS,
1898 				coordinates_x,
1899 				trans,
1900 				cal_buffer);
1901 
1902 		ret = true;
1903 	}
1904 
1905 	return ret;
1906 }
1907 
mod_color_calculate_regamma_params(struct dc_transfer_func * output_tf,const struct dc_gamma * ramp,bool map_user_ramp,bool can_rom_be_used,const struct hdr_tm_params * fs_params,struct calculate_buffer * cal_buffer)1908 bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf,
1909 					const struct dc_gamma *ramp,
1910 					bool map_user_ramp,
1911 					bool can_rom_be_used,
1912 					const struct hdr_tm_params *fs_params,
1913 					struct calculate_buffer *cal_buffer)
1914 {
1915 	struct dc_transfer_func_distributed_points *tf_pts = &output_tf->tf_pts;
1916 	struct dividers dividers;
1917 
1918 	struct pwl_float_data *rgb_user = NULL;
1919 	struct pwl_float_data_ex *rgb_regamma = NULL;
1920 	struct gamma_pixel *axis_x = NULL;
1921 	struct pixel_gamma_point *coeff = NULL;
1922 	enum dc_transfer_func_predefined tf;
1923 	bool do_clamping = true;
1924 	bool ret = false;
1925 
1926 	if (output_tf->type == TF_TYPE_BYPASS)
1927 		return false;
1928 
1929 	/* we can use hardcoded curve for plain SRGB TF */
1930 	if (output_tf->type == TF_TYPE_PREDEFINED && can_rom_be_used == true &&
1931 			output_tf->tf == TRANSFER_FUNCTION_SRGB) {
1932 		if (ramp == NULL)
1933 			return true;
1934 		if ((ramp->is_identity && ramp->type != GAMMA_CS_TFM_1D) ||
1935 		    (!map_user_ramp && ramp->type == GAMMA_RGB_256))
1936 			return true;
1937 	}
1938 
1939 	output_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
1940 
1941 	if (ramp && ramp->type != GAMMA_CS_TFM_1D &&
1942 	    (map_user_ramp || ramp->type != GAMMA_RGB_256)) {
1943 		rgb_user = kvcalloc(ramp->num_entries + _EXTRA_POINTS,
1944 			    sizeof(*rgb_user),
1945 			    GFP_KERNEL);
1946 		if (!rgb_user)
1947 			goto rgb_user_alloc_fail;
1948 
1949 		axis_x = kvcalloc(ramp->num_entries + 3, sizeof(*axis_x),
1950 				GFP_KERNEL);
1951 		if (!axis_x)
1952 			goto axis_x_alloc_fail;
1953 
1954 		dividers.divider1 = dc_fixpt_from_fraction(3, 2);
1955 		dividers.divider2 = dc_fixpt_from_int(2);
1956 		dividers.divider3 = dc_fixpt_from_fraction(5, 2);
1957 
1958 		build_evenly_distributed_points(
1959 				axis_x,
1960 				ramp->num_entries,
1961 				dividers);
1962 
1963 		if (ramp->type == GAMMA_RGB_256 && map_user_ramp)
1964 			scale_gamma(rgb_user, ramp, dividers);
1965 		else if (ramp->type == GAMMA_RGB_FLOAT_1024)
1966 			scale_gamma_dx(rgb_user, ramp, dividers);
1967 	}
1968 
1969 	rgb_regamma = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS,
1970 			       sizeof(*rgb_regamma),
1971 			       GFP_KERNEL);
1972 	if (!rgb_regamma)
1973 		goto rgb_regamma_alloc_fail;
1974 
1975 	coeff = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, sizeof(*coeff),
1976 			 GFP_KERNEL);
1977 	if (!coeff)
1978 		goto coeff_alloc_fail;
1979 
1980 	tf = output_tf->tf;
1981 
1982 	ret = calculate_curve(tf,
1983 			tf_pts,
1984 			rgb_regamma,
1985 			fs_params,
1986 			output_tf->sdr_ref_white_level,
1987 			cal_buffer);
1988 
1989 	if (ret) {
1990 		do_clamping = !(output_tf->tf == TRANSFER_FUNCTION_PQ) &&
1991 				!(output_tf->tf == TRANSFER_FUNCTION_GAMMA22 &&
1992 				fs_params != NULL && fs_params->skip_tm == 0);
1993 
1994 		map_regamma_hw_to_x_user(ramp, coeff, rgb_user,
1995 					 coordinates_x, axis_x, rgb_regamma,
1996 					 MAX_HW_POINTS, tf_pts,
1997 					 (map_user_ramp || (ramp && ramp->type != GAMMA_RGB_256)) &&
1998 					 (ramp && ramp->type != GAMMA_CS_TFM_1D),
1999 					 do_clamping);
2000 
2001 		if (ramp && ramp->type == GAMMA_CS_TFM_1D)
2002 			apply_lut_1d(ramp, MAX_HW_POINTS, tf_pts);
2003 	}
2004 
2005 	kvfree(coeff);
2006 coeff_alloc_fail:
2007 	kvfree(rgb_regamma);
2008 rgb_regamma_alloc_fail:
2009 	kvfree(axis_x);
2010 axis_x_alloc_fail:
2011 	kvfree(rgb_user);
2012 rgb_user_alloc_fail:
2013 	return ret;
2014 }
2015