xref: /aosp_15_r20/external/mesa3d/src/amd/vpelib/src/chip/vpe10/vpe10_cm_common.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /* Copyright 2022 Advanced Micro Devices, Inc.
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining a
4  * copy of this software and associated documentation files (the "Software"),
5  * to deal in the Software without restriction, including without limitation
6  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
7  * and/or sell copies of the Software, and to permit persons to whom the
8  * Software is furnished to do so, subject to the following conditions:
9  *
10  * The above copyright notice and this permission notice shall be included in
11  * all copies or substantial portions of the Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
17  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
18  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
19  * OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * Authors: AMD
22  *
23  */
24 #include <stdint.h>
25 #include <string.h>
26 #include "vpe10_cm_common.h"
27 #include "custom_float.h"
28 #include "reg_helper.h"
29 
30 #define CTX_BASE dpp
31 #define CTX      vpe10_dpp
32 
cm_helper_convert_to_custom_float(struct pwl_result_data * rgb_resulted,struct curve_points3 * corner_points,uint32_t hw_points_num,bool fixpoint)33 static bool cm_helper_convert_to_custom_float(struct pwl_result_data *rgb_resulted,
34     struct curve_points3 *corner_points, uint32_t hw_points_num, bool fixpoint)
35 {
36     struct custom_float_format fmt = {0};
37 
38     struct pwl_result_data *rgb = rgb_resulted;
39 
40     uint32_t i = 0;
41 
42     fmt.exponenta_bits = 6;
43     fmt.mantissa_bits  = 12;
44     fmt.sign           = false;
45 
46     /* corner_points[0] - beginning base, slope offset for R,G,B
47      * corner_points[1] - end base, slope offset for R,G,B
48      */
49     if (!vpe_convert_to_custom_float_format(
50             corner_points[0].red.x, &fmt, &corner_points[0].red.custom_float_x)) {
51         VPE_ASSERT(0);
52         return false;
53     }
54     if (!vpe_convert_to_custom_float_format(
55             corner_points[0].green.x, &fmt, &corner_points[0].green.custom_float_x)) {
56         VPE_ASSERT(0);
57         return false;
58     }
59     if (!vpe_convert_to_custom_float_format(
60             corner_points[0].blue.x, &fmt, &corner_points[0].blue.custom_float_x)) {
61         VPE_ASSERT(0);
62         return false;
63     }
64     if (!vpe_convert_to_custom_float_format(
65             corner_points[0].red.y, &fmt, &corner_points[0].red.custom_float_y)) {
66         VPE_ASSERT(0);
67         return false;
68     }
69     if (!vpe_convert_to_custom_float_format(
70             corner_points[0].green.y, &fmt, &corner_points[0].green.custom_float_y)) {
71         VPE_ASSERT(0);
72         return false;
73     }
74     if (!vpe_convert_to_custom_float_format(
75             corner_points[0].blue.y, &fmt, &corner_points[0].blue.custom_float_y)) {
76         VPE_ASSERT(0);
77         return false;
78     }
79     if (!vpe_convert_to_custom_float_format(
80             corner_points[0].red.slope, &fmt, &corner_points[0].red.custom_float_slope)) {
81         VPE_ASSERT(0);
82         return false;
83     }
84     if (!vpe_convert_to_custom_float_format(
85             corner_points[0].green.slope, &fmt, &corner_points[0].green.custom_float_slope)) {
86         VPE_ASSERT(0);
87         return false;
88     }
89     if (!vpe_convert_to_custom_float_format(
90             corner_points[0].blue.slope, &fmt, &corner_points[0].blue.custom_float_slope)) {
91         VPE_ASSERT(0);
92         return false;
93     }
94 
95     if (fixpoint == true) {
96         corner_points[1].red.custom_float_y   = vpe_fixpt_clamp_u0d14(corner_points[1].red.y);
97         corner_points[1].green.custom_float_y = vpe_fixpt_clamp_u0d14(corner_points[1].green.y);
98         corner_points[1].blue.custom_float_y  = vpe_fixpt_clamp_u0d14(corner_points[1].blue.y);
99     } else {
100         if (!vpe_convert_to_custom_float_format(
101                 corner_points[1].red.y, &fmt, &corner_points[1].red.custom_float_y)) {
102             VPE_ASSERT(0);
103             return false;
104         }
105         if (!vpe_convert_to_custom_float_format(
106                 corner_points[1].green.y, &fmt, &corner_points[1].green.custom_float_y)) {
107             VPE_ASSERT(0);
108             return false;
109         }
110         if (!vpe_convert_to_custom_float_format(
111                 corner_points[1].blue.y, &fmt, &corner_points[1].blue.custom_float_y)) {
112             VPE_ASSERT(0);
113             return false;
114         }
115     }
116 
117     fmt.mantissa_bits = 10;
118     fmt.sign          = false;
119 
120     if (!vpe_convert_to_custom_float_format(
121             corner_points[1].red.x, &fmt, &corner_points[1].red.custom_float_x)) {
122         VPE_ASSERT(0);
123         return false;
124     }
125     if (!vpe_convert_to_custom_float_format(
126             corner_points[1].green.x, &fmt, &corner_points[1].green.custom_float_x)) {
127         VPE_ASSERT(0);
128         return false;
129     }
130     if (!vpe_convert_to_custom_float_format(
131             corner_points[1].blue.x, &fmt, &corner_points[1].blue.custom_float_x)) {
132         VPE_ASSERT(0);
133         return false;
134     }
135 
136     if (!vpe_convert_to_custom_float_format(
137             corner_points[1].red.slope, &fmt, &corner_points[1].red.custom_float_slope)) {
138         VPE_ASSERT(0);
139         return false;
140     }
141     if (!vpe_convert_to_custom_float_format(
142             corner_points[1].green.slope, &fmt, &corner_points[1].green.custom_float_slope)) {
143         VPE_ASSERT(0);
144         return false;
145     }
146     if (!vpe_convert_to_custom_float_format(
147             corner_points[1].blue.slope, &fmt, &corner_points[1].blue.custom_float_slope)) {
148         VPE_ASSERT(0);
149         return false;
150     }
151 
152     if (hw_points_num == 0 || rgb_resulted == NULL || fixpoint == true)
153         return true;
154 
155     fmt.mantissa_bits = 12;
156 
157     while (i != hw_points_num) {
158         if (!vpe_convert_to_custom_float_format(rgb->red, &fmt, &rgb->red_reg)) {
159             VPE_ASSERT(0);
160             return false;
161         }
162 
163         if (!vpe_convert_to_custom_float_format(rgb->green, &fmt, &rgb->green_reg)) {
164             VPE_ASSERT(0);
165             return false;
166         }
167 
168         if (!vpe_convert_to_custom_float_format(rgb->blue, &fmt, &rgb->blue_reg)) {
169             VPE_ASSERT(0);
170             return false;
171         }
172 
173         if (!vpe_convert_to_custom_float_format(rgb->delta_red, &fmt, &rgb->delta_red_reg)) {
174             VPE_ASSERT(0);
175             return false;
176         }
177 
178         if (!vpe_convert_to_custom_float_format(rgb->delta_green, &fmt, &rgb->delta_green_reg)) {
179             VPE_ASSERT(0);
180             return false;
181         }
182 
183         if (!vpe_convert_to_custom_float_format(rgb->delta_blue, &fmt, &rgb->delta_blue_reg)) {
184             VPE_ASSERT(0);
185             return false;
186         }
187 
188         ++rgb;
189         ++i;
190     }
191 
192     fmt.exponenta_bits = 6;
193     fmt.mantissa_bits = 12;
194     fmt.sign = 1;
195 
196     if (!vpe_convert_to_custom_float_format(
197         corner_points[0].red.offset, &fmt, &corner_points[0].red.custom_float_offset)) {
198         VPE_ASSERT(0);
199         return false;
200     }
201 
202     if (!vpe_convert_to_custom_float_format(
203         corner_points[0].green.offset, &fmt, &corner_points[0].green.custom_float_offset)) {
204         VPE_ASSERT(0);
205         return false;
206     }
207 
208     if (!vpe_convert_to_custom_float_format(
209         corner_points[0].blue.offset, &fmt, &corner_points[0].blue.custom_float_offset)) {
210         VPE_ASSERT(0);
211         return false;
212     }
213     return true;
214 }
215 
216 /* driver uses 32 regions or less, but DCN HW has 34, extra 2 are set to 0 */
217 #define MAX_REGIONS_NUMBER 34
218 #define MAX_LOW_POINT      25
219 #define NUMBER_REGIONS     32
220 #define NUMBER_SW_SEGMENTS 16
221 
vpe10_cm_helper_translate_curve_to_hw_format(const struct transfer_func * output_tf,struct pwl_params * lut_params,bool fixpoint)222 bool vpe10_cm_helper_translate_curve_to_hw_format(
223     const struct transfer_func *output_tf, struct pwl_params *lut_params, bool fixpoint)
224 {
225     struct curve_points3   *corner_points;
226     struct pwl_result_data *rgb_resulted;
227     struct pwl_result_data *rgb;
228     struct pwl_result_data *rgb_plus_1;
229     struct pwl_result_data *rgb_minus_1;
230 
231     int32_t  region_start, region_end;
232     int32_t  i;
233     uint32_t j, k, seg_distr[MAX_REGIONS_NUMBER], increment, start_index, hw_points;
234 
235     if (output_tf == NULL || lut_params == NULL || output_tf->type == TF_TYPE_BYPASS)
236         return false;
237 
238     corner_points = lut_params->corner_points;
239     rgb_resulted  = lut_params->rgb_resulted;
240     hw_points     = 0;
241 
242     memset(lut_params, 0, sizeof(struct pwl_params));
243     memset(seg_distr, 0, sizeof(seg_distr));
244 
245     if (output_tf->tf == TRANSFER_FUNC_PQ2084) {
246 
247         for (i = 0; i < MAX_LOW_POINT; i++)
248             seg_distr[i] = 3;
249 
250         // Extra magic point to account for incorrect programming of the lut
251         seg_distr[i] = 1;
252         region_start = -MAX_LOW_POINT;
253         region_end   = 1;
254     } else if (output_tf->tf == TRANSFER_FUNC_LINEAR) {
255 
256         int num_regions_linear = MAX_LOW_POINT + 3;
257 
258         for (i = 0; i < num_regions_linear; i++)
259             seg_distr[i] = 3;
260 
261         region_start = -MAX_LOW_POINT;
262         region_end   = 3;
263     } else {
264         seg_distr[0]  = 3;
265         seg_distr[1]  = 4;
266         seg_distr[2]  = 4;
267         seg_distr[3]  = 4;
268         seg_distr[4]  = 4;
269         seg_distr[5]  = 4;
270         seg_distr[6]  = 4;
271         seg_distr[7]  = 4;
272         seg_distr[8]  = 4;
273         seg_distr[9]  = 4;
274         seg_distr[10] = 4;
275         seg_distr[11] = 4;
276         seg_distr[12] = 1;
277 
278         region_start = -12;
279         region_end   = 1;
280     }
281 
282     for (i = region_end - region_start; i < MAX_REGIONS_NUMBER; i++)
283         seg_distr[i] = (uint32_t)-1;
284 
285     for (k = 0; k < MAX_REGIONS_NUMBER; k++) {
286         if (seg_distr[k] != (uint32_t)-1)
287             hw_points += (1 << seg_distr[k]);
288     }
289 
290     j = 0;
291     for (k = 0; k < (uint32_t)(region_end - region_start); k++) {
292         increment   = NUMBER_SW_SEGMENTS / (1 << seg_distr[k]);
293         start_index = ((uint32_t)region_start + k + MAX_LOW_POINT) * NUMBER_SW_SEGMENTS;
294         for (i = (int32_t)start_index; i < (int32_t)start_index + NUMBER_SW_SEGMENTS;
295              i += increment) {
296             if (j == hw_points - 1)
297                 break;
298             rgb_resulted[j].red   = output_tf->tf_pts.red[i];
299             rgb_resulted[j].green = output_tf->tf_pts.green[i];
300             rgb_resulted[j].blue  = output_tf->tf_pts.blue[i];
301             j++;
302         }
303     }
304 
305     /* last point */
306     start_index                     = (uint32_t)((region_end + MAX_LOW_POINT) * NUMBER_SW_SEGMENTS);
307     rgb_resulted[hw_points - 1].red = output_tf->tf_pts.red[start_index];
308     rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index];
309     rgb_resulted[hw_points - 1].blue  = output_tf->tf_pts.blue[start_index];
310 
311     rgb_resulted[hw_points].red   = rgb_resulted[hw_points - 1].red;
312     rgb_resulted[hw_points].green = rgb_resulted[hw_points - 1].green;
313     rgb_resulted[hw_points].blue  = rgb_resulted[hw_points - 1].blue;
314 
315     // All 3 color channels have same x
316     corner_points[0].red.x = vpe_fixpt_pow(vpe_fixpt_from_int(2), vpe_fixpt_from_int(region_start));
317     corner_points[0].green.x = corner_points[0].red.x;
318     corner_points[0].blue.x  = corner_points[0].red.x;
319 
320     corner_points[1].red.x   = vpe_fixpt_pow(vpe_fixpt_from_int(2), vpe_fixpt_from_int(region_end));
321     corner_points[1].green.x = corner_points[1].red.x;
322     corner_points[1].blue.x  = corner_points[1].red.x;
323 
324     corner_points[0].red.y   = rgb_resulted[0].red;
325     corner_points[0].green.y = rgb_resulted[0].green;
326     corner_points[0].blue.y  = rgb_resulted[0].blue;
327 
328     corner_points[0].red.slope = vpe_fixpt_div(corner_points[0].red.y, corner_points[0].red.x);
329     corner_points[0].green.slope =
330         vpe_fixpt_div(corner_points[0].green.y, corner_points[0].green.x);
331     corner_points[0].blue.slope = vpe_fixpt_div(corner_points[0].blue.y, corner_points[0].blue.x);
332 
333     /* see comment above, m_arrPoints[1].y should be the Y value for the
334      * region end (m_numOfHwPoints), not last HW point(m_numOfHwPoints - 1)
335      */
336     corner_points[1].red.y       = rgb_resulted[hw_points - 1].red;
337     corner_points[1].green.y     = rgb_resulted[hw_points - 1].green;
338     corner_points[1].blue.y      = rgb_resulted[hw_points - 1].blue;
339     corner_points[1].red.slope   = vpe_fixpt_zero;
340     corner_points[1].green.slope = vpe_fixpt_zero;
341     corner_points[1].blue.slope  = vpe_fixpt_zero;
342 
343     lut_params->hw_points_num = hw_points;
344 
345     k = 0;
346     for (i = 1; i < MAX_REGIONS_NUMBER; i++) {
347         if (seg_distr[k] != (uint32_t)-1) {
348             lut_params->arr_curve_points[k].segments_num = seg_distr[k];
349             lut_params->arr_curve_points[i].offset =
350                 lut_params->arr_curve_points[k].offset + (1 << seg_distr[k]);
351         }
352         k++;
353     }
354 
355     if (seg_distr[k] != (uint32_t)-1)
356         lut_params->arr_curve_points[k].segments_num = seg_distr[k];
357 
358     rgb         = rgb_resulted;
359     rgb_plus_1  = rgb_resulted + 1;
360     rgb_minus_1 = rgb;
361 
362     i = 1;
363     while (i != (int32_t)(hw_points + 1)) {
364         if (i >= (int32_t)(hw_points - 1)) {
365             if (vpe_fixpt_lt(rgb_plus_1->red, rgb->red))
366                 rgb_plus_1->red = vpe_fixpt_add(rgb->red, rgb_minus_1->delta_red);
367             if (vpe_fixpt_lt(rgb_plus_1->green, rgb->green))
368                 rgb_plus_1->green = vpe_fixpt_add(rgb->green, rgb_minus_1->delta_green);
369             if (vpe_fixpt_lt(rgb_plus_1->blue, rgb->blue))
370                 rgb_plus_1->blue = vpe_fixpt_add(rgb->blue, rgb_minus_1->delta_blue);
371         }
372 
373         rgb->delta_red   = vpe_fixpt_sub(rgb_plus_1->red, rgb->red);
374         rgb->delta_green = vpe_fixpt_sub(rgb_plus_1->green, rgb->green);
375         rgb->delta_blue  = vpe_fixpt_sub(rgb_plus_1->blue, rgb->blue);
376 
377         if (fixpoint == true) {
378             rgb->delta_red_reg   = vpe_fixpt_clamp_u0d10(rgb->delta_red);
379             rgb->delta_green_reg = vpe_fixpt_clamp_u0d10(rgb->delta_green);
380             rgb->delta_blue_reg  = vpe_fixpt_clamp_u0d10(rgb->delta_blue);
381             rgb->red_reg         = vpe_fixpt_clamp_u0d14(rgb->red);
382             rgb->green_reg       = vpe_fixpt_clamp_u0d14(rgb->green);
383             rgb->blue_reg        = vpe_fixpt_clamp_u0d14(rgb->blue);
384         }
385 
386         ++rgb_plus_1;
387         rgb_minus_1 = rgb;
388         ++rgb;
389         ++i;
390     }
391 
392     corner_points[0].red.y        = vpe_fixpt_zero;
393     corner_points[0].green.y      = vpe_fixpt_zero;
394     corner_points[0].blue.y       = vpe_fixpt_zero;
395     corner_points[0].red.offset   = output_tf->start_base;
396     corner_points[0].green.offset = output_tf->start_base;
397     corner_points[0].blue.offset  = output_tf->start_base;
398 
399     cm_helper_convert_to_custom_float(rgb_resulted, lut_params->corner_points, hw_points, fixpoint);
400 
401     return true;
402 }
403 
404 #define NUM_DEGAMMA_REGIONS        9
405 #define MAX_REGIONS_NUMBER_DEGAMMA 16
406 #define MAX_HW_POINTS_DEGAMMA      257
407 
vpe10_cm_helper_translate_curve_to_degamma_hw_format(const struct transfer_func * output_tf,struct pwl_params * lut_params)408 bool vpe10_cm_helper_translate_curve_to_degamma_hw_format(
409     const struct transfer_func *output_tf, struct pwl_params *lut_params)
410 {
411     struct curve_points3   *corner_points;
412     struct pwl_result_data *rgb_resulted;
413     struct pwl_result_data *rgb;
414     struct pwl_result_data *rgb_plus_1;
415 
416     int32_t  region_start, region_end;
417     int32_t  i;
418     uint32_t k, seg_distr[MAX_REGIONS_NUMBER_DEGAMMA], num_segments, hw_points;
419 
420     if (output_tf == NULL || lut_params == NULL || output_tf->type == TF_TYPE_BYPASS)
421         return false;
422 
423     corner_points = lut_params->corner_points;
424     rgb_resulted  = lut_params->rgb_resulted;
425     num_segments  = 0;
426 
427     memset(lut_params, 0, sizeof(struct pwl_params));
428     memset(seg_distr, 0, sizeof(seg_distr));
429 
430     region_start = -NUM_DEGAMMA_REGIONS;
431     region_end   = 0;
432 
433     for (i = 0; i < MAX_HW_POINTS_DEGAMMA; i++) {
434         rgb_resulted[i].red   = output_tf->tf_pts.red[i];
435         rgb_resulted[i].green = output_tf->tf_pts.green[i];
436         rgb_resulted[i].blue  = output_tf->tf_pts.blue[i];
437     }
438 
439     for (k = (uint32_t)(region_end - region_start); k < MAX_REGIONS_NUMBER_DEGAMMA; k++)
440         seg_distr[k] = (uint32_t)-1;
441 
442     /* 9 segments
443      * segments are from 2^-8 to 0
444      */
445     seg_distr[0] = 0; /* Since we only have one point in last region */
446     num_segments += 1;
447 
448     for (k = 1; k < NUM_DEGAMMA_REGIONS; k++) {
449         seg_distr[k] = k - 1; /* Depends upon the regions' points 2^n; seg_distr = n */
450         num_segments += (1 << seg_distr[k]);
451     }
452     hw_points = num_segments + 1;
453 
454     corner_points[0].red.x = vpe_fixpt_pow(vpe_fixpt_from_int(2), vpe_fixpt_from_int(region_start));
455     corner_points[0].green.x     = corner_points[0].red.x;
456     corner_points[0].blue.x      = corner_points[0].red.x;
457     corner_points[0].red.y       = rgb_resulted[0].red;
458     corner_points[0].green.y     = rgb_resulted[0].green;
459     corner_points[0].blue.y      = rgb_resulted[0].blue;
460     corner_points[0].red.slope   = vpe_fixpt_div(corner_points[0].red.y, corner_points[0].red.x);
461     corner_points[0].green.slope = corner_points[0].red.slope;
462     corner_points[0].blue.slope  = corner_points[0].red.slope;
463 
464     corner_points[1].red.x   = vpe_fixpt_pow(vpe_fixpt_from_int(2), vpe_fixpt_from_int(region_end));
465     corner_points[1].green.x = corner_points[1].red.x;
466     corner_points[1].blue.x  = corner_points[1].red.x;
467 
468     corner_points[1].red.y       = rgb_resulted[num_segments].red;
469     corner_points[1].green.y     = rgb_resulted[num_segments].green;
470     corner_points[1].blue.y      = rgb_resulted[num_segments].blue;
471     corner_points[1].red.slope   = vpe_fixpt_zero;
472     corner_points[1].green.slope = vpe_fixpt_zero;
473     corner_points[1].blue.slope  = vpe_fixpt_zero;
474 
475     // The number of HW points is equal to num_segments+1, however due to bug in lower layer, it
476     // must be set to num_segments
477     lut_params->hw_points_num = num_segments;
478 
479     lut_params->arr_curve_points[0].segments_num = seg_distr[0];
480     for (i = 1; i < NUM_DEGAMMA_REGIONS; i++) {
481         lut_params->arr_curve_points[i].segments_num = seg_distr[i];
482         lut_params->arr_curve_points[i].offset =
483             lut_params->arr_curve_points[i - 1].offset + (1 << seg_distr[i - 1]);
484     }
485 
486     if (seg_distr[i] != (uint32_t)-1)
487         lut_params->arr_curve_points[k].segments_num = seg_distr[k];
488 
489     rgb        = rgb_resulted;
490     rgb_plus_1 = rgb_resulted + 1;
491 
492     i = 1;
493     while (i != (int32_t)(hw_points)) {
494         if (vpe_fixpt_lt(rgb_plus_1->red, rgb->red))
495             rgb_plus_1->red = rgb->red;
496         if (vpe_fixpt_lt(rgb_plus_1->green, rgb->green))
497             rgb_plus_1->green = rgb->green;
498         if (vpe_fixpt_lt(rgb_plus_1->blue, rgb->blue))
499             rgb_plus_1->blue = rgb->blue;
500 
501         rgb->delta_red   = vpe_fixpt_sub(rgb_plus_1->red, rgb->red);
502         rgb->delta_green = vpe_fixpt_sub(rgb_plus_1->green, rgb->green);
503         rgb->delta_blue  = vpe_fixpt_sub(rgb_plus_1->blue, rgb->blue);
504 
505         ++rgb_plus_1;
506         ++rgb;
507         ++i;
508     }
509 
510     corner_points[0].red.y        = vpe_fixpt_zero;
511     corner_points[0].green.y      = vpe_fixpt_zero;
512     corner_points[0].blue.y       = vpe_fixpt_zero;
513     corner_points[0].red.offset   = output_tf->start_base;
514     corner_points[0].green.offset = output_tf->start_base;
515     corner_points[0].blue.offset  = output_tf->start_base;
516 
517     cm_helper_convert_to_custom_float(rgb_resulted, lut_params->corner_points, hw_points, false);
518 
519     return true;
520 }
521 
/*
 * Register packet helpers for the colour-management blocks.
 *
 * All of these expect two names to be in scope at the point of use:
 *   reg            pointer to a register description with `shifts` and `masks` members
 *   config_writer  first argument handed to config_writer_fill()
 *
 * REG_FIELD_VALUE_CM places a value into a field. The value is converted to uint32_t
 * before shifting so that narrow or signed operands are never shifted as a signed int.
 * REG_FIELD_MASK_CM yields the mask of a field.
 */
#define REG_FIELD_VALUE_CM(field, value)                                                           \
    (((uint32_t)(value) << reg->shifts.field) & reg->masks.field)
#define REG_FIELD_MASK_CM(field) (reg->masks.field)

/*
 * REG_SET_CM / REG_SET_2_CM / REG_SET_4_CM emit two dwords through config_writer_fill():
 * a header built from a data-size field of 0 and the register offset, followed by
 * init_val with the named field(s) replaced by the given value(s).
 * init_val is parenthesized so that compound expressions are masked as a whole.
 */
#define REG_SET_CM(reg_offset, init_val, field, val)                                               \
    do {                                                                                           \
        config_writer_fill(                                                                        \
            config_writer, VPEC_FIELD_VALUE(VPE_DIR_CFG_PKT_DATA_SIZE, 0) |                        \
                               VPEC_FIELD_VALUE(VPE_DIR_CFG_PKT_REGISTER_OFFSET, (reg_offset)));   \
        config_writer_fill(config_writer,                                                          \
            (((init_val) & ~(REG_FIELD_MASK_CM(field))) | REG_FIELD_VALUE_CM(field, val)));        \
    } while (0)

#define REG_SET_2_CM(reg_offset, init_val, f1, v1, f2, v2)                                         \
    do {                                                                                           \
        config_writer_fill(                                                                        \
            config_writer, VPEC_FIELD_VALUE(VPE_DIR_CFG_PKT_DATA_SIZE, 0) |                        \
                               VPEC_FIELD_VALUE(VPE_DIR_CFG_PKT_REGISTER_OFFSET, (reg_offset)));   \
        config_writer_fill(                                                                        \
            config_writer, (((init_val) & ~(REG_FIELD_MASK_CM(f1)) & ~(REG_FIELD_MASK_CM(f2))) |   \
                               REG_FIELD_VALUE_CM(f1, v1) | REG_FIELD_VALUE_CM(f2, v2)));          \
    } while (0)

#define REG_SET_4_CM(reg_offset, init_val, f1, v1, f2, v2, f3, v3, f4, v4)                         \
    do {                                                                                           \
        config_writer_fill(                                                                        \
            config_writer, VPEC_FIELD_VALUE(VPE_DIR_CFG_PKT_DATA_SIZE, 0) |                        \
                               VPEC_FIELD_VALUE(VPE_DIR_CFG_PKT_REGISTER_OFFSET, (reg_offset)));   \
        config_writer_fill(                                                                        \
            config_writer, (((init_val) & ~(REG_FIELD_MASK_CM(f1)) & ~(REG_FIELD_MASK_CM(f2)) &    \
                                ~(REG_FIELD_MASK_CM(f3)) & ~(REG_FIELD_MASK_CM(f4))) |             \
                               REG_FIELD_VALUE_CM(f1, v1) | REG_FIELD_VALUE_CM(f2, v2) |           \
                               REG_FIELD_VALUE_CM(f3, v3) | REG_FIELD_VALUE_CM(f4, v4)));          \
    } while (0)
556 
vpe10_cm_helper_program_gamcor_xfer_func(struct config_writer * config_writer,const struct pwl_params * params,const struct vpe10_xfer_func_reg * reg)557 void vpe10_cm_helper_program_gamcor_xfer_func(struct config_writer *config_writer,
558     const struct pwl_params *params, const struct vpe10_xfer_func_reg *reg)
559 {
560     // Total: 13 * 4 + (region_end - region_start + 4) = 13*4 + 68 = 120 bytes
561     uint32_t     reg_region_cur;
562     unsigned int i                = 0;
563     uint16_t     packet_data_size = (uint16_t)((reg->region_end - reg->region_start + 1));
564 
565     REG_SET_2_CM(reg->start_cntl_b, 0, exp_region_start,
566         params->corner_points[0].blue.custom_float_x, exp_region_start_segment, 0);
567     REG_SET_2_CM(reg->start_cntl_g, 0, exp_region_start,
568         params->corner_points[0].green.custom_float_x, exp_region_start_segment, 0);
569     REG_SET_2_CM(reg->start_cntl_r, 0, exp_region_start,
570         params->corner_points[0].red.custom_float_x, exp_region_start_segment, 0);
571 
572     REG_SET_CM(reg->start_base_cntl_r, 0, field_region_start_base,
573         params->corner_points[0].red.custom_float_y);
574     REG_SET_CM(reg->start_base_cntl_g, 0, field_region_start_base,
575         params->corner_points[0].green.custom_float_y);
576     REG_SET_CM(reg->start_base_cntl_b, 0, field_region_start_base,
577         params->corner_points[0].blue.custom_float_y);
578 
579     REG_SET_CM(reg->offset_r, 0, field_offset,
580         params->corner_points[0].red.custom_float_offset);
581     REG_SET_CM(reg->offset_g, 0, field_offset,
582         params->corner_points[0].green.custom_float_offset);
583     REG_SET_CM(reg->offset_b, 0, field_offset,
584         params->corner_points[0].blue.custom_float_offset);
585 
586     REG_SET_CM(reg->start_slope_cntl_b, 0, // linear slope at start of curve
587         field_region_linear_slope, params->corner_points[0].blue.custom_float_slope);
588     REG_SET_CM(reg->start_slope_cntl_g, 0, field_region_linear_slope,
589         params->corner_points[0].green.custom_float_slope);
590     REG_SET_CM(reg->start_slope_cntl_r, 0, field_region_linear_slope,
591         params->corner_points[0].red.custom_float_slope);
592 
593     REG_SET_CM(reg->start_end_cntl1_b, 0, field_region_end_base,
594         params->corner_points[1].blue.custom_float_y);
595     REG_SET_CM(reg->start_end_cntl1_g, 0, field_region_end_base,
596         params->corner_points[1].green.custom_float_y);
597     REG_SET_CM(reg->start_end_cntl1_r, 0, field_region_end_base,
598         params->corner_points[1].red.custom_float_y);
599 
600     REG_SET_2_CM(reg->start_end_cntl2_b, 0, field_region_end_slope,
601         params->corner_points[1].blue.custom_float_slope, field_region_end,
602         params->corner_points[1].blue.custom_float_x);
603     REG_SET_2_CM(reg->start_end_cntl2_g, 0, field_region_end_slope,
604         params->corner_points[1].green.custom_float_slope, field_region_end,
605         params->corner_points[1].green.custom_float_x);
606     REG_SET_2_CM(reg->start_end_cntl2_r, 0, field_region_end_slope,
607         params->corner_points[1].red.custom_float_slope, field_region_end,
608         params->corner_points[1].red.custom_float_x);
609 
610     // program all the *GAM_RAM?_REGION_start ~ region_end regs in one VPEP_DIRECT_CONFIG packet
611     // with auto inc
612     config_writer_fill(
613         config_writer, VPEC_FIELD_VALUE(VPE_DIR_CFG_PKT_DATA_SIZE, packet_data_size - 1) |
614                            VPEC_FIELD_VALUE(VPE_DIR_CFG_PKT_REGISTER_OFFSET, reg->region_start) |
615                            0x01); // auto increase on
616 
617     for (reg_region_cur = reg->region_start; reg_region_cur <= reg->region_end; reg_region_cur++) {
618 
619         const struct gamma_curve *curve0 = &(params->arr_curve_points[2 * i]);
620         const struct gamma_curve *curve1 = &(params->arr_curve_points[(2 * i) + 1]);
621 
622         config_writer_fill(
623             config_writer, (((curve0->offset << reg->shifts.exp_region0_lut_offset) &
624                                 reg->masks.exp_region0_lut_offset) |
625                                ((curve0->segments_num << reg->shifts.exp_region0_num_segments) &
626                                    reg->masks.exp_region0_num_segments) |
627                                ((curve1->offset << reg->shifts.exp_region1_lut_offset) &
628                                    reg->masks.exp_region1_lut_offset) |
629                                ((curve1->segments_num << reg->shifts.exp_region1_num_segments) &
630                                    reg->masks.exp_region1_num_segments)));
631 
632         i++;
633     }
634 }
635 
vpe10_cm_helper_program_pwl(struct config_writer * config_writer,const struct pwl_result_data * rgb,uint32_t last_base_value,uint32_t num,uint32_t lut_data_reg_offset,uint8_t lut_data_reg_shift,uint32_t lut_data_reg_mask,enum cm_rgb_channel channel)636 void vpe10_cm_helper_program_pwl(struct config_writer *config_writer,
637     const struct pwl_result_data *rgb, uint32_t last_base_value, uint32_t num,
638     uint32_t lut_data_reg_offset, uint8_t lut_data_reg_shift, uint32_t lut_data_reg_mask,
639     enum cm_rgb_channel channel)
640 {
641     uint32_t i;
642     uint32_t lut_data = 0;
643 
644     // For LUT, we keep write the same address with entire LUT data, so don't set INC bit
645     config_writer_fill(
646         config_writer, VPEC_FIELD_VALUE(VPE_DIR_CFG_PKT_DATA_SIZE, num) |
647                            VPEC_FIELD_VALUE(VPE_DIR_CFG_PKT_REGISTER_OFFSET, lut_data_reg_offset));
648 
649     for (i = 0; i < num; i++) {
650         switch (channel) {
651         case CM_PWL_R:
652             lut_data = rgb[i].red_reg;
653             break;
654         case CM_PWL_G:
655             lut_data = rgb[i].green_reg;
656             break;
657         case CM_PWL_B:
658             lut_data = rgb[i].blue_reg;
659             break;
660         }
661         config_writer_fill(config_writer, ((lut_data << lut_data_reg_shift) & lut_data_reg_mask));
662     }
663 
664     config_writer_fill(
665         config_writer, ((last_base_value << lut_data_reg_shift) & lut_data_reg_mask));
666 }
667 
vpe10_cm_helper_program_color_matrices(struct config_writer * config_writer,const uint16_t * regval,const struct color_matrices_reg * reg)668 void vpe10_cm_helper_program_color_matrices(struct config_writer *config_writer,
669     const uint16_t *regval, const struct color_matrices_reg *reg)
670 {
671     uint32_t     cur_csc_reg;
672     unsigned int i                = 0;
673     uint16_t     packet_data_size = (uint16_t)((reg->csc_c33_c34 - reg->csc_c11_c12 + 1));
674 
675     config_writer_fill(
676         config_writer, VPEC_FIELD_VALUE(VPE_DIR_CFG_PKT_DATA_SIZE, packet_data_size - 1) |
677                            VPEC_FIELD_VALUE(VPE_DIR_CFG_PKT_REGISTER_OFFSET, reg->csc_c11_c12) |
678                            0x01); // auto increase on
679 
680     for (cur_csc_reg = reg->csc_c11_c12; cur_csc_reg <= reg->csc_c33_c34; cur_csc_reg++) {
681 
682         const uint16_t *regval0 = &(regval[2 * i]);
683         const uint16_t *regval1 = &(regval[(2 * i) + 1]);
684 
685         // use C11/C12 mask value for all CSC regs to ease programing
686         config_writer_fill(
687             config_writer, ((uint32_t)(*regval0 << reg->shifts.csc_c11) & reg->masks.csc_c11) |
688                                ((uint32_t)(*regval1 << reg->shifts.csc_c12) & reg->masks.csc_c12));
689 
690         // Due to the program nature of CSC regs are switchable to different sets
691         // Skip record REG_IS_WRITTEN and LAST_WRITTEN_VAL used in REG_SET* macros.
692         // and those CSC regs will always write at once for all fields
693 
694         i++;
695     }
696 }
697