1 // SPDX-License-Identifier: MIT
2 //
3 // Copyright 2024 Advanced Micro Devices, Inc.
4
5
6 #include "dml2_internal_shared_types.h"
7 #include "dml2_core_dcn4_calcs.h"
8 #include "dml2_debug.h"
9 #include "lib_float_math.h"
10 #include "dml_top_types.h"
11
// Numeric limits for the calculations in this file.
// NOTE(review): units and exact meaning are inferred from the names only - confirm at the use sites.
#define DML2_MAX_FMT_420_BUFFER_WIDTH 4096
#define DML_MAX_NUM_OF_SLICES_PER_DSC 4
#define DML_MAX_COMPRESSION_RATIO 4
// Build-time switches. The two commented-out defines are currently disabled;
// ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE is defined without a value.
//#define DML_MODE_SUPPORT_USE_DPM_DRAM_BW
//#define DML_GLOBAL_PREFETCH_CHECK
#define ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE
// NOTE(review): presumably the upper bound for a VStartup value - confirm at the use site.
#define DML_MAX_VSTARTUP_START 1023
19
dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type)20 const char *dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type)
21 {
22 switch (bw_type) {
23 case (dml2_core_internal_bw_sdp):
24 return("dml2_core_internal_bw_sdp");
25 case (dml2_core_internal_bw_dram):
26 return("dml2_core_internal_bw_dram");
27 case (dml2_core_internal_bw_max):
28 return("dml2_core_internal_bw_max");
29 default:
30 return("dml2_core_internal_bw_unknown");
31 }
32 }
33
dml2_core_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type)34 const char *dml2_core_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type)
35 {
36 switch (dml2_core_internal_soc_state_type) {
37 case (dml2_core_internal_soc_state_sys_idle):
38 return("dml2_core_internal_soc_state_sys_idle");
39 case (dml2_core_internal_soc_state_sys_active):
40 return("dml2_core_internal_soc_state_sys_active");
41 case (dml2_core_internal_soc_state_svp_prefetch):
42 return("dml2_core_internal_soc_state_svp_prefetch");
43 case dml2_core_internal_soc_state_max:
44 default:
45 return("dml2_core_internal_soc_state_unknown");
46 }
47 }
48
// Divides @dividend by @divisor and returns the floating-point quotient.
// *@remainder is a flag rather than an arithmetic remainder: it is set to 1 when the
// quotient minus its truncation toward zero is positive, and to 0 otherwise
// (so exact quotients and negative quotients both yield 0).
static double dml2_core_div_rem(double dividend, unsigned int divisor, unsigned int *remainder)
{
	const double quotient = dividend / divisor;
	const double fraction = quotient - (int)quotient;

	*remainder = (fraction > 0) ? 1 : 0;

	return quotient;
}
54
dml2_print_mode_support_info(const struct dml2_core_internal_mode_support_info * support,bool fail_only)55 static void dml2_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only)
56 {
57 dml2_printf("DML: ===================================== \n");
58 dml2_printf("DML: DML_MODE_SUPPORT_INFO_ST\n");
59 if (!fail_only || support->ScaleRatioAndTapsSupport == 0)
60 dml2_printf("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport);
61 if (!fail_only || support->SourceFormatPixelAndScanSupport == 0)
62 dml2_printf("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport);
63 if (!fail_only || support->ViewportSizeSupport == 0)
64 dml2_printf("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport);
65 if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1)
66 dml2_printf("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion);
67 if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1)
68 dml2_printf("DML: support: LinkRateForMultistreamNotIndicated = %d\n", support->LinkRateForMultistreamNotIndicated);
69 if (!fail_only || support->BPPForMultistreamNotIndicated == 1)
70 dml2_printf("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated);
71 if (!fail_only || support->MultistreamWithHDMIOreDP == 1)
72 dml2_printf("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP);
73 if (!fail_only || support->ExceededMultistreamSlots == 1)
74 dml2_printf("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots);
75 if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1)
76 dml2_printf("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink);
77 if (!fail_only || support->NotEnoughLanesForMSO == 1)
78 dml2_printf("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO);
79 if (!fail_only || support->P2IWith420 == 1)
80 dml2_printf("DML: support: P2IWith420 = %d\n", support->P2IWith420);
81 if (!fail_only || support->DSC422NativeNotSupported == 1)
82 dml2_printf("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported);
83 if (!fail_only || support->DSCSlicesODMModeSupported == 0)
84 dml2_printf("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported);
85 if (!fail_only || support->NotEnoughDSCUnits == 1)
86 dml2_printf("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits);
87 if (!fail_only || support->NotEnoughDSCSlices == 1)
88 dml2_printf("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices);
89 if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1)
90 dml2_printf("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe);
91 if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1)
92 dml2_printf("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen);
93 if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1)
94 dml2_printf("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported);
95 if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0)
96 dml2_printf("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport);
97 if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1)
98 dml2_printf("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported);
99 if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1)
100 dml2_printf("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState);
101 if (!fail_only || support->ROBSupport == 0)
102 dml2_printf("DML: support: ROBSupport = %d\n", support->ROBSupport);
103 if (!fail_only || support->OutstandingRequestsSupport == 0)
104 dml2_printf("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport);
105 if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0)
106 dml2_printf("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance);
107 if (!fail_only || support->DISPCLK_DPPCLK_Support == 0)
108 dml2_printf("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support);
109 if (!fail_only || support->TotalAvailablePipesSupport == 0)
110 dml2_printf("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport);
111 if (!fail_only || support->NumberOfOTGSupport == 0)
112 dml2_printf("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport);
113 if (!fail_only || support->NumberOfHDMIFRLSupport == 0)
114 dml2_printf("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport);
115 if (!fail_only || support->NumberOfDP2p0Support == 0)
116 dml2_printf("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support);
117 if (!fail_only || support->EnoughWritebackUnits == 0)
118 dml2_printf("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits);
119 if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0)
120 dml2_printf("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport);
121 if (!fail_only || support->WritebackLatencySupport == 0)
122 dml2_printf("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport);
123 if (!fail_only || support->CursorSupport == 0)
124 dml2_printf("DML: support: CursorSupport = %d\n", support->CursorSupport);
125 if (!fail_only || support->PitchSupport == 0)
126 dml2_printf("DML: support: PitchSupport = %d\n", support->PitchSupport);
127 if (!fail_only || support->ViewportExceedsSurface == 1)
128 dml2_printf("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface);
129 if (!fail_only || support->PrefetchSupported == 0)
130 dml2_printf("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported);
131 if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0)
132 dml2_printf("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport);
133 if (!fail_only || support->AvgBandwidthSupport == 0)
134 dml2_printf("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport);
135 if (!fail_only || support->DynamicMetadataSupported == 0)
136 dml2_printf("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported);
137 if (!fail_only || support->VRatioInPrefetchSupported == 0)
138 dml2_printf("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported);
139 if (!fail_only || support->PTEBufferSizeNotExceeded == 0)
140 dml2_printf("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded);
141 if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 0)
142 dml2_printf("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded);
143 if (!fail_only || support->ExceededMALLSize == 1)
144 dml2_printf("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize);
145 if (!fail_only || support->g6_temp_read_support == 0)
146 dml2_printf("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support);
147 if (!fail_only || support->ImmediateFlipSupport == 0)
148 dml2_printf("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport);
149 if (!fail_only || support->LinkCapacitySupport == 0)
150 dml2_printf("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport);
151
152 if (!fail_only || support->ModeSupport == 0)
153 dml2_printf("DML: support: ModeSupport = %d\n", support->ModeSupport);
154 dml2_printf("DML: ===================================== \n");
155 }
156
get_stream_output_bpp(double * out_bpp,const struct dml2_display_cfg * display_cfg)157 static void get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg)
158 {
159 for (unsigned int k = 0; k < display_cfg->num_planes; k++) {
160 double bpc = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.bpc;
161 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_disable) {
162 switch (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format) {
163 case dml2_444:
164 out_bpp[k] = bpc * 3;
165 break;
166 case dml2_s422:
167 out_bpp[k] = bpc * 2;
168 break;
169 case dml2_n422:
170 out_bpp[k] = bpc * 2;
171 break;
172 case dml2_420:
173 default:
174 out_bpp[k] = bpc * 1.5;
175 break;
176 }
177 } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable) {
178 out_bpp[k] = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.dsc_compressed_bpp_x16 / 16;
179 } else {
180 out_bpp[k] = 0;
181 }
182 #ifdef __DML_VBA_DEBUG__
183 dml2_printf("DML::%s: k=%d bpc=%f\n", __func__, k, bpc);
184 dml2_printf("DML::%s: k=%d dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable);
185 dml2_printf("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]);
186 #endif
187 }
188 }
189
// Rounds @num to a multiple of @multiple: upward when @up is true, downward otherwise.
// @num is returned unchanged when it is already a multiple, or when @multiple is 0
// (which also avoids a modulo by zero).
static unsigned int dml_round_to_multiple(unsigned int num, unsigned int multiple, bool up)
{
	unsigned int rounded;

	if (multiple == 0)
		return num;

	// Start from the multiple at or below num, then step up one multiple if requested
	// and num was not already aligned.
	rounded = num - (num % multiple);
	if (up && rounded != num)
		rounded += multiple;

	return rounded;
}
206
dml_get_num_active_pipes(int unsigned num_planes,const struct core_display_cfg_support_info * cfg_support_info)207 static unsigned int dml_get_num_active_pipes(int unsigned num_planes, const struct core_display_cfg_support_info *cfg_support_info)
208 {
209 unsigned int num_active_pipes = 0;
210
211 for (unsigned int k = 0; k < num_planes; k++) {
212 num_active_pipes = num_active_pipes + (unsigned int)cfg_support_info->plane_support_info[k].dpps_used;
213 }
214
215 #ifdef __DML_VBA_DEBUG__
216 dml2_printf("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes);
217 #endif
218 return num_active_pipes;
219 }
220
dml_calc_pipe_plane_mapping(const struct core_display_cfg_support_info * cfg_support_info,unsigned int * pipe_plane)221 static void dml_calc_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane)
222 {
223 unsigned int pipe_idx = 0;
224
225 for (unsigned int k = 0; k < DML2_MAX_PLANES; ++k) {
226 pipe_plane[k] = __DML2_CALCS_PIPE_NO_PLANE__;
227 }
228
229 for (unsigned int plane_idx = 0; plane_idx < DML2_MAX_PLANES; plane_idx++) {
230 for (int i = 0; i < cfg_support_info->plane_support_info[plane_idx].dpps_used; i++) {
231 pipe_plane[pipe_idx] = plane_idx;
232 pipe_idx++;
233 }
234 }
235 }
236
dml_is_phantom_pipe(const struct dml2_plane_parameters * plane_cfg)237 static bool dml_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg)
238 {
239 bool is_phantom = false;
240
241 if (plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe ||
242 plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) {
243 is_phantom = true;
244 }
245
246 return is_phantom;
247 }
248
dml_get_is_phantom_pipe(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,unsigned int pipe_idx)249 static bool dml_get_is_phantom_pipe(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx)
250 {
251 unsigned int plane_idx = mode_lib->mp.pipe_plane[pipe_idx];
252
253 bool is_phantom = dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_idx]);
254 dml2_printf("DML::%s: pipe_idx=%d legacy_svp_config=%0d is_phantom=%d\n", __func__, pipe_idx, display_cfg->plane_descriptors[plane_idx].overrides.legacy_svp_config, is_phantom);
255 return is_phantom;
256 }
257
258 #define dml_get_per_pipe_var_func(variable, type, interval_var) static type dml_get_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx) \
259 { \
260 unsigned int plane_idx; \
261 plane_idx = mode_lib->mp.pipe_plane[pipe_idx]; \
262 return (type) interval_var[plane_idx]; \
263 }
264
265 dml_get_per_pipe_var_func(dpte_group_size_in_bytes, unsigned int, mode_lib->mp.dpte_group_bytes);
266 dml_get_per_pipe_var_func(vm_group_size_in_bytes, unsigned int, mode_lib->mp.vm_group_bytes);
267 dml_get_per_pipe_var_func(swath_height_l, unsigned int, mode_lib->mp.SwathHeightY);
268 dml_get_per_pipe_var_func(swath_height_c, unsigned int, mode_lib->mp.SwathHeightC);
269 dml_get_per_pipe_var_func(dpte_row_height_linear_l, unsigned int, mode_lib->mp.dpte_row_height_linear);
270 dml_get_per_pipe_var_func(dpte_row_height_linear_c, unsigned int, mode_lib->mp.dpte_row_height_linear_chroma);
271
272 dml_get_per_pipe_var_func(vstartup_calculated, unsigned int, mode_lib->mp.VStartup);
273 dml_get_per_pipe_var_func(vupdate_offset, unsigned int, mode_lib->mp.VUpdateOffsetPix);
274 dml_get_per_pipe_var_func(vupdate_width, unsigned int, mode_lib->mp.VUpdateWidthPix);
275 dml_get_per_pipe_var_func(vready_offset, unsigned int, mode_lib->mp.VReadyOffsetPix);
276 dml_get_per_pipe_var_func(pstate_keepout_dst_lines, unsigned int, mode_lib->mp.pstate_keepout_dst_lines);
277 dml_get_per_pipe_var_func(det_stored_buffer_size_l_bytes, unsigned int, mode_lib->mp.DETBufferSizeY);
278 dml_get_per_pipe_var_func(det_stored_buffer_size_c_bytes, unsigned int, mode_lib->mp.DETBufferSizeC);
279 dml_get_per_pipe_var_func(det_buffer_size_kbytes, unsigned int, mode_lib->mp.DETBufferSizeInKByte);
280 dml_get_per_pipe_var_func(surface_size_in_mall_bytes, unsigned int, mode_lib->mp.SurfaceSizeInTheMALL);
281
282 #define dml_get_per_plane_var_func(variable, type, interval_var) static type dml_get_plane_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int plane_idx) \
283 { \
284 return (type) interval_var[plane_idx]; \
285 }
286
287 dml_get_per_plane_var_func(num_mcaches_plane0, unsigned int, mode_lib->ms.num_mcaches_l);
288 dml_get_per_plane_var_func(mcache_row_bytes_plane0, unsigned int, mode_lib->ms.mcache_row_bytes_l);
289 dml_get_per_plane_var_func(mcache_shift_granularity_plane0, unsigned int, mode_lib->ms.mcache_shift_granularity_l);
290 dml_get_per_plane_var_func(num_mcaches_plane1, unsigned int, mode_lib->ms.num_mcaches_c);
291 dml_get_per_plane_var_func(mcache_row_bytes_plane1, unsigned int, mode_lib->ms.mcache_row_bytes_c);
292 dml_get_per_plane_var_func(mcache_shift_granularity_plane1, unsigned int, mode_lib->ms.mcache_shift_granularity_c);
293 dml_get_per_plane_var_func(mall_comb_mcache_l, unsigned int, mode_lib->ms.mall_comb_mcache_l);
294 dml_get_per_plane_var_func(mall_comb_mcache_c, unsigned int, mode_lib->ms.mall_comb_mcache_c);
295 dml_get_per_plane_var_func(lc_comb_mcache, unsigned int, mode_lib->ms.lc_comb_mcache);
296 dml_get_per_plane_var_func(subviewport_lines_needed_in_mall, unsigned int, mode_lib->ms.SubViewportLinesNeededInMALL);
297 dml_get_per_plane_var_func(max_vstartup_lines, unsigned int, mode_lib->ms.MaxVStartupLines);
298
299 #define dml_get_per_plane_array_var_func(variable, type, interval_var) static type dml_get_plane_array_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int plane_idx, unsigned int array_idx) \
300 { \
301 return (type) interval_var[plane_idx][array_idx]; \
302 }
303
304 dml_get_per_plane_array_var_func(mcache_offsets_plane0, unsigned int, mode_lib->ms.mcache_offsets_l);
305 dml_get_per_plane_array_var_func(mcache_offsets_plane1, unsigned int, mode_lib->ms.mcache_offsets_c);
306
307 #define dml_get_var_func(var, type, internal_var) static type dml_get_##var(const struct dml2_core_internal_display_mode_lib *mode_lib) \
308 { \
309 return (type) internal_var; \
310 }
311
312 dml_get_var_func(wm_urgent, double, mode_lib->mp.Watermark.UrgentWatermark);
313 dml_get_var_func(wm_stutter_exit, double, mode_lib->mp.Watermark.StutterExitWatermark);
314 dml_get_var_func(wm_stutter_enter_exit, double, mode_lib->mp.Watermark.StutterEnterPlusExitWatermark);
315 dml_get_var_func(wm_z8_stutter_exit, double, mode_lib->mp.Watermark.Z8StutterExitWatermark);
316 dml_get_var_func(wm_z8_stutter_enter_exit, double, mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark);
317 dml_get_var_func(wm_memory_trip, double, mode_lib->mp.UrgentLatency);
318 dml_get_var_func(meta_trip_memory_us, double, mode_lib->mp.MetaTripToMemory);
319
320 dml_get_var_func(wm_fclk_change, double, mode_lib->mp.Watermark.FCLKChangeWatermark);
321 dml_get_var_func(wm_usr_retraining, double, mode_lib->mp.Watermark.USRRetrainingWatermark);
322 dml_get_var_func(wm_temp_read_or_ppt, double, mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us);
323 dml_get_var_func(wm_dram_clock_change, double, mode_lib->mp.Watermark.DRAMClockChangeWatermark);
324 dml_get_var_func(fraction_of_urgent_bandwidth, double, mode_lib->mp.FractionOfUrgentBandwidth);
325 dml_get_var_func(fraction_of_urgent_bandwidth_imm_flip, double, mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip);
326 dml_get_var_func(fraction_of_urgent_bandwidth_mall, double, mode_lib->mp.FractionOfUrgentBandwidthMALL);
327 dml_get_var_func(wm_writeback_dram_clock_change, double, mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark);
328 dml_get_var_func(wm_writeback_fclk_change, double, mode_lib->mp.Watermark.WritebackFCLKChangeWatermark);
329 dml_get_var_func(stutter_efficiency, double, mode_lib->mp.StutterEfficiency);
330 dml_get_var_func(stutter_efficiency_no_vblank, double, mode_lib->mp.StutterEfficiencyNotIncludingVBlank);
331 dml_get_var_func(stutter_num_bursts, double, mode_lib->mp.NumberOfStutterBurstsPerFrame);
332 dml_get_var_func(stutter_efficiency_z8, double, mode_lib->mp.Z8StutterEfficiency);
333 dml_get_var_func(stutter_num_bursts_z8, double, mode_lib->mp.Z8NumberOfStutterBurstsPerFrame);
334 dml_get_var_func(stutter_period, double, mode_lib->mp.StutterPeriod);
335 dml_get_var_func(stutter_efficiency_z8_bestcase, double, mode_lib->mp.Z8StutterEfficiencyBestCase);
336 dml_get_var_func(stutter_num_bursts_z8_bestcase, double, mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase);
337 dml_get_var_func(stutter_period_bestcase, double, mode_lib->mp.StutterPeriodBestCase);
338 dml_get_var_func(fclk_change_latency, double, mode_lib->mp.MaxActiveFCLKChangeLatencySupported);
339 dml_get_var_func(global_dppclk_khz, double, mode_lib->mp.GlobalDPPCLK * 1000.0);
340
341 dml_get_var_func(sys_active_avg_bw_required_sdp, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
342 dml_get_var_func(sys_active_avg_bw_required_dram, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
343
344 dml_get_var_func(svp_prefetch_avg_bw_required_sdp, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
345 dml_get_var_func(svp_prefetch_avg_bw_required_dram, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
346
347 dml_get_var_func(sys_active_avg_bw_available_sdp, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
348 dml_get_var_func(sys_active_avg_bw_available_dram, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
349
350 dml_get_var_func(svp_prefetch_avg_bw_available_sdp, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
351 dml_get_var_func(svp_prefetch_avg_bw_available_dram, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
352
353 dml_get_var_func(sys_active_urg_bw_available_sdp, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
354 dml_get_var_func(sys_active_urg_bw_available_dram, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
355 dml_get_var_func(sys_active_urg_bw_available_dram_vm_only, double, mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]);
356
357 dml_get_var_func(svp_prefetch_urg_bw_available_sdp, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
358 dml_get_var_func(svp_prefetch_urg_bw_available_dram, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
359 dml_get_var_func(svp_prefetch_urg_bw_available_dram_vm_only, double, mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_svp_prefetch]);
360
361 dml_get_var_func(urgent_latency, double, mode_lib->mp.UrgentLatency);
362 dml_get_var_func(max_urgent_latency_us, double, mode_lib->ms.support.max_urgent_latency_us);
363 dml_get_var_func(max_non_urgent_latency_us, double, mode_lib->ms.support.max_non_urgent_latency_us);
364 dml_get_var_func(avg_non_urgent_latency_us, double, mode_lib->ms.support.avg_non_urgent_latency_us);
365 dml_get_var_func(avg_urgent_latency_us, double, mode_lib->ms.support.avg_urgent_latency_us);
366
367 dml_get_var_func(sys_active_urg_bw_required_sdp, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
368 dml_get_var_func(sys_active_urg_bw_required_dram, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
369 dml_get_var_func(svp_prefetch_urg_bw_required_sdp, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
370 dml_get_var_func(svp_prefetch_urg_bw_required_dram, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
371
372 dml_get_var_func(sys_active_non_urg_required_sdp, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
373 dml_get_var_func(sys_active_non_urg_required_dram, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
374 dml_get_var_func(svp_prefetch_non_urg_bw_required_sdp, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
375 dml_get_var_func(svp_prefetch_non_urg_bw_required_dram, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
376
377 dml_get_var_func(sys_active_urg_bw_required_sdp_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
378 dml_get_var_func(sys_active_urg_bw_required_dram_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
379 dml_get_var_func(svp_prefetch_urg_bw_required_sdp_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
380 dml_get_var_func(svp_prefetch_urg_bw_required_dram_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
381
382 dml_get_var_func(sys_active_non_urg_required_sdp_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
383 dml_get_var_func(sys_active_non_urg_required_dram_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
384 dml_get_var_func(svp_prefetch_non_urg_bw_required_sdp_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
385 dml_get_var_func(svp_prefetch_non_urg_bw_required_dram_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
386
387 dml_get_var_func(comp_buffer_size_kbytes, unsigned int, mode_lib->mp.CompressedBufferSizeInkByte);
388
389 dml_get_var_func(unbounded_request_enabled, bool, mode_lib->mp.UnboundedRequestEnabled);
390 dml_get_var_func(wm_writeback_urgent, double, mode_lib->mp.Watermark.WritebackUrgentWatermark);
391 dml_get_var_func(cstate_max_cap_mode, bool, mode_lib->mp.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
392 dml_get_var_func(compbuf_reserved_space_64b, unsigned int, mode_lib->mp.compbuf_reserved_space_64b);
393 dml_get_var_func(hw_debug5, bool, mode_lib->mp.hw_debug5);
394 dml_get_var_func(dcfclk_deep_sleep_hysteresis, unsigned int, mode_lib->mp.dcfclk_deep_sleep_hysteresis);
395
// Derives three buffer sizes (all in KByte) from the return-buffer configuration:
//  - *MaxTotalDETInKByte: with is_mrq_present, math_ceil2() of 4/5 of
//    (ConfigReturnBufferSizeInKByte + ROBBufferSizeInKByte) with a second argument of 64;
//    otherwise ConfigReturnBufferSizeInKByte minus one segment.
//  - *nomDETInKByte: math_floor2() of the max total DET divided by MaxNumDPP, with the
//    segment size as second argument; replaced by nomDETInKByteOverrideValue when
//    nomDETInKByteOverrideEnable is non-zero.
//  - *MinCompressedBufferSizeInKByte: ConfigReturnBufferSizeInKByte minus the max total DET.
// NOTE(review): math_ceil2/math_floor2 presumably round to a multiple of their second
// argument - confirm against lib_float_math.
static void CalculateMaxDETAndMinCompressedBufferSize(
	unsigned int ConfigReturnBufferSizeInKByte,
	unsigned int ConfigReturnBufferSegmentSizeInKByte,
	unsigned int ROBBufferSizeInKByte,
	unsigned int MaxNumDPP,
	unsigned int nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
	unsigned int nomDETInKByteOverrideValue, // VBA_DELTA
	bool is_mrq_present,

	// Output
	unsigned int *MaxTotalDETInKByte,
	unsigned int *nomDETInKByte,
	unsigned int *MinCompressedBufferSizeInKByte)
{
	unsigned int max_total_det;

	if (!is_mrq_present)
		max_total_det = ConfigReturnBufferSizeInKByte - ConfigReturnBufferSegmentSizeInKByte;
	else
		max_total_det = (unsigned int) math_ceil2((double)(ConfigReturnBufferSizeInKByte + ROBBufferSizeInKByte) * 4 / 5, 64);

	*MaxTotalDETInKByte = max_total_det;
	*nomDETInKByte = (unsigned int)(math_floor2((double)max_total_det / (double)MaxNumDPP, ConfigReturnBufferSegmentSizeInKByte));
	*MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - max_total_det;

#if defined(__DML_VBA_DEBUG__)
	dml2_printf("DML::%s: is_mrq_present = %u\n", __func__, is_mrq_present);
	dml2_printf("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte);
	dml2_printf("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, ROBBufferSizeInKByte);
	dml2_printf("DML::%s: MaxNumDPP = %u\n", __func__, MaxNumDPP);
	dml2_printf("DML::%s: MaxTotalDETInKByte = %u\n", __func__, *MaxTotalDETInKByte);
	dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, *nomDETInKByte);
	dml2_printf("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, *MinCompressedBufferSizeInKByte);
#endif

	// The override is applied last so the debug output above shows the computed value.
	if (nomDETInKByteOverrideEnable) {
		*nomDETInKByte = nomDETInKByteOverrideValue;
		dml2_printf("DML::%s: nomDETInKByte = %u (overrided)\n", __func__, *nomDETInKByte);
	}
}
433
PixelClockAdjustmentForProgressiveToInterlaceUnit(const struct dml2_display_cfg * display_cfg,bool ptoi_supported,double * PixelClockBackEnd)434 static void PixelClockAdjustmentForProgressiveToInterlaceUnit(const struct dml2_display_cfg *display_cfg, bool ptoi_supported, double *PixelClockBackEnd)
435 {
436 //unsigned int num_active_planes = display_cfg->num_planes;
437
438 //Progressive To Interlace Unit Effect
439 for (unsigned int k = 0; k < display_cfg->num_planes; ++k) {
440 PixelClockBackEnd[k] = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
441 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced == 1 && ptoi_supported == true) {
442 // FIXME_STAGE2... can sw pass the pixel rate for interlaced directly
443 //display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz = 2 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz;
444 }
445 }
446 }
447
dml_is_420(enum dml2_source_format_class source_format)448 static bool dml_is_420(enum dml2_source_format_class source_format)
449 {
450 bool val = false;
451
452 switch (source_format) {
453 case dml2_444_8:
454 val = 0;
455 break;
456 case dml2_444_16:
457 val = 0;
458 break;
459 case dml2_444_32:
460 val = 0;
461 break;
462 case dml2_444_64:
463 val = 0;
464 break;
465 case dml2_420_8:
466 val = 1;
467 break;
468 case dml2_420_10:
469 val = 1;
470 break;
471 case dml2_420_12:
472 val = 1;
473 break;
474 case dml2_422_planar_8:
475 val = 0;
476 break;
477 case dml2_422_planar_10:
478 val = 0;
479 break;
480 case dml2_422_planar_12:
481 val = 0;
482 break;
483 case dml2_422_packed_8:
484 val = 0;
485 break;
486 case dml2_422_packed_10:
487 val = 0;
488 break;
489 case dml2_422_packed_12:
490 val = 0;
491 break;
492 case dml2_rgbe_alpha:
493 val = 0;
494 break;
495 case dml2_rgbe:
496 val = 0;
497 break;
498 case dml2_mono_8:
499 val = 0;
500 break;
501 case dml2_mono_16:
502 val = 0;
503 break;
504 default:
505 DML2_ASSERT(0);
506 break;
507 }
508 return val;
509 }
510
dml_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode)511 static unsigned int dml_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode)
512 {
513 if (sw_mode == dml2_sw_linear)
514 return 256;
515 else if (sw_mode == dml2_sw_256b_2d)
516 return 256;
517 else if (sw_mode == dml2_sw_4kb_2d)
518 return 4096;
519 else if (sw_mode == dml2_sw_64kb_2d)
520 return 65536;
521 else if (sw_mode == dml2_sw_256kb_2d)
522 return 262144;
523 else if (sw_mode == dml2_gfx11_sw_linear)
524 return 256;
525 else if (sw_mode == dml2_gfx11_sw_64kb_d)
526 return 65536;
527 else if (sw_mode == dml2_gfx11_sw_64kb_d_t)
528 return 65536;
529 else if (sw_mode == dml2_gfx11_sw_64kb_d_x)
530 return 65536;
531 else if (sw_mode == dml2_gfx11_sw_64kb_r_x)
532 return 65536;
533 else if (sw_mode == dml2_gfx11_sw_256kb_d_x)
534 return 262144;
535 else if (sw_mode == dml2_gfx11_sw_256kb_r_x)
536 return 262144;
537 else {
538 DML2_ASSERT(0);
539 return 256;
540 }
541 }
542
dml_is_vertical_rotation(enum dml2_rotation_angle Scan)543 static bool dml_is_vertical_rotation(enum dml2_rotation_angle Scan)
544 {
545 bool is_vert = false;
546 if (Scan == dml2_rotation_90 || Scan == dml2_rotation_270) {
547 is_vert = true;
548 } else {
549 is_vert = false;
550 }
551 return is_vert;
552 }
553
dml_get_gfx_version(enum dml2_swizzle_mode sw_mode)554 static int unsigned dml_get_gfx_version(enum dml2_swizzle_mode sw_mode)
555 {
556 int unsigned version = 0;
557
558 if (sw_mode == dml2_sw_linear ||
559 sw_mode == dml2_sw_256b_2d ||
560 sw_mode == dml2_sw_4kb_2d ||
561 sw_mode == dml2_sw_64kb_2d ||
562 sw_mode == dml2_sw_256kb_2d) {
563 version = 12;
564 } else if (sw_mode == dml2_gfx11_sw_linear ||
565 sw_mode == dml2_gfx11_sw_64kb_d ||
566 sw_mode == dml2_gfx11_sw_64kb_d_t ||
567 sw_mode == dml2_gfx11_sw_64kb_d_x ||
568 sw_mode == dml2_gfx11_sw_64kb_r_x ||
569 sw_mode == dml2_gfx11_sw_256kb_d_x ||
570 sw_mode == dml2_gfx11_sw_256kb_r_x) {
571 version = 11;
572 } else {
573 dml2_printf("ERROR: Invalid sw_mode setting! val=%u\n", sw_mode);
574 DML2_ASSERT(0);
575 }
576
577 return version;
578 }
579
CalculateBytePerPixelAndBlockSizes(enum dml2_source_format_class SourcePixelFormat,enum dml2_swizzle_mode SurfaceTiling,unsigned int pitch_y,unsigned int pitch_c,unsigned int * BytePerPixelY,unsigned int * BytePerPixelC,double * BytePerPixelDETY,double * BytePerPixelDETC,unsigned int * BlockHeight256BytesY,unsigned int * BlockHeight256BytesC,unsigned int * BlockWidth256BytesY,unsigned int * BlockWidth256BytesC,unsigned int * MacroTileHeightY,unsigned int * MacroTileHeightC,unsigned int * MacroTileWidthY,unsigned int * MacroTileWidthC,bool * surf_linear128_l,bool * surf_linear128_c)580 static void CalculateBytePerPixelAndBlockSizes(
581 enum dml2_source_format_class SourcePixelFormat,
582 enum dml2_swizzle_mode SurfaceTiling,
583 unsigned int pitch_y,
584 unsigned int pitch_c,
585
586 // Output
587 unsigned int *BytePerPixelY,
588 unsigned int *BytePerPixelC,
589 double *BytePerPixelDETY,
590 double *BytePerPixelDETC,
591 unsigned int *BlockHeight256BytesY,
592 unsigned int *BlockHeight256BytesC,
593 unsigned int *BlockWidth256BytesY,
594 unsigned int *BlockWidth256BytesC,
595 unsigned int *MacroTileHeightY,
596 unsigned int *MacroTileHeightC,
597 unsigned int *MacroTileWidthY,
598 unsigned int *MacroTileWidthC,
599 bool *surf_linear128_l,
600 bool *surf_linear128_c)
601 {
602 *BytePerPixelDETY = 0;
603 *BytePerPixelDETC = 0;
604 *BytePerPixelY = 1;
605 *BytePerPixelC = 1;
606
607 if (SourcePixelFormat == dml2_444_64) {
608 *BytePerPixelDETY = 8;
609 *BytePerPixelDETC = 0;
610 *BytePerPixelY = 8;
611 *BytePerPixelC = 0;
612 } else if (SourcePixelFormat == dml2_444_32 || SourcePixelFormat == dml2_rgbe) {
613 *BytePerPixelDETY = 4;
614 *BytePerPixelDETC = 0;
615 *BytePerPixelY = 4;
616 *BytePerPixelC = 0;
617 } else if (SourcePixelFormat == dml2_444_16 || SourcePixelFormat == dml2_mono_16) {
618 *BytePerPixelDETY = 2;
619 *BytePerPixelDETC = 0;
620 *BytePerPixelY = 2;
621 *BytePerPixelC = 0;
622 } else if (SourcePixelFormat == dml2_444_8 || SourcePixelFormat == dml2_mono_8) {
623 *BytePerPixelDETY = 1;
624 *BytePerPixelDETC = 0;
625 *BytePerPixelY = 1;
626 *BytePerPixelC = 0;
627 } else if (SourcePixelFormat == dml2_rgbe_alpha) {
628 *BytePerPixelDETY = 4;
629 *BytePerPixelDETC = 1;
630 *BytePerPixelY = 4;
631 *BytePerPixelC = 1;
632 } else if (SourcePixelFormat == dml2_420_8) {
633 *BytePerPixelDETY = 1;
634 *BytePerPixelDETC = 2;
635 *BytePerPixelY = 1;
636 *BytePerPixelC = 2;
637 } else if (SourcePixelFormat == dml2_420_12) {
638 *BytePerPixelDETY = 2;
639 *BytePerPixelDETC = 4;
640 *BytePerPixelY = 2;
641 *BytePerPixelC = 4;
642 } else if (SourcePixelFormat == dml2_420_10) {
643 *BytePerPixelDETY = (double)(4.0 / 3);
644 *BytePerPixelDETC = (double)(8.0 / 3);
645 *BytePerPixelY = 2;
646 *BytePerPixelC = 4;
647 } else {
648 dml2_printf("ERROR: DML::%s: SourcePixelFormat = %u not supported!\n", __func__, SourcePixelFormat);
649 DML2_ASSERT(0);
650 }
651
652 #ifdef __DML_VBA_DEBUG__
653 dml2_printf("DML::%s: SourcePixelFormat = %u\n", __func__, SourcePixelFormat);
654 dml2_printf("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
655 dml2_printf("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
656 dml2_printf("DML::%s: BytePerPixelY = %u\n", __func__, *BytePerPixelY);
657 dml2_printf("DML::%s: BytePerPixelC = %u\n", __func__, *BytePerPixelC);
658 dml2_printf("DML::%s: pitch_y = %u\n", __func__, pitch_y);
659 dml2_printf("DML::%s: pitch_c = %u\n", __func__, pitch_c);
660 dml2_printf("DML::%s: surf_linear128_l = %u\n", __func__, *surf_linear128_l);
661 dml2_printf("DML::%s: surf_linear128_c = %u\n", __func__, *surf_linear128_c);
662 #endif
663
664 if (dml_get_gfx_version(SurfaceTiling) == 11) {
665 *surf_linear128_l = 0;
666 *surf_linear128_c = 0;
667 } else {
668 if (SurfaceTiling == dml2_sw_linear) {
669 *surf_linear128_l = (((pitch_y * *BytePerPixelY) % 256) != 0);
670
671 if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha)
672 *surf_linear128_c = (((pitch_c * *BytePerPixelC) % 256) != 0);
673 }
674 }
675
676 if (!(dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha)) {
677 if (SurfaceTiling == dml2_sw_linear) {
678 *BlockHeight256BytesY = 1;
679 } else if (SourcePixelFormat == dml2_444_64) {
680 *BlockHeight256BytesY = 4;
681 } else if (SourcePixelFormat == dml2_444_8) {
682 *BlockHeight256BytesY = 16;
683 } else {
684 *BlockHeight256BytesY = 8;
685 }
686 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
687 *BlockHeight256BytesC = 0;
688 *BlockWidth256BytesC = 0;
689 } else { // dual plane
690 if (SurfaceTiling == dml2_sw_linear) {
691 *BlockHeight256BytesY = 1;
692 *BlockHeight256BytesC = 1;
693 } else if (SourcePixelFormat == dml2_rgbe_alpha) {
694 *BlockHeight256BytesY = 8;
695 *BlockHeight256BytesC = 16;
696 } else if (SourcePixelFormat == dml2_420_8) {
697 *BlockHeight256BytesY = 16;
698 *BlockHeight256BytesC = 8;
699 } else {
700 *BlockHeight256BytesY = 8;
701 *BlockHeight256BytesC = 8;
702 }
703 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
704 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
705 }
706 #ifdef __DML_VBA_DEBUG__
707 dml2_printf("DML::%s: BlockWidth256BytesY = %u\n", __func__, *BlockWidth256BytesY);
708 dml2_printf("DML::%s: BlockHeight256BytesY = %u\n", __func__, *BlockHeight256BytesY);
709 dml2_printf("DML::%s: BlockWidth256BytesC = %u\n", __func__, *BlockWidth256BytesC);
710 dml2_printf("DML::%s: BlockHeight256BytesC = %u\n", __func__, *BlockHeight256BytesC);
711 #endif
712
713 if (dml_get_gfx_version(SurfaceTiling) == 11) {
714 if (SurfaceTiling == dml2_gfx11_sw_linear) {
715 *MacroTileHeightY = *BlockHeight256BytesY;
716 *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
717 *MacroTileHeightC = *BlockHeight256BytesC;
718 if (*MacroTileHeightC == 0) {
719 *MacroTileWidthC = 0;
720 } else {
721 *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
722 }
723 } else if (SurfaceTiling == dml2_gfx11_sw_64kb_d || SurfaceTiling == dml2_gfx11_sw_64kb_d_t || SurfaceTiling == dml2_gfx11_sw_64kb_d_x || SurfaceTiling == dml2_gfx11_sw_64kb_r_x) {
724 *MacroTileHeightY = 16 * *BlockHeight256BytesY;
725 *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
726 *MacroTileHeightC = 16 * *BlockHeight256BytesC;
727 if (*MacroTileHeightC == 0) {
728 *MacroTileWidthC = 0;
729 } else {
730 *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
731 }
732 } else {
733 *MacroTileHeightY = 32 * *BlockHeight256BytesY;
734 *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
735 *MacroTileHeightC = 32 * *BlockHeight256BytesC;
736 if (*MacroTileHeightC == 0) {
737 *MacroTileWidthC = 0;
738 } else {
739 *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
740 }
741 }
742 } else {
743 unsigned int macro_tile_size_bytes = dml_get_tile_block_size_bytes(SurfaceTiling);
744 unsigned int macro_tile_scale = 1; // macro tile to 256B req scaling
745
746 if (SurfaceTiling == dml2_sw_linear) {
747 macro_tile_scale = 1;
748 } else if (SurfaceTiling == dml2_sw_4kb_2d) {
749 macro_tile_scale = 4;
750 } else if (SurfaceTiling == dml2_sw_64kb_2d) {
751 macro_tile_scale = 16;
752 } else if (SurfaceTiling == dml2_sw_256kb_2d) {
753 macro_tile_scale = 32;
754 } else {
755 dml2_printf("ERROR: Invalid SurfaceTiling setting! val=%u\n", SurfaceTiling);
756 DML2_ASSERT(0);
757 }
758
759 *MacroTileHeightY = macro_tile_scale * *BlockHeight256BytesY;
760 *MacroTileWidthY = macro_tile_size_bytes / *BytePerPixelY / *MacroTileHeightY;
761 *MacroTileHeightC = macro_tile_scale * *BlockHeight256BytesC;
762 if (*MacroTileHeightC == 0) {
763 *MacroTileWidthC = 0;
764 } else {
765 *MacroTileWidthC = macro_tile_size_bytes / *BytePerPixelC / *MacroTileHeightC;
766 }
767 }
768
769 #ifdef __DML_VBA_DEBUG__
770 dml2_printf("DML::%s: MacroTileWidthY = %u\n", __func__, *MacroTileWidthY);
771 dml2_printf("DML::%s: MacroTileHeightY = %u\n", __func__, *MacroTileHeightY);
772 dml2_printf("DML::%s: MacroTileWidthC = %u\n", __func__, *MacroTileWidthC);
773 dml2_printf("DML::%s: MacroTileHeightC = %u\n", __func__, *MacroTileHeightC);
774 #endif
775 }
776
CalculateSinglePipeDPPCLKAndSCLThroughput(double HRatio,double HRatioChroma,double VRatio,double VRatioChroma,double MaxDCHUBToPSCLThroughput,double MaxPSCLToLBThroughput,double PixelClock,enum dml2_source_format_class SourcePixelFormat,unsigned int HTaps,unsigned int HTapsChroma,unsigned int VTaps,unsigned int VTapsChroma,double * PSCL_THROUGHPUT,double * PSCL_THROUGHPUT_CHROMA,double * DPPCLKUsingSingleDPP)777 static void CalculateSinglePipeDPPCLKAndSCLThroughput(
778 double HRatio,
779 double HRatioChroma,
780 double VRatio,
781 double VRatioChroma,
782 double MaxDCHUBToPSCLThroughput,
783 double MaxPSCLToLBThroughput,
784 double PixelClock,
785 enum dml2_source_format_class SourcePixelFormat,
786 unsigned int HTaps,
787 unsigned int HTapsChroma,
788 unsigned int VTaps,
789 unsigned int VTapsChroma,
790
791 // Output
792 double *PSCL_THROUGHPUT,
793 double *PSCL_THROUGHPUT_CHROMA,
794 double *DPPCLKUsingSingleDPP)
795 {
796 double DPPCLKUsingSingleDPPLuma;
797 double DPPCLKUsingSingleDPPChroma;
798
799 if (HRatio > 1) {
800 *PSCL_THROUGHPUT = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / math_ceil2((double)HTaps / 6.0, 1.0));
801 } else {
802 *PSCL_THROUGHPUT = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
803 }
804
805 DPPCLKUsingSingleDPPLuma = PixelClock * math_max3(VTaps / 6 * math_min2(1, HRatio), HRatio * VRatio / *PSCL_THROUGHPUT, 1);
806
807 if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
808 DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
809
810 if (!dml_is_420(SourcePixelFormat) && SourcePixelFormat != dml2_rgbe_alpha) {
811 *PSCL_THROUGHPUT_CHROMA = 0;
812 *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
813 } else {
814 if (HRatioChroma > 1) {
815 *PSCL_THROUGHPUT_CHROMA = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatioChroma / math_ceil2((double)HTapsChroma / 6.0, 1.0));
816 } else {
817 *PSCL_THROUGHPUT_CHROMA = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
818 }
819 DPPCLKUsingSingleDPPChroma = PixelClock * math_max3(VTapsChroma / 6 * math_min2(1, HRatioChroma),
820 HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
821 if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
822 DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
823 *DPPCLKUsingSingleDPP = math_max2(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
824 }
825 }
826
CalculateSwathWidth(const struct dml2_display_cfg * display_cfg,bool ForceSingleDPP,unsigned int NumberOfActiveSurfaces,enum dml2_odm_mode ODMMode[],unsigned int BytePerPixY[],unsigned int BytePerPixC[],unsigned int Read256BytesBlockHeightY[],unsigned int Read256BytesBlockHeightC[],unsigned int Read256BytesBlockWidthY[],unsigned int Read256BytesBlockWidthC[],bool surf_linear128_l[],bool surf_linear128_c[],unsigned int DPPPerSurface[],unsigned int req_per_swath_ub_l[],unsigned int req_per_swath_ub_c[],unsigned int SwathWidthSingleDPPY[],unsigned int SwathWidthSingleDPPC[],unsigned int SwathWidthY[],unsigned int SwathWidthC[],unsigned int MaximumSwathHeightY[],unsigned int MaximumSwathHeightC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[])827 static void CalculateSwathWidth(
828 const struct dml2_display_cfg *display_cfg,
829 bool ForceSingleDPP,
830 unsigned int NumberOfActiveSurfaces,
831 enum dml2_odm_mode ODMMode[],
832 unsigned int BytePerPixY[],
833 unsigned int BytePerPixC[],
834 unsigned int Read256BytesBlockHeightY[],
835 unsigned int Read256BytesBlockHeightC[],
836 unsigned int Read256BytesBlockWidthY[],
837 unsigned int Read256BytesBlockWidthC[],
838 bool surf_linear128_l[],
839 bool surf_linear128_c[],
840 unsigned int DPPPerSurface[],
841
842 // Output
843 unsigned int req_per_swath_ub_l[],
844 unsigned int req_per_swath_ub_c[],
845 unsigned int SwathWidthSingleDPPY[], // post-rotated plane width
846 unsigned int SwathWidthSingleDPPC[],
847 unsigned int SwathWidthY[], // per-pipe
848 unsigned int SwathWidthC[], // per-pipe
849 unsigned int MaximumSwathHeightY[],
850 unsigned int MaximumSwathHeightC[],
851 unsigned int swath_width_luma_ub[], // per-pipe
852 unsigned int swath_width_chroma_ub[]) // per-pipe
853 {
854 enum dml2_odm_mode MainSurfaceODMMode;
855 double odm_hactive_factor = 1.0;
856 unsigned int req_width_horz_y;
857 unsigned int req_width_horz_c;
858 unsigned int surface_width_ub_l;
859 unsigned int surface_height_ub_l;
860 unsigned int surface_width_ub_c;
861 unsigned int surface_height_ub_c;
862
863 #ifdef __DML_VBA_DEBUG__
864 dml2_printf("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP);
865 dml2_printf("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
866 #endif
867
868 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
869 if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
870 SwathWidthSingleDPPY[k] = (unsigned int)display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
871 } else {
872 SwathWidthSingleDPPY[k] = (unsigned int)display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
873 }
874
875 #ifdef __DML_VBA_DEBUG__
876 dml2_printf("DML::%s: k=%u ViewportWidth=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width);
877 dml2_printf("DML::%s: k=%u ViewportHeight=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height);
878 dml2_printf("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]);
879 #endif
880
881 MainSurfaceODMMode = ODMMode[k];
882
883 if (ForceSingleDPP) {
884 SwathWidthY[k] = SwathWidthSingleDPPY[k];
885 } else {
886 if (MainSurfaceODMMode == dml2_odm_mode_combine_4to1)
887 odm_hactive_factor = 4.0;
888 else if (MainSurfaceODMMode == dml2_odm_mode_combine_3to1)
889 odm_hactive_factor = 3.0;
890 else if (MainSurfaceODMMode == dml2_odm_mode_combine_2to1)
891 odm_hactive_factor = 2.0;
892
893 if (MainSurfaceODMMode == dml2_odm_mode_combine_4to1 || MainSurfaceODMMode == dml2_odm_mode_combine_3to1 || MainSurfaceODMMode == dml2_odm_mode_combine_2to1) {
894 SwathWidthY[k] = (unsigned int)(math_min2((double)SwathWidthSingleDPPY[k], math_round((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active / odm_hactive_factor * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio)));
895 } else if (DPPPerSurface[k] == 2) {
896 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
897 } else {
898 SwathWidthY[k] = SwathWidthSingleDPPY[k];
899 }
900 }
901
902 #ifdef __DML_VBA_DEBUG__
903 dml2_printf("DML::%s: k=%u HActive=%u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active);
904 dml2_printf("DML::%s: k=%u HRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
905 dml2_printf("DML::%s: k=%u MainSurfaceODMMode=%u\n", __func__, k, MainSurfaceODMMode);
906 dml2_printf("DML::%s: k=%u SwathWidthSingleDPPY=%u\n", __func__, k, SwathWidthSingleDPPY[k]);
907 dml2_printf("DML::%s: k=%u SwathWidthY=%u\n", __func__, k, SwathWidthY[k]);
908 #endif
909
910 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) {
911 SwathWidthC[k] = SwathWidthY[k] / 2;
912 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
913 } else {
914 SwathWidthC[k] = SwathWidthY[k];
915 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
916 }
917
918 if (ForceSingleDPP == true) {
919 SwathWidthY[k] = SwathWidthSingleDPPY[k];
920 SwathWidthC[k] = SwathWidthSingleDPPC[k];
921 }
922
923 req_width_horz_y = Read256BytesBlockWidthY[k];
924 req_width_horz_c = Read256BytesBlockWidthC[k];
925
926 if (surf_linear128_l[k])
927 req_width_horz_y = req_width_horz_y / 2;
928
929 if (surf_linear128_c[k])
930 req_width_horz_c = req_width_horz_c / 2;
931
932 surface_width_ub_l = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane0.width, req_width_horz_y);
933 surface_height_ub_l = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane0.height, Read256BytesBlockHeightY[k]);
934 surface_width_ub_c = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.width, req_width_horz_c);
935 surface_height_ub_c = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.height, Read256BytesBlockHeightC[k]);
936
937 #ifdef __DML_VBA_DEBUG__
938 dml2_printf("DML::%s: k=%u surface_width_ub_l=%u\n", __func__, k, surface_width_ub_l);
939 dml2_printf("DML::%s: k=%u surface_height_ub_l=%u\n", __func__, k, surface_height_ub_l);
940 dml2_printf("DML::%s: k=%u surface_width_ub_c=%u\n", __func__, k, surface_width_ub_c);
941 dml2_printf("DML::%s: k=%u surface_height_ub_c=%u\n", __func__, k, surface_height_ub_c);
942 dml2_printf("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y);
943 dml2_printf("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c);
944 dml2_printf("DML::%s: k=%u Read256BytesBlockWidthY=%u\n", __func__, k, Read256BytesBlockWidthY[k]);
945 dml2_printf("DML::%s: k=%u Read256BytesBlockHeightY=%u\n", __func__, k, Read256BytesBlockHeightY[k]);
946 dml2_printf("DML::%s: k=%u Read256BytesBlockWidthC=%u\n", __func__, k, Read256BytesBlockWidthC[k]);
947 dml2_printf("DML::%s: k=%u Read256BytesBlockHeightC=%u\n", __func__, k, Read256BytesBlockHeightC[k]);
948 dml2_printf("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y);
949 dml2_printf("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c);
950 dml2_printf("DML::%s: k=%u ViewportStationary=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.stationary);
951 dml2_printf("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]);
952 #endif
953
954 req_per_swath_ub_l[k] = 0;
955 req_per_swath_ub_c[k] = 0;
956 if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
957 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
958 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
959 if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
960 swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_width_ub_l, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start + SwathWidthY[k] + req_width_horz_y - 1, req_width_horz_y) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start, req_width_horz_y)));
961 } else {
962 swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_width_ub_l, math_ceil2((double)SwathWidthY[k] - 1, req_width_horz_y) + req_width_horz_y));
963 }
964 req_per_swath_ub_l[k] = swath_width_luma_ub[k] / req_width_horz_y;
965
966 if (BytePerPixC[k] > 0) {
967 if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
968 swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_width_ub_c, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start + SwathWidthC[k] + req_width_horz_c - 1, req_width_horz_c) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start, req_width_horz_c)));
969 } else {
970 swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_width_ub_c, math_ceil2((double)SwathWidthC[k] - 1, req_width_horz_c) + req_width_horz_c));
971 }
972 req_per_swath_ub_c[k] = swath_width_chroma_ub[k] / req_width_horz_c;
973 } else {
974 swath_width_chroma_ub[k] = 0;
975 }
976 } else {
977 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
978 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
979
980 if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
981 swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_height_ub_l, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start + SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1, Read256BytesBlockHeightY[k]) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start, Read256BytesBlockHeightY[k])));
982 } else {
983 swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_height_ub_l, math_ceil2((double)SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]));
984 }
985 req_per_swath_ub_l[k] = swath_width_luma_ub[k] / Read256BytesBlockHeightY[k];
986 if (BytePerPixC[k] > 0) {
987 if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
988 swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_height_ub_c, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start + SwathWidthC[k] + Read256BytesBlockHeightC[k] - 1, Read256BytesBlockHeightC[k]) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start, Read256BytesBlockHeightC[k])));
989 } else {
990 swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_height_ub_c, math_ceil2((double)SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]));
991 }
992 req_per_swath_ub_c[k] = swath_width_chroma_ub[k] / Read256BytesBlockHeightC[k];
993 } else {
994 swath_width_chroma_ub[k] = 0;
995 }
996 }
997
998 #ifdef __DML_VBA_DEBUG__
999 dml2_printf("DML::%s: k=%u swath_width_luma_ub=%u\n", __func__, k, swath_width_luma_ub[k]);
1000 dml2_printf("DML::%s: k=%u swath_width_chroma_ub=%u\n", __func__, k, swath_width_chroma_ub[k]);
1001 dml2_printf("DML::%s: k=%u MaximumSwathHeightY=%u\n", __func__, k, MaximumSwathHeightY[k]);
1002 dml2_printf("DML::%s: k=%u MaximumSwathHeightC=%u\n", __func__, k, MaximumSwathHeightC[k]);
1003 dml2_printf("DML::%s: k=%u req_per_swath_ub_l=%u\n", __func__, k, req_per_swath_ub_l[k]);
1004 dml2_printf("DML::%s: k=%u req_per_swath_ub_c=%u\n", __func__, k, req_per_swath_ub_c[k]);
1005 #endif
1006
1007 }
1008 }
1009
/*
 * Decide whether unbounded requesting is enabled.
 *
 * It is allowed only when exactly one DPP is active and NoChromaOrLinear is
 * set. When unb_req_force_en is set, the result is additionally gated by
 * unb_req_force_val; a force can therefore disable the feature but never
 * enable it when it is not allowed.
 *
 * Returns true when unbounded requesting is enabled.
 */
static bool UnboundedRequest(bool unb_req_force_en, bool unb_req_force_val, unsigned int TotalNumberOfActiveDPP, bool NoChromaOrLinear)
{
	const bool unb_req_ok = (TotalNumberOfActiveDPP == 1) && NoChromaOrLinear;
	const bool unb_req_en = unb_req_force_en ? (unb_req_force_val && unb_req_ok) : unb_req_ok;

#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: unb_req_force_en = %u\n", __func__, unb_req_force_en);
	dml2_printf("DML::%s: unb_req_force_val = %u\n", __func__, unb_req_force_val);
	dml2_printf("DML::%s: unb_req_ok = %u\n", __func__, unb_req_ok);
	dml2_printf("DML::%s: unb_req_en = %u\n", __func__, unb_req_en);
#endif
	return unb_req_en;
}
1029
CalculateDETBufferSize(struct dml2_core_shared_CalculateDETBufferSize_locals * l,const struct dml2_display_cfg * display_cfg,bool ForceSingleDPP,unsigned int NumberOfActiveSurfaces,bool UnboundedRequestEnabled,unsigned int nomDETInKByte,unsigned int MaxTotalDETInKByte,unsigned int ConfigReturnBufferSizeInKByte,unsigned int MinCompressedBufferSizeInKByte,unsigned int ConfigReturnBufferSegmentSizeInkByte,unsigned int CompressedBufferSegmentSizeInkByte,double ReadBandwidthLuma[],double ReadBandwidthChroma[],unsigned int full_swath_bytes_l[],unsigned int full_swath_bytes_c[],unsigned int DPPPerSurface[],unsigned int DETBufferSizeInKByte[],unsigned int * CompressedBufferSizeInkByte)1030 static void CalculateDETBufferSize(
1031 struct dml2_core_shared_CalculateDETBufferSize_locals *l,
1032 const struct dml2_display_cfg *display_cfg,
1033 bool ForceSingleDPP,
1034 unsigned int NumberOfActiveSurfaces,
1035 bool UnboundedRequestEnabled,
1036 unsigned int nomDETInKByte,
1037 unsigned int MaxTotalDETInKByte,
1038 unsigned int ConfigReturnBufferSizeInKByte,
1039 unsigned int MinCompressedBufferSizeInKByte,
1040 unsigned int ConfigReturnBufferSegmentSizeInkByte,
1041 unsigned int CompressedBufferSegmentSizeInkByte,
1042 double ReadBandwidthLuma[],
1043 double ReadBandwidthChroma[],
1044 unsigned int full_swath_bytes_l[],
1045 unsigned int full_swath_bytes_c[],
1046 unsigned int DPPPerSurface[],
1047 // Output
1048 unsigned int DETBufferSizeInKByte[],
1049 unsigned int *CompressedBufferSizeInkByte)
1050 {
1051 memset(l, 0, sizeof(struct dml2_core_shared_CalculateDETBufferSize_locals));
1052
1053 bool DETPieceAssignedToThisSurfaceAlready[DML2_MAX_PLANES];
1054 bool NextPotentialSurfaceToAssignDETPieceFound;
1055 bool MinimizeReallocationSuccess = false;
1056
1057 #ifdef __DML_VBA_DEBUG__
1058 dml2_printf("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP);
1059 dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte);
1060 dml2_printf("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
1061 dml2_printf("DML::%s: UnboundedRequestEnabled = %u\n", __func__, UnboundedRequestEnabled);
1062 dml2_printf("DML::%s: MaxTotalDETInKByte = %u\n", __func__, MaxTotalDETInKByte);
1063 dml2_printf("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte);
1064 dml2_printf("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, MinCompressedBufferSizeInKByte);
1065 dml2_printf("DML::%s: CompressedBufferSegmentSizeInkByte = %u\n", __func__, CompressedBufferSegmentSizeInkByte);
1066 #endif
1067
1068 // Note: Will use default det size if that fits 2 swaths
1069 if (UnboundedRequestEnabled) {
1070 if (display_cfg->plane_descriptors[0].overrides.det_size_override_kb > 0) {
1071 DETBufferSizeInKByte[0] = display_cfg->plane_descriptors[0].overrides.det_size_override_kb;
1072 } else {
1073 DETBufferSizeInKByte[0] = (unsigned int)math_max2(128.0, math_ceil2(2.0 * ((double)full_swath_bytes_l[0] + (double)full_swath_bytes_c[0]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte));
1074 }
1075 *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
1076 } else {
1077 l->DETBufferSizePoolInKByte = MaxTotalDETInKByte;
1078 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1079 DETBufferSizeInKByte[k] = 0;
1080 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) {
1081 l->max_minDET = nomDETInKByte - ConfigReturnBufferSegmentSizeInkByte;
1082 } else {
1083 l->max_minDET = nomDETInKByte;
1084 }
1085 l->minDET = 128;
1086 l->minDET_pipe = 0;
1087
1088 // add DET resource until can hold 2 full swaths
1089 while (l->minDET <= l->max_minDET && l->minDET_pipe == 0) {
1090 if (2.0 * ((double)full_swath_bytes_l[k] + (double)full_swath_bytes_c[k]) / 1024.0 <= l->minDET)
1091 l->minDET_pipe = l->minDET;
1092 l->minDET = l->minDET + ConfigReturnBufferSegmentSizeInkByte;
1093 }
1094
1095 #ifdef __DML_VBA_DEBUG__
1096 dml2_printf("DML::%s: k=%u minDET = %u\n", __func__, k, l->minDET);
1097 dml2_printf("DML::%s: k=%u max_minDET = %u\n", __func__, k, l->max_minDET);
1098 dml2_printf("DML::%s: k=%u minDET_pipe = %u\n", __func__, k, l->minDET_pipe);
1099 dml2_printf("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, full_swath_bytes_l[k]);
1100 dml2_printf("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, full_swath_bytes_c[k]);
1101 #endif
1102
1103 if (l->minDET_pipe == 0) {
1104 l->minDET_pipe = (unsigned int)(math_max2(128, math_ceil2(((double)full_swath_bytes_l[k] + (double)full_swath_bytes_c[k]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte)));
1105 #ifdef __DML_VBA_DEBUG__
1106 dml2_printf("DML::%s: k=%u minDET_pipe = %u (assume each plane take half DET)\n", __func__, k, l->minDET_pipe);
1107 #endif
1108 }
1109
1110 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
1111 DETBufferSizeInKByte[k] = 0;
1112 } else if (display_cfg->plane_descriptors[k].overrides.det_size_override_kb > 0) {
1113 DETBufferSizeInKByte[k] = display_cfg->plane_descriptors[k].overrides.det_size_override_kb;
1114 l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * display_cfg->plane_descriptors[k].overrides.det_size_override_kb;
1115 } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * l->minDET_pipe <= l->DETBufferSizePoolInKByte) {
1116 DETBufferSizeInKByte[k] = l->minDET_pipe;
1117 l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * l->minDET_pipe;
1118 }
1119
1120 #ifdef __DML_VBA_DEBUG__
1121 dml2_printf("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]);
1122 dml2_printf("DML::%s: k=%u DETSizeOverride = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.det_size_override_kb);
1123 dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]);
1124 dml2_printf("DML::%s: DETBufferSizePoolInKByte = %u\n", __func__, l->DETBufferSizePoolInKByte);
1125 #endif
1126 }
1127
1128 if (display_cfg->minimize_det_reallocation) {
1129 MinimizeReallocationSuccess = true;
1130 // To minimize det reallocation, we don't distribute based on each surfaces bandwidth proportional to the global
1131 // but rather distribute DET across streams proportionally based on pixel rate, and only distribute based on
1132 // bandwidth between the planes on the same stream. This ensures that large scale re-distribution only on a
1133 // stream count and/or pixel rate change, which is must less likely then general bandwidth changes per plane.
1134
1135 // Calculate total pixel rate
1136 for (unsigned int k = 0; k < display_cfg->num_streams; ++k) {
1137 l->TotalPixelRate += display_cfg->stream_descriptors[k].timing.pixel_clock_khz;
1138 }
1139
1140 // Calculate per stream DET budget
1141 for (unsigned int k = 0; k < display_cfg->num_streams; ++k) {
1142 l->DETBudgetPerStream[k] = (unsigned int)((double) display_cfg->stream_descriptors[k].timing.pixel_clock_khz * MaxTotalDETInKByte / l->TotalPixelRate);
1143 l->RemainingDETBudgetPerStream[k] = l->DETBudgetPerStream[k];
1144 }
1145
1146 // Calculate the per stream total bandwidth
1147 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1148 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
1149 l->TotalBandwidthPerStream[display_cfg->plane_descriptors[k].stream_index] += (unsigned int)(ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
1150
1151 // Check the minimum can be satisfied by budget
1152 if (l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index] >= DETBufferSizeInKByte[k] * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1153 l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index] -= DETBufferSizeInKByte[k] * (ForceSingleDPP ? 1 : DPPPerSurface[k]);
1154 } else {
1155 MinimizeReallocationSuccess = false;
1156 break;
1157 }
1158 }
1159 }
1160
1161 if (MinimizeReallocationSuccess) {
1162 // Since a fixed budget per stream is sufficient to satisfy the minimums, just re-distribute each streams
1163 // budget proportionally across its planes
1164 l->ResidualDETAfterRounding = MaxTotalDETInKByte;
1165
1166 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1167 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
1168 l->IdealDETBudget = (unsigned int)(((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / l->TotalBandwidthPerStream[display_cfg->plane_descriptors[k].stream_index])
1169 * l->DETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index]);
1170
1171 if (l->IdealDETBudget > DETBufferSizeInKByte[k]) {
1172 l->DeltaDETBudget = l->IdealDETBudget - DETBufferSizeInKByte[k];
1173 if (l->DeltaDETBudget > l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index])
1174 l->DeltaDETBudget = l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index];
1175
1176 /* split the additional budgeted DET among the pipes per plane */
1177 DETBufferSizeInKByte[k] += (unsigned int)((double)l->DeltaDETBudget / (ForceSingleDPP ? 1 : DPPPerSurface[k]));
1178 l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index] -= l->DeltaDETBudget;
1179 }
1180
1181 // Round down to segment size
1182 DETBufferSizeInKByte[k] = (DETBufferSizeInKByte[k] / ConfigReturnBufferSegmentSizeInkByte) * ConfigReturnBufferSegmentSizeInkByte;
1183
1184 l->ResidualDETAfterRounding -= DETBufferSizeInKByte[k] * (ForceSingleDPP ? 1 : DPPPerSurface[k]);
1185 }
1186 }
1187 }
1188 }
1189
1190 if (!MinimizeReallocationSuccess) {
1191 l->TotalBandwidth = 0;
1192 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1193 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
1194 l->TotalBandwidth = l->TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
1195 }
1196 }
1197 #ifdef __DML_VBA_DEBUG__
1198 dml2_printf("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
1199 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1200 dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]);
1201 }
1202 dml2_printf("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
1203 #endif
1204 dml2_printf("DML::%s: TotalBandwidth = %f\n", __func__, l->TotalBandwidth);
1205 l->BandwidthOfSurfacesNotAssignedDETPiece = l->TotalBandwidth;
1206 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1207
1208 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
1209 DETPieceAssignedToThisSurfaceAlready[k] = true;
1210 } else if (display_cfg->plane_descriptors[k].overrides.det_size_override_kb > 0 || (((double)(ForceSingleDPP ? 1 : DPPPerSurface[k]) * (double)DETBufferSizeInKByte[k] / (double)MaxTotalDETInKByte) >= ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / l->TotalBandwidth))) {
1211 DETPieceAssignedToThisSurfaceAlready[k] = true;
1212 l->BandwidthOfSurfacesNotAssignedDETPiece = l->BandwidthOfSurfacesNotAssignedDETPiece - ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
1213 } else {
1214 DETPieceAssignedToThisSurfaceAlready[k] = false;
1215 }
1216 #ifdef __DML_VBA_DEBUG__
1217 dml2_printf("DML::%s: k=%u DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, k, DETPieceAssignedToThisSurfaceAlready[k]);
1218 dml2_printf("DML::%s: k=%u BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, l->BandwidthOfSurfacesNotAssignedDETPiece);
1219 #endif
1220 }
1221
1222 for (unsigned int j = 0; j < NumberOfActiveSurfaces; ++j) {
1223 NextPotentialSurfaceToAssignDETPieceFound = false;
1224 l->NextSurfaceToAssignDETPiece = 0;
1225
1226 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1227 #ifdef __DML_VBA_DEBUG__
1228 dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthLuma[k] = %f\n", __func__, j, k, ReadBandwidthLuma[k]);
1229 dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthChroma[k] = %f\n", __func__, j, k, ReadBandwidthChroma[k]);
1230 dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]);
1231 dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]);
1232 dml2_printf("DML::%s: j=%u k=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, k, l->NextSurfaceToAssignDETPiece);
1233 #endif
1234 if (!DETPieceAssignedToThisSurfaceAlready[k] && (!NextPotentialSurfaceToAssignDETPieceFound ||
1235 ReadBandwidthLuma[k] + ReadBandwidthChroma[k] < ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece])) {
1236 l->NextSurfaceToAssignDETPiece = k;
1237 NextPotentialSurfaceToAssignDETPieceFound = true;
1238 }
1239 #ifdef __DML_VBA_DEBUG__
1240 dml2_printf("DML::%s: j=%u k=%u, DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
1241 dml2_printf("DML::%s: j=%u k=%u, NextPotentialSurfaceToAssignDETPieceFound = %u\n", __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
1242 #endif
1243 }
1244
1245 if (NextPotentialSurfaceToAssignDETPieceFound) {
1246 l->NextDETBufferPieceInKByte = (unsigned int)(math_min2(
1247 math_round((double)l->DETBufferSizePoolInKByte * (ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]) / l->BandwidthOfSurfacesNotAssignedDETPiece /
1248 ((ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte))
1249 * (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte,
1250 math_floor2((double)l->DETBufferSizePoolInKByte, (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte)));
1251
1252 #ifdef __DML_VBA_DEBUG__
1253 dml2_printf("DML::%s: j=%u, DETBufferSizePoolInKByte = %u\n", __func__, j, l->DETBufferSizePoolInKByte);
1254 dml2_printf("DML::%s: j=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, l->NextSurfaceToAssignDETPiece);
1255 dml2_printf("DML::%s: j=%u, ReadBandwidthLuma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]);
1256 dml2_printf("DML::%s: j=%u, ReadBandwidthChroma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]);
1257 dml2_printf("DML::%s: j=%u, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, j, l->BandwidthOfSurfacesNotAssignedDETPiece);
1258 dml2_printf("DML::%s: j=%u, NextDETBufferPieceInKByte = %u\n", __func__, j, l->NextDETBufferPieceInKByte);
1259 dml2_printf("DML::%s: j=%u, DETBufferSizeInKByte[%u] increases from %u ", __func__, j, l->NextSurfaceToAssignDETPiece, DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]);
1260 #endif
1261
1262 DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece] = DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece] + l->NextDETBufferPieceInKByte / (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]);
1263 #ifdef __DML_VBA_DEBUG__
1264 dml2_printf("to %u\n", DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]);
1265 #endif
1266
1267 l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - l->NextDETBufferPieceInKByte;
1268 DETPieceAssignedToThisSurfaceAlready[l->NextSurfaceToAssignDETPiece] = true;
1269 l->BandwidthOfSurfacesNotAssignedDETPiece = l->BandwidthOfSurfacesNotAssignedDETPiece - (ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]);
1270 }
1271 }
1272 }
1273 *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
1274 }
1275 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByte / ConfigReturnBufferSegmentSizeInkByte;
1276
1277 #ifdef __DML_VBA_DEBUG__
1278 dml2_printf("DML::%s: --- After bandwidth adjustment ---\n", __func__);
1279 dml2_printf("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *CompressedBufferSizeInkByte);
1280 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1281 dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u (TotalReadBandWidth=%f)\n", __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
1282 }
1283 #endif
1284 }
1285
CalculateRequiredDispclk(enum dml2_odm_mode ODMMode,double PixelClock)1286 static double CalculateRequiredDispclk(
1287 enum dml2_odm_mode ODMMode,
1288 double PixelClock)
1289 {
1290
1291 if (ODMMode == dml2_odm_mode_combine_4to1) {
1292 return PixelClock / 4.0;
1293 } else if (ODMMode == dml2_odm_mode_combine_3to1) {
1294 return PixelClock / 3.0;
1295 } else if (ODMMode == dml2_odm_mode_combine_2to1) {
1296 return PixelClock / 2.0;
1297 } else {
1298 return PixelClock;
1299 }
1300 }
1301
TruncToValidBPP(struct dml2_core_shared_TruncToValidBPP_locals * l,double LinkBitRate,unsigned int Lanes,unsigned int HTotal,unsigned int HActive,double PixelClock,double DesiredBPP,bool DSCEnable,enum dml2_output_encoder_class Output,enum dml2_output_format_class Format,unsigned int DSCInputBitPerComponent,unsigned int DSCSlices,unsigned int AudioRate,unsigned int AudioLayout,enum dml2_odm_mode ODMModeNoDSC,enum dml2_odm_mode ODMModeDSC,unsigned int * RequiredSlots)1302 static double TruncToValidBPP(
1303 struct dml2_core_shared_TruncToValidBPP_locals *l,
1304 double LinkBitRate,
1305 unsigned int Lanes,
1306 unsigned int HTotal,
1307 unsigned int HActive,
1308 double PixelClock,
1309 double DesiredBPP,
1310 bool DSCEnable,
1311 enum dml2_output_encoder_class Output,
1312 enum dml2_output_format_class Format,
1313 unsigned int DSCInputBitPerComponent,
1314 unsigned int DSCSlices,
1315 unsigned int AudioRate,
1316 unsigned int AudioLayout,
1317 enum dml2_odm_mode ODMModeNoDSC,
1318 enum dml2_odm_mode ODMModeDSC,
1319
1320 // Output
1321 unsigned int *RequiredSlots)
1322 {
1323 double MaxLinkBPP;
1324 unsigned int MinDSCBPP;
1325 double MaxDSCBPP;
1326 unsigned int NonDSCBPP0;
1327 unsigned int NonDSCBPP1;
1328 unsigned int NonDSCBPP2;
1329 enum dml2_odm_mode ODMMode;
1330
1331 if (Format == dml2_420) {
1332 NonDSCBPP0 = 12;
1333 NonDSCBPP1 = 15;
1334 NonDSCBPP2 = 18;
1335 MinDSCBPP = 6;
1336 MaxDSCBPP = 16;
1337 } else if (Format == dml2_444) {
1338 NonDSCBPP0 = 24;
1339 NonDSCBPP1 = 30;
1340 NonDSCBPP2 = 36;
1341 MinDSCBPP = 8;
1342 MaxDSCBPP = 16;
1343 } else {
1344 if (Output == dml2_hdmi || Output == dml2_hdmifrl) {
1345 NonDSCBPP0 = 24;
1346 NonDSCBPP1 = 24;
1347 NonDSCBPP2 = 24;
1348 } else {
1349 NonDSCBPP0 = 16;
1350 NonDSCBPP1 = 20;
1351 NonDSCBPP2 = 24;
1352 }
1353 if (Format == dml2_n422 || Output == dml2_hdmifrl) {
1354 MinDSCBPP = 7;
1355 MaxDSCBPP = 16;
1356 } else {
1357 MinDSCBPP = 8;
1358 MaxDSCBPP = 16;
1359 }
1360 }
1361 if (Output == dml2_dp2p0) {
1362 MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128.0 / 132.0 * 383.0 / 384.0 * 65536.0 / 65540.0;
1363 } else if (DSCEnable && Output == dml2_dp) {
1364 MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock * (1 - 2.4 / 100);
1365 } else {
1366 MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock;
1367 }
1368
1369 ODMMode = DSCEnable ? ODMModeDSC : ODMModeNoDSC;
1370
1371 if (ODMMode == dml2_odm_mode_split_1to2) {
1372 MaxLinkBPP = 2 * MaxLinkBPP;
1373 }
1374
1375 if (DesiredBPP == 0) {
1376 if (DSCEnable) {
1377 if (MaxLinkBPP < MinDSCBPP) {
1378 return __DML2_CALCS_DPP_INVALID__;
1379 } else if (MaxLinkBPP >= MaxDSCBPP) {
1380 return MaxDSCBPP;
1381 } else {
1382 return math_floor2(16.0 * MaxLinkBPP, 1.0) / 16.0;
1383 }
1384 } else {
1385 if (MaxLinkBPP >= NonDSCBPP2) {
1386 return NonDSCBPP2;
1387 } else if (MaxLinkBPP >= NonDSCBPP1) {
1388 return NonDSCBPP1;
1389 } else if (MaxLinkBPP >= NonDSCBPP0) {
1390 return NonDSCBPP0;
1391 } else {
1392 return __DML2_CALCS_DPP_INVALID__;
1393 }
1394 }
1395 } else {
1396 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP == NonDSCBPP0)) ||
1397 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
1398 return __DML2_CALCS_DPP_INVALID__;
1399 } else {
1400 return DesiredBPP;
1401 }
1402 }
1403 }
1404
1405 // updated for dcn4
dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum dml2_output_format_class pixelFormat,enum dml2_output_encoder_class Output)1406 static unsigned int dscceComputeDelay(
1407 unsigned int bpc,
1408 double BPP,
1409 unsigned int sliceWidth,
1410 unsigned int numSlices,
1411 enum dml2_output_format_class pixelFormat,
1412 enum dml2_output_encoder_class Output)
1413 {
1414 // valid bpc = source bits per component in the set of {8, 10, 12}
1415 // valid bpp = increments of 1/16 of a bit
1416 // min = 6/7/8 in N420/N422/444, respectively
1417 // max = such that compression is 1:1
1418 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
1419 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
1420 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
1421
1422 // fixed value
1423 unsigned int rcModelSize = 8192;
1424
1425 // N422/N420 operate at 2 pixels per clock
1426 unsigned int pixelsPerClock, padding_pixels, ssm_group_priming_delay, ssm_pipeline_delay, obsm_pipeline_delay, slice_padded_pixels, ixd_plus_padding, ixd_plus_padding_groups, cycles_per_group, group_delay, pipeline_delay, pixels, additional_group_delay, lines_to_reach_ixd, groups_to_reach_ixd, slice_width_groups, initial_xmit_delay, number_of_lines_to_reach_ixd, slice_width_modified;
1427
1428 if (pixelFormat == dml2_420)
1429 pixelsPerClock = 2;
1430 // #all other modes operate at 1 pixel per clock
1431 else if (pixelFormat == dml2_444)
1432 pixelsPerClock = 1;
1433 else if (pixelFormat == dml2_n422 || Output == dml2_hdmifrl)
1434 pixelsPerClock = 2;
1435 else
1436 pixelsPerClock = 1;
1437
1438 //initial transmit delay as per PPS
1439 initial_xmit_delay = (unsigned int)(math_round(rcModelSize / 2.0 / BPP / pixelsPerClock));
1440
1441 //slice width as seen by dscc_bcl in pixels or pixels pairs (depending on number of pixels per pixel container based on pixel format)
1442 slice_width_modified = (pixelFormat == dml2_444 || pixelFormat == dml2_420 || Output == dml2_hdmifrl) ? sliceWidth / 2 : sliceWidth;
1443
1444 padding_pixels = ((slice_width_modified % 3) != 0) ? (3 - (slice_width_modified % 3)) * (initial_xmit_delay / slice_width_modified) : 0;
1445
1446 if ((3.0 * pixelsPerClock * BPP) >= ((double)((initial_xmit_delay + 2) / 3) * (double)(3 + (pixelFormat == dml2_n422)))) {
1447 if ((initial_xmit_delay + padding_pixels) % 3 == 1) {
1448 initial_xmit_delay++;
1449 }
1450 }
1451
1452 //sub-stream multiplexer balance fifo priming delay in groups as per dsc standard
1453 if (bpc == 8)
1454 ssm_group_priming_delay = 83;
1455 else if (bpc == 10)
1456 ssm_group_priming_delay = 91;
1457 else if (bpc == 12)
1458 ssm_group_priming_delay = 115;
1459 else if (bpc == 14)
1460 ssm_group_priming_delay = 123;
1461 else
1462 ssm_group_priming_delay = 128;
1463
1464 //slice width in groups is rounded up to the nearest group as DSC adds padded pixels such that there are an integer number of groups per slice
1465 slice_width_groups = (slice_width_modified + 2) / 3;
1466
1467 //determine number of padded pixels in the last group of a slice line, computed as
1468 slice_padded_pixels = 3 * slice_width_groups - slice_width_modified;
1469
1470 //determine integer number of complete slice lines required to reach initial transmit delay without ssm delay considered
1471 number_of_lines_to_reach_ixd = initial_xmit_delay / slice_width_modified;
1472
1473 //increase initial transmit delay by the number of padded pixels added to a slice line multipled by the integer number of complete lines to reach initial transmit delay
1474 //this step is necessary as each padded pixel added takes up a clock cycle and, therefore, adds to the overall delay
1475 ixd_plus_padding = initial_xmit_delay + slice_padded_pixels * number_of_lines_to_reach_ixd;
1476
1477 //convert the padded initial transmit delay from pixels to groups by rounding up to the nearest group as DSC processes in groups of pixels
1478 ixd_plus_padding_groups = (ixd_plus_padding + 2) / 3;
1479
1480 //number of groups required for a slice to reach initial transmit delay is the sum of the padded initial transmit delay plus the ssm group priming delay
1481 groups_to_reach_ixd = ixd_plus_padding_groups + ssm_group_priming_delay;
1482
1483 //number of lines required to reach padded initial transmit delay in groups in slices to the left of the last horizontal slice
1484 //needs to be rounded up as a complete slice lines are buffered prior to initial transmit delay being reached in the last horizontal slice
1485 lines_to_reach_ixd = (groups_to_reach_ixd + slice_width_groups - 1) / slice_width_groups; //round up lines to reach ixd to next
1486
1487 //determine if there are non-zero number of pixels reached in the group where initial transmit delay is reached
1488 //an additional group time (i.e., 3 pixel times) is required before the first output if there are no additional pixels beyond initial transmit delay
1489 additional_group_delay = ((initial_xmit_delay - number_of_lines_to_reach_ixd * slice_width_modified) % 3) == 0 ? 1 : 0;
1490
1491 //number of pipeline delay cycles in the ssm block (can be determined empirically or analytically by inspecting the ssm block)
1492 ssm_pipeline_delay = 2;
1493
1494 //number of pipe delay cycles in the obsm block (can be determined empirically or analytically by inspecting the obsm block)
1495 obsm_pipeline_delay = 1;
1496
1497 //a group of pixels is worth 6 pixels in N422/N420 mode or 3 pixels in all other modes
1498 if (pixelFormat == dml2_420 || pixelFormat == dml2_444 || pixelFormat == dml2_n422 || Output == dml2_hdmifrl)
1499 cycles_per_group = 6;
1500 else
1501 cycles_per_group = 3;
1502 //delay of the bit stream contruction layer in pixels is the sum of:
1503 //1. number of pixel containers in a slice line multipled by the number of lines required to reach initial transmit delay multipled by number of slices to the left of the last horizontal slice
1504 //2. number of pixel containers required to reach initial transmit delay (specifically, in the last horizontal slice)
1505 //3. additional group of delay if initial transmit delay is reached exactly in a group
1506 //4. ssm and obsm pipeline delay (i.e., clock cycles of delay)
1507 group_delay = (lines_to_reach_ixd * slice_width_groups * (numSlices - 1)) + groups_to_reach_ixd + additional_group_delay;
1508 pipeline_delay = ssm_pipeline_delay + obsm_pipeline_delay;
1509
1510 //pixel delay is group_delay (converted to pixels) + pipeline, however, first group is a special case since it is processed as soon as it arrives (i.e., in 3 cycles regardless of pixel format)
1511 pixels = (group_delay - 1) * cycles_per_group + 3 + pipeline_delay;
1512
1513 #ifdef __DML_VBA_DEBUG__
1514 dml2_printf("DML::%s: bpc: %u\n", __func__, bpc);
1515 dml2_printf("DML::%s: BPP: %f\n", __func__, BPP);
1516 dml2_printf("DML::%s: sliceWidth: %u\n", __func__, sliceWidth);
1517 dml2_printf("DML::%s: numSlices: %u\n", __func__, numSlices);
1518 dml2_printf("DML::%s: pixelFormat: %u\n", __func__, pixelFormat);
1519 dml2_printf("DML::%s: Output: %u\n", __func__, Output);
1520 dml2_printf("DML::%s: pixels: %u\n", __func__, pixels);
1521 #endif
1522 return pixels;
1523 }
1524
1525 //updated in dcn4
dscComputeDelay(enum dml2_output_format_class pixelFormat,enum dml2_output_encoder_class Output)1526 static unsigned int dscComputeDelay(enum dml2_output_format_class pixelFormat, enum dml2_output_encoder_class Output)
1527 {
1528 unsigned int Delay = 0;
1529 unsigned int dispclk_per_dscclk = 3;
1530
1531 // sfr
1532 Delay = Delay + 2;
1533
1534 if (pixelFormat == dml2_420 || pixelFormat == dml2_n422 || (Output == dml2_hdmifrl && pixelFormat != dml2_444)) {
1535 dispclk_per_dscclk = 3 * 2;
1536 }
1537
1538 if (pixelFormat == dml2_420) {
1539 //dscc top delay for pixel compression layer
1540 Delay = Delay + 16 * dispclk_per_dscclk;
1541
1542 // dscc - input deserializer
1543 Delay = Delay + 5;
1544
1545 // dscc - input cdc fifo
1546 Delay = Delay + 1 + 4 * dispclk_per_dscclk;
1547
1548 // dscc - output cdc fifo
1549 Delay = Delay + 3 + 1 * dispclk_per_dscclk;
1550
1551 // dscc - cdc uncertainty
1552 Delay = Delay + 3 + 3 * dispclk_per_dscclk;
1553 } else if (pixelFormat == dml2_n422 || (Output == dml2_hdmifrl && pixelFormat != dml2_444)) {
1554 //dscc top delay for pixel compression layer
1555 Delay = Delay + 16 * dispclk_per_dscclk;
1556 // dsccif
1557 Delay = Delay + 1;
1558 // dscc - input deserializer
1559 Delay = Delay + 5;
1560 // dscc - input cdc fifo
1561 Delay = Delay + 1 + 4 * dispclk_per_dscclk;
1562
1563
1564 // dscc - output cdc fifo
1565 Delay = Delay + 3 + 1 * dispclk_per_dscclk;
1566 // dscc - cdc uncertainty
1567 Delay = Delay + 3 + 3 * dispclk_per_dscclk;
1568 } else if (pixelFormat == dml2_s422) {
1569 //dscc top delay for pixel compression layer
1570 Delay = Delay + 17 * dispclk_per_dscclk;
1571
1572 // dscc - input deserializer
1573 Delay = Delay + 3;
1574 // dscc - input cdc fifo
1575 Delay = Delay + 1 + 4 * dispclk_per_dscclk;
1576 // dscc - output cdc fifo
1577 Delay = Delay + 3 + 1 * dispclk_per_dscclk;
1578 // dscc - cdc uncertainty
1579 Delay = Delay + 3 + 3 * dispclk_per_dscclk;
1580 } else {
1581 //dscc top delay for pixel compression layer
1582 Delay = Delay + 16 * dispclk_per_dscclk;
1583 // dscc - input deserializer
1584 Delay = Delay + 3;
1585 // dscc - input cdc fifo
1586 Delay = Delay + 1 + 4 * dispclk_per_dscclk;
1587 // dscc - output cdc fifo
1588 Delay = Delay + 3 + 1 * dispclk_per_dscclk;
1589
1590 // dscc - cdc uncertainty
1591 Delay = Delay + 3 + 3 * dispclk_per_dscclk;
1592 }
1593
1594 // sft
1595 Delay = Delay + 1;
1596 #ifdef __DML_VBA_DEBUG__
1597 dml2_printf("DML::%s: pixelFormat = %u\n", __func__, pixelFormat);
1598 dml2_printf("DML::%s: Delay = %u\n", __func__, Delay);
1599 #endif
1600
1601 return Delay;
1602 }
1603
/*
 * Return the number of dynamic host-VM page table levels.
 *
 * Returns 0 unless both GPUVMEnable and HostVMEnable are set. Otherwise
 * HostVMMaxNonCachedPageTableLevels is reduced according to
 * HostVMMinPageSize and clamped at 0:
 *   HostVMMinPageSize <  2048     -> no reduction
 *   HostVMMinPageSize <  1048576  -> one level fewer
 *   HostVMMinPageSize >= 1048576  -> two levels fewer
 * NOTE(review): the unit of HostVMMinPageSize is not visible in this
 * function -- confirm against the caller.
 */
static unsigned int CalculateHostVMDynamicLevels(
	bool GPUVMEnable,
	bool HostVMEnable,
	unsigned int HostVMMinPageSize,
	unsigned int HostVMMaxNonCachedPageTableLevels)
{
	unsigned int levels_saved;

	if (!GPUVMEnable || !HostVMEnable)
		return 0;

	if (HostVMMinPageSize < 2048)
		levels_saved = 0;
	else if (HostVMMinPageSize < 1048576)
		levels_saved = 1;
	else
		levels_saved = 2;

	// Saturating subtract in integer arithmetic: same result as clamping
	// (double)levels - levels_saved at zero, without the float round trip.
	if (HostVMMaxNonCachedPageTableLevels <= levels_saved)
		return 0;

	return HostVMMaxNonCachedPageTableLevels - levels_saved;
}
1624
CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_and_row_bytes_params * p)1625 static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_and_row_bytes_params *p)
1626 {
1627 unsigned int extra_dpde_bytes;
1628 unsigned int extra_mpde_bytes;
1629 unsigned int MacroTileSizeBytes;
1630 unsigned int vp_height_dpte_ub;
1631
1632 unsigned int meta_surface_bytes;
1633 unsigned int vm_bytes;
1634 unsigned int vp_height_meta_ub;
1635 unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
1636
1637 *p->MetaRequestHeight = 8 * p->BlockHeight256Bytes;
1638 *p->MetaRequestWidth = 8 * p->BlockWidth256Bytes;
1639 if (p->SurfaceTiling == dml2_sw_linear) {
1640 *p->meta_row_height = 32;
1641 *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->MetaRequestWidth - 1, *p->MetaRequestWidth) - math_floor2(p->ViewportXStart, *p->MetaRequestWidth));
1642 *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestHeight * p->BytePerPixel / 256.0); // FIXME_DCN4SW missing in old code but no dcc for linear anyways?
1643 } else if (!dml_is_vertical_rotation(p->RotationAngle)) {
1644 *p->meta_row_height = *p->MetaRequestHeight;
1645 if (p->ViewportStationary && p->NumberOfDPPs == 1) {
1646 *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->MetaRequestWidth - 1, *p->MetaRequestWidth) - math_floor2(p->ViewportXStart, *p->MetaRequestWidth));
1647 } else {
1648 *p->meta_row_width = (unsigned int)(math_ceil2(p->SwathWidth - 1, *p->MetaRequestWidth) + *p->MetaRequestWidth);
1649 }
1650 *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestHeight * p->BytePerPixel / 256.0);
1651 } else {
1652 *p->meta_row_height = *p->MetaRequestWidth;
1653 if (p->ViewportStationary && p->NumberOfDPPs == 1) {
1654 *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + *p->MetaRequestHeight - 1, *p->MetaRequestHeight) - math_floor2(p->ViewportYStart, *p->MetaRequestHeight));
1655 } else {
1656 *p->meta_row_width = (unsigned int)(math_ceil2(p->SwathWidth - 1, *p->MetaRequestHeight) + *p->MetaRequestHeight);
1657 }
1658 *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestWidth * p->BytePerPixel / 256.0);
1659 }
1660
1661 if (p->ViewportStationary && p->is_phantom && (p->NumberOfDPPs == 1 || !dml_is_vertical_rotation(p->RotationAngle))) {
1662 vp_height_meta_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + 64 * p->BlockHeight256Bytes - 1, 64 * p->BlockHeight256Bytes) - math_floor2(p->ViewportYStart, 64 * p->BlockHeight256Bytes));
1663 } else if (!dml_is_vertical_rotation(p->RotationAngle)) {
1664 vp_height_meta_ub = (unsigned int)(math_ceil2(p->ViewportHeight - 1, 64 * p->BlockHeight256Bytes) + 64 * p->BlockHeight256Bytes);
1665 } else {
1666 vp_height_meta_ub = (unsigned int)(math_ceil2(p->SwathWidth - 1, 64 * p->BlockHeight256Bytes) + 64 * p->BlockHeight256Bytes);
1667 }
1668
1669 meta_surface_bytes = (unsigned int)(p->DCCMetaPitch * vp_height_meta_ub * p->BytePerPixel / 256.0);
1670 #ifdef __DML_VBA_DEBUG__
1671 dml2_printf("DML::%s: DCCMetaPitch = %u\n", __func__, p->DCCMetaPitch);
1672 dml2_printf("DML::%s: meta_surface_bytes = %u\n", __func__, meta_surface_bytes);
1673 #endif
1674 if (p->GPUVMEnable == true) {
1675 double meta_vmpg_bytes = 4.0 * 1024.0;
1676 *p->meta_pte_bytes_per_frame_ub = (unsigned int)((math_ceil2((double) (meta_surface_bytes - meta_vmpg_bytes) / (8 * meta_vmpg_bytes), 1) + 1) * 64);
1677 extra_mpde_bytes = 128 * (p->GPUVMMaxPageTableLevels - 1);
1678 } else {
1679 *p->meta_pte_bytes_per_frame_ub = 0;
1680 extra_mpde_bytes = 0;
1681 }
1682
1683 if (!p->DCCEnable || !p->mrq_present) {
1684 *p->meta_pte_bytes_per_frame_ub = 0;
1685 extra_mpde_bytes = 0;
1686 *p->meta_row_bytes = 0;
1687 }
1688
1689 if (!p->GPUVMEnable) {
1690 *p->PixelPTEBytesPerRow = 0;
1691 *p->PixelPTEBytesPerRowStorage = 0;
1692 *p->dpte_row_width_ub = 0;
1693 *p->dpte_row_height = 0;
1694 *p->dpte_row_height_linear = 0;
1695 *p->PixelPTEBytesPerRow_one_row_per_frame = 0;
1696 *p->dpte_row_width_ub_one_row_per_frame = 0;
1697 *p->dpte_row_height_one_row_per_frame = 0;
1698 *p->vmpg_width = 0;
1699 *p->vmpg_height = 0;
1700 *p->PixelPTEReqWidth = 0;
1701 *p->PixelPTEReqHeight = 0;
1702 *p->PTERequestSize = 0;
1703 *p->dpde0_bytes_per_frame_ub = 0;
1704 return 0;
1705 }
1706
1707 MacroTileSizeBytes = p->MacroTileWidth * p->BytePerPixel * p->MacroTileHeight;
1708
1709 if (p->ViewportStationary && p->is_phantom && (p->NumberOfDPPs == 1 || !dml_is_vertical_rotation(p->RotationAngle))) {
1710 vp_height_dpte_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + p->MacroTileHeight - 1, p->MacroTileHeight) - math_floor2(p->ViewportYStart, p->MacroTileHeight));
1711 } else if (!dml_is_vertical_rotation(p->RotationAngle)) {
1712 vp_height_dpte_ub = (unsigned int)(math_ceil2((double)p->ViewportHeight - 1, p->MacroTileHeight) + p->MacroTileHeight);
1713 } else {
1714 vp_height_dpte_ub = (unsigned int)(math_ceil2((double)p->SwathWidth - 1, p->MacroTileHeight) + p->MacroTileHeight);
1715 }
1716
1717 if (p->GPUVMEnable == true && p->GPUVMMaxPageTableLevels > 1) {
1718 *p->dpde0_bytes_per_frame_ub = (unsigned int)(64 * (math_ceil2((double)(p->Pitch * vp_height_dpte_ub * p->BytePerPixel - MacroTileSizeBytes) / (double)(8 * 2097152), 1) + 1));
1719 extra_dpde_bytes = 128 * (p->GPUVMMaxPageTableLevels - 2);
1720 } else {
1721 *p->dpde0_bytes_per_frame_ub = 0;
1722 extra_dpde_bytes = 0;
1723 }
1724
1725 vm_bytes = *p->meta_pte_bytes_per_frame_ub + extra_mpde_bytes + *p->dpde0_bytes_per_frame_ub + extra_dpde_bytes;
1726
1727 #ifdef __DML_VBA_DEBUG__
1728 dml2_printf("DML::%s: DCCEnable = %u\n", __func__, p->DCCEnable);
1729 dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable);
1730 dml2_printf("DML::%s: SwModeLinear = %u\n", __func__, p->SurfaceTiling == dml2_sw_linear);
1731 dml2_printf("DML::%s: BytePerPixel = %u\n", __func__, p->BytePerPixel);
1732 dml2_printf("DML::%s: GPUVMMaxPageTableLevels = %u\n", __func__, p->GPUVMMaxPageTableLevels);
1733 dml2_printf("DML::%s: BlockHeight256Bytes = %u\n", __func__, p->BlockHeight256Bytes);
1734 dml2_printf("DML::%s: BlockWidth256Bytes = %u\n", __func__, p->BlockWidth256Bytes);
1735 dml2_printf("DML::%s: MacroTileHeight = %u\n", __func__, p->MacroTileHeight);
1736 dml2_printf("DML::%s: MacroTileWidth = %u\n", __func__, p->MacroTileWidth);
1737 dml2_printf("DML::%s: meta_pte_bytes_per_frame_ub = %u\n", __func__, *p->meta_pte_bytes_per_frame_ub);
1738 dml2_printf("DML::%s: dpde0_bytes_per_frame_ub = %u\n", __func__, *p->dpde0_bytes_per_frame_ub);
1739 dml2_printf("DML::%s: extra_mpde_bytes = %u\n", __func__, extra_mpde_bytes);
1740 dml2_printf("DML::%s: extra_dpde_bytes = %u\n", __func__, extra_dpde_bytes);
1741 dml2_printf("DML::%s: vm_bytes = %u\n", __func__, vm_bytes);
1742 dml2_printf("DML::%s: ViewportHeight = %u\n", __func__, p->ViewportHeight);
1743 dml2_printf("DML::%s: SwathWidth = %u\n", __func__, p->SwathWidth);
1744 dml2_printf("DML::%s: vp_height_dpte_ub = %u\n", __func__, vp_height_dpte_ub);
1745 #endif
1746
1747 if (p->SurfaceTiling == dml2_sw_linear) {
1748 *p->PixelPTEReqHeight = 1;
1749 *p->PixelPTEReqWidth = p->GPUVMMinPageSizeKBytes * 1024 * 8 / p->BytePerPixel;
1750 PixelPTEReqWidth_linear = p->GPUVMMinPageSizeKBytes * 1024 * 8 / p->BytePerPixel;
1751 *p->PTERequestSize = 64;
1752
1753 *p->vmpg_height = 1;
1754 *p->vmpg_width = p->GPUVMMinPageSizeKBytes * 1024 / p->BytePerPixel;
1755 } else if (p->GPUVMMinPageSizeKBytes * 1024 >= dml_get_tile_block_size_bytes(p->SurfaceTiling)) { // 1 64B 8x1 PTE
1756 *p->PixelPTEReqHeight = p->MacroTileHeight;
1757 *p->PixelPTEReqWidth = 8 * 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
1758 *p->PTERequestSize = 64;
1759
1760 *p->vmpg_height = p->MacroTileHeight;
1761 *p->vmpg_width = 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
1762
1763 } else if (p->GPUVMMinPageSizeKBytes == 4 && dml_get_tile_block_size_bytes(p->SurfaceTiling) == 65536) { // 2 64B PTE requests to get 16 PTEs to cover the 64K tile
1764 // one 64KB tile, is 16x16x256B req
1765 *p->PixelPTEReqHeight = 16 * p->BlockHeight256Bytes;
1766 *p->PixelPTEReqWidth = 16 * p->BlockWidth256Bytes;
1767 *p->PTERequestSize = 128;
1768
1769 *p->vmpg_height = *p->PixelPTEReqHeight;
1770 *p->vmpg_width = *p->PixelPTEReqWidth;
1771 } else {
1772 // default for rest of calculation to go through, when vm is disable, the calulated pte related values shouldnt be used anyways
1773 *p->PixelPTEReqHeight = p->MacroTileHeight;
1774 *p->PixelPTEReqWidth = 8 * 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
1775 *p->PTERequestSize = 64;
1776
1777 *p->vmpg_height = p->MacroTileHeight;
1778 *p->vmpg_width = 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
1779
1780 if (p->GPUVMEnable == true) {
1781 dml2_printf("DML::%s: GPUVMMinPageSizeKBytes=%u and sw_mode=%u (tile_size=%d) not supported!\n",
1782 __func__, p->GPUVMMinPageSizeKBytes, p->SurfaceTiling, dml_get_tile_block_size_bytes(p->SurfaceTiling));
1783 DML2_ASSERT(0);
1784 }
1785 }
1786
1787 #ifdef __DML_VBA_DEBUG__
1788 dml2_printf("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes);
1789 dml2_printf("DML::%s: PixelPTEReqHeight = %u\n", __func__, *p->PixelPTEReqHeight);
1790 dml2_printf("DML::%s: PixelPTEReqWidth = %u\n", __func__, *p->PixelPTEReqWidth);
1791 dml2_printf("DML::%s: PixelPTEReqWidth_linear = %u\n", __func__, PixelPTEReqWidth_linear);
1792 dml2_printf("DML::%s: PTERequestSize = %u\n", __func__, *p->PTERequestSize);
1793 dml2_printf("DML::%s: Pitch = %u\n", __func__, p->Pitch);
1794 dml2_printf("DML::%s: vmpg_width = %u\n", __func__, *p->vmpg_width);
1795 dml2_printf("DML::%s: vmpg_height = %u\n", __func__, *p->vmpg_height);
1796 #endif
1797
1798 *p->dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
1799 *p->dpte_row_width_ub_one_row_per_frame = (unsigned int)((math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height_one_row_per_frame / (double)*p->PixelPTEReqHeight - 1) / (double)*p->PixelPTEReqWidth, 1) + 1) * (double)*p->PixelPTEReqWidth);
1800 *p->PixelPTEBytesPerRow_one_row_per_frame = (unsigned int)((double)*p->dpte_row_width_ub_one_row_per_frame / (double)*p->PixelPTEReqWidth * *p->PTERequestSize);
1801 *p->dpte_row_height_linear = 0;
1802
1803 if (p->SurfaceTiling == dml2_sw_linear) {
1804 *p->dpte_row_height = (unsigned int)(math_min2(128, (double)(1ULL << (unsigned int)math_floor2(math_log((float)(p->PTEBufferSizeInRequests * *p->PixelPTEReqWidth / p->Pitch), 2.0), 1))));
1805 *p->dpte_row_width_ub = (unsigned int)(math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height - 1), (double)*p->PixelPTEReqWidth) + *p->PixelPTEReqWidth);
1806 *p->PixelPTEBytesPerRow = (unsigned int)((double)*p->dpte_row_width_ub / (double)*p->PixelPTEReqWidth * *p->PTERequestSize);
1807
1808 // VBA_DELTA, VBA doesn't have programming value for pte row height linear.
1809 *p->dpte_row_height_linear = (unsigned int)1 << (unsigned int)math_floor2(math_log((float)(p->PTEBufferSizeInRequests * PixelPTEReqWidth_linear / p->Pitch), 2.0), 1);
1810 if (*p->dpte_row_height_linear > 128)
1811 *p->dpte_row_height_linear = 128;
1812
1813 #ifdef __DML_VBA_DEBUG__
1814 dml2_printf("DML::%s: dpte_row_width_ub = %u (linear)\n", __func__, *p->dpte_row_width_ub);
1815 #endif
1816
1817 } else if (!dml_is_vertical_rotation(p->RotationAngle)) {
1818 *p->dpte_row_height = *p->PixelPTEReqHeight;
1819
1820 if (p->GPUVMMinPageSizeKBytes > 64) {
1821 *p->dpte_row_width_ub = (unsigned int)((math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height / (double)*p->PixelPTEReqHeight - 1) / (double)*p->PixelPTEReqWidth, 1) + 1) * *p->PixelPTEReqWidth);
1822 } else if (p->ViewportStationary && (p->NumberOfDPPs == 1)) {
1823 *p->dpte_row_width_ub = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->PixelPTEReqWidth - 1, *p->PixelPTEReqWidth) - math_floor2(p->ViewportXStart, *p->PixelPTEReqWidth));
1824 } else {
1825 *p->dpte_row_width_ub = (unsigned int)((math_ceil2((double)(p->SwathWidth - 1) / (double)*p->PixelPTEReqWidth, 1) + 1.0) * *p->PixelPTEReqWidth);
1826 }
1827 #ifdef __DML_VBA_DEBUG__
1828 dml2_printf("DML::%s: dpte_row_width_ub = %u (tiled horz)\n", __func__, *p->dpte_row_width_ub);
1829 #endif
1830
1831 *p->PixelPTEBytesPerRow = *p->dpte_row_width_ub / *p->PixelPTEReqWidth * *p->PTERequestSize;
1832 } else {
1833 *p->dpte_row_height = (unsigned int)(math_min2(*p->PixelPTEReqWidth, p->MacroTileWidth));
1834
1835 if (p->ViewportStationary && (p->NumberOfDPPs == 1)) {
1836 *p->dpte_row_width_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + *p->PixelPTEReqHeight - 1, *p->PixelPTEReqHeight) - math_floor2(p->ViewportYStart, *p->PixelPTEReqHeight));
1837 } else {
1838 *p->dpte_row_width_ub = (unsigned int)((math_ceil2((double)(p->SwathWidth - 1) / (double)*p->PixelPTEReqHeight, 1) + 1) * *p->PixelPTEReqHeight);
1839 }
1840
1841 *p->PixelPTEBytesPerRow = (unsigned int)((double)*p->dpte_row_width_ub / (double)*p->PixelPTEReqHeight * *p->PTERequestSize);
1842 #ifdef __DML_VBA_DEBUG__
1843 dml2_printf("DML::%s: dpte_row_width_ub = %u (tiled vert)\n", __func__, *p->dpte_row_width_ub);
1844 #endif
1845 }
1846
1847 if (p->GPUVMEnable != true) {
1848 *p->PixelPTEBytesPerRow = 0;
1849 *p->PixelPTEBytesPerRow_one_row_per_frame = 0;
1850 }
1851
1852 *p->PixelPTEBytesPerRowStorage = *p->PixelPTEBytesPerRow;
1853
1854 #ifdef __DML_VBA_DEBUG__
1855 dml2_printf("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes);
1856 dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable);
1857 dml2_printf("DML::%s: meta_row_height = %u\n", __func__, *p->meta_row_height);
1858 dml2_printf("DML::%s: dpte_row_height = %u\n", __func__, *p->dpte_row_height);
1859 dml2_printf("DML::%s: dpte_row_height_linear = %u\n", __func__, *p->dpte_row_height_linear);
1860 dml2_printf("DML::%s: dpte_row_width_ub = %u\n", __func__, *p->dpte_row_width_ub);
1861 dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, *p->PixelPTEBytesPerRow);
1862 dml2_printf("DML::%s: PixelPTEBytesPerRowStorage = %u\n", __func__, *p->PixelPTEBytesPerRowStorage);
1863 dml2_printf("DML::%s: PTEBufferSizeInRequests = %u\n", __func__, p->PTEBufferSizeInRequests);
1864 dml2_printf("DML::%s: dpte_row_height_one_row_per_frame = %u\n", __func__, *p->dpte_row_height_one_row_per_frame);
1865 dml2_printf("DML::%s: dpte_row_width_ub_one_row_per_frame = %u\n", __func__, *p->dpte_row_width_ub_one_row_per_frame);
1866 dml2_printf("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %u\n", __func__, *p->PixelPTEBytesPerRow_one_row_per_frame);
1867 #endif
1868
1869 return vm_bytes;
1870 } // CalculateVMAndRowBytes
1871
CalculatePrefetchSourceLines(double VRatio,unsigned int VTaps,bool Interlace,bool ProgressiveToInterlaceUnitInOPP,unsigned int SwathHeight,enum dml2_rotation_angle RotationAngle,bool mirrored,bool ViewportStationary,unsigned int SwathWidth,unsigned int ViewportHeight,unsigned int ViewportXStart,unsigned int ViewportYStart,unsigned int * VInitPreFill,unsigned int * MaxNumSwath)1872 static unsigned int CalculatePrefetchSourceLines(
1873 double VRatio,
1874 unsigned int VTaps,
1875 bool Interlace,
1876 bool ProgressiveToInterlaceUnitInOPP,
1877 unsigned int SwathHeight,
1878 enum dml2_rotation_angle RotationAngle,
1879 bool mirrored,
1880 bool ViewportStationary,
1881 unsigned int SwathWidth,
1882 unsigned int ViewportHeight,
1883 unsigned int ViewportXStart,
1884 unsigned int ViewportYStart,
1885
1886 // Output
1887 unsigned int *VInitPreFill,
1888 unsigned int *MaxNumSwath)
1889 {
1890
1891 unsigned int vp_start_rot = 0;
1892 unsigned int sw0_tmp = 0;
1893 unsigned int MaxPartialSwath = 0;
1894 double numLines = 0;
1895
1896 #ifdef __DML_VBA_DEBUG__
1897 dml2_printf("DML::%s: VRatio = %f\n", __func__, VRatio);
1898 dml2_printf("DML::%s: VTaps = %u\n", __func__, VTaps);
1899 dml2_printf("DML::%s: ViewportXStart = %u\n", __func__, ViewportXStart);
1900 dml2_printf("DML::%s: ViewportYStart = %u\n", __func__, ViewportYStart);
1901 dml2_printf("DML::%s: ViewportStationary = %u\n", __func__, ViewportStationary);
1902 dml2_printf("DML::%s: SwathHeight = %u\n", __func__, SwathHeight);
1903 #endif
1904 if (ProgressiveToInterlaceUnitInOPP)
1905 *VInitPreFill = (unsigned int)(math_floor2((VRatio + (double)VTaps + 1) / 2.0, 1));
1906 else
1907 *VInitPreFill = (unsigned int)(math_floor2((VRatio + (double)VTaps + 1 + (Interlace ? 1 : 0) * 0.5 * VRatio) / 2.0, 1));
1908
1909 if (ViewportStationary) {
1910 if (RotationAngle == dml2_rotation_180) {
1911 vp_start_rot = SwathHeight - (((unsigned int)(ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
1912 } else if ((RotationAngle == dml2_rotation_270 && !mirrored) || (RotationAngle == dml2_rotation_90 && mirrored)) {
1913 vp_start_rot = ViewportXStart;
1914 } else if ((RotationAngle == dml2_rotation_90 && !mirrored) || (RotationAngle == dml2_rotation_270 && mirrored)) {
1915 vp_start_rot = SwathHeight - (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
1916 } else {
1917 vp_start_rot = ViewportYStart;
1918 }
1919 sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
1920 if (sw0_tmp < *VInitPreFill) {
1921 *MaxNumSwath = (unsigned int)(math_ceil2((*VInitPreFill - sw0_tmp) / (double)SwathHeight, 1) + 1);
1922 } else {
1923 *MaxNumSwath = 1;
1924 }
1925 MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(vp_start_rot + *VInitPreFill - 1) % SwathHeight));
1926 } else {
1927 *MaxNumSwath = (unsigned int)(math_ceil2((*VInitPreFill - 1.0) / (double)SwathHeight, 1) + 1);
1928 if (*VInitPreFill > 1) {
1929 MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(*VInitPreFill - 2) % SwathHeight));
1930 } else {
1931 MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(*VInitPreFill + SwathHeight - 2) % SwathHeight));
1932 }
1933 }
1934 numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
1935
1936 #ifdef __DML_VBA_DEBUG__
1937 dml2_printf("DML::%s: vp_start_rot = %u\n", __func__, vp_start_rot);
1938 dml2_printf("DML::%s: VInitPreFill = %u\n", __func__, *VInitPreFill);
1939 dml2_printf("DML::%s: MaxPartialSwath = %u\n", __func__, MaxPartialSwath);
1940 dml2_printf("DML::%s: MaxNumSwath = %u\n", __func__, *MaxNumSwath);
1941 dml2_printf("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
1942 #endif
1943 return (unsigned int)(numLines);
1944
1945 }
1946
CalculateRowBandwidth(bool GPUVMEnable,bool use_one_row_for_frame,enum dml2_source_format_class SourcePixelFormat,double VRatio,double VRatioChroma,bool DCCEnable,double LineTime,unsigned int PixelPTEBytesPerRowLuma,unsigned int PixelPTEBytesPerRowChroma,unsigned int dpte_row_height_luma,unsigned int dpte_row_height_chroma,bool mrq_present,unsigned int meta_row_bytes_per_row_ub_l,unsigned int meta_row_bytes_per_row_ub_c,unsigned int meta_row_height_luma,unsigned int meta_row_height_chroma,double * dpte_row_bw,double * meta_row_bw)1947 static void CalculateRowBandwidth(
1948 bool GPUVMEnable,
1949 bool use_one_row_for_frame,
1950 enum dml2_source_format_class SourcePixelFormat,
1951 double VRatio,
1952 double VRatioChroma,
1953 bool DCCEnable,
1954 double LineTime,
1955 unsigned int PixelPTEBytesPerRowLuma,
1956 unsigned int PixelPTEBytesPerRowChroma,
1957 unsigned int dpte_row_height_luma,
1958 unsigned int dpte_row_height_chroma,
1959
1960 bool mrq_present,
1961 unsigned int meta_row_bytes_per_row_ub_l,
1962 unsigned int meta_row_bytes_per_row_ub_c,
1963 unsigned int meta_row_height_luma,
1964 unsigned int meta_row_height_chroma,
1965
1966 // Output
1967 double *dpte_row_bw,
1968 double *meta_row_bw)
1969 {
1970 if (!DCCEnable || !mrq_present) {
1971 *meta_row_bw = 0;
1972 } else if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha) {
1973 *meta_row_bw = VRatio * meta_row_bytes_per_row_ub_l / (meta_row_height_luma * LineTime)
1974 + VRatioChroma * meta_row_bytes_per_row_ub_c / (meta_row_height_chroma * LineTime);
1975 } else {
1976 *meta_row_bw = VRatio * meta_row_bytes_per_row_ub_l / (meta_row_height_luma * LineTime);
1977 }
1978
1979 if (GPUVMEnable != true) {
1980 *dpte_row_bw = 0;
1981 } else if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha) {
1982 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
1983 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
1984 } else {
1985 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
1986 }
1987 }
1988
CalculateMALLUseForStaticScreen(const struct dml2_display_cfg * display_cfg,unsigned int NumberOfActiveSurfaces,unsigned int MALLAllocatedForDCN,unsigned int SurfaceSizeInMALL[],bool one_row_per_frame_fits_in_buffer[],bool is_using_mall_for_ss[])1989 static void CalculateMALLUseForStaticScreen(
1990 const struct dml2_display_cfg *display_cfg,
1991 unsigned int NumberOfActiveSurfaces,
1992 unsigned int MALLAllocatedForDCN,
1993 unsigned int SurfaceSizeInMALL[],
1994 bool one_row_per_frame_fits_in_buffer[],
1995
1996 // Output
1997 bool is_using_mall_for_ss[])
1998 {
1999
2000 unsigned int SurfaceToAddToMALL;
2001 bool CanAddAnotherSurfaceToMALL;
2002 unsigned int TotalSurfaceSizeInMALL;
2003
2004 TotalSurfaceSizeInMALL = 0;
2005 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
2006 is_using_mall_for_ss[k] = (display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable);
2007 if (is_using_mall_for_ss[k])
2008 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
2009 #ifdef __DML_VBA_DEBUG__
2010 dml2_printf("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, is_using_mall_for_ss[k]);
2011 dml2_printf("DML::%s: k=%u, TotalSurfaceSizeInMALL = %u\n", __func__, k, TotalSurfaceSizeInMALL);
2012 #endif
2013 }
2014
2015 SurfaceToAddToMALL = 0;
2016 CanAddAnotherSurfaceToMALL = true;
2017 while (CanAddAnotherSurfaceToMALL) {
2018 CanAddAnotherSurfaceToMALL = false;
2019 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
2020 if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCN * 1024 * 1024 &&
2021 !is_using_mall_for_ss[k] && display_cfg->plane_descriptors[k].overrides.refresh_from_mall != dml2_refresh_from_mall_mode_override_force_disable && one_row_per_frame_fits_in_buffer[k] &&
2022 (!CanAddAnotherSurfaceToMALL || SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
2023 CanAddAnotherSurfaceToMALL = true;
2024 SurfaceToAddToMALL = k;
2025 dml2_printf("DML::%s: k=%u, UseMALLForStaticScreen = %u (dis, en, optimize)\n", __func__, k, display_cfg->plane_descriptors[k].overrides.refresh_from_mall);
2026 }
2027 }
2028 if (CanAddAnotherSurfaceToMALL) {
2029 is_using_mall_for_ss[SurfaceToAddToMALL] = true;
2030 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
2031
2032 #ifdef __DML_VBA_DEBUG__
2033 dml2_printf("DML::%s: SurfaceToAddToMALL = %u\n", __func__, SurfaceToAddToMALL);
2034 dml2_printf("DML::%s: TotalSurfaceSizeInMALL = %u\n", __func__, TotalSurfaceSizeInMALL);
2035 #endif
2036 }
2037 }
2038 }
2039
CalculateDCCConfiguration(bool DCCEnabled,bool DCCProgrammingAssumesScanDirectionUnknown,enum dml2_source_format_class SourcePixelFormat,unsigned int SurfaceWidthLuma,unsigned int SurfaceWidthChroma,unsigned int SurfaceHeightLuma,unsigned int SurfaceHeightChroma,unsigned int nomDETInKByte,unsigned int RequestHeight256ByteLuma,unsigned int RequestHeight256ByteChroma,enum dml2_swizzle_mode TilingFormat,unsigned int BytePerPixelY,unsigned int BytePerPixelC,double BytePerPixelDETY,double BytePerPixelDETC,enum dml2_rotation_angle RotationAngle,enum dml2_core_internal_request_type * RequestLuma,enum dml2_core_internal_request_type * RequestChroma,unsigned int * MaxUncompressedBlockLuma,unsigned int * MaxUncompressedBlockChroma,unsigned int * MaxCompressedBlockLuma,unsigned int * MaxCompressedBlockChroma,unsigned int * IndependentBlockLuma,unsigned int * IndependentBlockChroma)2040 static void CalculateDCCConfiguration(
2041 bool DCCEnabled,
2042 bool DCCProgrammingAssumesScanDirectionUnknown,
2043 enum dml2_source_format_class SourcePixelFormat,
2044 unsigned int SurfaceWidthLuma,
2045 unsigned int SurfaceWidthChroma,
2046 unsigned int SurfaceHeightLuma,
2047 unsigned int SurfaceHeightChroma,
2048 unsigned int nomDETInKByte,
2049 unsigned int RequestHeight256ByteLuma,
2050 unsigned int RequestHeight256ByteChroma,
2051 enum dml2_swizzle_mode TilingFormat,
2052 unsigned int BytePerPixelY,
2053 unsigned int BytePerPixelC,
2054 double BytePerPixelDETY,
2055 double BytePerPixelDETC,
2056 enum dml2_rotation_angle RotationAngle,
2057
2058 // Output
2059 enum dml2_core_internal_request_type *RequestLuma,
2060 enum dml2_core_internal_request_type *RequestChroma,
2061 unsigned int *MaxUncompressedBlockLuma,
2062 unsigned int *MaxUncompressedBlockChroma,
2063 unsigned int *MaxCompressedBlockLuma,
2064 unsigned int *MaxCompressedBlockChroma,
2065 unsigned int *IndependentBlockLuma,
2066 unsigned int *IndependentBlockChroma)
2067 {
2068 unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
2069
2070 unsigned int segment_order_horz_contiguous_luma;
2071 unsigned int segment_order_horz_contiguous_chroma;
2072 unsigned int segment_order_vert_contiguous_luma;
2073 unsigned int segment_order_vert_contiguous_chroma;
2074
2075 unsigned int req128_horz_wc_l;
2076 unsigned int req128_horz_wc_c;
2077 unsigned int req128_vert_wc_l;
2078 unsigned int req128_vert_wc_c;
2079
2080 unsigned int yuv420;
2081 unsigned int horz_div_l;
2082 unsigned int horz_div_c;
2083 unsigned int vert_div_l;
2084 unsigned int vert_div_c;
2085
2086 unsigned int swath_buf_size;
2087 double detile_buf_vp_horz_limit;
2088 double detile_buf_vp_vert_limit;
2089
2090 unsigned int MAS_vp_horz_limit;
2091 unsigned int MAS_vp_vert_limit;
2092 unsigned int max_vp_horz_width;
2093 unsigned int max_vp_vert_height;
2094 unsigned int eff_surf_width_l;
2095 unsigned int eff_surf_width_c;
2096 unsigned int eff_surf_height_l;
2097 unsigned int eff_surf_height_c;
2098
2099 unsigned int full_swath_bytes_horz_wc_l;
2100 unsigned int full_swath_bytes_horz_wc_c;
2101 unsigned int full_swath_bytes_vert_wc_l;
2102 unsigned int full_swath_bytes_vert_wc_c;
2103
2104 if (dml_is_420(SourcePixelFormat))
2105 yuv420 = 1;
2106 else
2107 yuv420 = 0;
2108 horz_div_l = 1;
2109 horz_div_c = 1;
2110 vert_div_l = 1;
2111 vert_div_c = 1;
2112
2113 if (BytePerPixelY == 1)
2114 vert_div_l = 0;
2115 if (BytePerPixelC == 1)
2116 vert_div_c = 0;
2117
2118 if (BytePerPixelC == 0) {
2119 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
2120 detile_buf_vp_horz_limit = (double)swath_buf_size / ((double)RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
2121 detile_buf_vp_vert_limit = (double)swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
2122 } else {
2123 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
2124 detile_buf_vp_horz_limit = (double)swath_buf_size / ((double)RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l) + (double)RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
2125 detile_buf_vp_vert_limit = (double)swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
2126 }
2127
2128 if (SourcePixelFormat == dml2_420_10) {
2129 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
2130 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
2131 }
2132
2133 detile_buf_vp_horz_limit = math_floor2(detile_buf_vp_horz_limit - 1, 16);
2134 detile_buf_vp_vert_limit = math_floor2(detile_buf_vp_vert_limit - 1, 16);
2135
2136 MAS_vp_horz_limit = SourcePixelFormat == dml2_rgbe_alpha ? 3840 : 6144;
2137 MAS_vp_vert_limit = SourcePixelFormat == dml2_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
2138 max_vp_horz_width = (unsigned int)(math_min2((double)MAS_vp_horz_limit, detile_buf_vp_horz_limit));
2139 max_vp_vert_height = (unsigned int)(math_min2((double)MAS_vp_vert_limit, detile_buf_vp_vert_limit));
2140 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
2141 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
2142 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
2143 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
2144
2145 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
2146 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
2147 if (BytePerPixelC > 0) {
2148 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
2149 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
2150 } else {
2151 full_swath_bytes_horz_wc_c = 0;
2152 full_swath_bytes_vert_wc_c = 0;
2153 }
2154
2155 if (SourcePixelFormat == dml2_420_10) {
2156 full_swath_bytes_horz_wc_l = (unsigned int)(math_ceil2((double)full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0));
2157 full_swath_bytes_horz_wc_c = (unsigned int)(math_ceil2((double)full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0));
2158 full_swath_bytes_vert_wc_l = (unsigned int)(math_ceil2((double)full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0));
2159 full_swath_bytes_vert_wc_c = (unsigned int)(math_ceil2((double)full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0));
2160 }
2161
2162 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
2163 req128_horz_wc_l = 0;
2164 req128_horz_wc_c = 0;
2165 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
2166 req128_horz_wc_l = 0;
2167 req128_horz_wc_c = 1;
2168 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
2169 req128_horz_wc_l = 1;
2170 req128_horz_wc_c = 0;
2171 } else {
2172 req128_horz_wc_l = 1;
2173 req128_horz_wc_c = 1;
2174 }
2175
2176 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
2177 req128_vert_wc_l = 0;
2178 req128_vert_wc_c = 0;
2179 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
2180 req128_vert_wc_l = 0;
2181 req128_vert_wc_c = 1;
2182 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
2183 req128_vert_wc_l = 1;
2184 req128_vert_wc_c = 0;
2185 } else {
2186 req128_vert_wc_l = 1;
2187 req128_vert_wc_c = 1;
2188 }
2189
2190 if (BytePerPixelY == 2) {
2191 segment_order_horz_contiguous_luma = 0;
2192 segment_order_vert_contiguous_luma = 1;
2193 } else {
2194 segment_order_horz_contiguous_luma = 1;
2195 segment_order_vert_contiguous_luma = 0;
2196 }
2197
2198 if (BytePerPixelC == 2) {
2199 segment_order_horz_contiguous_chroma = 0;
2200 segment_order_vert_contiguous_chroma = 1;
2201 } else {
2202 segment_order_horz_contiguous_chroma = 1;
2203 segment_order_vert_contiguous_chroma = 0;
2204 }
2205 #ifdef __DML_VBA_DEBUG__
2206 dml2_printf("DML::%s: DCCEnabled = %u\n", __func__, DCCEnabled);
2207 dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte);
2208 dml2_printf("DML::%s: DETBufferSizeForDCC = %u\n", __func__, DETBufferSizeForDCC);
2209 dml2_printf("DML::%s: req128_horz_wc_l = %u\n", __func__, req128_horz_wc_l);
2210 dml2_printf("DML::%s: req128_horz_wc_c = %u\n", __func__, req128_horz_wc_c);
2211 dml2_printf("DML::%s: full_swath_bytes_horz_wc_l = %u\n", __func__, full_swath_bytes_horz_wc_l);
2212 dml2_printf("DML::%s: full_swath_bytes_vert_wc_c = %u\n", __func__, full_swath_bytes_vert_wc_c);
2213 dml2_printf("DML::%s: segment_order_horz_contiguous_luma = %u\n", __func__, segment_order_horz_contiguous_luma);
2214 dml2_printf("DML::%s: segment_order_horz_contiguous_chroma = %u\n", __func__, segment_order_horz_contiguous_chroma);
2215 #endif
2216 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
2217 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
2218 *RequestLuma = dml2_core_internal_request_type_256_bytes;
2219 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
2220 *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous;
2221 } else {
2222 *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous;
2223 }
2224 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
2225 *RequestChroma = dml2_core_internal_request_type_256_bytes;
2226 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
2227 *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous;
2228 } else {
2229 *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous;
2230 }
2231 } else if (!dml_is_vertical_rotation(RotationAngle)) {
2232 if (req128_horz_wc_l == 0) {
2233 *RequestLuma = dml2_core_internal_request_type_256_bytes;
2234 } else if (segment_order_horz_contiguous_luma == 0) {
2235 *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous;
2236 } else {
2237 *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous;
2238 }
2239 if (req128_horz_wc_c == 0) {
2240 *RequestChroma = dml2_core_internal_request_type_256_bytes;
2241 } else if (segment_order_horz_contiguous_chroma == 0) {
2242 *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous;
2243 } else {
2244 *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous;
2245 }
2246 } else {
2247 if (req128_vert_wc_l == 0) {
2248 *RequestLuma = dml2_core_internal_request_type_256_bytes;
2249 } else if (segment_order_vert_contiguous_luma == 0) {
2250 *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous;
2251 } else {
2252 *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous;
2253 }
2254 if (req128_vert_wc_c == 0) {
2255 *RequestChroma = dml2_core_internal_request_type_256_bytes;
2256 } else if (segment_order_vert_contiguous_chroma == 0) {
2257 *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous;
2258 } else {
2259 *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous;
2260 }
2261 }
2262
2263 if (*RequestLuma == dml2_core_internal_request_type_256_bytes) {
2264 *MaxUncompressedBlockLuma = 256;
2265 *MaxCompressedBlockLuma = 256;
2266 *IndependentBlockLuma = 0;
2267 } else if (*RequestLuma == dml2_core_internal_request_type_128_bytes_contiguous) {
2268 *MaxUncompressedBlockLuma = 256;
2269 *MaxCompressedBlockLuma = 128;
2270 *IndependentBlockLuma = 128;
2271 } else {
2272 *MaxUncompressedBlockLuma = 256;
2273 *MaxCompressedBlockLuma = 64;
2274 *IndependentBlockLuma = 64;
2275 }
2276
2277 if (*RequestChroma == dml2_core_internal_request_type_256_bytes) {
2278 *MaxUncompressedBlockChroma = 256;
2279 *MaxCompressedBlockChroma = 256;
2280 *IndependentBlockChroma = 0;
2281 } else if (*RequestChroma == dml2_core_internal_request_type_128_bytes_contiguous) {
2282 *MaxUncompressedBlockChroma = 256;
2283 *MaxCompressedBlockChroma = 128;
2284 *IndependentBlockChroma = 128;
2285 } else {
2286 *MaxUncompressedBlockChroma = 256;
2287 *MaxCompressedBlockChroma = 64;
2288 *IndependentBlockChroma = 64;
2289 }
2290
2291 if (DCCEnabled != true || BytePerPixelC == 0) {
2292 *MaxUncompressedBlockChroma = 0;
2293 *MaxCompressedBlockChroma = 0;
2294 *IndependentBlockChroma = 0;
2295 }
2296
2297 if (DCCEnabled != true) {
2298 *MaxUncompressedBlockLuma = 0;
2299 *MaxCompressedBlockLuma = 0;
2300 *IndependentBlockLuma = 0;
2301 }
2302
2303 #ifdef __DML_VBA_DEBUG__
2304 dml2_printf("DML::%s: MaxUncompressedBlockLuma = %u\n", __func__, *MaxUncompressedBlockLuma);
2305 dml2_printf("DML::%s: MaxCompressedBlockLuma = %u\n", __func__, *MaxCompressedBlockLuma);
2306 dml2_printf("DML::%s: IndependentBlockLuma = %u\n", __func__, *IndependentBlockLuma);
2307 dml2_printf("DML::%s: MaxUncompressedBlockChroma = %u\n", __func__, *MaxUncompressedBlockChroma);
2308 dml2_printf("DML::%s: MaxCompressedBlockChroma = %u\n", __func__, *MaxCompressedBlockChroma);
2309 dml2_printf("DML::%s: IndependentBlockChroma = %u\n", __func__, *IndependentBlockChroma);
2310 #endif
2311
2312 }
2313
calculate_mcache_row_bytes(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_calculate_mcache_row_bytes_params * p)2314 static void calculate_mcache_row_bytes(
2315 struct dml2_core_internal_scratch *scratch,
2316 struct dml2_core_calcs_calculate_mcache_row_bytes_params *p)
2317 {
2318 unsigned int vmpg_bytes = 0;
2319 unsigned int blk_bytes = 0;
2320 float meta_per_mvmpg_per_channel = 0;
2321 unsigned int est_blk_per_vmpg = 2;
2322 unsigned int mvmpg_per_row_ub = 0;
2323 unsigned int full_vp_width_mvmpg_aligned = 0;
2324 unsigned int full_vp_height_mvmpg_aligned = 0;
2325 unsigned int meta_per_mvmpg_per_channel_ub = 0;
2326 unsigned int mvmpg_per_mcache;
2327
2328 #ifdef __DML_VBA_DEBUG__
2329 dml2_printf("DML::%s: num_chans = %u\n", __func__, p->num_chans);
2330 dml2_printf("DML::%s: mem_word_bytes = %u\n", __func__, p->mem_word_bytes);
2331 dml2_printf("DML::%s: mcache_line_size_bytes = %u\n", __func__, p->mcache_line_size_bytes);
2332 dml2_printf("DML::%s: mcache_size_bytes = %u\n", __func__, p->mcache_size_bytes);
2333 dml2_printf("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable);
2334 dml2_printf("DML::%s: gpuvm_page_size_kbytes = %u\n", __func__, p->gpuvm_page_size_kbytes);
2335 dml2_printf("DML::%s: vp_stationary = %u\n", __func__, p->vp_stationary);
2336 dml2_printf("DML::%s: tiling_mode = %u\n", __func__, p->tiling_mode);
2337 dml2_printf("DML::%s: vp_start_x = %u\n", __func__, p->vp_start_x);
2338 dml2_printf("DML::%s: vp_start_y = %u\n", __func__, p->vp_start_y);
2339 dml2_printf("DML::%s: full_vp_width = %u\n", __func__, p->full_vp_width);
2340 dml2_printf("DML::%s: full_vp_height = %u\n", __func__, p->full_vp_height);
2341 dml2_printf("DML::%s: blk_width = %u\n", __func__, p->blk_width);
2342 dml2_printf("DML::%s: blk_height = %u\n", __func__, p->blk_height);
2343 dml2_printf("DML::%s: vmpg_width = %u\n", __func__, p->vmpg_width);
2344 dml2_printf("DML::%s: vmpg_height = %u\n", __func__, p->vmpg_height);
2345 dml2_printf("DML::%s: full_swath_bytes = %u\n", __func__, p->full_swath_bytes);
2346 #endif
2347 DML2_ASSERT(p->mcache_line_size_bytes != 0);
2348 DML2_ASSERT(p->mcache_size_bytes != 0);
2349
2350 *p->mvmpg_width = 0;
2351 *p->mvmpg_height = 0;
2352
2353 if (p->full_vp_height == 0 && p->full_vp_width == 0) {
2354 *p->num_mcaches = 0;
2355 *p->mcache_row_bytes = 0;
2356 } else {
2357 blk_bytes = dml_get_tile_block_size_bytes(p->tiling_mode);
2358
2359 // if gpuvm is not enable, the alignment boundary should be in terms of tiling block size
2360 vmpg_bytes = p->gpuvm_page_size_kbytes * 1024;
2361
2362 //With vmpg_bytes >= tile blk_bytes, the meta_row_width alignment equations are relative to the vmpg_width/height.
2363 // But for 4KB page with 64KB tile block, we need the meta for all pages in the tile block.
2364 // Therefore, the alignment is relative to the blk_width/height. The factor of 16 vmpg per 64KB tile block is applied at the end.
2365 *p->mvmpg_width = p->blk_width;
2366 *p->mvmpg_height = p->blk_height;
2367 if (p->gpuvm_enable) {
2368 if (vmpg_bytes >= blk_bytes) {
2369 *p->mvmpg_width = p->vmpg_width;
2370 *p->mvmpg_height = p->vmpg_height;
2371 } else if (!((blk_bytes == 65536) && (vmpg_bytes == 4096))) {
2372 dml2_printf("ERROR: DML::%s: Tiling size and vm page size combination not supported\n", __func__);
2373 DML2_ASSERT(0);
2374 }
2375 }
2376
2377 //For plane0 & 1, first calculate full_vp_width/height_l/c aligned to vmpg_width/height_l/c
2378 full_vp_width_mvmpg_aligned = (unsigned int)(math_floor2((p->vp_start_x + p->full_vp_width) + *p->mvmpg_width - 1, *p->mvmpg_width) - math_floor2(p->vp_start_x, *p->mvmpg_width));
2379 full_vp_height_mvmpg_aligned = (unsigned int)(math_floor2((p->vp_start_y + p->full_vp_height) + *p->mvmpg_height - 1, *p->mvmpg_height) - math_floor2(p->vp_start_y, *p->mvmpg_height));
2380
2381 *p->full_vp_access_width_mvmpg_aligned = p->surf_vert ? full_vp_height_mvmpg_aligned : full_vp_width_mvmpg_aligned;
2382
2383 //Use the equation for the exact alignment when possible. Note that the exact alignment cannot be used for horizontal access if vmpg_bytes > blk_bytes.
2384 if (!p->surf_vert) { //horizontal access
2385 if (p->vp_stationary == 1 && vmpg_bytes <= blk_bytes)
2386 *p->meta_row_width_ub = full_vp_width_mvmpg_aligned;
2387 else
2388 *p->meta_row_width_ub = (unsigned int)math_ceil2((double)p->full_vp_width - 1, *p->mvmpg_width) + *p->mvmpg_width;
2389 mvmpg_per_row_ub = *p->meta_row_width_ub / *p->mvmpg_width;
2390 } else { //vertical access
2391 if (p->vp_stationary == 1)
2392 *p->meta_row_width_ub = full_vp_height_mvmpg_aligned;
2393 else
2394 *p->meta_row_width_ub = (unsigned int)math_ceil2((double)p->full_vp_height - 1, *p->mvmpg_height) + *p->mvmpg_height;
2395 mvmpg_per_row_ub = *p->meta_row_width_ub / *p->mvmpg_height;
2396 }
2397
2398 if (p->gpuvm_enable) {
2399 meta_per_mvmpg_per_channel = (float)vmpg_bytes / (float)256 / p->num_chans;
2400
2401 //but using the est_blk_per_vmpg between 2 and 4, to be not as pessimestic
2402 if (p->surf_vert && vmpg_bytes > blk_bytes) {
2403 meta_per_mvmpg_per_channel = (float)est_blk_per_vmpg * blk_bytes / (float)256 / p->num_chans;
2404 }
2405
2406 *p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel)); // dcc_dr_oh_nom
2407 } else {
2408 meta_per_mvmpg_per_channel = (float) blk_bytes / (float)256 / p->num_chans;
2409
2410 if (!p->surf_vert)
2411 *p->dcc_dram_bw_nom_overhead_factor = 1 + 1.0 / 256.0;
2412 else
2413 *p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel));
2414 }
2415
2416 meta_per_mvmpg_per_channel_ub = (unsigned int)math_ceil2((double)meta_per_mvmpg_per_channel, p->mcache_line_size_bytes);
2417
2418 //but for 4KB vmpg with 64KB tile blk
2419 if (p->gpuvm_enable && (blk_bytes == 65536) && (vmpg_bytes == 4096))
2420 meta_per_mvmpg_per_channel_ub = 16 * meta_per_mvmpg_per_channel_ub;
2421
2422 // If this mcache_row_bytes for the full viewport of the surface is less than or equal to mcache_bytes,
2423 // then one mcache can be used for this request stream. If not, it is useful to know the width of the viewport that can be supported in the mcache_bytes.
2424 if (p->gpuvm_enable || !p->surf_vert) {
2425 *p->mcache_row_bytes = mvmpg_per_row_ub * meta_per_mvmpg_per_channel_ub;
2426 } else { // horizontal and gpuvm disable
2427 *p->mcache_row_bytes = *p->meta_row_width_ub * p->blk_height * p->bytes_per_pixel / 256;
2428 *p->mcache_row_bytes = (unsigned int)math_ceil2((double)*p->mcache_row_bytes / p->num_chans, p->mcache_line_size_bytes);
2429 }
2430
2431 *p->dcc_dram_bw_pref_overhead_factor = 1 + math_max2(1.0 / 256.0, *p->mcache_row_bytes / p->full_swath_bytes); // dcc_dr_oh_pref
2432 *p->num_mcaches = (unsigned int)math_ceil2((double)*p->mcache_row_bytes / p->mcache_size_bytes, 1);
2433
2434 mvmpg_per_mcache = p->mcache_size_bytes / meta_per_mvmpg_per_channel_ub;
2435 *p->mvmpg_per_mcache_lb = (unsigned int)math_floor2(mvmpg_per_mcache, 1);
2436
2437 #ifdef __DML_VBA_DEBUG__
2438 dml2_printf("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable);
2439 dml2_printf("DML::%s: vmpg_bytes = %u\n", __func__, vmpg_bytes);
2440 dml2_printf("DML::%s: blk_bytes = %u\n", __func__, blk_bytes);
2441 dml2_printf("DML::%s: meta_per_mvmpg_per_channel = %f\n", __func__, meta_per_mvmpg_per_channel);
2442 dml2_printf("DML::%s: mvmpg_per_row_ub = %u\n", __func__, mvmpg_per_row_ub);
2443 dml2_printf("DML::%s: meta_row_width_ub = %u\n", __func__, *p->meta_row_width_ub);
2444 dml2_printf("DML::%s: mvmpg_width = %u\n", __func__, *p->mvmpg_width);
2445 dml2_printf("DML::%s: mvmpg_height = %u\n", __func__, *p->mvmpg_height);
2446 dml2_printf("DML::%s: dcc_dram_bw_nom_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_nom_overhead_factor);
2447 dml2_printf("DML::%s: dcc_dram_bw_pref_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_pref_overhead_factor);
2448 #endif
2449 }
2450
2451 #ifdef __DML_VBA_DEBUG__
2452 dml2_printf("DML::%s: mcache_row_bytes = %u\n", __func__, *p->mcache_row_bytes);
2453 dml2_printf("DML::%s: num_mcaches = %u\n", __func__, *p->num_mcaches);
2454 #endif
2455 DML2_ASSERT(*p->num_mcaches > 0);
2456 }
2457
calculate_mcache_setting(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_calculate_mcache_setting_params * p)2458 static void calculate_mcache_setting(
2459 struct dml2_core_internal_scratch *scratch,
2460 struct dml2_core_calcs_calculate_mcache_setting_params *p)
2461 {
2462 unsigned int n;
2463
2464 struct dml2_core_shared_calculate_mcache_setting_locals *l = &scratch->calculate_mcache_setting_locals;
2465 memset(l, 0, sizeof(struct dml2_core_shared_calculate_mcache_setting_locals));
2466
2467 *p->num_mcaches_l = 0;
2468 *p->mcache_row_bytes_l = 0;
2469 *p->dcc_dram_bw_nom_overhead_factor_l = 1.0;
2470 *p->dcc_dram_bw_pref_overhead_factor_l = 1.0;
2471
2472 *p->num_mcaches_c = 0;
2473 *p->mcache_row_bytes_c = 0;
2474 *p->dcc_dram_bw_nom_overhead_factor_c = 1.0;
2475 *p->dcc_dram_bw_pref_overhead_factor_c = 1.0;
2476
2477 *p->mall_comb_mcache_l = 0;
2478 *p->mall_comb_mcache_c = 0;
2479 *p->lc_comb_mcache = 0;
2480
2481 if (!p->dcc_enable)
2482 return;
2483
2484 l->is_dual_plane = dml_is_420(p->source_format) || p->source_format == dml2_rgbe_alpha;
2485
2486 l->l_p.num_chans = p->num_chans;
2487 l->l_p.mem_word_bytes = p->mem_word_bytes;
2488 l->l_p.mcache_size_bytes = p->mcache_size_bytes;
2489 l->l_p.mcache_line_size_bytes = p->mcache_line_size_bytes;
2490 l->l_p.gpuvm_enable = p->gpuvm_enable;
2491 l->l_p.gpuvm_page_size_kbytes = p->gpuvm_page_size_kbytes;
2492 l->l_p.surf_vert = p->surf_vert;
2493 l->l_p.vp_stationary = p->vp_stationary;
2494 l->l_p.tiling_mode = p->tiling_mode;
2495 l->l_p.vp_start_x = p->vp_start_x_l;
2496 l->l_p.vp_start_y = p->vp_start_y_l;
2497 l->l_p.full_vp_width = p->full_vp_width_l;
2498 l->l_p.full_vp_height = p->full_vp_height_l;
2499 l->l_p.blk_width = p->blk_width_l;
2500 l->l_p.blk_height = p->blk_height_l;
2501 l->l_p.vmpg_width = p->vmpg_width_l;
2502 l->l_p.vmpg_height = p->vmpg_height_l;
2503 l->l_p.full_swath_bytes = p->full_swath_bytes_l;
2504 l->l_p.bytes_per_pixel = p->bytes_per_pixel_l;
2505
2506 // output
2507 l->l_p.num_mcaches = p->num_mcaches_l;
2508 l->l_p.mcache_row_bytes = p->mcache_row_bytes_l;
2509 l->l_p.dcc_dram_bw_nom_overhead_factor = p->dcc_dram_bw_nom_overhead_factor_l;
2510 l->l_p.dcc_dram_bw_pref_overhead_factor = p->dcc_dram_bw_pref_overhead_factor_l;
2511 l->l_p.mvmpg_width = &l->mvmpg_width_l;
2512 l->l_p.mvmpg_height = &l->mvmpg_height_l;
2513 l->l_p.full_vp_access_width_mvmpg_aligned = &l->full_vp_access_width_mvmpg_aligned_l;
2514 l->l_p.meta_row_width_ub = &l->meta_row_width_l;
2515 l->l_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_l;
2516
2517 calculate_mcache_row_bytes(scratch, &l->l_p);
2518 dml2_assert(*p->num_mcaches_l > 0);
2519
2520 if (l->is_dual_plane) {
2521 l->c_p.num_chans = p->num_chans;
2522 l->c_p.mem_word_bytes = p->mem_word_bytes;
2523 l->c_p.mcache_size_bytes = p->mcache_size_bytes;
2524 l->c_p.mcache_line_size_bytes = p->mcache_line_size_bytes;
2525 l->c_p.gpuvm_enable = p->gpuvm_enable;
2526 l->c_p.gpuvm_page_size_kbytes = p->gpuvm_page_size_kbytes;
2527 l->c_p.surf_vert = p->surf_vert;
2528 l->c_p.vp_stationary = p->vp_stationary;
2529 l->c_p.tiling_mode = p->tiling_mode;
2530 l->c_p.vp_start_x = p->vp_start_x_c;
2531 l->c_p.vp_start_y = p->vp_start_y_c;
2532 l->c_p.full_vp_width = p->full_vp_width_c;
2533 l->c_p.full_vp_height = p->full_vp_height_c;
2534 l->c_p.blk_width = p->blk_width_c;
2535 l->c_p.blk_height = p->blk_height_c;
2536 l->c_p.vmpg_width = p->vmpg_width_c;
2537 l->c_p.vmpg_height = p->vmpg_height_c;
2538 l->c_p.full_swath_bytes = p->full_swath_bytes_c;
2539 l->c_p.bytes_per_pixel = p->bytes_per_pixel_c;
2540
2541 // output
2542 l->c_p.num_mcaches = p->num_mcaches_c;
2543 l->c_p.mcache_row_bytes = p->mcache_row_bytes_c;
2544 l->c_p.dcc_dram_bw_nom_overhead_factor = p->dcc_dram_bw_nom_overhead_factor_c;
2545 l->c_p.dcc_dram_bw_pref_overhead_factor = p->dcc_dram_bw_pref_overhead_factor_c;
2546 l->c_p.mvmpg_width = &l->mvmpg_width_c;
2547 l->c_p.mvmpg_height = &l->mvmpg_height_c;
2548 l->c_p.full_vp_access_width_mvmpg_aligned = &l->full_vp_access_width_mvmpg_aligned_c;
2549 l->c_p.meta_row_width_ub = &l->meta_row_width_c;
2550 l->c_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_c;
2551
2552 calculate_mcache_row_bytes(scratch, &l->c_p);
2553 dml2_assert(*p->num_mcaches_c > 0);
2554 }
2555
2556 // Sharing for iMALL access
2557 l->mcache_remainder_l = *p->mcache_row_bytes_l % p->mcache_size_bytes;
2558 l->mcache_remainder_c = *p->mcache_row_bytes_c % p->mcache_size_bytes;
2559 l->mvmpg_access_width_l = p->surf_vert ? l->mvmpg_height_l : l->mvmpg_width_l;
2560 l->mvmpg_access_width_c = p->surf_vert ? l->mvmpg_height_c : l->mvmpg_width_c;
2561
2562 if (p->imall_enable) {
2563 *p->mall_comb_mcache_l = (2 * l->mcache_remainder_l <= p->mcache_size_bytes);
2564
2565 if (l->is_dual_plane)
2566 *p->mall_comb_mcache_c = (2 * l->mcache_remainder_c <= p->mcache_size_bytes);
2567 }
2568
2569 if (!p->surf_vert) // horizonatal access
2570 l->luma_time_factor = (double)l->mvmpg_height_c / l->mvmpg_height_l * 2;
2571 else // vertical access
2572 l->luma_time_factor = (double)l->mvmpg_width_c / l->mvmpg_width_l * 2;
2573
2574 // The algorithm starts with computing a non-integer, avg_mcache_element_size_l/c:
2575 if (*p->num_mcaches_l) {
2576 l->avg_mcache_element_size_l = l->meta_row_width_l / *p->num_mcaches_l;
2577 }
2578 if (l->is_dual_plane) {
2579 l->avg_mcache_element_size_c = l->meta_row_width_c / *p->num_mcaches_c;
2580
2581 if (!p->imall_enable || (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c)) {
2582 l->lc_comb_last_mcache_size = (unsigned int)((l->mcache_remainder_l * (*p->mall_comb_mcache_l ? 2 : 1) * l->luma_time_factor) +
2583 (l->mcache_remainder_c * (*p->mall_comb_mcache_c ? 2 : 1)));
2584 }
2585 *p->lc_comb_mcache = (l->lc_comb_last_mcache_size <= p->mcache_size_bytes) && (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c);
2586 }
2587
2588 #ifdef __DML_VBA_DEBUG__
2589 dml2_printf("DML::%s: imall_enable = %u\n", __func__, p->imall_enable);
2590 dml2_printf("DML::%s: is_dual_plane = %u\n", __func__, l->is_dual_plane);
2591 dml2_printf("DML::%s: surf_vert = %u\n", __func__, p->surf_vert);
2592 dml2_printf("DML::%s: mvmpg_width_l = %u\n", __func__, l->mvmpg_width_l);
2593 dml2_printf("DML::%s: mvmpg_height_l = %u\n", __func__, l->mvmpg_height_l);
2594 dml2_printf("DML::%s: mcache_remainder_l = %f\n", __func__, l->mcache_remainder_l);
2595 dml2_printf("DML::%s: num_mcaches_l = %u\n", __func__, *p->num_mcaches_l);
2596 dml2_printf("DML::%s: avg_mcache_element_size_l = %u\n", __func__, l->avg_mcache_element_size_l);
2597 dml2_printf("DML::%s: mvmpg_access_width_l = %u\n", __func__, l->mvmpg_access_width_l);
2598 dml2_printf("DML::%s: mall_comb_mcache_l = %u\n", __func__, *p->mall_comb_mcache_l);
2599
2600 if (l->is_dual_plane) {
2601 dml2_printf("DML::%s: mvmpg_width_c = %u\n", __func__, l->mvmpg_width_c);
2602 dml2_printf("DML::%s: mvmpg_height_c = %u\n", __func__, l->mvmpg_height_c);
2603 dml2_printf("DML::%s: mcache_remainder_c = %f\n", __func__, l->mcache_remainder_c);
2604 dml2_printf("DML::%s: luma_time_factor = %f\n", __func__, l->luma_time_factor);
2605 dml2_printf("DML::%s: num_mcaches_c = %u\n", __func__, *p->num_mcaches_c);
2606 dml2_printf("DML::%s: avg_mcache_element_size_c = %u\n", __func__, l->avg_mcache_element_size_c);
2607 dml2_printf("DML::%s: mvmpg_access_width_c = %u\n", __func__, l->mvmpg_access_width_c);
2608 dml2_printf("DML::%s: mall_comb_mcache_c = %u\n", __func__, *p->mall_comb_mcache_c);
2609 dml2_printf("DML::%s: lc_comb_last_mcache_size = %u\n", __func__, l->lc_comb_last_mcache_size);
2610 dml2_printf("DML::%s: lc_comb_mcache = %u\n", __func__, *p->lc_comb_mcache);
2611 }
2612 #endif
2613 // calculate split_coordinate
2614 l->full_vp_access_width_l = p->surf_vert ? p->full_vp_height_l : p->full_vp_width_l;
2615 l->full_vp_access_width_c = p->surf_vert ? p->full_vp_height_c : p->full_vp_width_c;
2616
2617 for (n = 0; n < *p->num_mcaches_l - 1; n++) {
2618 p->mcache_offsets_l[n] = (unsigned int)(math_floor2((n + 1) * l->avg_mcache_element_size_l / l->mvmpg_access_width_l, 1)) * l->mvmpg_access_width_l;
2619 }
2620 p->mcache_offsets_l[*p->num_mcaches_l - 1] = l->full_vp_access_width_l;
2621
2622 if (l->is_dual_plane) {
2623 for (n = 0; n < *p->num_mcaches_c - 1; n++) {
2624 p->mcache_offsets_c[n] = (unsigned int)(math_floor2((n + 1) * l->avg_mcache_element_size_c / l->mvmpg_access_width_c, 1)) * l->mvmpg_access_width_c;
2625 }
2626 p->mcache_offsets_c[*p->num_mcaches_c - 1] = l->full_vp_access_width_c;
2627 }
2628 #ifdef __DML_VBA_DEBUG__
2629 for (n = 0; n < *p->num_mcaches_l; n++)
2630 dml2_printf("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]);
2631
2632 if (l->is_dual_plane) {
2633 for (n = 0; n < *p->num_mcaches_c; n++)
2634 dml2_printf("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]);
2635 }
2636 #endif
2637
2638 // Luma/Chroma combine in the last mcache
2639 // In the case of Luma/Chroma combine-mCache (with lc_comb_mcache==1), all mCaches except the last segment are filled as much as possible, when stay aligned to mvmpg boundary
2640 if (*p->lc_comb_mcache && l->is_dual_plane) {
2641 for (n = 0; n < *p->num_mcaches_l - 1; n++)
2642 p->mcache_offsets_l[n] = (n + 1) * l->mvmpg_per_mcache_lb_l * l->mvmpg_access_width_l;
2643 p->mcache_offsets_l[*p->num_mcaches_l - 1] = l->full_vp_access_width_l;
2644
2645 for (n = 0; n < *p->num_mcaches_c - 1; n++)
2646 p->mcache_offsets_c[n] = (n + 1) * l->mvmpg_per_mcache_lb_c * l->mvmpg_access_width_c;
2647 p->mcache_offsets_c[*p->num_mcaches_c - 1] = l->full_vp_access_width_c;
2648
2649 #ifdef __DML_VBA_DEBUG__
2650 for (n = 0; n < *p->num_mcaches_l; n++)
2651 dml2_printf("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]);
2652
2653 for (n = 0; n < *p->num_mcaches_c; n++)
2654 dml2_printf("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]);
2655 #endif
2656 }
2657
2658 *p->mcache_shift_granularity_l = l->mvmpg_access_width_l;
2659 *p->mcache_shift_granularity_c = l->mvmpg_access_width_c;
2660 }
2661
calculate_mall_bw_overhead_factor(double mall_prefetch_sdp_overhead_factor[],double mall_prefetch_dram_overhead_factor[],const struct dml2_display_cfg * display_cfg,unsigned int num_active_planes)2662 static void calculate_mall_bw_overhead_factor(
2663 double mall_prefetch_sdp_overhead_factor[], //mall_sdp_oh_nom/pref
2664 double mall_prefetch_dram_overhead_factor[], //mall_dram_oh_nom/pref
2665
2666 // input
2667 const struct dml2_display_cfg *display_cfg,
2668 unsigned int num_active_planes)
2669 {
2670 for (unsigned int k = 0; k < num_active_planes; ++k) {
2671 mall_prefetch_sdp_overhead_factor[k] = 1.0;
2672 mall_prefetch_dram_overhead_factor[k] = 1.0;
2673
2674 // SDP - on the return side
2675 if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall) // always no data return
2676 mall_prefetch_sdp_overhead_factor[k] = 1.25;
2677 else if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return)
2678 mall_prefetch_sdp_overhead_factor[k] = 0.25;
2679
2680 // DRAM
2681 if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall)
2682 mall_prefetch_dram_overhead_factor[k] = 2.0;
2683
2684 #ifdef __DML_VBA_DEBUG__
2685 dml2_printf("DML::%s: k=%u, mall_prefetch_sdp_overhead_factor = %f\n", __func__, k, mall_prefetch_sdp_overhead_factor[k]);
2686 dml2_printf("DML::%s: k=%u, mall_prefetch_dram_overhead_factor = %f\n", __func__, k, mall_prefetch_dram_overhead_factor[k]);
2687 #endif
2688 }
2689 }
2690
dml_get_return_bandwidth_available(const struct dml2_soc_bb * soc,enum dml2_core_internal_soc_state_type state_type,enum dml2_core_internal_bw_type bw_type,bool is_avg_bw,bool is_hvm_en,bool is_hvm_only,double dcfclk_mhz,double fclk_mhz,double dram_bw_mbps)2691 static double dml_get_return_bandwidth_available(
2692 const struct dml2_soc_bb *soc,
2693 enum dml2_core_internal_soc_state_type state_type,
2694 enum dml2_core_internal_bw_type bw_type,
2695 bool is_avg_bw,
2696 bool is_hvm_en,
2697 bool is_hvm_only,
2698 double dcfclk_mhz,
2699 double fclk_mhz,
2700 double dram_bw_mbps)
2701 {
2702 double return_bw_mbps = 0.;
2703 double ideal_sdp_bandwidth = (double)soc->return_bus_width_bytes * dcfclk_mhz;
2704 double ideal_fabric_bandwidth = fclk_mhz * (double)soc->fabric_datapath_to_dcn_data_return_bytes;
2705 double ideal_dram_bandwidth = dram_bw_mbps; //dram_speed_mts * soc->clk_table.dram_config.channel_count * soc->clk_table.dram_config.channel_width_bytes;
2706
2707 double derate_sdp_factor;
2708 double derate_fabric_factor;
2709 double derate_dram_factor;
2710
2711 double derate_sdp_bandwidth;
2712 double derate_fabric_bandwidth;
2713 double derate_dram_bandwidth;
2714
2715 if (is_avg_bw) {
2716 if (state_type == dml2_core_internal_soc_state_svp_prefetch) {
2717 derate_sdp_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.dcfclk_derate_percent / 100.0;
2718 derate_fabric_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.fclk_derate_percent / 100.0;
2719 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.dram_derate_percent_pixel / 100.0;
2720 } else { // just assume sys_active
2721 derate_sdp_factor = soc->qos_parameters.derate_table.system_active_average.dcfclk_derate_percent / 100.0;
2722 derate_fabric_factor = soc->qos_parameters.derate_table.system_active_average.fclk_derate_percent / 100.0;
2723 derate_dram_factor = soc->qos_parameters.derate_table.system_active_average.dram_derate_percent_pixel / 100.0;
2724 }
2725 } else { // urgent bw
2726 if (state_type == dml2_core_internal_soc_state_svp_prefetch) {
2727 derate_sdp_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dcfclk_derate_percent / 100.0;
2728 derate_fabric_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.fclk_derate_percent / 100.0;
2729 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel / 100.0;
2730
2731 if (is_hvm_en) {
2732 if (is_hvm_only)
2733 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_vm / 100.0;
2734 else
2735 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel_and_vm / 100.0;
2736 } else {
2737 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel / 100.0;
2738 }
2739 } else { // just assume sys_active
2740 derate_sdp_factor = soc->qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100.0;
2741 derate_fabric_factor = soc->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / 100.0;
2742
2743 if (is_hvm_en) {
2744 if (is_hvm_only)
2745 derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_vm / 100.0;
2746 else
2747 derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel_and_vm / 100.0;
2748 } else {
2749 derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel / 100.0;
2750 }
2751 }
2752 }
2753
2754 derate_sdp_bandwidth = ideal_sdp_bandwidth * derate_sdp_factor;
2755 derate_fabric_bandwidth = ideal_fabric_bandwidth * derate_fabric_factor;
2756 derate_dram_bandwidth = ideal_dram_bandwidth * derate_dram_factor;
2757
2758 if (bw_type == dml2_core_internal_bw_sdp)
2759 return_bw_mbps = math_min2(derate_sdp_bandwidth, derate_fabric_bandwidth);
2760 else // dml2_core_internal_bw_dram
2761 return_bw_mbps = derate_dram_bandwidth;
2762
2763 #ifdef __DML_VBA_DEBUG__
2764 dml2_printf("DML::%s: is_avg_bw = %u\n", __func__, is_avg_bw);
2765 dml2_printf("DML::%s: is_hvm_en = %u\n", __func__, is_hvm_en);
2766 dml2_printf("DML::%s: is_hvm_only = %u\n", __func__, is_hvm_only);
2767 dml2_printf("DML::%s: state_type = %s\n", __func__, dml2_core_internal_soc_state_type_str(state_type));
2768 dml2_printf("DML::%s: bw_type = %s\n", __func__, dml2_core_internal_bw_type_str(bw_type));
2769 dml2_printf("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz);
2770 dml2_printf("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz);
2771 dml2_printf("DML::%s: ideal_sdp_bandwidth = %f\n", __func__, ideal_sdp_bandwidth);
2772 dml2_printf("DML::%s: ideal_fabric_bandwidth = %f\n", __func__, ideal_fabric_bandwidth);
2773 dml2_printf("DML::%s: ideal_dram_bandwidth = %f\n", __func__, ideal_dram_bandwidth);
2774 dml2_printf("DML::%s: derate_sdp_bandwidth = %f (derate %f)\n", __func__, derate_sdp_bandwidth, derate_sdp_factor);
2775 dml2_printf("DML::%s: derate_fabric_bandwidth = %f (derate %f)\n", __func__, derate_fabric_bandwidth, derate_fabric_factor);
2776 dml2_printf("DML::%s: derate_dram_bandwidth = %f (derate %f)\n", __func__, derate_dram_bandwidth, derate_dram_factor);
2777 dml2_printf("DML::%s: return_bw_mbps = %f\n", __func__, return_bw_mbps);
2778 #endif
2779 return return_bw_mbps;
2780 }
2781
calculate_bandwidth_available(double avg_bandwidth_available_min[dml2_core_internal_soc_state_max],double avg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_available_min[dml2_core_internal_soc_state_max],double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_max],double urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_max],const struct dml2_soc_bb * soc,bool HostVMEnable,double dcfclk_mhz,double fclk_mhz,double dram_bw_mbps)2782 static noinline_for_stack void calculate_bandwidth_available(
2783 double avg_bandwidth_available_min[dml2_core_internal_soc_state_max],
2784 double avg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
2785 double urg_bandwidth_available_min[dml2_core_internal_soc_state_max], // min between SDP and DRAM
2786 double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
2787 double urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_max],
2788 double urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_max],
2789
2790 const struct dml2_soc_bb *soc,
2791 bool HostVMEnable,
2792 double dcfclk_mhz,
2793 double fclk_mhz,
2794 double dram_bw_mbps)
2795 {
2796 unsigned int n, m;
2797
2798 dml2_printf("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz);
2799 dml2_printf("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz);
2800 dml2_printf("DML::%s: dram_bw_mbps = %f\n", __func__, dram_bw_mbps);
2801
2802 // Calculate all the bandwidth availabe
2803 for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
2804 for (n = 0; n < dml2_core_internal_bw_max; n++) {
2805 avg_bandwidth_available[m][n] = dml_get_return_bandwidth_available(soc,
2806 m, // soc_state
2807 n, // bw_type
2808 1, // avg_bw
2809 HostVMEnable,
2810 0, // hvm_only
2811 dcfclk_mhz,
2812 fclk_mhz,
2813 dram_bw_mbps);
2814
2815 urg_bandwidth_available[m][n] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 0, dcfclk_mhz, fclk_mhz, dram_bw_mbps);
2816
2817
2818 #ifdef __DML_VBA_DEBUG__
2819 dml2_printf("DML::%s: avg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), avg_bandwidth_available[m][n]);
2820 dml2_printf("DML::%s: urg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), urg_bandwidth_available[m][n]);
2821 #endif
2822
2823 // urg_bandwidth_available_vm_only is indexed by soc_state
2824 if (n == dml2_core_internal_bw_dram) {
2825 urg_bandwidth_available_vm_only[m] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 1, dcfclk_mhz, fclk_mhz, dram_bw_mbps);
2826 urg_bandwidth_available_pixel_and_vm[m] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 0, dcfclk_mhz, fclk_mhz, dram_bw_mbps);
2827 }
2828 }
2829
2830 avg_bandwidth_available_min[m] = math_min2(avg_bandwidth_available[m][dml2_core_internal_bw_dram], avg_bandwidth_available[m][dml2_core_internal_bw_sdp]);
2831 urg_bandwidth_available_min[m] = math_min2(urg_bandwidth_available[m][dml2_core_internal_bw_dram], urg_bandwidth_available[m][dml2_core_internal_bw_sdp]);
2832
2833 #ifdef __DML_VBA_DEBUG__
2834 dml2_printf("DML::%s: avg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), avg_bandwidth_available_min[m]);
2835 dml2_printf("DML::%s: urg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_min[m]);
2836 dml2_printf("DML::%s: urg_bandwidth_available_vm_only[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_vm_only[n]);
2837 #endif
2838 }
2839 }
2840
calculate_avg_bandwidth_required(double avg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],const struct dml2_display_cfg * display_cfg,unsigned int num_active_planes,double ReadBandwidthLuma[],double ReadBandwidthChroma[],double cursor_bw[],double dcc_dram_bw_nom_overhead_factor_p0[],double dcc_dram_bw_nom_overhead_factor_p1[],double mall_prefetch_dram_overhead_factor[],double mall_prefetch_sdp_overhead_factor[])2841 static void calculate_avg_bandwidth_required(
2842 double avg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
2843
2844 // input
2845 const struct dml2_display_cfg *display_cfg,
2846 unsigned int num_active_planes,
2847 double ReadBandwidthLuma[],
2848 double ReadBandwidthChroma[],
2849 double cursor_bw[],
2850 double dcc_dram_bw_nom_overhead_factor_p0[],
2851 double dcc_dram_bw_nom_overhead_factor_p1[],
2852 double mall_prefetch_dram_overhead_factor[],
2853 double mall_prefetch_sdp_overhead_factor[])
2854 {
2855 unsigned int n, m, k;
2856 double sdp_overhead_factor;
2857 double dram_overhead_factor_p0;
2858 double dram_overhead_factor_p1;
2859
2860 // Average BW support check
2861 for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
2862 for (n = 0; n < dml2_core_internal_bw_max; n++) { // sdp, dram
2863 avg_bandwidth_required[m][n] = 0;
2864 }
2865 }
2866
2867 // SysActive and SVP Prefetch AVG bandwidth Check
2868 for (k = 0; k < num_active_planes; ++k) {
2869 #ifdef __DML_VBA_DEBUG__
2870 dml2_printf("DML::%s: plane %0d\n", __func__, k);
2871 dml2_printf("DML::%s: ReadBandwidthLuma=%f\n", __func__, ReadBandwidthLuma[k]);
2872 dml2_printf("DML::%s: ReadBandwidthChroma=%f\n", __func__, ReadBandwidthChroma[k]);
2873 dml2_printf("DML::%s: dcc_dram_bw_nom_overhead_factor_p0=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p0[k]);
2874 dml2_printf("DML::%s: dcc_dram_bw_nom_overhead_factor_p1=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p1[k]);
2875 dml2_printf("DML::%s: mall_prefetch_dram_overhead_factor=%f\n", __func__, mall_prefetch_dram_overhead_factor[k]);
2876 dml2_printf("DML::%s: mall_prefetch_sdp_overhead_factor=%f\n", __func__, mall_prefetch_sdp_overhead_factor[k]);
2877 #endif
2878
2879 sdp_overhead_factor = mall_prefetch_sdp_overhead_factor[k];
2880 dram_overhead_factor_p0 = dcc_dram_bw_nom_overhead_factor_p0[k] * mall_prefetch_dram_overhead_factor[k];
2881 dram_overhead_factor_p1 = dcc_dram_bw_nom_overhead_factor_p1[k] * mall_prefetch_dram_overhead_factor[k];
2882
2883 // FIXME_DCN4, was missing cursor_bw in here, but do I actually need that and tdlut bw for average bandwidth calculation?
2884 // active avg bw not include phantom, but svp_prefetch avg bw should include phantom pipes
2885 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
2886 avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] += sdp_overhead_factor * (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) + cursor_bw[k];
2887 avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] += dram_overhead_factor_p0 * ReadBandwidthLuma[k] + dram_overhead_factor_p1 * ReadBandwidthChroma[k] + cursor_bw[k];
2888 }
2889 avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] += sdp_overhead_factor * (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) + cursor_bw[k];
2890 avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] += dram_overhead_factor_p0 * ReadBandwidthLuma[k] + dram_overhead_factor_p1 * ReadBandwidthChroma[k] + cursor_bw[k];
2891
2892 #ifdef __DML_VBA_DEBUG__
2893 dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
2894 dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
2895 dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
2896 dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
2897 #endif
2898 }
2899 }
2900
CalculateVMRowAndSwath(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_CalculateVMRowAndSwath_params * p)2901 static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch,
2902 struct dml2_core_calcs_CalculateVMRowAndSwath_params *p)
2903 {
2904 struct dml2_core_calcs_CalculateVMRowAndSwath_locals *s = &scratch->CalculateVMRowAndSwath_locals;
2905
2906 s->HostVMDynamicLevels = CalculateHostVMDynamicLevels(p->display_cfg->gpuvm_enable, p->display_cfg->hostvm_enable, p->HostVMMinPageSize, p->display_cfg->hostvm_max_non_cached_page_table_levels);
2907
2908 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
2909 if (p->display_cfg->gpuvm_enable == true) {
2910 p->vm_group_bytes[k] = 512;
2911 p->dpte_group_bytes[k] = 512;
2912 } else {
2913 p->vm_group_bytes[k] = 0;
2914 p->dpte_group_bytes[k] = 0;
2915 }
2916
2917 if (dml_is_420(p->myPipe[k].SourcePixelFormat) || p->myPipe[k].SourcePixelFormat == dml2_rgbe_alpha) {
2918 if ((p->myPipe[k].SourcePixelFormat == dml2_420_10 || p->myPipe[k].SourcePixelFormat == dml2_420_12) && !dml_is_vertical_rotation(p->myPipe[k].RotationAngle)) {
2919 s->PTEBufferSizeInRequestsForLuma[k] = (p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma) / 2;
2920 s->PTEBufferSizeInRequestsForChroma[k] = s->PTEBufferSizeInRequestsForLuma[k];
2921 } else {
2922 s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma;
2923 s->PTEBufferSizeInRequestsForChroma[k] = p->PTEBufferSizeInRequestsChroma;
2924 }
2925
2926 scratch->calculate_vm_and_row_bytes_params.ViewportStationary = p->myPipe[k].ViewportStationary;
2927 scratch->calculate_vm_and_row_bytes_params.DCCEnable = p->myPipe[k].DCCEnable;
2928 scratch->calculate_vm_and_row_bytes_params.NumberOfDPPs = p->myPipe[k].DPPPerSurface;
2929 scratch->calculate_vm_and_row_bytes_params.BlockHeight256Bytes = p->myPipe[k].BlockHeight256BytesC;
2930 scratch->calculate_vm_and_row_bytes_params.BlockWidth256Bytes = p->myPipe[k].BlockWidth256BytesC;
2931 scratch->calculate_vm_and_row_bytes_params.SourcePixelFormat = p->myPipe[k].SourcePixelFormat;
2932 scratch->calculate_vm_and_row_bytes_params.SurfaceTiling = p->myPipe[k].SurfaceTiling;
2933 scratch->calculate_vm_and_row_bytes_params.BytePerPixel = p->myPipe[k].BytePerPixelC;
2934 scratch->calculate_vm_and_row_bytes_params.RotationAngle = p->myPipe[k].RotationAngle;
2935 scratch->calculate_vm_and_row_bytes_params.SwathWidth = p->SwathWidthC[k];
2936 scratch->calculate_vm_and_row_bytes_params.ViewportHeight = p->myPipe[k].ViewportHeightC;
2937 scratch->calculate_vm_and_row_bytes_params.ViewportXStart = p->myPipe[k].ViewportXStartC;
2938 scratch->calculate_vm_and_row_bytes_params.ViewportYStart = p->myPipe[k].ViewportYStartC;
2939 scratch->calculate_vm_and_row_bytes_params.GPUVMEnable = p->display_cfg->gpuvm_enable;
2940 scratch->calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = p->display_cfg->gpuvm_max_page_table_levels;
2941 scratch->calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
2942 scratch->calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = s->PTEBufferSizeInRequestsForChroma[k];
2943 scratch->calculate_vm_and_row_bytes_params.Pitch = p->myPipe[k].PitchC;
2944 scratch->calculate_vm_and_row_bytes_params.MacroTileWidth = p->myPipe[k].BlockWidthC;
2945 scratch->calculate_vm_and_row_bytes_params.MacroTileHeight = p->myPipe[k].BlockHeightC;
2946 scratch->calculate_vm_and_row_bytes_params.is_phantom = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]);
2947 scratch->calculate_vm_and_row_bytes_params.DCCMetaPitch = p->myPipe[k].DCCMetaPitchC;
2948 scratch->calculate_vm_and_row_bytes_params.mrq_present = p->mrq_present;
2949
2950 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &s->PixelPTEBytesPerRowC[k];
2951 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &s->PixelPTEBytesPerRowStorageC[k];
2952 scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub = &p->dpte_row_width_chroma_ub[k];
2953 scratch->calculate_vm_and_row_bytes_params.dpte_row_height = &p->dpte_row_height_chroma[k];
2954 scratch->calculate_vm_and_row_bytes_params.dpte_row_height_linear = &p->dpte_row_height_linear_chroma[k];
2955 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &s->PixelPTEBytesPerRowC_one_row_per_frame[k];
2956 scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &s->dpte_row_width_chroma_ub_one_row_per_frame[k];
2957 scratch->calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &s->dpte_row_height_chroma_one_row_per_frame[k];
2958 scratch->calculate_vm_and_row_bytes_params.vmpg_width = &p->vmpg_width_c[k];
2959 scratch->calculate_vm_and_row_bytes_params.vmpg_height = &p->vmpg_height_c[k];
2960 scratch->calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &p->PixelPTEReqWidthC[k];
2961 scratch->calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &p->PixelPTEReqHeightC[k];
2962 scratch->calculate_vm_and_row_bytes_params.PTERequestSize = &p->PTERequestSizeC[k];
2963 scratch->calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &p->dpde0_bytes_per_frame_ub_c[k];
2964
2965 scratch->calculate_vm_and_row_bytes_params.meta_row_bytes = &s->meta_row_bytes_per_row_ub_c[k];
2966 scratch->calculate_vm_and_row_bytes_params.MetaRequestWidth = &p->meta_req_width_chroma[k];
2967 scratch->calculate_vm_and_row_bytes_params.MetaRequestHeight = &p->meta_req_height_chroma[k];
2968 scratch->calculate_vm_and_row_bytes_params.meta_row_width = &p->meta_row_width_chroma[k];
2969 scratch->calculate_vm_and_row_bytes_params.meta_row_height = &p->meta_row_height_chroma[k];
2970 scratch->calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &p->meta_pte_bytes_per_frame_ub_c[k];
2971
2972 s->vm_bytes_c = CalculateVMAndRowBytes(&scratch->calculate_vm_and_row_bytes_params);
2973
2974 p->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2975 p->myPipe[k].VRatioChroma,
2976 p->myPipe[k].VTapsChroma,
2977 p->myPipe[k].InterlaceEnable,
2978 p->myPipe[k].ProgressiveToInterlaceUnitInOPP,
2979 p->myPipe[k].SwathHeightC,
2980 p->myPipe[k].RotationAngle,
2981 p->myPipe[k].mirrored,
2982 p->myPipe[k].ViewportStationary,
2983 p->SwathWidthC[k],
2984 p->myPipe[k].ViewportHeightC,
2985 p->myPipe[k].ViewportXStartC,
2986 p->myPipe[k].ViewportYStartC,
2987
2988 // Output
2989 &p->VInitPreFillC[k],
2990 &p->MaxNumSwathC[k]);
2991 } else {
2992 s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma;
2993 s->PTEBufferSizeInRequestsForChroma[k] = 0;
2994 s->PixelPTEBytesPerRowC[k] = 0;
2995 s->PixelPTEBytesPerRowStorageC[k] = 0;
2996 s->vm_bytes_c = 0;
2997 p->MaxNumSwathC[k] = 0;
2998 p->PrefetchSourceLinesC[k] = 0;
2999 s->dpte_row_height_chroma_one_row_per_frame[k] = 0;
3000 s->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
3001 s->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
3002 }
3003
3004 scratch->calculate_vm_and_row_bytes_params.ViewportStationary = p->myPipe[k].ViewportStationary;
3005 scratch->calculate_vm_and_row_bytes_params.DCCEnable = p->myPipe[k].DCCEnable;
3006 scratch->calculate_vm_and_row_bytes_params.NumberOfDPPs = p->myPipe[k].DPPPerSurface;
3007 scratch->calculate_vm_and_row_bytes_params.BlockHeight256Bytes = p->myPipe[k].BlockHeight256BytesY;
3008 scratch->calculate_vm_and_row_bytes_params.BlockWidth256Bytes = p->myPipe[k].BlockWidth256BytesY;
3009 scratch->calculate_vm_and_row_bytes_params.SourcePixelFormat = p->myPipe[k].SourcePixelFormat;
3010 scratch->calculate_vm_and_row_bytes_params.SurfaceTiling = p->myPipe[k].SurfaceTiling;
3011 scratch->calculate_vm_and_row_bytes_params.BytePerPixel = p->myPipe[k].BytePerPixelY;
3012 scratch->calculate_vm_and_row_bytes_params.RotationAngle = p->myPipe[k].RotationAngle;
3013 scratch->calculate_vm_and_row_bytes_params.SwathWidth = p->SwathWidthY[k];
3014 scratch->calculate_vm_and_row_bytes_params.ViewportHeight = p->myPipe[k].ViewportHeight;
3015 scratch->calculate_vm_and_row_bytes_params.ViewportXStart = p->myPipe[k].ViewportXStart;
3016 scratch->calculate_vm_and_row_bytes_params.ViewportYStart = p->myPipe[k].ViewportYStart;
3017 scratch->calculate_vm_and_row_bytes_params.GPUVMEnable = p->display_cfg->gpuvm_enable;
3018 scratch->calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = p->display_cfg->gpuvm_max_page_table_levels;
3019 scratch->calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
3020 scratch->calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = s->PTEBufferSizeInRequestsForLuma[k];
3021 scratch->calculate_vm_and_row_bytes_params.Pitch = p->myPipe[k].PitchY;
3022 scratch->calculate_vm_and_row_bytes_params.MacroTileWidth = p->myPipe[k].BlockWidthY;
3023 scratch->calculate_vm_and_row_bytes_params.MacroTileHeight = p->myPipe[k].BlockHeightY;
3024 scratch->calculate_vm_and_row_bytes_params.is_phantom = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]);
3025 scratch->calculate_vm_and_row_bytes_params.DCCMetaPitch = p->myPipe[k].DCCMetaPitchY;
3026 scratch->calculate_vm_and_row_bytes_params.mrq_present = p->mrq_present;
3027
3028 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &s->PixelPTEBytesPerRowY[k];
3029 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &s->PixelPTEBytesPerRowStorageY[k];
3030 scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub = &p->dpte_row_width_luma_ub[k];
3031 scratch->calculate_vm_and_row_bytes_params.dpte_row_height = &p->dpte_row_height_luma[k];
3032 scratch->calculate_vm_and_row_bytes_params.dpte_row_height_linear = &p->dpte_row_height_linear_luma[k];
3033 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &s->PixelPTEBytesPerRowY_one_row_per_frame[k];
3034 scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &s->dpte_row_width_luma_ub_one_row_per_frame[k];
3035 scratch->calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &s->dpte_row_height_luma_one_row_per_frame[k];
3036 scratch->calculate_vm_and_row_bytes_params.vmpg_width = &p->vmpg_width_y[k];
3037 scratch->calculate_vm_and_row_bytes_params.vmpg_height = &p->vmpg_height_y[k];
3038 scratch->calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &p->PixelPTEReqWidthY[k];
3039 scratch->calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &p->PixelPTEReqHeightY[k];
3040 scratch->calculate_vm_and_row_bytes_params.PTERequestSize = &p->PTERequestSizeY[k];
3041 scratch->calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &p->dpde0_bytes_per_frame_ub_l[k];
3042
3043 scratch->calculate_vm_and_row_bytes_params.meta_row_bytes = &s->meta_row_bytes_per_row_ub_l[k];
3044 scratch->calculate_vm_and_row_bytes_params.MetaRequestWidth = &p->meta_req_width_luma[k];
3045 scratch->calculate_vm_and_row_bytes_params.MetaRequestHeight = &p->meta_req_height_luma[k];
3046 scratch->calculate_vm_and_row_bytes_params.meta_row_width = &p->meta_row_width_luma[k];
3047 scratch->calculate_vm_and_row_bytes_params.meta_row_height = &p->meta_row_height_luma[k];
3048 scratch->calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &p->meta_pte_bytes_per_frame_ub_l[k];
3049
3050 s->vm_bytes_l = CalculateVMAndRowBytes(&scratch->calculate_vm_and_row_bytes_params);
3051
3052 p->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
3053 p->myPipe[k].VRatio,
3054 p->myPipe[k].VTaps,
3055 p->myPipe[k].InterlaceEnable,
3056 p->myPipe[k].ProgressiveToInterlaceUnitInOPP,
3057 p->myPipe[k].SwathHeightY,
3058 p->myPipe[k].RotationAngle,
3059 p->myPipe[k].mirrored,
3060 p->myPipe[k].ViewportStationary,
3061 p->SwathWidthY[k],
3062 p->myPipe[k].ViewportHeight,
3063 p->myPipe[k].ViewportXStart,
3064 p->myPipe[k].ViewportYStart,
3065
3066 // Output
3067 &p->VInitPreFillY[k],
3068 &p->MaxNumSwathY[k]);
3069
3070 #ifdef __DML_VBA_DEBUG__
3071 dml2_printf("DML::%s: k=%u, vm_bytes_l = %u (before hvm level)\n", __func__, k, s->vm_bytes_l);
3072 dml2_printf("DML::%s: k=%u, vm_bytes_c = %u (before hvm level)\n", __func__, k, s->vm_bytes_c);
3073 dml2_printf("DML::%s: k=%u, meta_row_bytes_per_row_ub_l = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_l[k]);
3074 dml2_printf("DML::%s: k=%u, meta_row_bytes_per_row_ub_c = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_c[k]);
3075 #endif
3076 p->vm_bytes[k] = (s->vm_bytes_l + s->vm_bytes_c) * (1 + 8 * s->HostVMDynamicLevels);
3077 p->meta_row_bytes[k] = s->meta_row_bytes_per_row_ub_l[k] + s->meta_row_bytes_per_row_ub_c[k];
3078 p->meta_row_bytes_per_row_ub_l[k] = s->meta_row_bytes_per_row_ub_l[k];
3079 p->meta_row_bytes_per_row_ub_c[k] = s->meta_row_bytes_per_row_ub_c[k];
3080
3081 #ifdef __DML_VBA_DEBUG__
3082 dml2_printf("DML::%s: k=%u, meta_row_bytes = %u\n", __func__, k, p->meta_row_bytes[k]);
3083 dml2_printf("DML::%s: k=%u, vm_bytes = %u (after hvm level)\n", __func__, k, p->vm_bytes[k]);
3084 #endif
3085 if (s->PixelPTEBytesPerRowStorageY[k] <= 64 * s->PTEBufferSizeInRequestsForLuma[k] && s->PixelPTEBytesPerRowStorageC[k] <= 64 * s->PTEBufferSizeInRequestsForChroma[k]) {
3086 p->PTEBufferSizeNotExceeded[k] = true;
3087 } else {
3088 p->PTEBufferSizeNotExceeded[k] = false;
3089 }
3090
3091 s->one_row_per_frame_fits_in_buffer[k] = (s->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForLuma[k] &&
3092 s->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForChroma[k]);
3093 #ifdef __DML_VBA_DEBUG__
3094 if (p->PTEBufferSizeNotExceeded[k] == 0 || s->one_row_per_frame_fits_in_buffer[k] == 0) {
3095 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]);
3096 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]);
3097 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowStorageY = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageY[k]);
3098 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowStorageC = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageC[k]);
3099 dml2_printf("DML::%s: k=%u, PTEBufferSizeInRequestsForLuma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForLuma[k]);
3100 dml2_printf("DML::%s: k=%u, PTEBufferSizeInRequestsForChroma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForChroma[k]);
3101 dml2_printf("DML::%s: k=%u, PTEBufferSizeNotExceeded (not one_row_per_frame) = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]);
3102
3103 dml2_printf("DML::%s: k=%u, HostVMDynamicLevels = %u\n", __func__, k, s->HostVMDynamicLevels);
3104 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowY_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowY_one_row_per_frame[k]);
3105 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowC_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowC_one_row_per_frame[k]);
3106 dml2_printf("DML::%s: k=%u, one_row_per_frame_fits_in_buffer = %u\n", __func__, k, s->one_row_per_frame_fits_in_buffer[k]);
3107 }
3108 #endif
3109 }
3110
3111 CalculateMALLUseForStaticScreen(
3112 p->display_cfg,
3113 p->NumberOfActiveSurfaces,
3114 p->MALLAllocatedForDCN,
3115 p->SurfaceSizeInMALL,
3116 s->one_row_per_frame_fits_in_buffer,
3117 // Output
3118 p->is_using_mall_for_ss);
3119
3120 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3121 if (p->display_cfg->gpuvm_enable) {
3122 if (p->display_cfg->plane_descriptors[k].overrides.hw.force_pte_buffer_mode.enable == 1) {
3123 p->PTE_BUFFER_MODE[k] = p->display_cfg->plane_descriptors[k].overrides.hw.force_pte_buffer_mode.value;
3124 }
3125 p->PTE_BUFFER_MODE[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->is_using_mall_for_ss[k] || (p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) ||
3126 dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) || (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes > 64);
3127 p->BIGK_FRAGMENT_SIZE[k] = (unsigned int)(math_log((float)p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes * 1024, 2) - 12);
3128 } else {
3129 p->PTE_BUFFER_MODE[k] = 0;
3130 p->BIGK_FRAGMENT_SIZE[k] = 0;
3131 }
3132 }
3133
3134 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3135 p->DCCMetaBufferSizeNotExceeded[k] = true;
3136 #ifdef __DML_VBA_DEBUG__
3137 dml2_printf("DML::%s: k=%u, SurfaceSizeInMALL = %u\n", __func__, k, p->SurfaceSizeInMALL[k]);
3138 dml2_printf("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, p->is_using_mall_for_ss[k]);
3139 #endif
3140 p->use_one_row_for_frame[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->is_using_mall_for_ss[k] || (p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) ||
3141 (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) || (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes > 64 && dml_is_vertical_rotation(p->myPipe[k].RotationAngle));
3142
3143 p->use_one_row_for_frame_flip[k] = p->use_one_row_for_frame[k] && !(p->display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame);
3144
3145 if (p->use_one_row_for_frame[k]) {
3146 p->dpte_row_height_luma[k] = s->dpte_row_height_luma_one_row_per_frame[k];
3147 p->dpte_row_width_luma_ub[k] = s->dpte_row_width_luma_ub_one_row_per_frame[k];
3148 s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY_one_row_per_frame[k];
3149 p->dpte_row_height_chroma[k] = s->dpte_row_height_chroma_one_row_per_frame[k];
3150 p->dpte_row_width_chroma_ub[k] = s->dpte_row_width_chroma_ub_one_row_per_frame[k];
3151 s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC_one_row_per_frame[k];
3152 p->PTEBufferSizeNotExceeded[k] = s->one_row_per_frame_fits_in_buffer[k];
3153 }
3154
3155 if (p->meta_row_bytes[k] <= p->DCCMetaBufferSizeBytes) {
3156 p->DCCMetaBufferSizeNotExceeded[k] = true;
3157 } else {
3158 p->DCCMetaBufferSizeNotExceeded[k] = false;
3159
3160 #ifdef __DML_VBA_DEBUG__
3161 dml2_printf("DML::%s: k=%d, meta_row_bytes = %d\n", __func__, k, p->meta_row_bytes[k]);
3162 dml2_printf("DML::%s: k=%d, DCCMetaBufferSizeBytes = %d\n", __func__, k, p->DCCMetaBufferSizeBytes);
3163 dml2_printf("DML::%s: k=%d, DCCMetaBufferSizeNotExceeded = %d\n", __func__, k, p->DCCMetaBufferSizeNotExceeded[k]);
3164 #endif
3165 }
3166
3167 s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY[k] * (1 + 8 * s->HostVMDynamicLevels);
3168 s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC[k] * (1 + 8 * s->HostVMDynamicLevels);
3169 p->PixelPTEBytesPerRow[k] = s->PixelPTEBytesPerRowY[k] + s->PixelPTEBytesPerRowC[k];
3170 p->dpte_row_bytes_per_row_l[k] = s->PixelPTEBytesPerRowY[k];
3171 p->dpte_row_bytes_per_row_c[k] = s->PixelPTEBytesPerRowC[k];
3172
3173 // if one row of dPTEs is meant to span the entire frame, then for these calculations, we will pretend like that one big row is fetched in two halfs
3174 if (p->use_one_row_for_frame[k])
3175 p->PixelPTEBytesPerRow[k] = p->PixelPTEBytesPerRow[k] / 2;
3176
3177 CalculateRowBandwidth(
3178 p->display_cfg->gpuvm_enable,
3179 p->use_one_row_for_frame[k],
3180 p->myPipe[k].SourcePixelFormat,
3181 p->myPipe[k].VRatio,
3182 p->myPipe[k].VRatioChroma,
3183 p->myPipe[k].DCCEnable,
3184 p->myPipe[k].HTotal / p->myPipe[k].PixelClock,
3185 s->PixelPTEBytesPerRowY[k],
3186 s->PixelPTEBytesPerRowC[k],
3187 p->dpte_row_height_luma[k],
3188 p->dpte_row_height_chroma[k],
3189
3190 p->mrq_present,
3191 p->meta_row_bytes_per_row_ub_l[k],
3192 p->meta_row_bytes_per_row_ub_c[k],
3193 p->meta_row_height_luma[k],
3194 p->meta_row_height_chroma[k],
3195
3196 // Output
3197 &p->dpte_row_bw[k],
3198 &p->meta_row_bw[k]);
3199 #ifdef __DML_VBA_DEBUG__
3200 dml2_printf("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]);
3201 dml2_printf("DML::%s: k=%u, use_one_row_for_frame_flip = %u\n", __func__, k, p->use_one_row_for_frame_flip[k]);
3202 dml2_printf("DML::%s: k=%u, UseMALLForPStateChange = %u\n", __func__, k, p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config);
3203 dml2_printf("DML::%s: k=%u, dpte_row_height_luma = %u\n", __func__, k, p->dpte_row_height_luma[k]);
3204 dml2_printf("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]);
3205 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]);
3206 dml2_printf("DML::%s: k=%u, dpte_row_height_chroma = %u\n", __func__, k, p->dpte_row_height_chroma[k]);
3207 dml2_printf("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]);
3208 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]);
3209 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRow = %u\n", __func__, k, p->PixelPTEBytesPerRow[k]);
3210 dml2_printf("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]);
3211 dml2_printf("DML::%s: k=%u, gpuvm_enable = %u\n", __func__, k, p->display_cfg->gpuvm_enable);
3212 dml2_printf("DML::%s: k=%u, PTE_BUFFER_MODE = %u\n", __func__, k, p->PTE_BUFFER_MODE[k]);
3213 dml2_printf("DML::%s: k=%u, BIGK_FRAGMENT_SIZE = %u\n", __func__, k, p->BIGK_FRAGMENT_SIZE[k]);
3214 #endif
3215 }
3216 }
3217
CalculateUrgentLatency(double UrgentLatencyPixelDataOnly,double UrgentLatencyPixelMixedWithVMData,double UrgentLatencyVMDataOnly,bool DoUrgentLatencyAdjustment,double UrgentLatencyAdjustmentFabricClockComponent,double UrgentLatencyAdjustmentFabricClockReference,double FabricClock,double uclk_freq_mhz,enum dml2_qos_param_type qos_type,unsigned int urgent_ramp_uclk_cycles,unsigned int df_qos_response_time_fclk_cycles,unsigned int max_round_trip_to_furthest_cs_fclk_cycles,unsigned int mall_overhead_fclk_cycles,double umc_urgent_ramp_latency_margin,double fabric_max_transport_latency_margin)3218 static double CalculateUrgentLatency(
3219 double UrgentLatencyPixelDataOnly,
3220 double UrgentLatencyPixelMixedWithVMData,
3221 double UrgentLatencyVMDataOnly,
3222 bool DoUrgentLatencyAdjustment,
3223 double UrgentLatencyAdjustmentFabricClockComponent,
3224 double UrgentLatencyAdjustmentFabricClockReference,
3225 double FabricClock,
3226 double uclk_freq_mhz,
3227 enum dml2_qos_param_type qos_type,
3228 unsigned int urgent_ramp_uclk_cycles,
3229 unsigned int df_qos_response_time_fclk_cycles,
3230 unsigned int max_round_trip_to_furthest_cs_fclk_cycles,
3231 unsigned int mall_overhead_fclk_cycles,
3232 double umc_urgent_ramp_latency_margin,
3233 double fabric_max_transport_latency_margin)
3234 {
3235 double urgent_latency = 0;
3236 if (qos_type == dml2_qos_param_type_dcn4x) {
3237 urgent_latency = (df_qos_response_time_fclk_cycles + mall_overhead_fclk_cycles) / FabricClock
3238 + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1 + fabric_max_transport_latency_margin / 100.0)
3239 + urgent_ramp_uclk_cycles / uclk_freq_mhz * (1 + umc_urgent_ramp_latency_margin / 100.0);
3240 } else {
3241 urgent_latency = math_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
3242 if (DoUrgentLatencyAdjustment == true) {
3243 urgent_latency = urgent_latency + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
3244 }
3245 }
3246 #ifdef __DML_VBA_DEBUG__
3247 if (qos_type == dml2_qos_param_type_dcn4x) {
3248 dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type);
3249 dml2_printf("DML::%s: urgent_ramp_uclk_cycles = %d\n", __func__, urgent_ramp_uclk_cycles);
3250 dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz);
3251 dml2_printf("DML::%s: umc_urgent_ramp_latency_margin = %f\n", __func__, umc_urgent_ramp_latency_margin);
3252 } else {
3253 dml2_printf("DML::%s: UrgentLatencyPixelDataOnly = %f\n", __func__, UrgentLatencyPixelDataOnly);
3254 dml2_printf("DML::%s: UrgentLatencyPixelMixedWithVMData = %f\n", __func__, UrgentLatencyPixelMixedWithVMData);
3255 dml2_printf("DML::%s: UrgentLatencyVMDataOnly = %f\n", __func__, UrgentLatencyVMDataOnly);
3256 dml2_printf("DML::%s: UrgentLatencyAdjustmentFabricClockComponent = %f\n", __func__, UrgentLatencyAdjustmentFabricClockComponent);
3257 dml2_printf("DML::%s: UrgentLatencyAdjustmentFabricClockReference = %f\n", __func__, UrgentLatencyAdjustmentFabricClockReference);
3258 }
3259 dml2_printf("DML::%s: FabricClock = %f\n", __func__, FabricClock);
3260 dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, urgent_latency);
3261 #endif
3262 return urgent_latency;
3263 }
3264
CalculateTripToMemory(double UrgLatency,double FabricClock,double uclk_freq_mhz,enum dml2_qos_param_type qos_type,unsigned int trip_to_memory_uclk_cycles,unsigned int max_round_trip_to_furthest_cs_fclk_cycles,unsigned int mall_overhead_fclk_cycles,double umc_max_latency_margin,double fabric_max_transport_latency_margin)3265 static double CalculateTripToMemory(
3266 double UrgLatency,
3267 double FabricClock,
3268 double uclk_freq_mhz,
3269 enum dml2_qos_param_type qos_type,
3270 unsigned int trip_to_memory_uclk_cycles,
3271 unsigned int max_round_trip_to_furthest_cs_fclk_cycles,
3272 unsigned int mall_overhead_fclk_cycles,
3273 double umc_max_latency_margin,
3274 double fabric_max_transport_latency_margin)
3275 {
3276 double trip_to_memory_us;
3277 if (qos_type == dml2_qos_param_type_dcn4x) {
3278 trip_to_memory_us = mall_overhead_fclk_cycles / FabricClock
3279 + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0)
3280 + trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0);
3281 } else {
3282 trip_to_memory_us = UrgLatency;
3283 }
3284
3285 #ifdef __DML_VBA_DEBUG__
3286 if (qos_type == dml2_qos_param_type_dcn4x) {
3287 dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type);
3288 dml2_printf("DML::%s: max_round_trip_to_furthest_cs_fclk_cycles = %d\n", __func__, max_round_trip_to_furthest_cs_fclk_cycles);
3289 dml2_printf("DML::%s: mall_overhead_fclk_cycles = %d\n", __func__, mall_overhead_fclk_cycles);
3290 dml2_printf("DML::%s: trip_to_memory_uclk_cycles = %d\n", __func__, trip_to_memory_uclk_cycles);
3291 dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz);
3292 dml2_printf("DML::%s: FabricClock = %f\n", __func__, FabricClock);
3293 dml2_printf("DML::%s: fabric_max_transport_latency_margin = %f\n", __func__, fabric_max_transport_latency_margin);
3294 dml2_printf("DML::%s: umc_max_latency_margin = %f\n", __func__, umc_max_latency_margin);
3295 } else {
3296 dml2_printf("DML::%s: UrgLatency = %f\n", __func__, UrgLatency);
3297 }
3298 dml2_printf("DML::%s: trip_to_memory_us = %f\n", __func__, trip_to_memory_us);
3299 #endif
3300
3301
3302 return trip_to_memory_us;
3303 }
3304
CalculateMetaTripToMemory(double UrgLatency,double FabricClock,double uclk_freq_mhz,enum dml2_qos_param_type qos_type,unsigned int meta_trip_to_memory_uclk_cycles,unsigned int meta_trip_to_memory_fclk_cycles,double umc_max_latency_margin,double fabric_max_transport_latency_margin)3305 static double CalculateMetaTripToMemory(
3306 double UrgLatency,
3307 double FabricClock,
3308 double uclk_freq_mhz,
3309 enum dml2_qos_param_type qos_type,
3310 unsigned int meta_trip_to_memory_uclk_cycles,
3311 unsigned int meta_trip_to_memory_fclk_cycles,
3312 double umc_max_latency_margin,
3313 double fabric_max_transport_latency_margin)
3314 {
3315 double meta_trip_to_memory_us;
3316 if (qos_type == dml2_qos_param_type_dcn4x) {
3317 meta_trip_to_memory_us = meta_trip_to_memory_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0)
3318 + meta_trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0);
3319 } else {
3320 meta_trip_to_memory_us = UrgLatency;
3321 }
3322
3323 #ifdef __DML_VBA_DEBUG__
3324 if (qos_type == dml2_qos_param_type_dcn4x) {
3325 dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type);
3326 dml2_printf("DML::%s: meta_trip_to_memory_fclk_cycles = %d\n", __func__, meta_trip_to_memory_fclk_cycles);
3327 dml2_printf("DML::%s: meta_trip_to_memory_uclk_cycles = %d\n", __func__, meta_trip_to_memory_uclk_cycles);
3328 dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz);
3329 } else {
3330 dml2_printf("DML::%s: UrgLatency = %f\n", __func__, UrgLatency);
3331 }
3332 dml2_printf("DML::%s: meta_trip_to_memory_us = %f\n", __func__, meta_trip_to_memory_us);
3333 #endif
3334
3335
3336 return meta_trip_to_memory_us;
3337 }
3338
// Derives the cursor request geometry from the cursor width (in pixels) and bpp.
//
// Outputs:
//   *cursor_lines_per_chunk - lines per cursor chunk; 0 when cursor_bpp is not 2, 32 or 64
//   *cursor_bytes_per_line  - line size in bytes, rounded up to the request size
//   *cursor_bytes_per_chunk - bytes_per_line * lines_per_chunk
//   *cursor_bytes           - bytes_per_line * cursor_width (a square cursor is assumed)
//
// An unsupported cursor_bpp together with a non-zero cursor_width is reported and
// asserted; with cursor_width == 0 it is accepted silently.
static void calculate_cursor_req_attributes(
	unsigned int cursor_width,
	unsigned int cursor_bpp,

	// output
	unsigned int *cursor_lines_per_chunk,
	unsigned int *cursor_bytes_per_line,
	unsigned int *cursor_bytes_per_chunk,
	unsigned int *cursor_bytes)
{
	unsigned int cursor_pitch = 0;
	unsigned int cursor_bytes_per_req = 0;
	unsigned int cursor_width_bytes = 0;
	unsigned int cursor_height = 0;

	//SW determines the cursor pitch to support the maximum cursor_width that will be used but the following restrictions apply.
	//- For 2bpp, cursor_pitch = 256 pixels due to min cursor request size of 64B
	//- For 32 or 64 bpp, cursor_pitch = 64, 128 or 256 pixels depending on the cursor width
	// A zero width has no meaningful log2, so the pitch is left at 0 in that case
	// instead of converting that result into an unsigned shift count.
	if (cursor_bpp == 2)
		cursor_pitch = 256;
	else if (cursor_width > 0)
		cursor_pitch = (unsigned int)1 << (unsigned int)math_ceil2(math_log((float)cursor_width, 2), 1);

	//The cursor requestor uses a cursor request size of 64B, 128B, or 256B depending on the cursor_width and cursor_bpp as follows.

	cursor_width_bytes = (unsigned int)math_ceil2((double)cursor_width * cursor_bpp / 8, 1);
	if (cursor_width_bytes <= 64)
		cursor_bytes_per_req = 64;
	else if (cursor_width_bytes <= 128)
		cursor_bytes_per_req = 128;
	else
		cursor_bytes_per_req = 256;

	//If cursor_width_bytes is greater than 256B, then multiple 256B requests are issued to fetch the entire cursor line.
	*cursor_bytes_per_line = (unsigned int)math_ceil2((double)cursor_width_bytes, cursor_bytes_per_req);

	//Nominally, the cursor chunk is 1KB or 2KB but it is restricted to a power of 2 number of lines with a maximum of 16 lines.
	if (cursor_bpp == 2) {
		*cursor_lines_per_chunk = 16;
	} else if (cursor_bpp == 32) {
		if (cursor_width <= 32)
			*cursor_lines_per_chunk = 16;
		else if (cursor_width <= 64)
			*cursor_lines_per_chunk = 8;
		else if (cursor_width <= 128)
			*cursor_lines_per_chunk = 4;
		else
			*cursor_lines_per_chunk = 2;
	} else if (cursor_bpp == 64) {
		if (cursor_width <= 16)
			*cursor_lines_per_chunk = 16;
		else if (cursor_width <= 32)
			*cursor_lines_per_chunk = 8;
		else if (cursor_width <= 64)
			*cursor_lines_per_chunk = 4;
		else if (cursor_width <= 128)
			*cursor_lines_per_chunk = 2;
		else
			*cursor_lines_per_chunk = 1;
	} else {
		// Always give the output a defined value: it is read right below to build
		// cursor_bytes_per_chunk, and would otherwise hold whatever the caller left there.
		*cursor_lines_per_chunk = 0;
		if (cursor_width > 0) {
			dml2_printf("DML::%s: Invalid cursor_bpp = %u\n", __func__, cursor_bpp);
			dml2_assert(0);
		}
	}

	*cursor_bytes_per_chunk = *cursor_bytes_per_line * *cursor_lines_per_chunk;

	// For the cursor implementation, all requested data is stored in the return buffer. Given this fact, the cursor_bytes can be directly compared with the CursorBufferSize.
	// Only cursor_width is provided for worst case sizing so assume that the cursor is square
	cursor_height = cursor_width;
	*cursor_bytes = *cursor_bytes_per_line * cursor_height;
#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: cursor_bpp = %u\n", __func__, cursor_bpp);
	dml2_printf("DML::%s: cursor_width = %u\n", __func__, cursor_width);
	dml2_printf("DML::%s: cursor_width_bytes = %u\n", __func__, cursor_width_bytes);
	dml2_printf("DML::%s: cursor_bytes_per_req = %u\n", __func__, cursor_bytes_per_req);
	dml2_printf("DML::%s: cursor_lines_per_chunk = %u\n", __func__, *cursor_lines_per_chunk);
	dml2_printf("DML::%s: cursor_bytes_per_line = %u\n", __func__, *cursor_bytes_per_line);
	dml2_printf("DML::%s: cursor_bytes_per_chunk = %u\n", __func__, *cursor_bytes_per_chunk);
	dml2_printf("DML::%s: cursor_bytes = %u\n", __func__, *cursor_bytes);
	dml2_printf("DML::%s: cursor_pitch = %u\n", __func__, cursor_pitch);
#endif
}
3423
// Computes the cursor urgent burst factor from how much time the cursor buffer
// can cover compared with the urgent latency.
//
// CursorBufferSize is in kbytes (it is multiplied by 1024 before being divided
// into chunks). When CursorWidth is 0 nothing is computed and neither output is
// written. Otherwise:
//   - buffer time <= UrgentLatency: *NotEnoughUrgentLatencyHiding = 1, factor = 0
//   - else: *NotEnoughUrgentLatencyHiding = 0,
//           factor = buffer time / (buffer time - UrgentLatency)
static void calculate_cursor_urgent_burst_factor(
	unsigned int CursorBufferSize,
	unsigned int CursorWidth,
	unsigned int cursor_bytes_per_chunk,
	unsigned int cursor_lines_per_chunk,
	double LineTime,
	double UrgentLatency,

	double *UrgentBurstFactorCursor,
	bool *NotEnoughUrgentLatencyHiding)
{
	unsigned int buffered_lines;
	double buffered_time;
	double hiding_margin;

	// No cursor: leave the outputs as the caller set them.
	if (CursorWidth == 0)
		return;

	// Whole chunks that fit in the buffer, converted to lines.
	buffered_lines = (unsigned int)math_floor2(CursorBufferSize * 1024.0 / (double)cursor_bytes_per_chunk, 1) * cursor_lines_per_chunk;

	buffered_time = buffered_lines * LineTime;
	hiding_margin = buffered_time - UrgentLatency;

	if (hiding_margin > 0) {
		*NotEnoughUrgentLatencyHiding = 0;
		*UrgentBurstFactorCursor = buffered_time / hiding_margin;
	} else {
		*NotEnoughUrgentLatencyHiding = 1;
		*UrgentBurstFactorCursor = 0;
	}

#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: LinesInCursorBuffer = %u\n", __func__, buffered_lines);
	dml2_printf("DML::%s: CursorBufferSizeInTime = %f\n", __func__, buffered_time);
	dml2_printf("DML::%s: CursorBufferSize = %u (kbytes)\n", __func__, CursorBufferSize);
	dml2_printf("DML::%s: cursor_bytes_per_chunk = %u\n", __func__, cursor_bytes_per_chunk);
	dml2_printf("DML::%s: cursor_lines_per_chunk = %u\n", __func__, cursor_lines_per_chunk);
	dml2_printf("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, *UrgentBurstFactorCursor);
	dml2_printf("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding);
#endif
}
3462
/*
 * Compute the luma and chroma urgent burst factors of one plane from the time
 * its DET allocation can sustain scan-out.
 *
 * For each component the number of lines held in the DET is floored with
 * math_floor2() against the swath height and converted to time with
 * LineTime / VRatio. For a phantom pipe a fixed 1024 * 1024 is used in place
 * of the DET size passed in.
 *
 * Outputs:
 *   UrgentBurstFactorLuma / UrgentBurstFactorChroma
 *       DET time / (DET time - UrgentLatency), or 0 when the DET time does not
 *       exceed UrgentLatency. Chroma stays 0 when BytePerPixelInDETC is 0.
 *   NotEnoughUrgentLatencyHiding
 *       Set when either evaluated component cannot cover UrgentLatency.
 */
static void CalculateUrgentBurstFactor(
	const struct dml2_plane_parameters *plane_cfg,
	unsigned int swath_width_luma_ub,
	unsigned int swath_width_chroma_ub,
	unsigned int SwathHeightY,
	unsigned int SwathHeightC,
	double LineTime,
	double UrgentLatency,
	double VRatio,
	double VRatioC,
	double BytePerPixelInDETY,
	double BytePerPixelInDETC,
	unsigned int DETBufferSizeY,
	unsigned int DETBufferSizeC,
	// Output
	double *UrgentBurstFactorLuma,
	double *UrgentBurstFactorChroma,
	bool *NotEnoughUrgentLatencyHiding)
{
	unsigned int det_bytes;
	double det_lines_luma;
	double det_lines_chroma;
	double det_time_luma;
	double det_time_chroma;

	/* Defaults: latency hidden, no burst factor; refined below. */
	*NotEnoughUrgentLatencyHiding = 0;
	*UrgentBurstFactorLuma = 0;
	*UrgentBurstFactorChroma = 0;

#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: VRatio = %f\n", __func__, VRatio);
	dml2_printf("DML::%s: VRatioC = %f\n", __func__, VRatioC);
	dml2_printf("DML::%s: DETBufferSizeY = %d\n", __func__, DETBufferSizeY);
	dml2_printf("DML::%s: DETBufferSizeC = %d\n", __func__, DETBufferSizeC);
	dml2_printf("DML::%s: BytePerPixelInDETY = %f\n", __func__, BytePerPixelInDETY);
	dml2_printf("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
	dml2_printf("DML::%s: LineTime = %f\n", __func__, LineTime);
#endif
	DML2_ASSERT(VRatio > 0);

	/* Luma */
	det_bytes = dml_is_phantom_pipe(plane_cfg) ? 1024 * 1024 : DETBufferSizeY;
	det_lines_luma = det_bytes / BytePerPixelInDETY / swath_width_luma_ub;
	det_time_luma = math_floor2(det_lines_luma, SwathHeightY) * LineTime / VRatio;

	if (det_time_luma - UrgentLatency <= 0) {
		*NotEnoughUrgentLatencyHiding = 1;
		*UrgentBurstFactorLuma = 0;
	} else {
		*UrgentBurstFactorLuma = det_time_luma / (det_time_luma - UrgentLatency);
	}

	/* Chroma, only for formats that carry a second plane */
	if (BytePerPixelInDETC > 0) {
		det_bytes = dml_is_phantom_pipe(plane_cfg) ? 1024 * 1024 : DETBufferSizeC;
		det_lines_chroma = det_bytes / BytePerPixelInDETC / swath_width_chroma_ub;
		det_time_chroma = math_floor2(det_lines_chroma, SwathHeightC) * LineTime / VRatioC;

		if (det_time_chroma - UrgentLatency <= 0) {
			*NotEnoughUrgentLatencyHiding = 1;
			*UrgentBurstFactorChroma = 0;
		} else {
			*UrgentBurstFactorChroma = det_time_chroma / (det_time_chroma - UrgentLatency);
		}
	}

#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: LinesInDETLuma = %f\n", __func__, det_lines_luma);
	dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
	dml2_printf("DML::%s: DETBufferSizeInTimeLuma = %f\n", __func__, det_time_luma);
	dml2_printf("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, *UrgentBurstFactorLuma);
	dml2_printf("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, *UrgentBurstFactorChroma);
	dml2_printf("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding);
#endif
}
3533
CalculateDCFCLKDeepSleepTdlut(const struct dml2_display_cfg * display_cfg,unsigned int NumberOfActiveSurfaces,unsigned int BytePerPixelY[],unsigned int BytePerPixelC[],unsigned int SwathWidthY[],unsigned int SwathWidthC[],unsigned int DPPPerSurface[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double Dppclk[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],unsigned int ReturnBusWidth,double dispclk,unsigned int tdlut_bytes_to_deliver[],double prefetch_swath_time_us[],double * DCFClkDeepSleep)3534 static void CalculateDCFCLKDeepSleepTdlut(
3535 const struct dml2_display_cfg *display_cfg,
3536 unsigned int NumberOfActiveSurfaces,
3537 unsigned int BytePerPixelY[],
3538 unsigned int BytePerPixelC[],
3539 unsigned int SwathWidthY[],
3540 unsigned int SwathWidthC[],
3541 unsigned int DPPPerSurface[],
3542 double PSCL_THROUGHPUT[],
3543 double PSCL_THROUGHPUT_CHROMA[],
3544 double Dppclk[],
3545 double ReadBandwidthLuma[],
3546 double ReadBandwidthChroma[],
3547 unsigned int ReturnBusWidth,
3548
3549 double dispclk,
3550 unsigned int tdlut_bytes_to_deliver[],
3551 double prefetch_swath_time_us[],
3552
3553 // Output
3554 double *DCFClkDeepSleep)
3555 {
3556 double DisplayPipeLineDeliveryTimeLuma;
3557 double DisplayPipeLineDeliveryTimeChroma;
3558 double DCFClkDeepSleepPerSurface[DML2_MAX_PLANES];
3559 double ReadBandwidth = 0.0;
3560
3561 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
3562 double pixel_rate_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
3563
3564 if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio <= 1) {
3565 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_rate_mhz;
3566 } else {
3567 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
3568 }
3569 if (BytePerPixelC[k] == 0) {
3570 DisplayPipeLineDeliveryTimeChroma = 0;
3571 } else {
3572 if (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio <= 1) {
3573 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerSurface[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_rate_mhz;
3574 } else {
3575 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
3576 }
3577 }
3578
3579 if (BytePerPixelC[k] > 0) {
3580 DCFClkDeepSleepPerSurface[k] = math_max2(__DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
3581 __DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
3582 } else {
3583 DCFClkDeepSleepPerSurface[k] = __DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
3584 }
3585 DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], pixel_rate_mhz / 16);
3586
3587 // adjust for 3dlut delivery time
3588 if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && tdlut_bytes_to_deliver[k] > 0) {
3589 double tdlut_required_deepsleep_dcfclk = (double) tdlut_bytes_to_deliver[k] / 64.0 / prefetch_swath_time_us[k];
3590
3591 dml2_printf("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
3592 dml2_printf("DML::%s: k=%d, tdlut_bytes_to_deliver = %d\n", __func__, k, tdlut_bytes_to_deliver[k]);
3593 dml2_printf("DML::%s: k=%d, prefetch_swath_time_us = %f\n", __func__, k, prefetch_swath_time_us[k]);
3594 dml2_printf("DML::%s: k=%d, tdlut_required_deepsleep_dcfclk = %f\n", __func__, k, tdlut_required_deepsleep_dcfclk);
3595
3596 // increase the deepsleep dcfclk to match the original dispclk throughput rate
3597 if (tdlut_required_deepsleep_dcfclk > DCFClkDeepSleepPerSurface[k]) {
3598 DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], tdlut_required_deepsleep_dcfclk);
3599 DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], dispclk / 4.0);
3600 }
3601 }
3602
3603 #ifdef __DML_VBA_DEBUG__
3604 dml2_printf("DML::%s: k=%u, PixelClock = %f\n", __func__, k, pixel_rate_mhz);
3605 dml2_printf("DML::%s: k=%u, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
3606 #endif
3607 }
3608
3609 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
3610 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
3611 }
3612
3613 *DCFClkDeepSleep = math_max2(8.0, __DML2_CALCS_DCFCLK_FACTOR__ * ReadBandwidth / (double)ReturnBusWidth);
3614
3615 #ifdef __DML_VBA_DEBUG__
3616 dml2_printf("DML::%s: __DML2_CALCS_DCFCLK_FACTOR__ = %f\n", __func__, __DML2_CALCS_DCFCLK_FACTOR__);
3617 dml2_printf("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
3618 dml2_printf("DML::%s: ReturnBusWidth = %u\n", __func__, ReturnBusWidth);
3619 dml2_printf("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
3620 #endif
3621
3622 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
3623 *DCFClkDeepSleep = math_max2(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
3624 }
3625
3626 dml2_printf("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
3627 }
3628
CalculateDCFCLKDeepSleep(const struct dml2_display_cfg * display_cfg,unsigned int NumberOfActiveSurfaces,unsigned int BytePerPixelY[],unsigned int BytePerPixelC[],unsigned int SwathWidthY[],unsigned int SwathWidthC[],unsigned int DPPPerSurface[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double Dppclk[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],unsigned int ReturnBusWidth,double * DCFClkDeepSleep)3629 static noinline_for_stack void CalculateDCFCLKDeepSleep(
3630 const struct dml2_display_cfg *display_cfg,
3631 unsigned int NumberOfActiveSurfaces,
3632 unsigned int BytePerPixelY[],
3633 unsigned int BytePerPixelC[],
3634 unsigned int SwathWidthY[],
3635 unsigned int SwathWidthC[],
3636 unsigned int DPPPerSurface[],
3637 double PSCL_THROUGHPUT[],
3638 double PSCL_THROUGHPUT_CHROMA[],
3639 double Dppclk[],
3640 double ReadBandwidthLuma[],
3641 double ReadBandwidthChroma[],
3642 unsigned int ReturnBusWidth,
3643
3644 // Output
3645 double *DCFClkDeepSleep)
3646 {
3647 double zero_double[DML2_MAX_PLANES];
3648 unsigned int zero_integer[DML2_MAX_PLANES];
3649
3650 memset(zero_double, 0, DML2_MAX_PLANES * sizeof(double));
3651 memset(zero_integer, 0, DML2_MAX_PLANES * sizeof(unsigned int));
3652
3653 CalculateDCFCLKDeepSleepTdlut(
3654 display_cfg,
3655 NumberOfActiveSurfaces,
3656 BytePerPixelY,
3657 BytePerPixelC,
3658 SwathWidthY,
3659 SwathWidthC,
3660 DPPPerSurface,
3661 PSCL_THROUGHPUT,
3662 PSCL_THROUGHPUT_CHROMA,
3663 Dppclk,
3664 ReadBandwidthLuma,
3665 ReadBandwidthChroma,
3666 ReturnBusWidth,
3667 0,
3668 zero_integer, //tdlut_bytes_to_deliver,
3669 zero_double, //prefetch_swath_time_us,
3670
3671 // Output
3672 DCFClkDeepSleep);
3673 }
3674
/*
 * Compute the writeback delay from the writeback scaler configuration.
 *
 * Returns 0 when the computed last output line index is negative; otherwise
 * returns that line count times the line length, plus
 * (HTotal - WritebackDestinationWidth), plus 80.
 *
 * WritebackPixelFormat and WritebackHRatio are accepted but not used by the
 * calculation.
 */
static double CalculateWriteBackDelay(
	enum dml2_source_format_class WritebackPixelFormat,
	double WritebackHRatio,
	double WritebackVRatio,
	unsigned int WritebackVTaps,
	unsigned int WritebackDestinationWidth,
	unsigned int WritebackDestinationHeight,
	unsigned int WritebackSourceHeight,
	unsigned int HTotal)
{
	double vinit;
	double line_length;
	double last_output_line;

	vinit = (WritebackVRatio + WritebackVTaps + 1) / 2;

	/* Line length is the larger of the destination width and ceil(width / 6) * vertical taps. */
	line_length = math_max2((double)WritebackDestinationWidth, math_ceil2((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);

	last_output_line = WritebackDestinationHeight - 1 - math_ceil2(((double)WritebackSourceHeight - (double)vinit) / (double)WritebackVRatio, 1.0);

	if (last_output_line < 0)
		return 0;

	return last_output_line * line_length + (HTotal - WritebackDestinationWidth) + 80;
}
3700
CalculateMaxVStartup(bool ptoi_supported,unsigned int vblank_nom_default_us,const struct dml2_timing_cfg * timing,double write_back_delay_us)3701 static unsigned int CalculateMaxVStartup(
3702 bool ptoi_supported,
3703 unsigned int vblank_nom_default_us,
3704 const struct dml2_timing_cfg *timing,
3705 double write_back_delay_us)
3706 {
3707 unsigned int vblank_size = 0;
3708 unsigned int max_vstartup_lines = 0;
3709
3710 double line_time_us = (double)timing->h_total / ((double)timing->pixel_clock_khz / 1000);
3711 unsigned int vblank_actual = timing->v_total - timing->v_active;
3712 unsigned int vblank_nom_default_in_line = (unsigned int)math_floor2((double)vblank_nom_default_us / line_time_us, 1.0);
3713 unsigned int vblank_nom_input = (unsigned int)math_min2(timing->vblank_nom, vblank_nom_default_in_line);
3714 unsigned int vblank_avail = (vblank_nom_input == 0) ? vblank_nom_default_in_line : vblank_nom_input;
3715
3716 vblank_size = (unsigned int)math_min2(vblank_actual, vblank_avail);
3717
3718 if (timing->interlaced && !ptoi_supported)
3719 max_vstartup_lines = (unsigned int)(math_floor2(vblank_size / 2.0, 1.0));
3720 else
3721 max_vstartup_lines = vblank_size - (unsigned int)math_max2(1.0, math_ceil2(write_back_delay_us / line_time_us, 1.0));
3722 #ifdef __DML_VBA_DEBUG__
3723 dml2_printf("DML::%s: VBlankNom = %u\n", __func__, timing->vblank_nom);
3724 dml2_printf("DML::%s: vblank_nom_default_us = %u\n", __func__, vblank_nom_default_us);
3725 dml2_printf("DML::%s: line_time_us = %f\n", __func__, line_time_us);
3726 dml2_printf("DML::%s: vblank_actual = %u\n", __func__, vblank_actual);
3727 dml2_printf("DML::%s: vblank_avail = %u\n", __func__, vblank_avail);
3728 dml2_printf("DML::%s: max_vstartup_lines = %u\n", __func__, max_vstartup_lines);
3729 #endif
3730 max_vstartup_lines = (unsigned int)math_min2(max_vstartup_lines, DML_MAX_VSTARTUP_START);
3731 return max_vstartup_lines;
3732 }
3733
/*
 * Work out the swath and DET buffer configuration for every active surface.
 *
 * In order, the function:
 *   1. calls CalculateSwathWidth() to obtain swath widths, maximum swath
 *      heights and requests per swath;
 *   2. computes the full-height swath size in bytes for luma and chroma;
 *   3. decides whether unbounded requesting is enabled (UnboundedRequest());
 *   4. calls CalculateDETBufferSize() to get the per-pipe DET size and the
 *      compressed buffer size;
 *   5. per surface, picks full or half swath heights and the request sizes,
 *      flags viewport size support, and splits the DET between luma/chroma;
 *   6. computes compbuf_reserved_space_64b and hw_debug5.
 *
 * All inputs and outputs are carried in *p; scratch only provides the locals
 * used by CalculateDETBufferSize().
 */
static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch *scratch,
	struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *p)
{
	unsigned int MaximumSwathHeightY[DML2_MAX_PLANES] = { 0 };
	unsigned int MaximumSwathHeightC[DML2_MAX_PLANES] = { 0 };
	unsigned int RoundedUpSwathSizeBytesY[DML2_MAX_PLANES] = { 0 };
	unsigned int RoundedUpSwathSizeBytesC[DML2_MAX_PLANES] = { 0 };
	unsigned int SwathWidthSingleDPP[DML2_MAX_PLANES] = { 0 };
	unsigned int SwathWidthSingleDPPChroma[DML2_MAX_PLANES] = { 0 };

	unsigned int TotalActiveDPP = 0;
	bool NoChromaOrLinear = true;
	unsigned int SurfaceDoingUnboundedRequest = 0;
	unsigned int DETBufferSizeInKByteForSwathCalculation;

	// Constants used only by the compbuf reserved space and hw_debug5 calculations at the end
	const long TTUFIFODEPTH = 8;
	const long MAXIMUMCOMPRESSION = 4;

#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: ForceSingleDPP = %u\n", __func__, p->ForceSingleDPP);
	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
		dml2_printf("DML::%s: DPPPerSurface[%u] = %u\n", __func__, k, p->DPPPerSurface[k]);
	}
#endif
	CalculateSwathWidth(
		p->display_cfg,
		p->ForceSingleDPP,
		p->NumberOfActiveSurfaces,
		p->ODMMode,
		p->BytePerPixY,
		p->BytePerPixC,
		p->Read256BytesBlockHeightY,
		p->Read256BytesBlockHeightC,
		p->Read256BytesBlockWidthY,
		p->Read256BytesBlockWidthC,
		p->surf_linear128_l,
		p->surf_linear128_c,
		p->DPPPerSurface,

		// Output
		p->req_per_swath_ub_l,
		p->req_per_swath_ub_c,
		SwathWidthSingleDPP,
		SwathWidthSingleDPPChroma,
		p->SwathWidth,
		p->SwathWidthChroma,
		MaximumSwathHeightY,
		MaximumSwathHeightC,
		p->swath_width_luma_ub,
		p->swath_width_chroma_ub);

	// Full-height swath size per plane: upper-bound swath width * bytes per pixel in DET * maximum swath height
	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
		p->full_swath_bytes_l[k] = (unsigned int)(p->swath_width_luma_ub[k] * p->BytePerPixDETY[k] * MaximumSwathHeightY[k]);
		p->full_swath_bytes_c[k] = (unsigned int)(p->swath_width_chroma_ub[k] * p->BytePerPixDETC[k] * MaximumSwathHeightC[k]);
#ifdef __DML_VBA_DEBUG__
		dml2_printf("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, p->DPPPerSurface[k]);
		dml2_printf("DML::%s: k=%u swath_width_luma_ub = %u\n", __func__, k, p->swath_width_luma_ub[k]);
		dml2_printf("DML::%s: k=%u BytePerPixDETY = %f\n", __func__, k, p->BytePerPixDETY[k]);
		dml2_printf("DML::%s: k=%u MaximumSwathHeightY = %u\n", __func__, k, MaximumSwathHeightY[k]);
		dml2_printf("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]);
		dml2_printf("DML::%s: k=%u swath_width_chroma_ub = %u\n", __func__, k, p->swath_width_chroma_ub[k]);
		dml2_printf("DML::%s: k=%u BytePerPixDETC = %f\n", __func__, k, p->BytePerPixDETC[k]);
		dml2_printf("DML::%s: k=%u MaximumSwathHeightC = %u\n", __func__, k, MaximumSwathHeightC[k]);
		dml2_printf("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]);
#endif
		// 420 10-bit: swath sizes are passed through math_ceil2(..., 256) (presumably rounding up to a 256-byte multiple)
		if (p->display_cfg->plane_descriptors[k].pixel_format == dml2_420_10) {
			p->full_swath_bytes_l[k] = (unsigned int)(math_ceil2((double)p->full_swath_bytes_l[k], 256));
			p->full_swath_bytes_c[k] = (unsigned int)(math_ceil2((double)p->full_swath_bytes_c[k], 256));
		}
	}

	// Gather the inputs for the unbounded request decision:
	//  - total DPP count (one per surface when ForceSingleDPP is set)
	//  - the last surface with a non-zero DPP count is remembered as the one doing unbounded requests
	//  - NoChromaOrLinear is cleared by any 420 format, rgbe_alpha, or linear tiling
	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
		TotalActiveDPP = TotalActiveDPP + (p->ForceSingleDPP ? 1 : p->DPPPerSurface[k]);
		if (p->DPPPerSurface[k] > 0)
			SurfaceDoingUnboundedRequest = k;
		if (dml_is_420(p->display_cfg->plane_descriptors[k].pixel_format) || p->display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha
			|| p->display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) {
			NoChromaOrLinear = false;
		}
	}

	*p->UnboundedRequestEnabled = UnboundedRequest(p->display_cfg->overrides.hw.force_unbounded_requesting.enable, p->display_cfg->overrides.hw.force_unbounded_requesting.value, TotalActiveDPP, NoChromaOrLinear);

	CalculateDETBufferSize(
		&scratch->CalculateDETBufferSize_locals,
		p->display_cfg,
		p->ForceSingleDPP,
		p->NumberOfActiveSurfaces,
		*p->UnboundedRequestEnabled,
		p->nomDETInKByte,
		p->MaxTotalDETInKByte,
		p->ConfigReturnBufferSizeInKByte,
		p->MinCompressedBufferSizeInKByte,
		p->ConfigReturnBufferSegmentSizeInkByte,
		p->CompressedBufferSegmentSizeInkByte,
		p->ReadBandwidthLuma,
		p->ReadBandwidthChroma,
		p->full_swath_bytes_l,
		p->full_swath_bytes_c,
		p->DPPPerSurface,

		// Output
		p->DETBufferSizeInKByte, // per hubp pipe
		p->CompressedBufferSizeInkByte);

#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: TotalActiveDPP = %u\n", __func__, TotalActiveDPP);
	dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, p->nomDETInKByte);
	dml2_printf("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, p->ConfigReturnBufferSizeInKByte);
	dml2_printf("DML::%s: UnboundedRequestEnabled = %u\n", __func__, *p->UnboundedRequestEnabled);
	dml2_printf("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *p->CompressedBufferSizeInkByte);
#endif

	// Assume every viewport is supported until one surface fails the check below
	*p->ViewportSizeSupport = true;
	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {

		// Phantom pipes use a fixed 1024 (KByte) here instead of their DET allocation
		DETBufferSizeInKByteForSwathCalculation = (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) ? 1024 : p->DETBufferSizeInKByte[k]);
#ifdef __DML_VBA_DEBUG__
		dml2_printf("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation = %u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation);
#endif
		// Swath height / request size selection. The first matching case wins:
		//  1. linear surface: full swaths, 128B requests when surf_linear128 is set, else 256B
		//  2. full luma + full chroma swaths fit in half of the DET
		//  3. luma is at least 1.5x chroma and half luma + full chroma fits: halve luma only
		//  4. luma is below 1.5x chroma and full luma + half chroma fits: halve chroma only
		//  5. otherwise halve both
		if (p->display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) {
			p->SwathHeightY[k] = MaximumSwathHeightY[k];
			p->SwathHeightC[k] = MaximumSwathHeightC[k];
			RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k];
			RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k];

			if (p->surf_linear128_l[k])
				p->request_size_bytes_luma[k] = 128;
			else
				p->request_size_bytes_luma[k] = 256;

			if (p->surf_linear128_c[k])
				p->request_size_bytes_chroma[k] = 128;
			else
				p->request_size_bytes_chroma[k] = 256;

		} else if (p->full_swath_bytes_l[k] + p->full_swath_bytes_c[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
			p->SwathHeightY[k] = MaximumSwathHeightY[k];
			p->SwathHeightC[k] = MaximumSwathHeightC[k];
			RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k];
			RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k];
			p->request_size_bytes_luma[k] = 256;
			p->request_size_bytes_chroma[k] = 256;

		} else if (p->full_swath_bytes_l[k] >= 1.5 * p->full_swath_bytes_c[k] && p->full_swath_bytes_l[k] / 2 + p->full_swath_bytes_c[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
			p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
			p->SwathHeightC[k] = MaximumSwathHeightC[k];
			RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2;
			RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k];
			// Halved swath: 128B requests when (2 bytes per pixel) matches (vertical rotation), 64B otherwise
			p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;
			p->request_size_bytes_chroma[k] = 256;

		} else if (p->full_swath_bytes_l[k] < 1.5 * p->full_swath_bytes_c[k] && p->full_swath_bytes_l[k] + p->full_swath_bytes_c[k] / 2 <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
			p->SwathHeightY[k] = MaximumSwathHeightY[k];
			p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
			RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k];
			RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2;
			p->request_size_bytes_luma[k] = 256;
			p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;

		} else {
			p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
			p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
			RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2;
			RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2;
			p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;;
			p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;;
		}

		// No chroma swath means no chroma requests
		if (p->SwathHeightC[k] == 0)
			p->request_size_bytes_chroma[k] = 0;

		// The viewport is unsupported when even half-height swaths for both planes exceed half of the DET,
		// or when a swath width exceeds its maximum (chroma only checked when a chroma swath exists)
		if ((p->full_swath_bytes_l[k] / 2 + p->full_swath_bytes_c[k] / 2 > DETBufferSizeInKByteForSwathCalculation * 1024 / 2) ||
			p->SwathWidth[k] > p->MaximumSwathWidthLuma[k] || (p->SwathHeightC[k] > 0 && p->SwathWidthChroma[k] > p->MaximumSwathWidthChroma[k])) {
			*p->ViewportSizeSupport = false;
			dml2_printf("DML::%s: k=%u full_swath_bytes_l=%u\n", __func__, k, p->full_swath_bytes_l[k]);
			dml2_printf("DML::%s: k=%u full_swath_bytes_c=%u\n", __func__, k, p->full_swath_bytes_c[k]);
			dml2_printf("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation=%u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation);
			dml2_printf("DML::%s: k=%u SwathWidth=%u\n", __func__, k, p->SwathWidth[k]);
			dml2_printf("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, p->MaximumSwathWidthLuma[k]);
			dml2_printf("DML::%s: k=%u SwathWidthChroma=%d\n", __func__, k, p->SwathWidthChroma[k]);
			dml2_printf("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, p->MaximumSwathWidthChroma[k]);
			p->ViewportSizeSupportPerSurface[k] = false;
		} else {
			p->ViewportSizeSupportPerSurface[k] = true;
		}

		// Split the DET (in bytes) between plane0 and plane1.
		// Note this uses the real DETBufferSizeInKByte[k], not the phantom-pipe override above.
		if (p->SwathHeightC[k] == 0) {
#ifdef __DML_VBA_DEBUG__
			dml2_printf("DML::%s: k=%u, All DET will be used for plane0\n", __func__, k);
#endif
			p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024;
			p->DETBufferSizeC[k] = 0;
		} else if (RoundedUpSwathSizeBytesY[k] <= 1.5 * RoundedUpSwathSizeBytesC[k]) {
#ifdef __DML_VBA_DEBUG__
			dml2_printf("DML::%s: k=%u, Half DET will be used for plane0, and half for plane1\n", __func__, k);
#endif
			p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024 / 2;
			p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 / 2;
		} else {
#ifdef __DML_VBA_DEBUG__
			dml2_printf("DML::%s: k=%u, 2/3 DET will be used for plane0, and 1/3 for plane1\n", __func__, k);
#endif
			// Luma share is 2/3 of the DET passed through math_floor2(..., 1024); chroma receives the remainder
			p->DETBufferSizeY[k] = (unsigned int)(math_floor2(p->DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024));
			p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 - p->DETBufferSizeY[k];
		}

#ifdef __DML_VBA_DEBUG__
		dml2_printf("DML::%s: k=%u SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]);
		dml2_printf("DML::%s: k=%u SwathHeightC = %u\n", __func__, k, p->SwathHeightC[k]);
		dml2_printf("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]);
		dml2_printf("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]);
		dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesY = %u\n", __func__, k, RoundedUpSwathSizeBytesY[k]);
		dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]);
		dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, p->DETBufferSizeInKByte[k]);
		dml2_printf("DML::%s: k=%u DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
		dml2_printf("DML::%s: k=%u DETBufferSizeC = %u\n", __func__, k, p->DETBufferSizeC[k]);
		dml2_printf("DML::%s: k=%u ViewportSizeSupportPerSurface = %u\n", __func__, k, p->ViewportSizeSupportPerSurface[k]);
#endif

	}

	// Reserved compressed buffer space in 64-byte units: two pixel chunks by default
	*p->compbuf_reserved_space_64b = 2 * p->pixel_chunk_size_kbytes * 1024 / 64;
	if (*p->UnboundedRequestEnabled) {
		// With unbounded requesting, reserve at least the ROB size minus TTUFIFODEPTH luma swaths of the
		// unbounded-request surface (the swath term is divided by MAXIMUMCOMPRESSION when mrq_present)
		*p->compbuf_reserved_space_64b = (unsigned int)math_ceil2(math_max2(*p->compbuf_reserved_space_64b,
			(double)(p->rob_buffer_size_kbytes * 1024 / 64) - (double)(RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest] * TTUFIFODEPTH / (p->mrq_present ? MAXIMUMCOMPRESSION : 1) / 64)), 1.0);
#ifdef __DML_VBA_DEBUG__
		dml2_printf("DML::%s: RoundedUpSwathSizeBytesY[%d] = %u\n", __func__, SurfaceDoingUnboundedRequest, RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest]);
		dml2_printf("DML::%s: rob_buffer_size_kbytes = %u\n", __func__, p->rob_buffer_size_kbytes);
#endif
	}
#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: compbuf_reserved_space_64b = %u\n", __func__, *p->compbuf_reserved_space_64b);
#endif

	// hw_debug5: with ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE defined (as it is at the top of this file) the flag
	// is simply set when more than one surface is active; the #else branch is the alternative per-surface rule
	*p->hw_debug5 = false;
#ifdef ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE
	if (p->NumberOfActiveSurfaces > 1)
		*p->hw_debug5 = true;
#else
	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
		if (!(p->mrq_present) && (!(*p->UnboundedRequestEnabled)) && (TotalActiveDPP == 1)
			&& p->display_cfg->plane_descriptors[k].surface.dcc.enable
			&& ((p->rob_buffer_size_kbytes * 1024 * (p->mrq_present ? MAXIMUMCOMPRESSION : 1)
				+ *p->CompressedBufferSizeInkByte * MAXIMUMCOMPRESSION * 1024) > TTUFIFODEPTH * (RoundedUpSwathSizeBytesY[k] + RoundedUpSwathSizeBytesC[k])))
			*p->hw_debug5 = true;
#ifdef __DML_VBA_DEBUG__
		dml2_printf("DML::%s: k=%u UnboundedRequestEnabled = %u\n", __func__, k, *p->UnboundedRequestEnabled);
		dml2_printf("DML::%s: k=%u MAXIMUMCOMPRESSION = %lu\n", __func__, k, MAXIMUMCOMPRESSION);
		dml2_printf("DML::%s: k=%u TTUFIFODEPTH = %lu\n", __func__, k, TTUFIFODEPTH);
		dml2_printf("DML::%s: k=%u CompressedBufferSizeInkByte = %u\n", __func__, k, *p->CompressedBufferSizeInkByte);
		dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]);
		dml2_printf("DML::%s: k=%u hw_debug5 = %u\n", __func__, k, *p->hw_debug5);
#endif
	}
#endif
}
3991
DecideODMMode(unsigned int HActive,double MaxDispclk,unsigned int MaximumPixelsPerLinePerDSCUnit,enum dml2_output_format_class OutFormat,bool UseDSC,unsigned int NumberOfDSCSlices,double SurfaceRequiredDISPCLKWithoutODMCombine,double SurfaceRequiredDISPCLKWithODMCombineTwoToOne,double SurfaceRequiredDISPCLKWithODMCombineThreeToOne,double SurfaceRequiredDISPCLKWithODMCombineFourToOne)3992 static enum dml2_odm_mode DecideODMMode(unsigned int HActive,
3993 double MaxDispclk,
3994 unsigned int MaximumPixelsPerLinePerDSCUnit,
3995 enum dml2_output_format_class OutFormat,
3996 bool UseDSC,
3997 unsigned int NumberOfDSCSlices,
3998 double SurfaceRequiredDISPCLKWithoutODMCombine,
3999 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne,
4000 double SurfaceRequiredDISPCLKWithODMCombineThreeToOne,
4001 double SurfaceRequiredDISPCLKWithODMCombineFourToOne)
4002 {
4003 enum dml2_odm_mode MinimumRequiredODMModeForMaxDispClock;
4004 enum dml2_odm_mode MinimumRequiredODMModeForMaxDSCHActive;
4005 enum dml2_odm_mode MinimumRequiredODMModeForMax420HActive;
4006 enum dml2_odm_mode ODMMode = dml2_odm_mode_bypass;
4007
4008 MinimumRequiredODMModeForMaxDispClock =
4009 (SurfaceRequiredDISPCLKWithoutODMCombine <= MaxDispclk) ? dml2_odm_mode_bypass :
4010 (SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= MaxDispclk) ? dml2_odm_mode_combine_2to1 :
4011 (SurfaceRequiredDISPCLKWithODMCombineThreeToOne <= MaxDispclk) ? dml2_odm_mode_combine_3to1 : dml2_odm_mode_combine_4to1;
4012 if (ODMMode < MinimumRequiredODMModeForMaxDispClock)
4013 ODMMode = MinimumRequiredODMModeForMaxDispClock;
4014
4015 if (UseDSC) {
4016 MinimumRequiredODMModeForMaxDSCHActive =
4017 (HActive <= 1 * MaximumPixelsPerLinePerDSCUnit) ? dml2_odm_mode_bypass :
4018 (HActive <= 2 * MaximumPixelsPerLinePerDSCUnit) ? dml2_odm_mode_combine_2to1 :
4019 (HActive <= 3 * MaximumPixelsPerLinePerDSCUnit) ? dml2_odm_mode_combine_3to1 : dml2_odm_mode_combine_4to1;
4020 if (ODMMode < MinimumRequiredODMModeForMaxDSCHActive)
4021 ODMMode = MinimumRequiredODMModeForMaxDSCHActive;
4022 }
4023
4024 if (OutFormat == dml2_420) {
4025 MinimumRequiredODMModeForMax420HActive =
4026 (HActive <= 1 * DML2_MAX_FMT_420_BUFFER_WIDTH) ? dml2_odm_mode_bypass :
4027 (HActive <= 2 * DML2_MAX_FMT_420_BUFFER_WIDTH) ? dml2_odm_mode_combine_2to1 :
4028 (HActive <= 3 * DML2_MAX_FMT_420_BUFFER_WIDTH) ? dml2_odm_mode_combine_3to1 : dml2_odm_mode_combine_4to1;
4029 if (ODMMode < MinimumRequiredODMModeForMax420HActive)
4030 ODMMode = MinimumRequiredODMModeForMax420HActive;
4031 }
4032
4033 if (UseDSC) {
4034 if (ODMMode == dml2_odm_mode_bypass && NumberOfDSCSlices > 4)
4035 ODMMode = dml2_odm_mode_combine_2to1;
4036 if (ODMMode == dml2_odm_mode_combine_2to1 && NumberOfDSCSlices > 8)
4037 ODMMode = dml2_odm_mode_combine_3to1;
4038 if (ODMMode == dml2_odm_mode_combine_3to1 && NumberOfDSCSlices != 12)
4039 ODMMode = dml2_odm_mode_combine_4to1;
4040 }
4041
4042 return ODMMode;
4043 }
4044
CalculateODMConstraints(enum dml2_odm_mode ODMUse,double SurfaceRequiredDISPCLKWithoutODMCombine,double SurfaceRequiredDISPCLKWithODMCombineTwoToOne,double SurfaceRequiredDISPCLKWithODMCombineThreeToOne,double SurfaceRequiredDISPCLKWithODMCombineFourToOne,unsigned int MaximumPixelsPerLinePerDSCUnit,double * DISPCLKRequired,unsigned int * NumberOfDPPRequired,unsigned int * MaxHActiveForDSC,unsigned int * MaxDSCSlices,unsigned int * MaxHActiveFor420)4045 static void CalculateODMConstraints(
4046 enum dml2_odm_mode ODMUse,
4047 double SurfaceRequiredDISPCLKWithoutODMCombine,
4048 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne,
4049 double SurfaceRequiredDISPCLKWithODMCombineThreeToOne,
4050 double SurfaceRequiredDISPCLKWithODMCombineFourToOne,
4051 unsigned int MaximumPixelsPerLinePerDSCUnit,
4052 /* Output */
4053 double *DISPCLKRequired,
4054 unsigned int *NumberOfDPPRequired,
4055 unsigned int *MaxHActiveForDSC,
4056 unsigned int *MaxDSCSlices,
4057 unsigned int *MaxHActiveFor420)
4058 {
4059 switch (ODMUse) {
4060 case dml2_odm_mode_combine_2to1:
4061 *DISPCLKRequired = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
4062 *NumberOfDPPRequired = 2;
4063 break;
4064 case dml2_odm_mode_combine_3to1:
4065 *DISPCLKRequired = SurfaceRequiredDISPCLKWithODMCombineThreeToOne;
4066 *NumberOfDPPRequired = 3;
4067 break;
4068 case dml2_odm_mode_combine_4to1:
4069 *DISPCLKRequired = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
4070 *NumberOfDPPRequired = 4;
4071 break;
4072 case dml2_odm_mode_auto:
4073 case dml2_odm_mode_split_1to2:
4074 case dml2_odm_mode_mso_1to2:
4075 case dml2_odm_mode_mso_1to4:
4076 case dml2_odm_mode_bypass:
4077 default:
4078 *DISPCLKRequired = SurfaceRequiredDISPCLKWithoutODMCombine;
4079 *NumberOfDPPRequired = 1;
4080 break;
4081 }
4082 *MaxHActiveForDSC = *NumberOfDPPRequired * MaximumPixelsPerLinePerDSCUnit;
4083 *MaxDSCSlices = *NumberOfDPPRequired * DML_MAX_NUM_OF_SLICES_PER_DSC;
4084 *MaxHActiveFor420 = *NumberOfDPPRequired * DML2_MAX_FMT_420_BUFFER_WIDTH;
4085 }
4086
/*
 * Check whether an ODM mode, together with the constraints derived for it,
 * can be supported for the given timing and output configuration.
 *
 * Returns false as soon as any of the following fails, true otherwise:
 *   - DISPCLKRequired fits within MaxDispclk
 *   - the extra DPPs fit within MaxNumDPP
 *   - HActive divides evenly across the ODM segments (skipped for 3:1
 *     without DSC) and by the horizontal timing divider of the mode
 *   - with DSC: HActive and slice count are within the limits, HActive is a
 *     multiple of the slice count, the slice count is a multiple of the DPP
 *     count, and 3:1 combine uses exactly MaxDSCSlices
 *   - with 4:2:0 output: HActive is within MaxHActiveFor420
 *
 * NumberOfDPPRequired (and NumberOfDSCSlices when UseDSC is set) are used as
 * divisors and therefore must be non-zero.
 */
static bool ValidateODMMode(enum dml2_odm_mode ODMMode,
	double MaxDispclk,
	unsigned int HActive,
	enum dml2_output_format_class OutFormat,
	bool UseDSC,
	unsigned int NumberOfDSCSlices,
	unsigned int TotalNumberOfActiveDPP,
	unsigned int MaxNumDPP,
	double DISPCLKRequired,
	unsigned int NumberOfDPPRequired,
	unsigned int MaxHActiveForDSC,
	unsigned int MaxDSCSlices,
	unsigned int MaxHActiveFor420)
{
	const bool is_3to1 = (ODMMode == dml2_odm_mode_combine_3to1);
	const bool is_420 = (OutFormat == dml2_420);
	/* 3:1 segments are only treated as symmetrical when DSC is in use. */
	const bool segments_symmetrical = !is_3to1 || UseDSC;
	unsigned int pixels_per_cycle = 1;
	unsigned int h_timing_divisor;

	if (is_420 || OutFormat == dml2_n422)
		pixels_per_cycle = 2;

	if (ODMMode == dml2_odm_mode_combine_4to1 || is_3to1)
		h_timing_divisor = 4;
	else if (ODMMode == dml2_odm_mode_combine_2to1)
		h_timing_divisor = 2;
	else
		h_timing_divisor = pixels_per_cycle;

	if (DISPCLKRequired > MaxDispclk)
		return false;

	if ((TotalNumberOfActiveDPP + NumberOfDPPRequired) > MaxNumDPP)
		return false;

	if (segments_symmetrical &&
		(HActive % (NumberOfDPPRequired * pixels_per_cycle)) != 0)
		return false;

	/*
	 * TODO - OTG_H_TOTAL, OTG_H_BLANK_START/END and
	 * OTG_H_SYNC_A_START/END all need to be divisible by the h timing
	 * div mode. This logic only checks H active.
	 */
	if ((HActive % h_timing_divisor) != 0)
		return false;

	if (UseDSC &&
		(HActive > MaxHActiveForDSC ||
		 NumberOfDSCSlices > MaxDSCSlices ||
		 (HActive % NumberOfDSCSlices) != 0 ||
		 (NumberOfDSCSlices % NumberOfDPPRequired) != 0 ||
		 (is_3to1 && NumberOfDSCSlices != MaxDSCSlices)))
		return false;

	if (is_420 && HActive > MaxHActiveFor420)
		return false;

	return true;
}
4146
CalculateODMMode(unsigned int MaximumPixelsPerLinePerDSCUnit,unsigned int HActive,enum dml2_output_format_class OutFormat,enum dml2_output_encoder_class Output,enum dml2_odm_mode ODMUse,double MaxDispclk,bool DSCEnable,unsigned int TotalNumberOfActiveDPP,unsigned int MaxNumDPP,double PixelClock,unsigned int NumberOfDSCSlices,bool * TotalAvailablePipesSupport,unsigned int * NumberOfDPP,enum dml2_odm_mode * ODMMode,double * RequiredDISPCLKPerSurface)4147 static noinline_for_stack void CalculateODMMode(
4148 unsigned int MaximumPixelsPerLinePerDSCUnit,
4149 unsigned int HActive,
4150 enum dml2_output_format_class OutFormat,
4151 enum dml2_output_encoder_class Output,
4152 enum dml2_odm_mode ODMUse,
4153 double MaxDispclk,
4154 bool DSCEnable,
4155 unsigned int TotalNumberOfActiveDPP,
4156 unsigned int MaxNumDPP,
4157 double PixelClock,
4158 unsigned int NumberOfDSCSlices,
4159
4160 // Output
4161 bool *TotalAvailablePipesSupport,
4162 unsigned int *NumberOfDPP,
4163 enum dml2_odm_mode *ODMMode,
4164 double *RequiredDISPCLKPerSurface)
4165 {
4166 double SurfaceRequiredDISPCLKWithoutODMCombine;
4167 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
4168 double SurfaceRequiredDISPCLKWithODMCombineThreeToOne;
4169 double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
4170 double DISPCLKRequired;
4171 unsigned int NumberOfDPPRequired;
4172 unsigned int MaxHActiveForDSC;
4173 unsigned int MaxDSCSlices;
4174 unsigned int MaxHActiveFor420;
4175 bool success;
4176 bool UseDSC = DSCEnable && (NumberOfDSCSlices > 0);
4177 enum dml2_odm_mode DecidedODMMode;
4178
4179 SurfaceRequiredDISPCLKWithoutODMCombine = CalculateRequiredDispclk(dml2_odm_mode_bypass, PixelClock);
4180 SurfaceRequiredDISPCLKWithODMCombineTwoToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_2to1, PixelClock);
4181 SurfaceRequiredDISPCLKWithODMCombineThreeToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_3to1, PixelClock);
4182 SurfaceRequiredDISPCLKWithODMCombineFourToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_4to1, PixelClock);
4183 #ifdef __DML_VBA_DEBUG__
4184 dml2_printf("DML::%s: ODMUse = %d\n", __func__, ODMUse);
4185 dml2_printf("DML::%s: Output = %d\n", __func__, Output);
4186 dml2_printf("DML::%s: DSCEnable = %d\n", __func__, DSCEnable);
4187 dml2_printf("DML::%s: MaxDispclk = %f\n", __func__, MaxDispclk);
4188 dml2_printf("DML::%s: MaximumPixelsPerLinePerDSCUnit = %d\n", __func__, MaximumPixelsPerLinePerDSCUnit);
4189 dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithoutODMCombine = %f\n", __func__, SurfaceRequiredDISPCLKWithoutODMCombine);
4190 dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithODMCombineTwoToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineTwoToOne);
4191 dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithODMCombineThreeToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineThreeToOne);
4192 dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithODMCombineFourToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineFourToOne);
4193 #endif
4194 if (ODMUse == dml2_odm_mode_auto)
4195 DecidedODMMode = DecideODMMode(HActive,
4196 MaxDispclk,
4197 MaximumPixelsPerLinePerDSCUnit,
4198 OutFormat,
4199 UseDSC,
4200 NumberOfDSCSlices,
4201 SurfaceRequiredDISPCLKWithoutODMCombine,
4202 SurfaceRequiredDISPCLKWithODMCombineTwoToOne,
4203 SurfaceRequiredDISPCLKWithODMCombineThreeToOne,
4204 SurfaceRequiredDISPCLKWithODMCombineFourToOne);
4205 else
4206 DecidedODMMode = ODMUse;
4207 CalculateODMConstraints(DecidedODMMode,
4208 SurfaceRequiredDISPCLKWithoutODMCombine,
4209 SurfaceRequiredDISPCLKWithODMCombineTwoToOne,
4210 SurfaceRequiredDISPCLKWithODMCombineThreeToOne,
4211 SurfaceRequiredDISPCLKWithODMCombineFourToOne,
4212 MaximumPixelsPerLinePerDSCUnit,
4213 &DISPCLKRequired,
4214 &NumberOfDPPRequired,
4215 &MaxHActiveForDSC,
4216 &MaxDSCSlices,
4217 &MaxHActiveFor420);
4218 success = ValidateODMMode(DecidedODMMode,
4219 MaxDispclk,
4220 HActive,
4221 OutFormat,
4222 UseDSC,
4223 NumberOfDSCSlices,
4224 TotalNumberOfActiveDPP,
4225 MaxNumDPP,
4226 DISPCLKRequired,
4227 NumberOfDPPRequired,
4228 MaxHActiveForDSC,
4229 MaxDSCSlices,
4230 MaxHActiveFor420);
4231
4232 *ODMMode = DecidedODMMode;
4233 *TotalAvailablePipesSupport = success;
4234 *NumberOfDPP = NumberOfDPPRequired;
4235 *RequiredDISPCLKPerSurface = success ? DISPCLKRequired : 0;
4236 #ifdef __DML_VBA_DEBUG__
4237 dml2_printf("DML::%s: ODMMode = %d\n", __func__, *ODMMode);
4238 dml2_printf("DML::%s: NumberOfDPP = %d\n", __func__, *NumberOfDPP);
4239 dml2_printf("DML::%s: TotalAvailablePipesSupport = %d\n", __func__, *TotalAvailablePipesSupport);
4240 dml2_printf("DML::%s: RequiredDISPCLKPerSurface = %f\n", __func__, *RequiredDISPCLKPerSurface);
4241 #endif
4242 }
4243
CalculateOutputLink(struct dml2_core_internal_scratch * s,double PHYCLK,double PHYCLKD18,double PHYCLKD32,double Downspreading,enum dml2_output_encoder_class Output,enum dml2_output_format_class OutputFormat,unsigned int HTotal,unsigned int HActive,double PixelClockBackEnd,double ForcedOutputLinkBPP,unsigned int DSCInputBitPerComponent,unsigned int NumberOfDSCSlices,double AudioSampleRate,unsigned int AudioSampleLayout,enum dml2_odm_mode ODMModeNoDSC,enum dml2_odm_mode ODMModeDSC,enum dml2_dsc_enable_option DSCEnable,unsigned int OutputLinkDPLanes,enum dml2_output_link_dp_rate OutputLinkDPRate,bool * RequiresDSC,bool * RequiresFEC,double * OutBpp,enum dml2_core_internal_output_type * OutputType,enum dml2_core_internal_output_type_rate * OutputRate,unsigned int * RequiredSlots)4244 static noinline_for_stack void CalculateOutputLink(
4245 struct dml2_core_internal_scratch *s,
4246 double PHYCLK,
4247 double PHYCLKD18,
4248 double PHYCLKD32,
4249 double Downspreading,
4250 enum dml2_output_encoder_class Output,
4251 enum dml2_output_format_class OutputFormat,
4252 unsigned int HTotal,
4253 unsigned int HActive,
4254 double PixelClockBackEnd,
4255 double ForcedOutputLinkBPP,
4256 unsigned int DSCInputBitPerComponent,
4257 unsigned int NumberOfDSCSlices,
4258 double AudioSampleRate,
4259 unsigned int AudioSampleLayout,
4260 enum dml2_odm_mode ODMModeNoDSC,
4261 enum dml2_odm_mode ODMModeDSC,
4262 enum dml2_dsc_enable_option DSCEnable,
4263 unsigned int OutputLinkDPLanes,
4264 enum dml2_output_link_dp_rate OutputLinkDPRate,
4265
4266 // Output
4267 bool *RequiresDSC,
4268 bool *RequiresFEC,
4269 double *OutBpp,
4270 enum dml2_core_internal_output_type *OutputType,
4271 enum dml2_core_internal_output_type_rate *OutputRate,
4272 unsigned int *RequiredSlots)
4273 {
4274 bool LinkDSCEnable;
4275 unsigned int dummy;
4276 *RequiresDSC = false;
4277 *RequiresFEC = false;
4278 *OutBpp = 0;
4279
4280 *OutputType = dml2_core_internal_output_type_unknown;
4281 *OutputRate = dml2_core_internal_output_rate_unknown;
4282
4283 #ifdef __DML_VBA_DEBUG__
4284 dml2_printf("DML::%s: DSCEnable = %u (dis, en, en_if_necessary)\n", __func__, DSCEnable);
4285 dml2_printf("DML::%s: PHYCLK = %f\n", __func__, PHYCLK);
4286 dml2_printf("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd);
4287 dml2_printf("DML::%s: AudioSampleRate = %f\n", __func__, AudioSampleRate);
4288 dml2_printf("DML::%s: HActive = %u\n", __func__, HActive);
4289 dml2_printf("DML::%s: HTotal = %u\n", __func__, HTotal);
4290 dml2_printf("DML::%s: ODMModeNoDSC = %u\n", __func__, ODMModeNoDSC);
4291 dml2_printf("DML::%s: ODMModeDSC = %u\n", __func__, ODMModeDSC);
4292 dml2_printf("DML::%s: ForcedOutputLinkBPP = %f\n", __func__, ForcedOutputLinkBPP);
4293 dml2_printf("DML::%s: Output (encoder) = %u\n", __func__, Output);
4294 dml2_printf("DML::%s: OutputLinkDPRate = %u\n", __func__, OutputLinkDPRate);
4295 #endif
4296 {
4297 if (Output == dml2_hdmi) {
4298 *RequiresDSC = false;
4299 *RequiresFEC = false;
4300 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, math_min2(600, PHYCLK) * 10, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, false, Output,
4301 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
4302 //OutputTypeAndRate = "HDMI";
4303 *OutputType = dml2_core_internal_output_type_hdmi;
4304 } else if (Output == dml2_dp || Output == dml2_dp2p0 || Output == dml2_edp) {
4305 if (DSCEnable == dml2_dsc_enable) {
4306 *RequiresDSC = true;
4307 LinkDSCEnable = true;
4308 if (Output == dml2_dp || Output == dml2_dp2p0) {
4309 *RequiresFEC = true;
4310 } else {
4311 *RequiresFEC = false;
4312 }
4313 } else {
4314 *RequiresDSC = false;
4315 LinkDSCEnable = false;
4316 if (Output == dml2_dp2p0) {
4317 *RequiresFEC = true;
4318 } else {
4319 *RequiresFEC = false;
4320 }
4321 }
4322 if (Output == dml2_dp2p0) {
4323 *OutBpp = 0;
4324 if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr10) && PHYCLKD32 >= 10000.0 / 32) {
4325 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4326 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4327 if (*OutBpp == 0 && PHYCLKD32 < 13500.0 / 32 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
4328 *RequiresDSC = true;
4329 LinkDSCEnable = true;
4330 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4331 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4332 }
4333 //OutputTypeAndRate = Output & " UHBR10";
4334 *OutputType = dml2_core_internal_output_type_dp2p0;
4335 *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr10;
4336 }
4337 if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr13p5) && *OutBpp == 0 && PHYCLKD32 >= 13500.0 / 32) {
4338 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4339 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4340
4341 if (*OutBpp == 0 && PHYCLKD32 < 20000.0 / 32 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
4342 *RequiresDSC = true;
4343 LinkDSCEnable = true;
4344 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4345 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4346 }
4347 //OutputTypeAndRate = Output & " UHBR13p5";
4348 *OutputType = dml2_core_internal_output_type_dp2p0;
4349 *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr13p5;
4350 }
4351 if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr20) && *OutBpp == 0 && PHYCLKD32 >= 20000.0 / 32) {
4352 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4353 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4354 if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
4355 *RequiresDSC = true;
4356 LinkDSCEnable = true;
4357 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4358 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4359 }
4360 //OutputTypeAndRate = Output & " UHBR20";
4361 *OutputType = dml2_core_internal_output_type_dp2p0;
4362 *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr20;
4363 }
4364 } else { // output is dp or edp
4365 *OutBpp = 0;
4366 if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr) && PHYCLK >= 270) {
4367 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4368 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4369 if (*OutBpp == 0 && PHYCLK < 540 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
4370 *RequiresDSC = true;
4371 LinkDSCEnable = true;
4372 if (Output == dml2_dp) {
4373 *RequiresFEC = true;
4374 }
4375 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4376 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4377 }
4378 //OutputTypeAndRate = Output & " HBR";
4379 *OutputType = (Output == dml2_dp) ? dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp;
4380 *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr;
4381 }
4382 if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr2) && *OutBpp == 0 && PHYCLK >= 540) {
4383 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4384 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4385
4386 if (*OutBpp == 0 && PHYCLK < 810 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
4387 *RequiresDSC = true;
4388 LinkDSCEnable = true;
4389 if (Output == dml2_dp) {
4390 *RequiresFEC = true;
4391 }
4392 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4393 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4394 }
4395 //OutputTypeAndRate = Output & " HBR2";
4396 *OutputType = (Output == dml2_dp) ? dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp;
4397 *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr2;
4398 }
4399 if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr3) && *OutBpp == 0 && PHYCLK >= 810) { // VBA_ERROR, vba code doesn't have hbr3 check
4400 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4401 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4402
4403 if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
4404 *RequiresDSC = true;
4405 LinkDSCEnable = true;
4406 if (Output == dml2_dp) {
4407 *RequiresFEC = true;
4408 }
4409 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4410 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4411 }
4412 //OutputTypeAndRate = Output & " HBR3";
4413 *OutputType = (Output == dml2_dp) ? dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp;
4414 *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr3;
4415 }
4416 }
4417 } else if (Output == dml2_hdmifrl) {
4418 if (DSCEnable == dml2_dsc_enable) {
4419 *RequiresDSC = true;
4420 LinkDSCEnable = true;
4421 *RequiresFEC = true;
4422 } else {
4423 *RequiresDSC = false;
4424 LinkDSCEnable = false;
4425 *RequiresFEC = false;
4426 }
4427 *OutBpp = 0;
4428 if (PHYCLKD18 >= 3000.0 / 18) {
4429 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 3000, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
4430 //OutputTypeAndRate = Output & "3x3";
4431 *OutputType = dml2_core_internal_output_type_hdmifrl;
4432 *OutputRate = dml2_core_internal_output_rate_hdmi_rate_3x3;
4433 }
4434 if (*OutBpp == 0 && PHYCLKD18 >= 6000.0 / 18) {
4435 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 6000, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
4436 //OutputTypeAndRate = Output & "6x3";
4437 *OutputType = dml2_core_internal_output_type_hdmifrl;
4438 *OutputRate = dml2_core_internal_output_rate_hdmi_rate_6x3;
4439 }
4440 if (*OutBpp == 0 && PHYCLKD18 >= 6000.0 / 18) {
4441 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 6000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
4442 //OutputTypeAndRate = Output & "6x4";
4443 *OutputType = dml2_core_internal_output_type_hdmifrl;
4444 *OutputRate = dml2_core_internal_output_rate_hdmi_rate_6x4;
4445 }
4446 if (*OutBpp == 0 && PHYCLKD18 >= 8000.0 / 18) {
4447 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 8000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
4448 //OutputTypeAndRate = Output & "8x4";
4449 *OutputType = dml2_core_internal_output_type_hdmifrl;
4450 *OutputRate = dml2_core_internal_output_rate_hdmi_rate_8x4;
4451 }
4452 if (*OutBpp == 0 && PHYCLKD18 >= 10000.0 / 18) {
4453 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 10000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
4454 if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0 && PHYCLKD18 < 12000.0 / 18) {
4455 *RequiresDSC = true;
4456 LinkDSCEnable = true;
4457 *RequiresFEC = true;
4458 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 10000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
4459 }
4460 //OutputTypeAndRate = Output & "10x4";
4461 *OutputType = dml2_core_internal_output_type_hdmifrl;
4462 *OutputRate = dml2_core_internal_output_rate_hdmi_rate_10x4;
4463 }
4464 if (*OutBpp == 0 && PHYCLKD18 >= 12000.0 / 18) {
4465 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 12000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
4466 if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
4467 *RequiresDSC = true;
4468 LinkDSCEnable = true;
4469 *RequiresFEC = true;
4470 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 12000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
4471 }
4472 //OutputTypeAndRate = Output & "12x4";
4473 *OutputType = dml2_core_internal_output_type_hdmifrl;
4474 *OutputRate = dml2_core_internal_output_rate_hdmi_rate_12x4;
4475 }
4476 }
4477 }
4478 #ifdef __DML_VBA_DEBUG__
4479 dml2_printf("DML::%s: RequiresDSC = %u\n", __func__, *RequiresDSC);
4480 dml2_printf("DML::%s: RequiresFEC = %u\n", __func__, *RequiresFEC);
4481 dml2_printf("DML::%s: OutBpp = %f\n", __func__, *OutBpp);
4482 #endif
4483 }
4484
/*
 * Return the DISPCLK needed by a writeback pipe: the largest of three
 * independent estimates, each scaled from PixelClock.
 *
 *   horizontal:  ceil(HTaps / 8) / HRatio
 *   vertical:    (VTaps * ceil(DestinationWidth / 6) + 8) / HTotal
 *   line buffer: VTaps * (DestinationWidth * VTaps - LineBufferSize / 57)
 *                / 6 / SourceWidth
 *
 * WritebackPixelFormat and WritebackVRatio are accepted but not used.
 */
static double CalculateWriteBackDISPCLK(
	enum dml2_source_format_class WritebackPixelFormat,
	double PixelClock,
	double WritebackHRatio,
	double WritebackVRatio,
	unsigned int WritebackHTaps,
	unsigned int WritebackVTaps,
	unsigned int WritebackSourceWidth,
	unsigned int WritebackDestinationWidth,
	unsigned int HTotal,
	unsigned int WritebackLineBufferSize)
{
	const double h_tap_groups = math_ceil2((double)WritebackHTaps / 8.0, 1);
	const double v_cycles_per_line =
		WritebackVTaps * math_ceil2((double)WritebackDestinationWidth / 6.0, 1) + 8.0;
	/* The width * taps product is formed in unsigned arithmetic before the subtraction. */
	const double lb_excess =
		WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0;

	const double dispclk_horizontal = PixelClock * h_tap_groups / WritebackHRatio;
	const double dispclk_vertical = PixelClock * v_cycles_per_line / (double)HTotal;
	const double dispclk_line_buffer =
		PixelClock * WritebackVTaps * lb_excess / 6.0 / (double)WritebackSourceWidth;

	return math_max3(dispclk_horizontal, dispclk_vertical, dispclk_line_buffer);
}
4504
/*
 * Return the DTBCLK required for an output.
 *
 * Without DSC the result is PixelClock / 4 * OutputBpp / 24, with a floor
 * of 25.
 *
 * With DSC the result is the largest of the pixel word rate, the average
 * tribyte rate over a whole line and the tribyte rate during HActive (each
 * divided by 4), again with a floor of 25, then scaled by 1.002.
 *
 * On the DSC path DSCSlices, HTotal and HActive are used as divisors, so
 * they must be non-zero when DSCEnable is true.
 */
static double RequiredDTBCLK(
	bool DSCEnable,
	double PixelClock,
	enum dml2_output_format_class OutputFormat,
	double OutputBpp,
	unsigned int DSCSlices,
	unsigned int HTotal,
	unsigned int HActive,
	unsigned int AudioRate,
	unsigned int AudioLayout)
{
	double PixelWordRate;
	double SliceWidth;
	double HCActive;
	double HCBlank;
	double AverageTribyteRate;
	double HActiveTribyteRate;

	if (!DSCEnable)
		return math_max2(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);

	/* Formats other than 4:4:4 run at half the pixel clock here. */
	PixelWordRate = PixelClock / (OutputFormat == dml2_444 ? 1 : 2);

	/*
	 * Divide in floating point: HActive / DSCSlices on two unsigned ints
	 * would truncate before math_ceil2() sees the value, so a slice width
	 * that is not a whole number would be rounded down instead of up.
	 * This matches how DSCDelayRequirement() derives the slice width.
	 */
	SliceWidth = math_ceil2((double)HActive / (double)DSCSlices, 1);

	HCActive = math_ceil2(DSCSlices * math_ceil2(OutputBpp * SliceWidth / 8.0, 1) / 3.0, 1);
	HCBlank = 64 + 32 * math_ceil2(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
	AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
	HActiveTribyteRate = PixelWordRate * HCActive / HActive;

	return math_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
}
4527
/*
 * Return the DSC delay for an output, or 0 when DSC is disabled or
 * OutputBpp is 0.
 *
 * The delay is built in three steps:
 *   1. the sum of dscceComputeDelay() and dscComputeDelay(), multiplied by
 *      the ODM combine factor (4, 3 or 2 for the combine modes, 1
 *      otherwise), with the slice count divided by that same factor;
 *   2. plus (HTotal - HActive) for every started multiple of HActive in
 *      the step 1 delay;
 *   3. scaled by PixelClock / PixelClockBackEnd.
 * Steps 2 and 3 each truncate back to unsigned int.
 */
static unsigned int DSCDelayRequirement(
	bool DSCEnabled,
	enum dml2_odm_mode ODMMode,
	unsigned int DSCInputBitPerComponent,
	double OutputBpp,
	unsigned int HActive,
	unsigned int HTotal,
	unsigned int NumberOfDSCSlices,
	enum dml2_output_format_class OutputFormat,
	enum dml2_output_encoder_class Output,
	double PixelClock,
	double PixelClockBackEnd)
{
	unsigned int delay = 0;

	if (DSCEnabled && OutputBpp != 0) {
		unsigned int odm_factor;
		unsigned int slice_width;
		unsigned int slices_per_segment;
		unsigned int h_blank;
		double lines_spanned;

		switch (ODMMode) {
		case dml2_odm_mode_combine_4to1:
			odm_factor = 4;
			break;
		case dml2_odm_mode_combine_3to1:
			odm_factor = 3;
			break;
		case dml2_odm_mode_combine_2to1:
			odm_factor = 2;
			break;
		default:
			odm_factor = 1;
			break;
		}

		slice_width = (unsigned int)(math_ceil2((double)HActive / (double)NumberOfDSCSlices, 1.0));
		slices_per_segment = NumberOfDSCSlices / odm_factor;

		delay = odm_factor * (dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, slice_width,
			slices_per_segment, OutputFormat, Output) + dscComputeDelay(OutputFormat, Output));

		/* Add one horizontal blank for every line the delay reaches into. */
		h_blank = HTotal - HActive;
		lines_spanned = math_ceil2((double)delay / (double)HActive, 1.0);
		delay = (unsigned int)(delay + h_blank * lines_spanned);

		delay = (unsigned int)(delay * PixelClock / PixelClockBackEnd);
	}

#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: DSCEnabled= %u\n", __func__, DSCEnabled);
	dml2_printf("DML::%s: ODMMode = %u\n", __func__, ODMMode);
	dml2_printf("DML::%s: OutputBpp = %f\n", __func__, OutputBpp);
	dml2_printf("DML::%s: HActive = %u\n", __func__, HActive);
	dml2_printf("DML::%s: HTotal = %u\n", __func__, HTotal);
	dml2_printf("DML::%s: PixelClock = %f\n", __func__, PixelClock);
	dml2_printf("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd);
	dml2_printf("DML::%s: OutputFormat = %u\n", __func__, OutputFormat);
	dml2_printf("DML::%s: DSCInputBitPerComponent = %u\n", __func__, DSCInputBitPerComponent);
	dml2_printf("DML::%s: NumberOfDSCSlices = %u\n", __func__, NumberOfDSCSlices);
	dml2_printf("DML::%s: DSCDelayRequirement_val = %u\n", __func__, delay);
#endif

	return delay;
}
4578
CalculateSurfaceSizeInMall(const struct dml2_display_cfg * display_cfg,unsigned int NumberOfActiveSurfaces,unsigned int MALLAllocatedForDCN,unsigned int BytesPerPixelY[],unsigned int BytesPerPixelC[],unsigned int Read256BytesBlockWidthY[],unsigned int Read256BytesBlockWidthC[],unsigned int Read256BytesBlockHeightY[],unsigned int Read256BytesBlockHeightC[],unsigned int ReadBlockWidthY[],unsigned int ReadBlockWidthC[],unsigned int ReadBlockHeightY[],unsigned int ReadBlockHeightC[],unsigned int SurfaceSizeInMALL[],bool * ExceededMALLSize)4579 static void CalculateSurfaceSizeInMall(
4580 const struct dml2_display_cfg *display_cfg,
4581 unsigned int NumberOfActiveSurfaces,
4582 unsigned int MALLAllocatedForDCN,
4583 unsigned int BytesPerPixelY[],
4584 unsigned int BytesPerPixelC[],
4585 unsigned int Read256BytesBlockWidthY[],
4586 unsigned int Read256BytesBlockWidthC[],
4587 unsigned int Read256BytesBlockHeightY[],
4588 unsigned int Read256BytesBlockHeightC[],
4589 unsigned int ReadBlockWidthY[],
4590 unsigned int ReadBlockWidthC[],
4591 unsigned int ReadBlockHeightY[],
4592 unsigned int ReadBlockHeightC[],
4593
4594 // Output
4595 unsigned int SurfaceSizeInMALL[],
4596 bool *ExceededMALLSize)
4597 {
4598 unsigned int TotalSurfaceSizeInMALLForSS = 0;
4599 unsigned int TotalSurfaceSizeInMALLForSubVP = 0;
4600 unsigned int MALLAllocatedForDCNInBytes = MALLAllocatedForDCN * 1024 * 1024;
4601
4602 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
4603 const struct dml2_composition_cfg *composition = &display_cfg->plane_descriptors[k].composition;
4604 const struct dml2_surface_cfg *surface = &display_cfg->plane_descriptors[k].surface;
4605
4606 if (composition->viewport.stationary) {
4607 SurfaceSizeInMALL[k] = (unsigned int)(math_min2(math_ceil2((double)surface->plane0.width, ReadBlockWidthY[k]),
4608 math_floor2(composition->viewport.plane0.x_start + composition->viewport.plane0.width + ReadBlockWidthY[k] - 1, ReadBlockWidthY[k]) -
4609 math_floor2((double)composition->viewport.plane0.x_start, ReadBlockWidthY[k])) *
4610 math_min2(math_ceil2((double)surface->plane0.height, ReadBlockHeightY[k]),
4611 math_floor2((double)composition->viewport.plane0.y_start + composition->viewport.plane0.height + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
4612 math_floor2((double)composition->viewport.plane0.y_start, ReadBlockHeightY[k])) * BytesPerPixelY[k]);
4613
4614 if (ReadBlockWidthC[k] > 0) {
4615 SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] +
4616 math_min2(math_ceil2((double)surface->plane1.width, ReadBlockWidthC[k]),
4617 math_floor2((double)composition->viewport.plane1.y_start + composition->viewport.plane1.width + ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
4618 math_floor2((double)composition->viewport.plane1.y_start, ReadBlockWidthC[k])) *
4619 math_min2(math_ceil2((double)surface->plane1.height, ReadBlockHeightC[k]),
4620 math_floor2((double)composition->viewport.plane1.y_start + composition->viewport.plane1.height + ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
4621 math_floor2(composition->viewport.plane1.y_start, ReadBlockHeightC[k])) * BytesPerPixelC[k]);
4622 }
4623 } else {
4624 SurfaceSizeInMALL[k] = (unsigned int)(math_ceil2(math_min2(surface->plane0.width, composition->viewport.plane0.width + ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
4625 math_ceil2(math_min2(surface->plane0.height, composition->viewport.plane0.height + ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) * BytesPerPixelY[k]);
4626 if (ReadBlockWidthC[k] > 0) {
4627 SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] +
4628 math_ceil2(math_min2(surface->plane1.width, composition->viewport.plane1.width + ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
4629 math_ceil2(math_min2(surface->plane1.height, composition->viewport.plane1.height + ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) * BytesPerPixelC[k]);
4630 }
4631 }
4632 }
4633
4634 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
4635 /* SS and Subvp counted separate as they are never used at the same time */
4636 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))
4637 TotalSurfaceSizeInMALLForSubVP += SurfaceSizeInMALL[k];
4638 else if (display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable)
4639 TotalSurfaceSizeInMALLForSS += SurfaceSizeInMALL[k];
4640 }
4641
4642 *ExceededMALLSize = (TotalSurfaceSizeInMALLForSS > MALLAllocatedForDCNInBytes) ||
4643 (TotalSurfaceSizeInMALLForSubVP > MALLAllocatedForDCNInBytes);
4644
4645 #ifdef __DML_VBA_DEBUG__
4646 dml2_printf("DML::%s: MALLAllocatedForDCN = %u\n", __func__, MALLAllocatedForDCN * 1024 * 1024);
4647 dml2_printf("DML::%s: TotalSurfaceSizeInMALLForSubVP = %u\n", __func__, TotalSurfaceSizeInMALLForSubVP);
4648 dml2_printf("DML::%s: TotalSurfaceSizeInMALLForSS = %u\n", __func__, TotalSurfaceSizeInMALLForSS);
4649 dml2_printf("DML::%s: ExceededMALLSize = %u\n", __func__, *ExceededMALLSize);
4650 #endif
4651 }
4652
calculate_tdlut_setting(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_calculate_tdlut_setting_params * p)4653 static void calculate_tdlut_setting(
4654 struct dml2_core_internal_scratch *scratch,
4655 struct dml2_core_calcs_calculate_tdlut_setting_params *p)
4656 {
4657 // locals
4658 unsigned int tdlut_bpe = 8;
4659 unsigned int tdlut_width;
4660 unsigned int tdlut_pitch_bytes;
4661 unsigned int tdlut_footprint_bytes;
4662 unsigned int vmpg_bytes;
4663 unsigned int tdlut_vmpg_per_frame;
4664 unsigned int tdlut_pte_req_per_frame;
4665 unsigned int tdlut_bytes_per_line;
4666 unsigned int tdlut_delivery_cycles;
4667 double tdlut_drain_rate;
4668 unsigned int tdlut_mpc_width;
4669 unsigned int tdlut_bytes_per_group_simple;
4670
4671 if (!p->setup_for_tdlut) {
4672 *p->tdlut_groups_per_2row_ub = 0;
4673 *p->tdlut_opt_time = 0;
4674 *p->tdlut_drain_time = 0;
4675 *p->tdlut_bytes_to_deliver = 0;
4676 *p->tdlut_bytes_per_group = 0;
4677 *p->tdlut_pte_bytes_per_frame = 0;
4678 *p->tdlut_bytes_per_frame = 0;
4679 return;
4680 }
4681
4682 if (p->tdlut_mpc_width_flag) {
4683 tdlut_mpc_width = 33;
4684 tdlut_bytes_per_group_simple = 39*256;
4685 } else {
4686 tdlut_mpc_width = 17;
4687 tdlut_bytes_per_group_simple = 10*256;
4688 }
4689
4690 vmpg_bytes = p->gpuvm_page_size_kbytes * 1024;
4691
4692 if (p->tdlut_addressing_mode == dml2_tdlut_simple_linear) {
4693 if (p->tdlut_width_mode == dml2_tdlut_width_17_cube)
4694 tdlut_width = 4916;
4695 else
4696 tdlut_width = 35940;
4697 } else {
4698 if (p->tdlut_width_mode == dml2_tdlut_width_17_cube)
4699 tdlut_width = 17;
4700 else // dml2_tdlut_width_33_cube
4701 tdlut_width = 33;
4702 }
4703
4704 if (p->is_gfx11)
4705 tdlut_pitch_bytes = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256); //256B alignment
4706 else
4707 tdlut_pitch_bytes = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 128); //128B alignment
4708
4709 if (p->tdlut_addressing_mode == dml2_tdlut_sw_linear)
4710 tdlut_footprint_bytes = tdlut_pitch_bytes * tdlut_width * tdlut_width;
4711 else
4712 tdlut_footprint_bytes = tdlut_pitch_bytes;
4713
4714 if (!p->gpuvm_enable) {
4715 tdlut_vmpg_per_frame = 0;
4716 tdlut_pte_req_per_frame = 0;
4717 } else {
4718 tdlut_vmpg_per_frame = (unsigned int)math_ceil2(tdlut_footprint_bytes - 1, vmpg_bytes) / vmpg_bytes + 1;
4719 tdlut_pte_req_per_frame = (unsigned int)math_ceil2(tdlut_vmpg_per_frame - 1, 8) / 8 + 1;
4720 }
4721 tdlut_bytes_per_line = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 64); //64b request
4722 *p->tdlut_pte_bytes_per_frame = tdlut_pte_req_per_frame * 64;
4723
4724 if (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) {
4725 //the tdlut_width is either 17 or 33 but the 33x33x33 is subsampled every other line/slice
4726 *p->tdlut_bytes_per_frame = tdlut_bytes_per_line * tdlut_mpc_width * tdlut_mpc_width;
4727 *p->tdlut_bytes_per_group = tdlut_bytes_per_line * tdlut_mpc_width;
4728 //the delivery cycles is DispClk cycles per line * number of lines * number of slices
4729 tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_mpc_width/2.0, 1) * tdlut_mpc_width * tdlut_mpc_width;
4730 tdlut_drain_rate = tdlut_bytes_per_line * p->dispclk_mhz / math_ceil2(tdlut_mpc_width/2.0, 1);
4731 } else {
4732 //tdlut_addressing_mode = tdlut_simple_linear, 3dlut width should be 4*1229=4916 elements
4733 *p->tdlut_bytes_per_frame = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256);
4734 *p->tdlut_bytes_per_group = tdlut_bytes_per_group_simple;
4735 tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_width/2.0, 1);
4736 tdlut_drain_rate = 2 * tdlut_bpe * p->dispclk_mhz;
4737 }
4738
4739 //the tdlut is fetched during the 2 row times of prefetch.
4740 if (p->setup_for_tdlut) {
4741 *p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2((double) *p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1);
4742 *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate;
4743 *p->tdlut_drain_time = p->cursor_buffer_size * 1024 / tdlut_drain_rate;
4744 *p->tdlut_bytes_to_deliver = (unsigned int) (p->cursor_buffer_size * 1024.0);
4745 }
4746
4747 #ifdef __DML_VBA_DEBUG__
4748 dml2_printf("DML::%s: gpuvm_enable = %d\n", __func__, p->gpuvm_enable);
4749 dml2_printf("DML::%s: vmpg_bytes = %d\n", __func__, vmpg_bytes);
4750 dml2_printf("DML::%s: tdlut_vmpg_per_frame = %d\n", __func__, tdlut_vmpg_per_frame);
4751 dml2_printf("DML::%s: tdlut_pte_req_per_frame = %d\n", __func__, tdlut_pte_req_per_frame);
4752
4753 dml2_printf("DML::%s: dispclk_mhz = %f\n", __func__, p->dispclk_mhz);
4754 dml2_printf("DML::%s: tdlut_width = %u\n", __func__, tdlut_width);
4755 dml2_printf("DML::%s: tdlut_addressing_mode = %s\n", __func__, (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) ? "sw_linear" : "simple_linear");
4756 dml2_printf("DML::%s: tdlut_pitch_bytes = %u\n", __func__, tdlut_pitch_bytes);
4757 dml2_printf("DML::%s: tdlut_footprint_bytes = %u\n", __func__, tdlut_footprint_bytes);
4758 dml2_printf("DML::%s: tdlut_bytes_per_frame = %u\n", __func__, *p->tdlut_bytes_per_frame);
4759 dml2_printf("DML::%s: tdlut_bytes_per_line = %u\n", __func__, tdlut_bytes_per_line);
4760 dml2_printf("DML::%s: tdlut_bytes_per_group = %u\n", __func__, *p->tdlut_bytes_per_group);
4761 dml2_printf("DML::%s: tdlut_drain_rate = %f\n", __func__, tdlut_drain_rate);
4762 dml2_printf("DML::%s: tdlut_delivery_cycles = %u\n", __func__, tdlut_delivery_cycles);
4763 dml2_printf("DML::%s: tdlut_opt_time = %f\n", __func__, *p->tdlut_opt_time);
4764 dml2_printf("DML::%s: tdlut_drain_time = %f\n", __func__, *p->tdlut_drain_time);
4765 dml2_printf("DML::%s: tdlut_bytes_to_deliver = %d\n", __func__, *p->tdlut_bytes_to_deliver);
4766 dml2_printf("DML::%s: tdlut_groups_per_2row_ub = %d\n", __func__, *p->tdlut_groups_per_2row_ub);
4767 #endif
4768 }
4769
CalculateTarb(const struct dml2_display_cfg * display_cfg,unsigned int PixelChunkSizeInKByte,unsigned int NumberOfActiveSurfaces,unsigned int NumberOfDPP[],unsigned int dpte_group_bytes[],unsigned int tdlut_bytes_per_group[],double HostVMInefficiencyFactor,double HostVMInefficiencyFactorPrefetch,unsigned int HostVMMinPageSize,double ReturnBW,unsigned int MetaChunkSize,double * Tarb,double * Tarb_prefetch)4770 static void CalculateTarb(
4771 const struct dml2_display_cfg *display_cfg,
4772 unsigned int PixelChunkSizeInKByte,
4773 unsigned int NumberOfActiveSurfaces,
4774 unsigned int NumberOfDPP[],
4775 unsigned int dpte_group_bytes[],
4776 unsigned int tdlut_bytes_per_group[],
4777 double HostVMInefficiencyFactor,
4778 double HostVMInefficiencyFactorPrefetch,
4779 unsigned int HostVMMinPageSize,
4780 double ReturnBW,
4781 unsigned int MetaChunkSize,
4782
4783 // output
4784 double *Tarb,
4785 double *Tarb_prefetch)
4786 {
4787 double extra_bytes = 0;
4788 double extra_bytes_prefetch = 0;
4789 double HostVMDynamicLevels = CalculateHostVMDynamicLevels(display_cfg->gpuvm_enable, display_cfg->hostvm_enable, HostVMMinPageSize, display_cfg->hostvm_max_non_cached_page_table_levels);
4790
4791 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
4792 extra_bytes = extra_bytes + (NumberOfDPP[k] * PixelChunkSizeInKByte * 1024);
4793
4794 if (display_cfg->plane_descriptors[k].surface.dcc.enable)
4795 extra_bytes = extra_bytes + (MetaChunkSize * 1024);
4796
4797 if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut)
4798 extra_bytes = extra_bytes + tdlut_bytes_per_group[k];
4799 }
4800
4801 extra_bytes_prefetch = extra_bytes;
4802
4803 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
4804 if (display_cfg->gpuvm_enable == true) {
4805 extra_bytes = extra_bytes + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
4806 extra_bytes_prefetch = extra_bytes_prefetch + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactorPrefetch;
4807 }
4808 }
4809 *Tarb = extra_bytes / ReturnBW;
4810 *Tarb_prefetch = extra_bytes_prefetch / ReturnBW;
4811 #ifdef __DML_VBA_DEBUG__
4812 dml2_printf("DML::%s: PixelChunkSizeInKByte = %d\n", __func__, PixelChunkSizeInKByte);
4813 dml2_printf("DML::%s: MetaChunkSize = %d\n", __func__, MetaChunkSize);
4814 dml2_printf("DML::%s: extra_bytes = %f\n", __func__, extra_bytes);
4815 dml2_printf("DML::%s: extra_bytes_prefetch = %f\n", __func__, extra_bytes_prefetch);
4816 #endif
4817 }
4818
/*
 * Return TWait = max(reserved_vblank_time_ns / 1000, g6_temp_read_blackout_us)
 *              + max(UrgentLatency, Ttrip).
 *
 * reserved_vblank_time_ns is divided by 1000.0 before being compared with
 * g6_temp_read_blackout_us.
 */
static double CalculateTWait(
	long reserved_vblank_time_ns,
	double UrgentLatency,
	double Ttrip,
	double g6_temp_read_blackout_us)
{
	double TWait;
	double t_urg_trip = math_max2(UrgentLatency, Ttrip);
	TWait = math_max2(reserved_vblank_time_ns/1000.0, g6_temp_read_blackout_us) + t_urg_trip;

#ifdef __DML_VBA_DEBUG__
	/* reserved_vblank_time_ns is a long, so it needs the 'l' length modifier */
	dml2_printf("DML::%s: reserved_vblank_time_ns = %ld\n", __func__, reserved_vblank_time_ns);
	dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
	dml2_printf("DML::%s: Ttrip = %f\n", __func__, Ttrip);
	dml2_printf("DML::%s: TWait = %f\n", __func__, TWait);
#endif
	return TWait;
}
4837
4838
/*
 * Compute the VUpdate/VReady pixel offsets and the dynamic metadata timing
 * terms from the clock and timing inputs.
 *
 * Outputs:
 *   *VUpdateWidthPix  - (14/DCFClkDeepSleep + 12/Dppclk + repeater delay) * PixelClock, via math_ceil2(.., 1.0)
 *   *VReadyOffsetPix  - max(150/Dppclk, repeater delay + 20/DCFClkDeepSleep + 10/Dppclk) * PixelClock, via math_ceil2(.., 1.0)
 *   *VUpdateOffsetPix - HTotal / 4, via math_ceil2(.., 1.0)
 *   *TSetup           - sum of the three pixel counts above divided by PixelClock
 *   *Tdmbf            - DynamicMetadataTransmittedBytes / 4 / Dispclk
 *   *Tdmec            - HTotal / PixelClock
 *   *Tdmsks           - half of the vblank time when no "lines before active"
 *                       requirement is given, otherwise that many line times;
 *                       halved again for interlace without the
 *                       progressive-to-interlace unit in OPP
 *
 * The repeater delay is MaxInterDCNTileRepeaters * (2/Dppclk + 3/Dispclk).
 */
static void CalculateVUpdateAndDynamicMetadataParameters(
	unsigned int MaxInterDCNTileRepeaters,
	double Dppclk,
	double Dispclk,
	double DCFClkDeepSleep,
	double PixelClock,
	unsigned int HTotal,
	unsigned int VBlank,
	unsigned int DynamicMetadataTransmittedBytes,
	unsigned int DynamicMetadataLinesBeforeActiveRequired,
	unsigned int InterlaceEnable,
	bool ProgressiveToInterlaceUnitInOPP,

	// Output
	double *TSetup,
	double *Tdmbf,
	double *Tdmec,
	double *Tdmsks,
	unsigned int *VUpdateOffsetPix,
	unsigned int *VUpdateWidthPix,
	unsigned int *VReadyOffsetPix)
{
	double repeater_delay = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
	unsigned int update_width_pix;
	unsigned int ready_offset_pix;
	unsigned int update_offset_pix;
	double skew;

	update_width_pix = (unsigned int)(math_ceil2((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + repeater_delay) * PixelClock, 1.0));
	ready_offset_pix = (unsigned int)(math_ceil2(math_max2(150.0 / Dppclk, repeater_delay + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0));
	update_offset_pix = (unsigned int)(math_ceil2(HTotal / 4.0, 1.0));

	*VUpdateWidthPix = update_width_pix;
	*VReadyOffsetPix = ready_offset_pix;
	*VUpdateOffsetPix = update_offset_pix;
	*TSetup = (update_offset_pix + update_width_pix + ready_offset_pix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
	*Tdmec = HTotal / PixelClock;

	/* No explicit line requirement: use half of the vblank period. */
	if (DynamicMetadataLinesBeforeActiveRequired == 0)
		skew = VBlank * HTotal / PixelClock / 2.0;
	else
		skew = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
		skew = skew / 2;

	*Tdmsks = skew;
#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %u\n", __func__, DynamicMetadataLinesBeforeActiveRequired);
	dml2_printf("DML::%s: VBlank = %u\n", __func__, VBlank);
	dml2_printf("DML::%s: HTotal = %u\n", __func__, HTotal);
	dml2_printf("DML::%s: PixelClock = %f\n", __func__, PixelClock);
	dml2_printf("DML::%s: Dppclk = %f\n", __func__, Dppclk);
	dml2_printf("DML::%s: DCFClkDeepSleep = %f\n", __func__, DCFClkDeepSleep);
	dml2_printf("DML::%s: MaxInterDCNTileRepeaters = %u\n", __func__, MaxInterDCNTileRepeaters);
	dml2_printf("DML::%s: TotalRepeaterDelayTime = %f\n", __func__, repeater_delay);

	dml2_printf("DML::%s: VUpdateWidthPix = %u\n", __func__, *VUpdateWidthPix);
	dml2_printf("DML::%s: VReadyOffsetPix = %u\n", __func__, *VReadyOffsetPix);
	dml2_printf("DML::%s: VUpdateOffsetPix = %u\n", __func__, *VUpdateOffsetPix);

	dml2_printf("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
#endif
}
4895
get_urgent_bandwidth_required(struct dml2_core_shared_get_urgent_bandwidth_required_locals * l,const struct dml2_display_cfg * display_cfg,enum dml2_core_internal_soc_state_type state_type,enum dml2_core_internal_bw_type bw_type,bool inc_flip_bw,bool use_qual_row_bw,unsigned int NumberOfActiveSurfaces,unsigned int NumberOfDPP[],double dcc_dram_bw_nom_overhead_factor_p0[],double dcc_dram_bw_nom_overhead_factor_p1[],double dcc_dram_bw_pref_overhead_factor_p0[],double dcc_dram_bw_pref_overhead_factor_p1[],double mall_prefetch_sdp_overhead_factor[],double mall_prefetch_dram_overhead_factor[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double PrefetchBandwidthLuma[],double PrefetchBandwidthChroma[],double excess_vactive_fill_bw_l[],double excess_vactive_fill_bw_c[],double cursor_bw[],double dpte_row_bw[],double meta_row_bw[],double prefetch_cursor_bw[],double prefetch_vmrow_bw[],double flip_bw[],double UrgentBurstFactorLuma[],double UrgentBurstFactorChroma[],double UrgentBurstFactorCursor[],double UrgentBurstFactorLumaPre[],double UrgentBurstFactorChromaPre[],double UrgentBurstFactorCursorPre[],double surface_required_bw[],double surface_peak_required_bw[])4896 static double get_urgent_bandwidth_required(
4897 struct dml2_core_shared_get_urgent_bandwidth_required_locals *l,
4898 const struct dml2_display_cfg *display_cfg,
4899 enum dml2_core_internal_soc_state_type state_type,
4900 enum dml2_core_internal_bw_type bw_type,
4901 bool inc_flip_bw, // including flip bw
4902 bool use_qual_row_bw,
4903 unsigned int NumberOfActiveSurfaces,
4904 unsigned int NumberOfDPP[],
4905 double dcc_dram_bw_nom_overhead_factor_p0[],
4906 double dcc_dram_bw_nom_overhead_factor_p1[],
4907 double dcc_dram_bw_pref_overhead_factor_p0[],
4908 double dcc_dram_bw_pref_overhead_factor_p1[],
4909 double mall_prefetch_sdp_overhead_factor[],
4910 double mall_prefetch_dram_overhead_factor[],
4911 double ReadBandwidthLuma[],
4912 double ReadBandwidthChroma[],
4913 double PrefetchBandwidthLuma[],
4914 double PrefetchBandwidthChroma[],
4915 double excess_vactive_fill_bw_l[],
4916 double excess_vactive_fill_bw_c[],
4917 double cursor_bw[],
4918 double dpte_row_bw[],
4919 double meta_row_bw[],
4920 double prefetch_cursor_bw[],
4921 double prefetch_vmrow_bw[],
4922 double flip_bw[],
4923 double UrgentBurstFactorLuma[],
4924 double UrgentBurstFactorChroma[],
4925 double UrgentBurstFactorCursor[],
4926 double UrgentBurstFactorLumaPre[],
4927 double UrgentBurstFactorChromaPre[],
4928 double UrgentBurstFactorCursorPre[],
4929 /* outputs */
4930 double surface_required_bw[],
4931 double surface_peak_required_bw[])
4932 {
4933 // set inc_flip_bw = 0 for total_dchub_urgent_read_bw_noflip calculation, 1 for total_dchub_urgent_read_bw as described in the MAS
4934 // set use_qual_row_bw = 1 to calculate using qualified row bandwidth, used for total_flip_bw calculation
4935
4936 memset(l, 0, sizeof(struct dml2_core_shared_get_urgent_bandwidth_required_locals));
4937
4938 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
4939 l->mall_svp_prefetch_factor = (state_type == dml2_core_internal_soc_state_svp_prefetch) ? (bw_type == dml2_core_internal_bw_dram ? mall_prefetch_dram_overhead_factor[k] : mall_prefetch_sdp_overhead_factor[k]) : 1.0;
4940 l->tmp_nom_adj_factor_p0 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p0[k] : 1.0) * l->mall_svp_prefetch_factor;
4941 l->tmp_nom_adj_factor_p1 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p1[k] : 1.0) * l->mall_svp_prefetch_factor;
4942 l->tmp_pref_adj_factor_p0 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_pref_overhead_factor_p0[k] : 1.0) * l->mall_svp_prefetch_factor;
4943 l->tmp_pref_adj_factor_p1 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_pref_overhead_factor_p1[k] : 1.0) * l->mall_svp_prefetch_factor;
4944
4945 l->adj_factor_p0 = UrgentBurstFactorLuma[k] * l->tmp_nom_adj_factor_p0;
4946 l->adj_factor_p1 = UrgentBurstFactorChroma[k] * l->tmp_nom_adj_factor_p1;
4947 l->adj_factor_cur = UrgentBurstFactorCursor[k];
4948 l->adj_factor_p0_pre = UrgentBurstFactorLumaPre[k] * l->tmp_pref_adj_factor_p0;
4949 l->adj_factor_p1_pre = UrgentBurstFactorChromaPre[k] * l->tmp_pref_adj_factor_p1;
4950 l->adj_factor_cur_pre = UrgentBurstFactorCursorPre[k];
4951
4952 bool is_phantom = dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]);
4953 bool exclude_this_plane = 0;
4954
4955 // Exclude phantom pipe in bw calculation for non svp prefetch state
4956 if (state_type != dml2_core_internal_soc_state_svp_prefetch && is_phantom)
4957 exclude_this_plane = 1;
4958
4959 // The qualified row bandwidth, qual_row_bw, accounts for the regular non-flip row bandwidth when there is no possible immediate flip or HostVM invalidation flip.
4960 // The qual_row_bw is zero if HostVM is possible and only non-zero and equal to row_bw(i) if immediate flip is not allowed for that pipe.
4961 if (use_qual_row_bw) {
4962 if (display_cfg->hostvm_enable)
4963 l->per_plane_flip_bw[k] = 0; // qual_row_bw
4964 else if (!display_cfg->plane_descriptors[k].immediate_flip)
4965 l->per_plane_flip_bw[k] = NumberOfDPP[k] * (dpte_row_bw[k] + meta_row_bw[k]);
4966 } else {
4967 // the final_flip_bw includes the regular row_bw when immediate flip is disallowed (and no HostVM)
4968 if ((!display_cfg->plane_descriptors[k].immediate_flip && !display_cfg->hostvm_enable) || !inc_flip_bw)
4969 l->per_plane_flip_bw[k] = NumberOfDPP[k] * (dpte_row_bw[k] + meta_row_bw[k]);
4970 else
4971 l->per_plane_flip_bw[k] = NumberOfDPP[k] * flip_bw[k];
4972 }
4973
4974 if (!exclude_this_plane) {
4975 l->vm_row_bw = NumberOfDPP[k] * prefetch_vmrow_bw[k];
4976 l->flip_and_active_bw = l->per_plane_flip_bw[k] + ReadBandwidthLuma[k] * l->adj_factor_p0 + ReadBandwidthChroma[k] * l->adj_factor_p1 + cursor_bw[k] * l->adj_factor_cur;
4977 l->flip_and_prefetch_bw = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre;
4978 l->active_and_excess_bw = (ReadBandwidthLuma[k] + excess_vactive_fill_bw_l[k]) * l->tmp_nom_adj_factor_p0 + (ReadBandwidthChroma[k] + excess_vactive_fill_bw_c[k]) * l->tmp_nom_adj_factor_p1 + dpte_row_bw[k] + meta_row_bw[k];
4979 surface_required_bw[k] = math_max4(l->vm_row_bw, l->flip_and_active_bw, l->flip_and_prefetch_bw, l->active_and_excess_bw);
4980
4981 /* export peak required bandwidth for the surface */
4982 surface_peak_required_bw[k] = math_max2(surface_required_bw[k], surface_peak_required_bw[k]);
4983
4984 #ifdef __DML_VBA_DEBUG__
4985 dml2_printf("DML::%s: k=%d, max1: vm_row_bw=%f\n", __func__, k, l->vm_row_bw);
4986 dml2_printf("DML::%s: k=%d, max2: flip_and_active_bw=%f\n", __func__, k, l->flip_and_active_bw);
4987 dml2_printf("DML::%s: k=%d, max3: flip_and_prefetch_bw=%f\n", __func__, k, l->flip_and_prefetch_bw);
4988 dml2_printf("DML::%s: k=%d, max4: active_and_excess_bw=%f\n", __func__, k, l->active_and_excess_bw);
4989 dml2_printf("DML::%s: k=%d, surface_required_bw=%f\n", __func__, k, surface_required_bw[k]);
4990 dml2_printf("DML::%s: k=%d, surface_peak_required_bw=%f\n", __func__, k, surface_peak_required_bw[k]);
4991 #endif
4992 } else {
4993 surface_required_bw[k] = 0.0;
4994 }
4995
4996 l->required_bandwidth_mbps += surface_required_bw[k];
4997
4998 #ifdef __DML_VBA_DEBUG__
4999 dml2_printf("DML::%s: k=%d, NumberOfDPP=%d\n", __func__, k, NumberOfDPP[k]);
5000 dml2_printf("DML::%s: k=%d, use_qual_row_bw=%d\n", __func__, k, use_qual_row_bw);
5001 dml2_printf("DML::%s: k=%d, immediate_flip=%d\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip);
5002 dml2_printf("DML::%s: k=%d, mall_svp_prefetch_factor=%f\n", __func__, k, l->mall_svp_prefetch_factor);
5003 dml2_printf("DML::%s: k=%d, adj_factor_p0=%f\n", __func__, k, l->adj_factor_p0);
5004 dml2_printf("DML::%s: k=%d, adj_factor_p1=%f\n", __func__, k, l->adj_factor_p1);
5005 dml2_printf("DML::%s: k=%d, adj_factor_cur=%f\n", __func__, k, l->adj_factor_cur);
5006
5007 dml2_printf("DML::%s: k=%d, adj_factor_p0_pre=%f\n", __func__, k, l->adj_factor_p0_pre);
5008 dml2_printf("DML::%s: k=%d, adj_factor_p1_pre=%f\n", __func__, k, l->adj_factor_p1_pre);
5009 dml2_printf("DML::%s: k=%d, adj_factor_cur_pre=%f\n", __func__, k, l->adj_factor_cur_pre);
5010
5011 dml2_printf("DML::%s: k=%d, per_plane_flip_bw=%f\n", __func__, k, l->per_plane_flip_bw[k]);
5012 dml2_printf("DML::%s: k=%d, prefetch_vmrow_bw=%f\n", __func__, k, prefetch_vmrow_bw[k]);
5013 dml2_printf("DML::%s: k=%d, ReadBandwidthLuma=%f\n", __func__, k, ReadBandwidthLuma[k]);
5014 dml2_printf("DML::%s: k=%d, ReadBandwidthChroma=%f\n", __func__, k, ReadBandwidthChroma[k]);
5015 dml2_printf("DML::%s: k=%d, excess_vactive_fill_bw_l=%f\n", __func__, k, excess_vactive_fill_bw_l[k]);
5016 dml2_printf("DML::%s: k=%d, excess_vactive_fill_bw_c=%f\n", __func__, k, excess_vactive_fill_bw_c[k]);
5017 dml2_printf("DML::%s: k=%d, cursor_bw=%f\n", __func__, k, cursor_bw[k]);
5018
5019 dml2_printf("DML::%s: k=%d, meta_row_bw=%f\n", __func__, k, meta_row_bw[k]);
5020 dml2_printf("DML::%s: k=%d, dpte_row_bw=%f\n", __func__, k, dpte_row_bw[k]);
5021 dml2_printf("DML::%s: k=%d, PrefetchBandwidthLuma=%f\n", __func__, k, PrefetchBandwidthLuma[k]);
5022 dml2_printf("DML::%s: k=%d, PrefetchBandwidthChroma=%f\n", __func__, k, PrefetchBandwidthChroma[k]);
5023 dml2_printf("DML::%s: k=%d, prefetch_cursor_bw=%f\n", __func__, k, prefetch_cursor_bw[k]);
5024 dml2_printf("DML::%s: k=%d, required_bandwidth_mbps=%f (total), inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, inc_flip_bw, is_phantom, exclude_this_plane);
5025 dml2_printf("DML::%s: k=%d, required_bandwidth_mbps=%f (total), soc_state=%s, inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, dml2_core_internal_soc_state_type_str(state_type), inc_flip_bw, is_phantom, exclude_this_plane);
5026 dml2_printf("DML::%s: k=%d, required_bandwidth_mbps=%f (total), inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, inc_flip_bw, is_phantom, exclude_this_plane);
5027 #endif
5028 }
5029
5030 return l->required_bandwidth_mbps;
5031 }
5032
CalculateExtraLatency(const struct dml2_display_cfg * display_cfg,unsigned int ROBBufferSizeInKByte,unsigned int RoundTripPingLatencyCycles,unsigned int ReorderingBytes,double DCFCLK,double FabricClock,unsigned int PixelChunkSizeInKByte,double ReturnBW,unsigned int NumberOfActiveSurfaces,unsigned int NumberOfDPP[],unsigned int dpte_group_bytes[],unsigned int tdlut_bytes_per_group[],double HostVMInefficiencyFactor,double HostVMInefficiencyFactorPrefetch,unsigned int HostVMMinPageSize,enum dml2_qos_param_type qos_type,bool max_oustanding_when_urgent_expected,unsigned int max_outstanding_requests,unsigned int request_size_bytes_luma[],unsigned int request_size_bytes_chroma[],unsigned int MetaChunkSize,unsigned int dchub_arb_to_ret_delay,double Ttrip,unsigned int hostvm_mode,double * ExtraLatency,double * ExtraLatency_sr,double * ExtraLatencyPrefetch)5033 static void CalculateExtraLatency(
5034 const struct dml2_display_cfg *display_cfg,
5035 unsigned int ROBBufferSizeInKByte,
5036 unsigned int RoundTripPingLatencyCycles,
5037 unsigned int ReorderingBytes,
5038 double DCFCLK,
5039 double FabricClock,
5040 unsigned int PixelChunkSizeInKByte,
5041 double ReturnBW,
5042 unsigned int NumberOfActiveSurfaces,
5043 unsigned int NumberOfDPP[],
5044 unsigned int dpte_group_bytes[],
5045 unsigned int tdlut_bytes_per_group[],
5046 double HostVMInefficiencyFactor,
5047 double HostVMInefficiencyFactorPrefetch,
5048 unsigned int HostVMMinPageSize,
5049 enum dml2_qos_param_type qos_type,
5050 bool max_oustanding_when_urgent_expected,
5051 unsigned int max_outstanding_requests,
5052 unsigned int request_size_bytes_luma[],
5053 unsigned int request_size_bytes_chroma[],
5054 unsigned int MetaChunkSize,
5055 unsigned int dchub_arb_to_ret_delay,
5056 double Ttrip,
5057 unsigned int hostvm_mode,
5058
5059 // output
5060 double *ExtraLatency, // Tex
5061 double *ExtraLatency_sr, // Tex_sr
5062 double *ExtraLatencyPrefetch)
5063
5064 {
5065 double Tarb;
5066 double Tarb_prefetch;
5067 double Tex_trips;
5068 unsigned int max_request_size_bytes = 0;
5069
5070 CalculateTarb(
5071 display_cfg,
5072 PixelChunkSizeInKByte,
5073 NumberOfActiveSurfaces,
5074 NumberOfDPP,
5075 dpte_group_bytes,
5076 tdlut_bytes_per_group,
5077 HostVMInefficiencyFactor,
5078 HostVMInefficiencyFactorPrefetch,
5079 HostVMMinPageSize,
5080 ReturnBW,
5081 MetaChunkSize,
5082 // output
5083 &Tarb,
5084 &Tarb_prefetch);
5085
5086 Tex_trips = (display_cfg->hostvm_enable && hostvm_mode == 1) ? (2.0 * Ttrip) : 0.0;
5087
5088 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
5089 if (request_size_bytes_luma[k] > max_request_size_bytes)
5090 max_request_size_bytes = request_size_bytes_luma[k];
5091 if (request_size_bytes_chroma[k] > max_request_size_bytes)
5092 max_request_size_bytes = request_size_bytes_chroma[k];
5093 }
5094
5095 if (qos_type == dml2_qos_param_type_dcn4x) {
5096 *ExtraLatency_sr = dchub_arb_to_ret_delay / DCFCLK;
5097 *ExtraLatency = *ExtraLatency_sr;
5098 if (max_oustanding_when_urgent_expected)
5099 *ExtraLatency = *ExtraLatency + (ROBBufferSizeInKByte * 1024 - max_outstanding_requests * max_request_size_bytes) / ReturnBW;
5100 } else {
5101 *ExtraLatency_sr = dchub_arb_to_ret_delay / DCFCLK + RoundTripPingLatencyCycles / FabricClock + ReorderingBytes / ReturnBW;
5102 *ExtraLatency = *ExtraLatency_sr;
5103 }
5104 *ExtraLatency = *ExtraLatency + Tex_trips;
5105 *ExtraLatencyPrefetch = *ExtraLatency + Tarb_prefetch;
5106 *ExtraLatency = *ExtraLatency + Tarb;
5107 *ExtraLatency_sr = *ExtraLatency_sr + Tarb;
5108
5109 #ifdef __DML_VBA_DEBUG__
5110 dml2_printf("DML::%s: qos_type=%u\n", __func__, qos_type);
5111 dml2_printf("DML::%s: hostvm_mode=%u\n", __func__, hostvm_mode);
5112 dml2_printf("DML::%s: Tex_trips=%u\n", __func__, Tex_trips);
5113 dml2_printf("DML::%s: max_oustanding_when_urgent_expected=%u\n", __func__, max_oustanding_when_urgent_expected);
5114 dml2_printf("DML::%s: FabricClock=%f\n", __func__, FabricClock);
5115 dml2_printf("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
5116 dml2_printf("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
5117 dml2_printf("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles);
5118 dml2_printf("DML::%s: ReorderingBytes=%u\n", __func__, ReorderingBytes);
5119 dml2_printf("DML::%s: Tarb=%f\n", __func__, Tarb);
5120 dml2_printf("DML::%s: ExtraLatency=%f\n", __func__, *ExtraLatency);
5121 dml2_printf("DML::%s: ExtraLatency_sr=%f\n", __func__, *ExtraLatency_sr);
5122 dml2_printf("DML::%s: ExtraLatencyPrefetch=%f\n", __func__, *ExtraLatencyPrefetch);
5123 #endif
5124 }
5125
CalculatePrefetchSchedule(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_CalculatePrefetchSchedule_params * p)5126 static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch, struct dml2_core_calcs_CalculatePrefetchSchedule_params *p)
5127 {
5128 struct dml2_core_calcs_CalculatePrefetchSchedule_locals *s = &scratch->CalculatePrefetchSchedule_locals;
5129 bool dcc_mrq_enable;
5130
5131 unsigned int vm_bytes;
5132 unsigned int extra_tdpe_bytes;
5133 unsigned int tdlut_row_bytes;
5134 unsigned int Lo;
5135
5136 s->NoTimeToPrefetch = false;
5137 s->DPPCycles = 0;
5138 s->DISPCLKCycles = 0;
5139 s->DSTTotalPixelsAfterScaler = 0.0;
5140 s->LineTime = 0.0;
5141 s->dst_y_prefetch_equ = 0.0;
5142 s->prefetch_bw_oto = 0.0;
5143 s->Tvm_oto = 0.0;
5144 s->Tr0_oto = 0.0;
5145 s->Tvm_oto_lines = 0.0;
5146 s->Tr0_oto_lines = 0.0;
5147 s->dst_y_prefetch_oto = 0.0;
5148 s->TimeForFetchingVM = 0.0;
5149 s->TimeForFetchingRowInVBlank = 0.0;
5150 s->LinesToRequestPrefetchPixelData = 0.0;
5151 s->HostVMDynamicLevelsTrips = 0;
5152 s->trip_to_mem = 0.0;
5153 *p->Tvm_trips = 0.0;
5154 *p->Tr0_trips = 0.0;
5155 s->Tvm_trips_rounded = 0.0;
5156 s->Tr0_trips_rounded = 0.0;
5157 s->max_Tsw = 0.0;
5158 s->Lsw_oto = 0.0;
5159 *p->Tpre_rounded = 0.0;
5160 s->prefetch_bw_equ = 0.0;
5161 s->Tvm_equ = 0.0;
5162 s->Tr0_equ = 0.0;
5163 s->Tdmbf = 0.0;
5164 s->Tdmec = 0.0;
5165 s->Tdmsks = 0.0;
5166 *p->prefetch_sw_bytes = 0.0;
5167 s->prefetch_bw_pr = 0.0;
5168 s->bytes_pp = 0.0;
5169 s->dep_bytes = 0.0;
5170 s->min_Lsw_oto = 0.0;
5171 s->min_Lsw_equ = 0.0;
5172 s->Tsw_est1 = 0.0;
5173 s->Tsw_est2 = 0.0;
5174 s->Tsw_est3 = 0.0;
5175 s->cursor_prefetch_bytes = 0;
5176 *p->prefetch_cursor_bw = 0;
5177
5178 dcc_mrq_enable = (p->dcc_enable && p->mrq_present);
5179
5180 s->TWait_p = p->TWait - p->Ttrip; // TWait includes max(Turg, Ttrip) and Ttrip here is already max(Turg, Ttrip)
5181
5182 if (p->display_cfg->gpuvm_enable == true && p->display_cfg->hostvm_enable == true) {
5183 s->HostVMDynamicLevelsTrips = p->display_cfg->hostvm_max_non_cached_page_table_levels;
5184 } else {
5185 s->HostVMDynamicLevelsTrips = 0;
5186 }
5187 #ifdef __DML_VBA_DEBUG__
5188 dml2_printf("DML::%s: dcc_enable = %u\n", __func__, p->dcc_enable);
5189 dml2_printf("DML::%s: mrq_present = %u\n", __func__, p->mrq_present);
5190 dml2_printf("DML::%s: dcc_mrq_enable = %u\n", __func__, dcc_mrq_enable);
5191 dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, p->display_cfg->gpuvm_enable);
5192 dml2_printf("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels);
5193 dml2_printf("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable);
5194 dml2_printf("DML::%s: VStartup = %u\n", __func__, p->VStartup);
5195 dml2_printf("DML::%s: HostVMEnable = %u\n", __func__, p->display_cfg->hostvm_enable);
5196 dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
5197 dml2_printf("DML::%s: TWait = %f\n", __func__, p->TWait);
5198 dml2_printf("DML::%s: TWait_p = %f\n", __func__, s->TWait_p);
5199 dml2_printf("DML::%s: Ttrip = %f\n", __func__, p->Ttrip);
5200 dml2_printf("DML::%s: myPipe->Dppclk = %f\n", __func__, p->myPipe->Dppclk);
5201 dml2_printf("DML::%s: myPipe->Dispclk = %f\n", __func__, p->myPipe->Dispclk);
5202 #endif
5203 CalculateVUpdateAndDynamicMetadataParameters(
5204 p->MaxInterDCNTileRepeaters,
5205 p->myPipe->Dppclk,
5206 p->myPipe->Dispclk,
5207 p->myPipe->DCFClkDeepSleep,
5208 p->myPipe->PixelClock,
5209 p->myPipe->HTotal,
5210 p->myPipe->VBlank,
5211 p->DynamicMetadataTransmittedBytes,
5212 p->DynamicMetadataLinesBeforeActiveRequired,
5213 p->myPipe->InterlaceEnable,
5214 p->myPipe->ProgressiveToInterlaceUnitInOPP,
5215 p->TSetup,
5216
5217 // Output
5218 &s->Tdmbf,
5219 &s->Tdmec,
5220 &s->Tdmsks,
5221 p->VUpdateOffsetPix,
5222 p->VUpdateWidthPix,
5223 p->VReadyOffsetPix);
5224
5225 s->LineTime = p->myPipe->HTotal / p->myPipe->PixelClock;
5226 s->trip_to_mem = p->Ttrip;
5227 *p->Tvm_trips = p->ExtraLatencyPrefetch + math_max2(s->trip_to_mem * (p->display_cfg->gpuvm_max_page_table_levels * (s->HostVMDynamicLevelsTrips + 1)), p->Turg);
5228 if (dcc_mrq_enable)
5229 *p->Tvm_trips_flip = *p->Tvm_trips;
5230 else
5231 *p->Tvm_trips_flip = *p->Tvm_trips - s->trip_to_mem;
5232
5233 *p->Tr0_trips_flip = s->trip_to_mem * (s->HostVMDynamicLevelsTrips + 1);
5234 *p->Tr0_trips = math_max2(*p->Tr0_trips_flip, p->tdlut_opt_time / 2);
5235
5236 if (p->DynamicMetadataVMEnabled == true) {
5237 *p->Tdmdl_vm = s->TWait_p + *p->Tvm_trips;
5238 *p->Tdmdl = *p->Tdmdl_vm + p->Ttrip;
5239 } else {
5240 *p->Tdmdl_vm = 0;
5241 *p->Tdmdl = s->TWait_p + p->ExtraLatencyPrefetch + p->Ttrip; // Tex
5242 }
5243
5244 if (p->DynamicMetadataEnable == true) {
5245 if (p->VStartup * s->LineTime < *p->TSetup + *p->Tdmdl + s->Tdmbf + s->Tdmec + s->Tdmsks) {
5246 *p->NotEnoughTimeForDynamicMetadata = true;
5247 dml2_printf("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
5248 dml2_printf("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf);
5249 dml2_printf("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec);
5250 dml2_printf("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks);
5251 dml2_printf("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl);
5252 } else {
5253 *p->NotEnoughTimeForDynamicMetadata = false;
5254 }
5255 } else {
5256 *p->NotEnoughTimeForDynamicMetadata = false;
5257 }
5258
5259 if (p->myPipe->ScalerEnabled)
5260 s->DPPCycles = (unsigned int)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCL);
5261 else
5262 s->DPPCycles = (unsigned int)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCLLBOnly);
5263
5264 s->DPPCycles = (unsigned int)(s->DPPCycles + p->myPipe->NumberOfCursors * p->DPPCLKDelayCNVCCursor);
5265
5266 s->DISPCLKCycles = (unsigned int)p->DISPCLKDelaySubtotal;
5267
5268 if (p->myPipe->Dppclk == 0.0 || p->myPipe->Dispclk == 0.0)
5269 return true;
5270
5271 *p->DSTXAfterScaler = (unsigned int)math_round(s->DPPCycles * p->myPipe->PixelClock / p->myPipe->Dppclk + s->DISPCLKCycles * p->myPipe->PixelClock / p->myPipe->Dispclk + p->DSCDelay);
5272 *p->DSTXAfterScaler = (unsigned int)math_round(*p->DSTXAfterScaler + (p->myPipe->ODMMode != dml2_odm_mode_bypass ? 18 : 0) + (p->myPipe->DPPPerSurface - 1) * p->DPP_RECOUT_WIDTH +
5273 ((p->myPipe->ODMMode == dml2_odm_mode_split_1to2 || p->myPipe->ODMMode == dml2_odm_mode_mso_1to2) ? (double)p->myPipe->HActive / 2.0 : 0) +
5274 ((p->myPipe->ODMMode == dml2_odm_mode_mso_1to4) ? (double)p->myPipe->HActive * 3.0 / 4.0 : 0));
5275
5276 #ifdef __DML_VBA_DEBUG__
5277 dml2_printf("DML::%s: DynamicMetadataVMEnabled = %u\n", __func__, p->DynamicMetadataVMEnabled);
5278 dml2_printf("DML::%s: DPPCycles = %u\n", __func__, s->DPPCycles);
5279 dml2_printf("DML::%s: PixelClock = %f\n", __func__, p->myPipe->PixelClock);
5280 dml2_printf("DML::%s: Dppclk = %f\n", __func__, p->myPipe->Dppclk);
5281 dml2_printf("DML::%s: DISPCLKCycles = %u\n", __func__, s->DISPCLKCycles);
5282 dml2_printf("DML::%s: DISPCLK = %f\n", __func__, p->myPipe->Dispclk);
5283 dml2_printf("DML::%s: DSCDelay = %u\n", __func__, p->DSCDelay);
5284 dml2_printf("DML::%s: ODMMode = %u\n", __func__, p->myPipe->ODMMode);
5285 dml2_printf("DML::%s: DPP_RECOUT_WIDTH = %u\n", __func__, p->DPP_RECOUT_WIDTH);
5286 dml2_printf("DML::%s: DSTXAfterScaler = %u\n", __func__, *p->DSTXAfterScaler);
5287
5288 dml2_printf("DML::%s: setup_for_tdlut = %u\n", __func__, p->setup_for_tdlut);
5289 dml2_printf("DML::%s: tdlut_opt_time = %f\n", __func__, p->tdlut_opt_time);
5290 dml2_printf("DML::%s: tdlut_pte_bytes_per_frame = %u\n", __func__, p->tdlut_pte_bytes_per_frame);
5291 dml2_printf("DML::%s: tdlut_drain_time = %f\n", __func__, p->tdlut_drain_time);
5292 #endif
5293
5294 if (p->OutputFormat == dml2_420 || (p->myPipe->InterlaceEnable && p->myPipe->ProgressiveToInterlaceUnitInOPP))
5295 *p->DSTYAfterScaler = 1;
5296 else
5297 *p->DSTYAfterScaler = 0;
5298
5299 s->DSTTotalPixelsAfterScaler = *p->DSTYAfterScaler * p->myPipe->HTotal + *p->DSTXAfterScaler;
5300 *p->DSTYAfterScaler = (unsigned int)(math_floor2(s->DSTTotalPixelsAfterScaler / p->myPipe->HTotal, 1));
5301 *p->DSTXAfterScaler = (unsigned int)(s->DSTTotalPixelsAfterScaler - ((double)(*p->DSTYAfterScaler * p->myPipe->HTotal)));
5302 #ifdef __DML_VBA_DEBUG__
5303 dml2_printf("DML::%s: DSTXAfterScaler = %u (final)\n", __func__, *p->DSTXAfterScaler);
5304 dml2_printf("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler);
5305 #endif
5306
5307 #ifdef __DML_VBA_DEBUG__
5308 dml2_printf("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips);
5309 dml2_printf("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips);
5310 dml2_printf("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem);
5311 dml2_printf("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch);
5312 dml2_printf("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels);
5313 dml2_printf("DML::%s: HostVMDynamicLevelsTrips = %u\n", __func__, s->HostVMDynamicLevelsTrips);
5314 #endif
5315 if (p->display_cfg->gpuvm_enable) {
5316 s->Tvm_trips_rounded = math_ceil2(4.0 * *p->Tvm_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
5317 *p->Tvm_trips_flip_rounded = math_ceil2(4.0 * *p->Tvm_trips_flip / s->LineTime, 1.0) / 4.0 * s->LineTime;
5318 } else {
5319 if (p->DynamicMetadataEnable || dcc_mrq_enable || p->setup_for_tdlut)
5320 s->Tvm_trips_rounded = math_max2(s->LineTime * math_ceil2(4.0*math_max3(p->ExtraLatencyPrefetch, p->Turg, s->trip_to_mem)/s->LineTime, 1)/4, s->LineTime/4.0);
5321 else
5322 s->Tvm_trips_rounded = s->LineTime / 4.0;
5323 *p->Tvm_trips_flip_rounded = s->LineTime / 4.0;
5324 }
5325
5326 s->Tvm_trips_rounded = math_max2(s->Tvm_trips_rounded, s->LineTime / 4.0);
5327 *p->Tvm_trips_flip_rounded = math_max2(*p->Tvm_trips_flip_rounded, s->LineTime / 4.0);
5328
5329 if (p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable) {
5330 s->Tr0_trips_rounded = math_ceil2(4.0 * *p->Tr0_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
5331 *p->Tr0_trips_flip_rounded = math_ceil2(4.0 * *p->Tr0_trips_flip / s->LineTime, 1.0) / 4.0 * s->LineTime;
5332 } else {
5333 s->Tr0_trips_rounded = s->LineTime / 4.0;
5334 *p->Tr0_trips_flip_rounded = s->LineTime / 4.0;
5335 }
5336 s->Tr0_trips_rounded = math_max2(s->Tr0_trips_rounded, s->LineTime / 4.0);
5337 *p->Tr0_trips_flip_rounded = math_max2(*p->Tr0_trips_flip_rounded, s->LineTime / 4.0);
5338
5339 if (p->display_cfg->gpuvm_enable == true) {
5340 if (p->display_cfg->gpuvm_max_page_table_levels >= 3) {
5341 *p->Tno_bw = p->ExtraLatencyPrefetch + s->trip_to_mem * (double)((p->display_cfg->gpuvm_max_page_table_levels - 2) * (s->HostVMDynamicLevelsTrips + 1));
5342 } else if (p->display_cfg->gpuvm_max_page_table_levels == 1 && !dcc_mrq_enable && !p->setup_for_tdlut) {
5343 *p->Tno_bw = p->ExtraLatencyPrefetch;
5344 } else {
5345 *p->Tno_bw = 0;
5346 }
5347 } else {
5348 *p->Tno_bw = 0;
5349 }
5350
5351 if (p->mrq_present || p->display_cfg->gpuvm_max_page_table_levels >= 3)
5352 *p->Tno_bw_flip = *p->Tno_bw;
5353 else
5354 *p->Tno_bw_flip = 0; //because there is no 3DLUT for iFlip
5355
5356 if (dml_is_420(p->myPipe->SourcePixelFormat)) {
5357 s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC / 4.0;
5358 } else {
5359 s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC;
5360 }
5361
5362 *p->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC;
5363 *p->prefetch_sw_bytes = *p->prefetch_sw_bytes * p->mall_prefetch_sdp_overhead_factor;
5364
5365 vm_bytes = p->vm_bytes; // vm_bytes is dpde0_bytes_per_frame_ub_l + dpde0_bytes_per_frame_ub_c + 2*extra_dpde_bytes;
5366 extra_tdpe_bytes = (unsigned int)math_max2(0, (p->display_cfg->gpuvm_max_page_table_levels - 1) * 128);
5367
5368 if (p->setup_for_tdlut)
5369 vm_bytes = vm_bytes + p->tdlut_pte_bytes_per_frame + (p->display_cfg->gpuvm_enable ? extra_tdpe_bytes : 0);
5370
5371 tdlut_row_bytes = (unsigned long) math_ceil2(p->tdlut_bytes_per_frame/2.0, 1.0);
5372
5373 s->min_Lsw_oto = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_OTO__;
5374 s->min_Lsw_oto = math_max2(s->min_Lsw_oto, p->tdlut_drain_time / s->LineTime);
5375 s->min_Lsw_oto = math_max2(s->min_Lsw_oto, 2.0);
5376
5377 // use vactive swath bw for prefetch oto and also cap prefetch_bw_oto to max_vratio_oto
5378 // Note: in prefetch calculation, accounting is done mostly per-pipe.
5379 // vactive swath bw represents the per-surface (aka per dml plane) bw to move vratio_l/c lines of bytes_l/c per line time
5380 s->per_pipe_vactive_sw_bw = p->vactive_sw_bw_l / (double)p->myPipe->DPPPerSurface;
5381
5382 // one-to-one prefetch bw as one line of bytes per line time (as per vratio_pre_l/c = 1)
5383 s->prefetch_bw_oto = (p->swath_width_luma_ub * p->myPipe->BytePerPixelY) / s->LineTime;
5384
5385 if (p->myPipe->BytePerPixelC > 0) {
5386 s->per_pipe_vactive_sw_bw += p->vactive_sw_bw_c / (double)p->myPipe->DPPPerSurface;
5387 s->prefetch_bw_oto += (p->swath_width_chroma_ub * p->myPipe->BytePerPixelC) / s->LineTime;
5388 }
5389
5390 s->prefetch_bw_oto = math_max2(s->per_pipe_vactive_sw_bw, s->prefetch_bw_oto) * p->mall_prefetch_sdp_overhead_factor;
5391
5392 s->prefetch_bw_oto = math_min2(s->prefetch_bw_oto, *p->prefetch_sw_bytes/(s->min_Lsw_oto*s->LineTime));
5393
5394 s->Lsw_oto = math_ceil2(4.0 * *p->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, 1.0) / 4.0;
5395
5396 s->prefetch_bw_oto = math_max3(s->prefetch_bw_oto,
5397 p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw,
5398 (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime));
5399
5400 #ifdef __DML_VBA_DEBUG__
5401 dml2_printf("DML::%s: vactive_sw_bw_l = %f\n", __func__, p->vactive_sw_bw_l);
5402 dml2_printf("DML::%s: vactive_sw_bw_c = %f\n", __func__, p->vactive_sw_bw_c);
5403 dml2_printf("DML::%s: per_pipe_vactive_sw_bw = %f\n", __func__, s->per_pipe_vactive_sw_bw);
5404 #endif
5405
5406 if (p->display_cfg->gpuvm_enable == true) {
5407 s->Tvm_oto = math_max3(
5408 *p->Tvm_trips,
5409 *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto,
5410 s->LineTime / 4.0);
5411
5412 #ifdef __DML_VBA_DEBUG__
5413 dml2_printf("DML::%s: Tvm_oto max0 = %f\n", __func__, *p->Tvm_trips);
5414 dml2_printf("DML::%s: Tvm_oto max1 = %f\n", __func__, *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto);
5415 dml2_printf("DML::%s: Tvm_oto max2 = %f\n", __func__, s->LineTime / 4.0);
5416 #endif
5417 } else {
5418 s->Tvm_oto = s->Tvm_trips_rounded;
5419 }
5420
5421 if ((p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable)) {
5422 s->Tr0_oto = math_max3(
5423 *p->Tr0_trips,
5424 (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto,
5425 s->LineTime / 4.0);
5426 #ifdef __DML_VBA_DEBUG__
5427 dml2_printf("DML::%s: Tr0_oto max0 = %f\n", __func__, *p->Tr0_trips);
5428 dml2_printf("DML::%s: Tr0_oto max1 = %f\n", __func__, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto);
5429 dml2_printf("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime / 4);
5430 #endif
5431 } else
5432 s->Tr0_oto = s->LineTime / 4.0;
5433
5434 s->Tvm_oto_lines = math_ceil2(4.0 * s->Tvm_oto / s->LineTime, 1) / 4.0;
5435 s->Tr0_oto_lines = math_ceil2(4.0 * s->Tr0_oto / s->LineTime, 1) / 4.0;
5436 s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto;
5437
5438 #ifdef DML_GLOBAL_PREFETCH_CHECK
5439 dml2_printf("DML::%s: impacted_Tpre = %f\n", __func__, p->impacted_dst_y_pre);
5440 if (p->impacted_dst_y_pre > 0) {
5441 dml2_printf("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto);
5442 s->dst_y_prefetch_oto = math_max2(s->dst_y_prefetch_oto, p->impacted_dst_y_pre);
5443 dml2_printf("DML::%s: dst_y_prefetch_oto = %f (impacted)\n", __func__, s->dst_y_prefetch_oto);
5444 }
5445 #endif
5446 *p->Tpre_oto = s->dst_y_prefetch_oto * s->LineTime;
5447
5448 //To (time for delay after scaler) in line time
5449 Lo = (unsigned int)(*p->DSTYAfterScaler + (double)*p->DSTXAfterScaler / (double)p->myPipe->HTotal);
5450
5451 s->min_Lsw_equ = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_EQU__;
5452 s->min_Lsw_equ = math_max2(s->min_Lsw_equ, p->tdlut_drain_time / s->LineTime);
5453 s->min_Lsw_equ = math_max2(s->min_Lsw_equ, 2.0);
5454 //Tpre_equ in line time
5455 if (p->DynamicMetadataVMEnabled && p->DynamicMetadataEnable)
5456 s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, *p->Tvm_trips) + s->TWait_p) / s->LineTime - Lo;
5457 else
5458 s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, p->ExtraLatencyPrefetch) + s->TWait_p) / s->LineTime - Lo;
5459
5460 #ifdef DML_GLOBAL_PREFETCH_CHECK
5461 s->dst_y_prefetch_equ_impacted = math_max2(p->impacted_dst_y_pre, s->dst_y_prefetch_equ);
5462
5463 s->dst_y_prefetch_equ_impacted = math_min2(s->dst_y_prefetch_equ_impacted, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
5464
5465 if (s->dst_y_prefetch_equ_impacted > s->dst_y_prefetch_equ)
5466 s->dst_y_prefetch_equ -= s->dst_y_prefetch_equ_impacted - s->dst_y_prefetch_equ;
5467 #endif
5468
5469 s->dst_y_prefetch_equ = math_min2(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
5470
5471 #ifdef __DML_VBA_DEBUG__
5472 dml2_printf("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal);
5473 dml2_printf("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto);
5474 dml2_printf("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ);
5475 dml2_printf("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw);
5476 dml2_printf("DML::%s: Tno_bw_flip = %f\n", __func__, *p->Tno_bw_flip);
5477 dml2_printf("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch);
5478 dml2_printf("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem);
5479 dml2_printf("DML::%s: mall_prefetch_sdp_overhead_factor = %f\n", __func__, p->mall_prefetch_sdp_overhead_factor);
5480 dml2_printf("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY);
5481 dml2_printf("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
5482 dml2_printf("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub);
5483 dml2_printf("DML::%s: BytePerPixelC = %u\n", __func__, p->myPipe->BytePerPixelC);
5484 dml2_printf("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC);
5485 dml2_printf("DML::%s: swath_width_chroma_ub = %u\n", __func__, p->swath_width_chroma_ub);
5486 dml2_printf("DML::%s: prefetch_sw_bytes = %f\n", __func__, *p->prefetch_sw_bytes);
5487 dml2_printf("DML::%s: max_Tsw = %f\n", __func__, s->max_Tsw);
5488 dml2_printf("DML::%s: bytes_pp = %f\n", __func__, s->bytes_pp);
5489 dml2_printf("DML::%s: vm_bytes = %u\n", __func__, vm_bytes);
5490 dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
5491 dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
5492 dml2_printf("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips);
5493 dml2_printf("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips);
5494 dml2_printf("DML::%s: Tvm_trips_flip = %f\n", __func__, *p->Tvm_trips_flip);
5495 dml2_printf("DML::%s: Tr0_trips_flip = %f\n", __func__, *p->Tr0_trips_flip);
5496 dml2_printf("DML::%s: prefetch_bw_pr = %f\n", __func__, s->prefetch_bw_pr);
5497 dml2_printf("DML::%s: prefetch_bw_oto = %f\n", __func__, s->prefetch_bw_oto);
5498 dml2_printf("DML::%s: Tr0_oto = %f\n", __func__, s->Tr0_oto);
5499 dml2_printf("DML::%s: Tvm_oto = %f\n", __func__, s->Tvm_oto);
5500 dml2_printf("DML::%s: Tvm_oto_lines = %f\n", __func__, s->Tvm_oto_lines);
5501 dml2_printf("DML::%s: Tr0_oto_lines = %f\n", __func__, s->Tr0_oto_lines);
5502 dml2_printf("DML::%s: Lsw_oto = %f\n", __func__, s->Lsw_oto);
5503 dml2_printf("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto);
5504 dml2_printf("DML::%s: dst_y_prefetch_equ = %f\n", __func__, s->dst_y_prefetch_equ);
5505 dml2_printf("DML::%s: tdlut_row_bytes = %d\n", __func__, tdlut_row_bytes);
5506 dml2_printf("DML::%s: meta_row_bytes = %d\n", __func__, p->meta_row_bytes);
5507 #endif
5508 double Tpre = s->dst_y_prefetch_equ * s->LineTime;
5509 s->dst_y_prefetch_equ = math_floor2(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0;
5510 *p->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime;
5511
5512 #ifdef __DML_VBA_DEBUG__
5513 dml2_printf("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ);
5514 dml2_printf("DML::%s: LineTime: %f\n", __func__, s->LineTime);
5515 dml2_printf("DML::%s: VStartup: %u\n", __func__, p->VStartup);
5516 dml2_printf("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", __func__, p->VStartup * s->LineTime);
5517 dml2_printf("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *p->TSetup);
5518 dml2_printf("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, p->TCalc);
5519 dml2_printf("DML::%s: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", __func__, p->TWait);
5520 dml2_printf("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf);
5521 dml2_printf("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec);
5522 dml2_printf("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks);
5523 dml2_printf("DML::%s: TWait = %f\n", __func__, p->TWait);
5524 dml2_printf("DML::%s: TWait_p = %f\n", __func__, s->TWait_p);
5525 dml2_printf("DML::%s: Ttrip = %f\n", __func__, p->Ttrip);
5526 dml2_printf("DML::%s: Tex = %f\n", __func__, p->ExtraLatencyPrefetch);
5527 dml2_printf("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd \n", __func__, *p->Tdmdl_vm);
5528 dml2_printf("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl);
5529 dml2_printf("DML::%s: TWait_p: %fus\n", __func__, s->TWait_p);
5530 dml2_printf("DML::%s: Ttrip: %fus\n", __func__, p->Ttrip);
5531 dml2_printf("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler);
5532 dml2_printf("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler);
5533 dml2_printf("DML::%s: vm_bytes: %f (hvm inefficiency scaled)\n", __func__, vm_bytes*p->HostVMInefficiencyFactor);
5534 dml2_printf("DML::%s: row_bytes: %f (hvm inefficiency scaled, 1 row)\n", __func__, p->PixelPTEBytesPerRow*p->HostVMInefficiencyFactor+p->meta_row_bytes+tdlut_row_bytes);
5535 dml2_printf("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw);
5536 dml2_printf("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, Tpre, *p->Tpre_rounded, (*p->Tpre_rounded - Tpre));
5537 dml2_printf("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips));
5538 #endif
5539
5540 *p->dst_y_per_vm_vblank = 0;
5541 *p->dst_y_per_row_vblank = 0;
5542 *p->VRatioPrefetchY = 0;
5543 *p->VRatioPrefetchC = 0;
5544 *p->RequiredPrefetchPixelDataBWLuma = 0;
5545
5546 // Derive bandwidth by finding how much data to move within the time constraint
5547 // Tpre_rounded is Tpre rounding to 2-bit fraction
5548 // Tvm_trips_rounded is Tvm_trips ceiling to 1/4 line time
5549 // Tr0_trips_rounded is Tr0_trips ceiling to 1/4 line time
5550 // So that means prefetch bw calculated can be higher since the total time available for prefetch is less
5551 bool min_Lsw_equ_ok = *p->Tpre_rounded >= s->Tvm_trips_rounded + 2.0*s->Tr0_trips_rounded + s->min_Lsw_equ*s->LineTime;
5552 bool tpre_gt_req_latency = true;
5553 #if 0
5554 // Check that Tpre_rounded is big enough if all of the stages of the prefetch are time constrained.
5555 // The terms Tvm_trips_rounded and Tr0_trips_rounded represent the min time constraints for the VM and row stages.
5556 // Normally, these terms cover the overall time constraint for Tpre >= (Tex + max{Ttrip, Turg}), but if these terms are at their minimum, an explicit check is necessary.
5557 tpre_gt_req_latency = *p->Tpre_rounded > (math_max2(p->Turg, s->trip_to_mem) + p->ExtraLatencyPrefetch);
5558 #endif
5559
5560 if (s->dst_y_prefetch_equ > 1 && min_Lsw_equ_ok && tpre_gt_req_latency) {
5561 s->prefetch_bw1 = 0.;
5562 s->prefetch_bw2 = 0.;
5563 s->prefetch_bw3 = 0.;
5564 s->prefetch_bw4 = 0.;
5565
5566 // prefetch_bw1: VM + 2*R0 + SW
5567 if (*p->Tpre_rounded - *p->Tno_bw > 0) {
5568 s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor
5569 + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)
5570 + *p->prefetch_sw_bytes)
5571 / (*p->Tpre_rounded - *p->Tno_bw);
5572 s->Tsw_est1 = *p->prefetch_sw_bytes / s->prefetch_bw1;
5573 } else
5574 s->prefetch_bw1 = 0;
5575
5576 dml2_printf("DML::%s: prefetch_bw1: %f\n", __func__, s->prefetch_bw1);
5577 if ((s->Tsw_est1 < s->min_Lsw_equ * s->LineTime) && (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0)) {
5578 s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) /
5579 (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw);
5580 #ifdef __DML_VBA_DEBUG__
5581 dml2_printf("DML::%s: vm and 2 rows bytes = %f\n", __func__, (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)));
5582 dml2_printf("DML::%s: Tpre_rounded = %f\n", __func__, *p->Tpre_rounded);
5583 dml2_printf("DML::%s: minus term = %f\n", __func__, s->min_Lsw_equ * s->LineTime + 0.75 * s->LineTime + *p->Tno_bw);
5584 dml2_printf("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ);
5585 dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime);
5586 dml2_printf("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw);
5587 dml2_printf("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw));
5588 dml2_printf("DML::%s: prefetch_bw1: %f (updated)\n", __func__, s->prefetch_bw1);
5589 #endif
5590 }
5591
5592 // prefetch_bw2: VM + SW
5593 if (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded > 0) {
5594 s->prefetch_bw2 = (vm_bytes * p->HostVMInefficiencyFactor + *p->prefetch_sw_bytes) /
5595 (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded);
5596 s->Tsw_est2 = *p->prefetch_sw_bytes / s->prefetch_bw2;
5597 } else
5598 s->prefetch_bw2 = 0;
5599
5600 dml2_printf("DML::%s: prefetch_bw2: %f\n", __func__, s->prefetch_bw2);
5601 if ((s->Tsw_est2 < s->min_Lsw_equ * s->LineTime) && ((*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime) > 0)) {
5602 s->prefetch_bw2 = vm_bytes * p->HostVMInefficiencyFactor / (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime);
5603 dml2_printf("DML::%s: prefetch_bw2: %f (updated)\n", __func__, s->prefetch_bw2);
5604 }
5605
5606 // prefetch_bw3: 2*R0 + SW
5607 if (*p->Tpre_rounded - s->Tvm_trips_rounded > 0) {
5608 s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) + *p->prefetch_sw_bytes) /
5609 (*p->Tpre_rounded - s->Tvm_trips_rounded);
5610 s->Tsw_est3 = *p->prefetch_sw_bytes / s->prefetch_bw3;
5611 } else
5612 s->prefetch_bw3 = 0;
5613
5614 dml2_printf("DML::%s: prefetch_bw3: %f\n", __func__, s->prefetch_bw3);
5615 if ((s->Tsw_est3 < s->min_Lsw_equ * s->LineTime) && ((*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded) > 0)) {
5616 s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded);
5617 dml2_printf("DML::%s: prefetch_bw3: %f (updated)\n", __func__, s->prefetch_bw3);
5618 }
5619
5620 // prefetch_bw4: SW
5621 if (*p->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded > 0)
5622 s->prefetch_bw4 = *p->prefetch_sw_bytes / (*p->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded);
5623 else
5624 s->prefetch_bw4 = 0;
5625
5626 #ifdef __DML_VBA_DEBUG__
5627 dml2_printf("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw);
5628 dml2_printf("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, Tpre, *p->Tpre_rounded, (*p->Tpre_rounded - Tpre));
5629 dml2_printf("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips));
5630 dml2_printf("DML::%s: Tr0_trips=%f Tr0_trips_rounded: %f, delta=%f\n", __func__, *p->Tr0_trips, s->Tr0_trips_rounded, (s->Tr0_trips_rounded - *p->Tr0_trips));
5631 dml2_printf("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1);
5632 dml2_printf("DML::%s: Tsw_est2: %f\n", __func__, s->Tsw_est2);
5633 dml2_printf("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3);
5634 dml2_printf("DML::%s: prefetch_bw1: %f (final)\n", __func__, s->prefetch_bw1);
5635 dml2_printf("DML::%s: prefetch_bw2: %f (final)\n", __func__, s->prefetch_bw2);
5636 dml2_printf("DML::%s: prefetch_bw3: %f (final)\n", __func__, s->prefetch_bw3);
5637 dml2_printf("DML::%s: prefetch_bw4: %f (final)\n", __func__, s->prefetch_bw4);
5638 #endif
5639 {
5640 bool Case1OK = false;
5641 bool Case2OK = false;
5642 bool Case3OK = false;
5643
5644 // get "equalized" bw among all stages (vm, r0, sw); the base case is that all 3 stages are just above the latency-based requirement,
5645 // so that it does not disproportionately favor a particular stage; next is r0 being more aggressive, then vm being more aggressive; the worst case is all stages being aggressive
5646 // vs the latency-based number
5647
5648 // prefetch_bw1: VM + 2*R0 + SW
5649 // so prefetch_bw1 will have enough bw to transfer the necessary data within Tpre_rounded - Tno_bw (Tpre is the worst-case latency-based time to fetch the data)
5650 // here is to make sure equ bw won't be more aggressive than the latency-based requirement.
5651 // check vm time >= vm_trips
5652 // check r0 time >= r0_trips
5653
5654 double total_row_bytes = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes);
5655
5656 dml2_printf("DML::%s: Tvm_trips_rounded = %f\n", __func__, s->Tvm_trips_rounded);
5657 dml2_printf("DML::%s: Tr0_trips_rounded = %f\n", __func__, s->Tr0_trips_rounded);
5658
5659 if (s->prefetch_bw1 > 0) {
5660 double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw1;
5661 double row_transfer_time = total_row_bytes / s->prefetch_bw1;
5662 dml2_printf("DML::%s: Case1: vm_transfer_time = %f\n", __func__, vm_transfer_time);
5663 dml2_printf("DML::%s: Case1: row_transfer_time = %f\n", __func__, row_transfer_time);
5664 if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) {
5665 Case1OK = true;
5666 }
5667 }
5668
5669 // prefetch_bw2: VM + SW
5670 // prefetch_bw2 will be enough bw to transfer VM and SW data within (Tpre_rounded - Tno_bw - 2*Tr0_trips_rounded)
5671 // check vm time >= vm_trips
5672 // check r0 time < r0_trips
5673 if (s->prefetch_bw2 > 0) {
5674 double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw2;
5675 double row_transfer_time = total_row_bytes / s->prefetch_bw2;
5676 dml2_printf("DML::%s: Case2: vm_transfer_time = %f\n", __func__, vm_transfer_time);
5677 dml2_printf("DML::%s: Case2: row_transfer_time = %f\n", __func__, row_transfer_time);
5678 if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time < s->Tr0_trips_rounded) {
5679 Case2OK = true;
5680 }
5681 }
5682
5683 // prefetch_bw3: 2*R0 + SW
5684 // check vm time < vm_trips
5685 // check r0 time >= r0_trips
5686 if (s->prefetch_bw3 > 0) {
5687 double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw3;
5688 double row_transfer_time = total_row_bytes / s->prefetch_bw3;
5689 dml2_printf("DML::%s: Case3: vm_transfer_time = %f\n", __func__, vm_transfer_time);
5690 dml2_printf("DML::%s: Case3: row_transfer_time = %f\n", __func__, row_transfer_time);
5691 if (vm_transfer_time < s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) {
5692 Case3OK = true;
5693 }
5694 }
5695
5696 if (Case1OK) {
5697 s->prefetch_bw_equ = s->prefetch_bw1;
5698 } else if (Case2OK) {
5699 s->prefetch_bw_equ = s->prefetch_bw2;
5700 } else if (Case3OK) {
5701 s->prefetch_bw_equ = s->prefetch_bw3;
5702 } else {
5703 s->prefetch_bw_equ = s->prefetch_bw4;
5704 }
5705
5706 s->prefetch_bw_equ = math_max3(s->prefetch_bw_equ,
5707 p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw,
5708 (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime));
5709 #ifdef __DML_VBA_DEBUG__
5710 dml2_printf("DML::%s: Case1OK: %u\n", __func__, Case1OK);
5711 dml2_printf("DML::%s: Case2OK: %u\n", __func__, Case2OK);
5712 dml2_printf("DML::%s: Case3OK: %u\n", __func__, Case3OK);
5713 dml2_printf("DML::%s: prefetch_bw_equ: %f\n", __func__, s->prefetch_bw_equ);
5714 #endif
5715
5716 if (s->prefetch_bw_equ > 0) {
5717 if (p->display_cfg->gpuvm_enable == true) {
5718 s->Tvm_equ = math_max3(*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_equ, *p->Tvm_trips, s->LineTime / 4);
5719 } else {
5720 s->Tvm_equ = s->LineTime / 4;
5721 }
5722
5723 if (p->display_cfg->gpuvm_enable == true || dcc_mrq_enable || p->setup_for_tdlut) {
5724 s->Tr0_equ = math_max3((p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_equ, // PixelPTEBytesPerRow is dpte_row_bytes
5725 *p->Tr0_trips,
5726 s->LineTime / 4);
5727 } else {
5728 s->Tr0_equ = s->LineTime / 4;
5729 }
5730 } else {
5731 s->Tvm_equ = 0;
5732 s->Tr0_equ = 0;
5733 dml2_printf("DML::%s: prefetch_bw_equ equals 0!\n", __func__);
5734 }
5735 }
5736 #ifdef __DML_VBA_DEBUG__
5737 dml2_printf("DML::%s: Tvm_equ = %f\n", __func__, s->Tvm_equ);
5738 dml2_printf("DML::%s: Tr0_equ = %f\n", __func__, s->Tr0_equ);
5739 #endif
5740 // Use the more stressful prefetch schedule
5741 if (s->dst_y_prefetch_oto < s->dst_y_prefetch_equ) {
5742 *p->dst_y_prefetch = s->dst_y_prefetch_oto;
5743 s->TimeForFetchingVM = s->Tvm_oto;
5744 s->TimeForFetchingRowInVBlank = s->Tr0_oto;
5745
5746 *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0;
5747 *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
5748 #ifdef __DML_VBA_DEBUG__
5749 dml2_printf("DML::%s: Using oto scheduling for prefetch\n", __func__);
5750 #endif
5751
5752 } else {
5753 *p->dst_y_prefetch = s->dst_y_prefetch_equ;
5754
5755 if (s->dst_y_prefetch_equ < s->dst_y_prefetch_equ_impacted)
5756 *p->dst_y_prefetch = s->dst_y_prefetch_equ_impacted;
5757
5758 s->TimeForFetchingVM = s->Tvm_equ;
5759 s->TimeForFetchingRowInVBlank = s->Tr0_equ;
5760
5761 *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0;
5762 *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
5763
5764 #ifdef __DML_VBA_DEBUG__
5765 dml2_printf("DML::%s: Using equ bw scheduling for prefetch\n", __func__);
5766 #endif
5767 }
5768
5769 // Lsw = dst_y_prefetch - (dst_y_per_vm_vblank + 2*dst_y_per_row_vblank)
5770 s->LinesToRequestPrefetchPixelData = *p->dst_y_prefetch - *p->dst_y_per_vm_vblank - 2 * *p->dst_y_per_row_vblank; // Lsw
5771
5772 s->cursor_prefetch_bytes = (unsigned int)math_max2(p->cursor_bytes_per_chunk, 4 * p->cursor_bytes_per_line);
5773 *p->prefetch_cursor_bw = p->num_cursors * s->cursor_prefetch_bytes / (s->LinesToRequestPrefetchPixelData * s->LineTime);
5774 *p->prefetch_swath_time_us = (s->LinesToRequestPrefetchPixelData * s->LineTime);
5775
5776 #ifdef __DML_VBA_DEBUG__
5777 dml2_printf("DML::%s: TimeForFetchingVM = %f\n", __func__, s->TimeForFetchingVM);
5778 dml2_printf("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, s->TimeForFetchingRowInVBlank);
5779 dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime);
5780 dml2_printf("DML::%s: dst_y_prefetch = %f\n", __func__, *p->dst_y_prefetch);
5781 dml2_printf("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank);
5782 dml2_printf("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank);
5783 dml2_printf("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData);
5784 dml2_printf("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
5785 dml2_printf("DML::%s: prefetch_swath_time_us = %f\n", __func__, *p->prefetch_swath_time_us);
5786
5787 dml2_printf("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, p->cursor_bytes_per_chunk);
5788 dml2_printf("DML::%s: cursor_bytes_per_line = %d\n", __func__, p->cursor_bytes_per_line);
5789 dml2_printf("DML::%s: cursor_prefetch_bytes = %d\n", __func__, s->cursor_prefetch_bytes);
5790 dml2_printf("DML::%s: prefetch_cursor_bw = %f\n", __func__, *p->prefetch_cursor_bw);
5791 #endif
5792 dml2_assert(*p->dst_y_prefetch < 64);
5793
5794 unsigned int min_lsw_required = (unsigned int)math_max2(2, p->tdlut_drain_time / s->LineTime);
5795 if (s->LinesToRequestPrefetchPixelData >= min_lsw_required && s->prefetch_bw_equ > 0) {
5796 *p->VRatioPrefetchY = (double)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData;
5797 *p->VRatioPrefetchY = math_max2(*p->VRatioPrefetchY, 1.0);
5798 #ifdef __DML_VBA_DEBUG__
5799 dml2_printf("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY);
5800 dml2_printf("DML::%s: SwathHeightY = %u\n", __func__, p->SwathHeightY);
5801 dml2_printf("DML::%s: VInitPreFillY = %u\n", __func__, p->VInitPreFillY);
5802 #endif
5803 if ((p->SwathHeightY > 4) && (p->VInitPreFillY > 3)) {
5804 if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillY - 3.0) / 2.0) {
5805 *p->VRatioPrefetchY = math_max2(*p->VRatioPrefetchY,
5806 (double)p->MaxNumSwathY * p->SwathHeightY / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillY - 3.0) / 2.0));
5807 } else {
5808 s->NoTimeToPrefetch = true;
5809 dml2_printf("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY);
5810 *p->VRatioPrefetchY = 0;
5811 }
5812 #ifdef __DML_VBA_DEBUG__
5813 dml2_printf("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY);
5814 dml2_printf("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
5815 dml2_printf("DML::%s: MaxNumSwathY = %u\n", __func__, p->MaxNumSwathY);
5816 #endif
5817 }
5818
5819 *p->VRatioPrefetchC = (double)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData;
5820 *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, 1.0);
5821
5822 #ifdef __DML_VBA_DEBUG__
5823 dml2_printf("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC);
5824 dml2_printf("DML::%s: SwathHeightC = %u\n", __func__, p->SwathHeightC);
5825 dml2_printf("DML::%s: VInitPreFillC = %u\n", __func__, p->VInitPreFillC);
5826 #endif
5827 if ((p->SwathHeightC > 4) && (p->VInitPreFillC > 3)) {
5828 if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillC - 3.0) / 2.0) {
5829 *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, (double)p->MaxNumSwathC * p->SwathHeightC / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillC - 3.0) / 2.0));
5830 } else {
5831 s->NoTimeToPrefetch = true;
5832 dml2_printf("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC);
5833 *p->VRatioPrefetchC = 0;
5834 }
5835 #ifdef __DML_VBA_DEBUG__
5836 dml2_printf("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC);
5837 dml2_printf("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC);
5838 dml2_printf("DML::%s: MaxNumSwathC = %u\n", __func__, p->MaxNumSwathC);
5839 #endif
5840 }
5841
5842 *p->RequiredPrefetchPixelDataBWLuma = (double)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData * p->myPipe->BytePerPixelY * p->swath_width_luma_ub / s->LineTime;
5843 *p->RequiredPrefetchPixelDataBWChroma = (double)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData * p->myPipe->BytePerPixelC * p->swath_width_chroma_ub / s->LineTime;
5844
5845 #ifdef __DML_VBA_DEBUG__
5846 dml2_printf("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY);
5847 dml2_printf("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub);
5848 dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime);
5849 dml2_printf("DML::%s: RequiredPrefetchPixelDataBWLuma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWLuma);
5850 dml2_printf("DML::%s: RequiredPrefetchPixelDataBWChroma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWChroma);
5851 #endif
5852 } else {
5853 s->NoTimeToPrefetch = true;
5854 dml2_printf("DML::%s: No time to prefetch!, LinesToRequestPrefetchPixelData: %f, should be >= %d\n", __func__, s->LinesToRequestPrefetchPixelData, min_lsw_required);
5855 dml2_printf("DML::%s: No time to prefetch!, prefetch_bw_equ: %f, should be > 0\n", __func__, s->prefetch_bw_equ);
5856 *p->VRatioPrefetchY = 0;
5857 *p->VRatioPrefetchC = 0;
5858 *p->RequiredPrefetchPixelDataBWLuma = 0;
5859 *p->RequiredPrefetchPixelDataBWChroma = 0;
5860 }
5861 dml2_printf("DML: Tpre: %fus - sum of time to request 2 x data pte, swaths\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingVM);
5862 dml2_printf("DML: Tvm: %fus - time to fetch vm\n", s->TimeForFetchingVM);
5863 dml2_printf("DML: Tr0: %fus - time to fetch first row of data pagetables\n", s->TimeForFetchingRowInVBlank);
5864 dml2_printf("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime);
5865 dml2_printf("DML: To: %fus - time for propagation from scaler to optc\n", (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime);
5866 dml2_printf("DML: Tvstartup - TSetup - Tcalc - TWait - Tpre - To > 0\n");
5867 dml2_printf("DML: Tslack(pre): %fus - time left over in schedule\n", p->VStartup * s->LineTime - s->TimeForFetchingVM - 2 * s->TimeForFetchingRowInVBlank - (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime - p->TWait - p->TCalc - *p->TSetup);
5868 dml2_printf("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow);
5869
5870 } else {
5871 dml2_printf("DML::%s: No time to prefetch! dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ);
5872 dml2_printf("DML::%s: No time to prefetch! min_Lsw_equ_ok = %d, Tpre_rounded (%f) should be >= Tvm_trips_rounded (%f) + 2.0*Tr0_trips_rounded (%f) + min_Tsw_equ (%f)\n",
5873 __func__, min_Lsw_equ_ok, *p->Tpre_rounded, s->Tvm_trips_rounded, 2.0*s->Tr0_trips_rounded, s->min_Lsw_equ*s->LineTime);
5874 dml2_printf("DML::%s: No time to prefetch! min_Lsw_equ_ok = %d, Tpre_rounded+Tvm_trips_rounded+2.0*Tr0_trips_rounded+min_Tsw_equ (%f) should be > \n",
5875 __func__, tpre_gt_req_latency, (s->min_Lsw_equ*s->LineTime + s->Tvm_trips_rounded + 2.0*s->Tr0_trips_rounded), p->Turg, s->trip_to_mem, p->ExtraLatencyPrefetch);
5876 s->NoTimeToPrefetch = true;
5877 s->TimeForFetchingVM = 0;
5878 s->TimeForFetchingRowInVBlank = 0;
5879 *p->dst_y_per_vm_vblank = 0;
5880 *p->dst_y_per_row_vblank = 0;
5881 s->LinesToRequestPrefetchPixelData = 0;
5882 *p->VRatioPrefetchY = 0;
5883 *p->VRatioPrefetchC = 0;
5884 *p->RequiredPrefetchPixelDataBWLuma = 0;
5885 *p->RequiredPrefetchPixelDataBWChroma = 0;
5886 }
5887
5888 {
5889 double prefetch_vm_bw;
5890 double prefetch_row_bw;
5891
5892 if (vm_bytes == 0) {
5893 prefetch_vm_bw = 0;
5894 } else if (*p->dst_y_per_vm_vblank > 0) {
5895 #ifdef __DML_VBA_DEBUG__
5896 dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
5897 dml2_printf("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank);
5898 dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime);
5899 #endif
5900 prefetch_vm_bw = vm_bytes * p->HostVMInefficiencyFactor / (*p->dst_y_per_vm_vblank * s->LineTime);
5901 #ifdef __DML_VBA_DEBUG__
5902 dml2_printf("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
5903 #endif
5904 } else {
5905 prefetch_vm_bw = 0;
5906 s->NoTimeToPrefetch = true;
5907 dml2_printf("DML::%s: No time to prefetch!. dst_y_per_vm_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_vm_vblank);
5908 }
5909
5910 if (p->PixelPTEBytesPerRow == 0 && tdlut_row_bytes == 0) {
5911 prefetch_row_bw = 0;
5912 } else if (*p->dst_y_per_row_vblank > 0) {
5913 prefetch_row_bw = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + tdlut_row_bytes) / (*p->dst_y_per_row_vblank * s->LineTime);
5914
5915 #ifdef __DML_VBA_DEBUG__
5916 dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
5917 dml2_printf("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank);
5918 dml2_printf("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
5919 #endif
5920 } else {
5921 prefetch_row_bw = 0;
5922 s->NoTimeToPrefetch = true;
5923 dml2_printf("DML::%s: No time to prefetch!. dst_y_per_row_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_row_vblank);
5924 }
5925
5926 *p->prefetch_vmrow_bw = math_max2(prefetch_vm_bw, prefetch_row_bw);
5927 }
5928
5929 if (s->NoTimeToPrefetch) {
5930 s->TimeForFetchingVM = 0;
5931 s->TimeForFetchingRowInVBlank = 0;
5932 *p->dst_y_per_vm_vblank = 0;
5933 *p->dst_y_per_row_vblank = 0;
5934 *p->dst_y_prefetch = 0;
5935 s->LinesToRequestPrefetchPixelData = 0;
5936 *p->VRatioPrefetchY = 0;
5937 *p->VRatioPrefetchC = 0;
5938 *p->RequiredPrefetchPixelDataBWLuma = 0;
5939 *p->RequiredPrefetchPixelDataBWChroma = 0;
5940 *p->prefetch_vmrow_bw = 0;
5941 }
5942
5943 dml2_printf("DML::%s: dst_y_per_vm_vblank = %f (final)\n", __func__, *p->dst_y_per_vm_vblank);
5944 dml2_printf("DML::%s: dst_y_per_row_vblank = %f (final)\n", __func__, *p->dst_y_per_row_vblank);
5945 dml2_printf("DML::%s: prefetch_vmrow_bw = %f (final)\n", __func__, *p->prefetch_vmrow_bw);
5946 dml2_printf("DML::%s: RequiredPrefetchPixelDataBWLuma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWLuma);
5947 dml2_printf("DML::%s: RequiredPrefetchPixelDataBWChroma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWChroma);
5948 dml2_printf("DML::%s: NoTimeToPrefetch=%d\n", __func__, s->NoTimeToPrefetch);
5949
5950 return s->NoTimeToPrefetch;
5951 }
5952
// Number of source lines the line buffer can hold for one pipe.
//
// The per-pipe input width is vp_width / num_pipes, or vp_height / num_pipes
// when dml_is_vertical_rotation() reports a vertical rotation. That width is
// divided by h_ratio when h_ratio exceeds 1.0. The result is how many such
// lines fit in line_buffer_size_bits at 57 bits per pixel (rounded down with
// math_floor2(..., 1.0)), capped at max_line_buffer_lines.
//
// Note: num_pipes is used as an integer divisor and is not checked for zero.
static unsigned int get_num_lb_source_lines(unsigned int max_line_buffer_lines,
	unsigned int line_buffer_size_bits,
	unsigned int num_pipes,
	unsigned int vp_width,
	unsigned int vp_height,
	double h_ratio,
	enum dml2_rotation_angle rotation_angle)
{
	const double lb_bit_per_pixel = 57.0;
	unsigned int recin_width;
	double pixels_per_line;
	double lines_that_fit;

	// A vertically rotated surface is split across pipes along the viewport height
	if (dml_is_vertical_rotation(rotation_angle))
		recin_width = vp_height / num_pipes;
	else
		recin_width = vp_width / num_pipes;

	// Only ratios above 1.0 reduce the number of pixels kept per line
	pixels_per_line = recin_width / math_max2(h_ratio, 1.0);
	lines_that_fit = math_floor2(line_buffer_size_bits / lb_bit_per_pixel / pixels_per_line, 1.0);

	return (unsigned int) math_min2((double) max_line_buffer_lines, lines_that_fit);
}
5973
// Return the index of the plane, other than this_plane_idx, that has the
// largest entry in Trpd_dcfclk_cycles[0..num_planes-1].
//
// Entries are compared as int and the comparison is strict, so on a tie the
// lowest index wins. When there is no other plane (for example num_planes is
// 0 or 1) the function asserts and falls back to returning this_plane_idx.
static unsigned int find_max_impact_plane(unsigned int this_plane_idx, unsigned int num_planes, unsigned int Trpd_dcfclk_cycles[])
{
	int max_value = -1;
	int max_idx = -1;

	for (unsigned int i = 0; i < num_planes; i++) {
		if (i != this_plane_idx && (int) Trpd_dcfclk_cycles[i] > max_value) {
			max_value = Trpd_dcfclk_cycles[i];
			max_idx = i;
		}
	}

	// Index 0 is a valid result; only a negative index means that no other
	// plane was found. (Testing "<= 0" here would discard plane 0 whenever it
	// is the most impactful plane and wrongly return this_plane_idx instead.)
	if (max_idx < 0) {
		dml2_assert(max_idx >= 0);
		max_idx = this_plane_idx;
	}

	return max_idx;
}
5991
// Add up prefetch_swath_bytes[] for every plane in [0, num_planes) except
// exclude_plane_idx, and divide the total by bw_mbps.
//
// bw_mbps is not checked against zero before the division.
static double calculate_impacted_Tsw(unsigned int exclude_plane_idx, unsigned int num_planes, double *prefetch_swath_bytes, double bw_mbps)
{
	double total_bytes = 0.;
	unsigned int plane;

	for (plane = 0; plane < num_planes; plane++) {
		// the excluded plane does not contribute to the total
		if (plane == exclude_plane_idx)
			continue;

		total_bytes += prefetch_swath_bytes[plane];
	}

	return total_bytes / bw_mbps;
}
6002
6003 // a global check against the aggregate effect of the per plane prefetch schedule
CheckGlobalPrefetchAdmissibility(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params * p)6004 static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core_internal_scratch *scratch,
6005 struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *p)
6006 {
6007 struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals *s = &scratch->CheckGlobalPrefetchAdmissibility_locals;
6008 unsigned int i, k;
6009
6010 memset(s, 0, sizeof(struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals));
6011
6012 *p->recalc_prefetch_schedule = 0;
6013 s->prefetch_global_check_passed = 1;
6014 // worst case if the rob and cdb is fully hogged
6015 s->max_Trpd_dcfclk_cycles = (unsigned int) math_ceil2((p->rob_buffer_size_kbytes*1024 + p->compressed_buffer_size_kbytes*DML_MAX_COMPRESSION_RATIO*1024)/64.0, 1.0);
6016 #ifdef __DML_VBA_DEBUG__
6017 dml2_printf("DML::%s: num_active_planes = %d\n", __func__, p->num_active_planes);
6018 dml2_printf("DML::%s: rob_buffer_size_kbytes = %d\n", __func__, p->rob_buffer_size_kbytes);
6019 dml2_printf("DML::%s: compressed_buffer_size_kbytes = %d\n", __func__, p->compressed_buffer_size_kbytes);
6020 dml2_printf("DML::%s: estimated_urg_bandwidth_required_mbps = %f\n", __func__, p->estimated_urg_bandwidth_required_mbps);
6021 dml2_printf("DML::%s: estimated_dcfclk_mhz = %f\n", __func__, p->estimated_dcfclk_mhz);
6022 dml2_printf("DML::%s: max_Trpd_dcfclk_cycles = %u\n", __func__, s->max_Trpd_dcfclk_cycles);
6023 #endif
6024
6025 // calculate the return impact from each plane, request is 256B per dcfclk
6026 for (i = 0; i < p->num_active_planes; i++) {
6027 s->src_detile_buf_size_bytes_l[i] = p->detile_buffer_size_bytes_l[i];
6028 s->src_detile_buf_size_bytes_c[i] = p->detile_buffer_size_bytes_c[i];
6029 s->src_swath_bytes_l[i] = p->full_swath_bytes_l[i];
6030 s->src_swath_bytes_c[i] = p->full_swath_bytes_c[i];
6031
6032 if (p->pixel_format[i] == dml2_420_10) {
6033 s->src_detile_buf_size_bytes_l[i] = (unsigned int) (s->src_detile_buf_size_bytes_l[i] * 1.5);
6034 s->src_detile_buf_size_bytes_c[i] = (unsigned int) (s->src_detile_buf_size_bytes_c[i] * 1.5);
6035 s->src_swath_bytes_l[i] = (unsigned int) (s->src_swath_bytes_l[i] * 1.5);
6036 s->src_swath_bytes_c[i] = (unsigned int) (s->src_swath_bytes_c[i] * 1.5);
6037 }
6038
6039 s->burst_bytes_to_fill_det = (unsigned int) (math_floor2(s->src_detile_buf_size_bytes_l[i] / p->chunk_bytes_l, 1) * p->chunk_bytes_l);
6040 s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(p->lb_source_lines_l[i] / p->swath_height_l[i], 1) * s->src_swath_bytes_l[i]);
6041
6042 #ifdef __DML_VBA_DEBUG__
6043 dml2_printf("DML::%s: i=%u pixel_format = %d\n", __func__, i, p->pixel_format[i]);
6044 dml2_printf("DML::%s: i=%u chunk_bytes_l = %d\n", __func__, i, p->chunk_bytes_l);
6045 dml2_printf("DML::%s: i=%u lb_source_lines_l = %d\n", __func__, i, p->lb_source_lines_l[i]);
6046 dml2_printf("DML::%s: i=%u src_detile_buf_size_bytes_l=%d\n", __func__, i, s->src_detile_buf_size_bytes_l[i]);
6047 dml2_printf("DML::%s: i=%u src_swath_bytes_l=%d\n", __func__, i, s->src_swath_bytes_l[i]);
6048 dml2_printf("DML::%s: i=%u burst_bytes_to_fill_det=%d (luma)\n", __func__, i, s->burst_bytes_to_fill_det);
6049 #endif
6050
6051 if (s->src_swath_bytes_c[i] > 0) { // dual_plane
6052 s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(s->src_detile_buf_size_bytes_c[i] / p->chunk_bytes_c, 1) * p->chunk_bytes_c);
6053
6054 if (p->pixel_format[i] == dml2_422_planar_8 || p->pixel_format[i] == dml2_422_planar_10 || p->pixel_format[i] == dml2_422_planar_12) {
6055 s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(p->lb_source_lines_c[i] / p->swath_height_c[i], 1) * s->src_swath_bytes_c[i]);
6056 }
6057
6058 #ifdef __DML_VBA_DEBUG__
6059 dml2_printf("DML::%s: i=%u chunk_bytes_c = %d\n", __func__, i, p->chunk_bytes_c);
6060 dml2_printf("DML::%s: i=%u lb_source_lines_c = %d\n", __func__, i, p->lb_source_lines_c[i]);
6061 dml2_printf("DML::%s: i=%u src_detile_buf_size_bytes_c=%d\n", __func__, i, s->src_detile_buf_size_bytes_c[i]);
6062 dml2_printf("DML::%s: i=%u src_swath_bytes_c=%d\n", __func__, i, s->src_swath_bytes_c[i]);
6063 #endif
6064 }
6065
6066 s->time_to_fill_det_us = (double) s->burst_bytes_to_fill_det / (256 * p->estimated_dcfclk_mhz); // fill time assume full burst at request rate
6067 s->accumulated_return_path_dcfclk_cycles[i] = (unsigned int) math_ceil2(((DML_MAX_COMPRESSION_RATIO-1) * 64 * p->estimated_dcfclk_mhz) * s->time_to_fill_det_us / 64.0, 1.0); //for 64B per DCFClk
6068
6069 #ifdef __DML_VBA_DEBUG__
6070 dml2_printf("DML::%s: i=%u burst_bytes_to_fill_det=%d\n", __func__, i, s->burst_bytes_to_fill_det);
6071 dml2_printf("DML::%s: i=%u time_to_fill_det_us=%f\n", __func__, i, s->time_to_fill_det_us);
6072 dml2_printf("DML::%s: i=%u accumulated_return_path_dcfclk_cycles=%u\n", __func__, i, s->accumulated_return_path_dcfclk_cycles[i]);
6073 #endif
6074 // clamping to worst case delay which is one which occupy the full rob+cdb
6075 if (s->accumulated_return_path_dcfclk_cycles[i] > s->max_Trpd_dcfclk_cycles)
6076 s->accumulated_return_path_dcfclk_cycles[i] = s->max_Trpd_dcfclk_cycles;
6077 }
6078
6079 // Figure out the impacted prefetch time for each plane
6080 // if impacted_Tre is > equ bw Tpre, we need to fail the prefetch schedule as we need a higher state to support the bw
6081 for (i = 0; i < p->num_active_planes; i++) {
6082 k = find_max_impact_plane(i, p->num_active_planes, s->accumulated_return_path_dcfclk_cycles); // plane k causes most impact to plane i
6083 // the rest of planes (except for k) complete for bw
6084 p->impacted_dst_y_pre[i] = s->accumulated_return_path_dcfclk_cycles[k]/p->estimated_dcfclk_mhz;
6085 p->impacted_dst_y_pre[i] += calculate_impacted_Tsw(k, p->num_active_planes, p->prefetch_sw_bytes, p->estimated_urg_bandwidth_required_mbps);
6086 p->impacted_dst_y_pre[i] = math_ceil2(p->impacted_dst_y_pre[i] / p->line_time[i], 0.25);
6087
6088 #ifdef __DML_VBA_DEBUG__
6089 dml2_printf("DML::%s: i=%u impacted_Tpre=%f (k=%u)\n", __func__, i, p->impacted_dst_y_pre[i], k);
6090 #endif
6091 }
6092
6093 if (p->Tpre_rounded != NULL && p->Tpre_oto != NULL) {
6094 for (i = 0; i < p->num_active_planes; i++) {
6095 if (p->impacted_dst_y_pre[i] > p->dst_y_prefetch[i]) {
6096 s->prefetch_global_check_passed = 0;
6097 *p->recalc_prefetch_schedule = 1;
6098 }
6099 #ifdef __DML_VBA_DEBUG__
6100 dml2_printf("DML::%s: i=%u Tpre_rounded=%f\n", __func__, i, p->Tpre_rounded[i]);
6101 dml2_printf("DML::%s: i=%u Tpre_oto=%f\n", __func__, i, p->Tpre_oto[i]);
6102 #endif
6103 }
6104 } else {
6105 // likely a mode programming calls, assume support, and no recalc - not used anyways
6106 s->prefetch_global_check_passed = 1;
6107 *p->recalc_prefetch_schedule = 0;
6108 }
6109
6110 #ifdef __DML_VBA_DEBUG__
6111 dml2_printf("DML::%s: prefetch_global_check_passed=%u\n", __func__, s->prefetch_global_check_passed);
6112 dml2_printf("DML::%s: recalc_prefetch_schedule=%u\n", __func__, *p->recalc_prefetch_schedule);
6113 #endif
6114
6115 return s->prefetch_global_check_passed;
6116 }
6117
calculate_peak_bandwidth_required(struct dml2_core_internal_scratch * s,struct dml2_core_calcs_calculate_peak_bandwidth_required_params * p)6118 static void calculate_peak_bandwidth_required(
6119 struct dml2_core_internal_scratch *s,
6120 struct dml2_core_calcs_calculate_peak_bandwidth_required_params *p)
6121 {
6122 unsigned int n;
6123 unsigned int m;
6124
6125 struct dml2_core_shared_calculate_peak_bandwidth_required_locals *l = &s->calculate_peak_bandwidth_required_locals;
6126
6127 memset(l, 0, sizeof(struct dml2_core_shared_calculate_peak_bandwidth_required_locals));
6128
6129 #ifdef __DML_VBA_DEBUG__
6130 dml2_printf("DML::%s: inc_flip_bw = %d\n", __func__, p->inc_flip_bw);
6131 dml2_printf("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, p->num_active_planes);
6132 #endif
6133
6134 for (unsigned int k = 0; k < p->num_active_planes; ++k) {
6135 l->unity_array[k] = 1.0;
6136 l->zero_array[k] = 0.0;
6137 }
6138
6139 for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
6140 for (n = 0; n < dml2_core_internal_bw_max; n++) {
6141 get_urgent_bandwidth_required(
6142 &s->get_urgent_bandwidth_required_locals,
6143 p->display_cfg,
6144 m,
6145 n,
6146 0, //inc_flip_bw,
6147 0, //use_qual_row_bw
6148 p->num_active_planes,
6149 p->num_of_dpp,
6150 p->dcc_dram_bw_nom_overhead_factor_p0,
6151 p->dcc_dram_bw_nom_overhead_factor_p1,
6152 p->dcc_dram_bw_pref_overhead_factor_p0,
6153 p->dcc_dram_bw_pref_overhead_factor_p1,
6154 p->mall_prefetch_sdp_overhead_factor,
6155 p->mall_prefetch_dram_overhead_factor,
6156 p->surface_read_bandwidth_l,
6157 p->surface_read_bandwidth_c,
6158 l->zero_array, //PrefetchBandwidthLuma,
6159 l->zero_array, //PrefetchBandwidthChroma,
6160 l->zero_array,
6161 l->zero_array,
6162 l->zero_array,
6163 p->dpte_row_bw,
6164 p->meta_row_bw,
6165 l->zero_array, //prefetch_cursor_bw,
6166 l->zero_array, //prefetch_vmrow_bw,
6167 l->zero_array, //flip_bw,
6168 l->zero_array,
6169 l->zero_array,
6170 l->zero_array,
6171 l->zero_array,
6172 l->zero_array,
6173 l->zero_array,
6174 p->surface_avg_vactive_required_bw[m][n],
6175 p->surface_peak_required_bw[m][n]);
6176
6177 p->urg_vactive_bandwidth_required[m][n] = get_urgent_bandwidth_required(
6178 &s->get_urgent_bandwidth_required_locals,
6179 p->display_cfg,
6180 m,
6181 n,
6182 0, //inc_flip_bw,
6183 0, //use_qual_row_bw
6184 p->num_active_planes,
6185 p->num_of_dpp,
6186 p->dcc_dram_bw_nom_overhead_factor_p0,
6187 p->dcc_dram_bw_nom_overhead_factor_p1,
6188 p->dcc_dram_bw_pref_overhead_factor_p0,
6189 p->dcc_dram_bw_pref_overhead_factor_p1,
6190 p->mall_prefetch_sdp_overhead_factor,
6191 p->mall_prefetch_dram_overhead_factor,
6192 p->surface_read_bandwidth_l,
6193 p->surface_read_bandwidth_c,
6194 l->zero_array, //PrefetchBandwidthLuma,
6195 l->zero_array, //PrefetchBandwidthChroma,
6196 p->excess_vactive_fill_bw_l,
6197 p->excess_vactive_fill_bw_c,
6198 p->cursor_bw,
6199 p->dpte_row_bw,
6200 p->meta_row_bw,
6201 l->zero_array, //prefetch_cursor_bw,
6202 l->zero_array, //prefetch_vmrow_bw,
6203 l->zero_array, //flip_bw,
6204 p->urgent_burst_factor_l,
6205 p->urgent_burst_factor_c,
6206 p->urgent_burst_factor_cursor,
6207 p->urgent_burst_factor_prefetch_l,
6208 p->urgent_burst_factor_prefetch_c,
6209 p->urgent_burst_factor_prefetch_cursor,
6210 l->surface_dummy_bw,
6211 p->surface_peak_required_bw[m][n]);
6212
6213 p->urg_bandwidth_required[m][n] = get_urgent_bandwidth_required(
6214 &s->get_urgent_bandwidth_required_locals,
6215 p->display_cfg,
6216 m,
6217 n,
6218 p->inc_flip_bw,
6219 0, //use_qual_row_bw
6220 p->num_active_planes,
6221 p->num_of_dpp,
6222 p->dcc_dram_bw_nom_overhead_factor_p0,
6223 p->dcc_dram_bw_nom_overhead_factor_p1,
6224 p->dcc_dram_bw_pref_overhead_factor_p0,
6225 p->dcc_dram_bw_pref_overhead_factor_p1,
6226 p->mall_prefetch_sdp_overhead_factor,
6227 p->mall_prefetch_dram_overhead_factor,
6228 p->surface_read_bandwidth_l,
6229 p->surface_read_bandwidth_c,
6230 p->prefetch_bandwidth_l,
6231 p->prefetch_bandwidth_c,
6232 p->excess_vactive_fill_bw_l,
6233 p->excess_vactive_fill_bw_c,
6234 p->cursor_bw,
6235 p->dpte_row_bw,
6236 p->meta_row_bw,
6237 p->prefetch_cursor_bw,
6238 p->prefetch_vmrow_bw,
6239 p->flip_bw,
6240 p->urgent_burst_factor_l,
6241 p->urgent_burst_factor_c,
6242 p->urgent_burst_factor_cursor,
6243 p->urgent_burst_factor_prefetch_l,
6244 p->urgent_burst_factor_prefetch_c,
6245 p->urgent_burst_factor_prefetch_cursor,
6246 l->surface_dummy_bw,
6247 p->surface_peak_required_bw[m][n]);
6248
6249 p->urg_bandwidth_required_qual[m][n] = get_urgent_bandwidth_required(
6250 &s->get_urgent_bandwidth_required_locals,
6251 p->display_cfg,
6252 m,
6253 n,
6254 0, //inc_flip_bw
6255 1, //use_qual_row_bw
6256 p->num_active_planes,
6257 p->num_of_dpp,
6258 p->dcc_dram_bw_nom_overhead_factor_p0,
6259 p->dcc_dram_bw_nom_overhead_factor_p1,
6260 p->dcc_dram_bw_pref_overhead_factor_p0,
6261 p->dcc_dram_bw_pref_overhead_factor_p1,
6262 p->mall_prefetch_sdp_overhead_factor,
6263 p->mall_prefetch_dram_overhead_factor,
6264 p->surface_read_bandwidth_l,
6265 p->surface_read_bandwidth_c,
6266 p->prefetch_bandwidth_l,
6267 p->prefetch_bandwidth_c,
6268 p->excess_vactive_fill_bw_l,
6269 p->excess_vactive_fill_bw_c,
6270 p->cursor_bw,
6271 p->dpte_row_bw,
6272 p->meta_row_bw,
6273 p->prefetch_cursor_bw,
6274 p->prefetch_vmrow_bw,
6275 p->flip_bw,
6276 p->urgent_burst_factor_l,
6277 p->urgent_burst_factor_c,
6278 p->urgent_burst_factor_cursor,
6279 p->urgent_burst_factor_prefetch_l,
6280 p->urgent_burst_factor_prefetch_c,
6281 p->urgent_burst_factor_prefetch_cursor,
6282 l->surface_dummy_bw,
6283 p->surface_peak_required_bw[m][n]);
6284
6285 p->non_urg_bandwidth_required[m][n] = get_urgent_bandwidth_required(
6286 &s->get_urgent_bandwidth_required_locals,
6287 p->display_cfg,
6288 m,
6289 n,
6290 p->inc_flip_bw,
6291 0, //use_qual_row_bw
6292 p->num_active_planes,
6293 p->num_of_dpp,
6294 p->dcc_dram_bw_nom_overhead_factor_p0,
6295 p->dcc_dram_bw_nom_overhead_factor_p1,
6296 p->dcc_dram_bw_pref_overhead_factor_p0,
6297 p->dcc_dram_bw_pref_overhead_factor_p1,
6298 p->mall_prefetch_sdp_overhead_factor,
6299 p->mall_prefetch_dram_overhead_factor,
6300 p->surface_read_bandwidth_l,
6301 p->surface_read_bandwidth_c,
6302 p->prefetch_bandwidth_l,
6303 p->prefetch_bandwidth_c,
6304 p->excess_vactive_fill_bw_l,
6305 p->excess_vactive_fill_bw_c,
6306 p->cursor_bw,
6307 p->dpte_row_bw,
6308 p->meta_row_bw,
6309 p->prefetch_cursor_bw,
6310 p->prefetch_vmrow_bw,
6311 p->flip_bw,
6312 l->unity_array,
6313 l->unity_array,
6314 l->unity_array,
6315 l->unity_array,
6316 l->unity_array,
6317 l->unity_array,
6318 l->surface_dummy_bw,
6319 p->surface_peak_required_bw[m][n]);
6320
6321 #ifdef __DML_VBA_DEBUG__
6322 dml2_printf("DML::%s: urg_vactive_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_vactive_bandwidth_required[m][n]);
6323 dml2_printf("DML::%s: urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required[m][n]);
6324 dml2_printf("DML::%s: urg_bandwidth_required_qual[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required[m][n]);
6325 dml2_printf("DML::%s: non_urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->non_urg_bandwidth_required[m][n]);
6326 #endif
6327 dml2_assert(p->urg_bandwidth_required[m][n] >= p->non_urg_bandwidth_required[m][n]);
6328 }
6329 }
6330 }
6331
check_urgent_bandwidth_support(double * frac_urg_bandwidth_nom,double * frac_urg_bandwidth_mall,bool * vactive_bandwidth_support_ok,bool * bandwidth_support_ok,unsigned int mall_allocated_for_dcn_mbytes,double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])6332 static void check_urgent_bandwidth_support(
6333 double *frac_urg_bandwidth_nom,
6334 double *frac_urg_bandwidth_mall,
6335 bool *vactive_bandwidth_support_ok, // vactive ok
6336 bool *bandwidth_support_ok,// max of vm, prefetch, vactive all ok
6337
6338 unsigned int mall_allocated_for_dcn_mbytes,
6339 double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
6340 double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
6341 double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
6342 double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])
6343 {
6344 double frac_urg_bandwidth_nom_sdp = non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] / urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
6345 double frac_urg_bandwidth_nom_dram = non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] / urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram];
6346 double frac_urg_bandwidth_mall_sdp;
6347 double frac_urg_bandwidth_mall_dram;
6348 if (urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] > 0)
6349 frac_urg_bandwidth_mall_sdp = non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] / urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp];
6350 else
6351 frac_urg_bandwidth_mall_sdp = 0.0;
6352 if (urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] > 0)
6353 frac_urg_bandwidth_mall_dram = non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] / urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram];
6354 else
6355 frac_urg_bandwidth_mall_dram = 0.0;
6356
6357 *bandwidth_support_ok = 1;
6358 *vactive_bandwidth_support_ok = 1;
6359
6360 // Check urgent bandwidth required at sdp vs urgent bandwidth avail at sdp -> FractionOfUrgentBandwidth
6361 // Check urgent bandwidth required at dram vs urgent bandwidth avail at dram
6362 // Check urgent bandwidth required at sdp vs urgent bandwidth avail at sdp, svp_prefetch -> FractionOfUrgentBandwidthMALL
6363 // Check urgent bandwidth required at dram vs urgent bandwidth avail at dram, svp_prefetch
6364
6365 *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
6366 *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram];
6367
6368 if (mall_allocated_for_dcn_mbytes > 0) {
6369 *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp];
6370 *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram];
6371 }
6372
6373 *frac_urg_bandwidth_nom = math_max2(frac_urg_bandwidth_nom_sdp, frac_urg_bandwidth_nom_dram);
6374 *frac_urg_bandwidth_mall = math_max2(frac_urg_bandwidth_mall_sdp, frac_urg_bandwidth_mall_dram);
6375
6376 *bandwidth_support_ok &= (*frac_urg_bandwidth_nom <= 1.0);
6377
6378 if (mall_allocated_for_dcn_mbytes > 0)
6379 *bandwidth_support_ok &= (*frac_urg_bandwidth_mall <= 1.0);
6380
6381 *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
6382 *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram];
6383 if (mall_allocated_for_dcn_mbytes > 0) {
6384 *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp];
6385 *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram];
6386 }
6387
6388 #ifdef __DML_VBA_DEBUG__
6389 dml2_printf("DML::%s: frac_urg_bandwidth_nom_sdp = %f\n", __func__, frac_urg_bandwidth_nom_sdp);
6390 dml2_printf("DML::%s: frac_urg_bandwidth_nom_dram = %f\n", __func__, frac_urg_bandwidth_nom_dram);
6391 dml2_printf("DML::%s: frac_urg_bandwidth_nom = %f\n", __func__, *frac_urg_bandwidth_nom);
6392
6393 dml2_printf("DML::%s: frac_urg_bandwidth_mall_sdp = %f\n", __func__, frac_urg_bandwidth_mall_sdp);
6394 dml2_printf("DML::%s: frac_urg_bandwidth_mall_dram = %f\n", __func__, frac_urg_bandwidth_mall_dram);
6395 dml2_printf("DML::%s: frac_urg_bandwidth_mall = %f\n", __func__, *frac_urg_bandwidth_mall);
6396 dml2_printf("DML::%s: bandwidth_support_ok = %d\n", __func__, *bandwidth_support_ok);
6397
6398 for (unsigned int m = 0; m < dml2_core_internal_soc_state_max; m++) {
6399 for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) {
6400 dml2_printf("DML::%s: state:%s bw_type:%s urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n",
6401 __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n),
6402 urg_bandwidth_available[m][n], (urg_bandwidth_available[m][n] < urg_bandwidth_required[m][n]) ? "<" : ">=", urg_bandwidth_required[m][n]);
6403 }
6404 }
6405 #endif
6406 }
6407
get_bandwidth_available_for_immediate_flip(enum dml2_core_internal_soc_state_type eval_state,double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])6408 static double get_bandwidth_available_for_immediate_flip(enum dml2_core_internal_soc_state_type eval_state,
6409 double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], // no flip
6410 double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])
6411 {
6412 double flip_bw_available_mbps;
6413 double flip_bw_available_sdp_mbps;
6414 double flip_bw_available_dram_mbps;
6415
6416 flip_bw_available_sdp_mbps = urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp] - urg_bandwidth_required[eval_state][dml2_core_internal_bw_sdp];
6417 flip_bw_available_dram_mbps = urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram] - urg_bandwidth_required[eval_state][dml2_core_internal_bw_dram];
6418 flip_bw_available_mbps = flip_bw_available_sdp_mbps < flip_bw_available_dram_mbps ? flip_bw_available_sdp_mbps : flip_bw_available_dram_mbps;
6419
6420 #ifdef __DML_VBA_DEBUG__
6421 dml2_printf("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state));
6422 dml2_printf("DML::%s: urg_bandwidth_available_sdp_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp]);
6423 dml2_printf("DML::%s: urg_bandwidth_available_dram_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram]);
6424 dml2_printf("DML::%s: urg_bandwidth_required_sdp_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_sdp]);
6425 dml2_printf("DML::%s: urg_bandwidth_required_dram_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_dram]);
6426 dml2_printf("DML::%s: flip_bw_available_sdp_mbps = %f\n", __func__, flip_bw_available_sdp_mbps);
6427 dml2_printf("DML::%s: flip_bw_available_dram_mbps = %f\n", __func__, flip_bw_available_dram_mbps);
6428 dml2_printf("DML::%s: flip_bw_available_mbps = %f\n", __func__, flip_bw_available_mbps);
6429 #endif
6430
6431 return flip_bw_available_mbps;
6432 }
6433
calculate_immediate_flip_bandwidth_support(double * frac_urg_bandwidth_flip,bool * flip_bandwidth_support_ok,enum dml2_core_internal_soc_state_type eval_state,double urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])6434 static void calculate_immediate_flip_bandwidth_support(
6435 // Output
6436 double *frac_urg_bandwidth_flip,
6437 bool *flip_bandwidth_support_ok,
6438
6439 // Input
6440 enum dml2_core_internal_soc_state_type eval_state,
6441 double urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
6442 double non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
6443 double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])
6444 {
6445 double frac_urg_bw_flip_sdp = non_urg_bandwidth_required_flip[eval_state][dml2_core_internal_bw_sdp] / urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp];
6446 double frac_urg_bw_flip_dram = non_urg_bandwidth_required_flip[eval_state][dml2_core_internal_bw_dram] / urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram];
6447
6448 *flip_bandwidth_support_ok = true;
6449 for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) { // check sdp and dram
6450 *flip_bandwidth_support_ok &= urg_bandwidth_available[eval_state][n] >= urg_bandwidth_required_flip[eval_state][n];
6451
6452 #ifdef __DML_VBA_DEBUG__
6453 dml2_printf("DML::%s: n = %s\n", __func__, dml2_core_internal_bw_type_str(n));
6454 dml2_printf("DML::%s: urg_bandwidth_available = %f\n", __func__, urg_bandwidth_available[eval_state][n]);
6455 dml2_printf("DML::%s: non_urg_bandwidth_required_flip = %f\n", __func__, non_urg_bandwidth_required_flip[eval_state][n]);
6456 dml2_printf("DML::%s: urg_bandwidth_required_flip = %f\n", __func__, urg_bandwidth_required_flip[eval_state][n]);
6457 dml2_printf("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok);
6458 #endif
6459 dml2_assert(urg_bandwidth_required_flip[eval_state][n] >= non_urg_bandwidth_required_flip[eval_state][n]);
6460 }
6461
6462 *frac_urg_bandwidth_flip = (frac_urg_bw_flip_sdp > frac_urg_bw_flip_dram) ? frac_urg_bw_flip_sdp : frac_urg_bw_flip_dram;
6463 *flip_bandwidth_support_ok &= (*frac_urg_bandwidth_flip <= 1.0);
6464
6465 #ifdef __DML_VBA_DEBUG__
6466 dml2_printf("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state));
6467 dml2_printf("DML::%s: frac_urg_bw_flip_sdp = %f\n", __func__, frac_urg_bw_flip_sdp);
6468 dml2_printf("DML::%s: frac_urg_bw_flip_dram = %f\n", __func__, frac_urg_bw_flip_dram);
6469 dml2_printf("DML::%s: frac_urg_bandwidth_flip = %f\n", __func__, *frac_urg_bandwidth_flip);
6470 dml2_printf("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok);
6471
6472 for (unsigned int m = 0; m < dml2_core_internal_soc_state_max; m++) {
6473 for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) {
6474 dml2_printf("DML::%s: state:%s bw_type:%s, urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n",
6475 __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n),
6476 urg_bandwidth_available[m][n], (urg_bandwidth_available[m][n] < urg_bandwidth_required_flip[m][n]) ? "<" : ">=", urg_bandwidth_required_flip[m][n]);
6477 }
6478 }
6479 #endif
6480 }
6481
CalculateFlipSchedule(struct dml2_core_internal_scratch * s,bool iflip_enable,bool use_lb_flip_bw,double HostVMInefficiencyFactor,double Tvm_trips_flip,double Tr0_trips_flip,double Tvm_trips_flip_rounded,double Tr0_trips_flip_rounded,bool GPUVMEnable,double vm_bytes,double DPTEBytesPerRow,double BandwidthAvailableForImmediateFlip,unsigned int TotImmediateFlipBytes,enum dml2_source_format_class SourcePixelFormat,double LineTime,double VRatio,double VRatioChroma,double Tno_bw_flip,unsigned int dpte_row_height,unsigned int dpte_row_height_chroma,bool use_one_row_for_frame_flip,unsigned int max_flip_time_us,unsigned int max_flip_time_lines,unsigned int per_pipe_flip_bytes,unsigned int meta_row_bytes,unsigned int meta_row_height,unsigned int meta_row_height_chroma,bool dcc_mrq_enable,double * dst_y_per_vm_flip,double * dst_y_per_row_flip,double * final_flip_bw,bool * ImmediateFlipSupportedForPipe)6482 static void CalculateFlipSchedule(
6483 struct dml2_core_internal_scratch *s,
6484 bool iflip_enable,
6485 bool use_lb_flip_bw,
6486 double HostVMInefficiencyFactor,
6487 double Tvm_trips_flip,
6488 double Tr0_trips_flip,
6489 double Tvm_trips_flip_rounded,
6490 double Tr0_trips_flip_rounded,
6491 bool GPUVMEnable,
6492 double vm_bytes, // vm_bytes
6493 double DPTEBytesPerRow, // dpte_row_bytes
6494 double BandwidthAvailableForImmediateFlip,
6495 unsigned int TotImmediateFlipBytes,
6496 enum dml2_source_format_class SourcePixelFormat,
6497 double LineTime,
6498 double VRatio,
6499 double VRatioChroma,
6500 double Tno_bw_flip,
6501 unsigned int dpte_row_height,
6502 unsigned int dpte_row_height_chroma,
6503 bool use_one_row_for_frame_flip,
6504 unsigned int max_flip_time_us,
6505 unsigned int max_flip_time_lines,
6506 unsigned int per_pipe_flip_bytes,
6507 unsigned int meta_row_bytes,
6508 unsigned int meta_row_height,
6509 unsigned int meta_row_height_chroma,
6510 bool dcc_mrq_enable,
6511
6512 // Output
6513 double *dst_y_per_vm_flip,
6514 double *dst_y_per_row_flip,
6515 double *final_flip_bw,
6516 bool *ImmediateFlipSupportedForPipe)
6517 {
6518 struct dml2_core_shared_CalculateFlipSchedule_locals *l = &s->CalculateFlipSchedule_locals;
6519
6520 l->dual_plane = dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha;
6521 l->dpte_row_bytes = DPTEBytesPerRow;
6522
6523 #ifdef __DML_VBA_DEBUG__
6524 dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable);
6525 dml2_printf("DML::%s: ip.max_flip_time_us = %d\n", __func__, max_flip_time_us);
6526 dml2_printf("DML::%s: ip.max_flip_time_lines = %d\n", __func__, max_flip_time_lines);
6527 dml2_printf("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
6528 dml2_printf("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes);
6529 dml2_printf("DML::%s: use_lb_flip_bw = %u\n", __func__, use_lb_flip_bw);
6530 dml2_printf("DML::%s: iflip_enable = %u\n", __func__, iflip_enable);
6531 dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
6532 dml2_printf("DML::%s: LineTime = %f\n", __func__, LineTime);
6533 dml2_printf("DML::%s: Tno_bw_flip = %f\n", __func__, Tno_bw_flip);
6534 dml2_printf("DML::%s: Tvm_trips_flip = %f\n", __func__, Tvm_trips_flip);
6535 dml2_printf("DML::%s: Tr0_trips_flip = %f\n", __func__, Tr0_trips_flip);
6536 dml2_printf("DML::%s: Tvm_trips_flip_rounded = %f\n", __func__, Tvm_trips_flip_rounded);
6537 dml2_printf("DML::%s: Tr0_trips_flip_rounded = %f\n", __func__, Tr0_trips_flip_rounded);
6538 dml2_printf("DML::%s: vm_bytes = %f\n", __func__, vm_bytes);
6539 dml2_printf("DML::%s: DPTEBytesPerRow = %f\n", __func__, DPTEBytesPerRow);
6540 dml2_printf("DML::%s: meta_row_bytes = %d\n", __func__, meta_row_bytes);
6541 dml2_printf("DML::%s: dpte_row_bytes = %f\n", __func__, l->dpte_row_bytes);
6542 dml2_printf("DML::%s: dpte_row_height = %d\n", __func__, dpte_row_height);
6543 dml2_printf("DML::%s: meta_row_height = %d\n", __func__, meta_row_height);
6544 dml2_printf("DML::%s: VRatio = %f\n", __func__, VRatio);
6545 #endif
6546
6547 if (TotImmediateFlipBytes > 0 && (GPUVMEnable || dcc_mrq_enable)) {
6548 if (l->dual_plane) {
6549 if (dcc_mrq_enable & GPUVMEnable) {
6550 l->min_row_height = math_min2(dpte_row_height, meta_row_height);
6551 l->min_row_height_chroma = math_min2(dpte_row_height_chroma, meta_row_height_chroma);
6552 } else if (GPUVMEnable) {
6553 l->min_row_height = dpte_row_height;
6554 l->min_row_height_chroma = dpte_row_height_chroma;
6555 } else {
6556 l->min_row_height = meta_row_height;
6557 l->min_row_height_chroma = meta_row_height_chroma;
6558 }
6559 l->min_row_time = math_min2(l->min_row_height * LineTime / VRatio, l->min_row_height_chroma * LineTime / VRatioChroma);
6560 } else {
6561 if (dcc_mrq_enable & GPUVMEnable)
6562 l->min_row_height = math_min2(dpte_row_height, meta_row_height);
6563 else if (GPUVMEnable)
6564 l->min_row_height = dpte_row_height;
6565 else
6566 l->min_row_height = meta_row_height;
6567
6568 l->min_row_time = l->min_row_height * LineTime / VRatio;
6569 }
6570 #ifdef __DML_VBA_DEBUG__
6571 dml2_printf("DML::%s: min_row_time = %f\n", __func__, l->min_row_time);
6572 #endif
6573 dml2_assert(l->min_row_time > 0);
6574
6575 if (use_lb_flip_bw) {
6576 // For mode check, calculation the flip bw requirement with worst case flip time
6577 l->max_flip_time = math_min2(math_min2(l->min_row_time, (double)max_flip_time_lines * LineTime / VRatio),
6578 math_max2(Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded, (double)max_flip_time_us));
6579
6580 //The lower bound on flip bandwidth
6581 // Note: The get_urgent_bandwidth_required already consider dpte_row_bw and meta_row_bw in bandwidth calculation, so leave final_flip_bw = 0 if iflip not required
6582 l->lb_flip_bw = 0;
6583
6584 if (iflip_enable) {
6585 l->hvm_scaled_vm_bytes = vm_bytes * HostVMInefficiencyFactor;
6586 l->num_rows = 2;
6587 l->hvm_scaled_row_bytes = (l->num_rows * l->dpte_row_bytes * HostVMInefficiencyFactor + l->num_rows * meta_row_bytes);
6588 l->hvm_scaled_vm_row_bytes = l->hvm_scaled_vm_bytes + l->hvm_scaled_row_bytes;
6589 l->lb_flip_bw = math_max3(
6590 l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip),
6591 l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded),
6592 l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded));
6593 #ifdef __DML_VBA_DEBUG__
6594 dml2_printf("DML::%s: max_flip_time = %f\n", __func__, l->max_flip_time);
6595 dml2_printf("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_bytes);
6596 dml2_printf("DML::%s: total row bytes (%d row, hvm ineff scaled) = %f\n", __func__, l->num_rows, l->hvm_scaled_row_bytes);
6597 dml2_printf("DML::%s: total vm+row bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_row_bytes);
6598 dml2_printf("DML::%s: lb_flip_bw for vm and row = %f\n", __func__, l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip));
6599 dml2_printf("DML::%s: lb_flip_bw for vm = %f\n", __func__, l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded));
6600 dml2_printf("DML::%s: lb_flip_bw for row = %f\n", __func__, l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded));
6601
6602 if (l->lb_flip_bw > 0) {
6603 dml2_printf("DML::%s: mode_support est Tvm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw);
6604 dml2_printf("DML::%s: mode_support est Tr0_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / l->num_rows);
6605 dml2_printf("DML::%s: mode_support est dst_y_per_vm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw / LineTime);
6606 dml2_printf("DML::%s: mode_support est dst_y_per_row_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / LineTime / l->num_rows);
6607 dml2_printf("DML::%s: Tvm_trips_flip_rounded + 2*Tr0_trips_flip_rounded = %f\n", __func__, (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded));
6608 }
6609 #endif
6610 l->lb_flip_bw = math_max3(l->lb_flip_bw,
6611 l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip,
6612 (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime));
6613
6614 #ifdef __DML_VBA_DEBUG__
6615 dml2_printf("DML::%s: lb_flip_bw for vm reg limit = %f\n", __func__, l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip);
6616 dml2_printf("DML::%s: lb_flip_bw for row reg limit = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime));
6617 #endif
6618 }
6619
6620 *final_flip_bw = l->lb_flip_bw;
6621
6622 *dst_y_per_vm_flip = 1; // not used
6623 *dst_y_per_row_flip = 1; // not used
6624 *ImmediateFlipSupportedForPipe = l->min_row_time >= (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded);
6625 } else {
6626 if (iflip_enable) {
6627 l->ImmediateFlipBW = (double)per_pipe_flip_bytes * BandwidthAvailableForImmediateFlip / (double)TotImmediateFlipBytes; // flip_bw(i)
6628 double portion = (double)per_pipe_flip_bytes / (double)TotImmediateFlipBytes;
6629
6630 #ifdef __DML_VBA_DEBUG__
6631 dml2_printf("DML::%s: per_pipe_flip_bytes = %d\n", __func__, per_pipe_flip_bytes);
6632 dml2_printf("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
6633 dml2_printf("DML::%s: ImmediateFlipBW = %f\n", __func__, l->ImmediateFlipBW);
6634 dml2_printf("DML::%s: portion of flip bw = %f\n", __func__, portion);
6635 #endif
6636 if (l->ImmediateFlipBW == 0) {
6637 l->Tvm_flip = 0;
6638 l->Tr0_flip = 0;
6639 } else {
6640 l->Tvm_flip = math_max3(Tvm_trips_flip,
6641 Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW,
6642 LineTime / 4.0);
6643
6644 l->Tr0_flip = math_max3(Tr0_trips_flip,
6645 (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / l->ImmediateFlipBW,
6646 LineTime / 4.0);
6647 }
6648 #ifdef __DML_VBA_DEBUG__
6649 dml2_printf("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, vm_bytes * HostVMInefficiencyFactor);
6650 dml2_printf("DML::%s: total row bytes (hvm ineff scaled, one row) = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes));
6651
6652 dml2_printf("DML::%s: Tvm_flip = %f (bw-based), Tvm_trips_flip = %f (latency-based)\n", __func__, Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW, Tvm_trips_flip);
6653 dml2_printf("DML::%s: Tr0_flip = %f (bw-based), Tr0_trips_flip = %f (latency-based)\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / l->ImmediateFlipBW, Tr0_trips_flip);
6654 #endif
6655 *dst_y_per_vm_flip = math_ceil2(4.0 * (l->Tvm_flip / LineTime), 1.0) / 4.0;
6656 *dst_y_per_row_flip = math_ceil2(4.0 * (l->Tr0_flip / LineTime), 1.0) / 4.0;
6657
6658 *final_flip_bw = math_max2(vm_bytes * HostVMInefficiencyFactor / (*dst_y_per_vm_flip * LineTime),
6659 (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (*dst_y_per_row_flip * LineTime));
6660
6661 if (*dst_y_per_vm_flip >= 32 || *dst_y_per_row_flip >= 16 || l->Tvm_flip + 2 * l->Tr0_flip > l->min_row_time) {
6662 *ImmediateFlipSupportedForPipe = false;
6663 } else {
6664 *ImmediateFlipSupportedForPipe = iflip_enable;
6665 }
6666 } else {
6667 l->Tvm_flip = 0;
6668 l->Tr0_flip = 0;
6669 *dst_y_per_vm_flip = 0;
6670 *dst_y_per_row_flip = 0;
6671 *final_flip_bw = 0;
6672 *ImmediateFlipSupportedForPipe = iflip_enable;
6673 }
6674 }
6675 } else {
6676 l->Tvm_flip = 0;
6677 l->Tr0_flip = 0;
6678 *dst_y_per_vm_flip = 0;
6679 *dst_y_per_row_flip = 0;
6680 *final_flip_bw = 0;
6681 *ImmediateFlipSupportedForPipe = iflip_enable;
6682 }
6683
6684 #ifdef __DML_VBA_DEBUG__
6685 if (!use_lb_flip_bw) {
6686 dml2_printf("DML::%s: dst_y_per_vm_flip = %f (should be < 32)\n", __func__, *dst_y_per_vm_flip);
6687 dml2_printf("DML::%s: dst_y_per_row_flip = %f (should be < 16)\n", __func__, *dst_y_per_row_flip);
6688 dml2_printf("DML::%s: Tvm_flip = %f (final)\n", __func__, l->Tvm_flip);
6689 dml2_printf("DML::%s: Tr0_flip = %f (final)\n", __func__, l->Tr0_flip);
6690 dml2_printf("DML::%s: Tvm_flip + 2*Tr0_flip = %f (should be <= min_row_time=%f)\n", __func__, l->Tvm_flip + 2 * l->Tr0_flip, l->min_row_time);
6691 }
6692 dml2_printf("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw);
6693 dml2_printf("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, *ImmediateFlipSupportedForPipe);
6694 #endif
6695 }
6696
CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params * p)6697 static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
6698 struct dml2_core_internal_scratch *scratch,
6699 struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *p)
6700 {
6701 struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals *s = &scratch->CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals;
6702
6703 enum dml2_uclk_pstate_change_strategy uclk_pstate_change_strategy;
6704 double reserved_vblank_time_us;
6705 bool FoundCriticalSurface = false;
6706
6707 s->TotalActiveWriteback = 0;
6708 p->Watermark->UrgentWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency;
6709
6710 #ifdef __DML_VBA_DEBUG__
6711 dml2_printf("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark);
6712 #endif
6713
6714 p->Watermark->USRRetrainingWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency + p->mmSOCParameters.USRRetrainingLatency + p->mmSOCParameters.SMNLatency;
6715 p->Watermark->DRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->Watermark->UrgentWatermark;
6716 p->Watermark->FCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->Watermark->UrgentWatermark;
6717 p->Watermark->StutterExitWatermark = p->mmSOCParameters.SRExitTime + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep;
6718 p->Watermark->StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitTime + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep;
6719 p->Watermark->Z8StutterExitWatermark = p->mmSOCParameters.SRExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep;
6720 p->Watermark->Z8StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep;
6721 if (p->mmSOCParameters.qos_type == dml2_qos_param_type_dcn4x) {
6722 p->Watermark->StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us;
6723 p->Watermark->StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us;
6724 p->Watermark->Z8StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us;
6725 p->Watermark->Z8StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us;
6726 }
6727 p->Watermark->temp_read_or_ppt_watermark_us = p->mmSOCParameters.g6_temp_read_blackout_us + p->Watermark->UrgentWatermark;
6728
6729 #ifdef __DML_VBA_DEBUG__
6730 dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, p->mmSOCParameters.UrgentLatency);
6731 dml2_printf("DML::%s: ExtraLatency = %f\n", __func__, p->mmSOCParameters.ExtraLatency);
6732 dml2_printf("DML::%s: DRAMClockChangeLatency = %f\n", __func__, p->mmSOCParameters.DRAMClockChangeLatency);
6733 dml2_printf("DML::%s: SREnterPlusExitZ8Time = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitZ8Time);
6734 dml2_printf("DML::%s: SREnterPlusExitTime = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitTime);
6735 dml2_printf("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark);
6736 dml2_printf("DML::%s: USRRetrainingWatermark = %f\n", __func__, p->Watermark->USRRetrainingWatermark);
6737 dml2_printf("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, p->Watermark->DRAMClockChangeWatermark);
6738 dml2_printf("DML::%s: FCLKChangeWatermark = %f\n", __func__, p->Watermark->FCLKChangeWatermark);
6739 dml2_printf("DML::%s: StutterExitWatermark = %f\n", __func__, p->Watermark->StutterExitWatermark);
6740 dml2_printf("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->StutterEnterPlusExitWatermark);
6741 dml2_printf("DML::%s: Z8StutterExitWatermark = %f\n", __func__, p->Watermark->Z8StutterExitWatermark);
6742 dml2_printf("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->Z8StutterEnterPlusExitWatermark);
6743 dml2_printf("DML::%s: temp_read_or_ppt_watermark_us = %f\n", __func__, p->Watermark->temp_read_or_ppt_watermark_us);
6744 #endif
6745
6746 s->TotalActiveWriteback = 0;
6747 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
6748 if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
6749 s->TotalActiveWriteback = s->TotalActiveWriteback + 1;
6750 }
6751 }
6752
6753 if (s->TotalActiveWriteback <= 1) {
6754 p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency;
6755 } else {
6756 p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK;
6757 }
6758 if (p->USRRetrainingRequired)
6759 p->Watermark->WritebackUrgentWatermark = p->Watermark->WritebackUrgentWatermark + p->mmSOCParameters.USRRetrainingLatency;
6760
6761 if (s->TotalActiveWriteback <= 1) {
6762 p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency;
6763 p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency;
6764 } else {
6765 p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK;
6766 p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024 / 32 / p->SOCCLK;
6767 }
6768
6769 if (p->USRRetrainingRequired)
6770 p->Watermark->WritebackDRAMClockChangeWatermark = p->Watermark->WritebackDRAMClockChangeWatermark + p->mmSOCParameters.USRRetrainingLatency;
6771
6772 if (p->USRRetrainingRequired)
6773 p->Watermark->WritebackFCLKChangeWatermark = p->Watermark->WritebackFCLKChangeWatermark + p->mmSOCParameters.USRRetrainingLatency;
6774
6775 #ifdef __DML_VBA_DEBUG__
6776 dml2_printf("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", __func__, p->Watermark->WritebackDRAMClockChangeWatermark);
6777 dml2_printf("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, p->Watermark->WritebackFCLKChangeWatermark);
6778 dml2_printf("DML::%s: WritebackUrgentWatermark = %f\n", __func__, p->Watermark->WritebackUrgentWatermark);
6779 dml2_printf("DML::%s: USRRetrainingRequired = %u\n", __func__, p->USRRetrainingRequired);
6780 dml2_printf("DML::%s: USRRetrainingLatency = %f\n", __func__, p->mmSOCParameters.USRRetrainingLatency);
6781 #endif
6782
6783 s->TotalPixelBW = 0.0;
6784 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
6785 double h_total = (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total;
6786 double pixel_clock_mhz = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000.0;
6787 double v_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
6788 double v_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
6789 s->TotalPixelBW = s->TotalPixelBW + p->DPPPerSurface[k]
6790 * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * v_ratio + p->SwathWidthC[k] * p->BytePerPixelDETC[k] * v_ratio_c) / (h_total / pixel_clock_mhz);
6791 }
6792
6793 *p->global_fclk_change_supported = true;
6794 *p->global_dram_clock_change_supported = true;
6795
6796 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
6797 double h_total = (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total;
6798 double pixel_clock_mhz = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000.0;
6799 double v_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
6800 double v_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
6801 double v_taps = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
6802 double v_taps_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
6803 double h_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio;
6804 double h_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio;
6805 double LBBitPerPixel = 57;
6806
6807 s->LBLatencyHidingSourceLinesY[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthY[k] / math_max2(h_ratio, 1.0)), 1)) - (v_taps - 1));
6808 s->LBLatencyHidingSourceLinesC[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthC[k] / math_max2(h_ratio_c, 1.0)), 1)) - (v_taps_c - 1));
6809
6810 #ifdef __DML_VBA_DEBUG__
6811 dml2_printf("DML::%s: k=%u, MaxLineBufferLines = %u\n", __func__, k, p->MaxLineBufferLines);
6812 dml2_printf("DML::%s: k=%u, LineBufferSize = %u\n", __func__, k, p->LineBufferSize);
6813 dml2_printf("DML::%s: k=%u, LBBitPerPixel = %u\n", __func__, k, LBBitPerPixel);
6814 dml2_printf("DML::%s: k=%u, HRatio = %f\n", __func__, k, h_ratio);
6815 dml2_printf("DML::%s: k=%u, VTaps = %f\n", __func__, k, v_taps);
6816 #endif
6817
6818 s->EffectiveLBLatencyHidingY = s->LBLatencyHidingSourceLinesY[k] / v_ratio * (h_total / pixel_clock_mhz);
6819 s->EffectiveLBLatencyHidingC = s->LBLatencyHidingSourceLinesC[k] / v_ratio_c * (h_total / pixel_clock_mhz);
6820
6821 s->EffectiveDETBufferSizeY = p->DETBufferSizeY[k];
6822 if (p->UnboundedRequestEnabled) {
6823 s->EffectiveDETBufferSizeY = s->EffectiveDETBufferSizeY + p->CompressedBufferSizeInkByte * 1024 * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * v_ratio) / (h_total / pixel_clock_mhz) / s->TotalPixelBW;
6824 }
6825
6826 s->LinesInDETY[k] = (double)s->EffectiveDETBufferSizeY / p->BytePerPixelDETY[k] / p->SwathWidthY[k];
6827 s->LinesInDETYRoundedDownToSwath[k] = (unsigned int)(math_floor2(s->LinesInDETY[k], p->SwathHeightY[k]));
6828 s->FullDETBufferingTimeY = s->LinesInDETYRoundedDownToSwath[k] * (h_total / pixel_clock_mhz) / v_ratio;
6829
6830 s->ActiveClockChangeLatencyHidingY = s->EffectiveLBLatencyHidingY + s->FullDETBufferingTimeY - ((double)p->DSTXAfterScaler[k] / h_total + (double)p->DSTYAfterScaler[k]) * h_total / pixel_clock_mhz;
6831
6832 if (p->NumberOfActiveSurfaces > 1) {
6833 s->ActiveClockChangeLatencyHidingY = s->ActiveClockChangeLatencyHidingY - (1.0 - 1.0 / (double)p->NumberOfActiveSurfaces) * (double)p->SwathHeightY[k] * (double)h_total / pixel_clock_mhz / v_ratio;
6834 }
6835
6836 if (p->BytePerPixelDETC[k] > 0) {
6837 s->LinesInDETC[k] = p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k];
6838 s->LinesInDETCRoundedDownToSwath[k] = (unsigned int)(math_floor2(s->LinesInDETC[k], p->SwathHeightC[k]));
6839 s->FullDETBufferingTimeC = s->LinesInDETCRoundedDownToSwath[k] * (h_total / pixel_clock_mhz) / v_ratio_c;
6840 s->ActiveClockChangeLatencyHidingC = s->EffectiveLBLatencyHidingC + s->FullDETBufferingTimeC - ((double)p->DSTXAfterScaler[k] / (double)h_total + (double)p->DSTYAfterScaler[k]) * (double)h_total / pixel_clock_mhz;
6841 if (p->NumberOfActiveSurfaces > 1) {
6842 s->ActiveClockChangeLatencyHidingC = s->ActiveClockChangeLatencyHidingC - (1.0 - 1.0 / (double)p->NumberOfActiveSurfaces) * (double)p->SwathHeightC[k] * (double)h_total / pixel_clock_mhz / v_ratio_c;
6843 }
6844 s->ActiveClockChangeLatencyHiding = math_min2(s->ActiveClockChangeLatencyHidingY, s->ActiveClockChangeLatencyHidingC);
6845 } else {
6846 s->ActiveClockChangeLatencyHiding = s->ActiveClockChangeLatencyHidingY;
6847 }
6848
6849 s->ActiveDRAMClockChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->DRAMClockChangeWatermark;
6850 s->ActiveFCLKChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->FCLKChangeWatermark;
6851 s->USRRetrainingLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->USRRetrainingWatermark;
6852 s->g6_temp_read_latency_margin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->temp_read_or_ppt_watermark_us;
6853
6854 if (p->VActiveLatencyHidingMargin)
6855 p->VActiveLatencyHidingMargin[k] = s->ActiveDRAMClockChangeLatencyMargin[k];
6856
6857 if (p->VActiveLatencyHidingUs)
6858 p->VActiveLatencyHidingUs[k] = s->ActiveClockChangeLatencyHiding;
6859
6860 if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
6861 s->WritebackLatencyHiding = (double)p->WritebackInterfaceBufferSize * 1024.0
6862 / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height
6863 * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width
6864 / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height * (double)h_total / pixel_clock_mhz) * 4.0);
6865 if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format == dml2_444_64) {
6866 s->WritebackLatencyHiding = s->WritebackLatencyHiding / 2;
6867 }
6868 s->WritebackDRAMClockChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackDRAMClockChangeWatermark;
6869
6870 s->WritebackFCLKChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackFCLKChangeWatermark;
6871
6872 s->ActiveDRAMClockChangeLatencyMargin[k] = math_min2(s->ActiveDRAMClockChangeLatencyMargin[k], s->WritebackDRAMClockChangeLatencyMargin);
6873 s->ActiveFCLKChangeLatencyMargin[k] = math_min2(s->ActiveFCLKChangeLatencyMargin[k], s->WritebackFCLKChangeLatencyMargin);
6874 }
6875 p->MaxActiveDRAMClockChangeLatencySupported[k] = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) ? 0 : (s->ActiveDRAMClockChangeLatencyMargin[k] + p->mmSOCParameters.DRAMClockChangeLatency);
6876
6877 uclk_pstate_change_strategy = p->display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy;
6878 reserved_vblank_time_us = (double)p->display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns / 1000;
6879
6880 p->FCLKChangeSupport[k] = dml2_pstate_change_unsupported;
6881 if (s->ActiveFCLKChangeLatencyMargin[k] > 0)
6882 p->FCLKChangeSupport[k] = dml2_pstate_change_vactive;
6883 else if (reserved_vblank_time_us >= p->mmSOCParameters.FCLKChangeLatency)
6884 p->FCLKChangeSupport[k] = dml2_pstate_change_vblank;
6885
6886 if (p->FCLKChangeSupport[k] == dml2_pstate_change_unsupported)
6887 *p->global_fclk_change_supported = false;
6888
6889 p->DRAMClockChangeSupport[k] = dml2_pstate_change_unsupported;
6890 if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_auto) {
6891 if (p->display_cfg->overrides.all_streams_blanked ||
6892 (s->ActiveDRAMClockChangeLatencyMargin[k] > 0 && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency))
6893 p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank_and_vactive;
6894 else if (s->ActiveDRAMClockChangeLatencyMargin[k] > 0)
6895 p->DRAMClockChangeSupport[k] = dml2_pstate_change_vactive;
6896 else if (reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency)
6897 p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank;
6898 } else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vactive && s->ActiveDRAMClockChangeLatencyMargin[k] > 0)
6899 p->DRAMClockChangeSupport[k] = dml2_pstate_change_vactive;
6900 else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vblank && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency)
6901 p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank;
6902 else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_drr)
6903 p->DRAMClockChangeSupport[k] = dml2_pstate_change_drr;
6904 else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_svp)
6905 p->DRAMClockChangeSupport[k] = dml2_pstate_change_mall_svp;
6906 else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame)
6907 p->DRAMClockChangeSupport[k] = dml2_pstate_change_mall_full_frame;
6908
6909 if (p->DRAMClockChangeSupport[k] == dml2_pstate_change_unsupported)
6910 *p->global_dram_clock_change_supported = false;
6911
6912 s->dst_y_pstate = (unsigned int)(math_ceil2((p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.UrgentLatency) / (h_total / pixel_clock_mhz), 1));
6913 s->src_y_pstate_l = (unsigned int)(math_ceil2(s->dst_y_pstate * v_ratio, p->SwathHeightY[k]));
6914 s->src_y_ahead_l = (unsigned int)(math_floor2(p->DETBufferSizeY[k] / p->BytePerPixelDETY[k] / p->SwathWidthY[k], p->SwathHeightY[k]) + s->LBLatencyHidingSourceLinesY[k]);
6915 s->sub_vp_lines_l = s->src_y_pstate_l + s->src_y_ahead_l + p->meta_row_height_l[k];
6916
6917 #ifdef __DML_VBA_DEBUG__
6918 dml2_printf("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
6919 dml2_printf("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]);
6920 dml2_printf("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]);
6921 dml2_printf("DML::%s: k=%u, SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]);
6922 dml2_printf("DML::%s: k=%u, LBLatencyHidingSourceLinesY = %u\n", __func__, k, s->LBLatencyHidingSourceLinesY[k]);
6923 dml2_printf("DML::%s: k=%u, dst_y_pstate = %u\n", __func__, k, s->dst_y_pstate);
6924 dml2_printf("DML::%s: k=%u, src_y_pstate_l = %u\n", __func__, k, s->src_y_pstate_l);
6925 dml2_printf("DML::%s: k=%u, src_y_ahead_l = %u\n", __func__, k, s->src_y_ahead_l);
6926 dml2_printf("DML::%s: k=%u, meta_row_height_l = %u\n", __func__, k, p->meta_row_height_l[k]);
6927 dml2_printf("DML::%s: k=%u, sub_vp_lines_l = %u\n", __func__, k, s->sub_vp_lines_l);
6928 #endif
6929 p->SubViewportLinesNeededInMALL[k] = s->sub_vp_lines_l;
6930
6931 if (p->BytePerPixelDETC[k] > 0) {
6932 s->src_y_pstate_c = (unsigned int)(math_ceil2(s->dst_y_pstate * v_ratio_c, p->SwathHeightC[k]));
6933 s->src_y_ahead_c = (unsigned int)(math_floor2(p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k], p->SwathHeightC[k]) + s->LBLatencyHidingSourceLinesC[k]);
6934 s->sub_vp_lines_c = s->src_y_pstate_c + s->src_y_ahead_c + p->meta_row_height_c[k];
6935
6936 if (dml_is_420(p->display_cfg->plane_descriptors[k].pixel_format))
6937 p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, 2 * s->sub_vp_lines_c));
6938 else
6939 p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, s->sub_vp_lines_c));
6940
6941 #ifdef __DML_VBA_DEBUG__
6942 dml2_printf("DML::%s: k=%u, meta_row_height_c = %u\n", __func__, k, p->meta_row_height_c[k]);
6943 dml2_printf("DML::%s: k=%u, src_y_pstate_c = %u\n", __func__, k, s->src_y_pstate_c);
6944 dml2_printf("DML::%s: k=%u, src_y_ahead_c = %u\n", __func__, k, s->src_y_ahead_c);
6945 dml2_printf("DML::%s: k=%u, sub_vp_lines_c = %u\n", __func__, k, s->sub_vp_lines_c);
6946 #endif
6947 }
6948 }
6949
6950 *p->g6_temp_read_support = true;
6951 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
6952 if ((!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) &&
6953 (s->g6_temp_read_latency_margin[k] < 0)) {
6954 *p->g6_temp_read_support = false;
6955 }
6956 }
6957
6958 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
6959 if ((!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) && ((!FoundCriticalSurface)
6960 || ((s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency) < *p->MaxActiveFCLKChangeLatencySupported))) {
6961 FoundCriticalSurface = true;
6962 *p->MaxActiveFCLKChangeLatencySupported = s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency;
6963 }
6964 }
6965
6966 #ifdef __DML_VBA_DEBUG__
6967 dml2_printf("DML::%s: DRAMClockChangeSupport = %u\n", __func__, *p->global_dram_clock_change_supported);
6968 dml2_printf("DML::%s: FCLKChangeSupport = %u\n", __func__, *p->global_fclk_change_supported);
6969 dml2_printf("DML::%s: MaxActiveFCLKChangeLatencySupported = %f\n", __func__, *p->MaxActiveFCLKChangeLatencySupported);
6970 dml2_printf("DML::%s: USRRetrainingSupport = %u\n", __func__, *p->USRRetrainingSupport);
6971 #endif
6972 }
6973
calculate_bytes_to_fetch_required_to_hide_latency(struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params * p)6974 static void calculate_bytes_to_fetch_required_to_hide_latency(
6975 struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *p)
6976 {
6977 unsigned int dst_lines_to_hide;
6978 unsigned int src_lines_to_hide_l;
6979 unsigned int src_lines_to_hide_c;
6980 unsigned int plane_index;
6981 unsigned int stream_index;
6982
6983 for (plane_index = 0; plane_index < p->num_active_planes; plane_index++) {
6984 if (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[plane_index]))
6985 continue;
6986
6987 stream_index = p->display_cfg->plane_descriptors[plane_index].stream_index;
6988
6989 dst_lines_to_hide = (unsigned int)math_ceil(p->latency_to_hide_us /
6990 ((double)p->display_cfg->stream_descriptors[stream_index].timing.h_total /
6991 (double)p->display_cfg->stream_descriptors[stream_index].timing.pixel_clock_khz * 1000.0));
6992
6993 src_lines_to_hide_l = (unsigned int)math_ceil2(p->display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane0.v_ratio * dst_lines_to_hide,
6994 p->swath_height_l[plane_index]);
6995 p->bytes_required_l[plane_index] = src_lines_to_hide_l * p->num_of_dpp[plane_index] * p->swath_width_l[plane_index] * p->byte_per_pix_l[plane_index];
6996
6997 src_lines_to_hide_c = (unsigned int)math_ceil2(p->display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane1.v_ratio * dst_lines_to_hide,
6998 p->swath_height_c[plane_index]);
6999 p->bytes_required_c[plane_index] = src_lines_to_hide_c * p->num_of_dpp[plane_index] * p->swath_width_c[plane_index] * p->byte_per_pix_c[plane_index];
7000
7001 if (p->display_cfg->plane_descriptors[plane_index].surface.dcc.enable && p->mrq_present) {
7002 p->bytes_required_l[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_l / p->meta_row_height_l[plane_index]) * p->meta_row_bytes_per_row_ub_l[plane_index];
7003 if (p->meta_row_height_c[plane_index]) {
7004 p->bytes_required_c[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_c / p->meta_row_height_c[plane_index]) * p->meta_row_bytes_per_row_ub_c[plane_index];
7005 }
7006 }
7007
7008 if (p->display_cfg->gpuvm_enable == true) {
7009 p->bytes_required_l[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_l / p->dpte_row_height_l[plane_index]) * p->dpte_bytes_per_row_l[plane_index];
7010 if (p->dpte_row_height_c[plane_index]) {
7011 p->bytes_required_c[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_c / p->dpte_row_height_c[plane_index]) * p->dpte_bytes_per_row_c[plane_index];
7012 }
7013 }
7014 }
7015 }
7016
calculate_vactive_det_fill_latency(const struct dml2_display_cfg * display_cfg,unsigned int num_active_planes,unsigned int bytes_required_l[],unsigned int bytes_required_c[],double dcc_dram_bw_nom_overhead_factor_p0[],double dcc_dram_bw_nom_overhead_factor_p1[],double surface_read_bw_l[],double surface_read_bw_c[],double (* surface_avg_vactive_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES],double (* surface_peak_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES],double vactive_det_fill_delay_us[])7017 static noinline_for_stack void calculate_vactive_det_fill_latency(
7018 const struct dml2_display_cfg *display_cfg,
7019 unsigned int num_active_planes,
7020 unsigned int bytes_required_l[],
7021 unsigned int bytes_required_c[],
7022 double dcc_dram_bw_nom_overhead_factor_p0[],
7023 double dcc_dram_bw_nom_overhead_factor_p1[],
7024 double surface_read_bw_l[],
7025 double surface_read_bw_c[],
7026 double (*surface_avg_vactive_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES],
7027 double (*surface_peak_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES],
7028 /* output */
7029 double vactive_det_fill_delay_us[])
7030 {
7031 double effective_excess_bandwidth;
7032 double effective_excess_bandwidth_l;
7033 double effective_excess_bandwidth_c;
7034 double adj_factor;
7035 unsigned int plane_index;
7036 unsigned int soc_state;
7037 unsigned int bw_type;
7038
7039 for (plane_index = 0; plane_index < num_active_planes; plane_index++) {
7040 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_index]))
7041 continue;
7042
7043 vactive_det_fill_delay_us[plane_index] = 0.0;
7044 for (soc_state = 0; soc_state < dml2_core_internal_soc_state_max; soc_state++) {
7045 for (bw_type = 0; bw_type < dml2_core_internal_bw_max; bw_type++) {
7046 effective_excess_bandwidth = (surface_peak_required_bw[soc_state][bw_type][plane_index] - surface_avg_vactive_required_bw[soc_state][bw_type][plane_index]);
7047
7048 /* luma */
7049 adj_factor = bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p0[plane_index] : 1.0;
7050
7051 effective_excess_bandwidth_l = effective_excess_bandwidth * surface_read_bw_l[plane_index] / (surface_read_bw_l[plane_index] + surface_read_bw_c[plane_index]) / adj_factor;
7052 if (effective_excess_bandwidth_l > 0.0) {
7053 vactive_det_fill_delay_us[plane_index] = math_max2(vactive_det_fill_delay_us[plane_index], bytes_required_l[plane_index] / effective_excess_bandwidth_l);
7054 }
7055
7056 /* chroma */
7057 adj_factor = bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p1[plane_index] : 1.0;
7058
7059 effective_excess_bandwidth_c = effective_excess_bandwidth * surface_read_bw_c[plane_index] / (surface_read_bw_l[plane_index] + surface_read_bw_c[plane_index]) / adj_factor;
7060 if (effective_excess_bandwidth_c > 0.0) {
7061 vactive_det_fill_delay_us[plane_index] = math_max2(vactive_det_fill_delay_us[plane_index], bytes_required_c[plane_index] / effective_excess_bandwidth_c);
7062 }
7063 }
7064 }
7065 }
7066 }
7067
calculate_excess_vactive_bandwidth_required(const struct dml2_display_cfg * display_cfg,unsigned int num_active_planes,unsigned int bytes_required_l[],unsigned int bytes_required_c[],double excess_vactive_fill_bw_l[],double excess_vactive_fill_bw_c[])7068 static void calculate_excess_vactive_bandwidth_required(
7069 const struct dml2_display_cfg *display_cfg,
7070 unsigned int num_active_planes,
7071 unsigned int bytes_required_l[],
7072 unsigned int bytes_required_c[],
7073 /* outputs */
7074 double excess_vactive_fill_bw_l[],
7075 double excess_vactive_fill_bw_c[])
7076 {
7077 unsigned int plane_index;
7078
7079 for (plane_index = 0; plane_index < num_active_planes; plane_index++) {
7080 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_index]))
7081 continue;
7082
7083 excess_vactive_fill_bw_l[plane_index] = 0.0;
7084 excess_vactive_fill_bw_c[plane_index] = 0.0;
7085
7086 if (display_cfg->plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us > 0) {
7087 excess_vactive_fill_bw_l[plane_index] = (double)bytes_required_l[plane_index] / (double)display_cfg->plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us;
7088 excess_vactive_fill_bw_c[plane_index] = (double)bytes_required_c[plane_index] / (double)display_cfg->plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us;
7089 }
7090 }
7091 }
7092
uclk_khz_to_dram_bw_mbps(unsigned long uclk_khz,const struct dml2_dram_params * dram_config)7093 static double uclk_khz_to_dram_bw_mbps(unsigned long uclk_khz, const struct dml2_dram_params *dram_config)
7094 {
7095 double bw_mbps = 0;
7096 bw_mbps = ((double)uclk_khz * dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock) / 1000.0;
7097
7098 return bw_mbps;
7099 }
7100
dram_bw_kbps_to_uclk_mhz(unsigned long long bw_kbps,const struct dml2_dram_params * dram_config)7101 static double dram_bw_kbps_to_uclk_mhz(unsigned long long bw_kbps, const struct dml2_dram_params *dram_config)
7102 {
7103 double uclk_mhz = 0;
7104
7105 uclk_mhz = (double)bw_kbps / (dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock) / 1000.0;
7106
7107 return uclk_mhz;
7108 }
7109
get_qos_param_index(unsigned long uclk_freq_khz,const struct dml2_dcn4_uclk_dpm_dependent_qos_params * per_uclk_dpm_params)7110 static unsigned int get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params)
7111 {
7112 unsigned int i;
7113 unsigned int index = 0;
7114
7115 for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) {
7116 dml2_printf("DML::%s: per_uclk_dpm_params[%d].minimum_uclk_khz = %d\n", __func__, i, per_uclk_dpm_params[i].minimum_uclk_khz);
7117
7118 if (i == 0)
7119 index = 0;
7120 else
7121 index = i - 1;
7122
7123 if (uclk_freq_khz < per_uclk_dpm_params[i].minimum_uclk_khz ||
7124 per_uclk_dpm_params[i].minimum_uclk_khz == 0) {
7125 break;
7126 }
7127 }
7128 #if defined(__DML_VBA_DEBUG__)
7129 dml2_printf("DML::%s: uclk_freq_khz = %d\n", __func__, uclk_freq_khz);
7130 dml2_printf("DML::%s: index = %d\n", __func__, index);
7131 #endif
7132 return index;
7133 }
7134
get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz,const struct dml2_soc_state_table * clk_table)7135 static unsigned int get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table)
7136 {
7137 unsigned int i;
7138 bool clk_entry_found = 0;
7139
7140 for (i = 0; i < clk_table->uclk.num_clk_values; i++) {
7141 dml2_printf("DML::%s: clk_table.uclk.clk_values_khz[%d] = %d\n", __func__, i, clk_table->uclk.clk_values_khz[i]);
7142
7143 if (uclk_freq_khz == clk_table->uclk.clk_values_khz[i]) {
7144 clk_entry_found = 1;
7145 break;
7146 }
7147 }
7148
7149 dml2_assert(clk_entry_found);
7150 #if defined(__DML_VBA_DEBUG__)
7151 dml2_printf("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz);
7152 dml2_printf("DML::%s: index = %d\n", __func__, i);
7153 #endif
7154 return i;
7155 }
7156
/*
 * Number of bytes a pipe has to fetch for a flip.
 *
 * The total is the sum of two separately truncated terms:
 *   - vm_bytes scaled by hostvm_inefficiency_factor, plus two meta rows;
 *   - two dpte rows scaled by hostvm_inefficiency_factor.
 */
static unsigned int get_pipe_flip_bytes(
		double hostvm_inefficiency_factor,
		unsigned int vm_bytes,
		unsigned int dpte_row_bytes,
		unsigned int meta_row_bytes)
{
	const double vm_and_meta_bytes = (vm_bytes * hostvm_inefficiency_factor) + 2*meta_row_bytes;
	const double dpte_bytes = 2*dpte_row_bytes * hostvm_inefficiency_factor;

	/* Each term is converted to an integer on its own before summing. */
	return (unsigned int) vm_and_meta_bytes + (unsigned int) dpte_bytes;
}
7170
/*
 * Compute the HostVM inefficiency factors.
 *
 * Both outputs are 1 unless gpuvm and hostvm are both enabled. In that case
 * the factor is urg_bandwidth_avail_active_pixel_and_vm divided by
 * urg_bandwidth_avail_active_vm_only. The prefetch variant starts from the
 * same value and is raised to 4 when it is below 4 and
 * remote_iommu_outstanding_translations is less than max_outstanding_reqs.
 */
static void calculate_hostvm_inefficiency_factor(
		double *HostVMInefficiencyFactor,
		double *HostVMInefficiencyFactorPrefetch,

		bool gpuvm_enable,
		bool hostvm_enable,
		unsigned int remote_iommu_outstanding_translations,
		unsigned int max_outstanding_reqs,
		double urg_bandwidth_avail_active_pixel_and_vm,
		double urg_bandwidth_avail_active_vm_only)
{
	const bool hostvm_in_use = gpuvm_enable && hostvm_enable;

	*HostVMInefficiencyFactor = 1;
	*HostVMInefficiencyFactorPrefetch = 1;

	if (!hostvm_in_use)
		return;

	*HostVMInefficiencyFactor = urg_bandwidth_avail_active_pixel_and_vm / urg_bandwidth_avail_active_vm_only;
	*HostVMInefficiencyFactorPrefetch = *HostVMInefficiencyFactor;

	/* Apply the floor of 4 to the prefetch factor when translations are limited. */
	if ((remote_iommu_outstanding_translations < max_outstanding_reqs) && (*HostVMInefficiencyFactorPrefetch < 4))
		*HostVMInefficiencyFactorPrefetch = 4;

#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: urg_bandwidth_avail_active_pixel_and_vm = %f\n", __func__, urg_bandwidth_avail_active_pixel_and_vm);
	dml2_printf("DML::%s: urg_bandwidth_avail_active_vm_only = %f\n", __func__, urg_bandwidth_avail_active_vm_only);
	dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, *HostVMInefficiencyFactor);
	dml2_printf("DML::%s: HostVMInefficiencyFactorPrefetch = %f\n", __func__, *HostVMInefficiencyFactorPrefetch);
#endif
}
7199
7200 struct dml2_core_internal_g6_temp_read_blackouts_table {
7201 struct {
7202 unsigned int uclk_khz;
7203 unsigned int blackout_us;
7204 } entries[DML_MAX_CLK_TABLE_SIZE];
7205 };
7206
7207 struct dml2_core_internal_g6_temp_read_blackouts_table core_dcn4_g6_temp_read_blackout_table = {
7208 .entries = {
7209 {
7210 .uclk_khz = 96000,
7211 .blackout_us = 23,
7212 },
7213 {
7214 .uclk_khz = 435000,
7215 .blackout_us = 10,
7216 },
7217 {
7218 .uclk_khz = 521000,
7219 .blackout_us = 10,
7220 },
7221 {
7222 .uclk_khz = 731000,
7223 .blackout_us = 8,
7224 },
7225 {
7226 .uclk_khz = 822000,
7227 .blackout_us = 8,
7228 },
7229 {
7230 .uclk_khz = 962000,
7231 .blackout_us = 5,
7232 },
7233 {
7234 .uclk_khz = 1069000,
7235 .blackout_us = 5,
7236 },
7237 {
7238 .uclk_khz = 1187000,
7239 .blackout_us = 5,
7240 },
7241 },
7242 };
7243
get_g6_temp_read_blackout_us(struct dml2_soc_bb * soc,unsigned int uclk_freq_khz,unsigned int min_clk_index)7244 static double get_g6_temp_read_blackout_us(
7245 struct dml2_soc_bb *soc,
7246 unsigned int uclk_freq_khz,
7247 unsigned int min_clk_index)
7248 {
7249 unsigned int i;
7250 unsigned int blackout_us = core_dcn4_g6_temp_read_blackout_table.entries[0].blackout_us;
7251
7252 if (soc->power_management_parameters.g6_temp_read_blackout_us[0] > 0.0) {
7253 /* overrides are present in the SoC BB */
7254 return soc->power_management_parameters.g6_temp_read_blackout_us[min_clk_index];
7255 }
7256
7257 /* use internal table */
7258 blackout_us = core_dcn4_g6_temp_read_blackout_table.entries[0].blackout_us;
7259
7260 for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) {
7261 if (uclk_freq_khz < core_dcn4_g6_temp_read_blackout_table.entries[i].uclk_khz ||
7262 core_dcn4_g6_temp_read_blackout_table.entries[i].uclk_khz == 0) {
7263 break;
7264 }
7265
7266 blackout_us = core_dcn4_g6_temp_read_blackout_table.entries[i].blackout_us;
7267 }
7268
7269 return (double)blackout_us;
7270 }
7271
get_max_urgent_latency_us(struct dml2_dcn4x_soc_qos_params * dcn4x,double uclk_freq_mhz,double FabricClock,unsigned int min_clk_index)7272 static double get_max_urgent_latency_us(
7273 struct dml2_dcn4x_soc_qos_params *dcn4x,
7274 double uclk_freq_mhz,
7275 double FabricClock,
7276 unsigned int min_clk_index)
7277 {
7278 double latency;
7279 latency = dcn4x->per_uclk_dpm_params[min_clk_index].maximum_latency_when_urgent_uclk_cycles / uclk_freq_mhz
7280 * (1 + dcn4x->umc_max_latency_margin / 100.0)
7281 + dcn4x->mall_overhead_fclk_cycles / FabricClock
7282 + dcn4x->max_round_trip_to_furthest_cs_fclk_cycles / FabricClock
7283 * (1 + dcn4x->fabric_max_transport_latency_margin / 100.0);
7284 return latency;
7285 }
7286
calculate_pstate_keepout_dst_lines(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_watermarks * watermarks,unsigned int pstate_keepout_dst_lines[])7287 static void calculate_pstate_keepout_dst_lines(
7288 const struct dml2_display_cfg *display_cfg,
7289 const struct dml2_core_internal_watermarks *watermarks,
7290 unsigned int pstate_keepout_dst_lines[])
7291 {
7292 const struct dml2_stream_parameters *stream_descriptor;
7293 unsigned int i;
7294
7295 for (i = 0; i < display_cfg->num_planes; i++) {
7296 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[i])) {
7297 stream_descriptor = &display_cfg->stream_descriptors[display_cfg->plane_descriptors[i].stream_index];
7298
7299 pstate_keepout_dst_lines[i] =
7300 (unsigned int)math_ceil(watermarks->DRAMClockChangeWatermark / ((double)stream_descriptor->timing.h_total * 1000.0 / (double)stream_descriptor->timing.pixel_clock_khz));
7301
7302 if (pstate_keepout_dst_lines[i] > stream_descriptor->timing.v_total - 1) {
7303 pstate_keepout_dst_lines[i] = stream_descriptor->timing.v_total - 1;
7304 }
7305 }
7306 }
7307 }
7308
dml_core_mode_support(struct dml2_core_calcs_mode_support_ex * in_out_params)7309 static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out_params)
7310 {
7311 struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib;
7312 const struct dml2_display_cfg *display_cfg = in_out_params->in_display_cfg;
7313 const struct dml2_mcg_min_clock_table *min_clk_table = in_out_params->min_clk_table;
7314
7315 #if defined(__DML_VBA_DEBUG__)
7316 double old_ReadBandwidthLuma;
7317 double old_ReadBandwidthChroma;
7318 #endif
7319 double outstanding_latency_us = 0;
7320 double min_return_bw_for_latency;
7321
7322 struct dml2_core_calcs_mode_support_locals *s = &mode_lib->scratch.dml_core_mode_support_locals;
7323 struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params;
7324 struct dml2_core_calcs_CalculateVMRowAndSwath_params *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params;
7325 struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params;
7326 struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params;
7327 #ifdef DML_GLOBAL_PREFETCH_CHECK
7328 struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *CheckGlobalPrefetchAdmissibility_params = &mode_lib->scratch.CheckGlobalPrefetchAdmissibility_params;
7329 #endif
7330 struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params;
7331 struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params;
7332 struct dml2_core_calcs_calculate_peak_bandwidth_required_params *calculate_peak_bandwidth_params = &mode_lib->scratch.calculate_peak_bandwidth_params;
7333 struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *calculate_bytes_to_fetch_required_to_hide_latency_params = &mode_lib->scratch.calculate_bytes_to_fetch_required_to_hide_latency_params;
7334 unsigned int k, m, n;
7335
7336 memset(&mode_lib->scratch, 0, sizeof(struct dml2_core_internal_scratch));
7337 memset(&mode_lib->ms, 0, sizeof(struct dml2_core_internal_mode_support));
7338
7339 mode_lib->ms.num_active_planes = display_cfg->num_planes;
7340 get_stream_output_bpp(s->OutputBpp, display_cfg);
7341
7342 mode_lib->ms.state_idx = in_out_params->min_clk_index;
7343 mode_lib->ms.SOCCLK = ((double)mode_lib->soc.clk_table.socclk.clk_values_khz[0] / 1000);
7344 mode_lib->ms.DCFCLK = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_dcfclk_khz / 1000);
7345 mode_lib->ms.FabricClock = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz / 1000);
7346 mode_lib->ms.MaxDCFCLK = (double)min_clk_table->max_clocks_khz.dcfclk / 1000;
7347 mode_lib->ms.MaxFabricClock = (double)min_clk_table->max_clocks_khz.fclk / 1000;
7348 mode_lib->ms.max_dispclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dispclk / 1000;
7349 mode_lib->ms.max_dscclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dscclk / 1000;
7350 mode_lib->ms.max_dppclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dppclk / 1000;
7351 mode_lib->ms.uclk_freq_mhz = dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config);
7352 mode_lib->ms.dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps / 1000);
7353 mode_lib->ms.max_dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[min_clk_table->dram_bw_table.num_entries - 1].pre_derate_dram_bw_kbps / 1000);
7354 mode_lib->ms.qos_param_index = get_qos_param_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params);
7355 mode_lib->ms.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), &mode_lib->soc.clk_table);
7356
7357 #if defined(__DML_VBA_DEBUG__)
7358 dml2_printf("DML::%s: --- START --- \n", __func__);
7359 dml2_printf("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes);
7360 dml2_printf("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index);
7361 dml2_printf("DML::%s: qos_param_index = %0d\n", __func__, mode_lib->ms.qos_param_index);
7362 dml2_printf("DML::%s: SOCCLK = %f\n", __func__, mode_lib->ms.SOCCLK);
7363 dml2_printf("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->ms.dram_bw_mbps);
7364 dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz);
7365 dml2_printf("DML::%s: DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK);
7366 dml2_printf("DML::%s: FabricClock = %f\n", __func__, mode_lib->ms.FabricClock);
7367 dml2_printf("DML::%s: MaxDCFCLK = %f\n", __func__, mode_lib->ms.MaxDCFCLK);
7368 dml2_printf("DML::%s: max_dispclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dispclk_freq_mhz);
7369 dml2_printf("DML::%s: max_dscclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dscclk_freq_mhz);
7370 dml2_printf("DML::%s: max_dppclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dppclk_freq_mhz);
7371 dml2_printf("DML::%s: MaxFabricClock = %f\n", __func__, mode_lib->ms.MaxFabricClock);
7372 dml2_printf("DML::%s: ip.compressed_buffer_segment_size_in_kbytes = %u\n", __func__, mode_lib->ip.compressed_buffer_segment_size_in_kbytes);
7373 dml2_printf("DML::%s: ip.dcn_mrq_present = %u\n", __func__, mode_lib->ip.dcn_mrq_present);
7374
7375 for (k = 0; k < mode_lib->ms.num_active_planes; k++)
7376 dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns);
7377 #endif
7378
7379 CalculateMaxDETAndMinCompressedBufferSize(
7380 mode_lib->ip.config_return_buffer_size_in_kbytes,
7381 mode_lib->ip.config_return_buffer_segment_size_in_kbytes,
7382 mode_lib->ip.rob_buffer_size_kbytes,
7383 mode_lib->ip.max_num_dpp,
7384 display_cfg->overrides.hw.force_nom_det_size_kbytes.enable,
7385 display_cfg->overrides.hw.force_nom_det_size_kbytes.value,
7386 mode_lib->ip.dcn_mrq_present,
7387
7388 /* Output */
7389 &mode_lib->ms.MaxTotalDETInKByte,
7390 &mode_lib->ms.NomDETInKByte,
7391 &mode_lib->ms.MinCompressedBufferSizeInKByte);
7392
7393 PixelClockAdjustmentForProgressiveToInterlaceUnit(display_cfg, mode_lib->ip.ptoi_supported, s->PixelClockBackEnd);
7394
7395 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
7396
7397 /*Scale Ratio, taps Support Check*/
7398 mode_lib->ms.support.ScaleRatioAndTapsSupport = true;
7399 // Many core tests are still setting scaling parameters "incorrectly"
7400 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7401 if (display_cfg->plane_descriptors[k].composition.scaler_info.enabled == false
7402 && (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)
7403 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio != 1.0
7404 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps != 1.0
7405 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio != 1.0
7406 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps != 1.0)) {
7407 mode_lib->ms.support.ScaleRatioAndTapsSupport = false;
7408 } else if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps < 1.0 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps > 8.0
7409 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps < 1.0 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps > 8.0
7410 || (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps > 1.0 && (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps % 2) == 1)
7411 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio > mode_lib->ip.max_hscl_ratio
7412 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio > mode_lib->ip.max_vscl_ratio
7413 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps
7414 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps
7415 || (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)
7416 && (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps < 1 || display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps > 8 ||
7417 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps < 1 || display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps > 8 ||
7418 (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps > 1 && display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps % 2 == 1) ||
7419 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio > mode_lib->ip.max_hscl_ratio ||
7420 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio > mode_lib->ip.max_vscl_ratio ||
7421 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps ||
7422 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps))) {
7423 mode_lib->ms.support.ScaleRatioAndTapsSupport = false;
7424 }
7425 }
7426
7427 /*Source Format, Pixel Format and Scan Support Check*/
7428 mode_lib->ms.support.SourceFormatPixelAndScanSupport = true;
7429 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7430 if (display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear && dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
7431 mode_lib->ms.support.SourceFormatPixelAndScanSupport = false;
7432 }
7433 }
7434
7435 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7436 CalculateBytePerPixelAndBlockSizes(
7437 display_cfg->plane_descriptors[k].pixel_format,
7438 display_cfg->plane_descriptors[k].surface.tiling,
7439 display_cfg->plane_descriptors[k].surface.plane0.pitch,
7440 display_cfg->plane_descriptors[k].surface.plane1.pitch,
7441
7442 /* Output */
7443 &mode_lib->ms.BytePerPixelY[k],
7444 &mode_lib->ms.BytePerPixelC[k],
7445 &mode_lib->ms.BytePerPixelInDETY[k],
7446 &mode_lib->ms.BytePerPixelInDETC[k],
7447 &mode_lib->ms.Read256BlockHeightY[k],
7448 &mode_lib->ms.Read256BlockHeightC[k],
7449 &mode_lib->ms.Read256BlockWidthY[k],
7450 &mode_lib->ms.Read256BlockWidthC[k],
7451 &mode_lib->ms.MacroTileHeightY[k],
7452 &mode_lib->ms.MacroTileHeightC[k],
7453 &mode_lib->ms.MacroTileWidthY[k],
7454 &mode_lib->ms.MacroTileWidthC[k],
7455 &mode_lib->ms.surf_linear128_l[k],
7456 &mode_lib->ms.surf_linear128_c[k]);
7457 }
7458
7459 /*Bandwidth Support Check*/
7460 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7461 if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
7462 mode_lib->ms.SwathWidthYSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
7463 mode_lib->ms.SwathWidthCSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane1.width;
7464 } else {
7465 mode_lib->ms.SwathWidthYSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
7466 mode_lib->ms.SwathWidthCSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
7467 }
7468 }
7469
7470 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7471 mode_lib->ms.vactive_sw_bw_l[k] = mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
7472 mode_lib->ms.vactive_sw_bw_c[k] = mode_lib->ms.SwathWidthCSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
7473
7474 mode_lib->ms.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width *
7475 display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000));
7476
7477 #ifdef __DML_VBA_DEBUG__
7478 old_ReadBandwidthLuma = mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelInDETY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
7479 old_ReadBandwidthChroma = mode_lib->ms.SwathWidthYSingleDPP[k] / 2 * math_ceil2(mode_lib->ms.BytePerPixelInDETC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio / 2.0;
7480 dml2_printf("DML::%s: k=%u, old_ReadBandwidthLuma = %f\n", __func__, k, old_ReadBandwidthLuma);
7481 dml2_printf("DML::%s: k=%u, old_ReadBandwidthChroma = %f\n", __func__, k, old_ReadBandwidthChroma);
7482 dml2_printf("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_l[k]);
7483 dml2_printf("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_c[k]);
7484 #endif
7485 }
7486
7487 // Writeback bandwidth
7488 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
7489 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format == dml2_444_64) {
7490 mode_lib->ms.WriteBandwidth[k][0] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height
7491 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width
7492 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height
7493 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total
7494 / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 8.0;
7495 } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
7496 mode_lib->ms.WriteBandwidth[k][0] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height
7497 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width
7498 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height
7499 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total
7500 / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 4.0;
7501 } else {
7502 mode_lib->ms.WriteBandwidth[k][0] = 0.0;
7503 }
7504 }
7505
7506 /*Writeback Latency support check*/
7507 mode_lib->ms.support.WritebackLatencySupport = true;
7508 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7509 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0 &&
7510 (mode_lib->ms.WriteBandwidth[k][0] > mode_lib->ip.writeback_interface_buffer_size_kbytes * 1024 / ((double)mode_lib->soc.qos_parameters.writeback.base_latency_us))) {
7511 mode_lib->ms.support.WritebackLatencySupport = false;
7512 }
7513 }
7514
7515
7516 /* Writeback Scale Ratio and Taps Support Check */
7517 mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = true;
7518 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7519 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
7520 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio > mode_lib->ip.writeback_max_hscl_ratio
7521 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio > mode_lib->ip.writeback_max_vscl_ratio
7522 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio < mode_lib->ip.writeback_min_hscl_ratio
7523 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio < mode_lib->ip.writeback_min_vscl_ratio
7524 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps > (unsigned int) mode_lib->ip.writeback_max_hscl_taps
7525 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps > (unsigned int) mode_lib->ip.writeback_max_vscl_taps
7526 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps
7527 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps
7528 || (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps > 2.0 && ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps % 2) == 1))) {
7529 mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false;
7530 }
7531 if (2.0 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height * (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps - 1) * 57 > mode_lib->ip.writeback_line_buffer_buffer_size) {
7532 mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false;
7533 }
7534 }
7535 }
7536
7537 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7538 CalculateSinglePipeDPPCLKAndSCLThroughput(
7539 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
7540 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
7541 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
7542 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
7543 mode_lib->ip.max_dchub_pscl_bw_pix_per_clk,
7544 mode_lib->ip.max_pscl_lb_bw_pix_per_clk,
7545 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
7546 display_cfg->plane_descriptors[k].pixel_format,
7547 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps,
7548 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps,
7549 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps,
7550 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps,
7551 /* Output */
7552 &mode_lib->ms.PSCL_FACTOR[k],
7553 &mode_lib->ms.PSCL_FACTOR_CHROMA[k],
7554 &mode_lib->ms.MinDPPCLKUsingSingleDPP[k]);
7555 }
7556
7557 // Max Viewport Size support
7558 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
7559 if (display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) {
7560 s->MaximumSwathWidthSupportLuma = 15360;
7561 } else if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelC[k] > 0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { // horz video
7562 s->MaximumSwathWidthSupportLuma = 7680 + 16;
7563 } else if (dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelC[k] > 0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { // vert video
7564 s->MaximumSwathWidthSupportLuma = 4320 + 16;
7565 } else if (display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { // rgbe + alpha
7566 s->MaximumSwathWidthSupportLuma = 5120 + 16;
7567 } else if (dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelY[k] == 8 && display_cfg->plane_descriptors[k].surface.dcc.enable == true) { // vert 64bpp
7568 s->MaximumSwathWidthSupportLuma = 3072 + 16;
7569 } else {
7570 s->MaximumSwathWidthSupportLuma = 6144 + 16;
7571 }
7572
7573 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) {
7574 s->MaximumSwathWidthSupportChroma = (unsigned int)(s->MaximumSwathWidthSupportLuma / 2.0);
7575 } else {
7576 s->MaximumSwathWidthSupportChroma = s->MaximumSwathWidthSupportLuma;
7577 }
7578
7579 unsigned lb_buffer_size_bits_luma = mode_lib->ip.line_buffer_size_bits;
7580 unsigned lb_buffer_size_bits_chroma = mode_lib->ip.line_buffer_size_bits;
7581
7582 /*
7583 #if defined(DV_BUILD)
7584 // Assume a memory config setting of 3 in 420 mode or get a new ip parameter that reflects the programming.
7585 if (mode_lib->ms.BytePerPixelC[k] != 0.0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) {
7586 lb_buffer_size_bits_luma = 34620 * 57;
7587 lb_buffer_size_bits_chroma = 13560 * 57;
7588 }
7589 #endif
7590 */
7591 mode_lib->ms.MaximumSwathWidthInLineBufferLuma = lb_buffer_size_bits_luma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, 1.0) / 57 /
7592 (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, 1.0) - 2, 0.0));
7593 if (mode_lib->ms.BytePerPixelC[k] == 0.0) {
7594 mode_lib->ms.MaximumSwathWidthInLineBufferChroma = 0;
7595 } else {
7596 mode_lib->ms.MaximumSwathWidthInLineBufferChroma = lb_buffer_size_bits_chroma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, 1.0) / 57 /
7597 (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, 1.0) - 2, 0.0));
7598 }
7599
7600 mode_lib->ms.MaximumSwathWidthLuma[k] = math_min2(s->MaximumSwathWidthSupportLuma, mode_lib->ms.MaximumSwathWidthInLineBufferLuma);
7601 mode_lib->ms.MaximumSwathWidthChroma[k] = math_min2(s->MaximumSwathWidthSupportChroma, mode_lib->ms.MaximumSwathWidthInLineBufferChroma);
7602
7603 dml2_printf("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthLuma[k]);
7604 dml2_printf("DML::%s: k=%u MaximumSwathWidthSupportLuma=%u\n", __func__, k, s->MaximumSwathWidthSupportLuma);
7605 dml2_printf("DML::%s: k=%u MaximumSwathWidthInLineBufferLuma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthInLineBufferLuma);
7606
7607 dml2_printf("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthChroma[k]);
7608 dml2_printf("DML::%s: k=%u MaximumSwathWidthSupportChroma=%u\n", __func__, k, s->MaximumSwathWidthSupportChroma);
7609 dml2_printf("DML::%s: k=%u MaximumSwathWidthInLineBufferChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthInLineBufferChroma);
7610 }
7611
7612 /* Cursor Support Check */
7613 mode_lib->ms.support.CursorSupport = true;
7614 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
7615 if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) {
7616 if (display_cfg->plane_descriptors[k].cursor.cursor_bpp == 64 && mode_lib->ip.cursor_64bpp_support == false)
7617 mode_lib->ms.support.CursorSupport = false;
7618 }
7619 }
7620
7621 /* Valid Pitch Check */
7622 mode_lib->ms.support.PitchSupport = true;
7623 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
7624
7625 // data pitch
7626 unsigned int alignment_l = mode_lib->ms.MacroTileWidthY[k];
7627
7628 if (mode_lib->ms.surf_linear128_l[k])
7629 alignment_l = alignment_l / 2;
7630
7631 mode_lib->ms.support.AlignedYPitch[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.plane0.pitch, display_cfg->plane_descriptors[k].surface.plane0.width), alignment_l);
7632 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) {
7633 unsigned int alignment_c = mode_lib->ms.MacroTileWidthC[k];
7634
7635 if (mode_lib->ms.surf_linear128_c[k])
7636 alignment_c = alignment_c / 2;
7637 mode_lib->ms.support.AlignedCPitch[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.plane1.pitch, display_cfg->plane_descriptors[k].surface.plane1.width), alignment_c);
7638 } else {
7639 mode_lib->ms.support.AlignedCPitch[k] = display_cfg->plane_descriptors[k].surface.plane1.pitch;
7640 }
7641
7642 if (mode_lib->ms.support.AlignedYPitch[k] > display_cfg->plane_descriptors[k].surface.plane0.pitch ||
7643 mode_lib->ms.support.AlignedCPitch[k] > display_cfg->plane_descriptors[k].surface.plane1.pitch) {
7644 mode_lib->ms.support.PitchSupport = false;
7645 #if defined(__DML_VBA_DEBUG__)
7646 dml2_printf("DML::%s: k=%u AlignedYPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedYPitch[k]);
7647 dml2_printf("DML::%s: k=%u PitchY = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.pitch);
7648 dml2_printf("DML::%s: k=%u AlignedCPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedCPitch[k]);
7649 dml2_printf("DML::%s: k=%u PitchC = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane1.pitch);
7650 dml2_printf("DML::%s: k=%u PitchSupport = %d\n", __func__, k, mode_lib->ms.support.PitchSupport);
7651 #endif
7652 }
7653
7654 // meta pitch
7655 if (mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable) {
7656 mode_lib->ms.support.AlignedDCCMetaPitchY[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch,
7657 display_cfg->plane_descriptors[k].surface.plane0.width), 64.0 * mode_lib->ms.Read256BlockWidthY[k]);
7658
7659 if (mode_lib->ms.support.AlignedDCCMetaPitchY[k] > display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch)
7660 mode_lib->ms.support.PitchSupport = false;
7661
7662 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) {
7663 mode_lib->ms.support.AlignedDCCMetaPitchC[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch,
7664 display_cfg->plane_descriptors[k].surface.plane1.width), 64.0 * mode_lib->ms.Read256BlockWidthC[k]);
7665
7666 if (mode_lib->ms.support.AlignedDCCMetaPitchC[k] > display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch)
7667 mode_lib->ms.support.PitchSupport = false;
7668 }
7669 } else {
7670 mode_lib->ms.support.AlignedDCCMetaPitchY[k] = 0;
7671 mode_lib->ms.support.AlignedDCCMetaPitchC[k] = 0;
7672 }
7673 }
7674
7675 mode_lib->ms.support.ViewportExceedsSurface = false;
7676 if (!display_cfg->overrides.hw.surface_viewport_size_check_disable) {
7677 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
7678 if (display_cfg->plane_descriptors[k].composition.viewport.plane0.width > display_cfg->plane_descriptors[k].surface.plane0.width ||
7679 display_cfg->plane_descriptors[k].composition.viewport.plane0.height > display_cfg->plane_descriptors[k].surface.plane0.height) {
7680 mode_lib->ms.support.ViewportExceedsSurface = true;
7681 #if defined(__DML_VBA_DEBUG__)
7682 dml2_printf("DML::%s: k=%u ViewportWidth = %d\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width);
7683 dml2_printf("DML::%s: k=%u SurfaceWidthY = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.width);
7684 dml2_printf("DML::%s: k=%u ViewportHeight = %d\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height);
7685 dml2_printf("DML::%s: k=%u SurfaceHeightY = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.height);
7686 dml2_printf("DML::%s: k=%u ViewportExceedsSurface = %d\n", __func__, k, mode_lib->ms.support.ViewportExceedsSurface);
7687 #endif
7688 }
7689 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) {
7690 if (display_cfg->plane_descriptors[k].composition.viewport.plane1.width > display_cfg->plane_descriptors[k].surface.plane1.width ||
7691 display_cfg->plane_descriptors[k].composition.viewport.plane1.height > display_cfg->plane_descriptors[k].surface.plane1.height) {
7692 mode_lib->ms.support.ViewportExceedsSurface = true;
7693 }
7694 }
7695 }
7696 }
7697
7698 CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg;
7699 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ip.config_return_buffer_size_in_kbytes;
7700 CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte;
7701 CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte;
7702 CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
7703 CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
7704 CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
7705 CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
7706 CalculateSwathAndDETConfiguration_params->ForceSingleDPP = 1;
7707 CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
7708 CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte;
7709 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes;
7710 CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes;
7711 CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.vactive_sw_bw_l;
7712 CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.vactive_sw_bw_c;
7713 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = mode_lib->ms.MaximumSwathWidthLuma;
7714 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = mode_lib->ms.MaximumSwathWidthChroma;
7715 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->ms.Read256BlockHeightY;
7716 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->ms.Read256BlockHeightC;
7717 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->ms.Read256BlockWidthY;
7718 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->ms.Read256BlockWidthC;
7719 CalculateSwathAndDETConfiguration_params->surf_linear128_l = mode_lib->ms.surf_linear128_l;
7720 CalculateSwathAndDETConfiguration_params->surf_linear128_c = mode_lib->ms.surf_linear128_c;
7721 CalculateSwathAndDETConfiguration_params->ODMMode = s->dummy_odm_mode;
7722 CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->ms.BytePerPixelY;
7723 CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->ms.BytePerPixelC;
7724 CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->ms.BytePerPixelInDETY;
7725 CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->ms.BytePerPixelInDETC;
7726 CalculateSwathAndDETConfiguration_params->DPPPerSurface = s->dummy_integer_array[2];
7727 CalculateSwathAndDETConfiguration_params->mrq_present = mode_lib->ip.dcn_mrq_present;
7728
7729 // output
7730 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = s->dummy_integer_array[0];
7731 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = s->dummy_integer_array[1];
7732 CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_integer_array[3];
7733 CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_integer_array[4];
7734 CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_integer_array[5];
7735 CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_integer_array[6];
7736 CalculateSwathAndDETConfiguration_params->SwathHeightY = s->dummy_integer_array[7];
7737 CalculateSwathAndDETConfiguration_params->SwathHeightC = s->dummy_integer_array[8];
7738 CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = s->dummy_integer_array[26];
7739 CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = s->dummy_integer_array[27];
7740 CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = s->dummy_integer_array[9];
7741 CalculateSwathAndDETConfiguration_params->DETBufferSizeY = s->dummy_integer_array[10];
7742 CalculateSwathAndDETConfiguration_params->DETBufferSizeC = s->dummy_integer_array[11];
7743 CalculateSwathAndDETConfiguration_params->full_swath_bytes_l = s->full_swath_bytes_l;
7744 CalculateSwathAndDETConfiguration_params->full_swath_bytes_c = s->full_swath_bytes_c;
7745 CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &s->dummy_boolean[0];
7746 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &s->dummy_integer[1];
7747 CalculateSwathAndDETConfiguration_params->hw_debug5 = &s->dummy_boolean[2];
7748 CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &s->dummy_integer[0];
7749 CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = mode_lib->ms.SingleDPPViewportSizeSupportPerSurface;
7750 CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[1];
7751
7752 // This calls is just to find out if there is enough DET space to support full vp in 1 pipe.
7753 CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params);
7754
7755 mode_lib->ms.TotalNumberOfActiveDPP = 0;
7756 mode_lib->ms.support.TotalAvailablePipesSupport = true;
7757
7758 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7759 /*Number Of DSC Slices*/
7760 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable ||
7761 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary) {
7762
7763 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices != 0)
7764 mode_lib->ms.support.NumberOfDSCSlices[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices;
7765 else {
7766 if (s->PixelClockBackEnd[k] > 4800) {
7767 mode_lib->ms.support.NumberOfDSCSlices[k] = (unsigned int)(math_ceil2(s->PixelClockBackEnd[k] / 600, 4));
7768 } else if (s->PixelClockBackEnd[k] > 2400) {
7769 mode_lib->ms.support.NumberOfDSCSlices[k] = 8;
7770 } else if (s->PixelClockBackEnd[k] > 1200) {
7771 mode_lib->ms.support.NumberOfDSCSlices[k] = 4;
7772 } else if (s->PixelClockBackEnd[k] > 340) {
7773 mode_lib->ms.support.NumberOfDSCSlices[k] = 2;
7774 } else {
7775 mode_lib->ms.support.NumberOfDSCSlices[k] = 1;
7776 }
7777 }
7778 } else {
7779 mode_lib->ms.support.NumberOfDSCSlices[k] = 0;
7780 }
7781
7782 CalculateODMMode(
7783 mode_lib->ip.maximum_pixels_per_line_per_dsc_unit,
7784 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
7785 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
7786 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
7787 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode,
7788 mode_lib->ms.max_dispclk_freq_mhz,
7789 false, // DSCEnable
7790 mode_lib->ms.TotalNumberOfActiveDPP,
7791 mode_lib->ip.max_num_dpp,
7792 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
7793 mode_lib->ms.support.NumberOfDSCSlices[k],
7794
7795 /* Output */
7796 &s->TotalAvailablePipesSupportNoDSC,
7797 &s->NumberOfDPPNoDSC,
7798 &s->ODMModeNoDSC,
7799 &s->RequiredDISPCLKPerSurfaceNoDSC);
7800
7801 CalculateODMMode(
7802 mode_lib->ip.maximum_pixels_per_line_per_dsc_unit,
7803 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
7804 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
7805 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
7806 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode,
7807 mode_lib->ms.max_dispclk_freq_mhz,
7808 true, // DSCEnable
7809 mode_lib->ms.TotalNumberOfActiveDPP,
7810 mode_lib->ip.max_num_dpp,
7811 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
7812 mode_lib->ms.support.NumberOfDSCSlices[k],
7813
7814 /* Output */
7815 &s->TotalAvailablePipesSupportDSC,
7816 &s->NumberOfDPPDSC,
7817 &s->ODMModeDSC,
7818 &s->RequiredDISPCLKPerSurfaceDSC);
7819
7820 CalculateOutputLink(
7821 &mode_lib->scratch,
7822 ((double)mode_lib->soc.clk_table.phyclk.clk_values_khz[0] / 1000),
7823 ((double)mode_lib->soc.clk_table.phyclk_d18.clk_values_khz[0] / 1000),
7824 ((double)mode_lib->soc.clk_table.phyclk_d32.clk_values_khz[0] / 1000),
7825 mode_lib->soc.phy_downspread_percent,
7826 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
7827 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
7828 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
7829 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
7830 s->PixelClockBackEnd[k],
7831 s->OutputBpp[k],
7832 mode_lib->ip.maximum_dsc_bits_per_component,
7833 mode_lib->ms.support.NumberOfDSCSlices[k],
7834 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_rate,
7835 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_layout,
7836 s->ODMModeNoDSC,
7837 s->ODMModeDSC,
7838 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable,
7839 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count,
7840 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate,
7841
7842 /* Output */
7843 &mode_lib->ms.RequiresDSC[k],
7844 &mode_lib->ms.RequiresFEC[k],
7845 &mode_lib->ms.OutputBpp[k],
7846 &mode_lib->ms.OutputType[k],
7847 &mode_lib->ms.OutputRate[k],
7848 &mode_lib->ms.RequiredSlots[k]);
7849
7850 if (s->OutputBpp[k] == 0.0) {
7851 s->OutputBpp[k] = mode_lib->ms.OutputBpp[k];
7852 }
7853
7854 if (mode_lib->ms.RequiresDSC[k] == false) {
7855 mode_lib->ms.ODMMode[k] = s->ODMModeNoDSC;
7856 mode_lib->ms.RequiredDISPCLKPerSurface[k] = s->RequiredDISPCLKPerSurfaceNoDSC;
7857 if (!s->TotalAvailablePipesSupportNoDSC)
7858 mode_lib->ms.support.TotalAvailablePipesSupport = false;
7859 mode_lib->ms.TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP + s->NumberOfDPPNoDSC;
7860 } else {
7861 mode_lib->ms.ODMMode[k] = s->ODMModeDSC;
7862 mode_lib->ms.RequiredDISPCLKPerSurface[k] = s->RequiredDISPCLKPerSurfaceDSC;
7863 if (!s->TotalAvailablePipesSupportDSC)
7864 mode_lib->ms.support.TotalAvailablePipesSupport = false;
7865 mode_lib->ms.TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP + s->NumberOfDPPDSC;
7866 }
7867 #if defined(__DML_VBA_DEBUG__)
7868 dml2_printf("DML::%s: k=%d RequiresDSC = %d\n", __func__, k, mode_lib->ms.RequiresDSC[k]);
7869 dml2_printf("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]);
7870 #endif
7871
7872 // ensure the number of dsc slices is an integer multiple based on ODM mode
7873 mode_lib->ms.support.DSCSlicesODMModeSupported = true;
7874 if (mode_lib->ms.RequiresDSC[k]) {
7875 // fail a ms check if the override num_slices doesn't align with odm mode setting
7876 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices != 0) {
7877 if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
7878 mode_lib->ms.support.DSCSlicesODMModeSupported = ((mode_lib->ms.support.NumberOfDSCSlices[k] % 2) == 0);
7879 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
7880 mode_lib->ms.support.DSCSlicesODMModeSupported = (mode_lib->ms.support.NumberOfDSCSlices[k] == 12);
7881 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
7882 mode_lib->ms.support.DSCSlicesODMModeSupported = ((mode_lib->ms.support.NumberOfDSCSlices[k] % 4) == 0);
7883 #if defined(__DML_VBA_DEBUG__)
7884 if (!mode_lib->ms.support.DSCSlicesODMModeSupported) {
7885 dml2_printf("DML::%s: k=%d Invalid dsc num_slices and ODM mode setting\n", __func__, k);
7886 dml2_printf("DML::%s: k=%d num_slices = %d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices);
7887 dml2_printf("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]);
7888 }
7889 #endif
7890 } else {
7891 // safeguard to ensure the dml derived dsc slices and odm setting are compatible
7892 if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
7893 mode_lib->ms.support.NumberOfDSCSlices[k] = 2 * (unsigned int)math_ceil2(mode_lib->ms.support.NumberOfDSCSlices[k] / 2.0, 1.0);
7894 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
7895 mode_lib->ms.support.NumberOfDSCSlices[k] = 12;
7896 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
7897 mode_lib->ms.support.NumberOfDSCSlices[k] = 4 * (unsigned int)math_ceil2(mode_lib->ms.support.NumberOfDSCSlices[k] / 4.0, 1.0);
7898 }
7899
7900 } else {
7901 mode_lib->ms.support.NumberOfDSCSlices[k] = 0;
7902 }
7903 }
7904
7905 mode_lib->ms.support.incorrect_imall_usage = 0;
7906 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7907 if (mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall)
7908 mode_lib->ms.support.incorrect_imall_usage = 1;
7909 }
7910
7911 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7912 mode_lib->ms.MPCCombine[k] = false;
7913 mode_lib->ms.NoOfDPP[k] = 1;
7914
7915 if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1) {
7916 mode_lib->ms.MPCCombine[k] = false;
7917 mode_lib->ms.NoOfDPP[k] = 4;
7918 } else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1) {
7919 mode_lib->ms.MPCCombine[k] = false;
7920 mode_lib->ms.NoOfDPP[k] = 3;
7921 } else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1) {
7922 mode_lib->ms.MPCCombine[k] = false;
7923 mode_lib->ms.NoOfDPP[k] = 2;
7924 } else if (display_cfg->plane_descriptors[k].overrides.mpcc_combine_factor == 2) {
7925 mode_lib->ms.MPCCombine[k] = true;
7926 mode_lib->ms.NoOfDPP[k] = 2;
7927 mode_lib->ms.TotalNumberOfActiveDPP++;
7928 } else if (display_cfg->plane_descriptors[k].overrides.mpcc_combine_factor == 1) {
7929 mode_lib->ms.MPCCombine[k] = false;
7930 mode_lib->ms.NoOfDPP[k] = 1;
7931 if (!mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) {
7932 dml2_printf("WARNING: DML::%s: MPCC is override to disable but viewport is too large to be supported with single pipe!\n", __func__);
7933 }
7934 } else {
7935 if ((mode_lib->ms.MinDPPCLKUsingSingleDPP[k] > mode_lib->ms.max_dppclk_freq_mhz) || !mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) {
7936 mode_lib->ms.MPCCombine[k] = true;
7937 mode_lib->ms.NoOfDPP[k] = 2;
7938 mode_lib->ms.TotalNumberOfActiveDPP++;
7939 }
7940 }
7941 #if defined(__DML_VBA_DEBUG__)
7942 dml2_printf("DML::%s: k=%d, NoOfDPP = %d\n", __func__, k, mode_lib->ms.NoOfDPP[k]);
7943 #endif
7944 }
7945
7946 if (mode_lib->ms.TotalNumberOfActiveDPP > (unsigned int)mode_lib->ip.max_num_dpp)
7947 mode_lib->ms.support.TotalAvailablePipesSupport = false;
7948
7949
7950 mode_lib->ms.TotalNumberOfSingleDPPSurfaces = 0;
7951 for (k = 0; k < (unsigned int)mode_lib->ms.num_active_planes; ++k) {
7952 if (mode_lib->ms.NoOfDPP[k] == 1)
7953 mode_lib->ms.TotalNumberOfSingleDPPSurfaces = mode_lib->ms.TotalNumberOfSingleDPPSurfaces + 1;
7954 }
7955
7956 //DISPCLK/DPPCLK
7957 mode_lib->ms.WritebackRequiredDISPCLK = 0;
7958 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7959 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
7960 mode_lib->ms.WritebackRequiredDISPCLK = math_max2(mode_lib->ms.WritebackRequiredDISPCLK,
7961 CalculateWriteBackDISPCLK(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format,
7962 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
7963 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio,
7964 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio,
7965 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps,
7966 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps,
7967 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_width,
7968 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width,
7969 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
7970 mode_lib->ip.writeback_line_buffer_buffer_size));
7971 }
7972 }
7973
7974 mode_lib->ms.RequiredDISPCLK = mode_lib->ms.WritebackRequiredDISPCLK;
7975 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7976 mode_lib->ms.RequiredDISPCLK = math_max2(mode_lib->ms.RequiredDISPCLK, mode_lib->ms.RequiredDISPCLKPerSurface[k]);
7977 }
7978
7979 mode_lib->ms.GlobalDPPCLK = 0;
7980 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7981 mode_lib->ms.RequiredDPPCLK[k] = mode_lib->ms.MinDPPCLKUsingSingleDPP[k] / mode_lib->ms.NoOfDPP[k];
7982 mode_lib->ms.GlobalDPPCLK = math_max2(mode_lib->ms.GlobalDPPCLK, mode_lib->ms.RequiredDPPCLK[k]);
7983 }
7984
7985 mode_lib->ms.support.DISPCLK_DPPCLK_Support = !((mode_lib->ms.RequiredDISPCLK > mode_lib->ms.max_dispclk_freq_mhz) || (mode_lib->ms.GlobalDPPCLK > mode_lib->ms.max_dppclk_freq_mhz));
7986
7987 /* Total Available OTG, Writeback, HDMIFRL, DP Support Check */
7988 s->TotalNumberOfActiveOTG = 0;
7989 s->TotalNumberOfActiveHDMIFRL = 0;
7990 s->TotalNumberOfActiveDP2p0 = 0;
7991 s->TotalNumberOfActiveDP2p0Outputs = 0;
7992 s->TotalNumberOfActiveWriteback = 0;
7993 memset(s->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool));
7994
7995 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7996 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
7997 if (!s->stream_visited[display_cfg->plane_descriptors[k].stream_index]) {
7998 s->stream_visited[display_cfg->plane_descriptors[k].stream_index] = 1;
7999
8000 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0)
8001 s->TotalNumberOfActiveWriteback = s->TotalNumberOfActiveWriteback + 1;
8002
8003 s->TotalNumberOfActiveOTG = s->TotalNumberOfActiveOTG + 1;
8004 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl)
8005 s->TotalNumberOfActiveHDMIFRL = s->TotalNumberOfActiveHDMIFRL + 1;
8006 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0) {
8007 s->TotalNumberOfActiveDP2p0 = s->TotalNumberOfActiveDP2p0 + 1;
8008 // FIXME_STAGE2: SW not using backend related stuff, need mapping for mst setup
8009 //if (display_cfg->output.OutputMultistreamId[k] == k || display_cfg->output.OutputMultistreamEn[k] == false) {
8010 s->TotalNumberOfActiveDP2p0Outputs = s->TotalNumberOfActiveDP2p0Outputs + 1;
8011 //}
8012 }
8013 }
8014 }
8015 }
8016
8017 /* Writeback Mode Support Check */
8018 mode_lib->ms.support.EnoughWritebackUnits = 1;
8019 if (s->TotalNumberOfActiveWriteback > (unsigned int)mode_lib->ip.max_num_wb) {
8020 mode_lib->ms.support.EnoughWritebackUnits = false;
8021 }
8022 mode_lib->ms.support.NumberOfOTGSupport = (s->TotalNumberOfActiveOTG <= (unsigned int)mode_lib->ip.max_num_otg);
8023 mode_lib->ms.support.NumberOfHDMIFRLSupport = (s->TotalNumberOfActiveHDMIFRL <= (unsigned int)mode_lib->ip.max_num_hdmi_frl_outputs);
8024 mode_lib->ms.support.NumberOfDP2p0Support = (s->TotalNumberOfActiveDP2p0 <= (unsigned int)mode_lib->ip.max_num_dp2p0_streams && s->TotalNumberOfActiveDP2p0Outputs <= (unsigned int)mode_lib->ip.max_num_dp2p0_outputs);
8025
8026
8027 mode_lib->ms.support.ExceededMultistreamSlots = false;
8028 mode_lib->ms.support.LinkCapacitySupport = true;
8029 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8030 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_disabled == false &&
8031 (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
8032 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) && mode_lib->ms.OutputBpp[k] == 0) {
8033 mode_lib->ms.support.LinkCapacitySupport = false;
8034 }
8035 }
8036
8037 mode_lib->ms.support.P2IWith420 = false;
8038 mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = false;
8039 mode_lib->ms.support.DSC422NativeNotSupported = false;
8040 mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = false;
8041 mode_lib->ms.support.LinkRateForMultistreamNotIndicated = false;
8042 mode_lib->ms.support.BPPForMultistreamNotIndicated = false;
8043 mode_lib->ms.support.MultistreamWithHDMIOreDP = false;
8044 mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = false;
8045 mode_lib->ms.support.NotEnoughLanesForMSO = false;
8046
8047 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8048 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
8049 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) {
8050 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced == 1 && mode_lib->ip.ptoi_supported == true)
8051 mode_lib->ms.support.P2IWith420 = true;
8052
8053 if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 && !mode_lib->ip.dsc422_native_support)
8054 mode_lib->ms.support.DSC422NativeNotSupported = true;
8055
8056 if (((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr2 ||
8057 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr3) &&
8058 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_edp) ||
8059 ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr10 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr13p5 ||
8060 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr20) &&
8061 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp2p0))
8062 mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = true;
8063
8064 // FIXME_STAGE2
8065 //if (display_cfg->output.OutputMultistreamEn[k] == 1) {
8066 // if (display_cfg->output.OutputMultistreamId[k] == k && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_na)
8067 // mode_lib->ms.support.LinkRateForMultistreamNotIndicated = true;
8068 // if (display_cfg->output.OutputMultistreamId[k] == k && s->OutputBpp[k] == 0)
8069 // mode_lib->ms.support.BPPForMultistreamNotIndicated = true;
8070 // for (n = 0; n < mode_lib->ms.num_active_planes; ++n) {
8071 // if (display_cfg->output.OutputMultistreamId[k] == n && s->OutputBpp[k] == 0)
8072 // mode_lib->ms.support.BPPForMultistreamNotIndicated = true;
8073 // }
8074 //}
8075
8076 if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
8077 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi ||
8078 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl)) {
8079 // FIXME_STAGE2
8080 //if (display_cfg->output.OutputMultistreamEn[k] == 1 && display_cfg->output.OutputMultistreamId[k] == k)
8081 // mode_lib->ms.support.MultistreamWithHDMIOreDP = true;
8082 //for (n = 0; n < mode_lib->ms.num_active_planes; ++n) {
8083 // if (display_cfg->output.OutputMultistreamEn[k] == 1 && display_cfg->output.OutputMultistreamId[k] == n)
8084 // mode_lib->ms.support.MultistreamWithHDMIOreDP = true;
8085 //}
8086 }
8087 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp && (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_split_1to2 ||
8088 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to2 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to4))
8089 mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = true;
8090
8091 if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to2 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count < 2) ||
8092 (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to4 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count < 4))
8093 mode_lib->ms.support.NotEnoughLanesForMSO = true;
8094 }
8095 }
8096
8097 mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = false;
8098 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8099 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl &&
8100 !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
8101 mode_lib->ms.RequiredDTBCLK[k] = RequiredDTBCLK(
8102 mode_lib->ms.RequiresDSC[k],
8103 s->PixelClockBackEnd[k],
8104 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
8105 mode_lib->ms.OutputBpp[k],
8106 mode_lib->ms.support.NumberOfDSCSlices[k],
8107 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
8108 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
8109 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_rate,
8110 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_layout);
8111
8112 if (mode_lib->ms.RequiredDTBCLK[k] > ((double)min_clk_table->max_clocks_khz.dtbclk / 1000)) {
8113 mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = true;
8114 }
8115 } else {
8116 /* Phantom DTBCLK can be calculated differently from main because phantom has no DSC and thus
8117 * will have a different output BPP. Ignore phantom DTBCLK requirement and only consider
8118 * non-phantom DTBCLK requirements. In map_mode_to_soc_dpm we choose the highest DTBCLK
8119 * required - by setting phantom dtbclk to 0 we ignore it.
8120 */
8121 mode_lib->ms.RequiredDTBCLK[k] = 0;
8122 }
8123 }
8124
8125 mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = false;
8126 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8127 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp ||
8128 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 ||
8129 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
8130 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) {
8131 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420) {
8132 s->DSCFormatFactor = 2;
8133 } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_444) {
8134 s->DSCFormatFactor = 1;
8135 } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) {
8136 s->DSCFormatFactor = 2;
8137 } else {
8138 s->DSCFormatFactor = 1;
8139 }
8140 #ifdef __DML_VBA_DEBUG__
8141 dml2_printf("DML::%s: k=%u, RequiresDSC = %u\n", __func__, k, mode_lib->ms.RequiresDSC[k]);
8142 #endif
8143 if (mode_lib->ms.RequiresDSC[k] == true) {
8144 s->PixelClockBackEndFactor = 3.0;
8145
8146 if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
8147 s->PixelClockBackEndFactor = 12.0;
8148 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
8149 s->PixelClockBackEndFactor = 9.0;
8150 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
8151 s->PixelClockBackEndFactor = 6.0;
8152
8153 mode_lib->ms.required_dscclk_freq_mhz[k] = s->PixelClockBackEnd[k] / s->PixelClockBackEndFactor / (double)s->DSCFormatFactor;
8154 if (mode_lib->ms.required_dscclk_freq_mhz[k] > mode_lib->ms.max_dscclk_freq_mhz) {
8155 mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true;
8156 }
8157
8158 #ifdef __DML_VBA_DEBUG__
8159 dml2_printf("DML::%s: k=%u, PixelClockBackEnd = %f\n", __func__, k, s->PixelClockBackEnd[k]);
8160 dml2_printf("DML::%s: k=%u, required_dscclk_freq_mhz = %f\n", __func__, k, mode_lib->ms.required_dscclk_freq_mhz[k]);
8161 dml2_printf("DML::%s: k=%u, DSCFormatFactor = %u\n", __func__, k, s->DSCFormatFactor);
8162 dml2_printf("DML::%s: k=%u, DSCCLKRequiredMoreThanSupported = %u\n", __func__, k, mode_lib->ms.support.DSCCLKRequiredMoreThanSupported);
8163 #endif
8164 }
8165 }
8166 }
8167
8168 /* Check DSC Unit and Slices Support */
8169 mode_lib->ms.support.NotEnoughDSCSlices = false;
8170 s->TotalDSCUnitsRequired = 0;
8171 mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = true;
8172 memset(s->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool));
8173
8174 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8175 if (mode_lib->ms.RequiresDSC[k] == true && !s->stream_visited[display_cfg->plane_descriptors[k].stream_index]) {
8176 s->NumDSCUnitRequired = 1;
8177
8178 if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
8179 s->NumDSCUnitRequired = 4;
8180 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
8181 s->NumDSCUnitRequired = 3;
8182 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
8183 s->NumDSCUnitRequired = 2;
8184
8185 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active > s->NumDSCUnitRequired * (unsigned int)mode_lib->ip.maximum_pixels_per_line_per_dsc_unit)
8186 mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false;
8187 s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + s->NumDSCUnitRequired;
8188
8189 if (mode_lib->ms.support.NumberOfDSCSlices[k] > 4 * s->NumDSCUnitRequired)
8190 mode_lib->ms.support.NotEnoughDSCSlices = true;
8191 }
8192 s->stream_visited[display_cfg->plane_descriptors[k].stream_index] = 1;
8193 }
8194
8195 mode_lib->ms.support.NotEnoughDSCUnits = false;
8196 if (s->TotalDSCUnitsRequired > (unsigned int)mode_lib->ip.num_dsc) {
8197 mode_lib->ms.support.NotEnoughDSCUnits = true;
8198 }
8199
8200 /*DSC Delay per state*/
8201 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8202 mode_lib->ms.DSCDelay[k] = DSCDelayRequirement(mode_lib->ms.RequiresDSC[k],
8203 mode_lib->ms.ODMMode[k],
8204 mode_lib->ip.maximum_dsc_bits_per_component,
8205 s->OutputBpp[k],
8206 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
8207 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
8208 mode_lib->ms.support.NumberOfDSCSlices[k],
8209 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
8210 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
8211 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
8212 s->PixelClockBackEnd[k]);
8213 }
8214
8215 // Figure out the swath and DET configuration after the num dpp per plane is figured out
8216 CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false;
8217 CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->ms.ODMMode;
8218 CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->ms.NoOfDPP;
8219
8220 // output
8221 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = s->dummy_integer_array[0];
8222 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = s->dummy_integer_array[1];
8223 CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub;
8224 CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub;
8225 CalculateSwathAndDETConfiguration_params->SwathWidth = mode_lib->ms.SwathWidthY;
8226 CalculateSwathAndDETConfiguration_params->SwathWidthChroma = mode_lib->ms.SwathWidthC;
8227 CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->ms.SwathHeightY;
8228 CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->ms.SwathHeightC;
8229 CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = mode_lib->ms.support.request_size_bytes_luma;
8230 CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = mode_lib->ms.support.request_size_bytes_chroma;
8231 CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->ms.DETBufferSizeInKByte; // FIXME: This is per pipe but the pipes in plane will use that
8232 CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY;
8233 CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC;
8234 CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->ms.UnboundedRequestEnabled;
8235 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = s->dummy_integer_array[3];
8236 CalculateSwathAndDETConfiguration_params->hw_debug5 = s->dummy_boolean_array[1];
8237 CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->ms.CompressedBufferSizeInkByte;
8238 CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = s->dummy_boolean_array[0];
8239 CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &mode_lib->ms.support.ViewportSizeSupport;
8240
8241 CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params);
8242
8243 if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0) {
8244 for (k = 0; k < mode_lib->ms.num_active_planes; k++)
8245 mode_lib->ms.SurfaceSizeInMALL[k] = 0;
8246 mode_lib->ms.support.ExceededMALLSize = 0;
8247 } else {
8248 CalculateSurfaceSizeInMall(
8249 display_cfg,
8250 mode_lib->ms.num_active_planes,
8251 mode_lib->soc.mall_allocated_for_dcn_mbytes,
8252
8253 mode_lib->ms.BytePerPixelY,
8254 mode_lib->ms.BytePerPixelC,
8255 mode_lib->ms.Read256BlockWidthY,
8256 mode_lib->ms.Read256BlockWidthC,
8257 mode_lib->ms.Read256BlockHeightY,
8258 mode_lib->ms.Read256BlockHeightC,
8259 mode_lib->ms.MacroTileWidthY,
8260 mode_lib->ms.MacroTileWidthC,
8261 mode_lib->ms.MacroTileHeightY,
8262 mode_lib->ms.MacroTileHeightC,
8263
8264 /* Output */
8265 mode_lib->ms.SurfaceSizeInMALL,
8266 &mode_lib->ms.support.ExceededMALLSize);
8267 }
8268
8269 mode_lib->ms.TotalNumberOfDCCActiveDPP = 0;
8270 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8271 if (display_cfg->plane_descriptors[k].surface.dcc.enable == true) {
8272 mode_lib->ms.TotalNumberOfDCCActiveDPP = mode_lib->ms.TotalNumberOfDCCActiveDPP + mode_lib->ms.NoOfDPP[k];
8273 }
8274 }
8275
8276 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8277 s->SurfParameters[k].PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
8278 s->SurfParameters[k].DPPPerSurface = mode_lib->ms.NoOfDPP[k];
8279 s->SurfParameters[k].RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
8280 s->SurfParameters[k].ViewportHeight = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
8281 s->SurfParameters[k].ViewportHeightC = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
8282 s->SurfParameters[k].BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k];
8283 s->SurfParameters[k].BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k];
8284 s->SurfParameters[k].BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k];
8285 s->SurfParameters[k].BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k];
8286 s->SurfParameters[k].BlockWidthY = mode_lib->ms.MacroTileWidthY[k];
8287 s->SurfParameters[k].BlockHeightY = mode_lib->ms.MacroTileHeightY[k];
8288 s->SurfParameters[k].BlockWidthC = mode_lib->ms.MacroTileWidthC[k];
8289 s->SurfParameters[k].BlockHeightC = mode_lib->ms.MacroTileHeightC[k];
8290 s->SurfParameters[k].InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
8291 s->SurfParameters[k].HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
8292 s->SurfParameters[k].DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
8293 s->SurfParameters[k].SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
8294 s->SurfParameters[k].SurfaceTiling = display_cfg->plane_descriptors[k].surface.tiling;
8295 s->SurfParameters[k].BytePerPixelY = mode_lib->ms.BytePerPixelY[k];
8296 s->SurfParameters[k].BytePerPixelC = mode_lib->ms.BytePerPixelC[k];
8297 s->SurfParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
8298 s->SurfParameters[k].VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
8299 s->SurfParameters[k].VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
8300 s->SurfParameters[k].VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
8301 s->SurfParameters[k].VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
8302 s->SurfParameters[k].PitchY = display_cfg->plane_descriptors[k].surface.plane0.pitch;
8303 s->SurfParameters[k].PitchC = display_cfg->plane_descriptors[k].surface.plane1.pitch;
8304 s->SurfParameters[k].ViewportStationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
8305 s->SurfParameters[k].ViewportXStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
8306 s->SurfParameters[k].ViewportYStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
8307 s->SurfParameters[k].ViewportXStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
8308 s->SurfParameters[k].ViewportYStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
8309 s->SurfParameters[k].FORCE_ONE_ROW_FOR_FRAME = display_cfg->plane_descriptors[k].overrides.hw.force_one_row_for_frame;
8310 s->SurfParameters[k].SwathHeightY = mode_lib->ms.SwathHeightY[k];
8311 s->SurfParameters[k].SwathHeightC = mode_lib->ms.SwathHeightC[k];
8312
8313 s->SurfParameters[k].DCCMetaPitchY = display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch;
8314 s->SurfParameters[k].DCCMetaPitchC = display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch;
8315 }
8316
8317 CalculateVMRowAndSwath_params->display_cfg = display_cfg;
8318 CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
8319 CalculateVMRowAndSwath_params->myPipe = s->SurfParameters;
8320 CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->ms.SurfaceSizeInMALL;
8321 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
8322 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma;
8323 CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->soc.mall_allocated_for_dcn_mbytes;
8324 CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->ms.SwathWidthY;
8325 CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->ms.SwathWidthC;
8326 CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
8327 CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ip.dcc_meta_buffer_size_bytes;
8328 CalculateVMRowAndSwath_params->mrq_present = mode_lib->ip.dcn_mrq_present;
8329
8330 // output
8331 CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceeded;
8332 CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = s->dummy_integer_array[12];
8333 CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = s->dummy_integer_array[13];
8334 CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->ms.dpte_row_height;
8335 CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->ms.dpte_row_height_chroma;
8336 CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = s->dummy_integer_array[14]; // VBA_DELTA
8337 CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = s->dummy_integer_array[15]; // VBA_DELTA
8338 CalculateVMRowAndSwath_params->vm_group_bytes = s->dummy_integer_array[16];
8339 CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
8340 CalculateVMRowAndSwath_params->PixelPTEReqWidthY = s->dummy_integer_array[17];
8341 CalculateVMRowAndSwath_params->PixelPTEReqHeightY = s->dummy_integer_array[18];
8342 CalculateVMRowAndSwath_params->PTERequestSizeY = s->dummy_integer_array[19];
8343 CalculateVMRowAndSwath_params->PixelPTEReqWidthC = s->dummy_integer_array[20];
8344 CalculateVMRowAndSwath_params->PixelPTEReqHeightC = s->dummy_integer_array[21];
8345 CalculateVMRowAndSwath_params->PTERequestSizeC = s->dummy_integer_array[22];
8346 CalculateVMRowAndSwath_params->vmpg_width_y = s->vmpg_width_y;
8347 CalculateVMRowAndSwath_params->vmpg_height_y = s->vmpg_height_y;
8348 CalculateVMRowAndSwath_params->vmpg_width_c = s->vmpg_width_c;
8349 CalculateVMRowAndSwath_params->vmpg_height_c = s->vmpg_height_c;
8350 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = s->dummy_integer_array[23];
8351 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = s->dummy_integer_array[24];
8352 CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY;
8353 CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC;
8354 CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->ms.PrefillY;
8355 CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->ms.PrefillC;
8356 CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->ms.MaxNumSwathY;
8357 CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC;
8358 CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->ms.dpte_row_bw;
8359 CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow;
8360 CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
8361 CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
8362 CalculateVMRowAndSwath_params->vm_bytes = mode_lib->ms.vm_bytes;
8363 CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->ms.use_one_row_for_frame;
8364 CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->ms.use_one_row_for_frame_flip;
8365 CalculateVMRowAndSwath_params->is_using_mall_for_ss = s->dummy_boolean_array[0];
8366 CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = s->dummy_boolean_array[1];
8367 CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = s->dummy_integer_array[25];
8368 CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = mode_lib->ms.DCCMetaBufferSizeNotExceeded;
8369 CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->ms.meta_row_bw;
8370 CalculateVMRowAndSwath_params->meta_row_bytes = mode_lib->ms.meta_row_bytes;
8371 CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
8372 CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
8373 CalculateVMRowAndSwath_params->meta_req_width_luma = s->dummy_integer_array[26];
8374 CalculateVMRowAndSwath_params->meta_req_height_luma = s->dummy_integer_array[27];
8375 CalculateVMRowAndSwath_params->meta_row_width_luma = s->dummy_integer_array[28];
8376 CalculateVMRowAndSwath_params->meta_row_height_luma = s->meta_row_height_luma;
8377 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = s->dummy_integer_array[29];
8378 CalculateVMRowAndSwath_params->meta_req_width_chroma = s->dummy_integer_array[30];
8379 CalculateVMRowAndSwath_params->meta_req_height_chroma = s->dummy_integer_array[31];
8380 CalculateVMRowAndSwath_params->meta_row_width_chroma = s->dummy_integer_array[32];
8381 CalculateVMRowAndSwath_params->meta_row_height_chroma = s->meta_row_height_chroma;
8382 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = s->dummy_integer_array[33];
8383
8384 CalculateVMRowAndSwath(&mode_lib->scratch, CalculateVMRowAndSwath_params);
8385
8386 mode_lib->ms.support.PTEBufferSizeNotExceeded = true;
8387 mode_lib->ms.support.DCCMetaBufferSizeNotExceeded = true;
8388
8389 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8390 if (mode_lib->ms.PTEBufferSizeNotExceeded[k] == false)
8391 mode_lib->ms.support.PTEBufferSizeNotExceeded = false;
8392
8393 if (mode_lib->ms.DCCMetaBufferSizeNotExceeded[k] == false)
8394 mode_lib->ms.support.DCCMetaBufferSizeNotExceeded = false;
8395
8396 #ifdef __DML_VBA_DEBUG__
8397 dml2_printf("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.PTEBufferSizeNotExceeded[k]);
8398 dml2_printf("DML::%s: k=%u, DCCMetaBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.DCCMetaBufferSizeNotExceeded[k]);
8399 #endif
8400 }
8401 #ifdef __DML_VBA_DEBUG__
8402 dml2_printf("DML::%s: PTEBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.PTEBufferSizeNotExceeded);
8403 dml2_printf("DML::%s: DCCMetaBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.DCCMetaBufferSizeNotExceeded);
8404 #endif
8405
8406 /* VActive bytes to fetch for UCLK P-State */
8407 calculate_bytes_to_fetch_required_to_hide_latency_params->display_cfg = display_cfg;
8408 calculate_bytes_to_fetch_required_to_hide_latency_params->mrq_present = mode_lib->ip.dcn_mrq_present;
8409
8410 calculate_bytes_to_fetch_required_to_hide_latency_params->num_active_planes = mode_lib->ms.num_active_planes;
8411 calculate_bytes_to_fetch_required_to_hide_latency_params->num_of_dpp = mode_lib->ms.NoOfDPP;
8412 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_l = s->meta_row_height_luma;
8413 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_c = s->meta_row_height_chroma;
8414 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
8415 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
8416 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_l = mode_lib->ms.dpte_row_height;
8417 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_c = mode_lib->ms.dpte_row_height_chroma;
8418 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
8419 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
8420 calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_l = mode_lib->ms.BytePerPixelY;
8421 calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_c = mode_lib->ms.BytePerPixelC;
8422 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_l = mode_lib->ms.SwathWidthY;
8423 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_c = mode_lib->ms.SwathWidthC;
8424 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_l = mode_lib->ms.SwathHeightY;
8425 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_c = mode_lib->ms.SwathHeightC;
8426 calculate_bytes_to_fetch_required_to_hide_latency_params->latency_to_hide_us = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
8427
8428 /* outputs */
8429 calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_l = s->pstate_bytes_required_l;
8430 calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_c = s->pstate_bytes_required_c;
8431
8432 calculate_bytes_to_fetch_required_to_hide_latency(calculate_bytes_to_fetch_required_to_hide_latency_params);
8433
8434 /* Excess VActive bandwidth required to fill DET */
8435 calculate_excess_vactive_bandwidth_required(
8436 display_cfg,
8437 mode_lib->ms.num_active_planes,
8438 s->pstate_bytes_required_l,
8439 s->pstate_bytes_required_c,
8440 /* outputs */
8441 mode_lib->ms.excess_vactive_fill_bw_l,
8442 mode_lib->ms.excess_vactive_fill_bw_c);
8443
8444 mode_lib->ms.UrgLatency = CalculateUrgentLatency(
8445 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us,
8446 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us,
8447 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us,
8448 mode_lib->soc.do_urgent_latency_adjustment,
8449 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us,
8450 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz,
8451 mode_lib->ms.FabricClock,
8452 mode_lib->ms.uclk_freq_mhz,
8453 mode_lib->soc.qos_parameters.qos_type,
8454 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].urgent_ramp_uclk_cycles,
8455 mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles,
8456 mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
8457 mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
8458 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin,
8459 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
8460
8461 mode_lib->ms.TripToMemory = CalculateTripToMemory(
8462 mode_lib->ms.UrgLatency,
8463 mode_lib->ms.FabricClock,
8464 mode_lib->ms.uclk_freq_mhz,
8465 mode_lib->soc.qos_parameters.qos_type,
8466 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].trip_to_memory_uclk_cycles,
8467 mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
8468 mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
8469 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin,
8470 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
8471
8472 mode_lib->ms.TripToMemory = math_max2(mode_lib->ms.UrgLatency, mode_lib->ms.TripToMemory);
8473
8474 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8475 double line_time_us = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
8476 bool cursor_not_enough_urgent_latency_hiding = 0;
8477
8478 if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) {
8479 calculate_cursor_req_attributes(
8480 display_cfg->plane_descriptors[k].cursor.cursor_width,
8481 display_cfg->plane_descriptors[k].cursor.cursor_bpp,
8482
8483 // output
8484 &s->cursor_lines_per_chunk[k],
8485 &s->cursor_bytes_per_line[k],
8486 &s->cursor_bytes_per_chunk[k],
8487 &s->cursor_bytes[k]);
8488
8489 calculate_cursor_urgent_burst_factor(
8490 mode_lib->ip.cursor_buffer_size,
8491 display_cfg->plane_descriptors[k].cursor.cursor_width,
8492 s->cursor_bytes_per_chunk[k],
8493 s->cursor_lines_per_chunk[k],
8494 line_time_us,
8495 mode_lib->ms.UrgLatency,
8496
8497 // output
8498 &mode_lib->ms.UrgentBurstFactorCursor[k],
8499 &cursor_not_enough_urgent_latency_hiding);
8500 }
8501
8502 mode_lib->ms.UrgentBurstFactorCursorPre[k] = mode_lib->ms.UrgentBurstFactorCursor[k];
8503
8504 #ifdef __DML_VBA_DEBUG__
8505 dml2_printf("DML::%s: k=%d, Calling CalculateUrgentBurstFactor\n", __func__, k);
8506 dml2_printf("DML::%s: k=%d, VRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
8507 dml2_printf("DML::%s: k=%d, VRatioChroma=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio);
8508 #endif
8509
8510 CalculateUrgentBurstFactor(
8511 &display_cfg->plane_descriptors[k],
8512 mode_lib->ms.swath_width_luma_ub[k],
8513 mode_lib->ms.swath_width_chroma_ub[k],
8514 mode_lib->ms.SwathHeightY[k],
8515 mode_lib->ms.SwathHeightC[k],
8516 line_time_us,
8517 mode_lib->ms.UrgLatency,
8518 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
8519 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
8520 mode_lib->ms.BytePerPixelInDETY[k],
8521 mode_lib->ms.BytePerPixelInDETC[k],
8522 mode_lib->ms.DETBufferSizeY[k],
8523 mode_lib->ms.DETBufferSizeC[k],
8524
8525 // Output
8526 &mode_lib->ms.UrgentBurstFactorLuma[k],
8527 &mode_lib->ms.UrgentBurstFactorChroma[k],
8528 &mode_lib->ms.NotEnoughUrgentLatencyHiding[k]);
8529
8530 mode_lib->ms.NotEnoughUrgentLatencyHiding[k] = mode_lib->ms.NotEnoughUrgentLatencyHiding[k] || cursor_not_enough_urgent_latency_hiding;
8531 }
8532
8533 CalculateDCFCLKDeepSleep(
8534 display_cfg,
8535 mode_lib->ms.num_active_planes,
8536 mode_lib->ms.BytePerPixelY,
8537 mode_lib->ms.BytePerPixelC,
8538 mode_lib->ms.SwathWidthY,
8539 mode_lib->ms.SwathWidthC,
8540 mode_lib->ms.NoOfDPP,
8541 mode_lib->ms.PSCL_FACTOR,
8542 mode_lib->ms.PSCL_FACTOR_CHROMA,
8543 mode_lib->ms.RequiredDPPCLK,
8544 mode_lib->ms.vactive_sw_bw_l,
8545 mode_lib->ms.vactive_sw_bw_c,
8546 mode_lib->soc.return_bus_width_bytes,
8547
8548 /* Output */
8549 &mode_lib->ms.dcfclk_deepsleep);
8550
8551 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8552 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
8553 mode_lib->ms.WritebackDelayTime[k] = mode_lib->soc.qos_parameters.writeback.base_latency_us + CalculateWriteBackDelay(
8554 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format,
8555 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio,
8556 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio,
8557 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps,
8558 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width,
8559 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height,
8560 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height,
8561 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->ms.RequiredDISPCLK;
8562 } else {
8563 mode_lib->ms.WritebackDelayTime[k] = 0.0;
8564 }
8565 }
8566
8567 // MaximumVStartup is actually Tvstartup_min in DCN4 programming guide
8568 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8569 bool isInterlaceTiming = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !mode_lib->ip.ptoi_supported);
8570 s->MaximumVStartup[k] = CalculateMaxVStartup(
8571 mode_lib->ip.ptoi_supported,
8572 mode_lib->ip.vblank_nom_default_us,
8573 &display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing,
8574 mode_lib->ms.WritebackDelayTime[k]);
8575 mode_lib->ms.MaxVStartupLines[k] = (isInterlaceTiming ? (2 * s->MaximumVStartup[k]) : s->MaximumVStartup[k]);
8576 }
8577
8578 #ifdef __DML_VBA_DEBUG__
8579 dml2_printf("DML::%s: k=%u, MaximumVStartup = %u\n", __func__, k, s->MaximumVStartup[k]);
8580 #endif
8581
8582 /* Immediate Flip and MALL parameters */
8583 s->ImmediateFlipRequired = false;
8584 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8585 s->ImmediateFlipRequired = s->ImmediateFlipRequired || display_cfg->plane_descriptors[k].immediate_flip;
8586 }
8587
8588 mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = false;
8589 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8590 mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe =
8591 mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe ||
8592 ((display_cfg->hostvm_enable == true || display_cfg->plane_descriptors[k].immediate_flip == true) &&
8593 (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame || dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])));
8594 }
8595
8596 mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = false;
8597 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8598 mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen ||
8599 ((display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable || display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_auto) && (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))) ||
8600 ((display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_disable || display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_auto) && (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame));
8601 }
8602
8603 s->FullFrameMALLPStateMethod = false;
8604 s->SubViewportMALLPStateMethod = false;
8605 s->PhantomPipeMALLPStateMethod = false;
8606 s->SubViewportMALLRefreshGreaterThan120Hz = false;
8607 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8608 if (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame)
8609 s->FullFrameMALLPStateMethod = true;
8610 if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) {
8611 s->SubViewportMALLPStateMethod = true;
8612 if (!display_cfg->overrides.enable_subvp_implicit_pmo) {
8613 // For dv, small frame tests will have very high refresh rate
8614 unsigned long long refresh_rate = (unsigned long long) ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz * 1000 /
8615 (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
8616 (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total);
8617 if (refresh_rate > 120)
8618 s->SubViewportMALLRefreshGreaterThan120Hz = true;
8619 }
8620 }
8621 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))
8622 s->PhantomPipeMALLPStateMethod = true;
8623 }
8624 mode_lib->ms.support.InvalidCombinationOfMALLUseForPState = (s->SubViewportMALLPStateMethod != s->PhantomPipeMALLPStateMethod) ||
8625 (s->SubViewportMALLPStateMethod && s->FullFrameMALLPStateMethod) || s->SubViewportMALLRefreshGreaterThan120Hz;
8626
8627 #ifdef __DML_VBA_DEBUG__
8628 dml2_printf("DML::%s: SubViewportMALLPStateMethod = %u\n", __func__, s->SubViewportMALLPStateMethod);
8629 dml2_printf("DML::%s: PhantomPipeMALLPStateMethod = %u\n", __func__, s->PhantomPipeMALLPStateMethod);
8630 dml2_printf("DML::%s: FullFrameMALLPStateMethod = %u\n", __func__, s->FullFrameMALLPStateMethod);
8631 dml2_printf("DML::%s: SubViewportMALLRefreshGreaterThan120Hz = %u\n", __func__, s->SubViewportMALLRefreshGreaterThan120Hz);
8632 dml2_printf("DML::%s: InvalidCombinationOfMALLUseForPState = %u\n", __func__, mode_lib->ms.support.InvalidCombinationOfMALLUseForPState);
8633 dml2_printf("DML::%s: in_out_params->min_clk_index = %u\n", __func__, in_out_params->min_clk_index);
8634 dml2_printf("DML::%s: mode_lib->ms.DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK);
8635 dml2_printf("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock);
8636 dml2_printf("DML::%s: mode_lib->ms.uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz);
8637 dml2_printf("DML::%s: urgent latency tolarance = %f\n", __func__, ((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes)));
8638 #endif
8639
8640 mode_lib->ms.support.OutstandingRequestsSupport = true;
8641 mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = true;
8642
8643 mode_lib->ms.support.avg_urgent_latency_us
8644 = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz
8645 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0)
8646 + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock)
8647 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0);
8648
8649 mode_lib->ms.support.avg_non_urgent_latency_us
8650 = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_non_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz
8651 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0)
8652 + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock)
8653 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0);
8654
8655 mode_lib->ms.support.max_non_urgent_latency_us
8656 = mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles
8657 / mode_lib->ms.uclk_freq_mhz * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0)
8658 + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock
8659 + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock
8660 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0);
8661
8662 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
8663
8664 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) {
8665 outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_luma[k]
8666 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes));
8667
8668 if (outstanding_latency_us < mode_lib->ms.support.avg_urgent_latency_us) {
8669 mode_lib->ms.support.OutstandingRequestsSupport = false;
8670 }
8671
8672 if (outstanding_latency_us < mode_lib->ms.support.avg_non_urgent_latency_us) {
8673 mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = false;
8674 }
8675
8676 #ifdef __DML_VBA_DEBUG__
8677 dml2_printf("DML::%s: avg_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_urgent_latency_us);
8678 dml2_printf("DML::%s: avg_non_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_non_urgent_latency_us);
8679 dml2_printf("DML::%s: k=%d, request_size_bytes_luma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_luma[k]);
8680 dml2_printf("DML::%s: k=%d, outstanding_latency_us = %f (luma)\n", __func__, k, outstanding_latency_us);
8681 #endif
8682 }
8683
8684 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x && mode_lib->ms.BytePerPixelC[k] > 0) {
8685 outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_chroma[k]
8686 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes));
8687
8688 if (outstanding_latency_us < mode_lib->ms.support.avg_urgent_latency_us) {
8689 mode_lib->ms.support.OutstandingRequestsSupport = false;
8690 }
8691
8692 if (outstanding_latency_us < mode_lib->ms.support.avg_non_urgent_latency_us) {
8693 mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = false;
8694 }
8695 #ifdef __DML_VBA_DEBUG__
8696 dml2_printf("DML::%s: k=%d, request_size_bytes_chroma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_chroma[k]);
8697 dml2_printf("DML::%s: k=%d, outstanding_latency_us = %f (chroma)\n", __func__, k, outstanding_latency_us);
8698 #endif
8699 }
8700 }
8701
8702 memset(calculate_mcache_setting_params, 0, sizeof(struct dml2_core_calcs_calculate_mcache_setting_params));
8703 if (mode_lib->soc.mcache_size_bytes == 0 || mode_lib->ip.dcn_mrq_present) {
8704 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
8705 mode_lib->ms.mall_prefetch_sdp_overhead_factor[k] = 1.0;
8706 mode_lib->ms.mall_prefetch_dram_overhead_factor[k] = 1.0;
8707 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0[k] = 1.0;
8708 mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0[k] = 1.0;
8709 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1[k] = 1.0;
8710 mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1[k] = 1.0;
8711 }
8712 } else {
8713 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
8714 calculate_mcache_setting_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
8715 calculate_mcache_setting_params->num_chans = mode_lib->soc.clk_table.dram_config.channel_count;
8716 calculate_mcache_setting_params->mem_word_bytes = mode_lib->soc.mem_word_bytes;
8717 calculate_mcache_setting_params->mcache_size_bytes = mode_lib->soc.mcache_size_bytes;
8718 calculate_mcache_setting_params->mcache_line_size_bytes = mode_lib->soc.mcache_line_size_bytes;
8719 calculate_mcache_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
8720 calculate_mcache_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
8721
8722 calculate_mcache_setting_params->source_format = display_cfg->plane_descriptors[k].pixel_format;
8723 calculate_mcache_setting_params->surf_vert = dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle);
8724 calculate_mcache_setting_params->vp_stationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
8725 calculate_mcache_setting_params->tiling_mode = display_cfg->plane_descriptors[k].surface.tiling;
8726 calculate_mcache_setting_params->imall_enable = mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall;
8727
8728 calculate_mcache_setting_params->vp_start_x_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
8729 calculate_mcache_setting_params->vp_start_y_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
8730 calculate_mcache_setting_params->full_vp_width_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
8731 calculate_mcache_setting_params->full_vp_height_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
8732 calculate_mcache_setting_params->blk_width_l = mode_lib->ms.MacroTileWidthY[k];
8733 calculate_mcache_setting_params->blk_height_l = mode_lib->ms.MacroTileHeightY[k];
8734 calculate_mcache_setting_params->vmpg_width_l = s->vmpg_width_y[k];
8735 calculate_mcache_setting_params->vmpg_height_l = s->vmpg_height_y[k];
8736 calculate_mcache_setting_params->full_swath_bytes_l = s->full_swath_bytes_l[k];
8737 calculate_mcache_setting_params->bytes_per_pixel_l = mode_lib->ms.BytePerPixelY[k];
8738
8739 calculate_mcache_setting_params->vp_start_x_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.x_start;
8740 calculate_mcache_setting_params->vp_start_y_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
8741 calculate_mcache_setting_params->full_vp_width_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.width;
8742 calculate_mcache_setting_params->full_vp_height_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
8743 calculate_mcache_setting_params->blk_width_c = mode_lib->ms.MacroTileWidthC[k];
8744 calculate_mcache_setting_params->blk_height_c = mode_lib->ms.MacroTileHeightC[k];
8745 calculate_mcache_setting_params->vmpg_width_c = s->vmpg_width_c[k];
8746 calculate_mcache_setting_params->vmpg_height_c = s->vmpg_height_c[k];
8747 calculate_mcache_setting_params->full_swath_bytes_c = s->full_swath_bytes_c[k];
8748 calculate_mcache_setting_params->bytes_per_pixel_c = mode_lib->ms.BytePerPixelC[k];
8749
8750 // output
8751 calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_l = &mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0[k];
8752 calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_l = &mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0[k];
8753 calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_c = &mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1[k];
8754 calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_c = &mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1[k];
8755
8756 calculate_mcache_setting_params->num_mcaches_l = &mode_lib->ms.num_mcaches_l[k];
8757 calculate_mcache_setting_params->mcache_row_bytes_l = &mode_lib->ms.mcache_row_bytes_l[k];
8758 calculate_mcache_setting_params->mcache_offsets_l = mode_lib->ms.mcache_offsets_l[k];
8759 calculate_mcache_setting_params->mcache_shift_granularity_l = &mode_lib->ms.mcache_shift_granularity_l[k];
8760
8761 calculate_mcache_setting_params->num_mcaches_c = &mode_lib->ms.num_mcaches_c[k];
8762 calculate_mcache_setting_params->mcache_row_bytes_c = &mode_lib->ms.mcache_row_bytes_c[k];
8763 calculate_mcache_setting_params->mcache_offsets_c = mode_lib->ms.mcache_offsets_c[k];
8764 calculate_mcache_setting_params->mcache_shift_granularity_c = &mode_lib->ms.mcache_shift_granularity_c[k];
8765
8766 calculate_mcache_setting_params->mall_comb_mcache_l = &mode_lib->ms.mall_comb_mcache_l[k];
8767 calculate_mcache_setting_params->mall_comb_mcache_c = &mode_lib->ms.mall_comb_mcache_c[k];
8768 calculate_mcache_setting_params->lc_comb_mcache = &mode_lib->ms.lc_comb_mcache[k];
8769
8770 calculate_mcache_setting(&mode_lib->scratch, calculate_mcache_setting_params);
8771 }
8772
8773 calculate_mall_bw_overhead_factor(
8774 mode_lib->ms.mall_prefetch_sdp_overhead_factor,
8775 mode_lib->ms.mall_prefetch_dram_overhead_factor,
8776
8777 // input
8778 display_cfg,
8779 mode_lib->ms.num_active_planes);
8780 }
8781
8782 // Calculate all the bandwidth available
8783 // Need another bw for latency evaluation
8784 calculate_bandwidth_available(
8785 mode_lib->ms.support.avg_bandwidth_available_min, // not used
8786 mode_lib->ms.support.avg_bandwidth_available, // not used
8787 mode_lib->ms.support.urg_bandwidth_available_min_latency,
8788 mode_lib->ms.support.urg_bandwidth_available, // not used
8789 mode_lib->ms.support.urg_bandwidth_available_vm_only, // not used
8790 mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm, // not used
8791
8792 &mode_lib->soc,
8793 display_cfg->hostvm_enable,
8794 mode_lib->ms.DCFCLK,
8795 mode_lib->ms.FabricClock,
8796 mode_lib->ms.dram_bw_mbps);
8797
8798 calculate_bandwidth_available(
8799 mode_lib->ms.support.avg_bandwidth_available_min,
8800 mode_lib->ms.support.avg_bandwidth_available,
8801 mode_lib->ms.support.urg_bandwidth_available_min,
8802 mode_lib->ms.support.urg_bandwidth_available,
8803 mode_lib->ms.support.urg_bandwidth_available_vm_only,
8804 mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm,
8805
8806 &mode_lib->soc,
8807 display_cfg->hostvm_enable,
8808 mode_lib->ms.MaxDCFCLK,
8809 mode_lib->ms.MaxFabricClock,
8810 #ifdef DML_MODE_SUPPORT_USE_DPM_DRAM_BW
8811 mode_lib->ms.dram_bw_mbps);
8812 #else
8813 mode_lib->ms.max_dram_bw_mbps);
8814 #endif
8815
8816 // Average BW support check
8817 calculate_avg_bandwidth_required(
8818 mode_lib->ms.support.avg_bandwidth_required,
8819 // input
8820 display_cfg,
8821 mode_lib->ms.num_active_planes,
8822 mode_lib->ms.vactive_sw_bw_l,
8823 mode_lib->ms.vactive_sw_bw_c,
8824 mode_lib->ms.cursor_bw,
8825 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0,
8826 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1,
8827 mode_lib->ms.mall_prefetch_dram_overhead_factor,
8828 mode_lib->ms.mall_prefetch_sdp_overhead_factor);
8829
8830 for (m = 0; m < dml2_core_internal_bw_max; m++) { // check sdp and dram
8831 mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_sys_idle][m] = 1;
8832 mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_sys_active][m] = (mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][m] <= mode_lib->ms.support.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][m]);
8833 mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_svp_prefetch][m] = (mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][m] <= mode_lib->ms.support.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][m]);
8834 }
8835
8836 mode_lib->ms.support.AvgBandwidthSupport = true;
8837 mode_lib->ms.support.EnoughUrgentLatencyHidingSupport = true;
8838 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8839 if (mode_lib->ms.NotEnoughUrgentLatencyHiding[k]) {
8840 mode_lib->ms.support.EnoughUrgentLatencyHidingSupport = false;
8841 dml2_printf("DML::%s: k=%u NotEnoughUrgentLatencyHiding set\n", __func__, k);
8842
8843 }
8844 }
8845 for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
8846 for (n = 0; n < dml2_core_internal_bw_max; n++) { // check sdp and dram
8847 if (!mode_lib->ms.support.avg_bandwidth_support_ok[m][n] && (m == dml2_core_internal_soc_state_sys_active || mode_lib->soc.mall_allocated_for_dcn_mbytes > 0)) {
8848 mode_lib->ms.support.AvgBandwidthSupport = false;
8849 #ifdef __DML_VBA_DEBUG__
8850 dml2_printf("DML::%s: avg_bandwidth_support_ok[%s][%s] not ok\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n));
8851 #endif
8852 }
8853 }
8854 }
8855
8856 /* Prefetch Check */
8857 {
8858 mode_lib->ms.TimeCalc = 24 / mode_lib->ms.dcfclk_deepsleep;
8859
8860 calculate_hostvm_inefficiency_factor(
8861 &s->HostVMInefficiencyFactor,
8862 &s->HostVMInefficiencyFactorPrefetch,
8863
8864 display_cfg->gpuvm_enable,
8865 display_cfg->hostvm_enable,
8866 mode_lib->ip.remote_iommu_outstanding_translations,
8867 mode_lib->soc.max_outstanding_reqs,
8868 mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active],
8869 mode_lib->ms.support.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]);
8870
8871 mode_lib->ms.Total3dlutActive = 0;
8872 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8873 if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut)
8874 mode_lib->ms.Total3dlutActive = mode_lib->ms.Total3dlutActive + 1;
8875
8876 // Calculate tdlut schedule related terms
8877 calculate_tdlut_setting_params->dispclk_mhz = mode_lib->ms.RequiredDISPCLK;
8878 calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
8879 calculate_tdlut_setting_params->tdlut_width_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode;
8880 calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode;
8881 calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size;
8882 calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
8883 calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
8884 calculate_tdlut_setting_params->tdlut_mpc_width_flag = display_cfg->plane_descriptors[k].tdlut.tdlut_mpc_width_flag;
8885 calculate_tdlut_setting_params->is_gfx11 = dml_get_gfx_version(display_cfg->plane_descriptors[k].surface.tiling);
8886
8887 // output
8888 calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k];
8889 calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k];
8890 calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k];
8891 calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k];
8892 calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k];
8893 calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k];
8894 calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k];
8895
8896 calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params);
8897 }
8898
8899 min_return_bw_for_latency = mode_lib->ms.support.urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_sys_active];
8900
8901 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3)
8902 s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes,
8903 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
8904 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes));
8905
8906 CalculateExtraLatency(
8907 display_cfg,
8908 mode_lib->ip.rob_buffer_size_kbytes,
8909 mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles,
8910 s->ReorderingBytes,
8911 mode_lib->ms.DCFCLK,
8912 mode_lib->ms.FabricClock,
8913 mode_lib->ip.pixel_chunk_size_kbytes,
8914 min_return_bw_for_latency,
8915 mode_lib->ms.num_active_planes,
8916 mode_lib->ms.NoOfDPP,
8917 mode_lib->ms.dpte_group_bytes,
8918 s->tdlut_bytes_per_group,
8919 s->HostVMInefficiencyFactor,
8920 s->HostVMInefficiencyFactorPrefetch,
8921 mode_lib->soc.hostvm_min_page_size_kbytes,
8922 mode_lib->soc.qos_parameters.qos_type,
8923 !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable),
8924 mode_lib->soc.max_outstanding_reqs,
8925 mode_lib->ms.support.request_size_bytes_luma,
8926 mode_lib->ms.support.request_size_bytes_chroma,
8927 mode_lib->ip.meta_chunk_size_kbytes,
8928 mode_lib->ip.dchub_arb_to_ret_delay,
8929 mode_lib->ms.TripToMemory,
8930 mode_lib->ip.hostvm_mode,
8931
8932 // output
8933 &mode_lib->ms.ExtraLatency,
8934 &mode_lib->ms.ExtraLatency_sr,
8935 &mode_lib->ms.ExtraLatencyPrefetch);
8936
8937 for (k = 0; k < mode_lib->ms.num_active_planes; k++)
8938 s->impacted_dst_y_pre[k] = 0;
8939
8940 s->recalc_prefetch_schedule = 0;
8941 s->recalc_prefetch_done = 0;
8942 do {
8943 mode_lib->ms.support.PrefetchSupported = true;
8944
8945 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
8946 s->line_times[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
8947 s->pixel_format[k] = display_cfg->plane_descriptors[k].pixel_format;
8948
8949 s->lb_source_lines_l[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
8950 mode_lib->ms.NoOfDPP[k],
8951 display_cfg->plane_descriptors[k].composition.viewport.plane0.width,
8952 display_cfg->plane_descriptors[k].composition.viewport.plane0.height,
8953 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
8954 display_cfg->plane_descriptors[k].composition.rotation_angle);
8955
8956 s->lb_source_lines_c[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
8957 mode_lib->ms.NoOfDPP[k],
8958 display_cfg->plane_descriptors[k].composition.viewport.plane1.width,
8959 display_cfg->plane_descriptors[k].composition.viewport.plane1.height,
8960 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
8961 display_cfg->plane_descriptors[k].composition.rotation_angle);
8962
8963 struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe;
8964
8965 mode_lib->ms.TWait[k] = CalculateTWait(
8966 display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns,
8967 mode_lib->ms.UrgLatency,
8968 mode_lib->ms.TripToMemory,
8969 !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ?
8970 get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0);
8971
8972 myPipe->Dppclk = mode_lib->ms.RequiredDPPCLK[k];
8973 myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK;
8974 myPipe->PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
8975 myPipe->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep;
8976 myPipe->DPPPerSurface = mode_lib->ms.NoOfDPP[k];
8977 myPipe->ScalerEnabled = display_cfg->plane_descriptors[k].composition.scaler_info.enabled;
8978 myPipe->VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
8979 myPipe->VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
8980 myPipe->VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
8981 myPipe->VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
8982 myPipe->RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
8983 myPipe->mirrored = display_cfg->plane_descriptors[k].composition.mirrored;
8984 myPipe->BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k];
8985 myPipe->BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k];
8986 myPipe->BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k];
8987 myPipe->BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k];
8988 myPipe->InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
8989 myPipe->NumberOfCursors = display_cfg->plane_descriptors[k].cursor.num_cursors;
8990 myPipe->VBlank = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active;
8991 myPipe->HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
8992 myPipe->HActive = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active;
8993 myPipe->DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
8994 myPipe->ODMMode = mode_lib->ms.ODMMode[k];
8995 myPipe->SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
8996 myPipe->BytePerPixelY = mode_lib->ms.BytePerPixelY[k];
8997 myPipe->BytePerPixelC = mode_lib->ms.BytePerPixelC[k];
8998 myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
8999
9000 #ifdef __DML_VBA_DEBUG__
9001 dml2_printf("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k);
9002 dml2_printf("DML::%s: MaximumVStartup = %u\n", __func__, s->MaximumVStartup[k]);
9003 #endif
9004 CalculatePrefetchSchedule_params->display_cfg = display_cfg;
9005 CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch;
9006 CalculatePrefetchSchedule_params->myPipe = myPipe;
9007 CalculatePrefetchSchedule_params->DSCDelay = mode_lib->ms.DSCDelay[k];
9008 CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ip.dppclk_delay_subtotal + mode_lib->ip.dppclk_delay_cnvc_formatter;
9009 CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ip.dppclk_delay_scl;
9010 CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ip.dppclk_delay_scl_lb_only;
9011 CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ip.dppclk_delay_cnvc_cursor;
9012 CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ip.dispclk_delay_subtotal;
9013 CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (unsigned int)(mode_lib->ms.SwathWidthY[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
9014 CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format;
9015 CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters;
9016 CalculatePrefetchSchedule_params->VStartup = s->MaximumVStartup[k];
9017 CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
9018 CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable;
9019 CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled;
9020 CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required;
9021 CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes;
9022 CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->ms.UrgLatency;
9023 CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->ms.ExtraLatencyPrefetch;
9024 CalculatePrefetchSchedule_params->TCalc = mode_lib->ms.TimeCalc;
9025 CalculatePrefetchSchedule_params->vm_bytes = mode_lib->ms.vm_bytes[k];
9026 CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow[k];
9027 CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY[k];
9028 CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->ms.PrefillY[k];
9029 CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->ms.MaxNumSwathY[k];
9030 CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC[k];
9031 CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->ms.PrefillC[k];
9032 CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC[k];
9033 CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub[k];
9034 CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub[k];
9035 CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->ms.SwathHeightY[k];
9036 CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->ms.SwathHeightC[k];
9037 CalculatePrefetchSchedule_params->TWait = mode_lib->ms.TWait[k];
9038 CalculatePrefetchSchedule_params->Ttrip = mode_lib->ms.TripToMemory;
9039 CalculatePrefetchSchedule_params->Turg = mode_lib->ms.UrgLatency;
9040 CalculatePrefetchSchedule_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
9041 CalculatePrefetchSchedule_params->tdlut_pte_bytes_per_frame = s->tdlut_pte_bytes_per_frame[k];
9042 CalculatePrefetchSchedule_params->tdlut_bytes_per_frame = s->tdlut_bytes_per_frame[k];
9043 CalculatePrefetchSchedule_params->tdlut_opt_time = s->tdlut_opt_time[k];
9044 CalculatePrefetchSchedule_params->tdlut_drain_time = s->tdlut_drain_time[k];
9045 CalculatePrefetchSchedule_params->num_cursors = (display_cfg->plane_descriptors[k].cursor.cursor_width > 0);
9046 CalculatePrefetchSchedule_params->cursor_bytes_per_chunk = s->cursor_bytes_per_chunk[k];
9047 CalculatePrefetchSchedule_params->cursor_bytes_per_line = s->cursor_bytes_per_line[k];
9048 CalculatePrefetchSchedule_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
9049 CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present;
9050 CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->ms.meta_row_bytes[k];
9051 CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor[k];
9052 CalculatePrefetchSchedule_params->impacted_dst_y_pre = s->impacted_dst_y_pre[k];
9053 CalculatePrefetchSchedule_params->vactive_sw_bw_l = mode_lib->ms.vactive_sw_bw_l[k];
9054 CalculatePrefetchSchedule_params->vactive_sw_bw_c = mode_lib->ms.vactive_sw_bw_c[k];
9055
9056 // output
9057 CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k];
9058 CalculatePrefetchSchedule_params->DSTYAfterScaler = &s->DSTYAfterScaler[k];
9059 CalculatePrefetchSchedule_params->dst_y_prefetch = &mode_lib->ms.dst_y_prefetch[k];
9060 CalculatePrefetchSchedule_params->dst_y_per_vm_vblank = &mode_lib->ms.LinesForVM[k];
9061 CalculatePrefetchSchedule_params->dst_y_per_row_vblank = &mode_lib->ms.LinesForDPTERow[k];
9062 CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->ms.VRatioPreY[k];
9063 CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[k];
9064 CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k]; // prefetch_sw_bw_l
9065 CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k]; // prefetch_sw_bw_c
9066 CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.NoTimeForDynamicMetadata[k];
9067 CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k];
9068 CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->ms.Tno_bw_flip[k];
9069 CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->ms.prefetch_vmrow_bw[k];
9070 CalculatePrefetchSchedule_params->Tdmdl_vm = &s->dummy_single[0];
9071 CalculatePrefetchSchedule_params->Tdmdl = &s->dummy_single[1];
9072 CalculatePrefetchSchedule_params->TSetup = &s->dummy_single[2];
9073 CalculatePrefetchSchedule_params->Tvm_trips = &s->Tvm_trips[k];
9074 CalculatePrefetchSchedule_params->Tr0_trips = &s->Tr0_trips[k];
9075 CalculatePrefetchSchedule_params->Tvm_trips_flip = &s->Tvm_trips_flip[k];
9076 CalculatePrefetchSchedule_params->Tr0_trips_flip = &s->Tr0_trips_flip[k];
9077 CalculatePrefetchSchedule_params->Tvm_trips_flip_rounded = &s->Tvm_trips_flip_rounded[k];
9078 CalculatePrefetchSchedule_params->Tr0_trips_flip_rounded = &s->Tr0_trips_flip_rounded[k];
9079 CalculatePrefetchSchedule_params->VUpdateOffsetPix = &s->dummy_integer[0];
9080 CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1];
9081 CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2];
9082 CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->ms.prefetch_cursor_bw[k];
9083 CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k];
9084 CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k];
9085 CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k];
9086 CalculatePrefetchSchedule_params->prefetch_swath_time_us = &s->prefetch_swath_time_us[k];
9087
9088 mode_lib->ms.NoTimeForPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params);
9089
9090 mode_lib->ms.support.PrefetchSupported &= !mode_lib->ms.NoTimeForPrefetch[k];
9091 dml2_printf("DML::%s: k=%d, dst_y_per_vm_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_vm_vblank);
9092 dml2_printf("DML::%s: k=%d, dst_y_per_row_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_row_vblank);
9093 } // for k num_planes
9094
9095 CalculateDCFCLKDeepSleepTdlut(
9096 display_cfg,
9097 mode_lib->ms.num_active_planes,
9098 mode_lib->ms.BytePerPixelY,
9099 mode_lib->ms.BytePerPixelC,
9100 mode_lib->ms.SwathWidthY,
9101 mode_lib->ms.SwathWidthC,
9102 mode_lib->ms.NoOfDPP,
9103 mode_lib->ms.PSCL_FACTOR,
9104 mode_lib->ms.PSCL_FACTOR_CHROMA,
9105 mode_lib->ms.RequiredDPPCLK,
9106 mode_lib->ms.vactive_sw_bw_l,
9107 mode_lib->ms.vactive_sw_bw_c,
9108 mode_lib->soc.return_bus_width_bytes,
9109 mode_lib->ms.RequiredDISPCLK,
9110 s->tdlut_bytes_to_deliver,
9111 s->prefetch_swath_time_us,
9112
9113 /* Output */
9114 &mode_lib->ms.dcfclk_deepsleep);
9115
9116 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
9117 if (mode_lib->ms.dst_y_prefetch[k] < 2.0
9118 || mode_lib->ms.LinesForVM[k] >= 32.0
9119 || mode_lib->ms.LinesForDPTERow[k] >= 16.0
9120 || mode_lib->ms.NoTimeForPrefetch[k] == true
9121 || s->DSTYAfterScaler[k] > 8) {
9122 mode_lib->ms.support.PrefetchSupported = false;
9123 dml2_printf("DML::%s: k=%d, dst_y_prefetch=%f (should not be < 2)\n", __func__, k, mode_lib->ms.dst_y_prefetch[k]);
9124 dml2_printf("DML::%s: k=%d, LinesForVM=%f (should not be >= 32)\n", __func__, k, mode_lib->ms.LinesForVM[k]);
9125 dml2_printf("DML::%s: k=%d, LinesForDPTERow=%f (should not be >= 16)\n", __func__, k, mode_lib->ms.LinesForDPTERow[k]);
9126 dml2_printf("DML::%s: k=%d, DSTYAfterScaler=%d (should be <= 8)\n", __func__, k, s->DSTYAfterScaler[k]);
9127 dml2_printf("DML::%s: k=%d, NoTimeForPrefetch=%d\n", __func__, k, mode_lib->ms.NoTimeForPrefetch[k]);
9128 }
9129 }
9130
9131 mode_lib->ms.support.DynamicMetadataSupported = true;
9132 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9133 if (mode_lib->ms.NoTimeForDynamicMetadata[k] == true) {
9134 mode_lib->ms.support.DynamicMetadataSupported = false;
9135 }
9136 }
9137
9138 mode_lib->ms.support.VRatioInPrefetchSupported = true;
9139 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
9140 if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ ||
9141 mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) {
9142 mode_lib->ms.support.VRatioInPrefetchSupported = false;
9143 dml2_printf("DML::%s: k=%d VRatioPreY = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreY[k], __DML2_CALCS_MAX_VRATIO_PRE__);
9144 dml2_printf("DML::%s: k=%d VRatioPreC = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreC[k], __DML2_CALCS_MAX_VRATIO_PRE__);
9145 dml2_printf("DML::%s: VRatioInPrefetchSupported = %u\n", __func__, mode_lib->ms.support.VRatioInPrefetchSupported);
9146 }
9147 }
9148
9149 mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.VRatioInPrefetchSupported;
9150
9151 // By default, do not recalc prefetch schedule
9152 s->recalc_prefetch_schedule = 0;
9153
9154 // Only do urg vs prefetch bandwidth check, flip schedule check, power saving feature support check IF the Prefetch Schedule Check is ok
9155 if (mode_lib->ms.support.PrefetchSupported) {
9156 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
9157 // Calculate Urgent burst factor for prefetch
9158 #ifdef __DML_VBA_DEBUG__
9159 dml2_printf("DML::%s: k=%d, Calling CalculateUrgentBurstFactor (for prefetch)\n", __func__, k);
9160 dml2_printf("DML::%s: k=%d, VRatioPreY=%f\n", __func__, k, mode_lib->ms.VRatioPreY[k]);
9161 dml2_printf("DML::%s: k=%d, VRatioPreC=%f\n", __func__, k, mode_lib->ms.VRatioPreC[k]);
9162 #endif
9163 CalculateUrgentBurstFactor(
9164 &display_cfg->plane_descriptors[k],
9165 mode_lib->ms.swath_width_luma_ub[k],
9166 mode_lib->ms.swath_width_chroma_ub[k],
9167 mode_lib->ms.SwathHeightY[k],
9168 mode_lib->ms.SwathHeightC[k],
9169 s->line_times[k],
9170 mode_lib->ms.UrgLatency,
9171 mode_lib->ms.VRatioPreY[k],
9172 mode_lib->ms.VRatioPreC[k],
9173 mode_lib->ms.BytePerPixelInDETY[k],
9174 mode_lib->ms.BytePerPixelInDETC[k],
9175 mode_lib->ms.DETBufferSizeY[k],
9176 mode_lib->ms.DETBufferSizeC[k],
9177 /* Output */
9178 &mode_lib->ms.UrgentBurstFactorLumaPre[k],
9179 &mode_lib->ms.UrgentBurstFactorChromaPre[k],
9180 &mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]);
9181 }
9182
9183 // Calculate urgent bandwidth required, both urg and non urg peak bandwidth
9184 // assume flip bw is 0 at this point
9185 for (k = 0; k < mode_lib->ms.num_active_planes; k++)
9186 mode_lib->ms.final_flip_bw[k] = 0;
9187
9188 calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = mode_lib->ms.support.urg_vactive_bandwidth_required;
9189 calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required;
9190 calculate_peak_bandwidth_params->urg_bandwidth_required_qual = mode_lib->ms.support.urg_bandwidth_required_qual;
9191 calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required;
9192 calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = mode_lib->ms.surface_avg_vactive_required_bw;
9193 calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw;
9194
9195 calculate_peak_bandwidth_params->display_cfg = display_cfg;
9196 calculate_peak_bandwidth_params->inc_flip_bw = 0;
9197 calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes;
9198 calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP;
9199 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0;
9200 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1;
9201 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0;
9202 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1;
9203 calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor;
9204 calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor;
9205
9206 calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l;
9207 calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c;
9208 calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma;
9209 calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma;
9210 calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l;
9211 calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c;
9212 calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw;
9213 calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw;
9214 calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw;
9215 calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw;
9216 calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw;
9217 calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw;
9218 calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma;
9219 calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma;
9220 calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor;
9221 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre;
9222 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre;
9223 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre;
9224
9225 calculate_peak_bandwidth_required(
9226 &mode_lib->scratch,
9227 calculate_peak_bandwidth_params);
9228
9229 // Check urg peak bandwidth against available urg bw
			// check at SDP and DRAM, for all soc states (SVP prefetch and Sys Active)
9231 check_urgent_bandwidth_support(
9232 &s->dummy_single[0], // double* frac_urg_bandwidth
9233 &s->dummy_single[1], // double* frac_urg_bandwidth_mall
9234 &mode_lib->ms.support.UrgVactiveBandwidthSupport,
9235 &mode_lib->ms.support.PrefetchBandwidthSupported,
9236
9237 mode_lib->soc.mall_allocated_for_dcn_mbytes,
9238 mode_lib->ms.support.non_urg_bandwidth_required,
9239 mode_lib->ms.support.urg_vactive_bandwidth_required,
9240 mode_lib->ms.support.urg_bandwidth_required,
9241 mode_lib->ms.support.urg_bandwidth_available);
9242
9243 mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.PrefetchBandwidthSupported;
9244 dml2_printf("DML::%s: PrefetchBandwidthSupported=%0d\n", __func__, mode_lib->ms.support.PrefetchBandwidthSupported);
9245
9246 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
9247 if (mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]) {
9248 mode_lib->ms.support.PrefetchSupported = false;
9249 dml2_printf("DML::%s: k=%d, NotEnoughUrgentLatencyHidingPre=%d\n", __func__, k, mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]);
9250 }
9251 }
9252
9253 #ifdef DML_GLOBAL_PREFETCH_CHECK
9254 if (mode_lib->ms.support.PrefetchSupported && mode_lib->ms.num_active_planes > 1 && s->recalc_prefetch_done == 0) {
9255 CheckGlobalPrefetchAdmissibility_params->num_active_planes = mode_lib->ms.num_active_planes;
9256 CheckGlobalPrefetchAdmissibility_params->pixel_format = s->pixel_format;
9257 CheckGlobalPrefetchAdmissibility_params->chunk_bytes_l = mode_lib->ip.pixel_chunk_size_kbytes * 1024;
9258 CheckGlobalPrefetchAdmissibility_params->chunk_bytes_c = mode_lib->ip.pixel_chunk_size_kbytes * 1024;
9259 CheckGlobalPrefetchAdmissibility_params->lb_source_lines_l = s->lb_source_lines_l;
9260 CheckGlobalPrefetchAdmissibility_params->lb_source_lines_c = s->lb_source_lines_c;
9261 CheckGlobalPrefetchAdmissibility_params->swath_height_l = mode_lib->ms.SwathHeightY;
9262 CheckGlobalPrefetchAdmissibility_params->swath_height_c = mode_lib->ms.SwathHeightC;
9263 CheckGlobalPrefetchAdmissibility_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
9264 CheckGlobalPrefetchAdmissibility_params->compressed_buffer_size_kbytes = mode_lib->ms.CompressedBufferSizeInkByte;
9265 CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_l = mode_lib->ms.DETBufferSizeY;
9266 CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_c = mode_lib->ms.DETBufferSizeC;
9267 CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_l = s->full_swath_bytes_l;
9268 CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_c = s->full_swath_bytes_c;
9269 CheckGlobalPrefetchAdmissibility_params->prefetch_sw_bytes = s->prefetch_sw_bytes;
9270 CheckGlobalPrefetchAdmissibility_params->Tpre_rounded = s->Tpre_rounded;
9271 CheckGlobalPrefetchAdmissibility_params->Tpre_oto = s->Tpre_oto;
9272 CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = mode_lib->ms.support.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
9273 CheckGlobalPrefetchAdmissibility_params->line_time = s->line_times;
9274 CheckGlobalPrefetchAdmissibility_params->dst_y_prefetch = mode_lib->ms.dst_y_prefetch;
9275 if (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps < 10 * 1024)
9276 CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = 10 * 1024;
9277
9278 CheckGlobalPrefetchAdmissibility_params->estimated_dcfclk_mhz = (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps / (double) mode_lib->soc.return_bus_width_bytes) /
9279 ((double)mode_lib->soc.qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100.0);
9280
9281 // if recalc_prefetch_schedule is set, recalculate the prefetch schedule with the new impacted_Tpre, prefetch should be possible
9282 CheckGlobalPrefetchAdmissibility_params->recalc_prefetch_schedule = &s->recalc_prefetch_schedule;
9283 CheckGlobalPrefetchAdmissibility_params->impacted_dst_y_pre = s->impacted_dst_y_pre;
9284 mode_lib->ms.support.PrefetchSupported = CheckGlobalPrefetchAdmissibility(&mode_lib->scratch, CheckGlobalPrefetchAdmissibility_params);
9285 s->recalc_prefetch_done = 1;
9286 s->recalc_prefetch_schedule = 1;
9287 }
9288 #endif
9289 } // prefetch schedule ok, do urg bw and flip schedule
9290 } while (s->recalc_prefetch_schedule);
9291
9292 // Flip Schedule
9293 // Both prefetch schedule and BW okay
9294 if (mode_lib->ms.support.PrefetchSupported == true) {
9295 mode_lib->ms.BandwidthAvailableForImmediateFlip =
9296 get_bandwidth_available_for_immediate_flip(
9297 dml2_core_internal_soc_state_sys_active,
9298 mode_lib->ms.support.urg_bandwidth_required_qual, // no flip
9299 mode_lib->ms.support.urg_bandwidth_available);
9300
9301 mode_lib->ms.TotImmediateFlipBytes = 0;
9302 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
9303 if (display_cfg->plane_descriptors[k].immediate_flip) {
9304 s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes(
9305 s->HostVMInefficiencyFactor,
9306 mode_lib->ms.vm_bytes[k],
9307 mode_lib->ms.DPTEBytesPerRow[k],
9308 mode_lib->ms.meta_row_bytes[k]);
9309 } else {
9310 s->per_pipe_flip_bytes[k] = 0;
9311 }
9312 mode_lib->ms.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->ms.NoOfDPP[k];
9313
9314 }
9315
9316 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
9317 CalculateFlipSchedule(
9318 &mode_lib->scratch,
9319 display_cfg->plane_descriptors[k].immediate_flip,
9320 1, // use_lb_flip_bw
9321 s->HostVMInefficiencyFactor,
9322 s->Tvm_trips_flip[k],
9323 s->Tr0_trips_flip[k],
9324 s->Tvm_trips_flip_rounded[k],
9325 s->Tr0_trips_flip_rounded[k],
9326 display_cfg->gpuvm_enable,
9327 mode_lib->ms.vm_bytes[k],
9328 mode_lib->ms.DPTEBytesPerRow[k],
9329 mode_lib->ms.BandwidthAvailableForImmediateFlip,
9330 mode_lib->ms.TotImmediateFlipBytes,
9331 display_cfg->plane_descriptors[k].pixel_format,
9332 (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)),
9333 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
9334 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
9335 mode_lib->ms.Tno_bw_flip[k],
9336 mode_lib->ms.dpte_row_height[k],
9337 mode_lib->ms.dpte_row_height_chroma[k],
9338 mode_lib->ms.use_one_row_for_frame_flip[k],
9339 mode_lib->ip.max_flip_time_us,
9340 mode_lib->ip.max_flip_time_lines,
9341 s->per_pipe_flip_bytes[k],
9342 mode_lib->ms.meta_row_bytes[k],
9343 s->meta_row_height_luma[k],
9344 s->meta_row_height_chroma[k],
9345 mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable,
9346
9347 /* Output */
9348 &mode_lib->ms.dst_y_per_vm_flip[k],
9349 &mode_lib->ms.dst_y_per_row_flip[k],
9350 &mode_lib->ms.final_flip_bw[k],
9351 &mode_lib->ms.ImmediateFlipSupportedForPipe[k]);
9352 }
9353
9354 calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = s->dummy_bw;
9355 calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required_flip;
9356 calculate_peak_bandwidth_params->urg_bandwidth_required_qual = s->dummy_bw;
9357 calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required_flip;
9358 calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw;
9359 calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw;
9360
9361 calculate_peak_bandwidth_params->display_cfg = display_cfg;
9362 calculate_peak_bandwidth_params->inc_flip_bw = 1;
9363 calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes;
9364 calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP;
9365 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0;
9366 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1;
9367 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0;
9368 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1;
9369 calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor;
9370 calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor;
9371
9372 calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l;
9373 calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c;
9374 calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma;
9375 calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma;
9376 calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l;
9377 calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c;
9378 calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw;
9379 calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw;
9380 calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw;
9381 calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw;
9382 calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw;
9383 calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw;
9384 calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma;
9385 calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma;
9386 calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor;
9387 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre;
9388 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre;
9389 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre;
9390
9391 calculate_peak_bandwidth_required(
9392 &mode_lib->scratch,
9393 calculate_peak_bandwidth_params);
9394
9395 calculate_immediate_flip_bandwidth_support(
9396 &s->dummy_single[0], // double* frac_urg_bandwidth_flip
9397 &mode_lib->ms.support.ImmediateFlipSupport,
9398
9399 dml2_core_internal_soc_state_sys_active,
9400 mode_lib->ms.support.urg_bandwidth_required_flip,
9401 mode_lib->ms.support.non_urg_bandwidth_required_flip,
9402 mode_lib->ms.support.urg_bandwidth_available);
9403
9404 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
9405 if (display_cfg->plane_descriptors[k].immediate_flip == true && mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false)
9406 mode_lib->ms.support.ImmediateFlipSupport = false;
9407 }
9408
9409 } else { // if prefetch not support, assume iflip is not supported too
9410 mode_lib->ms.support.ImmediateFlipSupport = false;
9411 }
9412
9413 s->mSOCParameters.UrgentLatency = mode_lib->ms.UrgLatency;
9414 s->mSOCParameters.ExtraLatency = mode_lib->ms.ExtraLatency;
9415 s->mSOCParameters.ExtraLatency_sr = mode_lib->ms.ExtraLatency_sr;
9416 s->mSOCParameters.WritebackLatency = mode_lib->soc.qos_parameters.writeback.base_latency_us;
9417 s->mSOCParameters.DRAMClockChangeLatency = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
9418 s->mSOCParameters.FCLKChangeLatency = mode_lib->soc.power_management_parameters.fclk_change_blackout_us;
9419 s->mSOCParameters.SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us;
9420 s->mSOCParameters.SREnterPlusExitTime = mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us;
9421 s->mSOCParameters.SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us;
9422 s->mSOCParameters.SREnterPlusExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_enter_plus_exit_latency_us;
9423 s->mSOCParameters.USRRetrainingLatency = 0;
9424 s->mSOCParameters.SMNLatency = 0;
9425 s->mSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), in_out_params->min_clk_index);
9426 s->mSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->ms.uclk_freq_mhz, mode_lib->ms.FabricClock, in_out_params->min_clk_index);
9427 s->mSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->ms.FabricClock;
9428 s->mSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type;
9429
9430 CalculateWatermarks_params->display_cfg = display_cfg;
9431 CalculateWatermarks_params->USRRetrainingRequired = false;
9432 CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
9433 CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines;
9434 CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits;
9435 CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes;
9436 CalculateWatermarks_params->DCFCLK = mode_lib->ms.DCFCLK;
9437 CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings;
9438 CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change;
9439 CalculateWatermarks_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
9440 CalculateWatermarks_params->mmSOCParameters = s->mSOCParameters;
9441 CalculateWatermarks_params->WritebackChunkSize = mode_lib->ip.writeback_chunk_size_kbytes;
9442 CalculateWatermarks_params->SOCCLK = mode_lib->ms.SOCCLK;
9443 CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep;
9444 CalculateWatermarks_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY;
9445 CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC;
9446 CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightY;
9447 CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightC;
9448 CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthY;
9449 CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthC;
9450 CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPP;
9451 CalculateWatermarks_params->BytePerPixelDETY = mode_lib->ms.BytePerPixelInDETY;
9452 CalculateWatermarks_params->BytePerPixelDETC = mode_lib->ms.BytePerPixelInDETC;
9453 CalculateWatermarks_params->DSTXAfterScaler = s->DSTXAfterScaler;
9454 CalculateWatermarks_params->DSTYAfterScaler = s->DSTYAfterScaler;
9455 CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabled;
9456 CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByte;
9457 CalculateWatermarks_params->meta_row_height_l = s->meta_row_height_luma;
9458 CalculateWatermarks_params->meta_row_height_c = s->meta_row_height_chroma;
9459
9460 // Output
9461 CalculateWatermarks_params->Watermark = &mode_lib->ms.support.watermarks; // Watermarks *Watermark
9462 CalculateWatermarks_params->DRAMClockChangeSupport = mode_lib->ms.support.DRAMClockChangeSupport;
9463 CalculateWatermarks_params->global_dram_clock_change_supported = &mode_lib->ms.support.global_dram_clock_change_supported;
9464 CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0]; // double *MaxActiveDRAMClockChangeLatencySupported[]
9465 CalculateWatermarks_params->SubViewportLinesNeededInMALL = mode_lib->ms.SubViewportLinesNeededInMALL; // unsigned int SubViewportLinesNeededInMALL[]
9466 CalculateWatermarks_params->FCLKChangeSupport = mode_lib->ms.support.FCLKChangeSupport;
9467 CalculateWatermarks_params->global_fclk_change_supported = &mode_lib->ms.support.global_fclk_change_supported;
9468 CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // double *MaxActiveFCLKChangeLatencySupported
9469 CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport;
9470 CalculateWatermarks_params->g6_temp_read_support = &mode_lib->ms.support.g6_temp_read_support;
9471 CalculateWatermarks_params->VActiveLatencyHidingMargin = mode_lib->ms.VActiveLatencyHidingMargin;
9472 CalculateWatermarks_params->VActiveLatencyHidingUs = mode_lib->ms.VActiveLatencyHidingUs;
9473
9474 CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params);
9475
9476 calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->ms.support.watermarks, s->dummy_integer_array[0]);
9477 }
9478 dml2_printf("DML::%s: Done prefetch calculation\n", __func__);
9479 // End of Prefetch Check
9480
9481 mode_lib->ms.support.max_urgent_latency_us = s->mSOCParameters.max_urgent_latency_us;
9482
9483 //Re-ordering Buffer Support Check
9484 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) {
9485 if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024
9486 / mode_lib->ms.support.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= s->mSOCParameters.max_urgent_latency_us) {
9487 mode_lib->ms.support.ROBSupport = true;
9488 } else {
9489 mode_lib->ms.support.ROBSupport = false;
9490 }
9491 } else {
9492 if (mode_lib->ip.rob_buffer_size_kbytes * 1024 >= mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles * mode_lib->soc.fabric_datapath_to_dcn_data_return_bytes) {
9493 mode_lib->ms.support.ROBSupport = true;
9494 } else {
9495 mode_lib->ms.support.ROBSupport = false;
9496 }
9497 }
9498
9499 /* VActive fill time calculations (informative) */
9500 calculate_vactive_det_fill_latency(
9501 display_cfg,
9502 mode_lib->ms.num_active_planes,
9503 s->pstate_bytes_required_l,
9504 s->pstate_bytes_required_c,
9505 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0,
9506 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1,
9507 mode_lib->ms.vactive_sw_bw_l,
9508 mode_lib->ms.vactive_sw_bw_c,
9509 mode_lib->ms.surface_avg_vactive_required_bw,
9510 mode_lib->ms.surface_peak_required_bw,
9511 /* outputs */
9512 mode_lib->ms.dram_change_vactive_det_fill_delay_us);
9513
9514 #ifdef __DML_VBA_DEBUG__
9515 dml2_printf("DML::%s: max_urgent_latency_us = %f\n", __func__, s->mSOCParameters.max_urgent_latency_us);
9516 dml2_printf("DML::%s: ROBSupport = %u\n", __func__, mode_lib->ms.support.ROBSupport);
9517 #endif
9518
9519 /*Mode Support, Voltage State and SOC Configuration*/
9520 {
9521 if (mode_lib->ms.support.ScaleRatioAndTapsSupport
9522 && mode_lib->ms.support.SourceFormatPixelAndScanSupport
9523 && mode_lib->ms.support.ViewportSizeSupport
9524 && !mode_lib->ms.support.LinkRateDoesNotMatchDPVersion
9525 && !mode_lib->ms.support.LinkRateForMultistreamNotIndicated
9526 && !mode_lib->ms.support.BPPForMultistreamNotIndicated
9527 && !mode_lib->ms.support.MultistreamWithHDMIOreDP
9528 && !mode_lib->ms.support.ExceededMultistreamSlots
9529 && !mode_lib->ms.support.MSOOrODMSplitWithNonDPLink
9530 && !mode_lib->ms.support.NotEnoughLanesForMSO
9531 && !mode_lib->ms.support.P2IWith420
9532 && !mode_lib->ms.support.DSC422NativeNotSupported
9533 && mode_lib->ms.support.DSCSlicesODMModeSupported
9534 && !mode_lib->ms.support.NotEnoughDSCUnits
9535 && !mode_lib->ms.support.NotEnoughDSCSlices
9536 && !mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe
9537 && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen
9538 && !mode_lib->ms.support.DSCCLKRequiredMoreThanSupported
9539 && mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport
9540 && !mode_lib->ms.support.DTBCLKRequiredMoreThanSupported
9541 && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPState
9542 && mode_lib->ms.support.ROBSupport
9543 && mode_lib->ms.support.OutstandingRequestsSupport
9544 && mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance
9545 && mode_lib->ms.support.DISPCLK_DPPCLK_Support
9546 && mode_lib->ms.support.TotalAvailablePipesSupport
9547 && mode_lib->ms.support.NumberOfOTGSupport
9548 && mode_lib->ms.support.NumberOfHDMIFRLSupport
9549 && mode_lib->ms.support.NumberOfDP2p0Support
9550 && mode_lib->ms.support.EnoughWritebackUnits
9551 && mode_lib->ms.support.WritebackLatencySupport
9552 && mode_lib->ms.support.WritebackScaleRatioAndTapsSupport
9553 && mode_lib->ms.support.CursorSupport
9554 && mode_lib->ms.support.PitchSupport
9555 && !mode_lib->ms.support.ViewportExceedsSurface
9556 && mode_lib->ms.support.PrefetchSupported
9557 && mode_lib->ms.support.EnoughUrgentLatencyHidingSupport
9558 && mode_lib->ms.support.AvgBandwidthSupport
9559 && mode_lib->ms.support.DynamicMetadataSupported
9560 && mode_lib->ms.support.VRatioInPrefetchSupported
9561 && mode_lib->ms.support.PTEBufferSizeNotExceeded
9562 && mode_lib->ms.support.DCCMetaBufferSizeNotExceeded
9563 && !mode_lib->ms.support.ExceededMALLSize
9564 && mode_lib->ms.support.g6_temp_read_support
9565 && ((!display_cfg->hostvm_enable && !s->ImmediateFlipRequired) || mode_lib->ms.support.ImmediateFlipSupport)) {
9566 dml2_printf("DML::%s: mode is supported\n", __func__);
9567 mode_lib->ms.support.ModeSupport = true;
9568 } else {
9569 dml2_printf("DML::%s: mode is NOT supported\n", __func__);
9570 mode_lib->ms.support.ModeSupport = false;
9571 }
9572 }
9573
	// mode_support now works on one particular power state, so there is only one state index (index 0).
9575 dml2_printf("DML::%s: ModeSupport = %u\n", __func__, mode_lib->ms.support.ModeSupport);
9576 dml2_printf("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport);
9577
9578 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
9579 mode_lib->ms.support.MPCCombineEnable[k] = mode_lib->ms.MPCCombine[k];
9580 mode_lib->ms.support.DPPPerSurface[k] = mode_lib->ms.NoOfDPP[k];
9581 }
9582
9583 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
9584 mode_lib->ms.support.ODMMode[k] = mode_lib->ms.ODMMode[k];
9585 mode_lib->ms.support.DSCEnabled[k] = mode_lib->ms.RequiresDSC[k];
9586 mode_lib->ms.support.FECEnabled[k] = mode_lib->ms.RequiresFEC[k];
9587 mode_lib->ms.support.OutputBpp[k] = mode_lib->ms.OutputBpp[k];
9588 mode_lib->ms.support.OutputType[k] = mode_lib->ms.OutputType[k];
9589 mode_lib->ms.support.OutputRate[k] = mode_lib->ms.OutputRate[k];
9590
9591 #if defined(__DML_VBA_DEBUG__)
9592 dml2_printf("DML::%s: k=%d, ODMMode = %u\n", __func__, k, mode_lib->ms.support.ODMMode[k]);
9593 dml2_printf("DML::%s: k=%d, DSCEnabled = %u\n", __func__, k, mode_lib->ms.support.DSCEnabled[k]);
9594 #endif
9595 }
9596
9597 #if defined(__DML_VBA_DEBUG__)
9598 if (!mode_lib->ms.support.ModeSupport)
9599 dml2_print_mode_support_info(&mode_lib->ms.support, true);
9600
9601 dml2_printf("DML::%s: --- DONE --- \n", __func__);
9602 #endif
9603
9604 return mode_lib->ms.support.ModeSupport;
9605 }
9606
dml2_core_calcs_mode_support_ex(struct dml2_core_calcs_mode_support_ex * in_out_params)9607 unsigned int dml2_core_calcs_mode_support_ex(struct dml2_core_calcs_mode_support_ex *in_out_params)
9608 {
9609 unsigned int result;
9610
9611 dml2_printf("DML::%s: ------------- START ----------\n", __func__);
9612 result = dml_core_mode_support(in_out_params);
9613
9614 if (result)
9615 *in_out_params->out_evaluation_info = in_out_params->mode_lib->ms.support;
9616
9617 dml2_printf("DML::%s: is_mode_support = %u (min_clk_index=%d)\n", __func__, result, in_out_params->min_clk_index);
9618
9619 for (unsigned int k = 0; k < in_out_params->in_display_cfg->num_planes; k++)
9620 dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, in_out_params->in_display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns);
9621
9622 dml2_printf("DML::%s: ------------- DONE ----------\n", __func__);
9623
9624 return result;
9625 }
9626
CalculatePixelDeliveryTimes(const struct dml2_display_cfg * display_cfg,const struct core_display_cfg_support_info * cfg_support_info,unsigned int NumberOfActiveSurfaces,double VRatioPrefetchY[],double VRatioPrefetchC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double Dppclk[],unsigned int BytePerPixelC[],unsigned int req_per_swath_ub_l[],unsigned int req_per_swath_ub_c[],double DisplayPipeLineDeliveryTimeLuma[],double DisplayPipeLineDeliveryTimeChroma[],double DisplayPipeLineDeliveryTimeLumaPrefetch[],double DisplayPipeLineDeliveryTimeChromaPrefetch[],double DisplayPipeRequestDeliveryTimeLuma[],double DisplayPipeRequestDeliveryTimeChroma[],double DisplayPipeRequestDeliveryTimeLumaPrefetch[],double DisplayPipeRequestDeliveryTimeChromaPrefetch[])9627 static void CalculatePixelDeliveryTimes(
9628 const struct dml2_display_cfg *display_cfg,
9629 const struct core_display_cfg_support_info *cfg_support_info,
9630 unsigned int NumberOfActiveSurfaces,
9631 double VRatioPrefetchY[],
9632 double VRatioPrefetchC[],
9633 unsigned int swath_width_luma_ub[],
9634 unsigned int swath_width_chroma_ub[],
9635 double PSCL_THROUGHPUT[],
9636 double PSCL_THROUGHPUT_CHROMA[],
9637 double Dppclk[],
9638 unsigned int BytePerPixelC[],
9639 unsigned int req_per_swath_ub_l[],
9640 unsigned int req_per_swath_ub_c[],
9641
9642 // Output
9643 double DisplayPipeLineDeliveryTimeLuma[],
9644 double DisplayPipeLineDeliveryTimeChroma[],
9645 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
9646 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
9647 double DisplayPipeRequestDeliveryTimeLuma[],
9648 double DisplayPipeRequestDeliveryTimeChroma[],
9649 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
9650 double DisplayPipeRequestDeliveryTimeChromaPrefetch[])
9651 {
9652 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
9653 double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
9654
9655 #ifdef __DML_VBA_DEBUG__
9656 dml2_printf("DML::%s: k=%u : HRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
9657 dml2_printf("DML::%s: k=%u : VRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
9658 dml2_printf("DML::%s: k=%u : HRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio);
9659 dml2_printf("DML::%s: k=%u : VRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio);
9660 dml2_printf("DML::%s: k=%u : VRatioPrefetchY = %f\n", __func__, k, VRatioPrefetchY[k]);
9661 dml2_printf("DML::%s: k=%u : VRatioPrefetchC = %f\n", __func__, k, VRatioPrefetchC[k]);
9662 dml2_printf("DML::%s: k=%u : swath_width_luma_ub = %u\n", __func__, k, swath_width_luma_ub[k]);
9663 dml2_printf("DML::%s: k=%u : swath_width_chroma_ub = %u\n", __func__, k, swath_width_chroma_ub[k]);
9664 dml2_printf("DML::%s: k=%u : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
9665 dml2_printf("DML::%s: k=%u : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
9666 dml2_printf("DML::%s: k=%u : DPPPerSurface = %u\n", __func__, k, cfg_support_info->plane_support_info[k].dpps_used);
9667 dml2_printf("DML::%s: k=%u : pixel_clock_mhz = %f\n", __func__, k, pixel_clock_mhz);
9668 dml2_printf("DML::%s: k=%u : Dppclk = %f\n", __func__, k, Dppclk[k]);
9669 #endif
9670 if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio <= 1) {
9671 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_clock_mhz;
9672 } else {
9673 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
9674 }
9675
9676 if (BytePerPixelC[k] == 0) {
9677 DisplayPipeLineDeliveryTimeChroma[k] = 0;
9678 } else {
9679 if (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio <= 1) {
9680 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_clock_mhz;
9681 } else {
9682 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
9683 }
9684 }
9685
9686 if (VRatioPrefetchY[k] <= 1) {
9687 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_clock_mhz;
9688 } else {
9689 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
9690 }
9691
9692 if (BytePerPixelC[k] == 0) {
9693 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
9694 } else {
9695 if (VRatioPrefetchC[k] <= 1) {
9696 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_clock_mhz;
9697 } else {
9698 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
9699 }
9700 }
9701 #ifdef __DML_VBA_DEBUG__
9702 dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
9703 dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
9704 dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
9705 dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
9706 #endif
9707 }
9708
9709 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
9710
9711 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub_l[k];
9712 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub_l[k];
9713 if (BytePerPixelC[k] == 0) {
9714 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
9715 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
9716 } else {
9717 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub_c[k];
9718 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub_c[k];
9719 }
9720 #ifdef __DML_VBA_DEBUG__
9721 dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
9722 dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
9723 dml2_printf("DML::%s: k=%u : req_per_swath_ub_l = %d\n", __func__, k, req_per_swath_ub_l[k]);
9724 dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
9725 dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
9726 dml2_printf("DML::%s: k=%u : req_per_swath_ub_c = %d\n", __func__, k, req_per_swath_ub_c[k]);
9727 #endif
9728 }
9729 }
9730
CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTETimes_params * p)9731 static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTETimes_params *p)
9732 {
9733 unsigned int meta_chunk_width;
9734 unsigned int min_meta_chunk_width;
9735 unsigned int meta_chunk_per_row_int;
9736 unsigned int meta_row_remainder;
9737 unsigned int meta_chunk_threshold;
9738 unsigned int meta_chunks_per_row_ub;
9739 unsigned int meta_chunk_width_chroma;
9740 unsigned int min_meta_chunk_width_chroma;
9741 unsigned int meta_chunk_per_row_int_chroma;
9742 unsigned int meta_row_remainder_chroma;
9743 unsigned int meta_chunk_threshold_chroma;
9744 unsigned int meta_chunks_per_row_ub_chroma;
9745 unsigned int dpte_group_width_luma;
9746 unsigned int dpte_groups_per_row_luma_ub;
9747 unsigned int dpte_group_width_chroma;
9748 unsigned int dpte_groups_per_row_chroma_ub;
9749 double pixel_clock_mhz;
9750
9751 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
9752 p->DST_Y_PER_PTE_ROW_NOM_L[k] = p->dpte_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
9753 if (p->BytePerPixelC[k] == 0) {
9754 p->DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
9755 } else {
9756 p->DST_Y_PER_PTE_ROW_NOM_C[k] = p->dpte_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
9757 }
9758 p->DST_Y_PER_META_ROW_NOM_L[k] = p->meta_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
9759 if (p->BytePerPixelC[k] == 0) {
9760 p->DST_Y_PER_META_ROW_NOM_C[k] = 0;
9761 } else {
9762 p->DST_Y_PER_META_ROW_NOM_C[k] = p->meta_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
9763 }
9764 }
9765
9766 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
9767 if (p->display_cfg->plane_descriptors[k].surface.dcc.enable == true && p->mrq_present) {
9768 meta_chunk_width = p->MetaChunkSize * 1024 * 256 / p->BytePerPixelY[k] / p->meta_row_height[k];
9769 min_meta_chunk_width = p->MinMetaChunkSizeBytes * 256 / p->BytePerPixelY[k] / p->meta_row_height[k];
9770 meta_chunk_per_row_int = p->meta_row_width[k] / meta_chunk_width;
9771 meta_row_remainder = p->meta_row_width[k] % meta_chunk_width;
9772 if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
9773 meta_chunk_threshold = 2 * min_meta_chunk_width - p->meta_req_width[k];
9774 } else {
9775 meta_chunk_threshold = 2 * min_meta_chunk_width - p->meta_req_height[k];
9776 }
9777 if (meta_row_remainder <= meta_chunk_threshold) {
9778 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
9779 } else {
9780 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
9781 }
9782 p->TimePerMetaChunkNominal[k] = p->meta_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio *
9783 p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total /
9784 (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub;
9785 p->TimePerMetaChunkVBlank[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total /
9786 (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub;
9787 p->TimePerMetaChunkFlip[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total /
9788 (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub;
9789 if (p->BytePerPixelC[k] == 0) {
9790 p->TimePerChromaMetaChunkNominal[k] = 0;
9791 p->TimePerChromaMetaChunkVBlank[k] = 0;
9792 p->TimePerChromaMetaChunkFlip[k] = 0;
9793 } else {
9794 meta_chunk_width_chroma = p->MetaChunkSize * 1024 * 256 / p->BytePerPixelC[k] / p->meta_row_height_chroma[k];
9795 min_meta_chunk_width_chroma = p->MinMetaChunkSizeBytes * 256 / p->BytePerPixelC[k] / p->meta_row_height_chroma[k];
9796 meta_chunk_per_row_int_chroma = (unsigned int)((double)p->meta_row_width_chroma[k] / meta_chunk_width_chroma);
9797 meta_row_remainder_chroma = p->meta_row_width_chroma[k] % meta_chunk_width_chroma;
9798 if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
9799 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - p->meta_req_width_chroma[k];
9800 } else {
9801 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - p->meta_req_height_chroma[k];
9802 }
9803 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
9804 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
9805 } else {
9806 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
9807 }
9808 p->TimePerChromaMetaChunkNominal[k] = p->meta_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma;
9809 p->TimePerChromaMetaChunkVBlank[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma;
9810 p->TimePerChromaMetaChunkFlip[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma;
9811 }
9812 } else {
9813 p->TimePerMetaChunkNominal[k] = 0;
9814 p->TimePerMetaChunkVBlank[k] = 0;
9815 p->TimePerMetaChunkFlip[k] = 0;
9816 p->TimePerChromaMetaChunkNominal[k] = 0;
9817 p->TimePerChromaMetaChunkVBlank[k] = 0;
9818 p->TimePerChromaMetaChunkFlip[k] = 0;
9819 }
9820
9821 #ifdef __DML_VBA_DEBUG__
9822 dml2_printf("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_L[k]);
9823 dml2_printf("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_C[k]);
9824 dml2_printf("DML::%s: k=%d, TimePerMetaChunkNominal = %f\n", __func__, k, p->TimePerMetaChunkNominal[k]);
9825 dml2_printf("DML::%s: k=%d, TimePerMetaChunkVBlank = %f\n", __func__, k, p->TimePerMetaChunkVBlank[k]);
9826 dml2_printf("DML::%s: k=%d, TimePerMetaChunkFlip = %f\n", __func__, k, p->TimePerMetaChunkFlip[k]);
9827 dml2_printf("DML::%s: k=%d, TimePerChromaMetaChunkNominal = %f\n", __func__, k, p->TimePerChromaMetaChunkNominal[k]);
9828 dml2_printf("DML::%s: k=%d, TimePerChromaMetaChunkVBlank = %f\n", __func__, k, p->TimePerChromaMetaChunkVBlank[k]);
9829 dml2_printf("DML::%s: k=%d, TimePerChromaMetaChunkFlip = %f\n", __func__, k, p->TimePerChromaMetaChunkFlip[k]);
9830 #endif
9831 }
9832
9833 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
9834 p->DST_Y_PER_PTE_ROW_NOM_L[k] = p->dpte_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
9835 if (p->BytePerPixelC[k] == 0) {
9836 p->DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
9837 } else {
9838 p->DST_Y_PER_PTE_ROW_NOM_C[k] = p->dpte_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
9839 }
9840 }
9841
9842 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
9843 pixel_clock_mhz = ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
9844
9845 if (p->display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut)
9846 p->time_per_tdlut_group[k] = 2 * p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / p->tdlut_groups_per_2row_ub[k];
9847 else
9848 p->time_per_tdlut_group[k] = 0;
9849
9850 dml2_printf("DML::%s: k=%u, time_per_tdlut_group = %f\n", __func__, k, p->time_per_tdlut_group[k]);
9851
9852 if (p->display_cfg->gpuvm_enable == true) {
9853 if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
9854 dpte_group_width_luma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeY[k] * p->PixelPTEReqWidthY[k]);
9855 } else {
9856 dpte_group_width_luma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeY[k] * p->PixelPTEReqHeightY[k]);
9857 }
9858 if (p->use_one_row_for_frame[k]) {
9859 dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma / 2.0, 1.0));
9860 } else {
9861 dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma, 1.0));
9862 }
9863 if (dpte_groups_per_row_luma_ub <= 2) {
9864 dpte_groups_per_row_luma_ub = dpte_groups_per_row_luma_ub + 1;
9865 }
9866 dml2_printf("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]);
9867 dml2_printf("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, p->dpte_group_bytes[k]);
9868 dml2_printf("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, p->PTERequestSizeY[k]);
9869 dml2_printf("DML::%s: k=%u, PixelPTEReqWidthY = %u\n", __func__, k, p->PixelPTEReqWidthY[k]);
9870 dml2_printf("DML::%s: k=%u, PixelPTEReqHeightY = %u\n", __func__, k, p->PixelPTEReqHeightY[k]);
9871 dml2_printf("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]);
9872 dml2_printf("DML::%s: k=%u, dpte_group_width_luma = %u\n", __func__, k, dpte_group_width_luma);
9873 dml2_printf("DML::%s: k=%u, dpte_groups_per_row_luma_ub = %u\n", __func__, k, dpte_groups_per_row_luma_ub);
9874
9875 p->time_per_pte_group_nom_luma[k] = p->DST_Y_PER_PTE_ROW_NOM_L[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub;
9876 p->time_per_pte_group_vblank_luma[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub;
9877 p->time_per_pte_group_flip_luma[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub;
9878 if (p->BytePerPixelC[k] == 0) {
9879 p->time_per_pte_group_nom_chroma[k] = 0;
9880 p->time_per_pte_group_vblank_chroma[k] = 0;
9881 p->time_per_pte_group_flip_chroma[k] = 0;
9882 } else {
9883 if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
9884 dpte_group_width_chroma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeC[k] * p->PixelPTEReqWidthC[k]);
9885 } else {
9886 dpte_group_width_chroma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeC[k] * p->PixelPTEReqHeightC[k]);
9887 }
9888
9889 if (p->use_one_row_for_frame[k]) {
9890 dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma / 2.0, 1.0));
9891 } else {
9892 dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma, 1.0));
9893 }
9894 if (dpte_groups_per_row_chroma_ub <= 2) {
9895 dpte_groups_per_row_chroma_ub = dpte_groups_per_row_chroma_ub + 1;
9896 }
9897 dml2_printf("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]);
9898 dml2_printf("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma);
9899 dml2_printf("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub);
9900
9901 p->time_per_pte_group_nom_chroma[k] = p->DST_Y_PER_PTE_ROW_NOM_C[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub;
9902 p->time_per_pte_group_vblank_chroma[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub;
9903 p->time_per_pte_group_flip_chroma[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub;
9904 }
9905 } else {
9906 p->time_per_pte_group_nom_luma[k] = 0;
9907 p->time_per_pte_group_vblank_luma[k] = 0;
9908 p->time_per_pte_group_flip_luma[k] = 0;
9909 p->time_per_pte_group_nom_chroma[k] = 0;
9910 p->time_per_pte_group_vblank_chroma[k] = 0;
9911 p->time_per_pte_group_flip_chroma[k] = 0;
9912 }
9913 #ifdef __DML_VBA_DEBUG__
9914 dml2_printf("DML::%s: k=%u, dst_y_per_row_vblank = %f\n", __func__, k, p->dst_y_per_row_vblank[k]);
9915 dml2_printf("DML::%s: k=%u, dst_y_per_row_flip = %f\n", __func__, k, p->dst_y_per_row_flip[k]);
9916
9917 dml2_printf("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_L[k]);
9918 dml2_printf("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_C[k]);
9919 dml2_printf("DML::%s: k=%u, time_per_pte_group_nom_luma = %f\n", __func__, k, p->time_per_pte_group_nom_luma[k]);
9920 dml2_printf("DML::%s: k=%u, time_per_pte_group_vblank_luma = %f\n", __func__, k, p->time_per_pte_group_vblank_luma[k]);
9921 dml2_printf("DML::%s: k=%u, time_per_pte_group_flip_luma = %f\n", __func__, k, p->time_per_pte_group_flip_luma[k]);
9922 dml2_printf("DML::%s: k=%u, time_per_pte_group_nom_chroma = %f\n", __func__, k, p->time_per_pte_group_nom_chroma[k]);
9923 dml2_printf("DML::%s: k=%u, time_per_pte_group_vblank_chroma = %f\n", __func__, k, p->time_per_pte_group_vblank_chroma[k]);
9924 dml2_printf("DML::%s: k=%u, time_per_pte_group_flip_chroma = %f\n", __func__, k, p->time_per_pte_group_flip_chroma[k]);
9925 #endif
9926 }
9927 } // CalculateMetaAndPTETimes
9928
CalculateVMGroupAndRequestTimes(const struct dml2_display_cfg * display_cfg,unsigned int NumberOfActiveSurfaces,unsigned int BytePerPixelC[],double dst_y_per_vm_vblank[],double dst_y_per_vm_flip[],unsigned int dpte_row_width_luma_ub[],unsigned int dpte_row_width_chroma_ub[],unsigned int vm_group_bytes[],unsigned int dpde0_bytes_per_frame_ub_l[],unsigned int dpde0_bytes_per_frame_ub_c[],unsigned int tdlut_pte_bytes_per_frame[],unsigned int meta_pte_bytes_per_frame_ub_l[],unsigned int meta_pte_bytes_per_frame_ub_c[],bool mrq_present,double TimePerVMGroupVBlank[],double TimePerVMGroupFlip[],double TimePerVMRequestVBlank[],double TimePerVMRequestFlip[])9929 static void CalculateVMGroupAndRequestTimes(
9930 const struct dml2_display_cfg *display_cfg,
9931 unsigned int NumberOfActiveSurfaces,
9932 unsigned int BytePerPixelC[],
9933 double dst_y_per_vm_vblank[],
9934 double dst_y_per_vm_flip[],
9935 unsigned int dpte_row_width_luma_ub[],
9936 unsigned int dpte_row_width_chroma_ub[],
9937 unsigned int vm_group_bytes[],
9938 unsigned int dpde0_bytes_per_frame_ub_l[],
9939 unsigned int dpde0_bytes_per_frame_ub_c[],
9940 unsigned int tdlut_pte_bytes_per_frame[],
9941 unsigned int meta_pte_bytes_per_frame_ub_l[],
9942 unsigned int meta_pte_bytes_per_frame_ub_c[],
9943 bool mrq_present,
9944
9945 // Output
9946 double TimePerVMGroupVBlank[],
9947 double TimePerVMGroupFlip[],
9948 double TimePerVMRequestVBlank[],
9949 double TimePerVMRequestFlip[])
9950 {
9951 unsigned int num_group_per_lower_vm_stage = 0;
9952 unsigned int num_req_per_lower_vm_stage = 0;
9953 unsigned int num_group_per_lower_vm_stage_flip;
9954 unsigned int num_group_per_lower_vm_stage_pref;
9955 unsigned int num_req_per_lower_vm_stage_flip;
9956 unsigned int num_req_per_lower_vm_stage_pref;
9957 double line_time;
9958
9959 #ifdef __DML_VBA_DEBUG__
9960 dml2_printf("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
9961 #endif
9962 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
9963 double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
9964 bool dcc_mrq_enable = display_cfg->plane_descriptors[k].surface.dcc.enable && mrq_present;
9965 #ifdef __DML_VBA_DEBUG__
9966 dml2_printf("DML::%s: k=%u, dcc_mrq_enable = %u\n", __func__, k, dcc_mrq_enable);
9967 dml2_printf("DML::%s: k=%u, vm_group_bytes = %u\n", __func__, k, vm_group_bytes[k]);
9968 dml2_printf("DML::%s: k=%u, dpde0_bytes_per_frame_ub_l = %u\n", __func__, k, dpde0_bytes_per_frame_ub_l[k]);
9969 dml2_printf("DML::%s: k=%u, dpde0_bytes_per_frame_ub_c = %u\n", __func__, k, dpde0_bytes_per_frame_ub_c[k]);
9970 dml2_printf("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_l = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_l[k]);
9971 dml2_printf("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_c = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_c[k]);
9972 #endif
9973
9974 if (display_cfg->gpuvm_enable) {
9975 if (display_cfg->gpuvm_max_page_table_levels >= 2) {
9976 num_group_per_lower_vm_stage += (unsigned int) math_ceil2((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
9977
9978 if (BytePerPixelC[k] > 0)
9979 num_group_per_lower_vm_stage += (unsigned int) math_ceil2((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
9980 }
9981
9982 if (dcc_mrq_enable) {
9983 if (BytePerPixelC[k] > 0) {
9984 num_group_per_lower_vm_stage += (unsigned int)(2.0 /*for each mpde0 group*/ + math_ceil2((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) +
9985 math_ceil2((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1));
9986 } else {
9987 num_group_per_lower_vm_stage += (unsigned int)(1.0 + math_ceil2((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1));
9988 }
9989 }
9990
9991 num_group_per_lower_vm_stage_flip = num_group_per_lower_vm_stage;
9992 num_group_per_lower_vm_stage_pref = num_group_per_lower_vm_stage;
9993
9994 if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && display_cfg->gpuvm_enable) {
9995 num_group_per_lower_vm_stage_pref += (unsigned int) math_ceil2(tdlut_pte_bytes_per_frame[k] / vm_group_bytes[k], 1);
9996 if (display_cfg->gpuvm_max_page_table_levels >= 2)
9997 num_group_per_lower_vm_stage_pref += 1; // tdpe0 group
9998 }
9999
10000 if (display_cfg->gpuvm_max_page_table_levels >= 2) {
10001 num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_l[k] / 64;
10002 if (BytePerPixelC[k] > 0)
10003 num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_c[k];
10004 }
10005
10006 if (dcc_mrq_enable) {
10007 num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_l[k] / 64;
10008 if (BytePerPixelC[k] > 0)
10009 num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_c[k] / 64;
10010 }
10011
10012 num_req_per_lower_vm_stage_flip = num_req_per_lower_vm_stage;
10013 num_req_per_lower_vm_stage_pref = num_req_per_lower_vm_stage;
10014
10015 if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && display_cfg->gpuvm_enable) {
10016 num_req_per_lower_vm_stage_pref += tdlut_pte_bytes_per_frame[k] / 64;
10017 }
10018
10019 line_time = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz;
10020
10021 if (num_group_per_lower_vm_stage_pref > 0)
10022 TimePerVMGroupVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_group_per_lower_vm_stage_pref;
10023 else
10024 TimePerVMGroupVBlank[k] = 0;
10025
10026 if (num_group_per_lower_vm_stage_flip > 0)
10027 TimePerVMGroupFlip[k] = dst_y_per_vm_flip[k] * line_time / num_group_per_lower_vm_stage_flip;
10028 else
10029 TimePerVMGroupFlip[k] = 0;
10030
10031 if (num_req_per_lower_vm_stage_pref > 0)
10032 TimePerVMRequestVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_req_per_lower_vm_stage_pref;
10033 else
10034 TimePerVMRequestVBlank[k] = 0.0;
10035 if (num_req_per_lower_vm_stage_flip > 0)
10036 TimePerVMRequestFlip[k] = dst_y_per_vm_flip[k] * line_time / num_req_per_lower_vm_stage_flip;
10037 else
10038 TimePerVMRequestFlip[k] = 0.0;
10039
10040 dml2_printf("DML::%s: k=%u, dst_y_per_vm_vblank = %f\n", __func__, k, dst_y_per_vm_vblank[k]);
10041 dml2_printf("DML::%s: k=%u, dst_y_per_vm_flip = %f\n", __func__, k, dst_y_per_vm_flip[k]);
10042 dml2_printf("DML::%s: k=%u, line_time = %f\n", __func__, k, line_time);
10043 dml2_printf("DML::%s: k=%u, num_group_per_lower_vm_stage_pref = %f\n", __func__, k, num_group_per_lower_vm_stage_pref);
10044 dml2_printf("DML::%s: k=%u, num_group_per_lower_vm_stage_flip = %f\n", __func__, k, num_group_per_lower_vm_stage_flip);
10045 dml2_printf("DML::%s: k=%u, num_req_per_lower_vm_stage_pref = %f\n", __func__, k, num_req_per_lower_vm_stage_pref);
10046 dml2_printf("DML::%s: k=%u, num_req_per_lower_vm_stage_flip = %f\n", __func__, k, num_req_per_lower_vm_stage_flip);
10047
10048 if (display_cfg->gpuvm_max_page_table_levels > 2) {
10049 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
10050 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
10051 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
10052 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
10053 }
10054
10055 } else {
10056 TimePerVMGroupVBlank[k] = 0;
10057 TimePerVMGroupFlip[k] = 0;
10058 TimePerVMRequestVBlank[k] = 0;
10059 TimePerVMRequestFlip[k] = 0;
10060 }
10061
10062 #ifdef __DML_VBA_DEBUG__
10063 dml2_printf("DML::%s: k=%u, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
10064 dml2_printf("DML::%s: k=%u, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
10065 dml2_printf("DML::%s: k=%u, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
10066 dml2_printf("DML::%s: k=%u, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
10067 #endif
10068 }
10069 }
10070
CalculateStutterEfficiency(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_CalculateStutterEfficiency_params * p)10071 static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratch,
10072 struct dml2_core_calcs_CalculateStutterEfficiency_params *p)
10073 {
10074 struct dml2_core_calcs_CalculateStutterEfficiency_locals *l = &scratch->CalculateStutterEfficiency_locals;
10075
10076 unsigned int TotalNumberOfActiveOTG = 0;
10077 double SinglePixelClock = 0;
10078 unsigned int SingleHTotal = 0;
10079 unsigned int SingleVTotal = 0;
10080 bool SameTiming = true;
10081 bool FoundCriticalSurface = false;
10082 double LastZ8StutterPeriod = 0;
10083
10084 memset(l, 0, sizeof(struct dml2_core_calcs_CalculateStutterEfficiency_locals));
10085
10086 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
10087 if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) {
10088 if (p->display_cfg->plane_descriptors[k].surface.dcc.enable == true) {
10089 if ((dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockWidth256BytesY[k] > p->SwathHeightY[k]) || (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockHeight256BytesY[k] > p->SwathHeightY[k]) || p->DCCYMaxUncompressedBlock[k] < 256) {
10090 l->MaximumEffectiveCompressionLuma = 2;
10091 } else {
10092 l->MaximumEffectiveCompressionLuma = 4;
10093 }
10094 l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0, l->MaximumEffectiveCompressionLuma);
10095 #ifdef __DML_VBA_DEBUG__
10096 dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]);
10097 dml2_printf("DML::%s: k=%u, NetDCCRateLuma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0);
10098 dml2_printf("DML::%s: k=%u, MaximumEffectiveCompressionLuma = %f\n", __func__, k, l->MaximumEffectiveCompressionLuma);
10099 #endif
10100 l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0;
10101 l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0 / l->MaximumEffectiveCompressionLuma;
10102
10103 if (p->ReadBandwidthSurfaceChroma[k] > 0) {
10104 if ((dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockWidth256BytesC[k] > p->SwathHeightC[k]) || (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockHeight256BytesC[k] > p->SwathHeightC[k]) || p->DCCCMaxUncompressedBlock[k] < 256) {
10105 l->MaximumEffectiveCompressionChroma = 2;
10106 } else {
10107 l->MaximumEffectiveCompressionChroma = 4;
10108 }
10109 l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1, l->MaximumEffectiveCompressionChroma);
10110 #ifdef __DML_VBA_DEBUG__
10111 dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, p->ReadBandwidthSurfaceChroma[k]);
10112 dml2_printf("DML::%s: k=%u, NetDCCRateChroma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1);
10113 dml2_printf("DML::%s: k=%u, MaximumEffectiveCompressionChroma = %f\n", __func__, k, l->MaximumEffectiveCompressionChroma);
10114 #endif
10115 l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1;
10116 l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1 / l->MaximumEffectiveCompressionChroma;
10117 }
10118 } else {
10119 l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] + p->ReadBandwidthSurfaceChroma[k];
10120 }
10121 l->TotalRowReadBandwidth = l->TotalRowReadBandwidth + p->DPPPerSurface[k] * (p->meta_row_bw[k] + p->dpte_row_bw[k]);
10122 }
10123 }
10124
10125 l->AverageDCCCompressionRate = p->TotalDataReadBandwidth / l->TotalCompressedReadBandwidth;
10126 l->AverageDCCZeroSizeFraction = l->TotalZeroSizeRequestReadBandwidth / p->TotalDataReadBandwidth;
10127
10128 #ifdef __DML_VBA_DEBUG__
10129 dml2_printf("DML::%s: UnboundedRequestEnabled = %u\n", __func__, p->UnboundedRequestEnabled);
10130 dml2_printf("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, l->TotalCompressedReadBandwidth);
10131 dml2_printf("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, l->TotalZeroSizeRequestReadBandwidth);
10132 dml2_printf("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, l->TotalZeroSizeCompressedReadBandwidth);
10133 dml2_printf("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, l->MaximumEffectiveCompressionLuma);
10134 dml2_printf("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, l->MaximumEffectiveCompressionChroma);
10135 dml2_printf("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate);
10136 dml2_printf("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, l->AverageDCCZeroSizeFraction);
10137
10138 dml2_printf("DML::%s: CompbufReservedSpace64B = %u (%f kbytes)\n", __func__, p->CompbufReservedSpace64B, p->CompbufReservedSpace64B * 64 / 1024.0);
10139 dml2_printf("DML::%s: CompbufReservedSpaceZs = %u\n", __func__, p->CompbufReservedSpaceZs);
10140 dml2_printf("DML::%s: CompressedBufferSizeInkByte = %u kbytes\n", __func__, p->CompressedBufferSizeInkByte);
10141 dml2_printf("DML::%s: ROBBufferSizeInKByte = %u kbytes\n", __func__, p->ROBBufferSizeInKByte);
10142 #endif
10143 if (l->AverageDCCZeroSizeFraction == 1) {
10144 l->AverageZeroSizeCompressionRate = l->TotalZeroSizeRequestReadBandwidth / l->TotalZeroSizeCompressedReadBandwidth;
10145 l->EffectiveCompressedBufferSize = (double)p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageZeroSizeCompressionRate + ((double)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 * l->AverageZeroSizeCompressionRate;
10146
10147
10148 } else if (l->AverageDCCZeroSizeFraction > 0) {
10149 l->AverageZeroSizeCompressionRate = l->TotalZeroSizeRequestReadBandwidth / l->TotalZeroSizeCompressedReadBandwidth;
10150 l->EffectiveCompressedBufferSize = math_min2((double)p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate,
10151 (double)p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate)) +
10152 (p->rob_alloc_compressed ? math_min2(((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * l->AverageDCCCompressionRate,
10153 ((double)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate))
10154 : ((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64));
10155
10156
10157 #ifdef __DML_VBA_DEBUG__
10158 dml2_printf("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate);
10159 dml2_printf("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate));
10160 dml2_printf("DML::%s: min 3 = %d\n", __func__, (p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64));
10161 dml2_printf("DML::%s: min 4 = %f\n", __func__, (p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate));
10162 #endif
10163 } else {
10164 l->EffectiveCompressedBufferSize = math_min2((double)p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate,
10165 (double)p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate) +
10166 ((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * (p->rob_alloc_compressed ? l->AverageDCCCompressionRate : 1.0);
10167
10168 #ifdef __DML_VBA_DEBUG__
10169 dml2_printf("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate);
10170 dml2_printf("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate);
10171 #endif
10172 }
10173
10174 #ifdef __DML_VBA_DEBUG__
10175 dml2_printf("DML::%s: MetaFIFOSizeInKEntries = %u\n", __func__, p->MetaFIFOSizeInKEntries);
10176 dml2_printf("DML::%s: ZeroSizeBufferEntries = %u\n", __func__, p->ZeroSizeBufferEntries);
10177 dml2_printf("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, l->AverageZeroSizeCompressionRate);
10178 dml2_printf("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0);
10179 #endif
10180
10181 *p->StutterPeriod = 0;
10182
10183 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
10184 if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) {
10185 l->LinesInDETY = ((double)p->DETBufferSizeY[k] + (p->UnboundedRequestEnabled == true ? l->EffectiveCompressedBufferSize : 0) * p->ReadBandwidthSurfaceLuma[k] / p->TotalDataReadBandwidth) / p->BytePerPixelDETY[k] / p->SwathWidthY[k];
10186 l->LinesInDETYRoundedDownToSwath = math_floor2(l->LinesInDETY, p->SwathHeightY[k]);
10187 l->DETBufferingTimeY = l->LinesInDETYRoundedDownToSwath * ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
10188 #ifdef __DML_VBA_DEBUG__
10189 dml2_printf("DML::%s: k=%u, DETBufferSizeY = %u (%u kbytes)\n", __func__, k, p->DETBufferSizeY[k], p->DETBufferSizeY[k] / 1024);
10190 dml2_printf("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]);
10191 dml2_printf("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]);
10192 dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]);
10193 dml2_printf("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, p->TotalDataReadBandwidth);
10194 dml2_printf("DML::%s: k=%u, LinesInDETY = %f\n", __func__, k, l->LinesInDETY);
10195 dml2_printf("DML::%s: k=%u, LinesInDETYRoundedDownToSwath = %f\n", __func__, k, l->LinesInDETYRoundedDownToSwath);
10196 dml2_printf("DML::%s: k=%u, VRatio = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
10197 dml2_printf("DML::%s: k=%u, DETBufferingTimeY = %f\n", __func__, k, l->DETBufferingTimeY);
10198 #endif
10199
10200 if (!FoundCriticalSurface || l->DETBufferingTimeY < *p->StutterPeriod) {
10201 bool isInterlaceTiming = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !p->ProgressiveToInterlaceUnitInOPP;
10202
10203 FoundCriticalSurface = true;
10204 *p->StutterPeriod = l->DETBufferingTimeY;
10205 l->FrameTimeCriticalSurface = (isInterlaceTiming ? math_floor2((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total / 2.0, 1.0) : p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total) * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
10206 l->VActiveTimeCriticalSurface = (isInterlaceTiming ? math_floor2((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_active / 2.0, 1.0) : p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_active) * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
10207 l->BytePerPixelYCriticalSurface = p->BytePerPixelY[k];
10208 l->SwathWidthYCriticalSurface = p->SwathWidthY[k];
10209 l->SwathHeightYCriticalSurface = p->SwathHeightY[k];
10210 l->BlockWidth256BytesYCriticalSurface = p->BlockWidth256BytesY[k];
10211 l->DETBufferSizeYCriticalSurface = p->DETBufferSizeY[k];
10212 l->MinTTUVBlankCriticalSurface = p->MinTTUVBlank[k];
10213 l->SinglePlaneCriticalSurface = (p->ReadBandwidthSurfaceChroma[k] == 0);
10214 l->SinglePipeCriticalSurface = (p->DPPPerSurface[k] == 1);
10215
10216 #ifdef __DML_VBA_DEBUG__
10217 dml2_printf("DML::%s: k=%u, FoundCriticalSurface = %u\n", __func__, k, FoundCriticalSurface);
10218 dml2_printf("DML::%s: k=%u, StutterPeriod = %f\n", __func__, k, *p->StutterPeriod);
10219 dml2_printf("DML::%s: k=%u, MinTTUVBlankCriticalSurface = %f\n", __func__, k, l->MinTTUVBlankCriticalSurface);
10220 dml2_printf("DML::%s: k=%u, FrameTimeCriticalSurface= %f\n", __func__, k, l->FrameTimeCriticalSurface);
10221 dml2_printf("DML::%s: k=%u, VActiveTimeCriticalSurface = %f\n", __func__, k, l->VActiveTimeCriticalSurface);
10222 dml2_printf("DML::%s: k=%u, BytePerPixelYCriticalSurface = %u\n", __func__, k, l->BytePerPixelYCriticalSurface);
10223 dml2_printf("DML::%s: k=%u, SwathWidthYCriticalSurface = %f\n", __func__, k, l->SwathWidthYCriticalSurface);
10224 dml2_printf("DML::%s: k=%u, SwathHeightYCriticalSurface = %f\n", __func__, k, l->SwathHeightYCriticalSurface);
10225 dml2_printf("DML::%s: k=%u, BlockWidth256BytesYCriticalSurface = %u\n", __func__, k, l->BlockWidth256BytesYCriticalSurface);
10226 dml2_printf("DML::%s: k=%u, SinglePlaneCriticalSurface = %u\n", __func__, k, l->SinglePlaneCriticalSurface);
10227 dml2_printf("DML::%s: k=%u, SinglePipeCriticalSurface = %u\n", __func__, k, l->SinglePipeCriticalSurface);
10228 #endif
10229 }
10230 }
10231 }
10232
10233 // For bounded requests, the stutter period is calculated based only on the DET size, but during the burst some of the returned data can sit inside the ROB/compressed buffer.
10234 // The stutter period itself is calculated only from the DET sizing:
10235 // if (cdb + rob >= det), the stutter burst will be absorbed by the cdb + rob, which sit before decompression;
10236 // else
10237 // the cdb + rob part will be returned at the compressed rate with urg bw (ideal bw), and
10238 // the det part will be returned at the uncompressed rate with 64B/dcfclk.
10239 //
10240 // For unbounded requests, the stutter period should be calculated from the total of CDB+ROB+DET, so the term "PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer"
10241 // should be == EffectiveCompressedBufferSize, which will be returned at the compressed rate; the rest of the stutter period comes from the DET and will be returned at the uncompressed rate with 64B/dcfclk.
10242
10243 l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = math_min2(*p->StutterPeriod * p->TotalDataReadBandwidth, l->EffectiveCompressedBufferSize);
10244 #ifdef __DML_VBA_DEBUG__
10245 dml2_printf("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate);
10246 dml2_printf("DML::%s: StutterPeriod*TotalDataReadBandwidth = %f (%f kbytes)\n", __func__, *p->StutterPeriod * p->TotalDataReadBandwidth, (*p->StutterPeriod * p->TotalDataReadBandwidth) / 1024.0);
10247 dml2_printf("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0);
10248 dml2_printf("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f (%f kbytes)\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / 1024);
10249 dml2_printf("DML::%s: ReturnBW = %f\n", __func__, p->ReturnBW);
10250 dml2_printf("DML::%s: TotalDataReadBandwidth = %f\n", __func__, p->TotalDataReadBandwidth);
10251 dml2_printf("DML::%s: TotalRowReadBandwidth = %f\n", __func__, l->TotalRowReadBandwidth);
10252 dml2_printf("DML::%s: DCFCLK = %f\n", __func__, p->DCFCLK);
10253 #endif
10254
10255 l->StutterBurstTime = l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer
10256 / (p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) +
10257 (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer)
10258 / math_min2(p->DCFCLK * 64, p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) +
10259 *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW;
10260 #ifdef __DML_VBA_DEBUG__
10261 dml2_printf("DML::%s: Part 1 = %f\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / p->ReturnBW / (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate));
10262 dml2_printf("DML::%s: Part 2 = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64));
10263 dml2_printf("DML::%s: Part 3 = %f\n", __func__, *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW);
10264 dml2_printf("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime);
10265 #endif
10266 l->TotalActiveWriteback = 0;
10267 memset(l->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool));
10268
10269 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
10270 if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) {
10271 if (!l->stream_visited[p->display_cfg->plane_descriptors[k].stream_index]) {
10272
10273 if (p->display_cfg->stream_descriptors[k].writeback.active_writebacks_per_stream > 0)
10274 l->TotalActiveWriteback = l->TotalActiveWriteback + 1;
10275
10276 if (TotalNumberOfActiveOTG == 0) { // first otg
10277 SinglePixelClock = ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
10278 SingleHTotal = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total;
10279 SingleVTotal = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total;
10280 } else if (SinglePixelClock != ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) ||
10281 SingleHTotal != p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total ||
10282 SingleVTotal != p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total) {
10283 SameTiming = false;
10284 }
10285 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
10286 l->stream_visited[p->display_cfg->plane_descriptors[k].stream_index] = 1;
10287 }
10288 }
10289 }
10290
10291 if (l->TotalActiveWriteback == 0) {
10292 #ifdef __DML_VBA_DEBUG__
10293 dml2_printf("DML::%s: SRExitTime = %f\n", __func__, p->SRExitTime);
10294 dml2_printf("DML::%s: SRExitZ8Time = %f\n", __func__, p->SRExitZ8Time);
10295 dml2_printf("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod);
10296 #endif
10297 *p->StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitTime + l->StutterBurstTime) / *p->StutterPeriod) * 100;
10298 *p->Z8StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitZ8Time + l->StutterBurstTime) / *p->StutterPeriod) * 100;
10299 *p->NumberOfStutterBurstsPerFrame = (*p->StutterEfficiencyNotIncludingVBlank > 0 ? (unsigned int)(math_ceil2(l->VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0);
10300 *p->Z8NumberOfStutterBurstsPerFrame = (*p->Z8StutterEfficiencyNotIncludingVBlank > 0 ? (unsigned int)(math_ceil2(l->VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0);
10301 } else {
10302 *p->StutterEfficiencyNotIncludingVBlank = 0.;
10303 *p->Z8StutterEfficiencyNotIncludingVBlank = 0.;
10304 *p->NumberOfStutterBurstsPerFrame = 0;
10305 *p->Z8NumberOfStutterBurstsPerFrame = 0;
10306 }
10307 #ifdef __DML_VBA_DEBUG__
10308 dml2_printf("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, l->VActiveTimeCriticalSurface);
10309 dml2_printf("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank);
10310 dml2_printf("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank);
10311 dml2_printf("DML::%s: NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->NumberOfStutterBurstsPerFrame);
10312 dml2_printf("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame);
10313 #endif
10314
10315 if (*p->StutterEfficiencyNotIncludingVBlank > 0) {
10316 if (!((p->SynchronizeTimings || TotalNumberOfActiveOTG == 1) && SameTiming)) {
10317 *p->StutterEfficiency = *p->StutterEfficiencyNotIncludingVBlank;
10318 } else {
10319 *p->StutterEfficiency = (1 - (*p->NumberOfStutterBurstsPerFrame * p->SRExitTime + l->StutterBurstTime * l->VActiveTimeCriticalSurface / *p->StutterPeriod) / l->FrameTimeCriticalSurface) * 100;
10320 }
10321 } else {
10322 *p->StutterEfficiency = 0;
10323 *p->NumberOfStutterBurstsPerFrame = 0;
10324 }
10325
10326 if (*p->Z8StutterEfficiencyNotIncludingVBlank > 0) {
10327 LastZ8StutterPeriod = l->VActiveTimeCriticalSurface - (*p->Z8NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod;
10328 if (!((p->SynchronizeTimings || TotalNumberOfActiveOTG == 1) && SameTiming)) {
10329 *p->Z8StutterEfficiency = *p->Z8StutterEfficiencyNotIncludingVBlank;
10330 } else {
10331 *p->Z8StutterEfficiency = (1 - (*p->Z8NumberOfStutterBurstsPerFrame * p->SRExitZ8Time + l->StutterBurstTime * l->VActiveTimeCriticalSurface / *p->StutterPeriod) / l->FrameTimeCriticalSurface) * 100;
10332 }
10333 } else {
10334 *p->Z8StutterEfficiency = 0.;
10335 *p->Z8NumberOfStutterBurstsPerFrame = 0;
10336 }
10337
10338 #ifdef __DML_VBA_DEBUG__
10339 dml2_printf("DML::%s: TotalNumberOfActiveOTG = %u\n", __func__, TotalNumberOfActiveOTG);
10340 dml2_printf("DML::%s: SameTiming = %u\n", __func__, SameTiming);
10341 dml2_printf("DML::%s: SynchronizeTimings = %u\n", __func__, p->SynchronizeTimings);
10342 dml2_printf("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
10343 dml2_printf("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Z8StutterEnterPlusExitWatermark);
10344 dml2_printf("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime);
10345 dml2_printf("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod);
10346 dml2_printf("DML::%s: StutterEfficiency = %f\n", __func__, *p->StutterEfficiency);
10347 dml2_printf("DML::%s: Z8StutterEfficiency = %f\n", __func__, *p->Z8StutterEfficiency);
10348 dml2_printf("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank);
10349 dml2_printf("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame);
10350 #endif
10351
10352 *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!p->UnboundedRequestEnabled && (p->NumberOfActiveSurfaces == 1) && l->SinglePlaneCriticalSurface && l->SinglePipeCriticalSurface);
10353
10354 #ifdef __DML_VBA_DEBUG__
10355 dml2_printf("DML::%s: DETBufferSizeYCriticalSurface = %u\n", __func__, l->DETBufferSizeYCriticalSurface);
10356 dml2_printf("DML::%s: PixelChunkSizeInKByte = %u\n", __func__, p->PixelChunkSizeInKByte);
10357 dml2_printf("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %u\n", __func__, *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
10358 #endif
10359 }
10360
dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex * in_out_params)10361 static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex *in_out_params)
10362 {
10363 const struct dml2_display_cfg *display_cfg = in_out_params->in_display_cfg;
10364 const struct dml2_mcg_min_clock_table *min_clk_table = in_out_params->min_clk_table;
10365 const struct core_display_cfg_support_info *cfg_support_info = in_out_params->cfg_support_info;
10366 struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib;
10367 struct dml2_display_cfg_programming *programming = in_out_params->programming;
10368
10369 struct dml2_core_calcs_mode_programming_locals *s = &mode_lib->scratch.dml_core_mode_programming_locals;
10370 struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params;
10371 struct dml2_core_calcs_CalculateVMRowAndSwath_params *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params;
10372 struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params;
10373 struct dml2_core_calcs_CalculateStutterEfficiency_params *CalculateStutterEfficiency_params = &mode_lib->scratch.CalculateStutterEfficiency_params;
10374 struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params;
10375 struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *CheckGlobalPrefetchAdmissibility_params = &mode_lib->scratch.CheckGlobalPrefetchAdmissibility_params;
10376 struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params;
10377 struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params;
10378 struct dml2_core_shared_CalculateMetaAndPTETimes_params *CalculateMetaAndPTETimes_params = &mode_lib->scratch.CalculateMetaAndPTETimes_params;
10379 struct dml2_core_calcs_calculate_peak_bandwidth_required_params *calculate_peak_bandwidth_params = &mode_lib->scratch.calculate_peak_bandwidth_params;
10380 struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *calculate_bytes_to_fetch_required_to_hide_latency_params = &mode_lib->scratch.calculate_bytes_to_fetch_required_to_hide_latency_params;
10381
10382 unsigned int k;
10383 bool must_support_iflip;
10384 const long min_return_uclk_cycles = 83;
10385 const long min_return_fclk_cycles = 75;
10386 const double max_fclk_mhz = min_clk_table->max_clocks_khz.fclk / 1000.0;
10387 double hard_minimum_dcfclk_mhz = (double)min_clk_table->dram_bw_table.entries[0].min_dcfclk_khz / 1000.0;
10388 double max_uclk_mhz = 0;
10389 double min_return_latency_in_DCFCLK_cycles = 0;
10390
10391 dml2_printf("DML::%s: --- START --- \n", __func__);
10392
10393 memset(&mode_lib->scratch, 0, sizeof(struct dml2_core_internal_scratch));
10394 memset(&mode_lib->mp, 0, sizeof(struct dml2_core_internal_mode_program));
10395
10396 s->num_active_planes = display_cfg->num_planes;
10397 get_stream_output_bpp(s->OutputBpp, display_cfg);
10398
10399 mode_lib->mp.num_active_pipes = dml_get_num_active_pipes(display_cfg->num_planes, cfg_support_info);
10400 dml_calc_pipe_plane_mapping(cfg_support_info, mode_lib->mp.pipe_plane);
10401
10402 mode_lib->mp.Dcfclk = programming->min_clocks.dcn4x.active.dcfclk_khz / 1000.0;
10403 mode_lib->mp.FabricClock = programming->min_clocks.dcn4x.active.fclk_khz / 1000.0;
10404 mode_lib->mp.dram_bw_mbps = uclk_khz_to_dram_bw_mbps(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table.dram_config);
10405 mode_lib->mp.uclk_freq_mhz = programming->min_clocks.dcn4x.active.uclk_khz / 1000.0;
10406 mode_lib->mp.GlobalDPPCLK = programming->min_clocks.dcn4x.dpprefclk_khz / 1000.0;
10407 s->SOCCLK = (double)programming->min_clocks.dcn4x.socclk_khz / 1000;
10408 mode_lib->mp.qos_param_index = get_qos_param_index(programming->min_clocks.dcn4x.active.uclk_khz, mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params);
10409 mode_lib->mp.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table);
10410
10411 for (k = 0; k < s->num_active_planes; ++k) {
10412 unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index;
10413 dml2_assert(cfg_support_info->stream_support_info[stream_index].odms_used <= 4);
10414 dml2_assert(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 4 ||
10415 cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 2 ||
10416 cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1);
10417
10418 if (cfg_support_info->stream_support_info[stream_index].odms_used > 1)
10419 dml2_assert(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1);
10420
10421 switch (cfg_support_info->stream_support_info[stream_index].odms_used) {
10422 case (4):
10423 mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_4to1;
10424 break;
10425 case (3):
10426 mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_3to1;
10427 break;
10428 case (2):
10429 mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_2to1;
10430 break;
10431 default:
10432 if (cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 4)
10433 mode_lib->mp.ODMMode[k] = dml2_odm_mode_mso_1to4;
10434 else if (cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 2)
10435 mode_lib->mp.ODMMode[k] = dml2_odm_mode_mso_1to2;
10436 else
10437 mode_lib->mp.ODMMode[k] = dml2_odm_mode_bypass;
10438 break;
10439 }
10440 }
10441
10442 for (k = 0; k < s->num_active_planes; ++k) {
10443 mode_lib->mp.NoOfDPP[k] = cfg_support_info->plane_support_info[k].dpps_used;
10444 mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4x.dppclk_khz / 1000.0;
10445 dml2_assert(mode_lib->mp.Dppclk[k] > 0);
10446 }
10447
10448 for (k = 0; k < s->num_active_planes; ++k) {
10449 unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index;
10450 mode_lib->mp.DSCCLK[k] = programming->stream_programming[stream_index].min_clocks.dcn4x.dscclk_khz / 1000.0;
10451 dml2_printf("DML::%s: k=%d stream_index=%d, mode_lib->mp.DSCCLK = %f\n", __func__, k, stream_index, mode_lib->mp.DSCCLK[k]);
10452 }
10453
10454 mode_lib->mp.Dispclk = programming->min_clocks.dcn4x.dispclk_khz / 1000.0;
10455 mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4x.deepsleep_dcfclk_khz / 1000.0;
10456
10457 dml2_assert(mode_lib->mp.Dcfclk > 0);
10458 dml2_assert(mode_lib->mp.FabricClock > 0);
10459 dml2_assert(mode_lib->mp.dram_bw_mbps > 0);
10460 dml2_assert(mode_lib->mp.uclk_freq_mhz > 0);
10461 dml2_assert(mode_lib->mp.GlobalDPPCLK > 0);
10462 dml2_assert(mode_lib->mp.Dispclk > 0);
10463 dml2_assert(mode_lib->mp.DCFCLKDeepSleep > 0);
10464 dml2_assert(s->SOCCLK > 0);
10465
10466 #ifdef __DML_VBA_DEBUG__
10467 dml2_printf("DML::%s: num_active_planes = %u\n", __func__, s->num_active_planes);
10468 dml2_printf("DML::%s: num_active_pipes = %u\n", __func__, mode_lib->mp.num_active_pipes);
10469 dml2_printf("DML::%s: Dcfclk = %f\n", __func__, mode_lib->mp.Dcfclk);
10470 dml2_printf("DML::%s: FabricClock = %f\n", __func__, mode_lib->mp.FabricClock);
10471 dml2_printf("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->mp.dram_bw_mbps);
10472 dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->mp.uclk_freq_mhz);
10473 dml2_printf("DML::%s: Dispclk = %f\n", __func__, mode_lib->mp.Dispclk);
10474 for (k = 0; k < s->num_active_planes; ++k) {
10475 dml2_printf("DML::%s: Dppclk[%0d] = %f\n", __func__, k, mode_lib->mp.Dppclk[k]);
10476 }
10477 dml2_printf("DML::%s: GlobalDPPCLK = %f\n", __func__, mode_lib->mp.GlobalDPPCLK);
10478 dml2_printf("DML::%s: DCFCLKDeepSleep = %f\n", __func__, mode_lib->mp.DCFCLKDeepSleep);
10479 dml2_printf("DML::%s: SOCCLK = %f\n", __func__, s->SOCCLK);
10480 dml2_printf("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index);
10481 dml2_printf("DML::%s: min_clk_table min_fclk_khz = %d\n", __func__, min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz);
10482 dml2_printf("DML::%s: min_clk_table uclk_mhz = %f\n", __func__, dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config));
10483 for (k = 0; k < mode_lib->mp.num_active_pipes; ++k) {
10484 dml2_printf("DML::%s: pipe=%d is in plane=%d\n", __func__, k, mode_lib->mp.pipe_plane[k]);
10485 dml2_printf("DML::%s: Per-plane DPPPerSurface[%0d] = %d\n", __func__, k, mode_lib->mp.NoOfDPP[k]);
10486 }
10487
10488 for (k = 0; k < s->num_active_planes; k++)
10489 dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns);
10490 #endif
10491
10492 CalculateMaxDETAndMinCompressedBufferSize(
10493 mode_lib->ip.config_return_buffer_size_in_kbytes,
10494 mode_lib->ip.config_return_buffer_segment_size_in_kbytes,
10495 mode_lib->ip.rob_buffer_size_kbytes,
10496 mode_lib->ip.max_num_dpp,
10497 display_cfg->overrides.hw.force_nom_det_size_kbytes.enable,
10498 display_cfg->overrides.hw.force_nom_det_size_kbytes.value,
10499 mode_lib->ip.dcn_mrq_present,
10500
10501 /* Output */
10502 &s->MaxTotalDETInKByte,
10503 &s->NomDETInKByte,
10504 &s->MinCompressedBufferSizeInKByte);
10505
10506
10507 PixelClockAdjustmentForProgressiveToInterlaceUnit(display_cfg, mode_lib->ip.ptoi_supported, s->PixelClockBackEnd);
10508
10509 for (k = 0; k < s->num_active_planes; ++k) {
10510 CalculateSinglePipeDPPCLKAndSCLThroughput(
10511 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
10512 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
10513 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
10514 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
10515 mode_lib->ip.max_dchub_pscl_bw_pix_per_clk,
10516 mode_lib->ip.max_pscl_lb_bw_pix_per_clk,
10517 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
10518 display_cfg->plane_descriptors[k].pixel_format,
10519 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps,
10520 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps,
10521 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps,
10522 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps,
10523
10524 /* Output */
10525 &mode_lib->mp.PSCL_THROUGHPUT[k],
10526 &mode_lib->mp.PSCL_THROUGHPUT_CHROMA[k],
10527 &mode_lib->mp.DPPCLKUsingSingleDPP[k]);
10528 }
10529
10530 for (k = 0; k < s->num_active_planes; ++k) {
10531 CalculateBytePerPixelAndBlockSizes(
10532 display_cfg->plane_descriptors[k].pixel_format,
10533 display_cfg->plane_descriptors[k].surface.tiling,
10534 display_cfg->plane_descriptors[k].surface.plane0.pitch,
10535 display_cfg->plane_descriptors[k].surface.plane1.pitch,
10536
10537 // Output
10538 &mode_lib->mp.BytePerPixelY[k],
10539 &mode_lib->mp.BytePerPixelC[k],
10540 &mode_lib->mp.BytePerPixelInDETY[k],
10541 &mode_lib->mp.BytePerPixelInDETC[k],
10542 &mode_lib->mp.Read256BlockHeightY[k],
10543 &mode_lib->mp.Read256BlockHeightC[k],
10544 &mode_lib->mp.Read256BlockWidthY[k],
10545 &mode_lib->mp.Read256BlockWidthC[k],
10546 &mode_lib->mp.MacroTileHeightY[k],
10547 &mode_lib->mp.MacroTileHeightC[k],
10548 &mode_lib->mp.MacroTileWidthY[k],
10549 &mode_lib->mp.MacroTileWidthC[k],
10550 &mode_lib->mp.surf_linear128_l[k],
10551 &mode_lib->mp.surf_linear128_c[k]);
10552 }
10553
10554 CalculateSwathWidth(
10555 display_cfg,
10556 false, // ForceSingleDPP
10557 s->num_active_planes,
10558 mode_lib->mp.ODMMode,
10559 mode_lib->mp.BytePerPixelY,
10560 mode_lib->mp.BytePerPixelC,
10561 mode_lib->mp.Read256BlockHeightY,
10562 mode_lib->mp.Read256BlockHeightC,
10563 mode_lib->mp.Read256BlockWidthY,
10564 mode_lib->mp.Read256BlockWidthC,
10565 mode_lib->mp.surf_linear128_l,
10566 mode_lib->mp.surf_linear128_c,
10567 mode_lib->mp.NoOfDPP,
10568
10569 /* Output */
10570 mode_lib->mp.req_per_swath_ub_l,
10571 mode_lib->mp.req_per_swath_ub_c,
10572 mode_lib->mp.SwathWidthSingleDPPY,
10573 mode_lib->mp.SwathWidthSingleDPPC,
10574 mode_lib->mp.SwathWidthY,
10575 mode_lib->mp.SwathWidthC,
10576 s->dummy_integer_array[0], // unsigned int MaximumSwathHeightY[]
10577 s->dummy_integer_array[1], // unsigned int MaximumSwathHeightC[]
10578 mode_lib->mp.swath_width_luma_ub,
10579 mode_lib->mp.swath_width_chroma_ub);
10580
10581 for (k = 0; k < s->num_active_planes; ++k) {
10582 mode_lib->mp.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width * display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 /
10583 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000));
10584 mode_lib->mp.vactive_sw_bw_l[k] = mode_lib->mp.SwathWidthSingleDPPY[k] * mode_lib->mp.BytePerPixelY[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
10585 mode_lib->mp.vactive_sw_bw_c[k] = mode_lib->mp.SwathWidthSingleDPPC[k] * mode_lib->mp.BytePerPixelC[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
10586 dml2_printf("DML::%s: vactive_sw_bw_l[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]);
10587 dml2_printf("DML::%s: vactive_sw_bw_c[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]);
10588 }
10589
10590 CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg;
10591 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ip.config_return_buffer_size_in_kbytes;
10592 CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = s->MaxTotalDETInKByte;
10593 CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = s->MinCompressedBufferSizeInKByte;
10594 CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
10595 CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
10596 CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
10597 CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
10598 CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false;
10599 CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = s->num_active_planes;
10600 CalculateSwathAndDETConfiguration_params->nomDETInKByte = s->NomDETInKByte;
10601 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes;
10602 CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes;
10603 CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->mp.vactive_sw_bw_l;
10604 CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->mp.vactive_sw_bw_c;
10605 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = s->dummy_single_array[0];
10606 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = s->dummy_single_array[1];
10607 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->mp.Read256BlockHeightY;
10608 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->mp.Read256BlockHeightC;
10609 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->mp.Read256BlockWidthY;
10610 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->mp.Read256BlockWidthC;
10611 CalculateSwathAndDETConfiguration_params->surf_linear128_l = mode_lib->mp.surf_linear128_l;
10612 CalculateSwathAndDETConfiguration_params->surf_linear128_c = mode_lib->mp.surf_linear128_c;
10613 CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->mp.ODMMode;
10614 CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->mp.NoOfDPP;
10615 CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->mp.BytePerPixelY;
10616 CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->mp.BytePerPixelC;
10617 CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->mp.BytePerPixelInDETY;
10618 CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->mp.BytePerPixelInDETC;
10619 CalculateSwathAndDETConfiguration_params->mrq_present = mode_lib->ip.dcn_mrq_present;
10620
10621 // output
10622 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = mode_lib->mp.req_per_swath_ub_l;
10623 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = mode_lib->mp.req_per_swath_ub_c;
10624 CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_long_array[0];
10625 CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_long_array[1];
10626 CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_long_array[2];
10627 CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_long_array[3];
10628 CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->mp.SwathHeightY;
10629 CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->mp.SwathHeightC;
10630 CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = mode_lib->mp.request_size_bytes_luma;
10631 CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = mode_lib->mp.request_size_bytes_chroma;
10632 CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->mp.DETBufferSizeInKByte;
10633 CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY;
10634 CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->mp.DETBufferSizeC;
10635 CalculateSwathAndDETConfiguration_params->full_swath_bytes_l = s->full_swath_bytes_l;
10636 CalculateSwathAndDETConfiguration_params->full_swath_bytes_c = s->full_swath_bytes_c;
10637 CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->mp.UnboundedRequestEnabled;
10638 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &mode_lib->mp.compbuf_reserved_space_64b;
10639 CalculateSwathAndDETConfiguration_params->hw_debug5 = &mode_lib->mp.hw_debug5;
10640 CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->mp.CompressedBufferSizeInkByte;
10641 CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = &s->dummy_boolean_array[0][0];
10642 CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[0];
10643
10644 // Calculate DET size, swath height here.
10645 CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params);
10646
10647 // DSC Delay
10648 for (k = 0; k < s->num_active_planes; ++k) {
10649 mode_lib->mp.DSCDelay[k] = DSCDelayRequirement(cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].dsc_enable,
10650 mode_lib->mp.ODMMode[k],
10651 mode_lib->ip.maximum_dsc_bits_per_component,
10652 s->OutputBpp[k],
10653 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
10654 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
10655 cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].num_dsc_slices,
10656 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
10657 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
10658 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
10659 s->PixelClockBackEnd[k]);
10660 }
10661
10662 // Prefetch
10663 if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0) {
10664 for (k = 0; k < s->num_active_planes; ++k)
10665 mode_lib->mp.SurfaceSizeInTheMALL[k] = 0;
10666 } else {
10667 CalculateSurfaceSizeInMall(
10668 display_cfg,
10669 s->num_active_planes,
10670 mode_lib->soc.mall_allocated_for_dcn_mbytes,
10671 mode_lib->mp.BytePerPixelY,
10672 mode_lib->mp.BytePerPixelC,
10673 mode_lib->mp.Read256BlockWidthY,
10674 mode_lib->mp.Read256BlockWidthC,
10675 mode_lib->mp.Read256BlockHeightY,
10676 mode_lib->mp.Read256BlockHeightC,
10677 mode_lib->mp.MacroTileWidthY,
10678 mode_lib->mp.MacroTileWidthC,
10679 mode_lib->mp.MacroTileHeightY,
10680 mode_lib->mp.MacroTileHeightC,
10681
10682 /* Output */
10683 mode_lib->mp.SurfaceSizeInTheMALL,
10684 &s->dummy_boolean[0]); /* bool *ExceededMALLSize */
10685 }
10686
10687 for (k = 0; k < s->num_active_planes; ++k) {
10688 s->SurfaceParameters[k].PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
10689 s->SurfaceParameters[k].DPPPerSurface = mode_lib->mp.NoOfDPP[k];
10690 s->SurfaceParameters[k].RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
10691 s->SurfaceParameters[k].ViewportHeight = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
10692 s->SurfaceParameters[k].ViewportHeightC = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
10693 s->SurfaceParameters[k].BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY[k];
10694 s->SurfaceParameters[k].BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY[k];
10695 s->SurfaceParameters[k].BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC[k];
10696 s->SurfaceParameters[k].BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC[k];
10697 s->SurfaceParameters[k].BlockWidthY = mode_lib->mp.MacroTileWidthY[k];
10698 s->SurfaceParameters[k].BlockHeightY = mode_lib->mp.MacroTileHeightY[k];
10699 s->SurfaceParameters[k].BlockWidthC = mode_lib->mp.MacroTileWidthC[k];
10700 s->SurfaceParameters[k].BlockHeightC = mode_lib->mp.MacroTileHeightC[k];
10701 s->SurfaceParameters[k].InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
10702 s->SurfaceParameters[k].HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
10703 s->SurfaceParameters[k].DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
10704 s->SurfaceParameters[k].SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
10705 s->SurfaceParameters[k].SurfaceTiling = display_cfg->plane_descriptors[k].surface.tiling;
10706 s->SurfaceParameters[k].BytePerPixelY = mode_lib->mp.BytePerPixelY[k];
10707 s->SurfaceParameters[k].BytePerPixelC = mode_lib->mp.BytePerPixelC[k];
10708 s->SurfaceParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
10709 s->SurfaceParameters[k].VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
10710 s->SurfaceParameters[k].VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
10711 s->SurfaceParameters[k].VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
10712 s->SurfaceParameters[k].VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
10713 s->SurfaceParameters[k].PitchY = display_cfg->plane_descriptors[k].surface.plane0.pitch;
10714 s->SurfaceParameters[k].PitchC = display_cfg->plane_descriptors[k].surface.plane1.pitch;
10715 s->SurfaceParameters[k].ViewportStationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
10716 s->SurfaceParameters[k].ViewportXStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
10717 s->SurfaceParameters[k].ViewportYStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
10718 s->SurfaceParameters[k].ViewportXStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
10719 s->SurfaceParameters[k].ViewportYStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
10720 s->SurfaceParameters[k].FORCE_ONE_ROW_FOR_FRAME = display_cfg->plane_descriptors[k].overrides.hw.force_one_row_for_frame;
10721 s->SurfaceParameters[k].SwathHeightY = mode_lib->mp.SwathHeightY[k];
10722 s->SurfaceParameters[k].SwathHeightC = mode_lib->mp.SwathHeightC[k];
10723 s->SurfaceParameters[k].DCCMetaPitchY = display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch;
10724 s->SurfaceParameters[k].DCCMetaPitchC = display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch;
10725 }
10726
10727 CalculateVMRowAndSwath_params->display_cfg = display_cfg;
10728 CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = s->num_active_planes;
10729 CalculateVMRowAndSwath_params->myPipe = s->SurfaceParameters;
10730 CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->mp.SurfaceSizeInTheMALL;
10731 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
10732 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma;
10733 CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->soc.mall_allocated_for_dcn_mbytes;
10734 CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->mp.SwathWidthY;
10735 CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->mp.SwathWidthC;
10736 CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
10737 CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ip.dcc_meta_buffer_size_bytes;
10738 CalculateVMRowAndSwath_params->mrq_present = mode_lib->ip.dcn_mrq_present;
10739
10740 // output
10741 CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0];
10742 CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = mode_lib->mp.dpte_row_width_luma_ub;
10743 CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = mode_lib->mp.dpte_row_width_chroma_ub;
10744 CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->mp.dpte_row_height;
10745 CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->mp.dpte_row_height_chroma;
10746 CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = mode_lib->mp.dpte_row_height_linear;
10747 CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = mode_lib->mp.dpte_row_height_linear_chroma;
10748 CalculateVMRowAndSwath_params->vm_group_bytes = mode_lib->mp.vm_group_bytes;
10749 CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes;
10750 CalculateVMRowAndSwath_params->PixelPTEReqWidthY = mode_lib->mp.PixelPTEReqWidthY;
10751 CalculateVMRowAndSwath_params->PixelPTEReqHeightY = mode_lib->mp.PixelPTEReqHeightY;
10752 CalculateVMRowAndSwath_params->PTERequestSizeY = mode_lib->mp.PTERequestSizeY;
10753 CalculateVMRowAndSwath_params->PixelPTEReqWidthC = mode_lib->mp.PixelPTEReqWidthC;
10754 CalculateVMRowAndSwath_params->PixelPTEReqHeightC = mode_lib->mp.PixelPTEReqHeightC;
10755 CalculateVMRowAndSwath_params->PTERequestSizeC = mode_lib->mp.PTERequestSizeC;
10756 CalculateVMRowAndSwath_params->vmpg_width_y = s->vmpg_width_y;
10757 CalculateVMRowAndSwath_params->vmpg_height_y = s->vmpg_height_y;
10758 CalculateVMRowAndSwath_params->vmpg_width_c = s->vmpg_width_c;
10759 CalculateVMRowAndSwath_params->vmpg_height_c = s->vmpg_height_c;
10760 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = mode_lib->mp.dpde0_bytes_per_frame_ub_l;
10761 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = mode_lib->mp.dpde0_bytes_per_frame_ub_c;
10762 CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->mp.PrefetchSourceLinesY;
10763 CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->mp.PrefetchSourceLinesC;
10764 CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->mp.VInitPreFillY;
10765 CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->mp.VInitPreFillC;
10766 CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->mp.MaxNumSwathY;
10767 CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->mp.MaxNumSwathC;
10768 CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->mp.dpte_row_bw;
10769 CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->mp.PixelPTEBytesPerRow;
10770 CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
10771 CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
10772 CalculateVMRowAndSwath_params->vm_bytes = mode_lib->mp.vm_bytes;
10773 CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->mp.use_one_row_for_frame;
10774 CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->mp.use_one_row_for_frame_flip;
10775 CalculateVMRowAndSwath_params->is_using_mall_for_ss = mode_lib->mp.is_using_mall_for_ss;
10776 CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = mode_lib->mp.PTE_BUFFER_MODE;
10777 CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = mode_lib->mp.BIGK_FRAGMENT_SIZE;
10778 CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = s->dummy_boolean_array[1];
10779 CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->mp.meta_row_bw;
10780 CalculateVMRowAndSwath_params->meta_row_bytes = mode_lib->mp.meta_row_bytes;
10781 CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
10782 CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
10783 CalculateVMRowAndSwath_params->meta_req_width_luma = mode_lib->mp.meta_req_width;
10784 CalculateVMRowAndSwath_params->meta_req_height_luma = mode_lib->mp.meta_req_height;
10785 CalculateVMRowAndSwath_params->meta_row_width_luma = mode_lib->mp.meta_row_width;
10786 CalculateVMRowAndSwath_params->meta_row_height_luma = mode_lib->mp.meta_row_height;
10787 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = mode_lib->mp.meta_pte_bytes_per_frame_ub_l;
10788 CalculateVMRowAndSwath_params->meta_req_width_chroma = mode_lib->mp.meta_req_width_chroma;
10789 CalculateVMRowAndSwath_params->meta_row_height_chroma = mode_lib->mp.meta_row_height_chroma;
10790 CalculateVMRowAndSwath_params->meta_row_width_chroma = mode_lib->mp.meta_row_width_chroma;
10791 CalculateVMRowAndSwath_params->meta_req_height_chroma = mode_lib->mp.meta_req_height_chroma;
10792 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = mode_lib->mp.meta_pte_bytes_per_frame_ub_c;
10793
10794 CalculateVMRowAndSwath(&mode_lib->scratch, CalculateVMRowAndSwath_params);
10795
10796 memset(calculate_mcache_setting_params, 0, sizeof(struct dml2_core_calcs_calculate_mcache_setting_params));
10797 if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0 || mode_lib->ip.dcn_mrq_present) {
10798 for (k = 0; k < s->num_active_planes; k++) {
10799 mode_lib->mp.mall_prefetch_sdp_overhead_factor[k] = 1.0;
10800 mode_lib->mp.mall_prefetch_dram_overhead_factor[k] = 1.0;
10801 mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0[k] = 1.0;
10802 mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0[k] = 1.0;
10803 mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1[k] = 1.0;
10804 mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1[k] = 1.0;
10805 }
10806 } else {
10807 for (k = 0; k < s->num_active_planes; k++) {
10808 calculate_mcache_setting_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
10809 calculate_mcache_setting_params->num_chans = mode_lib->soc.clk_table.dram_config.channel_count;
10810 calculate_mcache_setting_params->mem_word_bytes = mode_lib->soc.mem_word_bytes;
10811 calculate_mcache_setting_params->mcache_size_bytes = mode_lib->soc.mcache_size_bytes;
10812 calculate_mcache_setting_params->mcache_line_size_bytes = mode_lib->soc.mcache_line_size_bytes;
10813 calculate_mcache_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
10814 calculate_mcache_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
10815
10816 calculate_mcache_setting_params->source_format = display_cfg->plane_descriptors[k].pixel_format;
10817 calculate_mcache_setting_params->surf_vert = dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle);
10818 calculate_mcache_setting_params->vp_stationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
10819 calculate_mcache_setting_params->tiling_mode = display_cfg->plane_descriptors[k].surface.tiling;
10820 calculate_mcache_setting_params->imall_enable = mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall;
10821
10822 calculate_mcache_setting_params->vp_start_x_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
10823 calculate_mcache_setting_params->vp_start_y_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
10824 calculate_mcache_setting_params->full_vp_width_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
10825 calculate_mcache_setting_params->full_vp_height_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
10826 calculate_mcache_setting_params->blk_width_l = mode_lib->mp.MacroTileWidthY[k];
10827 calculate_mcache_setting_params->blk_height_l = mode_lib->mp.MacroTileHeightY[k];
10828 calculate_mcache_setting_params->vmpg_width_l = s->vmpg_width_y[k];
10829 calculate_mcache_setting_params->vmpg_height_l = s->vmpg_height_y[k];
10830 calculate_mcache_setting_params->full_swath_bytes_l = s->full_swath_bytes_l[k];
10831 calculate_mcache_setting_params->bytes_per_pixel_l = mode_lib->mp.BytePerPixelY[k];
10832
10833 calculate_mcache_setting_params->vp_start_x_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
10834 calculate_mcache_setting_params->vp_start_y_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
10835 calculate_mcache_setting_params->full_vp_width_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.width;
10836 calculate_mcache_setting_params->full_vp_height_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
10837 calculate_mcache_setting_params->blk_width_c = mode_lib->mp.MacroTileWidthC[k];
10838 calculate_mcache_setting_params->blk_height_c = mode_lib->mp.MacroTileHeightC[k];
10839 calculate_mcache_setting_params->vmpg_width_c = s->vmpg_width_c[k];
10840 calculate_mcache_setting_params->vmpg_height_c = s->vmpg_height_c[k];
10841 calculate_mcache_setting_params->full_swath_bytes_c = s->full_swath_bytes_c[k];
10842 calculate_mcache_setting_params->bytes_per_pixel_c = mode_lib->mp.BytePerPixelC[k];
10843
10844 // output
10845 calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_l = &mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0[k];
10846 calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_l = &mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0[k];
10847 calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_c = &mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1[k];
10848 calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_c = &mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1[k];
10849
10850 calculate_mcache_setting_params->num_mcaches_l = &mode_lib->mp.num_mcaches_l[k];
10851 calculate_mcache_setting_params->mcache_row_bytes_l = &mode_lib->mp.mcache_row_bytes_l[k];
10852 calculate_mcache_setting_params->mcache_offsets_l = mode_lib->mp.mcache_offsets_l[k];
10853 calculate_mcache_setting_params->mcache_shift_granularity_l = &mode_lib->mp.mcache_shift_granularity_l[k];
10854
10855 calculate_mcache_setting_params->num_mcaches_c = &mode_lib->mp.num_mcaches_c[k];
10856 calculate_mcache_setting_params->mcache_row_bytes_c = &mode_lib->mp.mcache_row_bytes_c[k];
10857 calculate_mcache_setting_params->mcache_offsets_c = mode_lib->mp.mcache_offsets_c[k];
10858 calculate_mcache_setting_params->mcache_shift_granularity_c = &mode_lib->mp.mcache_shift_granularity_c[k];
10859
10860 calculate_mcache_setting_params->mall_comb_mcache_l = &mode_lib->mp.mall_comb_mcache_l[k];
10861 calculate_mcache_setting_params->mall_comb_mcache_c = &mode_lib->mp.mall_comb_mcache_c[k];
10862 calculate_mcache_setting_params->lc_comb_mcache = &mode_lib->mp.lc_comb_mcache[k];
10863 calculate_mcache_setting(&mode_lib->scratch, calculate_mcache_setting_params);
10864 }
10865
10866 calculate_mall_bw_overhead_factor(
10867 mode_lib->mp.mall_prefetch_sdp_overhead_factor,
10868 mode_lib->mp.mall_prefetch_dram_overhead_factor,
10869
10870 // input
10871 display_cfg,
10872 s->num_active_planes);
10873 }
10874
10875 // Calculate all the bandwidth available
10876 calculate_bandwidth_available(
10877 mode_lib->mp.avg_bandwidth_available_min,
10878 mode_lib->mp.avg_bandwidth_available,
10879 mode_lib->mp.urg_bandwidth_available_min,
10880 mode_lib->mp.urg_bandwidth_available,
10881 mode_lib->mp.urg_bandwidth_available_vm_only,
10882 mode_lib->mp.urg_bandwidth_available_pixel_and_vm,
10883
10884 &mode_lib->soc,
10885 display_cfg->hostvm_enable,
10886 mode_lib->mp.Dcfclk,
10887 mode_lib->mp.FabricClock,
10888 mode_lib->mp.dram_bw_mbps);
10889
10890
10891 calculate_hostvm_inefficiency_factor(
10892 &s->HostVMInefficiencyFactor,
10893 &s->HostVMInefficiencyFactorPrefetch,
10894
10895 display_cfg->gpuvm_enable,
10896 display_cfg->hostvm_enable,
10897 mode_lib->ip.remote_iommu_outstanding_translations,
10898 mode_lib->soc.max_outstanding_reqs,
10899 mode_lib->mp.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active],
10900 mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]);
10901
10902 s->TotalDCCActiveDPP = 0;
10903 s->TotalActiveDPP = 0;
10904 for (k = 0; k < s->num_active_planes; ++k) {
10905 s->TotalActiveDPP = s->TotalActiveDPP + mode_lib->mp.NoOfDPP[k];
10906 if (display_cfg->plane_descriptors[k].surface.dcc.enable)
10907 s->TotalDCCActiveDPP = s->TotalDCCActiveDPP + mode_lib->mp.NoOfDPP[k];
10908 }
10909 // Calculate tdlut schedule related terms
10910 for (k = 0; k <= s->num_active_planes - 1; k++) {
10911 calculate_tdlut_setting_params->dispclk_mhz = mode_lib->mp.Dispclk;
10912 calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
10913 calculate_tdlut_setting_params->tdlut_width_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode;
10914 calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode;
10915 calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size;
10916 calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
10917 calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
10918
10919 // output
10920 calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k];
10921 calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k];
10922 calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k];
10923 calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k];
10924 calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k];
10925 calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k];
10926 calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k];
10927 calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params);
10928 }
10929
10930 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3)
10931 s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes,
10932 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
10933 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes));
10934
10935 CalculateExtraLatency(
10936 display_cfg,
10937 mode_lib->ip.rob_buffer_size_kbytes,
10938 mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles,
10939 s->ReorderingBytes,
10940 mode_lib->mp.Dcfclk,
10941 mode_lib->mp.FabricClock,
10942 mode_lib->ip.pixel_chunk_size_kbytes,
10943 mode_lib->mp.urg_bandwidth_available_min[dml2_core_internal_soc_state_sys_active],
10944 s->num_active_planes,
10945 mode_lib->mp.NoOfDPP,
10946 mode_lib->mp.dpte_group_bytes,
10947 s->tdlut_bytes_per_group,
10948 s->HostVMInefficiencyFactor,
10949 s->HostVMInefficiencyFactorPrefetch,
10950 mode_lib->soc.hostvm_min_page_size_kbytes,
10951 mode_lib->soc.qos_parameters.qos_type,
10952 !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable),
10953 mode_lib->soc.max_outstanding_reqs,
10954 mode_lib->mp.request_size_bytes_luma,
10955 mode_lib->mp.request_size_bytes_chroma,
10956 mode_lib->ip.meta_chunk_size_kbytes,
10957 mode_lib->ip.dchub_arb_to_ret_delay,
10958 mode_lib->mp.TripToMemory,
10959 mode_lib->ip.hostvm_mode,
10960
10961 // output
10962 &mode_lib->mp.ExtraLatency,
10963 &mode_lib->mp.ExtraLatency_sr,
10964 &mode_lib->mp.ExtraLatencyPrefetch);
10965
10966 mode_lib->mp.TCalc = 24.0 / mode_lib->mp.DCFCLKDeepSleep;
10967
10968 for (k = 0; k < s->num_active_planes; ++k) {
10969 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
10970 mode_lib->mp.WritebackDelay[k] =
10971 mode_lib->soc.qos_parameters.writeback.base_latency_us
10972 + CalculateWriteBackDelay(
10973 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format,
10974 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio,
10975 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio,
10976 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps,
10977 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width,
10978 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height,
10979 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height,
10980 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->mp.Dispclk;
10981 } else
10982 mode_lib->mp.WritebackDelay[k] = 0;
10983 }
10984
10985 /* VActive bytes to fetch for UCLK P-State */
10986 calculate_bytes_to_fetch_required_to_hide_latency_params->display_cfg = display_cfg;
10987 calculate_bytes_to_fetch_required_to_hide_latency_params->mrq_present = mode_lib->ip.dcn_mrq_present;
10988
10989 calculate_bytes_to_fetch_required_to_hide_latency_params->num_active_planes = s->num_active_planes;
10990 calculate_bytes_to_fetch_required_to_hide_latency_params->num_of_dpp = mode_lib->mp.NoOfDPP;
10991 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_l = mode_lib->mp.meta_row_height;
10992 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_c = mode_lib->mp.meta_row_height_chroma;
10993 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
10994 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
10995 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_l = mode_lib->mp.dpte_row_height;
10996 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_c = mode_lib->mp.dpte_row_height_chroma;
10997 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
10998 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
10999 calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_l = mode_lib->mp.BytePerPixelY;
11000 calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_c = mode_lib->mp.BytePerPixelC;
11001 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_l = mode_lib->mp.SwathWidthY;
11002 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_c = mode_lib->mp.SwathWidthC;
11003 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_l = mode_lib->mp.SwathHeightY;
11004 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_c = mode_lib->mp.SwathHeightC;
11005 calculate_bytes_to_fetch_required_to_hide_latency_params->latency_to_hide_us = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
11006
11007 /* outputs */
11008 calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_l = s->pstate_bytes_required_l;
11009 calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_c = s->pstate_bytes_required_c;
11010
11011 calculate_bytes_to_fetch_required_to_hide_latency(calculate_bytes_to_fetch_required_to_hide_latency_params);
11012
11013 /* Excess VActive bandwidth required to fill DET */
11014 calculate_excess_vactive_bandwidth_required(
11015 display_cfg,
11016 s->num_active_planes,
11017 s->pstate_bytes_required_l,
11018 s->pstate_bytes_required_c,
11019 /* outputs */
11020 mode_lib->mp.excess_vactive_fill_bw_l,
11021 mode_lib->mp.excess_vactive_fill_bw_c);
11022
11023 mode_lib->mp.UrgentLatency = CalculateUrgentLatency(
11024 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us,
11025 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us,
11026 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us,
11027 mode_lib->soc.do_urgent_latency_adjustment,
11028 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us,
11029 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz,
11030 mode_lib->mp.FabricClock,
11031 mode_lib->mp.uclk_freq_mhz,
11032 mode_lib->soc.qos_parameters.qos_type,
11033 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].urgent_ramp_uclk_cycles,
11034 mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles,
11035 mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
11036 mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
11037 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin,
11038 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
11039
11040 mode_lib->mp.TripToMemory = CalculateTripToMemory(
11041 mode_lib->mp.UrgentLatency,
11042 mode_lib->mp.FabricClock,
11043 mode_lib->mp.uclk_freq_mhz,
11044 mode_lib->soc.qos_parameters.qos_type,
11045 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].trip_to_memory_uclk_cycles,
11046 mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
11047 mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
11048 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin,
11049 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
11050
11051 mode_lib->mp.TripToMemory = math_max2(mode_lib->mp.UrgentLatency, mode_lib->mp.TripToMemory);
11052
11053 mode_lib->mp.MetaTripToMemory = CalculateMetaTripToMemory(
11054 mode_lib->mp.UrgentLatency,
11055 mode_lib->mp.FabricClock,
11056 mode_lib->mp.uclk_freq_mhz,
11057 mode_lib->soc.qos_parameters.qos_type,
11058 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].meta_trip_to_memory_uclk_cycles,
11059 mode_lib->soc.qos_parameters.qos_params.dcn4x.meta_trip_adder_fclk_cycles,
11060 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin,
11061 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
11062
11063 for (k = 0; k < s->num_active_planes; ++k) {
11064 bool cursor_not_enough_urgent_latency_hiding = 0;
11065 s->line_times[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
11066 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
11067
11068 s->pixel_format[k] = display_cfg->plane_descriptors[k].pixel_format;
11069
11070 s->lb_source_lines_l[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
11071 mode_lib->mp.NoOfDPP[k],
11072 display_cfg->plane_descriptors[k].composition.viewport.plane0.width,
11073 display_cfg->plane_descriptors[k].composition.viewport.plane0.height,
11074 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
11075 display_cfg->plane_descriptors[k].composition.rotation_angle);
11076
11077 s->lb_source_lines_c[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
11078 mode_lib->mp.NoOfDPP[k],
11079 display_cfg->plane_descriptors[k].composition.viewport.plane1.width,
11080 display_cfg->plane_descriptors[k].composition.viewport.plane1.height,
11081 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
11082 display_cfg->plane_descriptors[k].composition.rotation_angle);
11083
11084 if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) {
11085 calculate_cursor_req_attributes(
11086 display_cfg->plane_descriptors[k].cursor.cursor_width,
11087 display_cfg->plane_descriptors[k].cursor.cursor_bpp,
11088
11089 // output
11090 &s->cursor_lines_per_chunk[k],
11091 &s->cursor_bytes_per_line[k],
11092 &s->cursor_bytes_per_chunk[k],
11093 &s->cursor_bytes[k]);
11094
11095 calculate_cursor_urgent_burst_factor(
11096 mode_lib->ip.cursor_buffer_size,
11097 display_cfg->plane_descriptors[k].cursor.cursor_width,
11098 s->cursor_bytes_per_chunk[k],
11099 s->cursor_lines_per_chunk[k],
11100 s->line_times[k],
11101 mode_lib->mp.UrgentLatency,
11102
11103 // output
11104 &mode_lib->mp.UrgentBurstFactorCursor[k],
11105 &cursor_not_enough_urgent_latency_hiding);
11106 }
11107 mode_lib->mp.UrgentBurstFactorCursorPre[k] = mode_lib->mp.UrgentBurstFactorCursor[k];
11108
11109 CalculateUrgentBurstFactor(
11110 &display_cfg->plane_descriptors[k],
11111 mode_lib->mp.swath_width_luma_ub[k],
11112 mode_lib->mp.swath_width_chroma_ub[k],
11113 mode_lib->mp.SwathHeightY[k],
11114 mode_lib->mp.SwathHeightC[k],
11115 s->line_times[k],
11116 mode_lib->mp.UrgentLatency,
11117 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
11118 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
11119 mode_lib->mp.BytePerPixelInDETY[k],
11120 mode_lib->mp.BytePerPixelInDETC[k],
11121 mode_lib->mp.DETBufferSizeY[k],
11122 mode_lib->mp.DETBufferSizeC[k],
11123
11124 /* output */
11125 &mode_lib->mp.UrgentBurstFactorLuma[k],
11126 &mode_lib->mp.UrgentBurstFactorChroma[k],
11127 &mode_lib->mp.NotEnoughUrgentLatencyHiding[k]);
11128
11129 mode_lib->mp.NotEnoughUrgentLatencyHiding[k] = mode_lib->mp.NotEnoughUrgentLatencyHiding[k] || cursor_not_enough_urgent_latency_hiding;
11130 }
11131
11132 for (k = 0; k < s->num_active_planes; ++k) {
11133 s->MaxVStartupLines[k] = CalculateMaxVStartup(
11134 mode_lib->ip.ptoi_supported,
11135 mode_lib->ip.vblank_nom_default_us,
11136 &display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing,
11137 mode_lib->mp.WritebackDelay[k]);
11138
11139 #ifdef __DML_VBA_DEBUG__
11140 dml2_printf("DML::%s: k=%u MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
11141 dml2_printf("DML::%s: k=%u WritebackDelay = %f\n", __func__, k, mode_lib->mp.WritebackDelay[k]);
11142 #endif
11143 }
11144
11145 s->immediate_flip_required = false;
11146 for (k = 0; k < s->num_active_planes; ++k) {
11147 s->immediate_flip_required = s->immediate_flip_required || display_cfg->plane_descriptors[k].immediate_flip;
11148 }
11149 #ifdef __DML_VBA_DEBUG__
11150 dml2_printf("DML::%s: immediate_flip_required = %u\n", __func__, s->immediate_flip_required);
11151 #endif
11152
11153 if (s->num_active_planes > 1) {
11154 CheckGlobalPrefetchAdmissibility_params->num_active_planes = s->num_active_planes;
11155 CheckGlobalPrefetchAdmissibility_params->pixel_format = s->pixel_format;
11156 CheckGlobalPrefetchAdmissibility_params->chunk_bytes_l = mode_lib->ip.pixel_chunk_size_kbytes * 1024;
11157 CheckGlobalPrefetchAdmissibility_params->chunk_bytes_c = mode_lib->ip.pixel_chunk_size_kbytes * 1024;
11158 CheckGlobalPrefetchAdmissibility_params->lb_source_lines_l = s->lb_source_lines_l;
11159 CheckGlobalPrefetchAdmissibility_params->lb_source_lines_c = s->lb_source_lines_c;
11160 CheckGlobalPrefetchAdmissibility_params->swath_height_l = mode_lib->mp.SwathHeightY;
11161 CheckGlobalPrefetchAdmissibility_params->swath_height_c = mode_lib->mp.SwathHeightC;
11162 CheckGlobalPrefetchAdmissibility_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
11163 CheckGlobalPrefetchAdmissibility_params->compressed_buffer_size_kbytes = mode_lib->mp.CompressedBufferSizeInkByte;
11164 CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_l = mode_lib->mp.DETBufferSizeY;
11165 CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_c = mode_lib->mp.DETBufferSizeC;
11166 CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_l = s->full_swath_bytes_l;
11167 CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_c = s->full_swath_bytes_c;
11168 CheckGlobalPrefetchAdmissibility_params->prefetch_sw_bytes = s->prefetch_sw_bytes;
11169 CheckGlobalPrefetchAdmissibility_params->Tpre_rounded = 0; // don't care
11170 CheckGlobalPrefetchAdmissibility_params->Tpre_oto = 0; // don't care
11171 CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
11172 CheckGlobalPrefetchAdmissibility_params->estimated_dcfclk_mhz = mode_lib->mp.Dcfclk;
11173 CheckGlobalPrefetchAdmissibility_params->line_time = s->line_times;
11174 CheckGlobalPrefetchAdmissibility_params->dst_y_prefetch = mode_lib->mp.dst_y_prefetch;
11175
11176 // if recalc_prefetch_schedule is set, recalculate the prefetch schedule with the new impacted_Tpre, prefetch should be possible
11177 CheckGlobalPrefetchAdmissibility_params->recalc_prefetch_schedule = &s->dummy_boolean[0];
11178 CheckGlobalPrefetchAdmissibility_params->impacted_dst_y_pre = s->impacted_dst_y_pre;
11179 CheckGlobalPrefetchAdmissibility(&mode_lib->scratch, CheckGlobalPrefetchAdmissibility_params); // dont care about the check output for mode programming
11180 }
11181
11182 {
11183 s->DestinationLineTimesForPrefetchLessThan2 = false;
11184 s->VRatioPrefetchMoreThanMax = false;
11185
11186 dml2_printf("DML::%s: Start one iteration of prefetch schedule evaluation\n", __func__);
11187
11188 for (k = 0; k < s->num_active_planes; ++k) {
11189 struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe;
11190
11191 dml2_printf("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
11192 mode_lib->mp.TWait[k] = CalculateTWait(
11193 display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns,
11194 mode_lib->mp.UrgentLatency,
11195 mode_lib->mp.TripToMemory,
11196 !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ?
11197 get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0);
11198
11199 myPipe->Dppclk = mode_lib->mp.Dppclk[k];
11200 myPipe->Dispclk = mode_lib->mp.Dispclk;
11201 myPipe->PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
11202 myPipe->DCFClkDeepSleep = mode_lib->mp.DCFCLKDeepSleep;
11203 myPipe->DPPPerSurface = mode_lib->mp.NoOfDPP[k];
11204 myPipe->ScalerEnabled = display_cfg->plane_descriptors[k].composition.scaler_info.enabled;
11205 myPipe->VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
11206 myPipe->VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
11207 myPipe->VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
11208 myPipe->VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
11209 myPipe->RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
11210 myPipe->mirrored = display_cfg->plane_descriptors[k].composition.mirrored;
11211 myPipe->BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY[k];
11212 myPipe->BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY[k];
11213 myPipe->BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC[k];
11214 myPipe->BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC[k];
11215 myPipe->InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
11216 myPipe->NumberOfCursors = display_cfg->plane_descriptors[k].cursor.num_cursors;
11217 myPipe->VBlank = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active;
11218 myPipe->HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
11219 myPipe->HActive = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active;
11220 myPipe->DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
11221 myPipe->ODMMode = mode_lib->mp.ODMMode[k];
11222 myPipe->SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
11223 myPipe->BytePerPixelY = mode_lib->mp.BytePerPixelY[k];
11224 myPipe->BytePerPixelC = mode_lib->mp.BytePerPixelC[k];
11225 myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
11226
11227 #ifdef __DML_VBA_DEBUG__
11228 dml2_printf("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k);
11229 #endif
11230 CalculatePrefetchSchedule_params->display_cfg = display_cfg;
11231 CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch;
11232 CalculatePrefetchSchedule_params->myPipe = myPipe;
11233 CalculatePrefetchSchedule_params->DSCDelay = mode_lib->mp.DSCDelay[k];
11234 CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ip.dppclk_delay_subtotal + mode_lib->ip.dppclk_delay_cnvc_formatter;
11235 CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ip.dppclk_delay_scl;
11236 CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ip.dppclk_delay_scl_lb_only;
11237 CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ip.dppclk_delay_cnvc_cursor;
11238 CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ip.dispclk_delay_subtotal;
11239 CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (unsigned int)(mode_lib->mp.SwathWidthY[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
11240 CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format;
11241 CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters;
11242 CalculatePrefetchSchedule_params->VStartup = s->MaxVStartupLines[k];
11243 CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
11244 CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable;
11245 CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled;
11246 CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required;
11247 CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes;
11248 CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->mp.UrgentLatency;
11249 CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->mp.ExtraLatencyPrefetch;
11250 CalculatePrefetchSchedule_params->TCalc = mode_lib->mp.TCalc;
11251 CalculatePrefetchSchedule_params->vm_bytes = mode_lib->mp.vm_bytes[k];
11252 CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->mp.PixelPTEBytesPerRow[k];
11253 CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->mp.PrefetchSourceLinesY[k];
11254 CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->mp.VInitPreFillY[k];
11255 CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->mp.MaxNumSwathY[k];
11256 CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->mp.PrefetchSourceLinesC[k];
11257 CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->mp.VInitPreFillC[k];
11258 CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->mp.MaxNumSwathC[k];
11259 CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->mp.swath_width_luma_ub[k];
11260 CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->mp.swath_width_chroma_ub[k];
11261 CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->mp.SwathHeightY[k];
11262 CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->mp.SwathHeightC[k];
11263 CalculatePrefetchSchedule_params->TWait = mode_lib->mp.TWait[k];
11264 CalculatePrefetchSchedule_params->Ttrip = mode_lib->mp.TripToMemory;
11265 CalculatePrefetchSchedule_params->Turg = mode_lib->mp.UrgentLatency;
11266 CalculatePrefetchSchedule_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
11267 CalculatePrefetchSchedule_params->tdlut_pte_bytes_per_frame = s->tdlut_pte_bytes_per_frame[k];
11268 CalculatePrefetchSchedule_params->tdlut_bytes_per_frame = s->tdlut_bytes_per_frame[k];
11269 CalculatePrefetchSchedule_params->tdlut_opt_time = s->tdlut_opt_time[k];
11270 CalculatePrefetchSchedule_params->tdlut_drain_time = s->tdlut_drain_time[k];
11271 CalculatePrefetchSchedule_params->num_cursors = (display_cfg->plane_descriptors[k].cursor.cursor_width > 0);
11272 CalculatePrefetchSchedule_params->cursor_bytes_per_chunk = s->cursor_bytes_per_chunk[k];
11273 CalculatePrefetchSchedule_params->cursor_bytes_per_line = s->cursor_bytes_per_line[k];
11274 CalculatePrefetchSchedule_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
11275 CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present;
11276 CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->mp.meta_row_bytes[k];
11277 CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor[k];
11278 CalculatePrefetchSchedule_params->impacted_dst_y_pre = s->impacted_dst_y_pre[k];
11279 CalculatePrefetchSchedule_params->vactive_sw_bw_l = mode_lib->mp.vactive_sw_bw_l[k];
11280 CalculatePrefetchSchedule_params->vactive_sw_bw_c = mode_lib->mp.vactive_sw_bw_c[k];
11281
11282 // output
11283 CalculatePrefetchSchedule_params->DSTXAfterScaler = &mode_lib->mp.DSTXAfterScaler[k];
11284 CalculatePrefetchSchedule_params->DSTYAfterScaler = &mode_lib->mp.DSTYAfterScaler[k];
11285 CalculatePrefetchSchedule_params->dst_y_prefetch = &mode_lib->mp.dst_y_prefetch[k];
11286 CalculatePrefetchSchedule_params->dst_y_per_vm_vblank = &mode_lib->mp.dst_y_per_vm_vblank[k];
11287 CalculatePrefetchSchedule_params->dst_y_per_row_vblank = &mode_lib->mp.dst_y_per_row_vblank[k];
11288 CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->mp.VRatioPrefetchY[k];
11289 CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->mp.VRatioPrefetchC[k];
11290 CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k];
11291 CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k];
11292 CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->mp.NotEnoughTimeForDynamicMetadata[k];
11293 CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->mp.Tno_bw[k];
11294 CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->mp.Tno_bw_flip[k];
11295 CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->mp.prefetch_vmrow_bw[k];
11296 CalculatePrefetchSchedule_params->Tdmdl_vm = &mode_lib->mp.Tdmdl_vm[k];
11297 CalculatePrefetchSchedule_params->Tdmdl = &mode_lib->mp.Tdmdl[k];
11298 CalculatePrefetchSchedule_params->TSetup = &mode_lib->mp.TSetup[k];
11299 CalculatePrefetchSchedule_params->Tvm_trips = &s->Tvm_trips[k];
11300 CalculatePrefetchSchedule_params->Tr0_trips = &s->Tr0_trips[k];
11301 CalculatePrefetchSchedule_params->Tvm_trips_flip = &s->Tvm_trips_flip[k];
11302 CalculatePrefetchSchedule_params->Tr0_trips_flip = &s->Tr0_trips_flip[k];
11303 CalculatePrefetchSchedule_params->Tvm_trips_flip_rounded = &s->Tvm_trips_flip_rounded[k];
11304 CalculatePrefetchSchedule_params->Tr0_trips_flip_rounded = &s->Tr0_trips_flip_rounded[k];
11305 CalculatePrefetchSchedule_params->VUpdateOffsetPix = &mode_lib->mp.VUpdateOffsetPix[k];
11306 CalculatePrefetchSchedule_params->VUpdateWidthPix = &mode_lib->mp.VUpdateWidthPix[k];
11307 CalculatePrefetchSchedule_params->VReadyOffsetPix = &mode_lib->mp.VReadyOffsetPix[k];
11308 CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->mp.prefetch_cursor_bw[k];
11309 CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k];
11310 CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k];
11311 CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k];
11312 CalculatePrefetchSchedule_params->prefetch_swath_time_us = &s->dummy_single[0];
11313
11314 mode_lib->mp.NoTimeToPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params);
11315
11316 if (s->impacted_dst_y_pre[k] > 0)
11317 mode_lib->mp.impacted_prefetch_margin_us[k] = (mode_lib->mp.dst_y_prefetch[k] - s->impacted_dst_y_pre[k]) * s->line_times[k];
11318 else
11319 mode_lib->mp.impacted_prefetch_margin_us[k] = 0;
11320
11321 #ifdef __DML_VBA_DEBUG__
11322 dml2_printf("DML::%s: k=%0u NoTimeToPrefetch=%0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]);
11323 #endif
11324 mode_lib->mp.VStartupMin[k] = s->MaxVStartupLines[k];
11325 } // for k
11326
11327 mode_lib->mp.PrefetchModeSupported = true;
11328 for (k = 0; k < s->num_active_planes; ++k) {
11329 if (mode_lib->mp.NoTimeToPrefetch[k] == true ||
11330 mode_lib->mp.NotEnoughTimeForDynamicMetadata[k] ||
11331 mode_lib->mp.DSTYAfterScaler[k] > 8) {
11332 dml2_printf("DML::%s: k=%u, NoTimeToPrefetch = %0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]);
11333 dml2_printf("DML::%s: k=%u, NotEnoughTimeForDynamicMetadata=%u\n", __func__, k, mode_lib->mp.NotEnoughTimeForDynamicMetadata[k]);
11334 dml2_printf("DML::%s: k=%u, DSTYAfterScaler=%u (should be <= 0)\n", __func__, k, mode_lib->mp.DSTYAfterScaler[k]);
11335 mode_lib->mp.PrefetchModeSupported = false;
11336 }
11337 if (mode_lib->mp.dst_y_prefetch[k] < 2)
11338 s->DestinationLineTimesForPrefetchLessThan2 = true;
11339
11340 if (mode_lib->mp.VRatioPrefetchY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ ||
11341 mode_lib->mp.VRatioPrefetchC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) {
11342 s->VRatioPrefetchMoreThanMax = true;
11343 dml2_printf("DML::%s: k=%d, VRatioPrefetchY=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k], __DML2_CALCS_MAX_VRATIO_PRE__);
11344 dml2_printf("DML::%s: k=%d, VRatioPrefetchC=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchC[k], __DML2_CALCS_MAX_VRATIO_PRE__);
11345 dml2_printf("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax);
11346 }
11347
11348 if (mode_lib->mp.NotEnoughUrgentLatencyHiding[k]) {
11349 dml2_printf("DML::%s: k=%u, NotEnoughUrgentLatencyHiding = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHiding[k]);
11350 mode_lib->mp.PrefetchModeSupported = false;
11351 }
11352 }
11353
11354 if (s->VRatioPrefetchMoreThanMax == true || s->DestinationLineTimesForPrefetchLessThan2 == true) {
11355 dml2_printf("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax);
11356 dml2_printf("DML::%s: DestinationLineTimesForPrefetchLessThan2 = %u\n", __func__, s->DestinationLineTimesForPrefetchLessThan2);
11357 mode_lib->mp.PrefetchModeSupported = false;
11358 }
11359
11360 dml2_printf("DML::%s: Prefetch schedule is %sOK at vstartup = %u\n", __func__,
11361 mode_lib->mp.PrefetchModeSupported ? "" : "NOT ", CalculatePrefetchSchedule_params->VStartup);
11362
11363 // Prefetch schedule OK, now check prefetch bw
11364 if (mode_lib->mp.PrefetchModeSupported == true) {
11365 for (k = 0; k < s->num_active_planes; ++k) {
11366 double line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
11367 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
11368 CalculateUrgentBurstFactor(
11369 &display_cfg->plane_descriptors[k],
11370 mode_lib->mp.swath_width_luma_ub[k],
11371 mode_lib->mp.swath_width_chroma_ub[k],
11372 mode_lib->mp.SwathHeightY[k],
11373 mode_lib->mp.SwathHeightC[k],
11374 line_time_us,
11375 mode_lib->mp.UrgentLatency,
11376 mode_lib->mp.VRatioPrefetchY[k],
11377 mode_lib->mp.VRatioPrefetchC[k],
11378 mode_lib->mp.BytePerPixelInDETY[k],
11379 mode_lib->mp.BytePerPixelInDETC[k],
11380 mode_lib->mp.DETBufferSizeY[k],
11381 mode_lib->mp.DETBufferSizeC[k],
11382 /* Output */
11383 &mode_lib->mp.UrgentBurstFactorLumaPre[k],
11384 &mode_lib->mp.UrgentBurstFactorChromaPre[k],
11385 &mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]);
11386
11387 #ifdef __DML_VBA_DEBUG__
11388 dml2_printf("DML::%s: k=%0u DPPPerSurface=%u\n", __func__, k, mode_lib->mp.NoOfDPP[k]);
11389 dml2_printf("DML::%s: k=%0u UrgentBurstFactorLuma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLuma[k]);
11390 dml2_printf("DML::%s: k=%0u UrgentBurstFactorChroma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChroma[k]);
11391 dml2_printf("DML::%s: k=%0u UrgentBurstFactorLumaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLumaPre[k]);
11392 dml2_printf("DML::%s: k=%0u UrgentBurstFactorChromaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChromaPre[k]);
11393
11394 dml2_printf("DML::%s: k=%0u VRatioPrefetchY=%f\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k]);
11395 dml2_printf("DML::%s: k=%0u VRatioY=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
11396
11397 dml2_printf("DML::%s: k=%0u prefetch_vmrow_bw=%f\n", __func__, k, mode_lib->mp.prefetch_vmrow_bw[k]);
11398 dml2_printf("DML::%s: k=%0u vactive_sw_bw_l=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]);
11399 dml2_printf("DML::%s: k=%0u vactive_sw_bw_c=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]);
11400 dml2_printf("DML::%s: k=%0u cursor_bw=%f\n", __func__, k, mode_lib->mp.cursor_bw[k]);
11401 dml2_printf("DML::%s: k=%0u dpte_row_bw=%f\n", __func__, k, mode_lib->mp.dpte_row_bw[k]);
11402 dml2_printf("DML::%s: k=%0u meta_row_bw=%f\n", __func__, k, mode_lib->mp.meta_row_bw[k]);
11403 dml2_printf("DML::%s: k=%0u RequiredPrefetchPixelDataBWLuma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k]);
11404 dml2_printf("DML::%s: k=%0u RequiredPrefetchPixelDataBWChroma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k]);
11405 dml2_printf("DML::%s: k=%0u prefetch_cursor_bw=%f\n", __func__, k, mode_lib->mp.prefetch_cursor_bw[k]);
11406 #endif
11407 }
11408
11409 for (k = 0; k <= s->num_active_planes - 1; k++)
11410 mode_lib->mp.final_flip_bw[k] = 0;
11411
11412 calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = mode_lib->mp.urg_vactive_bandwidth_required;
11413 calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->mp.urg_bandwidth_required;
11414 calculate_peak_bandwidth_params->urg_bandwidth_required_qual = mode_lib->mp.urg_bandwidth_required_qual;
11415 calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->mp.non_urg_bandwidth_required;
11416 calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw;
11417 calculate_peak_bandwidth_params->surface_peak_required_bw = s->surface_dummy_bw0;
11418
11419 calculate_peak_bandwidth_params->display_cfg = display_cfg;
11420 calculate_peak_bandwidth_params->inc_flip_bw = 0;
11421 calculate_peak_bandwidth_params->num_active_planes = s->num_active_planes;
11422 calculate_peak_bandwidth_params->num_of_dpp = mode_lib->mp.NoOfDPP;
11423 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0;
11424 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1;
11425 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0;
11426 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1;
11427 calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor;
11428 calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->mp.mall_prefetch_dram_overhead_factor;
11429
11430 calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->mp.vactive_sw_bw_l;
11431 calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c;
11432 calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma;
11433 calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma;
11434 calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l;
11435 calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->mp.excess_vactive_fill_bw_c;
11436 calculate_peak_bandwidth_params->cursor_bw = mode_lib->mp.cursor_bw;
11437 calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->mp.dpte_row_bw;
11438 calculate_peak_bandwidth_params->meta_row_bw = mode_lib->mp.meta_row_bw;
11439 calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->mp.prefetch_cursor_bw;
11440 calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->mp.prefetch_vmrow_bw;
11441 calculate_peak_bandwidth_params->flip_bw = mode_lib->mp.final_flip_bw;
11442 calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->mp.UrgentBurstFactorLuma;
11443 calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->mp.UrgentBurstFactorChroma;
11444 calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->mp.UrgentBurstFactorCursor;
11445 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->mp.UrgentBurstFactorLumaPre;
11446 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->mp.UrgentBurstFactorChromaPre;
11447 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->mp.UrgentBurstFactorCursorPre;
11448
11449 calculate_peak_bandwidth_required(
11450 &mode_lib->scratch,
11451 calculate_peak_bandwidth_params);
11452
11453 // Check urg peak bandwidth against available urg bw
11454 // check at SDP and DRAM, for all soc states (SVP prefetch and Sys Active)
11455 check_urgent_bandwidth_support(
11456 &mode_lib->mp.FractionOfUrgentBandwidth, // double* frac_urg_bandwidth
11457 &mode_lib->mp.FractionOfUrgentBandwidthMALL, // double* frac_urg_bandwidth_mall
11458 &s->dummy_boolean[1], // vactive bw ok
11459 &mode_lib->mp.PrefetchModeSupported, // prefetch bw ok
11460
11461 mode_lib->soc.mall_allocated_for_dcn_mbytes,
11462 mode_lib->mp.non_urg_bandwidth_required,
11463 mode_lib->mp.urg_vactive_bandwidth_required,
11464 mode_lib->mp.urg_bandwidth_required,
11465 mode_lib->mp.urg_bandwidth_available);
11466
11467 if (!mode_lib->mp.PrefetchModeSupported)
11468 dml2_printf("DML::%s: Bandwidth not sufficient for prefetch!\n", __func__);
11469
11470 for (k = 0; k < s->num_active_planes; ++k) {
11471 if (mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]) {
11472 dml2_printf("DML::%s: k=%u, NotEnoughUrgentLatencyHidingPre = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]);
11473 mode_lib->mp.PrefetchModeSupported = false;
11474 }
11475 }
11476 } // prefetch schedule ok
11477
11478 // Prefetch schedule and prefetch bw ok, now check flip bw
11479 if (mode_lib->mp.PrefetchModeSupported == true) { // prefetch schedule and prefetch bw ok, now check flip bw
11480
11481 mode_lib->mp.BandwidthAvailableForImmediateFlip =
11482 get_bandwidth_available_for_immediate_flip(
11483 dml2_core_internal_soc_state_sys_active,
11484 mode_lib->mp.urg_bandwidth_required_qual, // no flip
11485 mode_lib->mp.urg_bandwidth_available);
11486 mode_lib->mp.TotImmediateFlipBytes = 0;
11487 for (k = 0; k < s->num_active_planes; ++k) {
11488 if (display_cfg->plane_descriptors[k].immediate_flip) {
11489 s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes(s->HostVMInefficiencyFactor,
11490 mode_lib->mp.vm_bytes[k],
11491 mode_lib->mp.PixelPTEBytesPerRow[k],
11492 mode_lib->mp.meta_row_bytes[k]);
11493 } else {
11494 s->per_pipe_flip_bytes[k] = 0;
11495 }
11496 mode_lib->mp.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->mp.NoOfDPP[k];
11497 #ifdef __DML_VBA_DEBUG__
11498 dml2_printf("DML::%s: k = %u\n", __func__, k);
11499 dml2_printf("DML::%s: DPPPerSurface = %u\n", __func__, mode_lib->mp.NoOfDPP[k]);
11500 dml2_printf("DML::%s: vm_bytes = %u\n", __func__, mode_lib->mp.vm_bytes[k]);
11501 dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, mode_lib->mp.PixelPTEBytesPerRow[k]);
11502 dml2_printf("DML::%s: meta_row_bytes = %u\n", __func__, mode_lib->mp.meta_row_bytes[k]);
11503 dml2_printf("DML::%s: TotImmediateFlipBytes = %u\n", __func__, mode_lib->mp.TotImmediateFlipBytes);
11504 #endif
11505 }
11506 for (k = 0; k < s->num_active_planes; ++k) {
11507 CalculateFlipSchedule(
11508 &mode_lib->scratch,
11509 display_cfg->plane_descriptors[k].immediate_flip,
11510 0, // use_lb_flip_bw
11511 s->HostVMInefficiencyFactor,
11512 s->Tvm_trips_flip[k],
11513 s->Tr0_trips_flip[k],
11514 s->Tvm_trips_flip_rounded[k],
11515 s->Tr0_trips_flip_rounded[k],
11516 display_cfg->gpuvm_enable,
11517 mode_lib->mp.vm_bytes[k],
11518 mode_lib->mp.PixelPTEBytesPerRow[k],
11519 mode_lib->mp.BandwidthAvailableForImmediateFlip,
11520 mode_lib->mp.TotImmediateFlipBytes,
11521 display_cfg->plane_descriptors[k].pixel_format,
11522 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
11523 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
11524 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
11525 mode_lib->mp.Tno_bw[k],
11526 mode_lib->mp.dpte_row_height[k],
11527 mode_lib->mp.dpte_row_height_chroma[k],
11528 mode_lib->mp.use_one_row_for_frame_flip[k],
11529 mode_lib->ip.max_flip_time_us,
11530 mode_lib->ip.max_flip_time_lines,
11531 s->per_pipe_flip_bytes[k],
11532 mode_lib->mp.meta_row_bytes[k],
11533 mode_lib->mp.meta_row_height[k],
11534 mode_lib->mp.meta_row_height_chroma[k],
11535 mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable,
11536
11537 // Output
11538 &mode_lib->mp.dst_y_per_vm_flip[k],
11539 &mode_lib->mp.dst_y_per_row_flip[k],
11540 &mode_lib->mp.final_flip_bw[k],
11541 &mode_lib->mp.ImmediateFlipSupportedForPipe[k]);
11542 }
11543
11544 calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = s->dummy_bw;
11545 calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->mp.urg_bandwidth_required_flip;
11546 calculate_peak_bandwidth_params->urg_bandwidth_required_qual = s->dummy_bw;
11547 calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->mp.non_urg_bandwidth_required_flip;
11548 calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw;
11549 calculate_peak_bandwidth_params->surface_peak_required_bw = s->surface_dummy_bw0;
11550
11551 calculate_peak_bandwidth_params->display_cfg = display_cfg;
11552 calculate_peak_bandwidth_params->inc_flip_bw = 1;
11553 calculate_peak_bandwidth_params->num_active_planes = s->num_active_planes;
11554 calculate_peak_bandwidth_params->num_of_dpp = mode_lib->mp.NoOfDPP;
11555 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0;
11556 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1;
11557 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0;
11558 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1;
11559 calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor;
11560 calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->mp.mall_prefetch_dram_overhead_factor;
11561
11562 calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->mp.vactive_sw_bw_l;
11563 calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c;
11564 calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma;
11565 calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma;
11566 calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l;
11567 calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->mp.excess_vactive_fill_bw_c;
11568 calculate_peak_bandwidth_params->cursor_bw = mode_lib->mp.cursor_bw;
11569 calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->mp.dpte_row_bw;
11570 calculate_peak_bandwidth_params->meta_row_bw = mode_lib->mp.meta_row_bw;
11571 calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->mp.prefetch_cursor_bw;
11572 calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->mp.prefetch_vmrow_bw;
11573 calculate_peak_bandwidth_params->flip_bw = mode_lib->mp.final_flip_bw;
11574 calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->mp.UrgentBurstFactorLuma;
11575 calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->mp.UrgentBurstFactorChroma;
11576 calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->mp.UrgentBurstFactorCursor;
11577 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->mp.UrgentBurstFactorLumaPre;
11578 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->mp.UrgentBurstFactorChromaPre;
11579 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->mp.UrgentBurstFactorCursorPre;
11580
11581 calculate_peak_bandwidth_required(
11582 &mode_lib->scratch,
11583 calculate_peak_bandwidth_params);
11584
11585 calculate_immediate_flip_bandwidth_support(
11586 &mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip, // double* frac_urg_bandwidth_flip
11587 &mode_lib->mp.ImmediateFlipSupported, // bool* flip_bandwidth_support_ok
11588
11589 dml2_core_internal_soc_state_sys_active,
11590 mode_lib->mp.urg_bandwidth_required_flip,
11591 mode_lib->mp.non_urg_bandwidth_required_flip,
11592 mode_lib->mp.urg_bandwidth_available);
11593
11594 if (!mode_lib->mp.ImmediateFlipSupported)
11595 dml2_printf("DML::%s: Bandwidth not sufficient for flip!", __func__);
11596
11597 for (k = 0; k < s->num_active_planes; ++k) {
11598 if (display_cfg->plane_descriptors[k].immediate_flip && mode_lib->mp.ImmediateFlipSupportedForPipe[k] == false) {
11599 mode_lib->mp.ImmediateFlipSupported = false;
11600 #ifdef __DML_VBA_DEBUG__
11601 dml2_printf("DML::%s: Pipe %0d not supporting iflip!\n", __func__, k);
11602 #endif
11603 }
11604 }
11605 } else { // flip or prefetch not support
11606 mode_lib->mp.ImmediateFlipSupported = false;
11607 }
11608
11609 // consider flip support okay if the flip bw is ok, or if the user doesn't require an iflip and host vm is not enabled
11610 must_support_iflip = display_cfg->hostvm_enable || s->immediate_flip_required;
11611 mode_lib->mp.PrefetchAndImmediateFlipSupported = (mode_lib->mp.PrefetchModeSupported == true && (!must_support_iflip || mode_lib->mp.ImmediateFlipSupported));
11612
11613 #ifdef __DML_VBA_DEBUG__
11614 dml2_printf("DML::%s: PrefetchModeSupported = %u\n", __func__, mode_lib->mp.PrefetchModeSupported);
11615 for (k = 0; k < s->num_active_planes; ++k)
11616 dml2_printf("DML::%s: immediate_flip_required[%u] = %u\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip);
11617 dml2_printf("DML::%s: HostVMEnable = %u\n", __func__, display_cfg->hostvm_enable);
11618 dml2_printf("DML::%s: ImmediateFlipSupported = %u\n", __func__, mode_lib->mp.ImmediateFlipSupported);
11619 dml2_printf("DML::%s: PrefetchAndImmediateFlipSupported = %u\n", __func__, mode_lib->mp.PrefetchAndImmediateFlipSupported);
11620 #endif
11621 dml2_printf("DML::%s: Done one iteration: k=%d, MaxVStartupLines=%u\n", __func__, k, s->MaxVStartupLines[k]);
11622 }
11623
11624 for (k = 0; k < s->num_active_planes; ++k)
11625 dml2_printf("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
11626
11627 if (!mode_lib->mp.PrefetchAndImmediateFlipSupported) {
11628 dml2_printf("DML::%s: Bad, Prefetch and flip scheduling solution NOT found!\n", __func__);
11629 } else {
11630 dml2_printf("DML::%s: Good, Prefetch and flip scheduling solution found\n", __func__);
11631
11632 // DCC Configuration
11633 for (k = 0; k < s->num_active_planes; ++k) {
11634 #ifdef __DML_VBA_DEBUG__
11635 dml2_printf("DML::%s: Calculate DCC configuration for surface k=%u\n", __func__, k);
11636 #endif
11637 CalculateDCCConfiguration(
11638 display_cfg->plane_descriptors[k].surface.dcc.enable,
11639 display_cfg->overrides.dcc_programming_assumes_scan_direction_unknown,
11640 display_cfg->plane_descriptors[k].pixel_format,
11641 display_cfg->plane_descriptors[k].surface.plane0.width,
11642 display_cfg->plane_descriptors[k].surface.plane1.width,
11643 display_cfg->plane_descriptors[k].surface.plane0.height,
11644 display_cfg->plane_descriptors[k].surface.plane1.height,
11645 s->NomDETInKByte,
11646 mode_lib->mp.Read256BlockHeightY[k],
11647 mode_lib->mp.Read256BlockHeightC[k],
11648 display_cfg->plane_descriptors[k].surface.tiling,
11649 mode_lib->mp.BytePerPixelY[k],
11650 mode_lib->mp.BytePerPixelC[k],
11651 mode_lib->mp.BytePerPixelInDETY[k],
11652 mode_lib->mp.BytePerPixelInDETC[k],
11653 display_cfg->plane_descriptors[k].composition.rotation_angle,
11654
11655 /* Output */
11656 &mode_lib->mp.RequestLuma[k],
11657 &mode_lib->mp.RequestChroma[k],
11658 &mode_lib->mp.DCCYMaxUncompressedBlock[k],
11659 &mode_lib->mp.DCCCMaxUncompressedBlock[k],
11660 &mode_lib->mp.DCCYMaxCompressedBlock[k],
11661 &mode_lib->mp.DCCCMaxCompressedBlock[k],
11662 &mode_lib->mp.DCCYIndependentBlock[k],
11663 &mode_lib->mp.DCCCIndependentBlock[k]);
11664 }
11665
11666 //Watermarks and NB P-State/DRAM Clock Change Support
11667 s->mmSOCParameters.UrgentLatency = mode_lib->mp.UrgentLatency;
11668 s->mmSOCParameters.ExtraLatency = mode_lib->mp.ExtraLatency;
11669 s->mmSOCParameters.ExtraLatency_sr = mode_lib->mp.ExtraLatency_sr;
11670 s->mmSOCParameters.WritebackLatency = mode_lib->soc.qos_parameters.writeback.base_latency_us;
11671 s->mmSOCParameters.DRAMClockChangeLatency = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
11672 s->mmSOCParameters.FCLKChangeLatency = mode_lib->soc.power_management_parameters.fclk_change_blackout_us;
11673 s->mmSOCParameters.SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us;
11674 s->mmSOCParameters.SREnterPlusExitTime = mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us;
11675 s->mmSOCParameters.SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us;
11676 s->mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_enter_plus_exit_latency_us;
11677 s->mmSOCParameters.USRRetrainingLatency = 0;
11678 s->mmSOCParameters.SMNLatency = 0;
11679 s->mmSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index);
11680 s->mmSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->mp.uclk_freq_mhz, mode_lib->mp.FabricClock, in_out_params->min_clk_index);
11681 s->mmSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->mp.FabricClock;
11682 s->mmSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type;
11683
11684 CalculateWatermarks_params->display_cfg = display_cfg;
11685 CalculateWatermarks_params->USRRetrainingRequired = false;
11686 CalculateWatermarks_params->NumberOfActiveSurfaces = s->num_active_planes;
11687 CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines;
11688 CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits;
11689 CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes;
11690 CalculateWatermarks_params->DCFCLK = mode_lib->mp.Dcfclk;
11691 CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings;
11692 CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change;
11693 CalculateWatermarks_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes;
11694 CalculateWatermarks_params->mmSOCParameters = s->mmSOCParameters;
11695 CalculateWatermarks_params->WritebackChunkSize = mode_lib->ip.writeback_chunk_size_kbytes;
11696 CalculateWatermarks_params->SOCCLK = s->SOCCLK;
11697 CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->mp.DCFCLKDeepSleep;
11698 CalculateWatermarks_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY;
11699 CalculateWatermarks_params->DETBufferSizeC = mode_lib->mp.DETBufferSizeC;
11700 CalculateWatermarks_params->SwathHeightY = mode_lib->mp.SwathHeightY;
11701 CalculateWatermarks_params->SwathHeightC = mode_lib->mp.SwathHeightC;
11702 CalculateWatermarks_params->SwathWidthY = mode_lib->mp.SwathWidthY;
11703 CalculateWatermarks_params->SwathWidthC = mode_lib->mp.SwathWidthC;
11704 CalculateWatermarks_params->BytePerPixelDETY = mode_lib->mp.BytePerPixelInDETY;
11705 CalculateWatermarks_params->BytePerPixelDETC = mode_lib->mp.BytePerPixelInDETC;
11706 CalculateWatermarks_params->DSTXAfterScaler = mode_lib->mp.DSTXAfterScaler;
11707 CalculateWatermarks_params->DSTYAfterScaler = mode_lib->mp.DSTYAfterScaler;
11708 CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->mp.UnboundedRequestEnabled;
11709 CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->mp.CompressedBufferSizeInkByte;
11710 CalculateWatermarks_params->meta_row_height_l = mode_lib->mp.meta_row_height;
11711 CalculateWatermarks_params->meta_row_height_c = mode_lib->mp.meta_row_height_chroma;
11712 CalculateWatermarks_params->DPPPerSurface = mode_lib->mp.NoOfDPP;
11713
11714 // Output
11715 CalculateWatermarks_params->Watermark = &mode_lib->mp.Watermark;
11716 CalculateWatermarks_params->DRAMClockChangeSupport = mode_lib->mp.DRAMClockChangeSupport;
11717 CalculateWatermarks_params->global_dram_clock_change_supported = &mode_lib->mp.global_dram_clock_change_supported;
11718 CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported;
11719 CalculateWatermarks_params->SubViewportLinesNeededInMALL = mode_lib->mp.SubViewportLinesNeededInMALL;
11720 CalculateWatermarks_params->FCLKChangeSupport = mode_lib->mp.FCLKChangeSupport;
11721 CalculateWatermarks_params->global_fclk_change_supported = &mode_lib->mp.global_fclk_change_supported;
11722 CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &mode_lib->mp.MaxActiveFCLKChangeLatencySupported;
11723 CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->mp.USRRetrainingSupport;
11724 CalculateWatermarks_params->g6_temp_read_support = &mode_lib->mp.g6_temp_read_support;
11725 CalculateWatermarks_params->VActiveLatencyHidingMargin = 0;
11726 CalculateWatermarks_params->VActiveLatencyHidingUs = 0;
11727
11728 CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params);
11729
11730 for (k = 0; k < s->num_active_planes; ++k) {
11731 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
11732 mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k] = math_max2(0, mode_lib->mp.VStartupMin[k] * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
11733 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) - mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark);
11734 mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k] = math_max2(0, mode_lib->mp.VStartupMin[k] * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
11735 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) - mode_lib->mp.Watermark.WritebackFCLKChangeWatermark);
11736 } else {
11737 mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k] = 0;
11738 mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k] = 0;
11739 }
11740 }
11741
11742 calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->mp.Watermark, mode_lib->mp.pstate_keepout_dst_lines);
11743
11744 dml2_printf("DML::%s: DEBUG stream_index = %0d\n", __func__, display_cfg->plane_descriptors[0].stream_index);
11745 dml2_printf("DML::%s: DEBUG PixelClock = %d kHz\n", __func__, (display_cfg->stream_descriptors[display_cfg->plane_descriptors[0].stream_index].timing.pixel_clock_khz));
11746
11747 //Display Pipeline Delivery Time in Prefetch, Groups
11748 CalculatePixelDeliveryTimes(
11749 display_cfg,
11750 cfg_support_info,
11751 s->num_active_planes,
11752 mode_lib->mp.VRatioPrefetchY,
11753 mode_lib->mp.VRatioPrefetchC,
11754 mode_lib->mp.swath_width_luma_ub,
11755 mode_lib->mp.swath_width_chroma_ub,
11756 mode_lib->mp.PSCL_THROUGHPUT,
11757 mode_lib->mp.PSCL_THROUGHPUT_CHROMA,
11758 mode_lib->mp.Dppclk,
11759 mode_lib->mp.BytePerPixelC,
11760 mode_lib->mp.req_per_swath_ub_l,
11761 mode_lib->mp.req_per_swath_ub_c,
11762
11763 /* Output */
11764 mode_lib->mp.DisplayPipeLineDeliveryTimeLuma,
11765 mode_lib->mp.DisplayPipeLineDeliveryTimeChroma,
11766 mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch,
11767 mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch,
11768 mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma,
11769 mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma,
11770 mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch,
11771 mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch);
11772
11773 CalculateMetaAndPTETimes_params->scratch = &mode_lib->scratch;
11774 CalculateMetaAndPTETimes_params->display_cfg = display_cfg;
11775 CalculateMetaAndPTETimes_params->NumberOfActiveSurfaces = s->num_active_planes;
11776 CalculateMetaAndPTETimes_params->use_one_row_for_frame = mode_lib->mp.use_one_row_for_frame;
11777 CalculateMetaAndPTETimes_params->dst_y_per_row_vblank = mode_lib->mp.dst_y_per_row_vblank;
11778 CalculateMetaAndPTETimes_params->dst_y_per_row_flip = mode_lib->mp.dst_y_per_row_flip;
11779 CalculateMetaAndPTETimes_params->BytePerPixelY = mode_lib->mp.BytePerPixelY;
11780 CalculateMetaAndPTETimes_params->BytePerPixelC = mode_lib->mp.BytePerPixelC;
11781 CalculateMetaAndPTETimes_params->dpte_row_height = mode_lib->mp.dpte_row_height;
11782 CalculateMetaAndPTETimes_params->dpte_row_height_chroma = mode_lib->mp.dpte_row_height_chroma;
11783 CalculateMetaAndPTETimes_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes;
11784 CalculateMetaAndPTETimes_params->PTERequestSizeY = mode_lib->mp.PTERequestSizeY;
11785 CalculateMetaAndPTETimes_params->PTERequestSizeC = mode_lib->mp.PTERequestSizeC;
11786 CalculateMetaAndPTETimes_params->PixelPTEReqWidthY = mode_lib->mp.PixelPTEReqWidthY;
11787 CalculateMetaAndPTETimes_params->PixelPTEReqHeightY = mode_lib->mp.PixelPTEReqHeightY;
11788 CalculateMetaAndPTETimes_params->PixelPTEReqWidthC = mode_lib->mp.PixelPTEReqWidthC;
11789 CalculateMetaAndPTETimes_params->PixelPTEReqHeightC = mode_lib->mp.PixelPTEReqHeightC;
11790 CalculateMetaAndPTETimes_params->dpte_row_width_luma_ub = mode_lib->mp.dpte_row_width_luma_ub;
11791 CalculateMetaAndPTETimes_params->dpte_row_width_chroma_ub = mode_lib->mp.dpte_row_width_chroma_ub;
11792 CalculateMetaAndPTETimes_params->tdlut_groups_per_2row_ub = s->tdlut_groups_per_2row_ub;
11793 CalculateMetaAndPTETimes_params->mrq_present = mode_lib->ip.dcn_mrq_present;
11794
11795 CalculateMetaAndPTETimes_params->MetaChunkSize = mode_lib->ip.meta_chunk_size_kbytes;
11796 CalculateMetaAndPTETimes_params->MinMetaChunkSizeBytes = mode_lib->ip.min_meta_chunk_size_bytes;
11797 CalculateMetaAndPTETimes_params->meta_row_width = mode_lib->mp.meta_row_width;
11798 CalculateMetaAndPTETimes_params->meta_row_width_chroma = mode_lib->mp.meta_row_width_chroma;
11799 CalculateMetaAndPTETimes_params->meta_row_height = mode_lib->mp.meta_row_height;
11800 CalculateMetaAndPTETimes_params->meta_row_height_chroma = mode_lib->mp.meta_row_height_chroma;
11801 CalculateMetaAndPTETimes_params->meta_req_width = mode_lib->mp.meta_req_width;
11802 CalculateMetaAndPTETimes_params->meta_req_width_chroma = mode_lib->mp.meta_req_width_chroma;
11803 CalculateMetaAndPTETimes_params->meta_req_height = mode_lib->mp.meta_req_height;
11804 CalculateMetaAndPTETimes_params->meta_req_height_chroma = mode_lib->mp.meta_req_height_chroma;
11805
11806 CalculateMetaAndPTETimes_params->time_per_tdlut_group = mode_lib->mp.time_per_tdlut_group;
11807 CalculateMetaAndPTETimes_params->DST_Y_PER_PTE_ROW_NOM_L = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L;
11808 CalculateMetaAndPTETimes_params->DST_Y_PER_PTE_ROW_NOM_C = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C;
11809 CalculateMetaAndPTETimes_params->time_per_pte_group_nom_luma = mode_lib->mp.time_per_pte_group_nom_luma;
11810 CalculateMetaAndPTETimes_params->time_per_pte_group_vblank_luma = mode_lib->mp.time_per_pte_group_vblank_luma;
11811 CalculateMetaAndPTETimes_params->time_per_pte_group_flip_luma = mode_lib->mp.time_per_pte_group_flip_luma;
11812 CalculateMetaAndPTETimes_params->time_per_pte_group_nom_chroma = mode_lib->mp.time_per_pte_group_nom_chroma;
11813 CalculateMetaAndPTETimes_params->time_per_pte_group_vblank_chroma = mode_lib->mp.time_per_pte_group_vblank_chroma;
11814 CalculateMetaAndPTETimes_params->time_per_pte_group_flip_chroma = mode_lib->mp.time_per_pte_group_flip_chroma;
11815 CalculateMetaAndPTETimes_params->DST_Y_PER_META_ROW_NOM_L = mode_lib->mp.DST_Y_PER_META_ROW_NOM_L;
11816 CalculateMetaAndPTETimes_params->DST_Y_PER_META_ROW_NOM_C = mode_lib->mp.DST_Y_PER_META_ROW_NOM_C;
11817 CalculateMetaAndPTETimes_params->TimePerMetaChunkNominal = mode_lib->mp.TimePerMetaChunkNominal;
11818 CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkNominal = mode_lib->mp.TimePerChromaMetaChunkNominal;
11819 CalculateMetaAndPTETimes_params->TimePerMetaChunkVBlank = mode_lib->mp.TimePerMetaChunkVBlank;
11820 CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkVBlank = mode_lib->mp.TimePerChromaMetaChunkVBlank;
11821 CalculateMetaAndPTETimes_params->TimePerMetaChunkFlip = mode_lib->mp.TimePerMetaChunkFlip;
11822 CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkFlip = mode_lib->mp.TimePerChromaMetaChunkFlip;
11823
11824 CalculateMetaAndPTETimes(CalculateMetaAndPTETimes_params);
11825
11826 CalculateVMGroupAndRequestTimes(
11827 display_cfg,
11828 s->num_active_planes,
11829 mode_lib->mp.BytePerPixelC,
11830 mode_lib->mp.dst_y_per_vm_vblank,
11831 mode_lib->mp.dst_y_per_vm_flip,
11832 mode_lib->mp.dpte_row_width_luma_ub,
11833 mode_lib->mp.dpte_row_width_chroma_ub,
11834 mode_lib->mp.vm_group_bytes,
11835 mode_lib->mp.dpde0_bytes_per_frame_ub_l,
11836 mode_lib->mp.dpde0_bytes_per_frame_ub_c,
11837 s->tdlut_pte_bytes_per_frame,
11838 mode_lib->mp.meta_pte_bytes_per_frame_ub_l,
11839 mode_lib->mp.meta_pte_bytes_per_frame_ub_c,
11840 mode_lib->ip.dcn_mrq_present,
11841
11842 /* Output */
11843 mode_lib->mp.TimePerVMGroupVBlank,
11844 mode_lib->mp.TimePerVMGroupFlip,
11845 mode_lib->mp.TimePerVMRequestVBlank,
11846 mode_lib->mp.TimePerVMRequestFlip);
11847
11848 // VStartup Adjustment
11849 for (k = 0; k < s->num_active_planes; ++k) {
11850 bool isInterlaceTiming;
11851
11852 mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.TWait[k] + mode_lib->mp.ExtraLatency;
11853 if (!display_cfg->plane_descriptors[k].dynamic_meta_data.enable)
11854 mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.TCalc + mode_lib->mp.MinTTUVBlank[k];
11855
11856 #ifdef __DML_VBA_DEBUG__
11857 dml2_printf("DML::%s: k=%u, MinTTUVBlank = %f (before vstartup margin)\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]);
11858 #endif
11859 s->Tvstartup_margin = (s->MaxVStartupLines[k] - mode_lib->mp.VStartupMin[k]) * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
11860 mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.MinTTUVBlank[k] + s->Tvstartup_margin;
11861
11862 #ifdef __DML_VBA_DEBUG__
11863 dml2_printf("DML::%s: k=%u, Tvstartup_margin = %f\n", __func__, k, s->Tvstartup_margin);
11864 dml2_printf("DML::%s: k=%u, MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
11865 dml2_printf("DML::%s: k=%u, MinTTUVBlank = %f\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]);
11866 #endif
11867
11868 mode_lib->mp.Tdmdl[k] = mode_lib->mp.Tdmdl[k] + s->Tvstartup_margin;
11869 if (display_cfg->plane_descriptors[k].dynamic_meta_data.enable && mode_lib->ip.dynamic_metadata_vm_enabled) {
11870 mode_lib->mp.Tdmdl_vm[k] = mode_lib->mp.Tdmdl_vm[k] + s->Tvstartup_margin;
11871 }
11872
11873 isInterlaceTiming = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !mode_lib->ip.ptoi_supported);
11874
11875 // The actual positioning of the vstartup
11876 mode_lib->mp.VStartup[k] = (isInterlaceTiming ? (2 * s->MaxVStartupLines[k]) : s->MaxVStartupLines[k]);
11877
11878 s->dlg_vblank_start = ((isInterlaceTiming ? math_floor2((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch) / 2.0, 1.0) :
11879 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total) - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch);
11880 s->LSetup = math_floor2(4.0 * mode_lib->mp.TSetup[k] / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)), 1.0) / 4.0;
11881 s->blank_lines_remaining = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active) - mode_lib->mp.VStartup[k];
11882
11883 if (s->blank_lines_remaining < 0) {
11884 dml2_printf("ERROR: Vstartup is larger than vblank!?\n");
11885 s->blank_lines_remaining = 0;
11886 DML2_ASSERT(0);
11887 }
11888 mode_lib->mp.MIN_DST_Y_NEXT_START[k] = s->dlg_vblank_start + s->blank_lines_remaining + s->LSetup;
11889
11890 // debug only
11891 if (((mode_lib->mp.VUpdateOffsetPix[k] + mode_lib->mp.VUpdateWidthPix[k] + mode_lib->mp.VReadyOffsetPix[k]) / (double) display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) <=
11892 (isInterlaceTiming ?
11893 math_floor2((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch - mode_lib->mp.VStartup[k]) / 2.0, 1.0) :
11894 (int)(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch - mode_lib->mp.VStartup[k]))) {
11895 mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k] = true;
11896 } else {
11897 mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k] = false;
11898 }
11899 #ifdef __DML_VBA_DEBUG__
11900 dml2_printf("DML::%s: k=%u, VStartup = %u (max)\n", __func__, k, mode_lib->mp.VStartup[k]);
11901 dml2_printf("DML::%s: k=%u, VStartupMin = %u (max)\n", __func__, k, mode_lib->mp.VStartupMin[k]);
11902 dml2_printf("DML::%s: k=%u, VUpdateOffsetPix = %u\n", __func__, k, mode_lib->mp.VUpdateOffsetPix[k]);
11903 dml2_printf("DML::%s: k=%u, VUpdateWidthPix = %u\n", __func__, k, mode_lib->mp.VUpdateWidthPix[k]);
11904 dml2_printf("DML::%s: k=%u, VReadyOffsetPix = %u\n", __func__, k, mode_lib->mp.VReadyOffsetPix[k]);
11905 dml2_printf("DML::%s: k=%u, HTotal = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total);
11906 dml2_printf("DML::%s: k=%u, VTotal = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total);
11907 dml2_printf("DML::%s: k=%u, VActive = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active);
11908 dml2_printf("DML::%s: k=%u, VFrontPorch = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch);
11909 dml2_printf("DML::%s: k=%u, TSetup = %f\n", __func__, k, mode_lib->mp.TSetup[k]);
11910 dml2_printf("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f\n", __func__, k, mode_lib->mp.MIN_DST_Y_NEXT_START[k]);
11911 dml2_printf("DML::%s: k=%u, VREADY_AT_OR_AFTER_VSYNC = %u\n", __func__, k, mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k]);
11912 #endif
11913 }
11914
11915 //Maximum Bandwidth Used
11916 s->TotalWRBandwidth = 0;
11917 for (k = 0; k < display_cfg->num_streams; ++k) {
11918 s->WRBandwidth = 0;
11919 if (display_cfg->stream_descriptors[k].writeback.active_writebacks_per_stream > 0) {
11920 s->WRBandwidth = display_cfg->stream_descriptors[k].writeback.writeback_stream[0].output_height
11921 * display_cfg->stream_descriptors[k].writeback.writeback_stream[0].output_width /
11922 (display_cfg->stream_descriptors[k].timing.h_total * display_cfg->stream_descriptors[k].writeback.writeback_stream[0].input_height
11923 / ((double)display_cfg->stream_descriptors[k].timing.pixel_clock_khz / 1000))
11924 * (display_cfg->stream_descriptors[k].writeback.writeback_stream[0].pixel_format == dml2_444_32 ? 4.0 : 8.0);
11925 s->TotalWRBandwidth = s->TotalWRBandwidth + s->WRBandwidth;
11926 }
11927 }
11928
11929 mode_lib->mp.TotalDataReadBandwidth = 0;
11930 for (k = 0; k < s->num_active_planes; ++k) {
11931 mode_lib->mp.TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth + mode_lib->mp.vactive_sw_bw_l[k] + mode_lib->mp.vactive_sw_bw_c[k];
11932 #ifdef __DML_VBA_DEBUG__
11933 dml2_printf("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, mode_lib->mp.TotalDataReadBandwidth);
11934 dml2_printf("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]);
11935 dml2_printf("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]);
11936 #endif
11937 }
11938
11939 CalculateStutterEfficiency_params->display_cfg = display_cfg;
11940 CalculateStutterEfficiency_params->CompressedBufferSizeInkByte = mode_lib->mp.CompressedBufferSizeInkByte;
11941 CalculateStutterEfficiency_params->UnboundedRequestEnabled = mode_lib->mp.UnboundedRequestEnabled;
11942 CalculateStutterEfficiency_params->MetaFIFOSizeInKEntries = mode_lib->ip.meta_fifo_size_in_kentries;
11943 CalculateStutterEfficiency_params->ZeroSizeBufferEntries = mode_lib->ip.zero_size_buffer_entries;
11944 CalculateStutterEfficiency_params->PixelChunkSizeInKByte = mode_lib->ip.pixel_chunk_size_kbytes;
11945 CalculateStutterEfficiency_params->NumberOfActiveSurfaces = s->num_active_planes;
11946 CalculateStutterEfficiency_params->ROBBufferSizeInKByte = mode_lib->ip.rob_buffer_size_kbytes;
11947 CalculateStutterEfficiency_params->TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth;
11948 CalculateStutterEfficiency_params->DCFCLK = mode_lib->mp.Dcfclk;
11949 CalculateStutterEfficiency_params->ReturnBW = mode_lib->mp.urg_bandwidth_available_min[dml2_core_internal_soc_state_sys_active];
11950 CalculateStutterEfficiency_params->CompbufReservedSpace64B = mode_lib->mp.compbuf_reserved_space_64b;
11951 CalculateStutterEfficiency_params->CompbufReservedSpaceZs = mode_lib->ip.compbuf_reserved_space_zs;
11952 CalculateStutterEfficiency_params->SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us;
11953 CalculateStutterEfficiency_params->SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us;
11954 CalculateStutterEfficiency_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings;
11955 CalculateStutterEfficiency_params->StutterEnterPlusExitWatermark = mode_lib->mp.Watermark.StutterEnterPlusExitWatermark;
11956 CalculateStutterEfficiency_params->Z8StutterEnterPlusExitWatermark = mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark;
11957 CalculateStutterEfficiency_params->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
11958 CalculateStutterEfficiency_params->MinTTUVBlank = mode_lib->mp.MinTTUVBlank;
11959 CalculateStutterEfficiency_params->DPPPerSurface = mode_lib->mp.NoOfDPP;
11960 CalculateStutterEfficiency_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY;
11961 CalculateStutterEfficiency_params->BytePerPixelY = mode_lib->mp.BytePerPixelY;
11962 CalculateStutterEfficiency_params->BytePerPixelDETY = mode_lib->mp.BytePerPixelInDETY;
11963 CalculateStutterEfficiency_params->SwathWidthY = mode_lib->mp.SwathWidthY;
11964 CalculateStutterEfficiency_params->SwathHeightY = mode_lib->mp.SwathHeightY;
11965 CalculateStutterEfficiency_params->SwathHeightC = mode_lib->mp.SwathHeightC;
11966 CalculateStutterEfficiency_params->BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY;
11967 CalculateStutterEfficiency_params->BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY;
11968 CalculateStutterEfficiency_params->BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC;
11969 CalculateStutterEfficiency_params->BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC;
11970 CalculateStutterEfficiency_params->DCCYMaxUncompressedBlock = mode_lib->mp.DCCYMaxUncompressedBlock;
11971 CalculateStutterEfficiency_params->DCCCMaxUncompressedBlock = mode_lib->mp.DCCCMaxUncompressedBlock;
11972 CalculateStutterEfficiency_params->ReadBandwidthSurfaceLuma = mode_lib->mp.vactive_sw_bw_l;
11973 CalculateStutterEfficiency_params->ReadBandwidthSurfaceChroma = mode_lib->mp.vactive_sw_bw_c;
11974 CalculateStutterEfficiency_params->dpte_row_bw = mode_lib->mp.dpte_row_bw;
11975 CalculateStutterEfficiency_params->meta_row_bw = mode_lib->mp.meta_row_bw;
11976 CalculateStutterEfficiency_params->rob_alloc_compressed = mode_lib->ip.dcn_mrq_present;
11977
11978 // output
11979 CalculateStutterEfficiency_params->StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.StutterEfficiencyNotIncludingVBlank;
11980 CalculateStutterEfficiency_params->StutterEfficiency = &mode_lib->mp.StutterEfficiency;
11981 CalculateStutterEfficiency_params->NumberOfStutterBurstsPerFrame = &mode_lib->mp.NumberOfStutterBurstsPerFrame;
11982 CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlank;
11983 CalculateStutterEfficiency_params->Z8StutterEfficiency = &mode_lib->mp.Z8StutterEfficiency;
11984 CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &mode_lib->mp.Z8NumberOfStutterBurstsPerFrame;
11985 CalculateStutterEfficiency_params->StutterPeriod = &mode_lib->mp.StutterPeriod;
11986 CalculateStutterEfficiency_params->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = &mode_lib->mp.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE;
11987
11988 // Stutter Efficiency
11989 CalculateStutterEfficiency(&mode_lib->scratch, CalculateStutterEfficiency_params);
11990
11991 #ifdef __DML_VBA_ALLOW_DELTA__
11992 // Calculate z8 stutter eff assuming 0 reserved space
11993 CalculateStutterEfficiency_params->CompbufReservedSpace64B = 0;
11994 CalculateStutterEfficiency_params->CompbufReservedSpaceZs = 0;
11995
11996 CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlankBestCase;
11997 CalculateStutterEfficiency_params->Z8StutterEfficiency = &mode_lib->mp.Z8StutterEfficiencyBestCase;
11998 CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase;
11999 CalculateStutterEfficiency_params->StutterPeriod = &mode_lib->mp.StutterPeriodBestCase;
12000
12001 // Stutter Efficiency
12002 CalculateStutterEfficiency(&mode_lib->scratch, CalculateStutterEfficiency_params);
12003 #else
12004 mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlankBestCase = mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlank;
12005 mode_lib->mp.Z8StutterEfficiencyBestCase = mode_lib->mp.Z8StutterEfficiency;
12006 mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase = mode_lib->mp.Z8NumberOfStutterBurstsPerFrame;
12007 mode_lib->mp.StutterPeriodBestCase = mode_lib->mp.StutterPeriod;
12008 #endif
12009 } // PrefetchAndImmediateFlipSupported
12010
12011 max_uclk_mhz = mode_lib->soc.clk_table.uclk.clk_values_khz[mode_lib->soc.clk_table.uclk.num_clk_values - 1] / 1000.0;
12012 min_return_latency_in_DCFCLK_cycles = (min_return_uclk_cycles / max_uclk_mhz + min_return_fclk_cycles / max_fclk_mhz) * hard_minimum_dcfclk_mhz;
12013 mode_lib->mp.min_return_latency_in_dcfclk = (unsigned int)min_return_latency_in_DCFCLK_cycles;
12014 mode_lib->mp.dcfclk_deep_sleep_hysteresis = (unsigned int)math_max2(32, (double)mode_lib->ip.pixel_chunk_size_kbytes * 1024 * 3 / 4 / 64 - min_return_latency_in_DCFCLK_cycles);
12015 DML2_ASSERT(mode_lib->mp.dcfclk_deep_sleep_hysteresis < 256);
12016
12017 #ifdef __DML_VBA_DEBUG__
12018 dml2_printf("DML::%s: max_fclk_mhz = %f\n", __func__, max_fclk_mhz);
12019 dml2_printf("DML::%s: max_uclk_mhz = %f\n", __func__, max_uclk_mhz);
12020 dml2_printf("DML::%s: hard_minimum_dcfclk_mhz = %f\n", __func__, hard_minimum_dcfclk_mhz);
12021 dml2_printf("DML::%s: min_return_uclk_cycles = %d\n", __func__, min_return_uclk_cycles);
12022 dml2_printf("DML::%s: min_return_fclk_cycles = %d\n", __func__, min_return_fclk_cycles);
12023 dml2_printf("DML::%s: min_return_latency_in_DCFCLK_cycles = %f\n", __func__, min_return_latency_in_DCFCLK_cycles);
12024 dml2_printf("DML::%s: dcfclk_deep_sleep_hysteresis = %d \n", __func__, mode_lib->mp.dcfclk_deep_sleep_hysteresis);
12025 dml2_printf("DML::%s: --- END --- \n", __func__);
12026 #endif
12027 return (in_out_params->mode_lib->mp.PrefetchAndImmediateFlipSupported);
12028 }
12029
// Entry point for mode programming: forwards in_out_params to
// dml_core_mode_programming() and logs a START / result / DONE banner around
// the call.
//
// Returns whatever dml_core_mode_programming() returned.
bool dml2_core_calcs_mode_programming_ex(struct dml2_core_calcs_mode_programming_ex *in_out_params)
{
	bool programming_result;

	dml2_printf("DML::%s: ------------- START ----------\n", __func__);
	programming_result = dml_core_mode_programming(in_out_params);

	dml2_printf("DML::%s: result = %0d\n", __func__, programming_result);
	dml2_printf("DML::%s: ------------- DONE ----------\n", __func__);

	return programming_result;
}
12039
dml2_core_calcs_get_dpte_row_height(unsigned int * dpte_row_height,struct dml2_core_internal_display_mode_lib * mode_lib,bool is_plane1,enum dml2_source_format_class SourcePixelFormat,enum dml2_swizzle_mode SurfaceTiling,enum dml2_rotation_angle ScanDirection,unsigned int pitch,unsigned int GPUVMMinPageSizeKBytes)12040 void dml2_core_calcs_get_dpte_row_height(
12041 unsigned int *dpte_row_height,
12042 struct dml2_core_internal_display_mode_lib *mode_lib,
12043 bool is_plane1,
12044 enum dml2_source_format_class SourcePixelFormat,
12045 enum dml2_swizzle_mode SurfaceTiling,
12046 enum dml2_rotation_angle ScanDirection,
12047 unsigned int pitch,
12048 unsigned int GPUVMMinPageSizeKBytes)
12049 {
12050 unsigned int BytePerPixelY;
12051 unsigned int BytePerPixelC;
12052 double BytePerPixelInDETY;
12053 double BytePerPixelInDETC;
12054 unsigned int BlockHeight256BytesY;
12055 unsigned int BlockHeight256BytesC;
12056 unsigned int BlockWidth256BytesY;
12057 unsigned int BlockWidth256BytesC;
12058 unsigned int MacroTileWidthY;
12059 unsigned int MacroTileWidthC;
12060 unsigned int MacroTileHeightY;
12061 unsigned int MacroTileHeightC;
12062 bool surf_linear_128_l = false;
12063 bool surf_linear_128_c = false;
12064
12065 CalculateBytePerPixelAndBlockSizes(
12066 SourcePixelFormat,
12067 SurfaceTiling,
12068 pitch,
12069 pitch,
12070
12071 /* Output */
12072 &BytePerPixelY,
12073 &BytePerPixelC,
12074 &BytePerPixelInDETY,
12075 &BytePerPixelInDETC,
12076 &BlockHeight256BytesY,
12077 &BlockHeight256BytesC,
12078 &BlockWidth256BytesY,
12079 &BlockWidth256BytesC,
12080 &MacroTileHeightY,
12081 &MacroTileHeightC,
12082 &MacroTileWidthY,
12083 &MacroTileWidthC,
12084 &surf_linear_128_l,
12085 &surf_linear_128_c);
12086
12087 unsigned int BytePerPixel = is_plane1 ? BytePerPixelC : BytePerPixelY;
12088 unsigned int BlockHeight256Bytes = is_plane1 ? BlockHeight256BytesC : BlockHeight256BytesY;
12089 unsigned int BlockWidth256Bytes = is_plane1 ? BlockWidth256BytesC : BlockWidth256BytesY;
12090 unsigned int MacroTileWidth = is_plane1 ? MacroTileWidthC : MacroTileWidthY;
12091 unsigned int MacroTileHeight = is_plane1 ? MacroTileHeightC : MacroTileHeightY;
12092 unsigned int PTEBufferSizeInRequests = is_plane1 ? mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma : mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
12093 #ifdef __DML_VBA_DEBUG__
12094 dml2_printf("DML: %s: is_plane1 = %u\n", __func__, is_plane1);
12095 dml2_printf("DML: %s: BytePerPixel = %u\n", __func__, BytePerPixel);
12096 dml2_printf("DML: %s: BlockHeight256Bytes = %u\n", __func__, BlockHeight256Bytes);
12097 dml2_printf("DML: %s: BlockWidth256Bytes = %u\n", __func__, BlockWidth256Bytes);
12098 dml2_printf("DML: %s: MacroTileWidth = %u\n", __func__, MacroTileWidth);
12099 dml2_printf("DML: %s: MacroTileHeight = %u\n", __func__, MacroTileHeight);
12100 dml2_printf("DML: %s: PTEBufferSizeInRequests = %u\n", __func__, PTEBufferSizeInRequests);
12101 dml2_printf("DML: %s: dpte_buffer_size_in_pte_reqs_luma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma);
12102 dml2_printf("DML: %s: dpte_buffer_size_in_pte_reqs_chroma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma);
12103 dml2_printf("DML: %s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes);
12104 #endif
12105 unsigned int dummy_integer[21];
12106
12107 mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportStationary = 0;
12108 mode_lib->scratch.calculate_vm_and_row_bytes_params.DCCEnable = 0;
12109 mode_lib->scratch.calculate_vm_and_row_bytes_params.NumberOfDPPs = 1;
12110 mode_lib->scratch.calculate_vm_and_row_bytes_params.BlockHeight256Bytes = BlockHeight256Bytes;
12111 mode_lib->scratch.calculate_vm_and_row_bytes_params.BlockWidth256Bytes = BlockWidth256Bytes;
12112 mode_lib->scratch.calculate_vm_and_row_bytes_params.SourcePixelFormat = SourcePixelFormat;
12113 mode_lib->scratch.calculate_vm_and_row_bytes_params.SurfaceTiling = SurfaceTiling;
12114 mode_lib->scratch.calculate_vm_and_row_bytes_params.BytePerPixel = BytePerPixel;
12115 mode_lib->scratch.calculate_vm_and_row_bytes_params.RotationAngle = ScanDirection;
12116 mode_lib->scratch.calculate_vm_and_row_bytes_params.SwathWidth = 0;
12117 mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportHeight = 0;
12118 mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportXStart = 0;
12119 mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportYStart = 0;
12120 mode_lib->scratch.calculate_vm_and_row_bytes_params.GPUVMEnable = 1;
12121 mode_lib->scratch.calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = 4;
12122 mode_lib->scratch.calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = GPUVMMinPageSizeKBytes;
12123 mode_lib->scratch.calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = PTEBufferSizeInRequests;
12124 mode_lib->scratch.calculate_vm_and_row_bytes_params.Pitch = pitch;
12125 mode_lib->scratch.calculate_vm_and_row_bytes_params.MacroTileWidth = MacroTileWidth;
12126 mode_lib->scratch.calculate_vm_and_row_bytes_params.MacroTileHeight = MacroTileHeight;
12127 mode_lib->scratch.calculate_vm_and_row_bytes_params.is_phantom = 0;
12128 mode_lib->scratch.calculate_vm_and_row_bytes_params.DCCMetaPitch = 0;
12129 mode_lib->scratch.calculate_vm_and_row_bytes_params.mrq_present = 0;
12130
12131 mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &dummy_integer[1];
12132 mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &dummy_integer[2];
12133 mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_width_ub = &dummy_integer[3];
12134 mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_height = dpte_row_height;
12135 mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_height_linear = &dummy_integer[4];
12136 mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &dummy_integer[5];
12137 mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &dummy_integer[6];
12138 mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &dummy_integer[7];
12139 mode_lib->scratch.calculate_vm_and_row_bytes_params.vmpg_width = &dummy_integer[8];
12140 mode_lib->scratch.calculate_vm_and_row_bytes_params.vmpg_height = &dummy_integer[9];
12141 mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &dummy_integer[11];
12142 mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &dummy_integer[12];
12143 mode_lib->scratch.calculate_vm_and_row_bytes_params.PTERequestSize = &dummy_integer[13];
12144 mode_lib->scratch.calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &dummy_integer[14];
12145
12146 mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_row_bytes = &dummy_integer[15];
12147 mode_lib->scratch.calculate_vm_and_row_bytes_params.MetaRequestWidth = &dummy_integer[16];
12148 mode_lib->scratch.calculate_vm_and_row_bytes_params.MetaRequestHeight = &dummy_integer[17];
12149 mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_row_width = &dummy_integer[18];
12150 mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_row_height = &dummy_integer[19];
12151 mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &dummy_integer[20];
12152
12153 // just supply with enough parameters to calculate dpte
12154 CalculateVMAndRowBytes(&mode_lib->scratch.calculate_vm_and_row_bytes_params);
12155
12156 #ifdef __DML_VBA_DEBUG__
12157 dml2_printf("DML: %s: dpte_row_height = %u\n", __func__, *dpte_row_height);
12158 #endif
12159 }
12160
is_dual_plane(enum dml2_source_format_class source_format)12161 static bool is_dual_plane(enum dml2_source_format_class source_format)
12162 {
12163 bool ret_val = 0;
12164
12165 if ((source_format == dml2_420_12) || (source_format == dml2_420_8) || (source_format == dml2_420_10) || (source_format == dml2_rgbe_alpha))
12166 ret_val = 1;
12167
12168 return ret_val;
12169 }
12170
dml_get_plane_idx(const struct dml2_core_internal_display_mode_lib * mode_lib,unsigned int pipe_idx)12171 static unsigned int dml_get_plane_idx(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx)
12172 {
12173 unsigned int plane_idx = mode_lib->mp.pipe_plane[pipe_idx];
12174 return plane_idx;
12175 }
12176
rq_dlg_get_wm_regs(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_dchub_watermark_regs * wm_regs)12177 static void rq_dlg_get_wm_regs(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_watermark_regs *wm_regs)
12178 {
12179 double refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? (double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz;
12180
12181 wm_regs->fclk_pstate = (int unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz);
12182 wm_regs->sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz);
12183 wm_regs->sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz);
12184 wm_regs->temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us * refclk_freq_in_mhz);
12185 wm_regs->uclk_pstate = (int unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz);
12186 wm_regs->urgent = (int unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz);
12187 wm_regs->usr = (int unsigned)(mode_lib->mp.Watermark.USRRetrainingWatermark * refclk_freq_in_mhz);
12188 wm_regs->refcyc_per_trip_to_mem = (unsigned int)(mode_lib->mp.UrgentLatency * refclk_freq_in_mhz);
12189 wm_regs->refcyc_per_meta_trip_to_mem = (unsigned int)(mode_lib->mp.MetaTripToMemory * refclk_freq_in_mhz);
12190 wm_regs->frac_urg_bw_flip = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip * 1000);
12191 wm_regs->frac_urg_bw_nom = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidth * 1000);
12192 wm_regs->frac_urg_bw_mall = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthMALL * 1000);
12193 }
12194
// Returns math_log2_approx(a) - subtrahend, or 0 when a is 0.
//
// NOTE(review): nothing here checks that the log2 result is at least
// subtrahend; callers are expected to pass values for which the difference is
// non-negative.
static unsigned int log_and_substract_if_non_zero(unsigned int a, unsigned int subtrahend)
{
	unsigned int encoded = 0;

	if (a != 0)
		encoded = math_log2_approx(a) - subtrahend;

	return encoded;
}
12202
dml2_core_calcs_cursor_dlg_reg(struct dml2_cursor_dlg_regs * cursor_dlg_regs,const struct dml2_get_cursor_dlg_reg * p)12203 void dml2_core_calcs_cursor_dlg_reg(struct dml2_cursor_dlg_regs *cursor_dlg_regs, const struct dml2_get_cursor_dlg_reg *p)
12204 {
12205 int dst_x_offset = (int) ((p->cursor_x_position + (p->cursor_stereo_en == 0 ? 0 : math_max2(p->cursor_primary_offset, p->cursor_secondary_offset)) -
12206 (p->cursor_hotspot_x * (p->cursor_2x_magnify == 0 ? 1 : 2))) * p->dlg_refclk_mhz / p->pixel_rate_mhz / p->hratio);
12207 cursor_dlg_regs->dst_x_offset = (unsigned int) ((dst_x_offset > 0) ? dst_x_offset : 0);
12208
12209 #ifdef __DML_VBA_DEBUG__
12210 dml2_printf("DML_DLG::%s: cursor_x_position=%d\n", __func__, p->cursor_x_position);
12211 dml2_printf("DML_DLG::%s: dlg_refclk_mhz=%f\n", __func__, p->dlg_refclk_mhz);
12212 dml2_printf("DML_DLG::%s: pixel_rate_mhz=%f\n", __func__, p->pixel_rate_mhz);
12213 dml2_printf("DML_DLG::%s: dst_x_offset=%d\n", __func__, dst_x_offset);
12214 dml2_printf("DML_DLG::%s: dst_x_offset=%d (reg)\n", __func__, cursor_dlg_regs->dst_x_offset);
12215 #endif
12216
12217 cursor_dlg_regs->chunk_hdl_adjust = 3;
12218 cursor_dlg_regs->dst_y_offset = 0;
12219
12220 cursor_dlg_regs->qos_level_fixed = 8;
12221 cursor_dlg_regs->qos_ramp_disable = 0;
12222 }
12223
rq_dlg_get_rq_reg(struct dml2_display_rq_regs * rq_regs,const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,unsigned int pipe_idx)12224 static void rq_dlg_get_rq_reg(struct dml2_display_rq_regs *rq_regs,
12225 const struct dml2_display_cfg *display_cfg,
12226 const struct dml2_core_internal_display_mode_lib *mode_lib,
12227 unsigned int pipe_idx)
12228 {
12229 unsigned int plane_idx = dml_get_plane_idx(mode_lib, pipe_idx);
12230 enum dml2_source_format_class source_format = display_cfg->plane_descriptors[plane_idx].pixel_format;
12231 enum dml2_swizzle_mode sw_mode = display_cfg->plane_descriptors[plane_idx].surface.tiling;
12232 bool dual_plane = is_dual_plane((enum dml2_source_format_class)(source_format));
12233
12234 unsigned int pixel_chunk_bytes = 0;
12235 unsigned int min_pixel_chunk_bytes = 0;
12236 unsigned int dpte_group_bytes = 0;
12237 unsigned int mpte_group_bytes = 0;
12238
12239 unsigned int p1_pixel_chunk_bytes = 0;
12240 unsigned int p1_min_pixel_chunk_bytes = 0;
12241 unsigned int p1_dpte_group_bytes = 0;
12242 unsigned int p1_mpte_group_bytes = 0;
12243
12244 unsigned int detile_buf_plane1_addr = 0;
12245 unsigned int detile_buf_size_in_bytes;
12246 double stored_swath_l_bytes;
12247 double stored_swath_c_bytes;
12248 bool is_phantom_pipe;
12249
12250 dml2_printf("DML_DLG::%s: Calculation for pipe[%d] start\n", __func__, pipe_idx);
12251
12252 pixel_chunk_bytes = (unsigned int)(mode_lib->ip.pixel_chunk_size_kbytes * 1024);
12253 min_pixel_chunk_bytes = (unsigned int)(mode_lib->ip.min_pixel_chunk_size_bytes);
12254
12255 if (pixel_chunk_bytes == 64 * 1024)
12256 min_pixel_chunk_bytes = 0;
12257
12258 dpte_group_bytes = (unsigned int)(dml_get_dpte_group_size_in_bytes(mode_lib, pipe_idx));
12259 mpte_group_bytes = (unsigned int)(dml_get_vm_group_size_in_bytes(mode_lib, pipe_idx));
12260
12261 p1_pixel_chunk_bytes = pixel_chunk_bytes;
12262 p1_min_pixel_chunk_bytes = min_pixel_chunk_bytes;
12263 p1_dpte_group_bytes = dpte_group_bytes;
12264 p1_mpte_group_bytes = mpte_group_bytes;
12265
12266 if (source_format == dml2_rgbe_alpha)
12267 p1_pixel_chunk_bytes = (unsigned int)(mode_lib->ip.alpha_pixel_chunk_size_kbytes * 1024);
12268
12269 rq_regs->unbounded_request_enabled = dml_get_unbounded_request_enabled(mode_lib);
12270 rq_regs->rq_regs_l.chunk_size = log_and_substract_if_non_zero(pixel_chunk_bytes, 10);
12271 rq_regs->rq_regs_c.chunk_size = log_and_substract_if_non_zero(p1_pixel_chunk_bytes, 10);
12272
12273 if (min_pixel_chunk_bytes == 0)
12274 rq_regs->rq_regs_l.min_chunk_size = 0;
12275 else
12276 rq_regs->rq_regs_l.min_chunk_size = log_and_substract_if_non_zero(min_pixel_chunk_bytes, 8 - 1);
12277
12278 if (p1_min_pixel_chunk_bytes == 0)
12279 rq_regs->rq_regs_c.min_chunk_size = 0;
12280 else
12281 rq_regs->rq_regs_c.min_chunk_size = log_and_substract_if_non_zero(p1_min_pixel_chunk_bytes, 8 - 1);
12282
12283 rq_regs->rq_regs_l.dpte_group_size = log_and_substract_if_non_zero(dpte_group_bytes, 6);
12284 rq_regs->rq_regs_l.mpte_group_size = log_and_substract_if_non_zero(mpte_group_bytes, 6);
12285 rq_regs->rq_regs_c.dpte_group_size = log_and_substract_if_non_zero(p1_dpte_group_bytes, 6);
12286 rq_regs->rq_regs_c.mpte_group_size = log_and_substract_if_non_zero(p1_mpte_group_bytes, 6);
12287
12288 detile_buf_size_in_bytes = (unsigned int)(dml_get_det_buffer_size_kbytes(mode_lib, pipe_idx) * 1024);
12289
12290 if (sw_mode == dml2_sw_linear && display_cfg->gpuvm_enable) {
12291 unsigned int p0_pte_row_height_linear = (unsigned int)(dml_get_dpte_row_height_linear_l(mode_lib, pipe_idx));
12292 #ifdef __DML_VBA_DEBUG__
12293 dml2_printf("DML_DLG: %s: p0_pte_row_height_linear = %u\n", __func__, p0_pte_row_height_linear);
12294 #endif
12295 DML2_ASSERT(p0_pte_row_height_linear >= 8);
12296
12297 rq_regs->rq_regs_l.pte_row_height_linear = math_log2_approx(p0_pte_row_height_linear) - 3;
12298 if (dual_plane) {
12299 unsigned int p1_pte_row_height_linear = (unsigned int)(dml_get_dpte_row_height_linear_c(mode_lib, pipe_idx));
12300
12301 #ifdef __DML_VBA_DEBUG__
12302 dml2_printf("DML_DLG: %s: p1_pte_row_height_linear = %u\n", __func__, p1_pte_row_height_linear);
12303 #endif
12304 if (sw_mode == dml2_sw_linear) {
12305 DML2_ASSERT(p1_pte_row_height_linear >= 8);
12306 }
12307 rq_regs->rq_regs_c.pte_row_height_linear = math_log2_approx(p1_pte_row_height_linear) - 3;
12308 }
12309 } else {
12310 rq_regs->rq_regs_l.pte_row_height_linear = 0;
12311 rq_regs->rq_regs_c.pte_row_height_linear = 0;
12312 }
12313
12314 rq_regs->rq_regs_l.swath_height = log_and_substract_if_non_zero(dml_get_swath_height_l(mode_lib, pipe_idx), 0);
12315 rq_regs->rq_regs_c.swath_height = log_and_substract_if_non_zero(dml_get_swath_height_c(mode_lib, pipe_idx), 0);
12316
12317 // FIXME_DCN4, programming guide has dGPU condition
12318 if (pixel_chunk_bytes >= 32 * 1024 || (dual_plane && p1_pixel_chunk_bytes >= 32 * 1024)) { //32kb
12319 rq_regs->drq_expansion_mode = 0;
12320 } else {
12321 rq_regs->drq_expansion_mode = 2;
12322 }
12323 rq_regs->prq_expansion_mode = 1;
12324 rq_regs->crq_expansion_mode = 1;
12325 rq_regs->mrq_expansion_mode = 1;
12326
12327 stored_swath_l_bytes = dml_get_det_stored_buffer_size_l_bytes(mode_lib, pipe_idx);
12328 stored_swath_c_bytes = dml_get_det_stored_buffer_size_c_bytes(mode_lib, pipe_idx);
12329 is_phantom_pipe = dml_get_is_phantom_pipe(display_cfg, mode_lib, pipe_idx);
12330
12331 // Note: detile_buf_plane1_addr is in unit of 1KB
12332 if (dual_plane) {
12333 if (is_phantom_pipe) {
12334 detile_buf_plane1_addr = (unsigned int)((1024.0 * 1024.0) / 2.0 / 1024.0); // half to chroma
12335 } else {
12336 if (stored_swath_l_bytes / stored_swath_c_bytes <= 1.5) {
12337 detile_buf_plane1_addr = (unsigned int)(detile_buf_size_in_bytes / 2.0 / 1024.0); // half to chroma
12338 #ifdef __DML_VBA_DEBUG__
12339 dml2_printf("DML_DLG: %s: detile_buf_plane1_addr = %d (1/2 to chroma)\n", __func__, detile_buf_plane1_addr);
12340 #endif
12341 } else {
12342 detile_buf_plane1_addr = (unsigned int)(dml_round_to_multiple((unsigned int)((2.0 * detile_buf_size_in_bytes) / 3.0), 1024, 0) / 1024.0); // 2/3 to luma
12343 #ifdef __DML_VBA_DEBUG__
12344 dml2_printf("DML_DLG: %s: detile_buf_plane1_addr = %d (1/3 chroma)\n", __func__, detile_buf_plane1_addr);
12345 #endif
12346 }
12347 }
12348 }
12349 rq_regs->plane1_base_address = detile_buf_plane1_addr;
12350
12351 #ifdef __DML_VBA_DEBUG__
12352 dml2_printf("DML_DLG: %s: is_phantom_pipe = %d\n", __func__, is_phantom_pipe);
12353 dml2_printf("DML_DLG: %s: stored_swath_l_bytes = %f\n", __func__, stored_swath_l_bytes);
12354 dml2_printf("DML_DLG: %s: stored_swath_c_bytes = %f\n", __func__, stored_swath_c_bytes);
12355 dml2_printf("DML_DLG: %s: detile_buf_size_in_bytes = %d\n", __func__, detile_buf_size_in_bytes);
12356 dml2_printf("DML_DLG: %s: detile_buf_plane1_addr = %d\n", __func__, detile_buf_plane1_addr);
12357 dml2_printf("DML_DLG: %s: plane1_base_address = %d\n", __func__, rq_regs->plane1_base_address);
12358 #endif
12359 //dml2_printf_rq_regs_st(rq_regs);
12360 dml2_printf("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx);
12361 }
12362
rq_dlg_get_dlg_reg(struct dml2_core_internal_scratch * s,struct dml2_display_dlg_regs * disp_dlg_regs,struct dml2_display_ttu_regs * disp_ttu_regs,const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,const unsigned int pipe_idx)12363 static void rq_dlg_get_dlg_reg(
12364 struct dml2_core_internal_scratch *s,
12365 struct dml2_display_dlg_regs *disp_dlg_regs,
12366 struct dml2_display_ttu_regs *disp_ttu_regs,
12367 const struct dml2_display_cfg *display_cfg,
12368 const struct dml2_core_internal_display_mode_lib *mode_lib,
12369 const unsigned int pipe_idx)
12370 {
12371 struct dml2_core_shared_rq_dlg_get_dlg_reg_locals *l = &s->rq_dlg_get_dlg_reg_locals;
12372
12373 memset(l, 0, sizeof(struct dml2_core_shared_rq_dlg_get_dlg_reg_locals));
12374
12375 dml2_printf("DML_DLG::%s: Calculation for pipe_idx=%d\n", __func__, pipe_idx);
12376
12377 l->plane_idx = dml_get_plane_idx(mode_lib, pipe_idx);
12378 dml2_assert(l->plane_idx < DML2_MAX_PLANES);
12379
12380 l->source_format = dml2_444_8;
12381 l->odm_mode = dml2_odm_mode_bypass;
12382 l->dual_plane = false;
12383 l->htotal = 0;
12384 l->hactive = 0;
12385 l->hblank_end = 0;
12386 l->vblank_end = 0;
12387 l->interlaced = false;
12388 l->pclk_freq_in_mhz = 0.0;
12389 l->refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? (double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz;
12390 l->ref_freq_to_pix_freq = 0.0;
12391
12392 if (l->plane_idx < DML2_MAX_PLANES) {
12393
12394 l->timing = &display_cfg->stream_descriptors[display_cfg->plane_descriptors[l->plane_idx].stream_index].timing;
12395 l->source_format = display_cfg->plane_descriptors[l->plane_idx].pixel_format;
12396 l->odm_mode = mode_lib->mp.ODMMode[l->plane_idx];
12397
12398 l->dual_plane = is_dual_plane(l->source_format);
12399
12400 l->htotal = l->timing->h_total;
12401 l->hactive = l->timing->h_active;
12402 l->hblank_end = l->timing->h_blank_end;
12403 l->vblank_end = l->timing->v_blank_end;
12404 l->interlaced = l->timing->interlaced;
12405 l->pclk_freq_in_mhz = (double)l->timing->pixel_clock_khz / 1000;
12406 l->ref_freq_to_pix_freq = l->refclk_freq_in_mhz / l->pclk_freq_in_mhz;
12407
12408 dml2_printf("DML_DLG::%s: plane_idx = %d\n", __func__, l->plane_idx);
12409 dml2_printf("DML_DLG: %s: htotal = %d\n", __func__, l->htotal);
12410 dml2_printf("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, l->refclk_freq_in_mhz);
12411 dml2_printf("DML_DLG: %s: dlg_ref_clk_mhz = %3.2f\n", __func__, display_cfg->overrides.hw.dlg_ref_clk_mhz);
12412 dml2_printf("DML_DLG: %s: soc.refclk_mhz = %3.2f\n", __func__, mode_lib->soc.dchub_refclk_mhz);
12413 dml2_printf("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, l->pclk_freq_in_mhz);
12414 dml2_printf("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq);
12415 dml2_printf("DML_DLG: %s: interlaced = %d\n", __func__, l->interlaced);
12416
12417 DML2_ASSERT(l->refclk_freq_in_mhz != 0);
12418 DML2_ASSERT(l->pclk_freq_in_mhz != 0);
12419 DML2_ASSERT(l->ref_freq_to_pix_freq < 4.0);
12420
12421 // Need to figure out which side of odm combine we're in
12422 // Assume the pipe instance under the same plane is in order
12423
12424 if (l->odm_mode == dml2_odm_mode_bypass) {
12425 disp_dlg_regs->refcyc_h_blank_end = (unsigned int)((double)l->hblank_end * l->ref_freq_to_pix_freq);
12426 } else if (l->odm_mode == dml2_odm_mode_combine_2to1 || l->odm_mode == dml2_odm_mode_combine_3to1 || l->odm_mode == dml2_odm_mode_combine_4to1) {
12427 // find out how many pipe are in this plane
12428 l->num_active_pipes = mode_lib->mp.num_active_pipes;
12429 l->first_pipe_idx_in_plane = DML2_MAX_PLANES;
12430 l->pipe_idx_in_combine = 0; // pipe index within the plane
12431 l->odm_combine_factor = 2;
12432
12433 if (l->odm_mode == dml2_odm_mode_combine_3to1)
12434 l->odm_combine_factor = 3;
12435 else if (l->odm_mode == dml2_odm_mode_combine_4to1)
12436 l->odm_combine_factor = 4;
12437
12438 for (unsigned int i = 0; i < l->num_active_pipes; i++) {
12439 if (dml_get_plane_idx(mode_lib, i) == l->plane_idx) {
12440 if (i < l->first_pipe_idx_in_plane) {
12441 l->first_pipe_idx_in_plane = i;
12442 }
12443 }
12444 }
12445 l->pipe_idx_in_combine = pipe_idx - l->first_pipe_idx_in_plane; // DML assumes the pipes in the same plane will have continuous indexing (i.e. plane 0 use pipe 0, 1, and plane 1 uses pipe 2, 3, etc.)
12446
12447 disp_dlg_regs->refcyc_h_blank_end = (unsigned int)(((double)l->hblank_end + (double)l->pipe_idx_in_combine * (double)l->hactive / (double)l->odm_combine_factor) * l->ref_freq_to_pix_freq);
12448 dml2_printf("DML_DLG: %s: pipe_idx = %d\n", __func__, pipe_idx);
12449 dml2_printf("DML_DLG: %s: first_pipe_idx_in_plane = %d\n", __func__, l->first_pipe_idx_in_plane);
12450 dml2_printf("DML_DLG: %s: pipe_idx_in_combine = %d\n", __func__, l->pipe_idx_in_combine);
12451 dml2_printf("DML_DLG: %s: odm_combine_factor = %d\n", __func__, l->odm_combine_factor);
12452 }
12453 dml2_printf("DML_DLG: %s: refcyc_h_blank_end = %d\n", __func__, disp_dlg_regs->refcyc_h_blank_end);
12454
12455 DML2_ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int)math_pow(2, 13));
12456
12457 disp_dlg_regs->ref_freq_to_pix_freq = (unsigned int)(l->ref_freq_to_pix_freq * math_pow(2, 19));
12458 disp_dlg_regs->refcyc_per_htotal = (unsigned int)(l->ref_freq_to_pix_freq * (double)l->htotal * math_pow(2, 8));
12459 disp_dlg_regs->dlg_vblank_end = l->interlaced ? (l->vblank_end / 2) : l->vblank_end; // 15 bits
12460
12461 l->min_ttu_vblank = mode_lib->mp.MinTTUVBlank[mode_lib->mp.pipe_plane[pipe_idx]];
12462 l->min_dst_y_next_start = (unsigned int)(mode_lib->mp.MIN_DST_Y_NEXT_START[mode_lib->mp.pipe_plane[pipe_idx]]);
12463
12464 dml2_printf("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, l->min_ttu_vblank);
12465 dml2_printf("DML_DLG: %s: min_dst_y_next_start = %d\n", __func__, l->min_dst_y_next_start);
12466 dml2_printf("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq);
12467
12468 l->vready_after_vcount0 = (unsigned int)(mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[mode_lib->mp.pipe_plane[pipe_idx]]);
12469 disp_dlg_regs->vready_after_vcount0 = l->vready_after_vcount0;
12470
12471 dml2_printf("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, disp_dlg_regs->vready_after_vcount0);
12472
12473 l->dst_x_after_scaler = (unsigned int)(mode_lib->mp.DSTXAfterScaler[mode_lib->mp.pipe_plane[pipe_idx]]);
12474 l->dst_y_after_scaler = (unsigned int)(mode_lib->mp.DSTYAfterScaler[mode_lib->mp.pipe_plane[pipe_idx]]);
12475
12476 dml2_printf("DML_DLG: %s: dst_x_after_scaler = %d\n", __func__, l->dst_x_after_scaler);
12477 dml2_printf("DML_DLG: %s: dst_y_after_scaler = %d\n", __func__, l->dst_y_after_scaler);
12478
12479 l->dst_y_prefetch = mode_lib->mp.dst_y_prefetch[mode_lib->mp.pipe_plane[pipe_idx]];
12480 l->dst_y_per_vm_vblank = mode_lib->mp.dst_y_per_vm_vblank[mode_lib->mp.pipe_plane[pipe_idx]];
12481 l->dst_y_per_row_vblank = mode_lib->mp.dst_y_per_row_vblank[mode_lib->mp.pipe_plane[pipe_idx]];
12482 l->dst_y_per_vm_flip = mode_lib->mp.dst_y_per_vm_flip[mode_lib->mp.pipe_plane[pipe_idx]];
12483 l->dst_y_per_row_flip = mode_lib->mp.dst_y_per_row_flip[mode_lib->mp.pipe_plane[pipe_idx]];
12484
12485 dml2_printf("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, l->dst_y_prefetch);
12486 dml2_printf("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, l->dst_y_per_vm_flip);
12487 dml2_printf("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, l->dst_y_per_row_flip);
12488 dml2_printf("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, l->dst_y_per_vm_vblank);
12489 dml2_printf("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, l->dst_y_per_row_vblank);
12490
12491 if (l->dst_y_prefetch > 0 && l->dst_y_per_vm_vblank > 0 && l->dst_y_per_row_vblank > 0) {
12492 DML2_ASSERT(l->dst_y_prefetch > (l->dst_y_per_vm_vblank + l->dst_y_per_row_vblank));
12493 }
12494
12495 l->vratio_pre_l = mode_lib->mp.VRatioPrefetchY[mode_lib->mp.pipe_plane[pipe_idx]];
12496 l->vratio_pre_c = mode_lib->mp.VRatioPrefetchC[mode_lib->mp.pipe_plane[pipe_idx]];
12497
12498 dml2_printf("DML_DLG: %s: vratio_pre_l = %3.2f\n", __func__, l->vratio_pre_l);
12499 dml2_printf("DML_DLG: %s: vratio_pre_c = %3.2f\n", __func__, l->vratio_pre_c);
12500
12501 // Active
12502 l->refcyc_per_line_delivery_pre_l = mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12503 l->refcyc_per_line_delivery_l = mode_lib->mp.DisplayPipeLineDeliveryTimeLuma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12504
12505 dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_l);
12506 dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_l);
12507
12508 l->refcyc_per_line_delivery_pre_c = 0.0;
12509 l->refcyc_per_line_delivery_c = 0.0;
12510
12511 if (l->dual_plane) {
12512 l->refcyc_per_line_delivery_pre_c = mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12513 l->refcyc_per_line_delivery_c = mode_lib->mp.DisplayPipeLineDeliveryTimeChroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12514
12515 dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_c);
12516 dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_c);
12517 }
12518
12519 disp_dlg_regs->refcyc_per_vm_dmdata = (unsigned int)(mode_lib->mp.Tdmdl_vm[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz);
12520 disp_dlg_regs->dmdata_dl_delta = (unsigned int)(mode_lib->mp.Tdmdl[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz);
12521
12522 l->refcyc_per_req_delivery_pre_l = mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12523 l->refcyc_per_req_delivery_l = mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12524
12525 dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_l);
12526 dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_l);
12527
12528 l->refcyc_per_req_delivery_pre_c = 0.0;
12529 l->refcyc_per_req_delivery_c = 0.0;
12530 if (l->dual_plane) {
12531 l->refcyc_per_req_delivery_pre_c = mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12532 l->refcyc_per_req_delivery_c = mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12533
12534 dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_c);
12535 dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_c);
12536 }
12537
12538 // TTU - Cursor
12539 DML2_ASSERT(display_cfg->plane_descriptors[l->plane_idx].cursor.num_cursors <= 1);
12540
12541 // Assign to register structures
12542 disp_dlg_regs->min_dst_y_next_start = (unsigned int)((double)l->min_dst_y_next_start * math_pow(2, 2));
12543 DML2_ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)math_pow(2, 18));
12544
12545 disp_dlg_regs->dst_y_after_scaler = l->dst_y_after_scaler; // in terms of line
12546 disp_dlg_regs->refcyc_x_after_scaler = (unsigned int)((double)l->dst_x_after_scaler * l->ref_freq_to_pix_freq); // in terms of refclk
12547 disp_dlg_regs->dst_y_prefetch = (unsigned int)(l->dst_y_prefetch * math_pow(2, 2));
12548 disp_dlg_regs->dst_y_per_vm_vblank = (unsigned int)(l->dst_y_per_vm_vblank * math_pow(2, 2));
12549 disp_dlg_regs->dst_y_per_row_vblank = (unsigned int)(l->dst_y_per_row_vblank * math_pow(2, 2));
12550 disp_dlg_regs->dst_y_per_vm_flip = (unsigned int)(l->dst_y_per_vm_flip * math_pow(2, 2));
12551 disp_dlg_regs->dst_y_per_row_flip = (unsigned int)(l->dst_y_per_row_flip * math_pow(2, 2));
12552
12553 disp_dlg_regs->vratio_prefetch = (unsigned int)(l->vratio_pre_l * math_pow(2, 19));
12554 disp_dlg_regs->vratio_prefetch_c = (unsigned int)(l->vratio_pre_c * math_pow(2, 19));
12555
12556 dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_vblank);
12557 dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank);
12558 dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip);
12559 dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip);
12560
12561 disp_dlg_regs->refcyc_per_vm_group_vblank = (unsigned int)(mode_lib->mp.TimePerVMGroupVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz);
12562 disp_dlg_regs->refcyc_per_vm_group_flip = (unsigned int)(mode_lib->mp.TimePerVMGroupFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz);
12563 disp_dlg_regs->refcyc_per_vm_req_vblank = (unsigned int)(mode_lib->mp.TimePerVMRequestVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz * math_pow(2, 10));
12564 disp_dlg_regs->refcyc_per_vm_req_flip = (unsigned int)(mode_lib->mp.TimePerVMRequestFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz * math_pow(2, 10));
12565
12566 l->dst_y_per_pte_row_nom_l = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L[mode_lib->mp.pipe_plane[pipe_idx]];
12567 l->dst_y_per_pte_row_nom_c = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C[mode_lib->mp.pipe_plane[pipe_idx]];
12568 l->refcyc_per_pte_group_nom_l = mode_lib->mp.time_per_pte_group_nom_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12569 l->refcyc_per_pte_group_nom_c = mode_lib->mp.time_per_pte_group_nom_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12570 l->refcyc_per_pte_group_vblank_l = mode_lib->mp.time_per_pte_group_vblank_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12571 l->refcyc_per_pte_group_vblank_c = mode_lib->mp.time_per_pte_group_vblank_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12572 l->refcyc_per_pte_group_flip_l = mode_lib->mp.time_per_pte_group_flip_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12573 l->refcyc_per_pte_group_flip_c = mode_lib->mp.time_per_pte_group_flip_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12574 l->refcyc_per_tdlut_group = mode_lib->mp.time_per_tdlut_group[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12575
12576 disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int)(l->dst_y_per_pte_row_nom_l * math_pow(2, 2));
12577 disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int)(l->dst_y_per_pte_row_nom_c * math_pow(2, 2));
12578
12579 disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int)(l->refcyc_per_pte_group_nom_l);
12580 disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int)(l->refcyc_per_pte_group_nom_c);
12581 disp_dlg_regs->refcyc_per_pte_group_vblank_l = (unsigned int)(l->refcyc_per_pte_group_vblank_l);
12582 disp_dlg_regs->refcyc_per_pte_group_vblank_c = (unsigned int)(l->refcyc_per_pte_group_vblank_c);
12583 disp_dlg_regs->refcyc_per_pte_group_flip_l = (unsigned int)(l->refcyc_per_pte_group_flip_l);
12584 disp_dlg_regs->refcyc_per_pte_group_flip_c = (unsigned int)(l->refcyc_per_pte_group_flip_c);
12585 disp_dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int)math_floor2(l->refcyc_per_line_delivery_pre_l, 1);
12586 disp_dlg_regs->refcyc_per_line_delivery_l = (unsigned int)math_floor2(l->refcyc_per_line_delivery_l, 1);
12587 disp_dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int)math_floor2(l->refcyc_per_line_delivery_pre_c, 1);
12588 disp_dlg_regs->refcyc_per_line_delivery_c = (unsigned int)math_floor2(l->refcyc_per_line_delivery_c, 1);
12589
12590 l->dst_y_per_meta_row_nom_l = mode_lib->mp.DST_Y_PER_META_ROW_NOM_L[mode_lib->mp.pipe_plane[pipe_idx]];
12591 l->dst_y_per_meta_row_nom_c = mode_lib->mp.DST_Y_PER_META_ROW_NOM_C[mode_lib->mp.pipe_plane[pipe_idx]];
12592 l->refcyc_per_meta_chunk_nom_l = mode_lib->mp.TimePerMetaChunkNominal[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12593 l->refcyc_per_meta_chunk_nom_c = mode_lib->mp.TimePerChromaMetaChunkNominal[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12594 l->refcyc_per_meta_chunk_vblank_l = mode_lib->mp.TimePerMetaChunkVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12595 l->refcyc_per_meta_chunk_vblank_c = mode_lib->mp.TimePerChromaMetaChunkVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12596 l->refcyc_per_meta_chunk_flip_l = mode_lib->mp.TimePerMetaChunkFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12597 l->refcyc_per_meta_chunk_flip_c = mode_lib->mp.TimePerChromaMetaChunkFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12598
12599 disp_dlg_regs->dst_y_per_meta_row_nom_l = (unsigned int)(l->dst_y_per_meta_row_nom_l * math_pow(2, 2));
12600 disp_dlg_regs->dst_y_per_meta_row_nom_c = (unsigned int)(l->dst_y_per_meta_row_nom_c * math_pow(2, 2));
12601 disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (unsigned int)(l->refcyc_per_meta_chunk_nom_l);
12602 disp_dlg_regs->refcyc_per_meta_chunk_nom_c = (unsigned int)(l->refcyc_per_meta_chunk_nom_c);
12603 disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = (unsigned int)(l->refcyc_per_meta_chunk_vblank_l);
12604 disp_dlg_regs->refcyc_per_meta_chunk_vblank_c = (unsigned int)(l->refcyc_per_meta_chunk_vblank_c);
12605 disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (unsigned int)(l->refcyc_per_meta_chunk_flip_l);
12606 disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (unsigned int)(l->refcyc_per_meta_chunk_flip_c);
12607
12608 disp_dlg_regs->refcyc_per_tdlut_group = (unsigned int)(l->refcyc_per_tdlut_group);
12609 disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off
12610
12611 disp_ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int)(l->refcyc_per_req_delivery_pre_l * math_pow(2, 10));
12612 disp_ttu_regs->refcyc_per_req_delivery_l = (unsigned int)(l->refcyc_per_req_delivery_l * math_pow(2, 10));
12613 disp_ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int)(l->refcyc_per_req_delivery_pre_c * math_pow(2, 10));
12614 disp_ttu_regs->refcyc_per_req_delivery_c = (unsigned int)(l->refcyc_per_req_delivery_c * math_pow(2, 10));
12615 disp_ttu_regs->qos_level_low_wm = 0;
12616
12617 disp_ttu_regs->qos_level_high_wm = (unsigned int)(4.0 * (double)l->htotal * l->ref_freq_to_pix_freq);
12618
12619 disp_ttu_regs->qos_level_flip = 14;
12620 disp_ttu_regs->qos_level_fixed_l = 8;
12621 disp_ttu_regs->qos_level_fixed_c = 8;
12622 disp_ttu_regs->qos_ramp_disable_l = 0;
12623 disp_ttu_regs->qos_ramp_disable_c = 0;
12624 disp_ttu_regs->min_ttu_vblank = (unsigned int)(l->min_ttu_vblank * l->refclk_freq_in_mhz);
12625
12626 // CHECK for HW registers' range, DML2_ASSERT or clamp
12627 DML2_ASSERT(l->refcyc_per_req_delivery_pre_l < math_pow(2, 13));
12628 DML2_ASSERT(l->refcyc_per_req_delivery_l < math_pow(2, 13));
12629 DML2_ASSERT(l->refcyc_per_req_delivery_pre_c < math_pow(2, 13));
12630 DML2_ASSERT(l->refcyc_per_req_delivery_c < math_pow(2, 13));
12631 if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int)math_pow(2, 23))
12632 disp_dlg_regs->refcyc_per_vm_group_vblank = (unsigned int)(math_pow(2, 23) - 1);
12633
12634 if (disp_dlg_regs->refcyc_per_vm_group_flip >= (unsigned int)math_pow(2, 23))
12635 disp_dlg_regs->refcyc_per_vm_group_flip = (unsigned int)(math_pow(2, 23) - 1);
12636
12637 if (disp_dlg_regs->refcyc_per_vm_req_vblank >= (unsigned int)math_pow(2, 23))
12638 disp_dlg_regs->refcyc_per_vm_req_vblank = (unsigned int)(math_pow(2, 23) - 1);
12639
12640 if (disp_dlg_regs->refcyc_per_vm_req_flip >= (unsigned int)math_pow(2, 23))
12641 disp_dlg_regs->refcyc_per_vm_req_flip = (unsigned int)(math_pow(2, 23) - 1);
12642
12643
12644 DML2_ASSERT(disp_dlg_regs->dst_y_after_scaler < (unsigned int)8);
12645 DML2_ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int)math_pow(2, 13));
12646
12647 if (disp_dlg_regs->dst_y_per_pte_row_nom_l >= (unsigned int)math_pow(2, 17)) {
12648 dml2_printf("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_L %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_l, (unsigned int)math_pow(2, 17) - 1);
12649 l->dst_y_per_pte_row_nom_l = (unsigned int)math_pow(2, 17) - 1;
12650 }
12651 if (l->dual_plane) {
12652 if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int)math_pow(2, 17)) {
12653 dml2_printf("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_C %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_c, (unsigned int)math_pow(2, 17) - 1);
12654 l->dst_y_per_pte_row_nom_c = (unsigned int)math_pow(2, 17) - 1;
12655 }
12656 }
12657
12658 if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int)math_pow(2, 23))
12659 disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int)(math_pow(2, 23) - 1);
12660 if (l->dual_plane) {
12661 if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int)math_pow(2, 23))
12662 disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int)(math_pow(2, 23) - 1);
12663 }
12664 DML2_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)math_pow(2, 13));
12665 if (l->dual_plane) {
12666 DML2_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c < (unsigned int)math_pow(2, 13));
12667 }
12668
12669 DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)math_pow(2, 13));
12670 DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int)math_pow(2, 13));
12671 DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)math_pow(2, 13));
12672 DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int)math_pow(2, 13));
12673 DML2_ASSERT(disp_ttu_regs->qos_level_low_wm < (unsigned int)math_pow(2, 14));
12674 DML2_ASSERT(disp_ttu_regs->qos_level_high_wm < (unsigned int)math_pow(2, 14));
12675 DML2_ASSERT(disp_ttu_regs->min_ttu_vblank < (unsigned int)math_pow(2, 24));
12676
12677 dml2_printf("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx);
12678
12679 }
12680 }
12681
rq_dlg_get_arb_params(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_display_arb_regs * arb_param)12682 static void rq_dlg_get_arb_params(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_arb_regs *arb_param)
12683 {
12684 double refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? (double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz;
12685
12686 arb_param->max_req_outstanding = mode_lib->soc.max_outstanding_reqs;
12687 arb_param->min_req_outstanding = mode_lib->soc.max_outstanding_reqs; // turn off the sat level feature if this set to max
12688 arb_param->sdpif_request_rate_limit = (3 * mode_lib->ip.words_per_channel * mode_lib->soc.clk_table.dram_config.channel_count) / 4;
12689 arb_param->sdpif_request_rate_limit = arb_param->sdpif_request_rate_limit < 96 ? 96 : arb_param->sdpif_request_rate_limit;
12690 arb_param->sat_level_us = 60;
12691 arb_param->hvm_max_qos_commit_threshold = 0xf;
12692 arb_param->hvm_min_req_outstand_commit_threshold = 0xa;
12693 arb_param->compbuf_reserved_space_kbytes = dml_get_compbuf_reserved_space_64b(mode_lib) * 64 / 1024;
12694 arb_param->compbuf_size = mode_lib->mp.CompressedBufferSizeInkByte / mode_lib->ip.compressed_buffer_segment_size_in_kbytes;
12695 arb_param->allow_sdpif_rate_limit_when_cstate_req = dml_get_hw_debug5(mode_lib);
12696 arb_param->dcfclk_deep_sleep_hysteresis = dml_get_dcfclk_deep_sleep_hysteresis(mode_lib);
12697 arb_param->pstate_stall_threshold = (unsigned int)(mode_lib->ip_caps.fams2.max_allow_delay_us * refclk_freq_in_mhz);
12698
12699 #ifdef __DML_VBA_DEBUG__
12700 dml2_printf("DML::%s: max_req_outstanding = %d\n", __func__, arb_param->max_req_outstanding);
12701 dml2_printf("DML::%s: sdpif_request_rate_limit = %d\n", __func__, arb_param->sdpif_request_rate_limit);
12702 dml2_printf("DML::%s: compbuf_reserved_space_kbytes = %d\n", __func__, arb_param->compbuf_reserved_space_kbytes);
12703 dml2_printf("DML::%s: allow_sdpif_rate_limit_when_cstate_req = %d\n", __func__, arb_param->allow_sdpif_rate_limit_when_cstate_req);
12704 dml2_printf("DML::%s: dcfclk_deep_sleep_hysteresis = %d\n", __func__, arb_param->dcfclk_deep_sleep_hysteresis);
12705 #endif
12706
12707 }
12708
/*
 * Public entry point for watermark register programming.
 * Forwards all arguments unchanged to rq_dlg_get_wm_regs(), which fills @out.
 */
void dml2_core_calcs_get_watermarks(const struct dml2_display_cfg *display_cfg,
	const struct dml2_core_internal_display_mode_lib *mode_lib,
	struct dml2_dchub_watermark_regs *out)
{
	rq_dlg_get_wm_regs(display_cfg, mode_lib, out);
}
12713
/*
 * Public entry point for arbiter register programming.
 * Forwards all arguments unchanged to rq_dlg_get_arb_params(), which fills @out.
 */
void dml2_core_calcs_get_arb_params(const struct dml2_display_cfg *display_cfg,
	const struct dml2_core_internal_display_mode_lib *mode_lib,
	struct dml2_display_arb_regs *out)
{
	rq_dlg_get_arb_params(display_cfg, mode_lib, out);
}
12718
dml2_core_calcs_get_pipe_regs(const struct dml2_display_cfg * display_cfg,struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_dchub_per_pipe_register_set * out,int pipe_index)12719 void dml2_core_calcs_get_pipe_regs(const struct dml2_display_cfg *display_cfg,
12720 struct dml2_core_internal_display_mode_lib *mode_lib,
12721 struct dml2_dchub_per_pipe_register_set *out, int pipe_index)
12722 {
12723 rq_dlg_get_rq_reg(&out->rq_regs, display_cfg, mode_lib, pipe_index);
12724 rq_dlg_get_dlg_reg(&mode_lib->scratch, &out->dlg_regs, &out->ttu_regs, display_cfg, mode_lib, pipe_index);
12725 out->det_size = dml_get_det_buffer_size_kbytes(mode_lib, pipe_index) / mode_lib->ip.config_return_buffer_segment_size_in_kbytes;
12726 }
12727
dml2_core_calcs_get_global_sync_programming(const struct dml2_core_internal_display_mode_lib * mode_lib,union dml2_global_sync_programming * out,int pipe_index)12728 void dml2_core_calcs_get_global_sync_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, union dml2_global_sync_programming *out, int pipe_index)
12729 {
12730 out->dcn4x.vready_offset_pixels = dml_get_vready_offset(mode_lib, pipe_index);
12731 out->dcn4x.vstartup_lines = dml_get_vstartup_calculated(mode_lib, pipe_index);
12732 out->dcn4x.vupdate_offset_pixels = dml_get_vupdate_offset(mode_lib, pipe_index);
12733 out->dcn4x.vupdate_vupdate_width_pixels = dml_get_vupdate_width(mode_lib, pipe_index);
12734 out->dcn4x.pstate_keepout_start_lines = dml_get_pstate_keepout_dst_lines(mode_lib, pipe_index);
12735 }
12736
dml2_core_calcs_get_stream_programming(const struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_per_stream_programming * out,int pipe_index)12737 void dml2_core_calcs_get_stream_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_per_stream_programming *out, int pipe_index)
12738 {
12739 dml2_core_calcs_get_global_sync_programming(mode_lib, &out->global_sync, pipe_index);
12740 }
12741
dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_internal_display_mode_lib * mode_lib,const struct display_configuation_with_meta * display_cfg,struct dmub_cmd_fams2_global_config * fams2_global_config)12742 void dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib,
12743 const struct display_configuation_with_meta *display_cfg,
12744 struct dmub_cmd_fams2_global_config *fams2_global_config)
12745 {
12746 fams2_global_config->features.bits.enable = display_cfg->stage3.fams2_required;
12747
12748 if (fams2_global_config->features.bits.enable) {
12749 fams2_global_config->features.bits.enable_stall_recovery = true;
12750 fams2_global_config->features.bits.allow_delay_check_mode = FAMS2_ALLOW_DELAY_CHECK_FROM_START;
12751
12752 fams2_global_config->max_allow_delay_us = mode_lib->ip_caps.fams2.max_allow_delay_us;
12753 fams2_global_config->lock_wait_time_us = mode_lib->ip_caps.fams2.lock_timeout_us;
12754 fams2_global_config->recovery_timeout_us = mode_lib->ip_caps.fams2.recovery_timeout_us;
12755 fams2_global_config->hwfq_flip_programming_delay_us = mode_lib->ip_caps.fams2.flip_programming_delay_us;
12756
12757 fams2_global_config->num_streams = display_cfg->display_config.num_streams;
12758 }
12759 }
12760
dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib * mode_lib,const struct display_configuation_with_meta * display_cfg,union dmub_cmd_fams2_config * fams2_base_programming,union dmub_cmd_fams2_config * fams2_sub_programming,enum dml2_pstate_method pstate_method,int plane_index)12761 void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib,
12762 const struct display_configuation_with_meta *display_cfg,
12763 union dmub_cmd_fams2_config *fams2_base_programming,
12764 union dmub_cmd_fams2_config *fams2_sub_programming,
12765 enum dml2_pstate_method pstate_method,
12766 int plane_index)
12767 {
12768 const struct dml2_plane_parameters *plane_descriptor = &display_cfg->display_config.plane_descriptors[plane_index];
12769 const struct dml2_stream_parameters *stream_descriptor = &display_cfg->display_config.stream_descriptors[plane_descriptor->stream_index];
12770 const struct dml2_fams2_meta *stream_fams2_meta = &display_cfg->stage3.stream_fams2_meta[plane_descriptor->stream_index];
12771
12772 struct dmub_fams2_cmd_stream_static_base_state *base_programming = &fams2_base_programming->stream_v1.base;
12773 union dmub_fams2_cmd_stream_static_sub_state *sub_programming = &fams2_sub_programming->stream_v1.sub_state;
12774
12775 unsigned int i;
12776
12777 if (display_cfg->display_config.overrides.all_streams_blanked) {
12778 /* stream is blanked, so do nothing */
12779 return;
12780 }
12781
12782 /* from display configuration */
12783 base_programming->htotal = (uint16_t)stream_descriptor->timing.h_total;
12784 base_programming->vtotal = (uint16_t)stream_descriptor->timing.v_total;
12785 base_programming->vblank_start = (uint16_t)(stream_fams2_meta->nom_vtotal -
12786 stream_descriptor->timing.v_front_porch);
12787 base_programming->vblank_end = (uint16_t)(stream_fams2_meta->nom_vtotal -
12788 stream_descriptor->timing.v_front_porch -
12789 stream_descriptor->timing.v_active);
12790 base_programming->config.bits.is_drr = stream_descriptor->timing.drr_config.enabled;
12791
12792 /* from meta */
12793 base_programming->otg_vline_time_ns =
12794 (unsigned int)(stream_fams2_meta->otg_vline_time_us * 1000.0);
12795 base_programming->scheduling_delay_otg_vlines = (uint8_t)stream_fams2_meta->scheduling_delay_otg_vlines;
12796 base_programming->contention_delay_otg_vlines = (uint8_t)stream_fams2_meta->contention_delay_otg_vlines;
12797 base_programming->vline_int_ack_delay_otg_vlines = (uint8_t)stream_fams2_meta->vertical_interrupt_ack_delay_otg_vlines;
12798 base_programming->drr_keepout_otg_vline = (uint16_t)(stream_fams2_meta->nom_vtotal -
12799 stream_descriptor->timing.v_front_porch -
12800 stream_fams2_meta->method_drr.programming_delay_otg_vlines);
12801 base_programming->allow_to_target_delay_otg_vlines = (uint8_t)stream_fams2_meta->allow_to_target_delay_otg_vlines;
12802 base_programming->max_vtotal = (uint16_t)stream_fams2_meta->max_vtotal;
12803
12804 /* from core */
12805 base_programming->config.bits.min_ttu_vblank_usable = true;
12806 for (i = 0; i < display_cfg->display_config.num_planes; i++) {
12807 /* check if all planes support p-state in blank */
12808 if (display_cfg->display_config.plane_descriptors[i].stream_index == plane_descriptor->stream_index &&
12809 mode_lib->mp.MinTTUVBlank[i] <= mode_lib->mp.Watermark.DRAMClockChangeWatermark) {
12810 base_programming->config.bits.min_ttu_vblank_usable = false;
12811 break;
12812 }
12813 }
12814
12815 switch (pstate_method) {
12816 case dml2_pstate_method_vactive:
12817 case dml2_pstate_method_fw_vactive_drr:
12818 /* legacy vactive */
12819 base_programming->type = FAMS2_STREAM_TYPE_VACTIVE;
12820 sub_programming->legacy.vactive_det_fill_delay_otg_vlines =
12821 (uint8_t)stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines;
12822 base_programming->allow_start_otg_vline =
12823 (uint16_t)stream_fams2_meta->method_vactive.common.allow_start_otg_vline;
12824 base_programming->allow_end_otg_vline =
12825 (uint16_t)stream_fams2_meta->method_vactive.common.allow_end_otg_vline;
12826 base_programming->config.bits.clamp_vtotal_min = true;
12827 break;
12828 case dml2_pstate_method_vblank:
12829 case dml2_pstate_method_fw_vblank_drr:
12830 /* legacy vblank */
12831 base_programming->type = FAMS2_STREAM_TYPE_VBLANK;
12832 base_programming->allow_start_otg_vline =
12833 (uint16_t)stream_fams2_meta->method_vblank.common.allow_start_otg_vline;
12834 base_programming->allow_end_otg_vline =
12835 (uint16_t)stream_fams2_meta->method_vblank.common.allow_end_otg_vline;
12836 base_programming->config.bits.clamp_vtotal_min = true;
12837 break;
12838 case dml2_pstate_method_fw_drr:
12839 /* drr */
12840 base_programming->type = FAMS2_STREAM_TYPE_DRR;
12841 sub_programming->drr.programming_delay_otg_vlines =
12842 (uint8_t)stream_fams2_meta->method_drr.programming_delay_otg_vlines;
12843 sub_programming->drr.nom_stretched_vtotal =
12844 (uint16_t)stream_fams2_meta->method_drr.stretched_vtotal;
12845 base_programming->allow_start_otg_vline =
12846 (uint16_t)stream_fams2_meta->method_drr.common.allow_start_otg_vline;
12847 base_programming->allow_end_otg_vline =
12848 (uint16_t)stream_fams2_meta->method_drr.common.allow_end_otg_vline;
12849 /* drr only clamps to vtotal min for single display */
12850 base_programming->config.bits.clamp_vtotal_min = display_cfg->display_config.num_streams == 1;
12851 sub_programming->drr.only_stretch_if_required = true;
12852 break;
12853 case dml2_pstate_method_fw_svp:
12854 case dml2_pstate_method_fw_svp_drr:
12855 /* subvp */
12856 base_programming->type = FAMS2_STREAM_TYPE_SUBVP;
12857 sub_programming->subvp.vratio_numerator =
12858 (uint16_t)(plane_descriptor->composition.scaler_info.plane0.v_ratio * 1000.0);
12859 sub_programming->subvp.vratio_denominator = 1000;
12860 sub_programming->subvp.programming_delay_otg_vlines =
12861 (uint8_t)stream_fams2_meta->method_subvp.programming_delay_otg_vlines;
12862 sub_programming->subvp.prefetch_to_mall_otg_vlines =
12863 (uint8_t)stream_fams2_meta->method_subvp.prefetch_to_mall_delay_otg_vlines;
12864 sub_programming->subvp.phantom_vtotal =
12865 (uint16_t)stream_fams2_meta->method_subvp.phantom_vtotal;
12866 sub_programming->subvp.phantom_vactive =
12867 (uint16_t)stream_fams2_meta->method_subvp.phantom_vactive;
12868 sub_programming->subvp.config.bits.is_multi_planar =
12869 plane_descriptor->surface.plane1.height > 0;
12870 sub_programming->subvp.config.bits.is_yuv420 =
12871 plane_descriptor->pixel_format == dml2_420_8 ||
12872 plane_descriptor->pixel_format == dml2_420_10 ||
12873 plane_descriptor->pixel_format == dml2_420_12;
12874
12875 base_programming->allow_start_otg_vline =
12876 (uint16_t)stream_fams2_meta->method_subvp.common.allow_start_otg_vline;
12877 base_programming->allow_end_otg_vline =
12878 (uint16_t)stream_fams2_meta->method_subvp.common.allow_end_otg_vline;
12879 base_programming->config.bits.clamp_vtotal_min = true;
12880 break;
12881 case dml2_pstate_method_reserved_hw:
12882 case dml2_pstate_method_reserved_fw:
12883 case dml2_pstate_method_reserved_fw_drr_clamped:
12884 case dml2_pstate_method_reserved_fw_drr_var:
12885 case dml2_pstate_method_na:
12886 case dml2_pstate_method_count:
12887 default:
12888 /* this should never happen */
12889 break;
12890 }
12891 }
12892
dml2_core_calcs_get_mcache_allocation(const struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_mcache_surface_allocation * out,int plane_idx)12893 void dml2_core_calcs_get_mcache_allocation(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_mcache_surface_allocation *out, int plane_idx)
12894 {
12895 unsigned int n;
12896
12897 out->num_mcaches_plane0 = dml_get_plane_num_mcaches_plane0(mode_lib, plane_idx);
12898 out->num_mcaches_plane1 = dml_get_plane_num_mcaches_plane1(mode_lib, plane_idx);
12899 out->shift_granularity.p0 = dml_get_plane_mcache_shift_granularity_plane0(mode_lib, plane_idx);
12900 out->shift_granularity.p1 = dml_get_plane_mcache_shift_granularity_plane1(mode_lib, plane_idx);
12901
12902 for (n = 0; n < out->num_mcaches_plane0; n++)
12903 out->mcache_x_offsets_plane0[n] = dml_get_plane_array_mcache_offsets_plane0(mode_lib, plane_idx, n);
12904
12905 for (n = 0; n < out->num_mcaches_plane1; n++)
12906 out->mcache_x_offsets_plane1[n] = dml_get_plane_array_mcache_offsets_plane1(mode_lib, plane_idx, n);
12907
12908 out->last_slice_sharing.mall_comb_mcache_p0 = dml_get_plane_mall_comb_mcache_l(mode_lib, plane_idx);
12909 out->last_slice_sharing.mall_comb_mcache_p1 = dml_get_plane_mall_comb_mcache_c(mode_lib, plane_idx);
12910 out->last_slice_sharing.plane0_plane1 = dml_get_plane_lc_comb_mcache(mode_lib, plane_idx);
12911 out->informative.meta_row_bytes_plane0 = dml_get_plane_mcache_row_bytes_plane0(mode_lib, plane_idx);
12912 out->informative.meta_row_bytes_plane1 = dml_get_plane_mcache_row_bytes_plane1(mode_lib, plane_idx);
12913
12914 out->valid = true;
12915 }
12916
/*
 * Store in *@out the value returned by dml_get_surface_size_in_mall_bytes()
 * for @pipe_index.
 */
void dml2_core_calcs_get_mall_allocation(struct dml2_core_internal_display_mode_lib *mode_lib,
	unsigned int *out,
	int pipe_index)
{
	*out = dml_get_surface_size_in_mall_bytes(mode_lib, pipe_index);
}
12921
dml2_core_calcs_get_plane_support_info(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,struct core_plane_support_info * out,int plane_idx)12922 void dml2_core_calcs_get_plane_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_plane_support_info *out, int plane_idx)
12923 {
12924 out->mall_svp_size_requirement_ways = 0;
12925
12926 out->nominal_vblank_pstate_latency_hiding_us =
12927 (int)(display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_idx].stream_index].timing.h_total /
12928 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_idx].stream_index].timing.pixel_clock_khz / 1000) * mode_lib->ms.TWait[plane_idx]);
12929
12930 out->dram_change_latency_hiding_margin_in_active = (int)mode_lib->ms.VActiveLatencyHidingMargin[plane_idx];
12931
12932 out->active_latency_hiding_us = (int)mode_lib->ms.VActiveLatencyHidingUs[plane_idx];
12933
12934 out->dram_change_vactive_det_fill_delay_us = (unsigned int)math_ceil(mode_lib->ms.dram_change_vactive_det_fill_delay_us[plane_idx]);
12935 }
12936
dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,struct core_stream_support_info * out,int plane_index)12937 void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_stream_support_info *out, int plane_index)
12938 {
12939 double phantom_processing_delay_pix;
12940 unsigned int phantom_processing_delay_lines;
12941 unsigned int phantom_min_v_active_lines;
12942 unsigned int phantom_v_active_lines;
12943 unsigned int phantom_v_startup_lines;
12944 unsigned int phantom_v_blank_lines;
12945 unsigned int main_v_blank_lines;
12946 unsigned int rem;
12947
12948 phantom_processing_delay_pix = (double)((mode_lib->ip.subvp_fw_processing_delay_us + mode_lib->ip.subvp_pstate_allow_width_us) *
12949 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.pixel_clock_khz / 1000));
12950 phantom_processing_delay_lines = (unsigned int)(phantom_processing_delay_pix / (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total);
12951 dml2_core_div_rem(phantom_processing_delay_pix,
12952 display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total,
12953 &rem);
12954 if (rem)
12955 phantom_processing_delay_lines++;
12956
12957 phantom_v_startup_lines = dml_get_plane_max_vstartup_lines(mode_lib, plane_index);
12958 phantom_min_v_active_lines = (unsigned int)math_ceil((double)dml_get_plane_subviewport_lines_needed_in_mall(mode_lib, plane_index) /
12959 display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane0.v_ratio);
12960 phantom_v_active_lines = phantom_processing_delay_lines + phantom_min_v_active_lines + mode_lib->ip.subvp_swath_height_margin_lines;
12961
12962 // phantom_vblank = max(vbp(vstartup) + vactive + vfp(always 1) + vsync(can be 1), main_vblank)
12963 phantom_v_blank_lines = phantom_v_startup_lines + 1 + 1;
12964 main_v_blank_lines = display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.v_active;
12965 if (phantom_v_blank_lines > main_v_blank_lines)
12966 phantom_v_blank_lines = main_v_blank_lines;
12967
12968 out->phantom_v_active = phantom_v_active_lines;
12969 // phantom_vtotal = vactive + vblank
12970 out->phantom_v_total = phantom_v_active_lines + phantom_v_blank_lines;
12971
12972 out->phantom_min_v_active = phantom_min_v_active_lines;
12973 out->phantom_v_startup = phantom_v_startup_lines;
12974
12975 out->vblank_reserved_time_us = display_cfg->plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000;
12976 #if defined(__DML_VBA_DEBUG__)
12977 dml2_printf("DML::%s: subvp_fw_processing_delay_us = %d\n", __func__, mode_lib->ip.subvp_fw_processing_delay_us);
12978 dml2_printf("DML::%s: subvp_pstate_allow_width_us = %d\n", __func__, mode_lib->ip.subvp_pstate_allow_width_us);
12979 dml2_printf("DML::%s: subvp_swath_height_margin_lines = %d\n", __func__, mode_lib->ip.subvp_swath_height_margin_lines);
12980 dml2_printf("DML::%s: vblank_reserved_time_us = %f\n", __func__, out->vblank_reserved_time_us);
12981 #endif
12982 }
12983
dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_display_cfg_programming * out)12984 void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_cfg_programming *out)
12985 {
12986 unsigned int k, n;
12987
12988 out->informative.mode_support_info.ModeIsSupported = mode_lib->ms.support.ModeSupport;
12989 out->informative.mode_support_info.ImmediateFlipSupport = mode_lib->ms.support.ImmediateFlipSupport;
12990 out->informative.mode_support_info.WritebackLatencySupport = mode_lib->ms.support.WritebackLatencySupport;
12991 out->informative.mode_support_info.ScaleRatioAndTapsSupport = mode_lib->ms.support.ScaleRatioAndTapsSupport;
12992 out->informative.mode_support_info.SourceFormatPixelAndScanSupport = mode_lib->ms.support.SourceFormatPixelAndScanSupport;
12993 out->informative.mode_support_info.P2IWith420 = mode_lib->ms.support.P2IWith420;
12994 out->informative.mode_support_info.DSCOnlyIfNecessaryWithBPP = false;
12995 out->informative.mode_support_info.DSC422NativeNotSupported = mode_lib->ms.support.DSC422NativeNotSupported;
12996 out->informative.mode_support_info.LinkRateDoesNotMatchDPVersion = mode_lib->ms.support.LinkRateDoesNotMatchDPVersion;
12997 out->informative.mode_support_info.LinkRateForMultistreamNotIndicated = mode_lib->ms.support.LinkRateForMultistreamNotIndicated;
12998 out->informative.mode_support_info.BPPForMultistreamNotIndicated = mode_lib->ms.support.BPPForMultistreamNotIndicated;
12999 out->informative.mode_support_info.MultistreamWithHDMIOreDP = mode_lib->ms.support.MultistreamWithHDMIOreDP;
13000 out->informative.mode_support_info.MSOOrODMSplitWithNonDPLink = mode_lib->ms.support.MSOOrODMSplitWithNonDPLink;
13001 out->informative.mode_support_info.NotEnoughLanesForMSO = mode_lib->ms.support.NotEnoughLanesForMSO;
13002 out->informative.mode_support_info.NumberOfOTGSupport = mode_lib->ms.support.NumberOfOTGSupport;
13003 out->informative.mode_support_info.NumberOfHDMIFRLSupport = mode_lib->ms.support.NumberOfHDMIFRLSupport;
13004 out->informative.mode_support_info.NumberOfDP2p0Support = mode_lib->ms.support.NumberOfDP2p0Support;
13005 out->informative.mode_support_info.WritebackScaleRatioAndTapsSupport = mode_lib->ms.support.WritebackScaleRatioAndTapsSupport;
13006 out->informative.mode_support_info.CursorSupport = mode_lib->ms.support.CursorSupport;
13007 out->informative.mode_support_info.PitchSupport = mode_lib->ms.support.PitchSupport;
13008 out->informative.mode_support_info.ViewportExceedsSurface = mode_lib->ms.support.ViewportExceedsSurface;
13009 out->informative.mode_support_info.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = false;
13010 out->informative.mode_support_info.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe;
13011 out->informative.mode_support_info.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen;
13012 out->informative.mode_support_info.InvalidCombinationOfMALLUseForPState = mode_lib->ms.support.InvalidCombinationOfMALLUseForPState;
13013 out->informative.mode_support_info.ExceededMALLSize = mode_lib->ms.support.ExceededMALLSize;
13014 out->informative.mode_support_info.EnoughWritebackUnits = mode_lib->ms.support.EnoughWritebackUnits;
13015 out->informative.mode_support_info.temp_read_or_ppt_support = mode_lib->ms.support.temp_read_or_ppt_support;
13016 out->informative.mode_support_info.g6_temp_read_support = mode_lib->ms.support.g6_temp_read_support;
13017
13018 out->informative.mode_support_info.ExceededMultistreamSlots = mode_lib->ms.support.ExceededMultistreamSlots;
13019 out->informative.mode_support_info.NotEnoughDSCUnits = mode_lib->ms.support.NotEnoughDSCUnits;
13020 out->informative.mode_support_info.NotEnoughDSCSlices = mode_lib->ms.support.NotEnoughDSCSlices;
13021 out->informative.mode_support_info.PixelsPerLinePerDSCUnitSupport = mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport;
13022 out->informative.mode_support_info.DSCCLKRequiredMoreThanSupported = mode_lib->ms.support.DSCCLKRequiredMoreThanSupported;
13023 out->informative.mode_support_info.DTBCLKRequiredMoreThanSupported = mode_lib->ms.support.DTBCLKRequiredMoreThanSupported;
13024 out->informative.mode_support_info.LinkCapacitySupport = mode_lib->ms.support.LinkCapacitySupport;
13025
13026 out->informative.mode_support_info.ROBSupport = mode_lib->ms.support.ROBSupport;
13027 out->informative.mode_support_info.OutstandingRequestsSupport = mode_lib->ms.support.OutstandingRequestsSupport;
13028 out->informative.mode_support_info.OutstandingRequestsUrgencyAvoidance = mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance;
13029 out->informative.mode_support_info.PTEBufferSizeNotExceeded = mode_lib->ms.support.PTEBufferSizeNotExceeded;
13030 out->informative.mode_support_info.DCCMetaBufferSizeNotExceeded = mode_lib->ms.support.DCCMetaBufferSizeNotExceeded;
13031
13032 out->informative.mode_support_info.TotalVerticalActiveBandwidthSupport = mode_lib->ms.support.AvgBandwidthSupport;
13033 out->informative.mode_support_info.VActiveBandwidthSupport = mode_lib->ms.support.UrgVactiveBandwidthSupport;
13034 out->informative.mode_support_info.USRRetrainingSupport = mode_lib->ms.support.USRRetrainingSupport;
13035
13036 out->informative.mode_support_info.PrefetchSupported = mode_lib->ms.support.PrefetchSupported;
13037 out->informative.mode_support_info.DynamicMetadataSupported = mode_lib->ms.support.DynamicMetadataSupported;
13038 out->informative.mode_support_info.VRatioInPrefetchSupported = mode_lib->ms.support.VRatioInPrefetchSupported;
13039 out->informative.mode_support_info.DISPCLK_DPPCLK_Support = mode_lib->ms.support.DISPCLK_DPPCLK_Support;
13040 out->informative.mode_support_info.TotalAvailablePipesSupport = mode_lib->ms.support.TotalAvailablePipesSupport;
13041 out->informative.mode_support_info.ViewportSizeSupport = mode_lib->ms.support.ViewportSizeSupport;
13042
13043 for (k = 0; k < out->display_config.num_planes; k++) {
13044
13045 out->informative.mode_support_info.FCLKChangeSupport[k] = mode_lib->ms.support.FCLKChangeSupport[k];
13046 out->informative.mode_support_info.MPCCombineEnable[k] = mode_lib->ms.support.MPCCombineEnable[k];
13047 out->informative.mode_support_info.ODMMode[k] = mode_lib->ms.support.ODMMode[k];
13048 out->informative.mode_support_info.DPPPerSurface[k] = mode_lib->ms.support.DPPPerSurface[k];
13049 out->informative.mode_support_info.DSCEnabled[k] = mode_lib->ms.support.DSCEnabled[k];
13050 out->informative.mode_support_info.FECEnabled[k] = mode_lib->ms.support.FECEnabled[k];
13051 out->informative.mode_support_info.NumberOfDSCSlices[k] = mode_lib->ms.support.NumberOfDSCSlices[k];
13052 out->informative.mode_support_info.OutputBpp[k] = mode_lib->ms.support.OutputBpp[k];
13053
13054 if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_unknown)
13055 out->informative.mode_support_info.OutputType[k] = dml2_output_type_unknown;
13056 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_dp)
13057 out->informative.mode_support_info.OutputType[k] = dml2_output_type_dp;
13058 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_edp)
13059 out->informative.mode_support_info.OutputType[k] = dml2_output_type_edp;
13060 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_dp2p0)
13061 out->informative.mode_support_info.OutputType[k] = dml2_output_type_dp2p0;
13062 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_hdmi)
13063 out->informative.mode_support_info.OutputType[k] = dml2_output_type_hdmi;
13064 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_hdmifrl)
13065 out->informative.mode_support_info.OutputType[k] = dml2_output_type_hdmifrl;
13066
13067 if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_unknown)
13068 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_unknown;
13069 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr)
13070 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr;
13071 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr2)
13072 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr2;
13073 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr3)
13074 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr3;
13075 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr10)
13076 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr10;
13077 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr13p5)
13078 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr13p5;
13079 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr20)
13080 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr20;
13081 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_3x3)
13082 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_3x3;
13083 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_6x3)
13084 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_6x3;
13085 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_6x4)
13086 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_6x4;
13087 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_8x4)
13088 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_8x4;
13089 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_10x4)
13090 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_10x4;
13091 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_12x4)
13092 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_12x4;
13093
13094 out->informative.mode_support_info.AlignedYPitch[k] = mode_lib->ms.support.AlignedYPitch[k];
13095 out->informative.mode_support_info.AlignedCPitch[k] = mode_lib->ms.support.AlignedCPitch[k];
13096 }
13097
13098 out->informative.watermarks.urgent_us = dml_get_wm_urgent(mode_lib);
13099 out->informative.watermarks.writeback_urgent_us = dml_get_wm_writeback_urgent(mode_lib);
13100 out->informative.watermarks.writeback_pstate_us = dml_get_wm_writeback_dram_clock_change(mode_lib);
13101 out->informative.watermarks.writeback_fclk_pstate_us = dml_get_wm_writeback_fclk_change(mode_lib);
13102
13103 out->informative.watermarks.cstate_exit_us = dml_get_wm_stutter_exit(mode_lib);
13104 out->informative.watermarks.cstate_enter_plus_exit_us = dml_get_wm_stutter_enter_exit(mode_lib);
13105 out->informative.watermarks.z8_cstate_exit_us = dml_get_wm_z8_stutter_exit(mode_lib);
13106 out->informative.watermarks.z8_cstate_enter_plus_exit_us = dml_get_wm_z8_stutter_enter_exit(mode_lib);
13107 out->informative.watermarks.pstate_change_us = dml_get_wm_dram_clock_change(mode_lib);
13108 out->informative.watermarks.fclk_pstate_change_us = dml_get_wm_fclk_change(mode_lib);
13109 out->informative.watermarks.usr_retraining_us = dml_get_wm_usr_retraining(mode_lib);
13110 out->informative.watermarks.temp_read_or_ppt_watermark_us = dml_get_wm_temp_read_or_ppt(mode_lib);
13111
13112 out->informative.mall.total_surface_size_in_mall_bytes = 0;
13113 for (k = 0; k < out->display_config.num_planes; ++k)
13114 out->informative.mall.total_surface_size_in_mall_bytes += mode_lib->mp.SurfaceSizeInTheMALL[k];
13115
13116 out->informative.qos.min_return_latency_in_dcfclk = mode_lib->mp.min_return_latency_in_dcfclk;
13117 out->informative.qos.urgent_latency_us = dml_get_urgent_latency(mode_lib);
13118
13119 out->informative.qos.max_urgent_latency_us = dml_get_max_urgent_latency_us(mode_lib);
13120 out->informative.qos.avg_non_urgent_latency_us = dml_get_avg_non_urgent_latency_us(mode_lib);
13121 out->informative.qos.avg_urgent_latency_us = dml_get_avg_urgent_latency_us(mode_lib);
13122
13123 out->informative.qos.wm_memory_trip_us = dml_get_wm_memory_trip(mode_lib);
13124 out->informative.qos.meta_trip_memory_us = dml_get_meta_trip_memory_us(mode_lib);
13125 out->informative.qos.fraction_of_urgent_bandwidth = dml_get_fraction_of_urgent_bandwidth(mode_lib);
13126 out->informative.qos.fraction_of_urgent_bandwidth_immediate_flip = dml_get_fraction_of_urgent_bandwidth_imm_flip(mode_lib);
13127 out->informative.qos.fraction_of_urgent_bandwidth_mall = dml_get_fraction_of_urgent_bandwidth_mall(mode_lib);
13128
13129 out->informative.qos.avg_bw_required.sys_active.sdp_bw_mbps = dml_get_sys_active_avg_bw_required_sdp(mode_lib);
13130 out->informative.qos.avg_bw_required.sys_active.dram_bw_mbps = dml_get_sys_active_avg_bw_required_dram(mode_lib);
13131 out->informative.qos.avg_bw_required.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_avg_bw_required_sdp(mode_lib);
13132 out->informative.qos.avg_bw_required.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_avg_bw_required_dram(mode_lib);
13133
13134 out->informative.qos.avg_bw_available.sys_active.sdp_bw_mbps = dml_get_sys_active_avg_bw_available_sdp(mode_lib);
13135 out->informative.qos.avg_bw_available.sys_active.dram_bw_mbps = dml_get_sys_active_avg_bw_available_dram(mode_lib);
13136 out->informative.qos.avg_bw_available.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_avg_bw_available_sdp(mode_lib);
13137 out->informative.qos.avg_bw_available.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_avg_bw_available_dram(mode_lib);
13138
13139 out->informative.qos.urg_bw_available.sys_active.sdp_bw_mbps = dml_get_sys_active_urg_bw_available_sdp(mode_lib);
13140 out->informative.qos.urg_bw_available.sys_active.dram_bw_mbps = dml_get_sys_active_urg_bw_available_dram(mode_lib);
13141 out->informative.qos.urg_bw_available.sys_active.dram_vm_only_bw_mbps = dml_get_sys_active_urg_bw_available_dram_vm_only(mode_lib);
13142
13143 out->informative.qos.urg_bw_available.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_urg_bw_available_sdp(mode_lib);
13144 out->informative.qos.urg_bw_available.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_urg_bw_available_dram(mode_lib);
13145 out->informative.qos.urg_bw_available.svp_prefetch.dram_vm_only_bw_mbps = dml_get_svp_prefetch_urg_bw_available_dram_vm_only(mode_lib);
13146
13147 out->informative.qos.urg_bw_required.sys_active.sdp_bw_mbps = dml_get_sys_active_urg_bw_required_sdp(mode_lib);
13148 out->informative.qos.urg_bw_required.sys_active.dram_bw_mbps = dml_get_sys_active_urg_bw_required_dram(mode_lib);
13149 out->informative.qos.urg_bw_required.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_urg_bw_required_sdp(mode_lib);
13150 out->informative.qos.urg_bw_required.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_urg_bw_required_dram(mode_lib);
13151
13152 out->informative.qos.non_urg_bw_required.sys_active.sdp_bw_mbps = dml_get_sys_active_non_urg_required_sdp(mode_lib);
13153 out->informative.qos.non_urg_bw_required.sys_active.dram_bw_mbps = dml_get_sys_active_non_urg_required_dram(mode_lib);
13154 out->informative.qos.non_urg_bw_required.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_sdp(mode_lib);
13155 out->informative.qos.non_urg_bw_required.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_dram(mode_lib);
13156
13157 out->informative.qos.urg_bw_required_with_flip.sys_active.sdp_bw_mbps = dml_get_sys_active_urg_bw_required_sdp_flip(mode_lib);
13158 out->informative.qos.urg_bw_required_with_flip.sys_active.dram_bw_mbps = dml_get_sys_active_urg_bw_required_dram_flip(mode_lib);
13159 out->informative.qos.urg_bw_required_with_flip.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_urg_bw_required_sdp_flip(mode_lib);
13160 out->informative.qos.urg_bw_required_with_flip.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_urg_bw_required_dram_flip(mode_lib);
13161
13162 out->informative.qos.non_urg_bw_required_with_flip.sys_active.sdp_bw_mbps = dml_get_sys_active_non_urg_required_sdp_flip(mode_lib);
13163 out->informative.qos.non_urg_bw_required_with_flip.sys_active.dram_bw_mbps = dml_get_sys_active_non_urg_required_dram_flip(mode_lib);
13164 out->informative.qos.non_urg_bw_required_with_flip.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_sdp_flip(mode_lib);
13165 out->informative.qos.non_urg_bw_required_with_flip.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_dram_flip(mode_lib);
13166
13167 out->informative.crb.comp_buffer_size_kbytes = dml_get_comp_buffer_size_kbytes(mode_lib);
13168 out->informative.crb.UnboundedRequestEnabled = dml_get_unbounded_request_enabled(mode_lib);
13169
13170 out->informative.crb.compbuf_reserved_space_64b = dml_get_compbuf_reserved_space_64b(mode_lib);
13171 out->informative.misc.hw_debug5 = dml_get_hw_debug5(mode_lib);
13172 out->informative.misc.dcfclk_deep_sleep_hysteresis = dml_get_dcfclk_deep_sleep_hysteresis(mode_lib);
13173
13174 out->informative.power_management.stutter_efficiency = dml_get_stutter_efficiency_no_vblank(mode_lib);
13175 out->informative.power_management.stutter_efficiency_with_vblank = dml_get_stutter_efficiency(mode_lib);
13176 out->informative.power_management.stutter_num_bursts = dml_get_stutter_num_bursts(mode_lib);
13177
13178 out->informative.power_management.z8.stutter_efficiency = dml_get_stutter_efficiency_z8(mode_lib);
13179 out->informative.power_management.z8.stutter_efficiency_with_vblank = dml_get_stutter_efficiency(mode_lib);
13180 out->informative.power_management.z8.stutter_num_bursts = dml_get_stutter_num_bursts_z8(mode_lib);
13181 out->informative.power_management.z8.stutter_period = dml_get_stutter_period(mode_lib);
13182
13183 out->informative.power_management.z8.bestcase.stutter_efficiency = dml_get_stutter_efficiency_z8_bestcase(mode_lib);
13184 out->informative.power_management.z8.bestcase.stutter_num_bursts = dml_get_stutter_num_bursts_z8_bestcase(mode_lib);
13185 out->informative.power_management.z8.bestcase.stutter_period = dml_get_stutter_period_bestcase(mode_lib);
13186
13187 out->informative.misc.cstate_max_cap_mode = dml_get_cstate_max_cap_mode(mode_lib);
13188
13189 out->min_clocks.dcn4x.dpprefclk_khz = (int unsigned)dml_get_global_dppclk_khz(mode_lib);
13190
13191 out->informative.qos.max_active_fclk_change_latency_supported = dml_get_fclk_change_latency(mode_lib);
13192
13193 out->informative.misc.LowestPrefetchMargin = 10 * 1000 * 1000;
13194
13195 for (k = 0; k < out->display_config.num_planes; k++) {
13196
13197 if ((out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us)
13198 && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.fclk_change_blackout_us)
13199 && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us))
13200 out->informative.misc.PrefetchMode[k] = 0;
13201 else if ((out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.fclk_change_blackout_us)
13202 && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us))
13203 out->informative.misc.PrefetchMode[k] = 1;
13204 else if (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us)
13205 out->informative.misc.PrefetchMode[k] = 2;
13206 else
13207 out->informative.misc.PrefetchMode[k] = 3;
13208
13209 out->informative.misc.min_ttu_vblank_us[k] = mode_lib->mp.MinTTUVBlank[k];
13210 out->informative.mall.subviewport_lines_needed_in_mall[k] = mode_lib->mp.SubViewportLinesNeededInMALL[k];
13211 out->informative.crb.det_size_in_kbytes[k] = mode_lib->mp.DETBufferSizeInKByte[k];
13212 out->informative.crb.DETBufferSizeY[k] = mode_lib->mp.DETBufferSizeY[k];
13213 out->informative.misc.ImmediateFlipSupportedForPipe[k] = mode_lib->mp.ImmediateFlipSupportedForPipe[k];
13214 out->informative.misc.UsesMALLForStaticScreen[k] = mode_lib->mp.is_using_mall_for_ss[k];
13215 out->informative.plane_info[k].dpte_row_height_plane0 = mode_lib->mp.dpte_row_height[k];
13216 out->informative.plane_info[k].dpte_row_height_plane1 = mode_lib->mp.dpte_row_height_chroma[k];
13217 out->informative.plane_info[k].meta_row_height_plane0 = mode_lib->mp.meta_row_height[k];
13218 out->informative.plane_info[k].meta_row_height_plane1 = mode_lib->mp.meta_row_height_chroma[k];
13219 out->informative.dcc_control[k].max_uncompressed_block_plane0 = mode_lib->mp.DCCYMaxUncompressedBlock[k];
13220 out->informative.dcc_control[k].max_compressed_block_plane0 = mode_lib->mp.DCCYMaxCompressedBlock[k];
13221 out->informative.dcc_control[k].independent_block_plane0 = mode_lib->mp.DCCYIndependentBlock[k];
13222 out->informative.dcc_control[k].max_uncompressed_block_plane1 = mode_lib->mp.DCCCMaxUncompressedBlock[k];
13223 out->informative.dcc_control[k].max_compressed_block_plane1 = mode_lib->mp.DCCCMaxCompressedBlock[k];
13224 out->informative.dcc_control[k].independent_block_plane1 = mode_lib->mp.DCCCIndependentBlock[k];
13225 out->informative.misc.dst_x_after_scaler[k] = mode_lib->mp.DSTXAfterScaler[k];
13226 out->informative.misc.dst_y_after_scaler[k] = mode_lib->mp.DSTYAfterScaler[k];
13227 out->informative.misc.prefetch_source_lines_plane0[k] = mode_lib->mp.PrefetchSourceLinesY[k];
13228 out->informative.misc.prefetch_source_lines_plane1[k] = mode_lib->mp.PrefetchSourceLinesC[k];
13229 out->informative.misc.vready_at_or_after_vsync[k] = mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k];
13230 out->informative.misc.min_dst_y_next_start[k] = mode_lib->mp.MIN_DST_Y_NEXT_START[k];
13231 out->informative.plane_info[k].swath_width_plane0 = mode_lib->mp.SwathWidthY[k];
13232 out->informative.plane_info[k].swath_height_plane0 = mode_lib->mp.SwathHeightY[k];
13233 out->informative.plane_info[k].swath_height_plane1 = mode_lib->mp.SwathHeightC[k];
13234 out->informative.misc.CursorDstXOffset[k] = mode_lib->mp.CursorDstXOffset[k];
13235 out->informative.misc.CursorDstYOffset[k] = mode_lib->mp.CursorDstYOffset[k];
13236 out->informative.misc.CursorChunkHDLAdjust[k] = mode_lib->mp.CursorChunkHDLAdjust[k];
13237 out->informative.misc.dpte_group_bytes[k] = mode_lib->mp.dpte_group_bytes[k];
13238 out->informative.misc.vm_group_bytes[k] = mode_lib->mp.vm_group_bytes[k];
13239 out->informative.misc.DisplayPipeRequestDeliveryTimeLuma[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma[k];
13240 out->informative.misc.DisplayPipeRequestDeliveryTimeChroma[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma[k];
13241 out->informative.misc.DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch[k];
13242 out->informative.misc.DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch[k];
13243 out->informative.misc.TimePerVMGroupVBlank[k] = mode_lib->mp.TimePerVMGroupVBlank[k];
13244 out->informative.misc.TimePerVMGroupFlip[k] = mode_lib->mp.TimePerVMGroupFlip[k];
13245 out->informative.misc.TimePerVMRequestVBlank[k] = mode_lib->mp.TimePerVMRequestVBlank[k];
13246 out->informative.misc.TimePerVMRequestFlip[k] = mode_lib->mp.TimePerVMRequestFlip[k];
13247 out->informative.misc.Tdmdl_vm[k] = mode_lib->mp.Tdmdl_vm[k];
13248 out->informative.misc.Tdmdl[k] = mode_lib->mp.Tdmdl[k];
13249 out->informative.misc.VStartup[k] = mode_lib->mp.VStartup[k];
13250 out->informative.misc.VUpdateOffsetPix[k] = mode_lib->mp.VUpdateOffsetPix[k];
13251 out->informative.misc.VUpdateWidthPix[k] = mode_lib->mp.VUpdateWidthPix[k];
13252 out->informative.misc.VReadyOffsetPix[k] = mode_lib->mp.VReadyOffsetPix[k];
13253
13254 out->informative.misc.DST_Y_PER_PTE_ROW_NOM_L[k] = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L[k];
13255 out->informative.misc.DST_Y_PER_PTE_ROW_NOM_C[k] = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C[k];
13256 out->informative.misc.time_per_pte_group_nom_luma[k] = mode_lib->mp.time_per_pte_group_nom_luma[k];
13257 out->informative.misc.time_per_pte_group_nom_chroma[k] = mode_lib->mp.time_per_pte_group_nom_chroma[k];
13258 out->informative.misc.time_per_pte_group_vblank_luma[k] = mode_lib->mp.time_per_pte_group_vblank_luma[k];
13259 out->informative.misc.time_per_pte_group_vblank_chroma[k] = mode_lib->mp.time_per_pte_group_vblank_chroma[k];
13260 out->informative.misc.time_per_pte_group_flip_luma[k] = mode_lib->mp.time_per_pte_group_flip_luma[k];
13261 out->informative.misc.time_per_pte_group_flip_chroma[k] = mode_lib->mp.time_per_pte_group_flip_chroma[k];
13262 out->informative.misc.VRatioPrefetchY[k] = mode_lib->mp.VRatioPrefetchY[k];
13263 out->informative.misc.VRatioPrefetchC[k] = mode_lib->mp.VRatioPrefetchC[k];
13264 out->informative.misc.DestinationLinesForPrefetch[k] = mode_lib->mp.dst_y_prefetch[k];
13265 out->informative.misc.DestinationLinesToRequestVMInVBlank[k] = mode_lib->mp.dst_y_per_vm_vblank[k];
13266 out->informative.misc.DestinationLinesToRequestRowInVBlank[k] = mode_lib->mp.dst_y_per_row_vblank[k];
13267 out->informative.misc.DestinationLinesToRequestVMInImmediateFlip[k] = mode_lib->mp.dst_y_per_vm_flip[k];
13268 out->informative.misc.DestinationLinesToRequestRowInImmediateFlip[k] = mode_lib->mp.dst_y_per_row_flip[k];
13269 out->informative.misc.DisplayPipeLineDeliveryTimeLuma[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeLuma[k];
13270 out->informative.misc.DisplayPipeLineDeliveryTimeChroma[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeChroma[k];
13271 out->informative.misc.DisplayPipeLineDeliveryTimeLumaPrefetch[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch[k];
13272 out->informative.misc.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch[k];
13273
13274 out->informative.misc.WritebackRequiredBandwidth = mode_lib->scratch.dml_core_mode_programming_locals.TotalWRBandwidth / 1000.0;
13275 out->informative.misc.WritebackAllowDRAMClockChangeEndPosition[k] = mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k];
13276 out->informative.misc.WritebackAllowFCLKChangeEndPosition[k] = mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k];
13277 out->informative.misc.DSCCLK_calculated[k] = mode_lib->mp.DSCCLK[k];
13278 out->informative.misc.BIGK_FRAGMENT_SIZE[k] = mode_lib->mp.BIGK_FRAGMENT_SIZE[k];
13279 out->informative.misc.PTE_BUFFER_MODE[k] = mode_lib->mp.PTE_BUFFER_MODE[k];
13280 out->informative.misc.DSCDelay[k] = mode_lib->mp.DSCDelay[k];
13281 out->informative.misc.MaxActiveDRAMClockChangeLatencySupported[k] = mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported[k];
13282
13283 if (mode_lib->mp.impacted_prefetch_margin_us[k] < out->informative.misc.LowestPrefetchMargin)
13284 out->informative.misc.LowestPrefetchMargin = mode_lib->mp.impacted_prefetch_margin_us[k];
13285 }
13286
13287 // For this DV informative layer, all pipes in the same planes will just use the same id
13288 // will have the optimization and helper layer later on
13289 // only work when we can have high "mcache" that fit everything without thrashing the cache
13290 for (k = 0; k < out->display_config.num_planes; k++) {
13291 out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane0 = dml_get_plane_num_mcaches_plane0(mode_lib, k);
13292 out->informative.non_optimized_mcache_allocation[k].informative.meta_row_bytes_plane0 = dml_get_plane_mcache_row_bytes_plane0(mode_lib, k);
13293
13294 for (n = 0; n < out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane0; n++) {
13295 out->informative.non_optimized_mcache_allocation[k].mcache_x_offsets_plane0[n] = dml_get_plane_array_mcache_offsets_plane0(mode_lib, k, n);
13296 out->informative.non_optimized_mcache_allocation[k].global_mcache_ids_plane0[n] = k;
13297 }
13298
13299 out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane1 = dml_get_plane_num_mcaches_plane1(mode_lib, k);
13300 out->informative.non_optimized_mcache_allocation[k].informative.meta_row_bytes_plane1 = dml_get_plane_mcache_row_bytes_plane1(mode_lib, k);
13301
13302 for (n = 0; n < out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane1; n++) {
13303 out->informative.non_optimized_mcache_allocation[k].mcache_x_offsets_plane1[n] = dml_get_plane_array_mcache_offsets_plane1(mode_lib, k, n);
13304 out->informative.non_optimized_mcache_allocation[k].global_mcache_ids_plane1[n] = k;
13305 }
13306 }
13307 out->informative.qos.max_non_urgent_latency_us = dml_get_max_non_urgent_latency_us(mode_lib);
13308
13309 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) {
13310 if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024
13311 / mode_lib->ms.support.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= out->informative.qos.max_non_urgent_latency_us) {
13312 out->informative.misc.ROBUrgencyAvoidance = true;
13313 } else {
13314 out->informative.misc.ROBUrgencyAvoidance = false;
13315 }
13316 } else {
13317 out->informative.misc.ROBUrgencyAvoidance = true;
13318 }
13319 }
13320