1 /*
2 * Copyright 2020 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: AMD
23 *
24 */
25
26 #include "dc.h"
27 #include "../display_mode_lib.h"
28 #include "display_mode_vba_30.h"
29 #include "../dml_inline_defs.h"
30
31
32 /*
33 * NOTE:
34 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
35 *
36 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
37 * ways. Unless there is something clearly wrong with it the code should
38 * remain as-is as it provides us with a guarantee from HW that it is correct.
39 */
40
41
/*
 * Pipe - bundle of per-pipe inputs handed to CalculatePrefetchSchedule()
 * through its myPipe pointer.
 *
 * NOTE(review): only the fields annotated below are read in the visible part
 * of the file. The meaning and units of the remaining fields are not
 * established here - confirm against the code that fills in and consumes
 * them.
 */
42 typedef struct {
43 double DPPCLK; /* forwarded to CalculateDynamicMetadataParameters() */
44 double DISPCLK; /* forwarded to CalculateDynamicMetadataParameters() */
45 double PixelClock; /* divisor of HTotal when deriving LineTime; also forwarded to CalculateDynamicMetadataParameters() */
46 double DCFCLKDeepSleep; /* forwarded to CalculateDynamicMetadataParameters() */
47 unsigned int DPPPerPlane;
48 bool ScalerEnabled;
49 enum scan_direction_class SourceScan;
50 unsigned int BlockWidth256BytesY;
51 unsigned int BlockHeight256BytesY;
52 unsigned int BlockWidth256BytesC;
53 unsigned int BlockHeight256BytesC;
54 unsigned int InterlaceEnable; /* forwarded to CalculateDynamicMetadataParameters() */
55 unsigned int NumberOfCursors;
56 unsigned int VBlank; /* forwarded to CalculateDynamicMetadataParameters() */
57 unsigned int HTotal; /* divided by PixelClock to derive LineTime; also forwarded to CalculateDynamicMetadataParameters() */
58 unsigned int DCCEnable;
59 bool ODMCombineEnabled;
60 } Pipe;
61
/*
 * NOTE(review): none of these macros is referenced in the visible part of the
 * file. The remarks below are inferred from names and values only and must be
 * confirmed against the code that uses them.
 */
/* presumably marks a bits-per-pixel value as "no valid setting" - TODO confirm */
62 #define BPP_INVALID 0
/* presumably a bits-per-pixel sentinel for a blended pipe - TODO confirm */
63 #define BPP_BLENDED_PIPE 0xffffffff
/* presumably a DSC image width limit; 5184 is also the slice-width bound quoted in dscceComputeDelay() */
64 #define DCN30_MAX_DSC_IMAGE_WIDTH 5184
/* presumably a 4:2:0 buffer width limit; 4096 is also the 420-mode bound quoted in dscceComputeDelay() */
65 #define DCN30_MAX_FMT_420_BUFFER_WIDTH 4096
66
67 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
68 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
69 struct display_mode_lib *mode_lib);
70 static unsigned int dscceComputeDelay(
71 unsigned int bpc,
72 double BPP,
73 unsigned int sliceWidth,
74 unsigned int numSlices,
75 enum output_format_class pixelFormat,
76 enum output_encoder_class Output);
77 static unsigned int dscComputeDelay(
78 enum output_format_class pixelFormat,
79 enum output_encoder_class Output);
80 // Super monster function with 64 arguments
81 static bool CalculatePrefetchSchedule(
82 struct display_mode_lib *mode_lib,
83 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
84 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
85 Pipe *myPipe,
86 unsigned int DSCDelay,
87 double DPPCLKDelaySubtotalPlusCNVCFormater,
88 double DPPCLKDelaySCL,
89 double DPPCLKDelaySCLLBOnly,
90 double DPPCLKDelayCNVCCursor,
91 double DISPCLKDelaySubtotal,
92 unsigned int DPP_RECOUT_WIDTH,
93 enum output_format_class OutputFormat,
94 unsigned int MaxInterDCNTileRepeaters,
95 unsigned int VStartup,
96 unsigned int MaxVStartup,
97 unsigned int GPUVMPageTableLevels,
98 bool GPUVMEnable,
99 bool HostVMEnable,
100 unsigned int HostVMMaxNonCachedPageTableLevels,
101 double HostVMMinPageSize,
102 bool DynamicMetadataEnable,
103 bool DynamicMetadataVMEnabled,
104 int DynamicMetadataLinesBeforeActiveRequired,
105 unsigned int DynamicMetadataTransmittedBytes,
106 double UrgentLatency,
107 double UrgentExtraLatency,
108 double TCalc,
109 unsigned int PDEAndMetaPTEBytesFrame,
110 unsigned int MetaRowByte,
111 unsigned int PixelPTEBytesPerRow,
112 double PrefetchSourceLinesY,
113 unsigned int SwathWidthY,
114 int BytePerPixelY,
115 double VInitPreFillY,
116 unsigned int MaxNumSwathY,
117 double PrefetchSourceLinesC,
118 unsigned int SwathWidthC,
119 int BytePerPixelC,
120 double VInitPreFillC,
121 unsigned int MaxNumSwathC,
122 long swath_width_luma_ub,
123 long swath_width_chroma_ub,
124 unsigned int SwathHeightY,
125 unsigned int SwathHeightC,
126 double TWait,
127 bool ProgressiveToInterlaceUnitInOPP,
128 double *DSTXAfterScaler,
129 double *DSTYAfterScaler,
130 double *DestinationLinesForPrefetch,
131 double *PrefetchBandwidth,
132 double *DestinationLinesToRequestVMInVBlank,
133 double *DestinationLinesToRequestRowInVBlank,
134 double *VRatioPrefetchY,
135 double *VRatioPrefetchC,
136 double *RequiredPrefetchPixDataBWLuma,
137 double *RequiredPrefetchPixDataBWChroma,
138 bool *NotEnoughTimeForDynamicMetadata,
139 double *Tno_bw,
140 double *prefetch_vmrow_bw,
141 double *Tdmdl_vm,
142 double *Tdmdl,
143 unsigned int *VUpdateOffsetPix,
144 double *VUpdateWidthPix,
145 double *VReadyOffsetPix);
146 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
147 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
148 static void CalculateDCCConfiguration(
149 bool DCCEnabled,
150 bool DCCProgrammingAssumesScanDirectionUnknown,
151 enum source_format_class SourcePixelFormat,
152 unsigned int ViewportWidthLuma,
153 unsigned int ViewportWidthChroma,
154 unsigned int ViewportHeightLuma,
155 unsigned int ViewportHeightChroma,
156 double DETBufferSize,
157 unsigned int RequestHeight256ByteLuma,
158 unsigned int RequestHeight256ByteChroma,
159 enum dm_swizzle_mode TilingFormat,
160 unsigned int BytePerPixelY,
161 unsigned int BytePerPixelC,
162 double BytePerPixelDETY,
163 double BytePerPixelDETC,
164 enum scan_direction_class ScanOrientation,
165 unsigned int *MaxUncompressedBlockLuma,
166 unsigned int *MaxUncompressedBlockChroma,
167 unsigned int *MaxCompressedBlockLuma,
168 unsigned int *MaxCompressedBlockChroma,
169 unsigned int *IndependentBlockLuma,
170 unsigned int *IndependentBlockChroma);
171 static double CalculatePrefetchSourceLines(
172 struct display_mode_lib *mode_lib,
173 double VRatio,
174 double vtaps,
175 bool Interlace,
176 bool ProgressiveToInterlaceUnitInOPP,
177 unsigned int SwathHeight,
178 unsigned int ViewportYStart,
179 double *VInitPreFill,
180 unsigned int *MaxNumSwath);
181 static unsigned int CalculateVMAndRowBytes(
182 struct display_mode_lib *mode_lib,
183 bool DCCEnable,
184 unsigned int BlockHeight256Bytes,
185 unsigned int BlockWidth256Bytes,
186 enum source_format_class SourcePixelFormat,
187 unsigned int SurfaceTiling,
188 unsigned int BytePerPixel,
189 enum scan_direction_class ScanDirection,
190 unsigned int SwathWidth,
191 unsigned int ViewportHeight,
192 bool GPUVMEnable,
193 bool HostVMEnable,
194 unsigned int HostVMMaxNonCachedPageTableLevels,
195 unsigned int GPUVMMinPageSize,
196 unsigned int HostVMMinPageSize,
197 unsigned int PTEBufferSizeInRequests,
198 unsigned int Pitch,
199 unsigned int DCCMetaPitch,
200 unsigned int *MacroTileWidth,
201 unsigned int *MetaRowByte,
202 unsigned int *PixelPTEBytesPerRow,
203 bool *PTEBufferSizeNotExceeded,
204 unsigned int *dpte_row_width_ub,
205 unsigned int *dpte_row_height,
206 unsigned int *MetaRequestWidth,
207 unsigned int *MetaRequestHeight,
208 unsigned int *meta_row_width,
209 unsigned int *meta_row_height,
210 unsigned int *vm_group_bytes,
211 unsigned int *dpte_group_bytes,
212 unsigned int *PixelPTEReqWidth,
213 unsigned int *PixelPTEReqHeight,
214 unsigned int *PTERequestSize,
215 unsigned int *DPDE0BytesFrame,
216 unsigned int *MetaPTEBytesFrame);
217 static double CalculateTWait(
218 unsigned int PrefetchMode,
219 double DRAMClockChangeLatency,
220 double UrgentLatency,
221 double SREnterPlusExitTime);
222 static void CalculateRowBandwidth(
223 bool GPUVMEnable,
224 enum source_format_class SourcePixelFormat,
225 double VRatio,
226 double VRatioChroma,
227 bool DCCEnable,
228 double LineTime,
229 unsigned int MetaRowByteLuma,
230 unsigned int MetaRowByteChroma,
231 unsigned int meta_row_height_luma,
232 unsigned int meta_row_height_chroma,
233 unsigned int PixelPTEBytesPerRowLuma,
234 unsigned int PixelPTEBytesPerRowChroma,
235 unsigned int dpte_row_height_luma,
236 unsigned int dpte_row_height_chroma,
237 double *meta_row_bw,
238 double *dpte_row_bw);
239 static void CalculateFlipSchedule(
240 struct display_mode_lib *mode_lib,
241 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
242 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
243 double UrgentExtraLatency,
244 double UrgentLatency,
245 unsigned int GPUVMMaxPageTableLevels,
246 bool HostVMEnable,
247 unsigned int HostVMMaxNonCachedPageTableLevels,
248 bool GPUVMEnable,
249 double HostVMMinPageSize,
250 double PDEAndMetaPTEBytesPerFrame,
251 double MetaRowBytes,
252 double DPTEBytesPerRow,
253 double BandwidthAvailableForImmediateFlip,
254 unsigned int TotImmediateFlipBytes,
255 enum source_format_class SourcePixelFormat,
256 double LineTime,
257 double VRatio,
258 double VRatioChroma,
259 double Tno_bw,
260 bool DCCEnable,
261 unsigned int dpte_row_height,
262 unsigned int meta_row_height,
263 unsigned int dpte_row_height_chroma,
264 unsigned int meta_row_height_chroma,
265 double *DestinationLinesToRequestVMInImmediateFlip,
266 double *DestinationLinesToRequestRowInImmediateFlip,
267 double *final_flip_bw,
268 bool *ImmediateFlipSupportedForPipe);
269 static double CalculateWriteBackDelay(
270 enum source_format_class WritebackPixelFormat,
271 double WritebackHRatio,
272 double WritebackVRatio,
273 unsigned int WritebackVTaps,
274 long WritebackDestinationWidth,
275 long WritebackDestinationHeight,
276 long WritebackSourceHeight,
277 unsigned int HTotal);
278 static void CalculateDynamicMetadataParameters(
279 int MaxInterDCNTileRepeaters,
280 double DPPCLK,
281 double DISPCLK,
282 double DCFClkDeepSleep,
283 double PixelClock,
284 unsigned int HTotal,
285 unsigned int VBlank,
286 unsigned int DynamicMetadataTransmittedBytes,
287 int DynamicMetadataLinesBeforeActiveRequired,
288 int InterlaceEnable,
289 bool ProgressiveToInterlaceUnitInOPP,
290 double *Tsetup,
291 double *Tdmbf,
292 double *Tdmec,
293 double *Tdmsks);
294 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
295 struct display_mode_lib *mode_lib,
296 unsigned int PrefetchMode,
297 unsigned int NumberOfActivePlanes,
298 unsigned int MaxLineBufferLines,
299 unsigned int LineBufferSize,
300 unsigned int DPPOutputBufferPixels,
301 unsigned int DETBufferSizeInKByte,
302 unsigned int WritebackInterfaceBufferSize,
303 double DCFCLK,
304 double ReturnBW,
305 bool GPUVMEnable,
306 unsigned int dpte_group_bytes[],
307 unsigned int MetaChunkSize,
308 double UrgentLatency,
309 double ExtraLatency,
310 double WritebackLatency,
311 double WritebackChunkSize,
312 double SOCCLK,
313 double DRAMClockChangeLatency,
314 double SRExitTime,
315 double SREnterPlusExitTime,
316 double DCFCLKDeepSleep,
317 unsigned int DPPPerPlane[],
318 bool DCCEnable[],
319 double DPPCLK[],
320 unsigned int DETBufferSizeY[],
321 unsigned int DETBufferSizeC[],
322 unsigned int SwathHeightY[],
323 unsigned int SwathHeightC[],
324 unsigned int LBBitPerPixel[],
325 double SwathWidthY[],
326 double SwathWidthC[],
327 double HRatio[],
328 double HRatioChroma[],
329 unsigned int vtaps[],
330 unsigned int VTAPsChroma[],
331 double VRatio[],
332 double VRatioChroma[],
333 unsigned int HTotal[],
334 double PixelClock[],
335 unsigned int BlendingAndTiming[],
336 double BytePerPixelDETY[],
337 double BytePerPixelDETC[],
338 double DSTXAfterScaler[],
339 double DSTYAfterScaler[],
340 bool WritebackEnable[],
341 enum source_format_class WritebackPixelFormat[],
342 double WritebackDestinationWidth[],
343 double WritebackDestinationHeight[],
344 double WritebackSourceHeight[],
345 enum clock_change_support *DRAMClockChangeSupport,
346 double *UrgentWatermark,
347 double *WritebackUrgentWatermark,
348 double *DRAMClockChangeWatermark,
349 double *WritebackDRAMClockChangeWatermark,
350 double *StutterExitWatermark,
351 double *StutterEnterPlusExitWatermark,
352 double *MinActiveDRAMClockChangeLatencySupported);
353 static void CalculateDCFCLKDeepSleep(
354 struct display_mode_lib *mode_lib,
355 unsigned int NumberOfActivePlanes,
356 int BytePerPixelY[],
357 int BytePerPixelC[],
358 double VRatio[],
359 double VRatioChroma[],
360 double SwathWidthY[],
361 double SwathWidthC[],
362 unsigned int DPPPerPlane[],
363 double HRatio[],
364 double HRatioChroma[],
365 double PixelClock[],
366 double PSCL_THROUGHPUT[],
367 double PSCL_THROUGHPUT_CHROMA[],
368 double DPPCLK[],
369 double ReadBandwidthLuma[],
370 double ReadBandwidthChroma[],
371 int ReturnBusWidth,
372 double *DCFCLKDeepSleep);
373 static void CalculateUrgentBurstFactor(
374 long swath_width_luma_ub,
375 long swath_width_chroma_ub,
376 unsigned int DETBufferSizeInKByte,
377 unsigned int SwathHeightY,
378 unsigned int SwathHeightC,
379 double LineTime,
380 double UrgentLatency,
381 double CursorBufferSize,
382 unsigned int CursorWidth,
383 unsigned int CursorBPP,
384 double VRatio,
385 double VRatioC,
386 double BytePerPixelInDETY,
387 double BytePerPixelInDETC,
388 double DETBufferSizeY,
389 double DETBufferSizeC,
390 double *UrgentBurstFactorCursor,
391 double *UrgentBurstFactorLuma,
392 double *UrgentBurstFactorChroma,
393 bool *NotEnoughUrgentLatencyHiding);
394
395 static void UseMinimumDCFCLK(
396 struct display_mode_lib *mode_lib,
397 struct vba_vars_st *v,
398 int MaxPrefetchMode,
399 int ReorderingBytes);
400
401 static void CalculatePixelDeliveryTimes(
402 unsigned int NumberOfActivePlanes,
403 double VRatio[],
404 double VRatioChroma[],
405 double VRatioPrefetchY[],
406 double VRatioPrefetchC[],
407 unsigned int swath_width_luma_ub[],
408 unsigned int swath_width_chroma_ub[],
409 unsigned int DPPPerPlane[],
410 double HRatio[],
411 double HRatioChroma[],
412 double PixelClock[],
413 double PSCL_THROUGHPUT[],
414 double PSCL_THROUGHPUT_CHROMA[],
415 double DPPCLK[],
416 int BytePerPixelC[],
417 enum scan_direction_class SourceScan[],
418 unsigned int NumberOfCursors[],
419 unsigned int CursorWidth[][2],
420 unsigned int CursorBPP[][2],
421 unsigned int BlockWidth256BytesY[],
422 unsigned int BlockHeight256BytesY[],
423 unsigned int BlockWidth256BytesC[],
424 unsigned int BlockHeight256BytesC[],
425 double DisplayPipeLineDeliveryTimeLuma[],
426 double DisplayPipeLineDeliveryTimeChroma[],
427 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
428 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
429 double DisplayPipeRequestDeliveryTimeLuma[],
430 double DisplayPipeRequestDeliveryTimeChroma[],
431 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
432 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
433 double CursorRequestDeliveryTime[],
434 double CursorRequestDeliveryTimePrefetch[]);
435
436 static void CalculateMetaAndPTETimes(
437 int NumberOfActivePlanes,
438 bool GPUVMEnable,
439 int MetaChunkSize,
440 int MinMetaChunkSizeBytes,
441 int HTotal[],
442 double VRatio[],
443 double VRatioChroma[],
444 double DestinationLinesToRequestRowInVBlank[],
445 double DestinationLinesToRequestRowInImmediateFlip[],
446 bool DCCEnable[],
447 double PixelClock[],
448 int BytePerPixelY[],
449 int BytePerPixelC[],
450 enum scan_direction_class SourceScan[],
451 int dpte_row_height[],
452 int dpte_row_height_chroma[],
453 int meta_row_width[],
454 int meta_row_width_chroma[],
455 int meta_row_height[],
456 int meta_row_height_chroma[],
457 int meta_req_width[],
458 int meta_req_width_chroma[],
459 int meta_req_height[],
460 int meta_req_height_chroma[],
461 int dpte_group_bytes[],
462 int PTERequestSizeY[],
463 int PTERequestSizeC[],
464 int PixelPTEReqWidthY[],
465 int PixelPTEReqHeightY[],
466 int PixelPTEReqWidthC[],
467 int PixelPTEReqHeightC[],
468 int dpte_row_width_luma_ub[],
469 int dpte_row_width_chroma_ub[],
470 double DST_Y_PER_PTE_ROW_NOM_L[],
471 double DST_Y_PER_PTE_ROW_NOM_C[],
472 double DST_Y_PER_META_ROW_NOM_L[],
473 double DST_Y_PER_META_ROW_NOM_C[],
474 double TimePerMetaChunkNominal[],
475 double TimePerChromaMetaChunkNominal[],
476 double TimePerMetaChunkVBlank[],
477 double TimePerChromaMetaChunkVBlank[],
478 double TimePerMetaChunkFlip[],
479 double TimePerChromaMetaChunkFlip[],
480 double time_per_pte_group_nom_luma[],
481 double time_per_pte_group_vblank_luma[],
482 double time_per_pte_group_flip_luma[],
483 double time_per_pte_group_nom_chroma[],
484 double time_per_pte_group_vblank_chroma[],
485 double time_per_pte_group_flip_chroma[]);
486
487 static void CalculateVMGroupAndRequestTimes(
488 unsigned int NumberOfActivePlanes,
489 bool GPUVMEnable,
490 unsigned int GPUVMMaxPageTableLevels,
491 unsigned int HTotal[],
492 int BytePerPixelC[],
493 double DestinationLinesToRequestVMInVBlank[],
494 double DestinationLinesToRequestVMInImmediateFlip[],
495 bool DCCEnable[],
496 double PixelClock[],
497 int dpte_row_width_luma_ub[],
498 int dpte_row_width_chroma_ub[],
499 int vm_group_bytes[],
500 unsigned int dpde0_bytes_per_frame_ub_l[],
501 unsigned int dpde0_bytes_per_frame_ub_c[],
502 int meta_pte_bytes_per_frame_ub_l[],
503 int meta_pte_bytes_per_frame_ub_c[],
504 double TimePerVMGroupVBlank[],
505 double TimePerVMGroupFlip[],
506 double TimePerVMRequestVBlank[],
507 double TimePerVMRequestFlip[]);
508
509 static void CalculateStutterEfficiency(
510 int NumberOfActivePlanes,
511 long ROBBufferSizeInKByte,
512 double TotalDataReadBandwidth,
513 double DCFCLK,
514 double ReturnBW,
515 double SRExitTime,
516 bool SynchronizedVBlank,
517 int DPPPerPlane[],
518 unsigned int DETBufferSizeY[],
519 int BytePerPixelY[],
520 double BytePerPixelDETY[],
521 double SwathWidthY[],
522 int SwathHeightY[],
523 int SwathHeightC[],
524 double DCCRateLuma[],
525 double DCCRateChroma[],
526 int HTotal[],
527 int VTotal[],
528 double PixelClock[],
529 double VRatio[],
530 enum scan_direction_class SourceScan[],
531 int BlockHeight256BytesY[],
532 int BlockWidth256BytesY[],
533 int BlockHeight256BytesC[],
534 int BlockWidth256BytesC[],
535 int DCCYMaxUncompressedBlock[],
536 int DCCCMaxUncompressedBlock[],
537 int VActive[],
538 bool DCCEnable[],
539 bool WritebackEnable[],
540 double ReadBandwidthPlaneLuma[],
541 double ReadBandwidthPlaneChroma[],
542 double meta_row_bw[],
543 double dpte_row_bw[],
544 double *StutterEfficiencyNotIncludingVBlank,
545 double *StutterEfficiency,
546 double *StutterPeriodOut);
547
548 static void CalculateSwathAndDETConfiguration(
549 bool ForceSingleDPP,
550 int NumberOfActivePlanes,
551 unsigned int DETBufferSizeInKByte,
552 double MaximumSwathWidthLuma[],
553 double MaximumSwathWidthChroma[],
554 enum scan_direction_class SourceScan[],
555 enum source_format_class SourcePixelFormat[],
556 enum dm_swizzle_mode SurfaceTiling[],
557 int ViewportWidth[],
558 int ViewportHeight[],
559 int SurfaceWidthY[],
560 int SurfaceWidthC[],
561 int SurfaceHeightY[],
562 int SurfaceHeightC[],
563 int Read256BytesBlockHeightY[],
564 int Read256BytesBlockHeightC[],
565 int Read256BytesBlockWidthY[],
566 int Read256BytesBlockWidthC[],
567 enum odm_combine_mode ODMCombineEnabled[],
568 int BlendingAndTiming[],
569 int BytePerPixY[],
570 int BytePerPixC[],
571 double BytePerPixDETY[],
572 double BytePerPixDETC[],
573 int HActive[],
574 double HRatio[],
575 double HRatioChroma[],
576 int DPPPerPlane[],
577 int swath_width_luma_ub[],
578 int swath_width_chroma_ub[],
579 double SwathWidth[],
580 double SwathWidthChroma[],
581 int SwathHeightY[],
582 int SwathHeightC[],
583 unsigned int DETBufferSizeY[],
584 unsigned int DETBufferSizeC[],
585 bool ViewportSizeSupportPerPlane[],
586 bool *ViewportSizeSupport);
587 static void CalculateSwathWidth(
588 bool ForceSingleDPP,
589 int NumberOfActivePlanes,
590 enum source_format_class SourcePixelFormat[],
591 enum scan_direction_class SourceScan[],
592 unsigned int ViewportWidth[],
593 unsigned int ViewportHeight[],
594 unsigned int SurfaceWidthY[],
595 unsigned int SurfaceWidthC[],
596 unsigned int SurfaceHeightY[],
597 unsigned int SurfaceHeightC[],
598 enum odm_combine_mode ODMCombineEnabled[],
599 int BytePerPixY[],
600 int BytePerPixC[],
601 int Read256BytesBlockHeightY[],
602 int Read256BytesBlockHeightC[],
603 int Read256BytesBlockWidthY[],
604 int Read256BytesBlockWidthC[],
605 int BlendingAndTiming[],
606 unsigned int HActive[],
607 double HRatio[],
608 int DPPPerPlane[],
609 double SwathWidthSingleDPPY[],
610 double SwathWidthSingleDPPC[],
611 double SwathWidthY[],
612 double SwathWidthC[],
613 int MaximumSwathHeightY[],
614 int MaximumSwathHeightC[],
615 unsigned int swath_width_luma_ub[],
616 unsigned int swath_width_chroma_ub[]);
617 static double CalculateExtraLatency(
618 long RoundTripPingLatencyCycles,
619 long ReorderingBytes,
620 double DCFCLK,
621 int TotalNumberOfActiveDPP,
622 int PixelChunkSizeInKByte,
623 int TotalNumberOfDCCActiveDPP,
624 int MetaChunkSize,
625 double ReturnBW,
626 bool GPUVMEnable,
627 bool HostVMEnable,
628 int NumberOfActivePlanes,
629 int NumberOfDPP[],
630 int dpte_group_bytes[],
631 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
632 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
633 double HostVMMinPageSize,
634 int HostVMMaxNonCachedPageTableLevels);
635 static double CalculateExtraLatencyBytes(
636 long ReorderingBytes,
637 int TotalNumberOfActiveDPP,
638 int PixelChunkSizeInKByte,
639 int TotalNumberOfDCCActiveDPP,
640 int MetaChunkSize,
641 bool GPUVMEnable,
642 bool HostVMEnable,
643 int NumberOfActivePlanes,
644 int NumberOfDPP[],
645 int dpte_group_bytes[],
646 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
647 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
648 double HostVMMinPageSize,
649 int HostVMMaxNonCachedPageTableLevels);
650 static double CalculateUrgentLatency(
651 double UrgentLatencyPixelDataOnly,
652 double UrgentLatencyPixelMixedWithVMData,
653 double UrgentLatencyVMDataOnly,
654 bool DoUrgentLatencyAdjustment,
655 double UrgentLatencyAdjustmentFabricClockComponent,
656 double UrgentLatencyAdjustmentFabricClockReference,
657 double FabricClockSingle);
658
/*
 * dml30_recalculate - run the complete recalculation sequence on @mode_lib.
 *
 * Invokes four stages back to back, handing each the same @mode_lib pointer.
 * None of the stages returns a value here and nothing is returned to the
 * caller, so any result is whatever the stages leave behind via @mode_lib.
 *
 * The first two stages are declared outside the visible part of this file;
 * the last two are static functions forward-declared near the top of it.
 * The call order is kept exactly as written.
 */
dml30_recalculate(struct display_mode_lib * mode_lib)659 void dml30_recalculate(struct display_mode_lib *mode_lib)
660 {
661 ModeSupportAndSystemConfiguration(mode_lib);
662 PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
663 DisplayPipeConfiguration(mode_lib);
664 DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
665 }
666
dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum output_format_class pixelFormat,enum output_encoder_class Output)667 static unsigned int dscceComputeDelay(
668 unsigned int bpc,
669 double BPP,
670 unsigned int sliceWidth,
671 unsigned int numSlices,
672 enum output_format_class pixelFormat,
673 enum output_encoder_class Output)
674 {
675 // valid bpc = source bits per component in the set of {8, 10, 12}
676 // valid bpp = increments of 1/16 of a bit
677 // min = 6/7/8 in N420/N422/444, respectively
678 // max = such that compression is 1:1
679 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
680 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
681 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
682
683 // fixed value
684 unsigned int rcModelSize = 8192;
685
686 // N422/N420 operate at 2 pixels per clock
687 unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L,
688 Delay, pixels;
689
690 if (pixelFormat == dm_420)
691 pixelsPerClock = 2;
692 // #all other modes operate at 1 pixel per clock
693 else if (pixelFormat == dm_444)
694 pixelsPerClock = 1;
695 else if (pixelFormat == dm_n422)
696 pixelsPerClock = 2;
697 else
698 pixelsPerClock = 1;
699
700 //initial transmit delay as per PPS
701 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
702
703 //compute ssm delay
704 if (bpc == 8)
705 D = 81;
706 else if (bpc == 10)
707 D = 89;
708 else
709 D = 113;
710
711 //divide by pixel per cycle to compute slice width as seen by DSC
712 w = sliceWidth / pixelsPerClock;
713
714 //422 mode has an additional cycle of delay
715 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
716 s = 0;
717 else
718 s = 1;
719
720 //main calculation for the dscce
721 ix = initalXmitDelay + 45;
722 wx = (w + 2) / 3;
723 P = 3 * wx - w;
724 l0 = ix / w;
725 a = ix + P * l0;
726 ax = (a + 2) / 3 + D + 6 + 1;
727 L = (ax + wx - 1) / wx;
728 if ((ix % w) == 0 && P != 0)
729 lstall = 1;
730 else
731 lstall = 0;
732 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
733
734 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
735 pixels = Delay * 3 * pixelsPerClock;
736 return pixels;
737 }
738
dscComputeDelay(enum output_format_class pixelFormat,enum output_encoder_class Output)739 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
740 {
741 unsigned int Delay = 0;
742
743 if (pixelFormat == dm_420) {
744 // sfr
745 Delay = Delay + 2;
746 // dsccif
747 Delay = Delay + 0;
748 // dscc - input deserializer
749 Delay = Delay + 3;
750 // dscc gets pixels every other cycle
751 Delay = Delay + 2;
752 // dscc - input cdc fifo
753 Delay = Delay + 12;
754 // dscc gets pixels every other cycle
755 Delay = Delay + 13;
756 // dscc - cdc uncertainty
757 Delay = Delay + 2;
758 // dscc - output cdc fifo
759 Delay = Delay + 7;
760 // dscc gets pixels every other cycle
761 Delay = Delay + 3;
762 // dscc - cdc uncertainty
763 Delay = Delay + 2;
764 // dscc - output serializer
765 Delay = Delay + 1;
766 // sft
767 Delay = Delay + 1;
768 } else if (pixelFormat == dm_n422) {
769 // sfr
770 Delay = Delay + 2;
771 // dsccif
772 Delay = Delay + 1;
773 // dscc - input deserializer
774 Delay = Delay + 5;
775 // dscc - input cdc fifo
776 Delay = Delay + 25;
777 // dscc - cdc uncertainty
778 Delay = Delay + 2;
779 // dscc - output cdc fifo
780 Delay = Delay + 10;
781 // dscc - cdc uncertainty
782 Delay = Delay + 2;
783 // dscc - output serializer
784 Delay = Delay + 1;
785 // sft
786 Delay = Delay + 1;
787 } else {
788 // sfr
789 Delay = Delay + 2;
790 // dsccif
791 Delay = Delay + 0;
792 // dscc - input deserializer
793 Delay = Delay + 3;
794 // dscc - input cdc fifo
795 Delay = Delay + 12;
796 // dscc - cdc uncertainty
797 Delay = Delay + 2;
798 // dscc - output cdc fifo
799 Delay = Delay + 7;
800 // dscc - output serializer
801 Delay = Delay + 1;
802 // dscc - cdc uncertainty
803 Delay = Delay + 2;
804 // sft
805 Delay = Delay + 1;
806 }
807
808 return Delay;
809 }
810
CalculatePrefetchSchedule(struct display_mode_lib * mode_lib,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,Pipe * myPipe,unsigned int DSCDelay,double DPPCLKDelaySubtotalPlusCNVCFormater,double DPPCLKDelaySCL,double DPPCLKDelaySCLLBOnly,double DPPCLKDelayCNVCCursor,double DISPCLKDelaySubtotal,unsigned int DPP_RECOUT_WIDTH,enum output_format_class OutputFormat,unsigned int MaxInterDCNTileRepeaters,unsigned int VStartup,unsigned int MaxVStartup,unsigned int GPUVMPageTableLevels,bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,double HostVMMinPageSize,bool DynamicMetadataEnable,bool DynamicMetadataVMEnabled,int DynamicMetadataLinesBeforeActiveRequired,unsigned int DynamicMetadataTransmittedBytes,double UrgentLatency,double UrgentExtraLatency,double TCalc,unsigned int PDEAndMetaPTEBytesFrame,unsigned int MetaRowByte,unsigned int PixelPTEBytesPerRow,double PrefetchSourceLinesY,unsigned int SwathWidthY,int BytePerPixelY,double VInitPreFillY,unsigned int MaxNumSwathY,double PrefetchSourceLinesC,unsigned int SwathWidthC,int BytePerPixelC,double VInitPreFillC,unsigned int MaxNumSwathC,long swath_width_luma_ub,long swath_width_chroma_ub,unsigned int SwathHeightY,unsigned int SwathHeightC,double TWait,bool ProgressiveToInterlaceUnitInOPP,double * DSTXAfterScaler,double * DSTYAfterScaler,double * DestinationLinesForPrefetch,double * PrefetchBandwidth,double * DestinationLinesToRequestVMInVBlank,double * DestinationLinesToRequestRowInVBlank,double * VRatioPrefetchY,double * VRatioPrefetchC,double * RequiredPrefetchPixDataBWLuma,double * RequiredPrefetchPixDataBWChroma,bool * NotEnoughTimeForDynamicMetadata,double * Tno_bw,double * prefetch_vmrow_bw,double * Tdmdl_vm,double * Tdmdl,unsigned int * VUpdateOffsetPix,double * VUpdateWidthPix,double * VReadyOffsetPix)811 static bool CalculatePrefetchSchedule(
812 struct display_mode_lib *mode_lib,
813 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
814 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
815 Pipe *myPipe,
816 unsigned int DSCDelay,
817 double DPPCLKDelaySubtotalPlusCNVCFormater,
818 double DPPCLKDelaySCL,
819 double DPPCLKDelaySCLLBOnly,
820 double DPPCLKDelayCNVCCursor,
821 double DISPCLKDelaySubtotal,
822 unsigned int DPP_RECOUT_WIDTH,
823 enum output_format_class OutputFormat,
824 unsigned int MaxInterDCNTileRepeaters,
825 unsigned int VStartup,
826 unsigned int MaxVStartup,
827 unsigned int GPUVMPageTableLevels,
828 bool GPUVMEnable,
829 bool HostVMEnable,
830 unsigned int HostVMMaxNonCachedPageTableLevels,
831 double HostVMMinPageSize,
832 bool DynamicMetadataEnable,
833 bool DynamicMetadataVMEnabled,
834 int DynamicMetadataLinesBeforeActiveRequired,
835 unsigned int DynamicMetadataTransmittedBytes,
836 double UrgentLatency,
837 double UrgentExtraLatency,
838 double TCalc,
839 unsigned int PDEAndMetaPTEBytesFrame,
840 unsigned int MetaRowByte,
841 unsigned int PixelPTEBytesPerRow,
842 double PrefetchSourceLinesY,
843 unsigned int SwathWidthY,
844 int BytePerPixelY,
845 double VInitPreFillY,
846 unsigned int MaxNumSwathY,
847 double PrefetchSourceLinesC,
848 unsigned int SwathWidthC,
849 int BytePerPixelC,
850 double VInitPreFillC,
851 unsigned int MaxNumSwathC,
852 long swath_width_luma_ub,
853 long swath_width_chroma_ub,
854 unsigned int SwathHeightY,
855 unsigned int SwathHeightC,
856 double TWait,
857 bool ProgressiveToInterlaceUnitInOPP,
858 double *DSTXAfterScaler,
859 double *DSTYAfterScaler,
860 double *DestinationLinesForPrefetch,
861 double *PrefetchBandwidth,
862 double *DestinationLinesToRequestVMInVBlank,
863 double *DestinationLinesToRequestRowInVBlank,
864 double *VRatioPrefetchY,
865 double *VRatioPrefetchC,
866 double *RequiredPrefetchPixDataBWLuma,
867 double *RequiredPrefetchPixDataBWChroma,
868 bool *NotEnoughTimeForDynamicMetadata,
869 double *Tno_bw,
870 double *prefetch_vmrow_bw,
871 double *Tdmdl_vm,
872 double *Tdmdl,
873 unsigned int *VUpdateOffsetPix,
874 double *VUpdateWidthPix,
875 double *VReadyOffsetPix)
876 {
877 bool MyError = false;
878 unsigned int DPPCycles = 0, DISPCLKCycles = 0;
879 double DSTTotalPixelsAfterScaler = 0;
880 double LineTime = 0, Tsetup = 0;
881 double dst_y_prefetch_equ = 0;
882 double Tsw_oto = 0;
883 double prefetch_bw_oto = 0;
884 double Tvm_oto = 0;
885 double Tr0_oto = 0;
886 double Tvm_oto_lines = 0;
887 double Tr0_oto_lines = 0;
888 double dst_y_prefetch_oto = 0;
889 double TimeForFetchingMetaPTE = 0;
890 double TimeForFetchingRowInVBlank = 0;
891 double LinesToRequestPrefetchPixelData = 0;
892 double HostVMInefficiencyFactor = 0;
893 unsigned int HostVMDynamicLevelsTrips = 0;
894 double trip_to_mem = 0;
895 double Tvm_trips = 0;
896 double Tr0_trips = 0;
897 double Tvm_trips_rounded = 0;
898 double Tr0_trips_rounded = 0;
899 double Lsw_oto = 0;
900 double Tpre_rounded = 0;
901 double prefetch_bw_equ = 0;
902 double Tvm_equ = 0;
903 double Tr0_equ = 0;
904 double Tdmbf = 0;
905 double Tdmec = 0;
906 double Tdmsks = 0;
907
908 if (GPUVMEnable == true && HostVMEnable == true) {
909 HostVMInefficiencyFactor = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
910 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
911 } else {
912 HostVMInefficiencyFactor = 1;
913 HostVMDynamicLevelsTrips = 0;
914 }
915
916 CalculateDynamicMetadataParameters(
917 MaxInterDCNTileRepeaters,
918 myPipe->DPPCLK,
919 myPipe->DISPCLK,
920 myPipe->DCFCLKDeepSleep,
921 myPipe->PixelClock,
922 myPipe->HTotal,
923 myPipe->VBlank,
924 DynamicMetadataTransmittedBytes,
925 DynamicMetadataLinesBeforeActiveRequired,
926 myPipe->InterlaceEnable,
927 ProgressiveToInterlaceUnitInOPP,
928 &Tsetup,
929 &Tdmbf,
930 &Tdmec,
931 &Tdmsks);
932
933 LineTime = myPipe->HTotal / myPipe->PixelClock;
934 trip_to_mem = UrgentLatency;
935 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
936
937 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
938 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
939 } else {
940 *Tdmdl = TWait + UrgentExtraLatency;
941 }
942
943 if (DynamicMetadataEnable == true) {
944 if (VStartup * LineTime < Tsetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
945 *NotEnoughTimeForDynamicMetadata = true;
946 } else {
947 *NotEnoughTimeForDynamicMetadata = false;
948 dml_print("DML: Not Enough Time for Dynamic Meta!\n");
949 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
950 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
951 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
952 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
953 }
954 } else {
955 *NotEnoughTimeForDynamicMetadata = false;
956 }
957
958 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
959
960 if (myPipe->ScalerEnabled)
961 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
962 else
963 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
964
965 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
966
967 DISPCLKCycles = DISPCLKDelaySubtotal;
968
969 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
970 return true;
971
972 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK
973 + DSCDelay;
974
975 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineEnabled)?18:0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
976
977 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && ProgressiveToInterlaceUnitInOPP))
978 *DSTYAfterScaler = 1;
979 else
980 *DSTYAfterScaler = 0;
981
982 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
983 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
984 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
985
986 MyError = false;
987
988
989 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
990 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
991 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
992
993 if (GPUVMEnable) {
994 if (GPUVMPageTableLevels >= 3) {
995 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
996 } else
997 *Tno_bw = 0;
998 } else if (!myPipe->DCCEnable)
999 *Tno_bw = LineTime;
1000 else
1001 *Tno_bw = LineTime / 4;
1002
1003 dst_y_prefetch_equ = VStartup - (Tsetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime
1004 - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
1005
1006 Lsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC);
1007 Tsw_oto = Lsw_oto * LineTime;
1008
1009 prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC) / Tsw_oto;
1010
1011 if (GPUVMEnable == true) {
1012 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
1013 Tvm_trips,
1014 LineTime / 4.0);
1015 } else
1016 Tvm_oto = LineTime / 4.0;
1017
1018 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1019 Tr0_oto = dml_max3(
1020 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
1021 LineTime - Tvm_oto, LineTime / 4);
1022 } else
1023 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
1024
1025 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
1026 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
1027 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
1028
1029 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
1030 Tpre_rounded = dst_y_prefetch_equ * LineTime;
1031
1032 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
1033 dml_print("DML: dst_y_prefetch_equ: %f\n", dst_y_prefetch_equ);
1034
1035 dml_print("DML: LineTime: %f\n", LineTime);
1036 dml_print("DML: VStartup: %d\n", VStartup);
1037 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
1038 dml_print("DML: Tsetup: %fus - time from vstartup to vready\n", Tsetup);
1039 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
1040 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
1041 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1042 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1043 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1044 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm);
1045 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
1046 dml_print("DML: dst_x_after_scl: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler);
1047 dml_print("DML: dst_y_after_scl: %d lines - number of lines of pipeline and buffer delay after scaler \n", (int)*DSTYAfterScaler);
1048
1049 *PrefetchBandwidth = 0;
1050 *DestinationLinesToRequestVMInVBlank = 0;
1051 *DestinationLinesToRequestRowInVBlank = 0;
1052 *VRatioPrefetchY = 0;
1053 *VRatioPrefetchC = 0;
1054 *RequiredPrefetchPixDataBWLuma = 0;
1055 if (dst_y_prefetch_equ > 1) {
1056 double PrefetchBandwidth1 = 0;
1057 double PrefetchBandwidth2 = 0;
1058 double PrefetchBandwidth3 = 0;
1059 double PrefetchBandwidth4 = 0;
1060
1061 if (Tpre_rounded - *Tno_bw > 0)
1062 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
1063 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1064 + PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY
1065 + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC)
1066 / (Tpre_rounded - *Tno_bw);
1067 else
1068 PrefetchBandwidth1 = 0;
1069
1070 if (VStartup == MaxVStartup && (PrefetchBandwidth1 > 4 * prefetch_bw_oto) && (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - *Tno_bw) > 0) {
1071 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - *Tno_bw);
1072 }
1073
1074 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
1075 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame *
1076 HostVMInefficiencyFactor + PrefetchSourceLinesY *
1077 swath_width_luma_ub * BytePerPixelY +
1078 PrefetchSourceLinesC * swath_width_chroma_ub *
1079 BytePerPixelC) /
1080 (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
1081 else
1082 PrefetchBandwidth2 = 0;
1083
1084 if (Tpre_rounded - Tvm_trips_rounded > 0)
1085 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow *
1086 HostVMInefficiencyFactor + PrefetchSourceLinesY *
1087 swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC *
1088 swath_width_chroma_ub * BytePerPixelC) / (Tpre_rounded -
1089 Tvm_trips_rounded);
1090 else
1091 PrefetchBandwidth3 = 0;
1092
1093 if (VStartup == MaxVStartup && (PrefetchBandwidth3 > 4 * prefetch_bw_oto) && Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - Tvm_trips_rounded > 0) {
1094 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - Tvm_trips_rounded);
1095 }
1096
1097 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
1098 PrefetchBandwidth4 = (PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC)
1099 / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
1100 else
1101 PrefetchBandwidth4 = 0;
1102
1103 {
1104 bool Case1OK;
1105 bool Case2OK;
1106 bool Case3OK;
1107
1108 if (PrefetchBandwidth1 > 0) {
1109 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
1110 >= Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
1111 Case1OK = true;
1112 } else {
1113 Case1OK = false;
1114 }
1115 } else {
1116 Case1OK = false;
1117 }
1118
1119 if (PrefetchBandwidth2 > 0) {
1120 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
1121 >= Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
1122 Case2OK = true;
1123 } else {
1124 Case2OK = false;
1125 }
1126 } else {
1127 Case2OK = false;
1128 }
1129
1130 if (PrefetchBandwidth3 > 0) {
1131 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3
1132 < Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
1133 Case3OK = true;
1134 } else {
1135 Case3OK = false;
1136 }
1137 } else {
1138 Case3OK = false;
1139 }
1140
1141 if (Case1OK) {
1142 prefetch_bw_equ = PrefetchBandwidth1;
1143 } else if (Case2OK) {
1144 prefetch_bw_equ = PrefetchBandwidth2;
1145 } else if (Case3OK) {
1146 prefetch_bw_equ = PrefetchBandwidth3;
1147 } else {
1148 prefetch_bw_equ = PrefetchBandwidth4;
1149 }
1150
1151 dml_print("DML: prefetch_bw_equ: %f\n", prefetch_bw_equ);
1152
1153 if (prefetch_bw_equ > 0) {
1154 if (GPUVMEnable) {
1155 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
1156 } else {
1157 Tvm_equ = LineTime / 4;
1158 }
1159
1160 if ((GPUVMEnable || myPipe->DCCEnable)) {
1161 Tr0_equ = dml_max4(
1162 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
1163 Tr0_trips,
1164 (LineTime - Tvm_equ) / 2,
1165 LineTime / 4);
1166 } else {
1167 Tr0_equ = (LineTime - Tvm_equ) / 2;
1168 }
1169 } else {
1170 Tvm_equ = 0;
1171 Tr0_equ = 0;
1172 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
1173 }
1174 }
1175
1176 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
1177 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
1178 TimeForFetchingMetaPTE = Tvm_oto;
1179 TimeForFetchingRowInVBlank = Tr0_oto;
1180 *PrefetchBandwidth = prefetch_bw_oto;
1181 } else {
1182 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
1183 TimeForFetchingMetaPTE = Tvm_equ;
1184 TimeForFetchingRowInVBlank = Tr0_equ;
1185 *PrefetchBandwidth = prefetch_bw_equ;
1186 }
1187
1188 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
1189
1190 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
1191
1192
1193 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank
1194 - 2 * *DestinationLinesToRequestRowInVBlank;
1195
1196 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
1197
1198 *VRatioPrefetchY = (double) PrefetchSourceLinesY
1199 / LinesToRequestPrefetchPixelData;
1200 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1201 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
1202 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
1203 *VRatioPrefetchY = dml_max((double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
1204 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
1205 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1206 } else {
1207 MyError = true;
1208 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1209 *VRatioPrefetchY = 0;
1210 }
1211 }
1212
1213 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
1214 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1215
1216 if ((SwathHeightC > 4)) {
1217 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
1218 *VRatioPrefetchC = dml_max(*VRatioPrefetchC,
1219 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
1220 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1221 } else {
1222 MyError = true;
1223 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1224 *VRatioPrefetchC = 0;
1225 }
1226 }
1227
1228 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * BytePerPixelY * swath_width_luma_ub / LineTime;
1229 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * BytePerPixelC * swath_width_chroma_ub / LineTime;
1230 } else {
1231 MyError = true;
1232 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1233 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
1234 *VRatioPrefetchY = 0;
1235 *VRatioPrefetchC = 0;
1236 *RequiredPrefetchPixDataBWLuma = 0;
1237 *RequiredPrefetchPixDataBWChroma = 0;
1238 }
1239
1240 dml_print("DML: Tpre: %fus - sum of tim to request meta pte, 2 x data pte + meta data, swaths\n", (double)LinesToRequestPrefetchPixelData * LineTime + 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
1241 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
1242 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1243 dml_print("DML: Tr1: %fus - time to fetch second row of data pagetables and second row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1244 dml_print("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (double)LinesToRequestPrefetchPixelData * LineTime);
1245 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*DSTYAfterScaler + ((*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
1246 dml_print("DML: Tvstartup - Tsetup - Tcalc - Twait - Tpre - To > 0\n");
1247 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank - (*DSTYAfterScaler + ((*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - Tsetup);
1248 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1249
1250 } else {
1251 MyError = true;
1252 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1253 }
1254
1255 {
1256 double prefetch_vm_bw = 0;
1257 double prefetch_row_bw = 0;
1258
1259 if (PDEAndMetaPTEBytesFrame == 0) {
1260 prefetch_vm_bw = 0;
1261 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1262 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1263 } else {
1264 prefetch_vm_bw = 0;
1265 MyError = true;
1266 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1267 }
1268 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1269 prefetch_row_bw = 0;
1270 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1271 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1272 } else {
1273 prefetch_row_bw = 0;
1274 MyError = true;
1275 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1276 }
1277
1278 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1279 }
1280
1281 if (MyError) {
1282 *PrefetchBandwidth = 0;
1283 *DestinationLinesToRequestVMInVBlank = 0;
1284 *DestinationLinesToRequestRowInVBlank = 0;
1285 *DestinationLinesForPrefetch = 0;
1286 *VRatioPrefetchY = 0;
1287 *VRatioPrefetchC = 0;
1288 *RequiredPrefetchPixDataBWLuma = 0;
1289 *RequiredPrefetchPixDataBWChroma = 0;
1290 }
1291
1292 return MyError;
1293 }
1294
/*
 * Snap Clock onto the grid of frequencies (4 * VCOSpeed) / N.
 *
 * The divider N is obtained by passing (4 * VCOSpeed) / Clock through
 * dml_floor(..., 1); assuming dml_floor rounds down to a whole number, the
 * returned frequency is the closest grid point at or above Clock.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	double vco_times_four = VCOSpeed * 4;
	double divider = dml_floor(vco_times_four / Clock, 1);

	return vco_times_four / divider;
}
1299
/*
 * Snap Clock onto the grid of frequencies (4 * VCOSpeed) / N.
 *
 * The divider N is obtained by passing (4 * VCOSpeed) / Clock through
 * dml_ceil(..., 1); assuming dml_ceil rounds up to a whole number, the
 * returned frequency is the closest grid point at or below Clock.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	double divider = dml_ceil(VCOSpeed * 4.0 / Clock, 1);
	double vco_times_four = VCOSpeed * 4;

	return vco_times_four / divider;
}
1304
CalculateDCCConfiguration(bool DCCEnabled,bool DCCProgrammingAssumesScanDirectionUnknown,enum source_format_class SourcePixelFormat,unsigned int SurfaceWidthLuma,unsigned int SurfaceWidthChroma,unsigned int SurfaceHeightLuma,unsigned int SurfaceHeightChroma,double DETBufferSize,unsigned int RequestHeight256ByteLuma,unsigned int RequestHeight256ByteChroma,enum dm_swizzle_mode TilingFormat,unsigned int BytePerPixelY,unsigned int BytePerPixelC,double BytePerPixelDETY,double BytePerPixelDETC,enum scan_direction_class ScanOrientation,unsigned int * MaxUncompressedBlockLuma,unsigned int * MaxUncompressedBlockChroma,unsigned int * MaxCompressedBlockLuma,unsigned int * MaxCompressedBlockChroma,unsigned int * IndependentBlockLuma,unsigned int * IndependentBlockChroma)1305 static void CalculateDCCConfiguration(
1306 bool DCCEnabled,
1307 bool DCCProgrammingAssumesScanDirectionUnknown,
1308 enum source_format_class SourcePixelFormat,
1309 unsigned int SurfaceWidthLuma,
1310 unsigned int SurfaceWidthChroma,
1311 unsigned int SurfaceHeightLuma,
1312 unsigned int SurfaceHeightChroma,
1313 double DETBufferSize,
1314 unsigned int RequestHeight256ByteLuma,
1315 unsigned int RequestHeight256ByteChroma,
1316 enum dm_swizzle_mode TilingFormat,
1317 unsigned int BytePerPixelY,
1318 unsigned int BytePerPixelC,
1319 double BytePerPixelDETY,
1320 double BytePerPixelDETC,
1321 enum scan_direction_class ScanOrientation,
1322 unsigned int *MaxUncompressedBlockLuma,
1323 unsigned int *MaxUncompressedBlockChroma,
1324 unsigned int *MaxCompressedBlockLuma,
1325 unsigned int *MaxCompressedBlockChroma,
1326 unsigned int *IndependentBlockLuma,
1327 unsigned int *IndependentBlockChroma)
1328 {
1329 int yuv420 = 0;
1330 int horz_div_l = 0;
1331 int horz_div_c = 0;
1332 int vert_div_l = 0;
1333 int vert_div_c = 0;
1334
1335 int req128_horz_wc_l = 0;
1336 int req128_horz_wc_c = 0;
1337 int req128_vert_wc_l = 0;
1338 int req128_vert_wc_c = 0;
1339 int segment_order_horz_contiguous_luma = 0;
1340 int segment_order_horz_contiguous_chroma = 0;
1341 int segment_order_vert_contiguous_luma = 0;
1342 int segment_order_vert_contiguous_chroma = 0;
1343
1344 long full_swath_bytes_horz_wc_l = 0;
1345 long full_swath_bytes_horz_wc_c = 0;
1346 long full_swath_bytes_vert_wc_l = 0;
1347 long full_swath_bytes_vert_wc_c = 0;
1348
1349 long swath_buf_size = 0;
1350 double detile_buf_vp_horz_limit = 0;
1351 double detile_buf_vp_vert_limit = 0;
1352
1353 long MAS_vp_horz_limit = 0;
1354 long MAS_vp_vert_limit = 0;
1355 long max_vp_horz_width = 0;
1356 long max_vp_vert_height = 0;
1357 long eff_surf_width_l = 0;
1358 long eff_surf_width_c = 0;
1359 long eff_surf_height_l = 0;
1360 long eff_surf_height_c = 0;
1361
1362 typedef enum {
1363 REQ_256Bytes,
1364 REQ_128BytesNonContiguous,
1365 REQ_128BytesContiguous,
1366 REQ_NA
1367 } RequestType;
1368
1369 RequestType RequestLuma;
1370 RequestType RequestChroma;
1371
1372 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
1373 horz_div_l = 1;
1374 horz_div_c = 1;
1375 vert_div_l = 1;
1376 vert_div_c = 1;
1377
1378 if (BytePerPixelY == 1)
1379 vert_div_l = 0;
1380 if (BytePerPixelC == 1)
1381 vert_div_c = 0;
1382 if (BytePerPixelY == 8
1383 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t
1384 || TilingFormat == dm_sw_64kb_s_x))
1385 horz_div_l = 0;
1386 if (BytePerPixelC == 8
1387 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t
1388 || TilingFormat == dm_sw_64kb_s_x))
1389 horz_div_c = 0;
1390
1391 if (BytePerPixelC == 0) {
1392 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1393 detile_buf_vp_horz_limit = (double) swath_buf_size
1394 / ((double) RequestHeight256ByteLuma * BytePerPixelY
1395 / (1 + horz_div_l));
1396 detile_buf_vp_vert_limit = (double) swath_buf_size
1397 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
1398 } else {
1399 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1400 detile_buf_vp_horz_limit = (double) swath_buf_size
1401 / ((double) RequestHeight256ByteLuma * BytePerPixelY
1402 / (1 + horz_div_l)
1403 + (double) RequestHeight256ByteChroma
1404 * BytePerPixelC / (1 + horz_div_c)
1405 / (1 + yuv420));
1406 detile_buf_vp_vert_limit = (double) swath_buf_size
1407 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l)
1408 + 256.0 / RequestHeight256ByteChroma
1409 / (1 + vert_div_c) / (1 + yuv420));
1410 }
1411
1412 if (SourcePixelFormat == dm_420_10) {
1413 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1414 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
1415 }
1416
1417 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
1418 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
1419
1420 MAS_vp_horz_limit = 5760;
1421 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
1422 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
1423 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
1424 eff_surf_width_l =
1425 (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
1426 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1427 eff_surf_height_l = (
1428 SurfaceHeightLuma > max_vp_vert_height ?
1429 max_vp_vert_height : SurfaceHeightLuma);
1430 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
1431
1432 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1433 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1434 if (BytePerPixelC > 0) {
1435 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma
1436 * BytePerPixelC;
1437 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
1438 } else {
1439 full_swath_bytes_horz_wc_c = 0;
1440 full_swath_bytes_vert_wc_c = 0;
1441 }
1442
1443 if (SourcePixelFormat == dm_420_10) {
1444 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
1445 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
1446 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
1447 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
1448 }
1449
1450 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1451 req128_horz_wc_l = 0;
1452 req128_horz_wc_c = 0;
1453 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c
1454 && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c
1455 <= DETBufferSize) {
1456 req128_horz_wc_l = 0;
1457 req128_horz_wc_c = 1;
1458 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c
1459 && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c
1460 <= DETBufferSize) {
1461 req128_horz_wc_l = 1;
1462 req128_horz_wc_c = 0;
1463 } else {
1464 req128_horz_wc_l = 1;
1465 req128_horz_wc_c = 1;
1466 }
1467
1468 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1469 req128_vert_wc_l = 0;
1470 req128_vert_wc_c = 0;
1471 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c
1472 && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c
1473 <= DETBufferSize) {
1474 req128_vert_wc_l = 0;
1475 req128_vert_wc_c = 1;
1476 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c
1477 && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c
1478 <= DETBufferSize) {
1479 req128_vert_wc_l = 1;
1480 req128_vert_wc_c = 0;
1481 } else {
1482 req128_vert_wc_l = 1;
1483 req128_vert_wc_c = 1;
1484 }
1485
1486 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1487 segment_order_horz_contiguous_luma = 0;
1488 } else {
1489 segment_order_horz_contiguous_luma = 1;
1490 }
1491 if ((BytePerPixelY == 8
1492 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x
1493 || TilingFormat == dm_sw_64kb_d_t
1494 || TilingFormat == dm_sw_64kb_r_x))
1495 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1496 segment_order_vert_contiguous_luma = 0;
1497 } else {
1498 segment_order_vert_contiguous_luma = 1;
1499 }
1500 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1501 segment_order_horz_contiguous_chroma = 0;
1502 } else {
1503 segment_order_horz_contiguous_chroma = 1;
1504 }
1505 if ((BytePerPixelC == 8
1506 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x
1507 || TilingFormat == dm_sw_64kb_d_t
1508 || TilingFormat == dm_sw_64kb_r_x))
1509 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1510 segment_order_vert_contiguous_chroma = 0;
1511 } else {
1512 segment_order_vert_contiguous_chroma = 1;
1513 }
1514
1515 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1516 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1517 RequestLuma = REQ_256Bytes;
1518 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0)
1519 || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1520 RequestLuma = REQ_128BytesNonContiguous;
1521 } else {
1522 RequestLuma = REQ_128BytesContiguous;
1523 }
1524 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1525 RequestChroma = REQ_256Bytes;
1526 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0)
1527 || (req128_vert_wc_c == 1
1528 && segment_order_vert_contiguous_chroma == 0)) {
1529 RequestChroma = REQ_128BytesNonContiguous;
1530 } else {
1531 RequestChroma = REQ_128BytesContiguous;
1532 }
1533 } else if (ScanOrientation != dm_vert) {
1534 if (req128_horz_wc_l == 0) {
1535 RequestLuma = REQ_256Bytes;
1536 } else if (segment_order_horz_contiguous_luma == 0) {
1537 RequestLuma = REQ_128BytesNonContiguous;
1538 } else {
1539 RequestLuma = REQ_128BytesContiguous;
1540 }
1541 if (req128_horz_wc_c == 0) {
1542 RequestChroma = REQ_256Bytes;
1543 } else if (segment_order_horz_contiguous_chroma == 0) {
1544 RequestChroma = REQ_128BytesNonContiguous;
1545 } else {
1546 RequestChroma = REQ_128BytesContiguous;
1547 }
1548 } else {
1549 if (req128_vert_wc_l == 0) {
1550 RequestLuma = REQ_256Bytes;
1551 } else if (segment_order_vert_contiguous_luma == 0) {
1552 RequestLuma = REQ_128BytesNonContiguous;
1553 } else {
1554 RequestLuma = REQ_128BytesContiguous;
1555 }
1556 if (req128_vert_wc_c == 0) {
1557 RequestChroma = REQ_256Bytes;
1558 } else if (segment_order_vert_contiguous_chroma == 0) {
1559 RequestChroma = REQ_128BytesNonContiguous;
1560 } else {
1561 RequestChroma = REQ_128BytesContiguous;
1562 }
1563 }
1564
1565 if (RequestLuma == REQ_256Bytes) {
1566 *MaxUncompressedBlockLuma = 256;
1567 *MaxCompressedBlockLuma = 256;
1568 *IndependentBlockLuma = 0;
1569 } else if (RequestLuma == REQ_128BytesContiguous) {
1570 *MaxUncompressedBlockLuma = 256;
1571 *MaxCompressedBlockLuma = 128;
1572 *IndependentBlockLuma = 128;
1573 } else {
1574 *MaxUncompressedBlockLuma = 256;
1575 *MaxCompressedBlockLuma = 64;
1576 *IndependentBlockLuma = 64;
1577 }
1578
1579 if (RequestChroma == REQ_256Bytes) {
1580 *MaxUncompressedBlockChroma = 256;
1581 *MaxCompressedBlockChroma = 256;
1582 *IndependentBlockChroma = 0;
1583 } else if (RequestChroma == REQ_128BytesContiguous) {
1584 *MaxUncompressedBlockChroma = 256;
1585 *MaxCompressedBlockChroma = 128;
1586 *IndependentBlockChroma = 128;
1587 } else {
1588 *MaxUncompressedBlockChroma = 256;
1589 *MaxCompressedBlockChroma = 64;
1590 *IndependentBlockChroma = 64;
1591 }
1592
1593 if (DCCEnabled != true || BytePerPixelC == 0) {
1594 *MaxUncompressedBlockChroma = 0;
1595 *MaxCompressedBlockChroma = 0;
1596 *IndependentBlockChroma = 0;
1597 }
1598
1599 if (DCCEnabled != true) {
1600 *MaxUncompressedBlockLuma = 0;
1601 *MaxCompressedBlockLuma = 0;
1602 *IndependentBlockLuma = 0;
1603 }
1604 }
1605
1606
CalculatePrefetchSourceLines(struct display_mode_lib * mode_lib,double VRatio,double vtaps,bool Interlace,bool ProgressiveToInterlaceUnitInOPP,unsigned int SwathHeight,unsigned int ViewportYStart,double * VInitPreFill,unsigned int * MaxNumSwath)1607 static double CalculatePrefetchSourceLines(
1608 struct display_mode_lib *mode_lib,
1609 double VRatio,
1610 double vtaps,
1611 bool Interlace,
1612 bool ProgressiveToInterlaceUnitInOPP,
1613 unsigned int SwathHeight,
1614 unsigned int ViewportYStart,
1615 double *VInitPreFill,
1616 unsigned int *MaxNumSwath)
1617 {
1618 unsigned int MaxPartialSwath = 0;
1619
1620 if (ProgressiveToInterlaceUnitInOPP)
1621 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1622 else
1623 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
1624
1625 if (!mode_lib->vba.IgnoreViewportPositioning) {
1626
1627 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
1628
1629 if (*VInitPreFill > 1.0)
1630 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1631 else
1632 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2)
1633 % SwathHeight;
1634 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
1635
1636 } else {
1637
1638 if (ViewportYStart != 0)
1639 dml_print(
1640 "WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1641
1642 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1643
1644 if (*VInitPreFill > 1.0)
1645 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1646 else
1647 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1)
1648 % SwathHeight;
1649 }
1650
1651 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
1652 }
1653
CalculateVMAndRowBytes(struct display_mode_lib * mode_lib,bool DCCEnable,unsigned int BlockHeight256Bytes,unsigned int BlockWidth256Bytes,enum source_format_class SourcePixelFormat,unsigned int SurfaceTiling,unsigned int BytePerPixel,enum scan_direction_class ScanDirection,unsigned int SwathWidth,unsigned int ViewportHeight,bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,unsigned int GPUVMMinPageSize,unsigned int HostVMMinPageSize,unsigned int PTEBufferSizeInRequests,unsigned int Pitch,unsigned int DCCMetaPitch,unsigned int * MacroTileWidth,unsigned int * MetaRowByte,unsigned int * PixelPTEBytesPerRow,bool * PTEBufferSizeNotExceeded,unsigned int * dpte_row_width_ub,unsigned int * dpte_row_height,unsigned int * MetaRequestWidth,unsigned int * MetaRequestHeight,unsigned int * meta_row_width,unsigned int * meta_row_height,unsigned int * vm_group_bytes,unsigned int * dpte_group_bytes,unsigned int * PixelPTEReqWidth,unsigned int * PixelPTEReqHeight,unsigned int * PTERequestSize,unsigned int * DPDE0BytesFrame,unsigned int * MetaPTEBytesFrame)1654 static unsigned int CalculateVMAndRowBytes(
1655 struct display_mode_lib *mode_lib,
1656 bool DCCEnable,
1657 unsigned int BlockHeight256Bytes,
1658 unsigned int BlockWidth256Bytes,
1659 enum source_format_class SourcePixelFormat,
1660 unsigned int SurfaceTiling,
1661 unsigned int BytePerPixel,
1662 enum scan_direction_class ScanDirection,
1663 unsigned int SwathWidth,
1664 unsigned int ViewportHeight,
1665 bool GPUVMEnable,
1666 bool HostVMEnable,
1667 unsigned int HostVMMaxNonCachedPageTableLevels,
1668 unsigned int GPUVMMinPageSize,
1669 unsigned int HostVMMinPageSize,
1670 unsigned int PTEBufferSizeInRequests,
1671 unsigned int Pitch,
1672 unsigned int DCCMetaPitch,
1673 unsigned int *MacroTileWidth,
1674 unsigned int *MetaRowByte,
1675 unsigned int *PixelPTEBytesPerRow,
1676 bool *PTEBufferSizeNotExceeded,
1677 unsigned int *dpte_row_width_ub,
1678 unsigned int *dpte_row_height,
1679 unsigned int *MetaRequestWidth,
1680 unsigned int *MetaRequestHeight,
1681 unsigned int *meta_row_width,
1682 unsigned int *meta_row_height,
1683 unsigned int *vm_group_bytes,
1684 unsigned int *dpte_group_bytes,
1685 unsigned int *PixelPTEReqWidth,
1686 unsigned int *PixelPTEReqHeight,
1687 unsigned int *PTERequestSize,
1688 unsigned int *DPDE0BytesFrame,
1689 unsigned int *MetaPTEBytesFrame)
1690 {
1691 unsigned int MPDEBytesFrame = 0;
1692 unsigned int DCCMetaSurfaceBytes = 0;
1693 unsigned int MacroTileSizeBytes = 0;
1694 unsigned int MacroTileHeight = 0;
1695 unsigned int ExtraDPDEBytesFrame = 0;
1696 unsigned int PDEAndMetaPTEBytesFrame = 0;
1697 unsigned int PixelPTEReqHeightPTEs = 0;
1698 unsigned int HostVMDynamicLevels = 0;
1699
1700 double FractionOfPTEReturnDrop;
1701
1702 if (GPUVMEnable == true && HostVMEnable == true) {
1703 if (HostVMMinPageSize < 2048) {
1704 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1705 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1706 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1707 } else {
1708 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
1709 }
1710 }
1711
1712 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1713 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1714 if (ScanDirection != dm_vert) {
1715 *meta_row_height = *MetaRequestHeight;
1716 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth)
1717 + *MetaRequestWidth;
1718 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1719 } else {
1720 *meta_row_height = *MetaRequestWidth;
1721 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight)
1722 + *MetaRequestHeight;
1723 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1724 }
1725 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes)
1726 + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1727 if (GPUVMEnable == true) {
1728 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1729 MPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 1);
1730 } else {
1731 *MetaPTEBytesFrame = 0;
1732 MPDEBytesFrame = 0;
1733 }
1734
1735 if (DCCEnable != true) {
1736 *MetaPTEBytesFrame = 0;
1737 MPDEBytesFrame = 0;
1738 *MetaRowByte = 0;
1739 }
1740
1741 if (SurfaceTiling == dm_sw_linear) {
1742 MacroTileSizeBytes = 256;
1743 MacroTileHeight = BlockHeight256Bytes;
1744 } else {
1745 MacroTileSizeBytes = 65536;
1746 MacroTileHeight = 16 * BlockHeight256Bytes;
1747 }
1748 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
1749
1750 if (GPUVMEnable == true && mode_lib->vba.GPUVMMaxPageTableLevels > 1) {
1751 if (ScanDirection != dm_vert) {
1752 *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
1753 } else {
1754 *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
1755 }
1756 ExtraDPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 2);
1757 } else {
1758 *DPDE0BytesFrame = 0;
1759 ExtraDPDEBytesFrame = 0;
1760 }
1761
1762 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame
1763 + ExtraDPDEBytesFrame;
1764
1765 if (HostVMEnable == true) {
1766 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
1767 }
1768
1769 if (SurfaceTiling == dm_sw_linear) {
1770 PixelPTEReqHeightPTEs = 1;
1771 *PixelPTEReqHeight = 1;
1772 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
1773 *PTERequestSize = 64;
1774 FractionOfPTEReturnDrop = 0;
1775 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
1776 PixelPTEReqHeightPTEs = 16;
1777 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
1778 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
1779 *PTERequestSize = 128;
1780 FractionOfPTEReturnDrop = 0;
1781 } else {
1782 PixelPTEReqHeightPTEs = 1;
1783 *PixelPTEReqHeight = MacroTileHeight;
1784 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1785 *PTERequestSize = 64;
1786 FractionOfPTEReturnDrop = 0;
1787 }
1788
1789 if (SurfaceTiling == dm_sw_linear) {
1790 if (PTEBufferSizeInRequests == 0)
1791 *dpte_row_height = 1;
1792 else
1793 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
1794 *dpte_row_width_ub = (dml_ceil(((double) SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1795 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1796 } else if (ScanDirection != dm_vert) {
1797 *dpte_row_height = *PixelPTEReqHeight;
1798 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1799 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1800 } else {
1801 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
1802 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
1803 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
1804 }
1805 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop)
1806 <= 64 * PTEBufferSizeInRequests) {
1807 *PTEBufferSizeNotExceeded = true;
1808 } else {
1809 *PTEBufferSizeNotExceeded = false;
1810 }
1811
1812 if (GPUVMEnable != true) {
1813 *PixelPTEBytesPerRow = 0;
1814 *PTEBufferSizeNotExceeded = true;
1815 }
1816 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
1817
1818 if (HostVMEnable == true) {
1819 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
1820 }
1821
1822 if (HostVMEnable == true) {
1823 *vm_group_bytes = 512;
1824 *dpte_group_bytes = 512;
1825 } else if (GPUVMEnable == true) {
1826 *vm_group_bytes = 2048;
1827 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
1828 *dpte_group_bytes = 512;
1829 } else {
1830 *dpte_group_bytes = 2048;
1831 }
1832 } else {
1833 *vm_group_bytes = 0;
1834 *dpte_group_bytes = 0;
1835 }
1836
1837 return PDEAndMetaPTEBytesFrame;
1838 }
1839
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib * mode_lib)1840 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
1841 struct display_mode_lib *mode_lib)
1842 {
1843 struct vba_vars_st *v = &mode_lib->vba;
1844 unsigned int j, k;
1845 long ReorderBytes = 0;
1846 unsigned int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
1847 double MaxTotalRDBandwidth = 0;
1848 double MaxTotalRDBandwidthNoUrgentBurst = 0;
1849 bool DestinationLineTimesForPrefetchLessThan2 = false;
1850 bool VRatioPrefetchMoreThan4 = false;
1851 double TWait;
1852
1853 v->WritebackDISPCLK = 0.0;
1854 v->DISPCLKWithRamping = 0;
1855 v->DISPCLKWithoutRamping = 0;
1856 v->GlobalDPPCLK = 0.0;
1857 /* DAL custom code: need to update ReturnBW in case min dcfclk is overriden */
1858 v->IdealSDPPortBandwidthPerState[v->VoltageLevel][v->maxMpcComb] = dml_min3(
1859 v->ReturnBusWidth * v->DCFCLK,
1860 v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth,
1861 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
1862 if (v->HostVMEnable != true) {
1863 v->ReturnBW = v->IdealSDPPortBandwidthPerState[v->VoltageLevel][v->maxMpcComb] * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100;
1864 } else {
1865 v->ReturnBW = v->IdealSDPPortBandwidthPerState[v->VoltageLevel][v->maxMpcComb] * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100;
1866 }
1867 /* End DAL custom code */
1868
1869 // DISPCLK and DPPCLK Calculation
1870 //
1871 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
1872 if (v->WritebackEnable[k]) {
1873 v->WritebackDISPCLK = dml_max(v->WritebackDISPCLK,
1874 dml30_CalculateWriteBackDISPCLK(
1875 v->WritebackPixelFormat[k],
1876 v->PixelClock[k],
1877 v->WritebackHRatio[k],
1878 v->WritebackVRatio[k],
1879 v->WritebackHTaps[k],
1880 v->WritebackVTaps[k],
1881 v->WritebackSourceWidth[k],
1882 v->WritebackDestinationWidth[k],
1883 v->HTotal[k],
1884 v->WritebackLineBufferSize));
1885 }
1886 }
1887
1888 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
1889 if (v->HRatio[k] > 1) {
1890 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput,
1891 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
1892 } else {
1893 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
1894 v->MaxDCHUBToPSCLThroughput,
1895 v->MaxPSCLToLBThroughput);
1896 }
1897
1898 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
1899 * dml_max(v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
1900 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
1901
1902 if ((v->htaps[k] > 6 || v->vtaps[k] > 6)
1903 && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
1904 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
1905 }
1906
1907 if ((v->SourcePixelFormat[k] != dm_420_8
1908 && v->SourcePixelFormat[k] != dm_420_10
1909 && v->SourcePixelFormat[k] != dm_420_12
1910 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
1911 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
1912 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
1913 } else {
1914 if (v->HRatioChroma[k] > 1) {
1915 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput,
1916 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
1917 } else {
1918 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
1919 v->MaxDCHUBToPSCLThroughput,
1920 v->MaxPSCLToLBThroughput);
1921 }
1922 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
1923 * dml_max3(v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
1924 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k], 1.0);
1925
1926 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6)
1927 && v->DPPCLKUsingSingleDPPChroma
1928 < 2 * v->PixelClock[k]) {
1929 v->DPPCLKUsingSingleDPPChroma = 2
1930 * v->PixelClock[k];
1931 }
1932
1933 v->DPPCLKUsingSingleDPP[k] = dml_max(
1934 v->DPPCLKUsingSingleDPPLuma,
1935 v->DPPCLKUsingSingleDPPChroma);
1936 }
1937 }
1938
1939 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
1940 if (v->BlendingAndTiming[k] != k)
1941 continue;
1942 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
1943 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping,
1944 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1945 * (1 + v->DISPCLKRampingMargin / 100));
1946 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping,
1947 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
1948 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
1949 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping,
1950 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1951 * (1 + v->DISPCLKRampingMargin / 100));
1952 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping,
1953 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
1954 } else {
1955 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping,
1956 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1957 * (1 + v->DISPCLKRampingMargin / 100));
1958 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping,
1959 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
1960 }
1961 }
1962
1963 v->DISPCLKWithRamping = dml_max(
1964 v->DISPCLKWithRamping,
1965 v->WritebackDISPCLK);
1966 v->DISPCLKWithoutRamping = dml_max(
1967 v->DISPCLKWithoutRamping,
1968 v->WritebackDISPCLK);
1969
1970 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
1971 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
1972 v->DISPCLKWithRamping,
1973 v->DISPCLKDPPCLKVCOSpeed);
1974 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
1975 v->DISPCLKWithoutRamping,
1976 v->DISPCLKDPPCLKVCOSpeed);
1977 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
1978 v->soc.clock_limits[mode_lib->soc.num_states - 1].dispclk_mhz,
1979 v->DISPCLKDPPCLKVCOSpeed);
1980 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity
1981 > v->MaxDispclkRoundedToDFSGranularity) {
1982 v->DISPCLK_calculated =
1983 v->DISPCLKWithoutRampingRoundedToDFSGranularity;
1984 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity
1985 > v->MaxDispclkRoundedToDFSGranularity) {
1986 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
1987 } else {
1988 v->DISPCLK_calculated =
1989 v->DISPCLKWithRampingRoundedToDFSGranularity;
1990 }
1991 v->DISPCLK = v->DISPCLK_calculated;
1992 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
1993
1994 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
1995 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k]
1996 / v->DPPPerPlane[k]
1997 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
1998 v->GlobalDPPCLK = dml_max(
1999 v->GlobalDPPCLK,
2000 v->DPPCLK_calculated[k]);
2001 }
2002 v->GlobalDPPCLK = RoundToDFSGranularityUp(
2003 v->GlobalDPPCLK,
2004 v->DISPCLKDPPCLKVCOSpeed);
2005 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2006 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255
2007 * dml_ceil(
2008 v->DPPCLK_calculated[k] * 255.0
2009 / v->GlobalDPPCLK,
2010 1);
2011 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2012 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2013 }
2014
2015 // Urgent and B P-State/DRAM Clock Change Watermark
2016 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2017 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2018
2019 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2020 dml30_CalculateBytePerPixelAnd256BBlockSizes(
2021 v->SourcePixelFormat[k],
2022 v->SurfaceTiling[k],
2023 &v->BytePerPixelY[k],
2024 &v->BytePerPixelC[k],
2025 &v->BytePerPixelDETY[k],
2026 &v->BytePerPixelDETC[k],
2027 &v->BlockHeight256BytesY[k],
2028 &v->BlockHeight256BytesC[k],
2029 &v->BlockWidth256BytesY[k],
2030 &v->BlockWidth256BytesC[k]);
2031 }
2032
2033 CalculateSwathWidth(
2034 false,
2035 v->NumberOfActivePlanes,
2036 v->SourcePixelFormat,
2037 v->SourceScan,
2038 v->ViewportWidth,
2039 v->ViewportHeight,
2040 v->SurfaceWidthY,
2041 v->SurfaceWidthC,
2042 v->SurfaceHeightY,
2043 v->SurfaceHeightC,
2044 v->ODMCombineEnabled,
2045 v->BytePerPixelY,
2046 v->BytePerPixelC,
2047 v->BlockHeight256BytesY,
2048 v->BlockHeight256BytesC,
2049 v->BlockWidth256BytesY,
2050 v->BlockWidth256BytesC,
2051 v->BlendingAndTiming,
2052 v->HActive,
2053 v->HRatio,
2054 v->DPPPerPlane,
2055 v->SwathWidthSingleDPPY,
2056 v->SwathWidthSingleDPPC,
2057 v->SwathWidthY,
2058 v->SwathWidthC,
2059 v->dummyinteger3,
2060 v->dummyinteger4,
2061 v->swath_width_luma_ub,
2062 v->swath_width_chroma_ub);
2063
2064
2065 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2066 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2067 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioChroma[k];
2068 DTRACE("read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2069 }
2070
2071
2072 // DCFCLK Deep Sleep
2073 CalculateDCFCLKDeepSleep(
2074 mode_lib,
2075 v->NumberOfActivePlanes,
2076 v->BytePerPixelY,
2077 v->BytePerPixelC,
2078 v->VRatio,
2079 v->VRatioChroma,
2080 v->SwathWidthY,
2081 v->SwathWidthC,
2082 v->DPPPerPlane,
2083 v->HRatio,
2084 v->HRatioChroma,
2085 v->PixelClock,
2086 v->PSCL_THROUGHPUT_LUMA,
2087 v->PSCL_THROUGHPUT_CHROMA,
2088 v->DPPCLK,
2089 v->ReadBandwidthPlaneLuma,
2090 v->ReadBandwidthPlaneChroma,
2091 v->ReturnBusWidth,
2092 &v->DCFCLKDeepSleep);
2093
2094 // DSCCLK
2095 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2096 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2097 v->DSCCLK_calculated[k] = 0.0;
2098 } else {
2099 if (v->OutputFormat[k] == dm_420)
2100 v->DSCFormatFactor = 2;
2101 else if (v->OutputFormat[k] == dm_444)
2102 v->DSCFormatFactor = 1;
2103 else if (v->OutputFormat[k] == dm_n422)
2104 v->DSCFormatFactor = 2;
2105 else
2106 v->DSCFormatFactor = 1;
2107 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2108 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12
2109 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2110 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2111 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6
2112 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2113 else
2114 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3
2115 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2116 }
2117 }
2118
2119 // DSC Delay
2120 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2121 double BPP = v->OutputBppPerState[k][v->VoltageLevel];
2122
2123 if (v->DSCEnabled[k] && BPP != 0) {
2124 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2125 v->DSCDelay[k] = dscceComputeDelay(v->DSCInputBitPerComponent[k],
2126 BPP,
2127 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2128 v->NumberOfDSCSlices[k],
2129 v->OutputFormat[k],
2130 v->Output[k])
2131 + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2132 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2133 v->DSCDelay[k] = 2 * dscceComputeDelay(v->DSCInputBitPerComponent[k],
2134 BPP,
2135 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2136 v->NumberOfDSCSlices[k] / 2.0,
2137 v->OutputFormat[k],
2138 v->Output[k])
2139 + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2140 } else {
2141 v->DSCDelay[k] = 4 * dscceComputeDelay(v->DSCInputBitPerComponent[k],
2142 BPP,
2143 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2144 v->NumberOfDSCSlices[k] / 4.0,
2145 v->OutputFormat[k],
2146 v->Output[k])
2147 + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2148 }
2149 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2150 } else {
2151 v->DSCDelay[k] = 0;
2152 }
2153 }
2154
2155 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2156 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2157 if (j != k && v->BlendingAndTiming[k] == j
2158 && v->DSCEnabled[j])
2159 v->DSCDelay[k] = v->DSCDelay[j];
2160
2161 // Prefetch
2162 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2163 unsigned int PDEAndMetaPTEBytesFrameY = 0;
2164 unsigned int PixelPTEBytesPerRowY = 0;
2165 unsigned int MetaRowByteY = 0;
2166 unsigned int MetaRowByteC = 0;
2167 unsigned int PDEAndMetaPTEBytesFrameC = 0;
2168 unsigned int PixelPTEBytesPerRowC = 0;
2169 bool PTEBufferSizeNotExceededY = 0;
2170 bool PTEBufferSizeNotExceededC = 0;
2171
2172
2173 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2174 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2175 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2176 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2177 } else {
2178 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2179 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2180
2181 }
2182 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2183 mode_lib,
2184 v->DCCEnable[k],
2185 v->BlockHeight256BytesC[k],
2186 v->BlockWidth256BytesC[k],
2187 v->SourcePixelFormat[k],
2188 v->SurfaceTiling[k],
2189 v->BytePerPixelC[k],
2190 v->SourceScan[k],
2191 v->SwathWidthC[k],
2192 v->ViewportHeightChroma[k],
2193 v->GPUVMEnable,
2194 v->HostVMEnable,
2195 v->HostVMMaxNonCachedPageTableLevels,
2196 v->GPUVMMinPageSize,
2197 v->HostVMMinPageSize,
2198 v->PTEBufferSizeInRequestsForChroma,
2199 v->PitchC[k],
2200 v->DCCMetaPitchC[k],
2201 &v->MacroTileWidthC[k],
2202 &MetaRowByteC,
2203 &PixelPTEBytesPerRowC,
2204 &PTEBufferSizeNotExceededC,
2205 &v->dpte_row_width_chroma_ub[k],
2206 &v->dpte_row_height_chroma[k],
2207 &v->meta_req_width_chroma[k],
2208 &v->meta_req_height_chroma[k],
2209 &v->meta_row_width_chroma[k],
2210 &v->meta_row_height_chroma[k],
2211 &v->dummyinteger1,
2212 &v->dummyinteger2,
2213 &v->PixelPTEReqWidthC[k],
2214 &v->PixelPTEReqHeightC[k],
2215 &v->PTERequestSizeC[k],
2216 &v->dpde0_bytes_per_frame_ub_c[k],
2217 &v->meta_pte_bytes_per_frame_ub_c[k]);
2218
2219 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2220 mode_lib,
2221 v->VRatioChroma[k],
2222 v->VTAPsChroma[k],
2223 v->Interlace[k],
2224 v->ProgressiveToInterlaceUnitInOPP,
2225 v->SwathHeightC[k],
2226 v->ViewportYStartC[k],
2227 &v->VInitPreFillC[k],
2228 &v->MaxNumSwathC[k]);
2229 } else {
2230 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2231 v->PTEBufferSizeInRequestsForChroma = 0;
2232 PixelPTEBytesPerRowC = 0;
2233 PDEAndMetaPTEBytesFrameC = 0;
2234 MetaRowByteC = 0;
2235 v->MaxNumSwathC[k] = 0;
2236 v->PrefetchSourceLinesC[k] = 0;
2237 }
2238
2239 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2240 mode_lib,
2241 v->DCCEnable[k],
2242 v->BlockHeight256BytesY[k],
2243 v->BlockWidth256BytesY[k],
2244 v->SourcePixelFormat[k],
2245 v->SurfaceTiling[k],
2246 v->BytePerPixelY[k],
2247 v->SourceScan[k],
2248 v->SwathWidthY[k],
2249 v->ViewportHeight[k],
2250 v->GPUVMEnable,
2251 v->HostVMEnable,
2252 v->HostVMMaxNonCachedPageTableLevels,
2253 v->GPUVMMinPageSize,
2254 v->HostVMMinPageSize,
2255 v->PTEBufferSizeInRequestsForLuma,
2256 v->PitchY[k],
2257 v->DCCMetaPitchY[k],
2258 &v->MacroTileWidthY[k],
2259 &MetaRowByteY,
2260 &PixelPTEBytesPerRowY,
2261 &PTEBufferSizeNotExceededY,
2262 &v->dpte_row_width_luma_ub[k],
2263 &v->dpte_row_height[k],
2264 &v->meta_req_width[k],
2265 &v->meta_req_height[k],
2266 &v->meta_row_width[k],
2267 &v->meta_row_height[k],
2268 &v->vm_group_bytes[k],
2269 &v->dpte_group_bytes[k],
2270 &v->PixelPTEReqWidthY[k],
2271 &v->PixelPTEReqHeightY[k],
2272 &v->PTERequestSizeY[k],
2273 &v->dpde0_bytes_per_frame_ub_l[k],
2274 &v->meta_pte_bytes_per_frame_ub_l[k]);
2275
2276 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2277 mode_lib,
2278 v->VRatio[k],
2279 v->vtaps[k],
2280 v->Interlace[k],
2281 v->ProgressiveToInterlaceUnitInOPP,
2282 v->SwathHeightY[k],
2283 v->ViewportYStartY[k],
2284 &v->VInitPreFillY[k],
2285 &v->MaxNumSwathY[k]);
2286 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2287 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY
2288 + PDEAndMetaPTEBytesFrameC;
2289 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2290
2291 CalculateRowBandwidth(
2292 v->GPUVMEnable,
2293 v->SourcePixelFormat[k],
2294 v->VRatio[k],
2295 v->VRatioChroma[k],
2296 v->DCCEnable[k],
2297 v->HTotal[k] / v->PixelClock[k],
2298 MetaRowByteY,
2299 MetaRowByteC,
2300 v->meta_row_height[k],
2301 v->meta_row_height_chroma[k],
2302 PixelPTEBytesPerRowY,
2303 PixelPTEBytesPerRowC,
2304 v->dpte_row_height[k],
2305 v->dpte_row_height_chroma[k],
2306 &v->meta_row_bw[k],
2307 &v->dpte_row_bw[k]);
2308 }
2309
2310 v->TotalDCCActiveDPP = 0;
2311 v->TotalActiveDPP = 0;
2312 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2313 v->TotalActiveDPP = v->TotalActiveDPP
2314 + v->DPPPerPlane[k];
2315 if (v->DCCEnable[k])
2316 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP
2317 + v->DPPPerPlane[k];
2318 }
2319
2320
2321 ReorderBytes = v->NumberOfChannels * dml_max3(
2322 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2323 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2324 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2325
2326 v->UrgentExtraLatency = CalculateExtraLatency(
2327 v->RoundTripPingLatencyCycles,
2328 ReorderBytes,
2329 v->DCFCLK,
2330 v->TotalActiveDPP,
2331 v->PixelChunkSizeInKByte,
2332 v->TotalDCCActiveDPP,
2333 v->MetaChunkSize,
2334 v->ReturnBW,
2335 v->GPUVMEnable,
2336 v->HostVMEnable,
2337 v->NumberOfActivePlanes,
2338 v->DPPPerPlane,
2339 v->dpte_group_bytes,
2340 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2341 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2342 v->HostVMMinPageSize,
2343 v->HostVMMaxNonCachedPageTableLevels);
2344
2345 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2346
2347 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2348 if (v->BlendingAndTiming[k] == k) {
2349 if (v->WritebackEnable[k] == true) {
2350 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency +
2351 CalculateWriteBackDelay(v->WritebackPixelFormat[k],
2352 v->WritebackHRatio[k],
2353 v->WritebackVRatio[k],
2354 v->WritebackVTaps[k],
2355 v->WritebackDestinationWidth[k],
2356 v->WritebackDestinationHeight[k],
2357 v->WritebackSourceHeight[k],
2358 v->HTotal[k]) / v->DISPCLK;
2359 } else
2360 v->WritebackDelay[v->VoltageLevel][k] = 0;
2361 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2362 if (v->BlendingAndTiming[j] == k
2363 && v->WritebackEnable[j] == true) {
2364 v->WritebackDelay[v->VoltageLevel][k] = dml_max(v->WritebackDelay[v->VoltageLevel][k],
2365 v->WritebackLatency + CalculateWriteBackDelay(
2366 v->WritebackPixelFormat[j],
2367 v->WritebackHRatio[j],
2368 v->WritebackVRatio[j],
2369 v->WritebackVTaps[j],
2370 v->WritebackDestinationWidth[j],
2371 v->WritebackDestinationHeight[j],
2372 v->WritebackSourceHeight[j],
2373 v->HTotal[k]) / v->DISPCLK);
2374 }
2375 }
2376 }
2377 }
2378
2379 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2380 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2381 if (v->BlendingAndTiming[k] == j)
2382 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2383
2384 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2385 v->MaxVStartupLines[k] = v->VTotal[k] - v->VActive[k] - dml_max(1.0, dml_ceil((double) v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1));
2386 }
2387
2388 v->MaximumMaxVStartupLines = 0;
2389 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2390 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2391
2392 if (v->DRAMClockChangeLatencyOverride > 0.0) {
2393 v->FinalDRAMClockChangeLatency = v->DRAMClockChangeLatencyOverride;
2394 } else {
2395 v->FinalDRAMClockChangeLatency = v->DRAMClockChangeLatency;
2396 }
2397 v->UrgentLatency = CalculateUrgentLatency(v->UrgentLatencyPixelDataOnly, v->UrgentLatencyPixelMixedWithVMData, v->UrgentLatencyVMDataOnly, v->DoUrgentLatencyAdjustment, v->UrgentLatencyAdjustmentFabricClockComponent, v->UrgentLatencyAdjustmentFabricClockReference, v->FabricClock);
2398
2399
2400 v->FractionOfUrgentBandwidth = 0.0;
2401 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2402
2403 v->VStartupLines = 13;
2404
2405 do {
2406 MaxTotalRDBandwidth = 0;
2407 MaxTotalRDBandwidthNoUrgentBurst = 0;
2408 DestinationLineTimesForPrefetchLessThan2 = false;
2409 VRatioPrefetchMoreThan4 = false;
2410 TWait = CalculateTWait(
2411 PrefetchMode,
2412 v->FinalDRAMClockChangeLatency,
2413 v->UrgentLatency,
2414 v->SREnterPlusExitTime);
2415
2416 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2417 Pipe myPipe = { 0 };
2418
2419 myPipe.DPPCLK = v->DPPCLK[k];
2420 myPipe.DISPCLK = v->DISPCLK;
2421 myPipe.PixelClock = v->PixelClock[k];
2422 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2423 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2424 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2425 myPipe.SourceScan = v->SourceScan[k];
2426 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2427 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2428 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2429 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2430 myPipe.InterlaceEnable = v->Interlace[k];
2431 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2432 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2433 myPipe.HTotal = v->HTotal[k];
2434 myPipe.DCCEnable = v->DCCEnable[k];
2435 myPipe.ODMCombineEnabled = !!v->ODMCombineEnabled[k];
2436
2437 v->ErrorResult[k] = CalculatePrefetchSchedule(
2438 mode_lib,
2439 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2440 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2441 &myPipe,
2442 v->DSCDelay[k],
2443 v->DPPCLKDelaySubtotal
2444 + v->DPPCLKDelayCNVCFormater,
2445 v->DPPCLKDelaySCL,
2446 v->DPPCLKDelaySCLLBOnly,
2447 v->DPPCLKDelayCNVCCursor,
2448 v->DISPCLKDelaySubtotal,
2449 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2450 v->OutputFormat[k],
2451 v->MaxInterDCNTileRepeaters,
2452 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2453 v->MaxVStartupLines[k],
2454 v->GPUVMMaxPageTableLevels,
2455 v->GPUVMEnable,
2456 v->HostVMEnable,
2457 v->HostVMMaxNonCachedPageTableLevels,
2458 v->HostVMMinPageSize,
2459 v->DynamicMetadataEnable[k],
2460 v->DynamicMetadataVMEnabled,
2461 v->DynamicMetadataLinesBeforeActiveRequired[k],
2462 v->DynamicMetadataTransmittedBytes[k],
2463 v->UrgentLatency,
2464 v->UrgentExtraLatency,
2465 v->TCalc,
2466 v->PDEAndMetaPTEBytesFrame[k],
2467 v->MetaRowByte[k],
2468 v->PixelPTEBytesPerRow[k],
2469 v->PrefetchSourceLinesY[k],
2470 v->SwathWidthY[k],
2471 v->BytePerPixelY[k],
2472 v->VInitPreFillY[k],
2473 v->MaxNumSwathY[k],
2474 v->PrefetchSourceLinesC[k],
2475 v->SwathWidthC[k],
2476 v->BytePerPixelC[k],
2477 v->VInitPreFillC[k],
2478 v->MaxNumSwathC[k],
2479 v->swath_width_luma_ub[k],
2480 v->swath_width_chroma_ub[k],
2481 v->SwathHeightY[k],
2482 v->SwathHeightC[k],
2483 TWait,
2484 v->ProgressiveToInterlaceUnitInOPP,
2485 &v->DSTXAfterScaler[k],
2486 &v->DSTYAfterScaler[k],
2487 &v->DestinationLinesForPrefetch[k],
2488 &v->PrefetchBandwidth[k],
2489 &v->DestinationLinesToRequestVMInVBlank[k],
2490 &v->DestinationLinesToRequestRowInVBlank[k],
2491 &v->VRatioPrefetchY[k],
2492 &v->VRatioPrefetchC[k],
2493 &v->RequiredPrefetchPixDataBWLuma[k],
2494 &v->RequiredPrefetchPixDataBWChroma[k],
2495 &v->NotEnoughTimeForDynamicMetadata[k],
2496 &v->Tno_bw[k],
2497 &v->prefetch_vmrow_bw[k],
2498 &v->Tdmdl_vm[k],
2499 &v->Tdmdl[k],
2500 &v->VUpdateOffsetPix[k],
2501 &v->VUpdateWidthPix[k],
2502 &v->VReadyOffsetPix[k]);
2503 if (v->BlendingAndTiming[k] == k) {
2504 double TotalRepeaterDelayTime = v->MaxInterDCNTileRepeaters * (2 / v->DPPCLK[k] + 3 / v->DISPCLK);
2505 v->VUpdateWidthPix[k] = (14 / v->DCFCLKDeepSleep + 12 / v->DPPCLK[k] + TotalRepeaterDelayTime) * v->PixelClock[k];
2506 v->VReadyOffsetPix[k] = dml_max(150.0 / v->DPPCLK[k], TotalRepeaterDelayTime + 20 / v->DCFCLKDeepSleep + 10 / v->DPPCLK[k]) * v->PixelClock[k];
2507 v->VUpdateOffsetPix[k] = dml_ceil(v->HTotal[k] / 4.0, 1);
2508 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2509 } else {
2510 int x = v->BlendingAndTiming[k];
2511 double TotalRepeaterDelayTime = v->MaxInterDCNTileRepeaters * (2 / v->DPPCLK[k] + 3 / v->DISPCLK);
2512 v->VUpdateWidthPix[k] = (14 / v->DCFCLKDeepSleep + 12 / v->DPPCLK[k] + TotalRepeaterDelayTime) * v->PixelClock[x];
2513 v->VReadyOffsetPix[k] = dml_max(150.0 / v->DPPCLK[k], TotalRepeaterDelayTime + 20 / v->DCFCLKDeepSleep + 10 / v->DPPCLK[k]) * v->PixelClock[x];
2514 v->VUpdateOffsetPix[k] = dml_ceil(v->HTotal[x] / 4.0, 1);
2515 if (!v->MaxVStartupLines[x])
2516 v->MaxVStartupLines[x] = v->MaxVStartupLines[k];
2517 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[x]);
2518 }
2519 }
2520
2521 v->NotEnoughUrgentLatencyHiding[0][0] = false;
2522 v->NotEnoughUrgentLatencyHidingPre = false;
2523
2524 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2525 v->cursor_bw[k] = v->NumberOfCursors[k]
2526 * v->CursorWidth[k][0] * v->CursorBPP[k][0]
2527 / 8.0
2528 / (v->HTotal[k] / v->PixelClock[k])
2529 * v->VRatio[k];
2530 v->cursor_bw_pre[k] = v->NumberOfCursors[k]
2531 * v->CursorWidth[k][0] * v->CursorBPP[k][0]
2532 / 8.0
2533 / (v->HTotal[k] / v->PixelClock[k])
2534 * v->VRatioPrefetchY[k];
2535
2536 CalculateUrgentBurstFactor(
2537 v->swath_width_luma_ub[k],
2538 v->swath_width_chroma_ub[k],
2539 v->DETBufferSizeInKByte[0],
2540 v->SwathHeightY[k],
2541 v->SwathHeightC[k],
2542 v->HTotal[k] / v->PixelClock[k],
2543 v->UrgentLatency,
2544 v->CursorBufferSize,
2545 v->CursorWidth[k][0],
2546 v->CursorBPP[k][0],
2547 v->VRatio[k],
2548 v->VRatioChroma[k],
2549 v->BytePerPixelDETY[k],
2550 v->BytePerPixelDETC[k],
2551 v->DETBufferSizeY[k],
2552 v->DETBufferSizeC[k],
2553 &v->UrgentBurstFactorCursor[k],
2554 &v->UrgentBurstFactorLuma[k],
2555 &v->UrgentBurstFactorChroma[k],
2556 &v->NoUrgentLatencyHiding[k]);
2557
2558 CalculateUrgentBurstFactor(
2559 v->swath_width_luma_ub[k],
2560 v->swath_width_chroma_ub[k],
2561 v->DETBufferSizeInKByte[0],
2562 v->SwathHeightY[k],
2563 v->SwathHeightC[k],
2564 v->HTotal[k] / v->PixelClock[k],
2565 v->UrgentLatency,
2566 v->CursorBufferSize,
2567 v->CursorWidth[k][0],
2568 v->CursorBPP[k][0],
2569 v->VRatioPrefetchY[k],
2570 v->VRatioPrefetchC[k],
2571 v->BytePerPixelDETY[k],
2572 v->BytePerPixelDETC[k],
2573 v->DETBufferSizeY[k],
2574 v->DETBufferSizeC[k],
2575 &v->UrgentBurstFactorCursorPre[k],
2576 &v->UrgentBurstFactorLumaPre[k],
2577 &v->UrgentBurstFactorChromaPre[k],
2578 &v->NoUrgentLatencyHidingPre[k]);
2579
2580 MaxTotalRDBandwidth = MaxTotalRDBandwidth +
2581 dml_max3(v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2582 v->ReadBandwidthPlaneLuma[k] *
2583 v->UrgentBurstFactorLuma[k] +
2584 v->ReadBandwidthPlaneChroma[k] *
2585 v->UrgentBurstFactorChroma[k] +
2586 v->cursor_bw[k] *
2587 v->UrgentBurstFactorCursor[k] +
2588 v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2589 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgentBurstFactorLumaPre[k] +
2590 v->RequiredPrefetchPixDataBWChroma[k] * v->UrgentBurstFactorChromaPre[k]) + v->cursor_bw_pre[k] *
2591 v->UrgentBurstFactorCursorPre[k]);
2592
2593 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst +
2594 dml_max3(v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2595 v->ReadBandwidthPlaneLuma[k] +
2596 v->ReadBandwidthPlaneChroma[k] +
2597 v->cursor_bw[k] +
2598 v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2599 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2600
2601 if (v->DestinationLinesForPrefetch[k] < 2)
2602 DestinationLineTimesForPrefetchLessThan2 = true;
2603 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2604 VRatioPrefetchMoreThan4 = true;
2605 if (v->NoUrgentLatencyHiding[k] == true)
2606 v->NotEnoughUrgentLatencyHiding[0][0] = true;
2607
2608 if (v->NoUrgentLatencyHidingPre[k] == true)
2609 v->NotEnoughUrgentLatencyHidingPre = true;
2610 }
2611 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2612
2613
2614 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NotEnoughUrgentLatencyHiding[0][0] == 0
2615 && v->NotEnoughUrgentLatencyHidingPre == 0 && !VRatioPrefetchMoreThan4
2616 && !DestinationLineTimesForPrefetchLessThan2)
2617 v->PrefetchModeSupported = true;
2618 else {
2619 v->PrefetchModeSupported = false;
2620 dml_print("DML: CalculatePrefetchSchedule ***failed***. Bandwidth violation. Results are NOT valid\n");
2621 dml_print("DML: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", MaxTotalRDBandwidth, v->ReturnBW);
2622 dml_print("DML: VRatioPrefetch %s more than 4\n", (VRatioPrefetchMoreThan4) ? "is" : "is not");
2623 dml_print("DML: DestinationLines for Prefetch %s less than 2\n", (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2624 }
2625
2626 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2627 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2628 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2629 v->BandwidthAvailableForImmediateFlip =
2630 v->BandwidthAvailableForImmediateFlip
2631 - dml_max(
2632 v->ReadBandwidthPlaneLuma[k] * v->UrgentBurstFactorLuma[k]
2633 + v->ReadBandwidthPlaneChroma[k] * v->UrgentBurstFactorChroma[k]
2634 + v->cursor_bw[k] * v->UrgentBurstFactorCursor[k],
2635 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgentBurstFactorLumaPre[k] +
2636 v->RequiredPrefetchPixDataBWChroma[k] * v->UrgentBurstFactorChromaPre[k]) +
2637 v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
2638 }
2639
2640 v->TotImmediateFlipBytes = 0;
2641 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2642 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2643 }
2644 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2645 CalculateFlipSchedule(
2646 mode_lib,
2647 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2648 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2649 v->UrgentExtraLatency,
2650 v->UrgentLatency,
2651 v->GPUVMMaxPageTableLevels,
2652 v->HostVMEnable,
2653 v->HostVMMaxNonCachedPageTableLevels,
2654 v->GPUVMEnable,
2655 v->HostVMMinPageSize,
2656 v->PDEAndMetaPTEBytesFrame[k],
2657 v->MetaRowByte[k],
2658 v->PixelPTEBytesPerRow[k],
2659 v->BandwidthAvailableForImmediateFlip,
2660 v->TotImmediateFlipBytes,
2661 v->SourcePixelFormat[k],
2662 v->HTotal[k] / v->PixelClock[k],
2663 v->VRatio[k],
2664 v->VRatioChroma[k],
2665 v->Tno_bw[k],
2666 v->DCCEnable[k],
2667 v->dpte_row_height[k],
2668 v->meta_row_height[k],
2669 v->dpte_row_height_chroma[k],
2670 v->meta_row_height_chroma[k],
2671 &v->DestinationLinesToRequestVMInImmediateFlip[k],
2672 &v->DestinationLinesToRequestRowInImmediateFlip[k],
2673 &v->final_flip_bw[k],
2674 &v->ImmediateFlipSupportedForPipe[k]);
2675 }
2676 v->total_dcn_read_bw_with_flip = 0.0;
2677 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2678 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2679 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip + dml_max3(
2680 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2681 v->DPPPerPlane[k] * v->final_flip_bw[k] +
2682 v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k] +
2683 v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k] +
2684 v->cursor_bw[k] * v->UrgentBurstFactorCursor[k],
2685 v->DPPPerPlane[k] * (v->final_flip_bw[k] +
2686 v->RequiredPrefetchPixDataBWLuma[k] * v->UrgentBurstFactorLumaPre[k] +
2687 v->RequiredPrefetchPixDataBWChroma[k] * v->UrgentBurstFactorChromaPre[k]) +
2688 v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
2689 v->total_dcn_read_bw_with_flip_no_urgent_burst =
2690 v->total_dcn_read_bw_with_flip_no_urgent_burst +
2691 dml_max3(v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2692 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
2693 v->DPPPerPlane[k] * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2694
2695 }
2696 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
2697
2698 v->ImmediateFlipSupported = true;
2699 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
2700 v->ImmediateFlipSupported = false;
2701 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
2702 }
2703 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2704 if (v->ImmediateFlipSupportedForPipe[k] == false) {
2705 v->ImmediateFlipSupported = false;
2706 }
2707 }
2708 } else {
2709 v->ImmediateFlipSupported = false;
2710 }
2711
2712 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2713 if (v->ErrorResult[k] || v->NotEnoughTimeForDynamicMetadata[k]) {
2714 v->PrefetchModeSupported = false;
2715 dml_print("DML: CalculatePrefetchSchedule ***failed***. Prefetch schedule violation. Results are NOT valid\n");
2716 }
2717 }
2718
2719 v->VStartupLines = v->VStartupLines + 1;
2720 v->PrefetchModeSupported = (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport &&
2721 !v->HostVMEnable && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
2722 v->ImmediateFlipSupported)) ? true : false;
2723 } while (!v->PrefetchModeSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
2724 ASSERT(v->PrefetchModeSupported);
2725
2726 //Watermarks and NB P-State/DRAM Clock Change Support
2727 {
2728 enum clock_change_support DRAMClockChangeSupport = 0; // dummy
2729 CalculateWatermarksAndDRAMSpeedChangeSupport(
2730 mode_lib,
2731 PrefetchMode,
2732 v->NumberOfActivePlanes,
2733 v->MaxLineBufferLines,
2734 v->LineBufferSize,
2735 v->DPPOutputBufferPixels,
2736 v->DETBufferSizeInKByte[0],
2737 v->WritebackInterfaceBufferSize,
2738 v->DCFCLK,
2739 v->ReturnBW,
2740 v->GPUVMEnable,
2741 v->dpte_group_bytes,
2742 v->MetaChunkSize,
2743 v->UrgentLatency,
2744 v->UrgentExtraLatency,
2745 v->WritebackLatency,
2746 v->WritebackChunkSize,
2747 v->SOCCLK,
2748 v->FinalDRAMClockChangeLatency,
2749 v->SRExitTime,
2750 v->SREnterPlusExitTime,
2751 v->DCFCLKDeepSleep,
2752 v->DPPPerPlane,
2753 v->DCCEnable,
2754 v->DPPCLK,
2755 v->DETBufferSizeY,
2756 v->DETBufferSizeC,
2757 v->SwathHeightY,
2758 v->SwathHeightC,
2759 v->LBBitPerPixel,
2760 v->SwathWidthY,
2761 v->SwathWidthC,
2762 v->HRatio,
2763 v->HRatioChroma,
2764 v->vtaps,
2765 v->VTAPsChroma,
2766 v->VRatio,
2767 v->VRatioChroma,
2768 v->HTotal,
2769 v->PixelClock,
2770 v->BlendingAndTiming,
2771 v->BytePerPixelDETY,
2772 v->BytePerPixelDETC,
2773 v->DSTXAfterScaler,
2774 v->DSTYAfterScaler,
2775 v->WritebackEnable,
2776 v->WritebackPixelFormat,
2777 v->WritebackDestinationWidth,
2778 v->WritebackDestinationHeight,
2779 v->WritebackSourceHeight,
2780 &DRAMClockChangeSupport,
2781 &v->UrgentWatermark,
2782 &v->WritebackUrgentWatermark,
2783 &v->DRAMClockChangeWatermark,
2784 &v->WritebackDRAMClockChangeWatermark,
2785 &v->StutterExitWatermark,
2786 &v->StutterEnterPlusExitWatermark,
2787 &v->MinActiveDRAMClockChangeLatencySupported);
2788
2789 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2790 if (v->WritebackEnable[k] == true) {
2791 if (v->BlendingAndTiming[k] == k) {
2792 v->ThisVStartup = v->VStartup[k];
2793 } else {
2794 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2795 if (v->BlendingAndTiming[k] == j) {
2796 v->ThisVStartup = v->VStartup[j];
2797 }
2798 }
2799 }
2800 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(0,
2801 v->ThisVStartup * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
2802 } else {
2803 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
2804 }
2805 }
2806
2807 }
2808
2809
2810 //Display Pipeline Delivery Time in Prefetch, Groups
2811 CalculatePixelDeliveryTimes(
2812 v->NumberOfActivePlanes,
2813 v->VRatio,
2814 v->VRatioChroma,
2815 v->VRatioPrefetchY,
2816 v->VRatioPrefetchC,
2817 v->swath_width_luma_ub,
2818 v->swath_width_chroma_ub,
2819 v->DPPPerPlane,
2820 v->HRatio,
2821 v->HRatioChroma,
2822 v->PixelClock,
2823 v->PSCL_THROUGHPUT_LUMA,
2824 v->PSCL_THROUGHPUT_CHROMA,
2825 v->DPPCLK,
2826 v->BytePerPixelC,
2827 v->SourceScan,
2828 v->NumberOfCursors,
2829 v->CursorWidth,
2830 v->CursorBPP,
2831 v->BlockWidth256BytesY,
2832 v->BlockHeight256BytesY,
2833 v->BlockWidth256BytesC,
2834 v->BlockHeight256BytesC,
2835 v->DisplayPipeLineDeliveryTimeLuma,
2836 v->DisplayPipeLineDeliveryTimeChroma,
2837 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
2838 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
2839 v->DisplayPipeRequestDeliveryTimeLuma,
2840 v->DisplayPipeRequestDeliveryTimeChroma,
2841 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
2842 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
2843 v->CursorRequestDeliveryTime,
2844 v->CursorRequestDeliveryTimePrefetch);
2845
2846 CalculateMetaAndPTETimes(
2847 v->NumberOfActivePlanes,
2848 v->GPUVMEnable,
2849 v->MetaChunkSize,
2850 v->MinMetaChunkSizeBytes,
2851 v->HTotal,
2852 v->VRatio,
2853 v->VRatioChroma,
2854 v->DestinationLinesToRequestRowInVBlank,
2855 v->DestinationLinesToRequestRowInImmediateFlip,
2856 v->DCCEnable,
2857 v->PixelClock,
2858 v->BytePerPixelY,
2859 v->BytePerPixelC,
2860 v->SourceScan,
2861 v->dpte_row_height,
2862 v->dpte_row_height_chroma,
2863 v->meta_row_width,
2864 v->meta_row_width_chroma,
2865 v->meta_row_height,
2866 v->meta_row_height_chroma,
2867 v->meta_req_width,
2868 v->meta_req_width_chroma,
2869 v->meta_req_height,
2870 v->meta_req_height_chroma,
2871 v->dpte_group_bytes,
2872 v->PTERequestSizeY,
2873 v->PTERequestSizeC,
2874 v->PixelPTEReqWidthY,
2875 v->PixelPTEReqHeightY,
2876 v->PixelPTEReqWidthC,
2877 v->PixelPTEReqHeightC,
2878 v->dpte_row_width_luma_ub,
2879 v->dpte_row_width_chroma_ub,
2880 v->DST_Y_PER_PTE_ROW_NOM_L,
2881 v->DST_Y_PER_PTE_ROW_NOM_C,
2882 v->DST_Y_PER_META_ROW_NOM_L,
2883 v->DST_Y_PER_META_ROW_NOM_C,
2884 v->TimePerMetaChunkNominal,
2885 v->TimePerChromaMetaChunkNominal,
2886 v->TimePerMetaChunkVBlank,
2887 v->TimePerChromaMetaChunkVBlank,
2888 v->TimePerMetaChunkFlip,
2889 v->TimePerChromaMetaChunkFlip,
2890 v->time_per_pte_group_nom_luma,
2891 v->time_per_pte_group_vblank_luma,
2892 v->time_per_pte_group_flip_luma,
2893 v->time_per_pte_group_nom_chroma,
2894 v->time_per_pte_group_vblank_chroma,
2895 v->time_per_pte_group_flip_chroma);
2896
2897 CalculateVMGroupAndRequestTimes(
2898 v->NumberOfActivePlanes,
2899 v->GPUVMEnable,
2900 v->GPUVMMaxPageTableLevels,
2901 v->HTotal,
2902 v->BytePerPixelC,
2903 v->DestinationLinesToRequestVMInVBlank,
2904 v->DestinationLinesToRequestVMInImmediateFlip,
2905 v->DCCEnable,
2906 v->PixelClock,
2907 v->dpte_row_width_luma_ub,
2908 v->dpte_row_width_chroma_ub,
2909 v->vm_group_bytes,
2910 v->dpde0_bytes_per_frame_ub_l,
2911 v->dpde0_bytes_per_frame_ub_c,
2912 v->meta_pte_bytes_per_frame_ub_l,
2913 v->meta_pte_bytes_per_frame_ub_c,
2914 v->TimePerVMGroupVBlank,
2915 v->TimePerVMGroupFlip,
2916 v->TimePerVMRequestVBlank,
2917 v->TimePerVMRequestFlip);
2918
2919
2920 // Min TTUVBlank
2921 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2922 if (PrefetchMode == 0) {
2923 v->AllowDRAMClockChangeDuringVBlank[k] = true;
2924 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
2925 v->MinTTUVBlank[k] = dml_max(
2926 v->DRAMClockChangeWatermark,
2927 dml_max(
2928 v->StutterEnterPlusExitWatermark,
2929 v->UrgentWatermark));
2930 } else if (PrefetchMode == 1) {
2931 v->AllowDRAMClockChangeDuringVBlank[k] = false;
2932 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
2933 v->MinTTUVBlank[k] = dml_max(
2934 v->StutterEnterPlusExitWatermark,
2935 v->UrgentWatermark);
2936 } else {
2937 v->AllowDRAMClockChangeDuringVBlank[k] = false;
2938 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
2939 v->MinTTUVBlank[k] = v->UrgentWatermark;
2940 }
2941 if (!v->DynamicMetadataEnable[k])
2942 v->MinTTUVBlank[k] = v->TCalc
2943 + v->MinTTUVBlank[k];
2944 }
2945
2946 // DCC Configuration
2947 v->ActiveDPPs = 0;
2948 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2949 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
2950 v->SourcePixelFormat[k],
2951 v->SurfaceWidthY[k],
2952 v->SurfaceWidthC[k],
2953 v->SurfaceHeightY[k],
2954 v->SurfaceHeightC[k],
2955 v->DETBufferSizeInKByte[0] * 1024,
2956 v->BlockHeight256BytesY[k],
2957 v->BlockHeight256BytesC[k],
2958 v->SurfaceTiling[k],
2959 v->BytePerPixelY[k],
2960 v->BytePerPixelC[k],
2961 v->BytePerPixelDETY[k],
2962 v->BytePerPixelDETC[k],
2963 v->SourceScan[k],
2964 &v->DCCYMaxUncompressedBlock[k],
2965 &v->DCCCMaxUncompressedBlock[k],
2966 &v->DCCYMaxCompressedBlock[k],
2967 &v->DCCCMaxCompressedBlock[k],
2968 &v->DCCYIndependentBlock[k],
2969 &v->DCCCIndependentBlock[k]);
2970 }
2971
2972 {
2973 //Maximum Bandwidth Used
2974 v->TotalDataReadBandwidth = 0;
2975 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2976 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth
2977 + v->ReadBandwidthPlaneLuma[k]
2978 + v->ReadBandwidthPlaneChroma[k];
2979 }
2980 }
2981
2982 // VStartup Margin
2983 v->VStartupMargin = 0;
2984 v->FirstMainPlane = true;
2985 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2986 if (v->BlendingAndTiming[k] == k) {
2987 double margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k]
2988 / v->PixelClock[k];
2989 if (v->FirstMainPlane == true) {
2990 v->VStartupMargin = margin;
2991 v->FirstMainPlane = false;
2992 } else {
2993 v->VStartupMargin = dml_min(v->VStartupMargin, margin);
2994 }
2995 }
2996 }
2997
2998 // Stutter Efficiency
2999 CalculateStutterEfficiency(
3000 v->NumberOfActivePlanes,
3001 v->ROBBufferSizeInKByte,
3002 v->TotalDataReadBandwidth,
3003 v->DCFCLK,
3004 v->ReturnBW,
3005 v->SRExitTime,
3006 v->SynchronizedVBlank,
3007 v->DPPPerPlane,
3008 v->DETBufferSizeY,
3009 v->BytePerPixelY,
3010 v->BytePerPixelDETY,
3011 v->SwathWidthY,
3012 v->SwathHeightY,
3013 v->SwathHeightC,
3014 v->DCCRateLuma,
3015 v->DCCRateChroma,
3016 v->HTotal,
3017 v->VTotal,
3018 v->PixelClock,
3019 v->VRatio,
3020 v->SourceScan,
3021 v->BlockHeight256BytesY,
3022 v->BlockWidth256BytesY,
3023 v->BlockHeight256BytesC,
3024 v->BlockWidth256BytesC,
3025 v->DCCYMaxUncompressedBlock,
3026 v->DCCCMaxUncompressedBlock,
3027 v->VActive,
3028 v->DCCEnable,
3029 v->WritebackEnable,
3030 v->ReadBandwidthPlaneLuma,
3031 v->ReadBandwidthPlaneChroma,
3032 v->meta_row_bw,
3033 v->dpte_row_bw,
3034 &v->StutterEfficiencyNotIncludingVBlank,
3035 &v->StutterEfficiency,
3036 &v->StutterPeriod);
3037 }
3038
DisplayPipeConfiguration(struct display_mode_lib * mode_lib)3039 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3040 {
3041 // Display Pipe Configuration
3042 double BytePerPixDETY[DC__NUM_DPP__MAX] = { 0 };
3043 double BytePerPixDETC[DC__NUM_DPP__MAX] = { 0 };
3044 int BytePerPixY[DC__NUM_DPP__MAX] = { 0 };
3045 int BytePerPixC[DC__NUM_DPP__MAX] = { 0 };
3046 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX] = { 0 };
3047 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX] = { 0 };
3048 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX] = { 0 };
3049 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX] = { 0 };
3050 double dummy1[DC__NUM_DPP__MAX] = { 0 };
3051 double dummy2[DC__NUM_DPP__MAX] = { 0 };
3052 double dummy3[DC__NUM_DPP__MAX] = { 0 };
3053 double dummy4[DC__NUM_DPP__MAX] = { 0 };
3054 int dummy5[DC__NUM_DPP__MAX] = { 0 };
3055 int dummy6[DC__NUM_DPP__MAX] = { 0 };
3056 bool dummy7[DC__NUM_DPP__MAX] = { 0 };
3057 bool dummysinglestring = 0;
3058 unsigned int k;
3059
3060 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
3061
3062 dml30_CalculateBytePerPixelAnd256BBlockSizes(
3063 mode_lib->vba.SourcePixelFormat[k],
3064 mode_lib->vba.SurfaceTiling[k],
3065 &BytePerPixY[k],
3066 &BytePerPixC[k],
3067 &BytePerPixDETY[k],
3068 &BytePerPixDETC[k],
3069 &Read256BytesBlockHeightY[k],
3070 &Read256BytesBlockHeightC[k],
3071 &Read256BytesBlockWidthY[k],
3072 &Read256BytesBlockWidthC[k]);
3073 }
3074 CalculateSwathAndDETConfiguration(
3075 false,
3076 mode_lib->vba.NumberOfActivePlanes,
3077 mode_lib->vba.DETBufferSizeInKByte[0],
3078 dummy1,
3079 dummy2,
3080 mode_lib->vba.SourceScan,
3081 mode_lib->vba.SourcePixelFormat,
3082 mode_lib->vba.SurfaceTiling,
3083 mode_lib->vba.ViewportWidth,
3084 mode_lib->vba.ViewportHeight,
3085 mode_lib->vba.SurfaceWidthY,
3086 mode_lib->vba.SurfaceWidthC,
3087 mode_lib->vba.SurfaceHeightY,
3088 mode_lib->vba.SurfaceHeightC,
3089 Read256BytesBlockHeightY,
3090 Read256BytesBlockHeightC,
3091 Read256BytesBlockWidthY,
3092 Read256BytesBlockWidthC,
3093 mode_lib->vba.ODMCombineEnabled,
3094 mode_lib->vba.BlendingAndTiming,
3095 BytePerPixY,
3096 BytePerPixC,
3097 BytePerPixDETY,
3098 BytePerPixDETC,
3099 mode_lib->vba.HActive,
3100 mode_lib->vba.HRatio,
3101 mode_lib->vba.HRatioChroma,
3102 mode_lib->vba.DPPPerPlane,
3103 dummy5,
3104 dummy6,
3105 dummy3,
3106 dummy4,
3107 mode_lib->vba.SwathHeightY,
3108 mode_lib->vba.SwathHeightC,
3109 mode_lib->vba.DETBufferSizeY,
3110 mode_lib->vba.DETBufferSizeC,
3111 dummy7,
3112 &dummysinglestring);
3113 }
3114
dml30_CalculateBytePerPixelAnd256BBlockSizes(enum source_format_class SourcePixelFormat,enum dm_swizzle_mode SurfaceTiling,unsigned int * BytePerPixelY,unsigned int * BytePerPixelC,double * BytePerPixelDETY,double * BytePerPixelDETC,unsigned int * BlockHeight256BytesY,unsigned int * BlockHeight256BytesC,unsigned int * BlockWidth256BytesY,unsigned int * BlockWidth256BytesC)3115 void dml30_CalculateBytePerPixelAnd256BBlockSizes(
3116 enum source_format_class SourcePixelFormat,
3117 enum dm_swizzle_mode SurfaceTiling,
3118 unsigned int *BytePerPixelY,
3119 unsigned int *BytePerPixelC,
3120 double *BytePerPixelDETY,
3121 double *BytePerPixelDETC,
3122 unsigned int *BlockHeight256BytesY,
3123 unsigned int *BlockHeight256BytesC,
3124 unsigned int *BlockWidth256BytesY,
3125 unsigned int *BlockWidth256BytesC)
3126 {
3127 if (SourcePixelFormat == dm_444_64) {
3128 *BytePerPixelDETY = 8;
3129 *BytePerPixelDETC = 0;
3130 *BytePerPixelY = 8;
3131 *BytePerPixelC = 0;
3132 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
3133 *BytePerPixelDETY = 4;
3134 *BytePerPixelDETC = 0;
3135 *BytePerPixelY = 4;
3136 *BytePerPixelC = 0;
3137 } else if (SourcePixelFormat == dm_444_16) {
3138 *BytePerPixelDETY = 2;
3139 *BytePerPixelDETC = 0;
3140 *BytePerPixelY = 2;
3141 *BytePerPixelC = 0;
3142 } else if (SourcePixelFormat == dm_444_8) {
3143 *BytePerPixelDETY = 1;
3144 *BytePerPixelDETC = 0;
3145 *BytePerPixelY = 1;
3146 *BytePerPixelC = 0;
3147 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3148 *BytePerPixelDETY = 4;
3149 *BytePerPixelDETC = 1;
3150 *BytePerPixelY = 4;
3151 *BytePerPixelC = 1;
3152 } else if (SourcePixelFormat == dm_420_8) {
3153 *BytePerPixelDETY = 1;
3154 *BytePerPixelDETC = 2;
3155 *BytePerPixelY = 1;
3156 *BytePerPixelC = 2;
3157 } else if (SourcePixelFormat == dm_420_12) {
3158 *BytePerPixelDETY = 2;
3159 *BytePerPixelDETC = 4;
3160 *BytePerPixelY = 2;
3161 *BytePerPixelC = 4;
3162 } else {
3163 *BytePerPixelDETY = 4.0 / 3;
3164 *BytePerPixelDETC = 8.0 / 3;
3165 *BytePerPixelY = 2;
3166 *BytePerPixelC = 4;
3167 }
3168
3169 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
3170 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8
3171 || SourcePixelFormat == dm_mono_16 || SourcePixelFormat == dm_mono_8
3172 || SourcePixelFormat == dm_rgbe)) {
3173 if (SurfaceTiling == dm_sw_linear) {
3174 *BlockHeight256BytesY = 1;
3175 } else if (SourcePixelFormat == dm_444_64) {
3176 *BlockHeight256BytesY = 4;
3177 } else if (SourcePixelFormat == dm_444_8) {
3178 *BlockHeight256BytesY = 16;
3179 } else {
3180 *BlockHeight256BytesY = 8;
3181 }
3182 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3183 *BlockHeight256BytesC = 0;
3184 *BlockWidth256BytesC = 0;
3185 } else {
3186 if (SurfaceTiling == dm_sw_linear) {
3187 *BlockHeight256BytesY = 1;
3188 *BlockHeight256BytesC = 1;
3189 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3190 *BlockHeight256BytesY = 8;
3191 *BlockHeight256BytesC = 16;
3192 } else if (SourcePixelFormat == dm_420_8) {
3193 *BlockHeight256BytesY = 16;
3194 *BlockHeight256BytesC = 8;
3195 } else {
3196 *BlockHeight256BytesY = 8;
3197 *BlockHeight256BytesC = 8;
3198 }
3199 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3200 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
3201 }
3202 }
3203
/*
 * Select the wait time that applies to the given prefetch mode.
 *
 * Mode 0:     max(DRAMClockChangeLatency + UrgentLatency,
 *                 SREnterPlusExitTime, UrgentLatency)
 * Mode 1:     max(SREnterPlusExitTime, UrgentLatency)
 * Otherwise:  UrgentLatency
 */
static double CalculateTWait(
		unsigned int PrefetchMode,
		double DRAMClockChangeLatency,
		double UrgentLatency,
		double SREnterPlusExitTime)
{
	switch (PrefetchMode) {
	case 0:
		return dml_max(DRAMClockChangeLatency + UrgentLatency,
				dml_max(SREnterPlusExitTime, UrgentLatency));
	case 1:
		return dml_max(SREnterPlusExitTime, UrgentLatency);
	default:
		return UrgentLatency;
	}
}
3219
/*
 * Return the largest of three writeback DISPCLK candidates, each scaled from
 * PixelClock:
 *   - one driven by the horizontal tap count and WritebackHRatio,
 *   - one driven by the vertical tap count, destination width and HTotal,
 *   - one driven by the vertical tap count, destination width, line buffer
 *     size and WritebackSourceWidth.
 *
 * WritebackPixelFormat and WritebackVRatio are accepted but not used by the
 * computation.
 */
double dml30_CalculateWriteBackDISPCLK(
		enum source_format_class WritebackPixelFormat,
		double PixelClock,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackHTaps,
		unsigned int WritebackVTaps,
		long WritebackSourceWidth,
		long WritebackDestinationWidth,
		unsigned int HTotal,
		unsigned int WritebackLineBufferSize)
{
	const double clk_h = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
	const double clk_v = PixelClock
			* (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0)
			/ HTotal;
	const double clk_hb = PixelClock * WritebackVTaps
			* (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0)
			/ 6.0 / WritebackSourceWidth;

	return dml_max3(clk_h, clk_v, clk_hb);
}
3239
/*
 * Compute the writeback delay from the destination geometry.
 *
 * The number of "last output lines" is
 *   WritebackDestinationHeight - 1
 *     - ceil((WritebackSourceHeight - VInit) / WritebackVRatio)
 * with VInit = (WritebackVRatio + WritebackVTaps + 1) / 2.
 * When that count is negative the delay is 0. Otherwise it is the count
 * times the line length, plus (HTotal - WritebackDestinationWidth), plus 80.
 *
 * WritebackPixelFormat and WritebackHRatio are accepted but not used.
 */
static double CalculateWriteBackDelay(
		enum source_format_class WritebackPixelFormat,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackVTaps,
		long WritebackDestinationWidth,
		long WritebackDestinationHeight,
		long WritebackSourceHeight,
		unsigned int HTotal)
{
	const double v_init = (WritebackVRatio + WritebackVTaps + 1) / 2;
	/* Line length: the larger of the width and ceil(width / 6) * vertical taps. */
	const double line_len = dml_max((double) WritebackDestinationWidth,
			dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
	const double last_lines = WritebackDestinationHeight - 1
			- dml_ceil((WritebackSourceHeight - v_init) / WritebackVRatio, 1);

	if (last_lines < 0)
		return 0;

	return last_lines * line_len + (HTotal - WritebackDestinationWidth) + 80;
}
3265
3266
/*
 * Fill in four dynamic-metadata timing outputs.
 *
 * @Tsetup: (VUpdateOffsetPix + VUpdateWidthPix + VReadyOffsetPix) / PixelClock,
 *          where the three pixel counts are derived from the clocks, HTotal
 *          and the inter-DCN tile repeater delay.
 * @Tdmbf:  DynamicMetadataTransmittedBytes / 4 / DISPCLK.
 * @Tdmec:  HTotal / PixelClock.
 * @Tdmsks: half of VBlank lines when DynamicMetadataLinesBeforeActiveRequired
 *          is 0, otherwise that many lines, both expressed as
 *          lines * HTotal / PixelClock. The result is halved again when
 *          InterlaceEnable is 1 and ProgressiveToInterlaceUnitInOPP is false.
 */
static void CalculateDynamicMetadataParameters(int MaxInterDCNTileRepeaters, double DPPCLK, double DISPCLK,
		double DCFClkDeepSleep, double PixelClock, unsigned int HTotal, unsigned int VBlank, unsigned int DynamicMetadataTransmittedBytes,
		int DynamicMetadataLinesBeforeActiveRequired, int InterlaceEnable, bool ProgressiveToInterlaceUnitInOPP,
		double *Tsetup, double *Tdmbf, double *Tdmec, double *Tdmsks)
{
	const double repeater_delay = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
	const double vupdate_width_pix = (14 / DCFClkDeepSleep + 12 / DPPCLK + repeater_delay) * PixelClock;
	const double vready_offset_pix = dml_max(150.0 / DPPCLK,
			repeater_delay + 20 / DCFClkDeepSleep + 10 / DPPCLK) * PixelClock;
	const double vupdate_offset_pix = dml_ceil(HTotal / 4.0, 1);
	double tdmsks;

	*Tsetup = (vupdate_offset_pix + vupdate_width_pix + vready_offset_pix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
	*Tdmec = HTotal / PixelClock;

	if (DynamicMetadataLinesBeforeActiveRequired == 0)
		tdmsks = VBlank * HTotal / PixelClock / 2.0;
	else
		tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

	if (InterlaceEnable == 1 && !ProgressiveToInterlaceUnitInOPP)
		tdmsks = tdmsks / 2;

	*Tdmsks = tdmsks;
}
3292
CalculateRowBandwidth(bool GPUVMEnable,enum source_format_class SourcePixelFormat,double VRatio,double VRatioChroma,bool DCCEnable,double LineTime,unsigned int MetaRowByteLuma,unsigned int MetaRowByteChroma,unsigned int meta_row_height_luma,unsigned int meta_row_height_chroma,unsigned int PixelPTEBytesPerRowLuma,unsigned int PixelPTEBytesPerRowChroma,unsigned int dpte_row_height_luma,unsigned int dpte_row_height_chroma,double * meta_row_bw,double * dpte_row_bw)3293 static void CalculateRowBandwidth(
3294 bool GPUVMEnable,
3295 enum source_format_class SourcePixelFormat,
3296 double VRatio,
3297 double VRatioChroma,
3298 bool DCCEnable,
3299 double LineTime,
3300 unsigned int MetaRowByteLuma,
3301 unsigned int MetaRowByteChroma,
3302 unsigned int meta_row_height_luma,
3303 unsigned int meta_row_height_chroma,
3304 unsigned int PixelPTEBytesPerRowLuma,
3305 unsigned int PixelPTEBytesPerRowChroma,
3306 unsigned int dpte_row_height_luma,
3307 unsigned int dpte_row_height_chroma,
3308 double *meta_row_bw,
3309 double *dpte_row_bw)
3310 {
3311 if (DCCEnable != true) {
3312 *meta_row_bw = 0;
3313 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3314 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime)
3315 + VRatioChroma * MetaRowByteChroma
3316 / (meta_row_height_chroma * LineTime);
3317 } else {
3318 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
3319 }
3320
3321 if (GPUVMEnable != true) {
3322 *dpte_row_bw = 0;
3323 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3324 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3325 + VRatioChroma * PixelPTEBytesPerRowChroma
3326 / (dpte_row_height_chroma * LineTime);
3327 } else {
3328 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
3329 }
3330 }
3331
CalculateFlipSchedule(struct display_mode_lib * mode_lib,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,double UrgentExtraLatency,double UrgentLatency,unsigned int GPUVMMaxPageTableLevels,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,bool GPUVMEnable,double HostVMMinPageSize,double PDEAndMetaPTEBytesPerFrame,double MetaRowBytes,double DPTEBytesPerRow,double BandwidthAvailableForImmediateFlip,unsigned int TotImmediateFlipBytes,enum source_format_class SourcePixelFormat,double LineTime,double VRatio,double VRatioChroma,double Tno_bw,bool DCCEnable,unsigned int dpte_row_height,unsigned int meta_row_height,unsigned int dpte_row_height_chroma,unsigned int meta_row_height_chroma,double * DestinationLinesToRequestVMInImmediateFlip,double * DestinationLinesToRequestRowInImmediateFlip,double * final_flip_bw,bool * ImmediateFlipSupportedForPipe)3332 static void CalculateFlipSchedule(
3333 struct display_mode_lib *mode_lib,
3334 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
3335 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
3336 double UrgentExtraLatency,
3337 double UrgentLatency,
3338 unsigned int GPUVMMaxPageTableLevels,
3339 bool HostVMEnable,
3340 unsigned int HostVMMaxNonCachedPageTableLevels,
3341 bool GPUVMEnable,
3342 double HostVMMinPageSize,
3343 double PDEAndMetaPTEBytesPerFrame,
3344 double MetaRowBytes,
3345 double DPTEBytesPerRow,
3346 double BandwidthAvailableForImmediateFlip,
3347 unsigned int TotImmediateFlipBytes,
3348 enum source_format_class SourcePixelFormat,
3349 double LineTime,
3350 double VRatio,
3351 double VRatioChroma,
3352 double Tno_bw,
3353 bool DCCEnable,
3354 unsigned int dpte_row_height,
3355 unsigned int meta_row_height,
3356 unsigned int dpte_row_height_chroma,
3357 unsigned int meta_row_height_chroma,
3358 double *DestinationLinesToRequestVMInImmediateFlip,
3359 double *DestinationLinesToRequestRowInImmediateFlip,
3360 double *final_flip_bw,
3361 bool *ImmediateFlipSupportedForPipe)
3362 {
3363 double min_row_time = 0.0;
3364 unsigned int HostVMDynamicLevelsTrips = 0;
3365 double TimeForFetchingMetaPTEImmediateFlip = 0;
3366 double TimeForFetchingRowInVBlankImmediateFlip = 0;
3367 double ImmediateFlipBW = 0;
3368 double HostVMInefficiencyFactor = 0;
3369
3370 if (GPUVMEnable == true && HostVMEnable == true) {
3371 HostVMInefficiencyFactor = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
3372 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
3373 } else {
3374 HostVMInefficiencyFactor = 1;
3375 HostVMDynamicLevelsTrips = 0;
3376 }
3377
3378 if (GPUVMEnable == true || DCCEnable == true) {
3379 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
3380 }
3381
3382 if (GPUVMEnable == true) {
3383 TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3384 UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), LineTime / 4.0);
3385 } else {
3386 TimeForFetchingMetaPTEImmediateFlip = 0;
3387 }
3388
3389 *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
3390 if ((GPUVMEnable == true || DCCEnable == true)) {
3391 TimeForFetchingRowInVBlankImmediateFlip = dml_max3((MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3392 UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4);
3393 } else {
3394 TimeForFetchingRowInVBlankImmediateFlip = 0;
3395 }
3396
3397 *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
3398
3399 if (GPUVMEnable == true) {
3400 *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
3401 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
3402 } else if ((GPUVMEnable == true || DCCEnable == true)) {
3403 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
3404 } else {
3405 *final_flip_bw = 0;
3406 }
3407
3408
3409 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
3410 if (GPUVMEnable == true && DCCEnable != true) {
3411 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
3412 } else if (GPUVMEnable != true && DCCEnable == true) {
3413 min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
3414 } else {
3415 min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio,
3416 dpte_row_height_chroma * LineTime / VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
3417 }
3418 } else {
3419 if (GPUVMEnable == true && DCCEnable != true) {
3420 min_row_time = dpte_row_height * LineTime / VRatio;
3421 } else if (GPUVMEnable != true && DCCEnable == true) {
3422 min_row_time = meta_row_height * LineTime / VRatio;
3423 } else {
3424 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
3425 }
3426 }
3427
3428 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
3429 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3430 *ImmediateFlipSupportedForPipe = false;
3431 } else {
3432 *ImmediateFlipSupportedForPipe = true;
3433 }
3434 }
3435
TruncToValidBPP(double LinkBitRate,int Lanes,long HTotal,long HActive,double PixelClock,double DesiredBPP,bool DSCEnable,enum output_encoder_class Output,enum output_format_class Format,unsigned int DSCInputBitPerComponent,int DSCSlices,int AudioRate,int AudioLayout,enum odm_combine_mode ODMCombine)3436 static double TruncToValidBPP(
3437 double LinkBitRate,
3438 int Lanes,
3439 long HTotal,
3440 long HActive,
3441 double PixelClock,
3442 double DesiredBPP,
3443 bool DSCEnable,
3444 enum output_encoder_class Output,
3445 enum output_format_class Format,
3446 unsigned int DSCInputBitPerComponent,
3447 int DSCSlices,
3448 int AudioRate,
3449 int AudioLayout,
3450 enum odm_combine_mode ODMCombine)
3451 {
3452 double MaxLinkBPP = 0;
3453 int MinDSCBPP = 0;
3454 double MaxDSCBPP = 0;
3455 int NonDSCBPP0 = 0;
3456 int NonDSCBPP1 = 0;
3457 int NonDSCBPP2 = 0;
3458
3459 if (Format == dm_420) {
3460 NonDSCBPP0 = 12;
3461 NonDSCBPP1 = 15;
3462 NonDSCBPP2 = 18;
3463 MinDSCBPP = 6;
3464 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
3465 } else if (Format == dm_444) {
3466 NonDSCBPP0 = 24;
3467 NonDSCBPP1 = 30;
3468 NonDSCBPP2 = 36;
3469 MinDSCBPP = 8;
3470 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3471 } else {
3472 NonDSCBPP0 = 16;
3473 NonDSCBPP1 = 20;
3474 NonDSCBPP2 = 24;
3475
3476 if (Format == dm_n422) {
3477 MinDSCBPP = 7;
3478 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3479 } else {
3480 MinDSCBPP = 8;
3481 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
3482 }
3483 }
3484
3485 if (DSCEnable && Output == dm_dp) {
3486 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3487 } else {
3488 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
3489 }
3490
3491 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3492 MaxLinkBPP = 16;
3493 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
3494 MaxLinkBPP = 32;
3495 }
3496
3497
3498 if (DesiredBPP == 0) {
3499 if (DSCEnable) {
3500 if (MaxLinkBPP < MinDSCBPP) {
3501 return BPP_INVALID;
3502 } else if (MaxLinkBPP >= MaxDSCBPP) {
3503 return MaxDSCBPP;
3504 } else {
3505 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
3506 }
3507 } else {
3508 if (MaxLinkBPP >= NonDSCBPP2) {
3509 return NonDSCBPP2;
3510 } else if (MaxLinkBPP >= NonDSCBPP1) {
3511 return NonDSCBPP1;
3512 } else if (MaxLinkBPP >= NonDSCBPP0) {
3513 return NonDSCBPP0;
3514 } else {
3515 return BPP_INVALID;
3516 }
3517 }
3518 } else {
3519 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP == NonDSCBPP0 || DesiredBPP == 18)) ||
3520 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
3521 return BPP_INVALID;
3522 } else {
3523 return DesiredBPP;
3524 }
3525 }
3526 }
3527
dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib * mode_lib)3528 void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3529 {
3530 struct vba_vars_st *v = &mode_lib->vba;
3531 int MinPrefetchMode, MaxPrefetchMode;
3532 int i, start_state;
3533 unsigned int j, k, m;
3534 bool EnoughWritebackUnits = true;
3535 bool WritebackModeSupport = true;
3536 bool ViewportExceedsSurface = false;
3537 double MaxTotalVActiveRDBandwidth = 0;
3538 long ReorderingBytes = 0;
3539 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX] = { 0 };
3540
3541 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3542
3543 if (mode_lib->validate_max_state)
3544 start_state = v->soc.num_states - 1;
3545 else
3546 start_state = 0;
3547
3548 CalculateMinAndMaxPrefetchMode(
3549 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
3550 &MinPrefetchMode, &MaxPrefetchMode);
3551
3552 /*Scale Ratio, taps Support Check*/
3553
3554 v->ScaleRatioAndTapsSupport = true;
3555 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3556 if (v->ScalerEnabled[k] == false
3557 && ((v->SourcePixelFormat[k] != dm_444_64
3558 && v->SourcePixelFormat[k] != dm_444_32
3559 && v->SourcePixelFormat[k] != dm_444_16
3560 && v->SourcePixelFormat[k] != dm_mono_16
3561 && v->SourcePixelFormat[k] != dm_mono_8
3562 && v->SourcePixelFormat[k] != dm_rgbe
3563 && v->SourcePixelFormat[k] != dm_rgbe_alpha)
3564 || v->HRatio[k] != 1.0
3565 || v->htaps[k] != 1.0
3566 || v->VRatio[k] != 1.0
3567 || v->vtaps[k] != 1.0)) {
3568 v->ScaleRatioAndTapsSupport = false;
3569 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0
3570 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3571 || (v->htaps[k] > 1.0
3572 && (v->htaps[k] % 2) == 1)
3573 || v->HRatio[k] > v->MaxHSCLRatio
3574 || v->VRatio[k] > v->MaxVSCLRatio
3575 || v->HRatio[k] > v->htaps[k]
3576 || v->VRatio[k] > v->vtaps[k]
3577 || (v->SourcePixelFormat[k] != dm_444_64
3578 && v->SourcePixelFormat[k] != dm_444_32
3579 && v->SourcePixelFormat[k] != dm_444_16
3580 && v->SourcePixelFormat[k] != dm_mono_16
3581 && v->SourcePixelFormat[k] != dm_mono_8
3582 && v->SourcePixelFormat[k] != dm_rgbe
3583 && (v->VTAPsChroma[k] < 1
3584 || v->VTAPsChroma[k] > 8
3585 || v->HTAPsChroma[k] < 1
3586 || v->HTAPsChroma[k] > 8
3587 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
3588 || v->HRatioChroma[k] > v->MaxHSCLRatio
3589 || v->VRatioChroma[k] > v->MaxVSCLRatio
3590 || v->HRatioChroma[k] > v->HTAPsChroma[k]
3591 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
3592 v->ScaleRatioAndTapsSupport = false;
3593 }
3594 }
3595 /*Source Format, Pixel Format and Scan Support Check*/
3596
3597 v->SourceFormatPixelAndScanSupport = true;
3598 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3599 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true))
3600 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t || v->SurfaceTiling[k] == dm_sw_64kb_d_x)
3601 && !(v->SourcePixelFormat[k] == dm_444_64))) {
3602 v->SourceFormatPixelAndScanSupport = false;
3603 }
3604 }
3605 /*Bandwidth Support Check*/
3606
3607 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3608 dml30_CalculateBytePerPixelAnd256BBlockSizes(
3609 v->SourcePixelFormat[k],
3610 v->SurfaceTiling[k],
3611 &v->BytePerPixelY[k],
3612 &v->BytePerPixelC[k],
3613 &v->BytePerPixelInDETY[k],
3614 &v->BytePerPixelInDETC[k],
3615 &v->Read256BlockHeightY[k],
3616 &v->Read256BlockHeightC[k],
3617 &v->Read256BlockWidthY[k],
3618 &v->Read256BlockWidthC[k]);
3619 }
3620 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3621 if (v->SourceScan[k] != dm_vert) {
3622 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
3623 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
3624 } else {
3625 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
3626 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
3627 }
3628 }
3629 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3630 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0) / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
3631 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0) / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
3632 }
3633 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3634 if (v->WritebackEnable[k] == true
3635 && v->WritebackPixelFormat[k] == dm_444_64) {
3636 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k]
3637 * v->WritebackDestinationHeight[k]
3638 / (v->WritebackSourceHeight[k]
3639 * v->HTotal[k]
3640 / v->PixelClock[k]) * 8.0;
3641 } else if (v->WritebackEnable[k] == true) {
3642 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k]
3643 * v->WritebackDestinationHeight[k]
3644 / (v->WritebackSourceHeight[k]
3645 * v->HTotal[k]
3646 / v->PixelClock[k]) * 4.0;
3647 } else {
3648 v->WriteBandwidth[k] = 0.0;
3649 }
3650 }
3651
3652 /*Writeback Latency support check*/
3653
3654 v->WritebackLatencySupport = true;
3655 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3656 if (v->WritebackEnable[k] == true) {
3657 if (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_no_interleave ||
3658 v->WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave) {
3659 if (v->WriteBandwidth[k]
3660 > 2.0 * v->WritebackInterfaceBufferSize * 1024
3661 / v->WritebackLatency) {
3662 v->WritebackLatencySupport = false;
3663 }
3664 } else {
3665 if (v->WriteBandwidth[k]
3666 > v->WritebackInterfaceBufferSize * 1024
3667 / v->WritebackLatency) {
3668 v->WritebackLatencySupport = false;
3669 }
3670 }
3671 }
3672 }
3673
3674 /*Writeback Mode Support Check*/
3675
3676 v->TotalNumberOfActiveWriteback = 0;
3677 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3678 if (v->WritebackEnable[k] == true) {
3679 v->TotalNumberOfActiveWriteback =
3680 v->TotalNumberOfActiveWriteback + 1;
3681 }
3682 }
3683
3684 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
3685 EnoughWritebackUnits = false;
3686 }
3687 if (!v->WritebackSupportInterleaveAndUsingWholeBufferForASingleStream
3688 && (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_no_interleave
3689 || v->WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave)) {
3690
3691 WritebackModeSupport = false;
3692 }
3693 if (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_no_interleave && v->TotalNumberOfActiveWriteback > 1) {
3694 WritebackModeSupport = false;
3695 }
3696
3697 /*Writeback Scale Ratio and Taps Support Check*/
3698
3699 v->WritebackScaleRatioAndTapsSupport = true;
3700 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3701 if (v->WritebackEnable[k] == true) {
3702 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio
3703 || v->WritebackVRatio[k]
3704 > v->WritebackMaxVSCLRatio
3705 || v->WritebackHRatio[k]
3706 < v->WritebackMinHSCLRatio
3707 || v->WritebackVRatio[k]
3708 < v->WritebackMinVSCLRatio
3709 || v->WritebackHTaps[k]
3710 > v->WritebackMaxHSCLTaps
3711 || v->WritebackVTaps[k]
3712 > v->WritebackMaxVSCLTaps
3713 || v->WritebackHRatio[k]
3714 > v->WritebackHTaps[k]
3715 || v->WritebackVRatio[k]
3716 > v->WritebackVTaps[k]
3717 || (v->WritebackHTaps[k] > 2.0
3718 && ((v->WritebackHTaps[k] % 2)
3719 == 1))) {
3720 v->WritebackScaleRatioAndTapsSupport = false;
3721 }
3722 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
3723 v->WritebackScaleRatioAndTapsSupport = false;
3724 }
3725 }
3726 }
3727 /*Maximum DISPCLK/DPPCLK Support check*/
3728
3729 v->WritebackRequiredDISPCLK = 0.0;
3730 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3731 if (v->WritebackEnable[k] == true) {
3732 v->WritebackRequiredDISPCLK = dml_max(v->WritebackRequiredDISPCLK,
3733 dml30_CalculateWriteBackDISPCLK(
3734 v->WritebackPixelFormat[k],
3735 v->PixelClock[k],
3736 v->WritebackHRatio[k],
3737 v->WritebackVRatio[k],
3738 v->WritebackHTaps[k],
3739 v->WritebackVTaps[k],
3740 v->WritebackSourceWidth[k],
3741 v->WritebackDestinationWidth[k],
3742 v->HTotal[k],
3743 v->WritebackLineBufferSize));
3744 }
3745 }
3746 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3747 if (v->HRatio[k] > 1.0) {
3748 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
3749 } else {
3750 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3751 }
3752 if (v->BytePerPixelC[k] == 0.0) {
3753 v->PSCL_FACTOR_CHROMA[k] = 0.0;
3754 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
3755 * dml_max3(v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], 1.0);
3756 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3757 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3758 }
3759 } else {
3760 if (v->HRatioChroma[k] > 1.0) {
3761 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput,
3762 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
3763 } else {
3764 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3765 }
3766 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] * dml_max5(v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
3767 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
3768 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
3769 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
3770 1.0);
3771 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
3772 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3773 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3774 }
3775 }
3776 }
3777 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3778 int MaximumSwathWidthSupportLuma = 0;
3779 int MaximumSwathWidthSupportChroma = 0;
3780
3781 if (v->SurfaceTiling[k] == dm_sw_linear) {
3782 MaximumSwathWidthSupportLuma = 8192.0;
3783 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
3784 MaximumSwathWidthSupportLuma = 2880.0;
3785 } else {
3786 MaximumSwathWidthSupportLuma = 5760.0;
3787 }
3788
3789 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
3790 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
3791 } else {
3792 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
3793 }
3794 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
3795 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
3796 if (v->BytePerPixelC[k] == 0.0) {
3797 v->MaximumSwathWidthInLineBufferChroma = 0;
3798 } else {
3799 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
3800 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
3801 }
3802 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
3803 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
3804 }
3805
3806 CalculateSwathAndDETConfiguration(
3807 true,
3808 v->NumberOfActivePlanes,
3809 v->DETBufferSizeInKByte[0],
3810 v->MaximumSwathWidthLuma,
3811 v->MaximumSwathWidthChroma,
3812 v->SourceScan,
3813 v->SourcePixelFormat,
3814 v->SurfaceTiling,
3815 v->ViewportWidth,
3816 v->ViewportHeight,
3817 v->SurfaceWidthY,
3818 v->SurfaceWidthC,
3819 v->SurfaceHeightY,
3820 v->SurfaceHeightC,
3821 v->Read256BlockHeightY,
3822 v->Read256BlockHeightC,
3823 v->Read256BlockWidthY,
3824 v->Read256BlockWidthC,
3825 v->odm_combine_dummy,
3826 v->BlendingAndTiming,
3827 v->BytePerPixelY,
3828 v->BytePerPixelC,
3829 v->BytePerPixelInDETY,
3830 v->BytePerPixelInDETC,
3831 v->HActive,
3832 v->HRatio,
3833 v->HRatioChroma,
3834 v->DPPPerPlane,
3835 v->swath_width_luma_ub,
3836 v->swath_width_chroma_ub,
3837 v->SwathWidthY,
3838 v->SwathWidthC,
3839 v->SwathHeightY,
3840 v->SwathHeightC,
3841 v->DETBufferSizeY,
3842 v->DETBufferSizeC,
3843 v->SingleDPPViewportSizeSupportPerPlane,
3844 &v->ViewportSizeSupport[0][0]);
3845
3846 for (i = start_state; i < v->soc.num_states; i++) {
3847 for (j = 0; j < 2; j++) {
3848 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
3849 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
3850 v->RequiredDISPCLK[i][j] = 0.0;
3851 v->DISPCLK_DPPCLK_Support[i][j] = true;
3852 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3853 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3854 * (1.0 + v->DISPCLKRampingMargin / 100.0);
3855 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states - 1]
3856 && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states - 1])) {
3857 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3858 }
3859 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3860 * (1 + v->DISPCLKRampingMargin / 100.0);
3861 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states - 1]
3862 && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states - 1])) {
3863 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3864 }
3865 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3866 * (1 + v->DISPCLKRampingMargin / 100.0);
3867 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states - 1]
3868 && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states - 1])) {
3869 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3870 }
3871
3872 if (v->ODMCombinePolicy == dm_odm_combine_policy_none) {
3873 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
3874 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
3875 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
3876 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3877 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3878 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
3879 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
3880 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
3881 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
3882 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
3883 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3884 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3885 } else {
3886 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
3887 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
3888 }
3889 if (v->DSCEnabled[k] && v->HActive[k] > DCN30_MAX_DSC_IMAGE_WIDTH
3890 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
3891 if (v->HActive[k] / 2 > DCN30_MAX_DSC_IMAGE_WIDTH) {
3892 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
3893 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
3894 } else {
3895 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3896 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3897 }
3898 }
3899 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN30_MAX_FMT_420_BUFFER_WIDTH
3900 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
3901 if (v->HActive[k] / 2 > DCN30_MAX_FMT_420_BUFFER_WIDTH) {
3902 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
3903 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
3904 } else {
3905 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3906 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3907 }
3908 }
3909 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
3910 v->MPCCombine[i][j][k] = false;
3911 v->NoOfDPP[i][j][k] = 4;
3912 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
3913 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
3914 v->MPCCombine[i][j][k] = false;
3915 v->NoOfDPP[i][j][k] = 2;
3916 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
3917 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
3918 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= v->MaxDppclkRoundedDownToDFSGranularity
3919 && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
3920 v->MPCCombine[i][j][k] = false;
3921 v->NoOfDPP[i][j][k] = 1;
3922 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3923 } else {
3924 v->MPCCombine[i][j][k] = true;
3925 v->NoOfDPP[i][j][k] = 2;
3926 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
3927 }
3928 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
3929 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3930 > v->MaxDppclkRoundedDownToDFSGranularity) || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
3931 v->DISPCLK_DPPCLK_Support[i][j] = false;
3932 }
3933 }
3934 v->TotalNumberOfActiveDPP[i][j] = 0;
3935 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
3936 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3937 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
3938 if (v->NoOfDPP[i][j][k] == 1)
3939 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
3940 }
3941 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never) {
3942 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
3943 double BWOfNonSplitPlaneOfMaximumBandwidth = 0;
3944 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
3945 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
3946 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
3947 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3948 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
3949 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
3950 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
3951 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
3952 }
3953 }
3954 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
3955 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
3956 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
3957 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
3958 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
3959 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
3960 }
3961 }
3962 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
3963 v->RequiredDISPCLK[i][j] = 0.0;
3964 v->DISPCLK_DPPCLK_Support[i][j] = true;
3965 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3966 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
3967 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
3968 v->MPCCombine[i][j][k] = true;
3969 v->NoOfDPP[i][j][k] = 2;
3970 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
3971 } else {
3972 v->MPCCombine[i][j][k] = false;
3973 v->NoOfDPP[i][j][k] = 1;
3974 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3975 }
3976 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
3977 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3978 * (1.0 + v->DISPCLKRampingMargin / 100.0);
3979 } else {
3980 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3981 }
3982 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
3983 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3984 > v->MaxDppclkRoundedDownToDFSGranularity) || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
3985 v->DISPCLK_DPPCLK_Support[i][j] = false;
3986 }
3987 }
3988 v->TotalNumberOfActiveDPP[i][j] = 0.0;
3989 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3990 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
3991 }
3992 }
3993 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
3994 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
3995 v->DISPCLK_DPPCLK_Support[i][j] = false;
3996 }
3997 }
3998 }
3999
4000 /*Total Available Pipes Support Check*/
4001
4002 for (i = start_state; i < v->soc.num_states; i++) {
4003 for (j = 0; j < 2; j++) {
4004 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4005 v->TotalAvailablePipesSupport[i][j] = true;
4006 } else {
4007 v->TotalAvailablePipesSupport[i][j] = false;
4008 }
4009 }
4010 }
4011 /*Display IO and DSC Support Check*/
4012
4013 v->NonsupportedDSCInputBPC = false;
4014 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4015 if (!(v->DSCInputBitPerComponent[k] == 12.0
4016 || v->DSCInputBitPerComponent[k] == 10.0
4017 || v->DSCInputBitPerComponent[k] == 8.0)) {
4018 v->NonsupportedDSCInputBPC = true;
4019 }
4020 }
4021
4022 /*Number Of DSC Slices*/
4023 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4024 if (v->BlendingAndTiming[k] == k) {
4025 if (v->PixelClockBackEnd[k] > 3200) {
4026 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
4027 } else if (v->PixelClockBackEnd[k] > 1360) {
4028 v->NumberOfDSCSlices[k] = 8;
4029 } else if (v->PixelClockBackEnd[k] > 680) {
4030 v->NumberOfDSCSlices[k] = 4;
4031 } else if (v->PixelClockBackEnd[k] > 340) {
4032 v->NumberOfDSCSlices[k] = 2;
4033 } else {
4034 v->NumberOfDSCSlices[k] = 1;
4035 }
4036 } else {
4037 v->NumberOfDSCSlices[k] = 0;
4038 }
4039 }
4040
4041 for (i = start_state; i < v->soc.num_states; i++) {
4042 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4043 v->RequiresDSC[i][k] = false;
4044 v->RequiresFEC[i][k] = false;
4045 if (v->BlendingAndTiming[k] == k) {
4046 if (v->Output[k] == dm_hdmi) {
4047 v->RequiresDSC[i][k] = false;
4048 v->RequiresFEC[i][k] = false;
4049 v->OutputBppPerState[i][k] = TruncToValidBPP(
4050 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
4051 3,
4052 v->HTotal[k],
4053 v->HActive[k],
4054 v->PixelClockBackEnd[k],
4055 v->ForcedOutputLinkBPP[k],
4056 false,
4057 v->Output[k],
4058 v->OutputFormat[k],
4059 v->DSCInputBitPerComponent[k],
4060 v->NumberOfDSCSlices[k],
4061 v->AudioSampleRate[k],
4062 v->AudioSampleLayout[k],
4063 v->ODMCombineEnablePerState[i][k]);
4064 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) {
4065 if (v->DSCEnable[k] == true) {
4066 v->RequiresDSC[i][k] = true;
4067 v->LinkDSCEnable = true;
4068 if (v->Output[k] == dm_dp) {
4069 v->RequiresFEC[i][k] = true;
4070 } else {
4071 v->RequiresFEC[i][k] = false;
4072 }
4073 } else {
4074 v->RequiresDSC[i][k] = false;
4075 v->LinkDSCEnable = false;
4076 v->RequiresFEC[i][k] = false;
4077 }
4078
4079 v->Outbpp = BPP_INVALID;
4080 if (v->PHYCLKPerState[i] >= 270.0) {
4081 v->Outbpp = TruncToValidBPP(
4082 (1.0 - v->Downspreading / 100.0) * 2700,
4083 v->OutputLinkDPLanes[k],
4084 v->HTotal[k],
4085 v->HActive[k],
4086 v->PixelClockBackEnd[k],
4087 v->ForcedOutputLinkBPP[k],
4088 v->LinkDSCEnable,
4089 v->Output[k],
4090 v->OutputFormat[k],
4091 v->DSCInputBitPerComponent[k],
4092 v->NumberOfDSCSlices[k],
4093 v->AudioSampleRate[k],
4094 v->AudioSampleLayout[k],
4095 v->ODMCombineEnablePerState[i][k]);
4096 v->OutputBppPerState[i][k] = v->Outbpp;
4097 // TODO: Need some other way to handle this nonsense
4098 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4099 }
4100 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4101 v->Outbpp = TruncToValidBPP(
4102 (1.0 - v->Downspreading / 100.0) * 5400,
4103 v->OutputLinkDPLanes[k],
4104 v->HTotal[k],
4105 v->HActive[k],
4106 v->PixelClockBackEnd[k],
4107 v->ForcedOutputLinkBPP[k],
4108 v->LinkDSCEnable,
4109 v->Output[k],
4110 v->OutputFormat[k],
4111 v->DSCInputBitPerComponent[k],
4112 v->NumberOfDSCSlices[k],
4113 v->AudioSampleRate[k],
4114 v->AudioSampleLayout[k],
4115 v->ODMCombineEnablePerState[i][k]);
4116 v->OutputBppPerState[i][k] = v->Outbpp;
4117 // TODO: Need some other way to handle this nonsense
4118 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4119 }
4120 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4121 v->Outbpp = TruncToValidBPP(
4122 (1.0 - v->Downspreading / 100.0) * 8100,
4123 v->OutputLinkDPLanes[k],
4124 v->HTotal[k],
4125 v->HActive[k],
4126 v->PixelClockBackEnd[k],
4127 v->ForcedOutputLinkBPP[k],
4128 v->LinkDSCEnable,
4129 v->Output[k],
4130 v->OutputFormat[k],
4131 v->DSCInputBitPerComponent[k],
4132 v->NumberOfDSCSlices[k],
4133 v->AudioSampleRate[k],
4134 v->AudioSampleLayout[k],
4135 v->ODMCombineEnablePerState[i][k]);
4136 if (v->Outbpp == BPP_INVALID && v->ForcedOutputLinkBPP[k] == 0) {
4137 //if (v->Outbpp == BPP_INVALID && v->DSCEnabled[k] == dm_dsc_enable_only_if_necessary && v->ForcedOutputLinkBPP[k] == 0) {
4138 v->RequiresDSC[i][k] = true;
4139 v->LinkDSCEnable = true;
4140 if (v->Output[k] == dm_dp) {
4141 v->RequiresFEC[i][k] = true;
4142 }
4143 v->Outbpp = TruncToValidBPP(
4144 (1.0 - v->Downspreading / 100.0) * 8100,
4145 v->OutputLinkDPLanes[k],
4146 v->HTotal[k],
4147 v->HActive[k],
4148 v->PixelClockBackEnd[k],
4149 v->ForcedOutputLinkBPP[k],
4150 v->LinkDSCEnable,
4151 v->Output[k],
4152 v->OutputFormat[k],
4153 v->DSCInputBitPerComponent[k],
4154 v->NumberOfDSCSlices[k],
4155 v->AudioSampleRate[k],
4156 v->AudioSampleLayout[k],
4157 v->ODMCombineEnablePerState[i][k]);
4158 }
4159 v->OutputBppPerState[i][k] = v->Outbpp;
4160 // TODO: Need some other way to handle this nonsense
4161 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4162 }
4163 }
4164 } else {
4165 v->OutputBppPerState[i][k] = 0;
4166 }
4167 }
4168 }
4169 for (i = start_state; i < v->soc.num_states; i++) {
4170 v->DIOSupport[i] = true;
4171 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4172 if (!v->skip_dio_check[k] && v->BlendingAndTiming[k] == k && (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_hdmi)
4173 && (v->OutputBppPerState[i][k] == 0
4174 || (v->OutputFormat[k] == dm_420 && v->Interlace[k] == true && v->ProgressiveToInterlaceUnitInOPP == true))) {
4175 v->DIOSupport[i] = false;
4176 }
4177 }
4178 }
4179
4180 for (i = start_state; i < v->soc.num_states; ++i) {
4181 v->ODMCombine4To1SupportCheckOK[i] = true;
4182 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4183 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4184 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_hdmi)) {
4185 v->ODMCombine4To1SupportCheckOK[i] = false;
4186 }
4187 }
4188 }
4189
4190 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4191
4192 for (i = start_state; i < v->soc.num_states; i++) {
4193 v->NotEnoughDSCUnits[i] = false;
4194 v->TotalDSCUnitsRequired = 0.0;
4195 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4196 if (v->RequiresDSC[i][k] == true) {
4197 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4198 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4199 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4200 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4201 } else {
4202 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4203 }
4204 }
4205 }
4206 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4207 v->NotEnoughDSCUnits[i] = true;
4208 }
4209 }
4210 /*DSC Delay per state*/
4211
4212 for (i = start_state; i < v->soc.num_states; i++) {
4213 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4214 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4215 v->BPP = 0.0;
4216 } else {
4217 v->BPP = v->OutputBppPerState[i][k];
4218 }
4219 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4220 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4221 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4222 v->DSCInputBitPerComponent[k],
4223 v->BPP,
4224 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4225 v->NumberOfDSCSlices[k],
4226 v->OutputFormat[k],
4227 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4228 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4229 v->DSCDelayPerState[i][k] = 2.0
4230 * dscceComputeDelay(
4231 v->DSCInputBitPerComponent[k],
4232 v->BPP,
4233 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4234 v->NumberOfDSCSlices[k] / 2,
4235 v->OutputFormat[k],
4236 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4237 } else {
4238 v->DSCDelayPerState[i][k] = 4.0
4239 * (dscceComputeDelay(
4240 v->DSCInputBitPerComponent[k],
4241 v->BPP,
4242 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4243 v->NumberOfDSCSlices[k] / 4,
4244 v->OutputFormat[k],
4245 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4246 }
4247 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4248 } else {
4249 v->DSCDelayPerState[i][k] = 0.0;
4250 }
4251 }
4252 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4253 for (m = 0; m <= v->NumberOfActivePlanes - 1; m++) {
4254 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4255 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4256 }
4257 }
4258 }
4259 }
4260
4261 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4262 //
4263 for (i = start_state; i < mode_lib->soc.num_states; ++i) {
4264 for (j = 0; j <= 1; ++j) {
4265 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4266 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4267 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4268 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4269 }
4270
4271 CalculateSwathAndDETConfiguration(
4272 false,
4273 v->NumberOfActivePlanes,
4274 v->DETBufferSizeInKByte[0],
4275 v->MaximumSwathWidthLuma,
4276 v->MaximumSwathWidthChroma,
4277 v->SourceScan,
4278 v->SourcePixelFormat,
4279 v->SurfaceTiling,
4280 v->ViewportWidth,
4281 v->ViewportHeight,
4282 v->SurfaceWidthY,
4283 v->SurfaceWidthC,
4284 v->SurfaceHeightY,
4285 v->SurfaceHeightC,
4286 v->Read256BlockHeightY,
4287 v->Read256BlockHeightC,
4288 v->Read256BlockWidthY,
4289 v->Read256BlockWidthC,
4290 v->ODMCombineEnableThisState,
4291 v->BlendingAndTiming,
4292 v->BytePerPixelY,
4293 v->BytePerPixelC,
4294 v->BytePerPixelInDETY,
4295 v->BytePerPixelInDETC,
4296 v->HActive,
4297 v->HRatio,
4298 v->HRatioChroma,
4299 v->NoOfDPPThisState,
4300 v->swath_width_luma_ub_this_state,
4301 v->swath_width_chroma_ub_this_state,
4302 v->SwathWidthYThisState,
4303 v->SwathWidthCThisState,
4304 v->SwathHeightYThisState,
4305 v->SwathHeightCThisState,
4306 v->DETBufferSizeYThisState,
4307 v->DETBufferSizeCThisState,
4308 v->dummystring,
4309 &v->ViewportSizeSupport[i][j]);
4310
4311 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4312 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4313 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4314 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4315 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4316 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4317 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4318 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4319 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4320 }
4321
4322 }
4323 }
4324 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4325 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4326 }
4327
4328 for (i = start_state; i < v->soc.num_states; i++) {
4329 for (j = 0; j < 2; j++) {
4330 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4331 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4332 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4333 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4334 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4335 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4336 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4337 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4338 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4339 }
4340
4341 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4342 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4343 if (v->DCCEnable[k] == true) {
4344 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4345 }
4346 }
4347
4348 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4349 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
4350 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4351
4352 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
4353 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
4354 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4355 } else {
4356 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4357 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4358 }
4359
4360 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4361 mode_lib,
4362 v->DCCEnable[k],
4363 v->Read256BlockHeightC[k],
4364 v->Read256BlockWidthY[k],
4365 v->SourcePixelFormat[k],
4366 v->SurfaceTiling[k],
4367 v->BytePerPixelC[k],
4368 v->SourceScan[k],
4369 v->SwathWidthCThisState[k],
4370 v->ViewportHeightChroma[k],
4371 v->GPUVMEnable,
4372 v->HostVMEnable,
4373 v->HostVMMaxNonCachedPageTableLevels,
4374 v->GPUVMMinPageSize,
4375 v->HostVMMinPageSize,
4376 v->PTEBufferSizeInRequestsForChroma,
4377 v->PitchC[k],
4378 0.0,
4379 &v->MacroTileWidthC[k],
4380 &v->MetaRowBytesC,
4381 &v->DPTEBytesPerRowC,
4382 &v->PTEBufferSizeNotExceededC[i][j][k],
4383 &v->dummyinteger7,
4384 &v->dpte_row_height_chroma[k],
4385 &v->dummyinteger28,
4386 &v->dummyinteger26,
4387 &v->dummyinteger23,
4388 &v->meta_row_height_chroma[k],
4389 &v->dummyinteger8,
4390 &v->dummyinteger9,
4391 &v->dummyinteger19,
4392 &v->dummyinteger20,
4393 &v->dummyinteger17,
4394 &v->dummyinteger10,
4395 &v->dummyinteger11);
4396
4397 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4398 mode_lib,
4399 v->VRatioChroma[k],
4400 v->VTAPsChroma[k],
4401 v->Interlace[k],
4402 v->ProgressiveToInterlaceUnitInOPP,
4403 v->SwathHeightCThisState[k],
4404 v->ViewportYStartC[k],
4405 &v->PrefillC[k],
4406 &v->MaxNumSwC[k]);
4407 } else {
4408 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4409 v->PTEBufferSizeInRequestsForChroma = 0;
4410 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4411 v->MetaRowBytesC = 0.0;
4412 v->DPTEBytesPerRowC = 0.0;
4413 v->PrefetchLinesC[i][j][k] = 0.0;
4414 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4415 }
4416 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4417 mode_lib,
4418 v->DCCEnable[k],
4419 v->Read256BlockHeightY[k],
4420 v->Read256BlockWidthY[k],
4421 v->SourcePixelFormat[k],
4422 v->SurfaceTiling[k],
4423 v->BytePerPixelY[k],
4424 v->SourceScan[k],
4425 v->SwathWidthYThisState[k],
4426 v->ViewportHeight[k],
4427 v->GPUVMEnable,
4428 v->HostVMEnable,
4429 v->HostVMMaxNonCachedPageTableLevels,
4430 v->GPUVMMinPageSize,
4431 v->HostVMMinPageSize,
4432 v->PTEBufferSizeInRequestsForLuma,
4433 v->PitchY[k],
4434 v->DCCMetaPitchY[k],
4435 &v->MacroTileWidthY[k],
4436 &v->MetaRowBytesY,
4437 &v->DPTEBytesPerRowY,
4438 &v->PTEBufferSizeNotExceededY[i][j][k],
4439 v->dummyinteger4,
4440 &v->dpte_row_height[k],
4441 &v->dummyinteger29,
4442 &v->dummyinteger27,
4443 &v->dummyinteger24,
4444 &v->meta_row_height[k],
4445 &v->dummyinteger25,
4446 &v->dpte_group_bytes[k],
4447 &v->dummyinteger21,
4448 &v->dummyinteger22,
4449 &v->dummyinteger18,
4450 &v->dummyinteger5,
4451 &v->dummyinteger6);
4452 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4453 mode_lib,
4454 v->VRatio[k],
4455 v->vtaps[k],
4456 v->Interlace[k],
4457 v->ProgressiveToInterlaceUnitInOPP,
4458 v->SwathHeightYThisState[k],
4459 v->ViewportYStartY[k],
4460 &v->PrefillY[k],
4461 &v->MaxNumSwY[k]);
4462 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4463 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4464 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4465
4466 CalculateRowBandwidth(
4467 v->GPUVMEnable,
4468 v->SourcePixelFormat[k],
4469 v->VRatio[k],
4470 v->VRatioChroma[k],
4471 v->DCCEnable[k],
4472 v->HTotal[k] / v->PixelClock[k],
4473 v->MetaRowBytesY,
4474 v->MetaRowBytesC,
4475 v->meta_row_height[k],
4476 v->meta_row_height_chroma[k],
4477 v->DPTEBytesPerRowY,
4478 v->DPTEBytesPerRowC,
4479 v->dpte_row_height[k],
4480 v->dpte_row_height_chroma[k],
4481 &v->meta_row_bandwidth[i][j][k],
4482 &v->dpte_row_bandwidth[i][j][k]);
4483 }
4484 v->UrgLatency[i] = CalculateUrgentLatency(
4485 v->UrgentLatencyPixelDataOnly,
4486 v->UrgentLatencyPixelMixedWithVMData,
4487 v->UrgentLatencyVMDataOnly,
4488 v->DoUrgentLatencyAdjustment,
4489 v->UrgentLatencyAdjustmentFabricClockComponent,
4490 v->UrgentLatencyAdjustmentFabricClockReference,
4491 v->FabricClockPerState[i]);
4492
4493 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4494 CalculateUrgentBurstFactor(
4495 v->swath_width_luma_ub_this_state[k],
4496 v->swath_width_chroma_ub_this_state[k],
4497 v->DETBufferSizeInKByte[0],
4498 v->SwathHeightYThisState[k],
4499 v->SwathHeightCThisState[k],
4500 v->HTotal[k] / v->PixelClock[k],
4501 v->UrgLatency[i],
4502 v->CursorBufferSize,
4503 v->CursorWidth[k][0],
4504 v->CursorBPP[k][0],
4505 v->VRatio[k],
4506 v->VRatioChroma[k],
4507 v->BytePerPixelInDETY[k],
4508 v->BytePerPixelInDETC[k],
4509 v->DETBufferSizeYThisState[k],
4510 v->DETBufferSizeCThisState[k],
4511 &v->UrgentBurstFactorCursor[k],
4512 &v->UrgentBurstFactorLuma[k],
4513 &v->UrgentBurstFactorChroma[k],
4514 &NotUrgentLatencyHiding[k]);
4515 }
4516
4517 v->NotUrgentLatencyHiding[i][j] = false;
4518 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4519 if (NotUrgentLatencyHiding[k]) {
4520 v->NotUrgentLatencyHiding[i][j] = true;
4521 }
4522 }
4523
4524 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4525 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
4526 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
4527 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
4528 }
4529
4530 v->TotalVActivePixelBandwidth[i][j] = 0;
4531 v->TotalVActiveCursorBandwidth[i][j] = 0;
4532 v->TotalMetaRowBandwidth[i][j] = 0;
4533 v->TotalDPTERowBandwidth[i][j] = 0;
4534 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4535 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
4536 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
4537 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
4538 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
4539 }
4540
4541 CalculateDCFCLKDeepSleep(
4542 mode_lib,
4543 v->NumberOfActivePlanes,
4544 v->BytePerPixelY,
4545 v->BytePerPixelC,
4546 v->VRatio,
4547 v->VRatioChroma,
4548 v->SwathWidthYThisState,
4549 v->SwathWidthCThisState,
4550 v->NoOfDPPThisState,
4551 v->HRatio,
4552 v->HRatioChroma,
4553 v->PixelClock,
4554 v->PSCL_FACTOR,
4555 v->PSCL_FACTOR_CHROMA,
4556 v->RequiredDPPCLKThisState,
4557 v->ReadBandwidthLuma,
4558 v->ReadBandwidthChroma,
4559 v->ReturnBusWidth,
4560 &v->ProjectedDCFCLKDeepSleep[i][j]);
4561 }
4562 }
4563
4564 //Calculate Return BW
4565
4566 for (i = start_state; i < mode_lib->soc.num_states; ++i) {
4567 for (j = 0; j <= 1; ++j) {
4568 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4569 if (v->BlendingAndTiming[k] == k) {
4570 if (v->WritebackEnable[k] == true) {
4571 v->WritebackDelayTime[k] = v->WritebackLatency
4572 + CalculateWriteBackDelay(
4573 v->WritebackPixelFormat[k],
4574 v->WritebackHRatio[k],
4575 v->WritebackVRatio[k],
4576 v->WritebackVTaps[k],
4577 v->WritebackDestinationWidth[k],
4578 v->WritebackDestinationHeight[k],
4579 v->WritebackSourceHeight[k],
4580 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
4581 } else {
4582 v->WritebackDelayTime[k] = 0.0;
4583 }
4584 for (m = 0; m <= v->NumberOfActivePlanes - 1; m++) {
4585 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
4586 v->WritebackDelayTime[k] = dml_max(
4587 v->WritebackDelayTime[k],
4588 v->WritebackLatency
4589 + CalculateWriteBackDelay(
4590 v->WritebackPixelFormat[m],
4591 v->WritebackHRatio[m],
4592 v->WritebackVRatio[m],
4593 v->WritebackVTaps[m],
4594 v->WritebackDestinationWidth[m],
4595 v->WritebackDestinationHeight[m],
4596 v->WritebackSourceHeight[m],
4597 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
4598 }
4599 }
4600 }
4601 }
4602 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4603 for (m = 0; m <= v->NumberOfActivePlanes - 1; m++) {
4604 if (v->BlendingAndTiming[k] == m) {
4605 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
4606 }
4607 }
4608 }
4609 v->MaxMaxVStartup[i][j] = 0;
4610 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4611 v->MaximumVStartup[i][j][k] = v->VTotal[k] - v->VActive[k]
4612 - dml_max(1.0, dml_ceil(1.0 * v->WritebackDelayTime[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0));
4613 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
4614 }
4615 }
4616 }
4617
4618 ReorderingBytes = v->NumberOfChannels
4619 * dml_max3(
4620 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
4621 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
4622 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
4623 v->FinalDRAMClockChangeLatency = (v->DRAMClockChangeLatencyOverride > 0 ? v->DRAMClockChangeLatencyOverride : v->DRAMClockChangeLatency);
4624
4625 for (i = start_state; i < mode_lib->soc.num_states; ++i) {
4626 for (j = 0; j <= 1; ++j) {
4627 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
4628 }
4629 }
4630
4631 if (v->UseMinimumRequiredDCFCLK == true) {
4632 UseMinimumDCFCLK(mode_lib, v, MaxPrefetchMode, ReorderingBytes);
4633
4634 if (v->ClampMinDCFCLK) {
4635 /* Clamp calculated values to actual minimum */
4636 for (i = start_state; i < mode_lib->soc.num_states; ++i) {
4637 for (j = 0; j <= 1; ++j) {
4638 if (v->DCFCLKState[i][j] < mode_lib->soc.min_dcfclk) {
4639 v->DCFCLKState[i][j] = mode_lib->soc.min_dcfclk;
4640 }
4641 }
4642 }
4643 }
4644 }
4645
4646 for (i = start_state; i < mode_lib->soc.num_states; ++i) {
4647 for (j = 0; j <= 1; ++j) {
4648 v->IdealSDPPortBandwidthPerState[i][j] = dml_min3(
4649 v->ReturnBusWidth * v->DCFCLKState[i][j],
4650 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth,
4651 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
4652 if (v->HostVMEnable != true) {
4653 v->ReturnBWPerState[i][j] = v->IdealSDPPortBandwidthPerState[i][j] * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly
4654 / 100;
4655 } else {
4656 v->ReturnBWPerState[i][j] = v->IdealSDPPortBandwidthPerState[i][j]
4657 * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100;
4658 }
4659 }
4660 }
4661
4662 //Re-ordering Buffer Support Check
4663
4664 for (i = start_state; i < mode_lib->soc.num_states; ++i) {
4665 for (j = 0; j <= 1; ++j) {
4666 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
4667 > (v->RoundTripPingLatencyCycles + 32) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
4668 v->ROBSupport[i][j] = true;
4669 } else {
4670 v->ROBSupport[i][j] = false;
4671 }
4672 }
4673 }
4674
4675 //Vertical Active BW support check
4676
4677 MaxTotalVActiveRDBandwidth = 0;
4678 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4679 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4680 }
4681
4682 for (i = start_state; i < mode_lib->soc.num_states; ++i) {
4683 for (j = 0; j <= 1; ++j) {
4684 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
4685 v->IdealSDPPortBandwidthPerState[i][j] * v->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
4686 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation
4687 / 100);
4688 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
4689 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
4690 } else {
4691 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
4692 }
4693 }
4694 }
4695
4696 //Prefetch Check
4697
4698 for (i = start_state; i < mode_lib->soc.num_states; ++i) {
4699 for (j = 0; j <= 1; ++j) {
4700 int NextPrefetchModeState = MinPrefetchMode;
4701
4702 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
4703
4704 v->BandwidthWithoutPrefetchSupported[i][j] = true;
4705 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j] + v->TotalDPTERowBandwidth[i][j]
4706 > v->ReturnBWPerState[i][j] || v->NotUrgentLatencyHiding[i][j]) {
4707 v->BandwidthWithoutPrefetchSupported[i][j] = false;
4708 }
4709
4710 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4711 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4712 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4713 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4714 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4715 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4716 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4717 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4718 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4719 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4720 v->ODMCombineEnabled[k] = v->ODMCombineEnablePerState[i][k];
4721 }
4722
4723 v->ExtraLatency = CalculateExtraLatency(
4724 v->RoundTripPingLatencyCycles,
4725 ReorderingBytes,
4726 v->DCFCLKState[i][j],
4727 v->TotalNumberOfActiveDPP[i][j],
4728 v->PixelChunkSizeInKByte,
4729 v->TotalNumberOfDCCActiveDPP[i][j],
4730 v->MetaChunkSize,
4731 v->ReturnBWPerState[i][j],
4732 v->GPUVMEnable,
4733 v->HostVMEnable,
4734 v->NumberOfActivePlanes,
4735 v->NoOfDPPThisState,
4736 v->dpte_group_bytes,
4737 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4738 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4739 v->HostVMMinPageSize,
4740 v->HostVMMaxNonCachedPageTableLevels);
4741
4742 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
4743 do {
4744 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
4745 v->MaxVStartup = v->NextMaxVStartup;
4746
4747 v->TWait = CalculateTWait(v->PrefetchModePerState[i][j], v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
4748
4749 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4750 Pipe myPipe = { 0 };
4751
4752 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
4753 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
4754 myPipe.PixelClock = v->PixelClock[k];
4755 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
4756 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
4757 myPipe.ScalerEnabled = v->ScalerEnabled[k];
4758 myPipe.SourceScan = v->SourceScan[k];
4759 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
4760 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
4761 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
4762 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
4763 myPipe.InterlaceEnable = v->Interlace[k];
4764 myPipe.NumberOfCursors = v->NumberOfCursors[k];
4765 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
4766 myPipe.HTotal = v->HTotal[k];
4767 myPipe.DCCEnable = v->DCCEnable[k];
4768 myPipe.ODMCombineEnabled = !!v->ODMCombineEnabled[k];
4769
4770 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
4771 mode_lib,
4772 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4773 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4774 &myPipe,
4775 v->DSCDelayPerState[i][k],
4776 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
4777 v->DPPCLKDelaySCL,
4778 v->DPPCLKDelaySCLLBOnly,
4779 v->DPPCLKDelayCNVCCursor,
4780 v->DISPCLKDelaySubtotal,
4781 v->SwathWidthYThisState[k] / v->HRatio[k],
4782 v->OutputFormat[k],
4783 v->MaxInterDCNTileRepeaters,
4784 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
4785 v->MaximumVStartup[i][j][k],
4786 v->GPUVMMaxPageTableLevels,
4787 v->GPUVMEnable,
4788 v->HostVMEnable,
4789 v->HostVMMaxNonCachedPageTableLevels,
4790 v->HostVMMinPageSize,
4791 v->DynamicMetadataEnable[k],
4792 v->DynamicMetadataVMEnabled,
4793 v->DynamicMetadataLinesBeforeActiveRequired[k],
4794 v->DynamicMetadataTransmittedBytes[k],
4795 v->UrgLatency[i],
4796 v->ExtraLatency,
4797 v->TimeCalc,
4798 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
4799 v->MetaRowBytes[i][j][k],
4800 v->DPTEBytesPerRow[i][j][k],
4801 v->PrefetchLinesY[i][j][k],
4802 v->SwathWidthYThisState[k],
4803 v->BytePerPixelY[k],
4804 v->PrefillY[k],
4805 v->MaxNumSwY[k],
4806 v->PrefetchLinesC[i][j][k],
4807 v->SwathWidthCThisState[k],
4808 v->BytePerPixelC[k],
4809 v->PrefillC[k],
4810 v->MaxNumSwC[k],
4811 v->swath_width_luma_ub_this_state[k],
4812 v->swath_width_chroma_ub_this_state[k],
4813 v->SwathHeightYThisState[k],
4814 v->SwathHeightCThisState[k],
4815 v->TWait,
4816 v->ProgressiveToInterlaceUnitInOPP,
4817 &v->DSTXAfterScaler[k],
4818 &v->DSTYAfterScaler[k],
4819 &v->LineTimesForPrefetch[k],
4820 &v->PrefetchBW[k],
4821 &v->LinesForMetaPTE[k],
4822 &v->LinesForMetaAndDPTERow[k],
4823 &v->VRatioPreY[i][j][k],
4824 &v->VRatioPreC[i][j][k],
4825 &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
4826 &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
4827 &v->NoTimeForDynamicMetadata[i][j][k],
4828 &v->Tno_bw[k],
4829 &v->prefetch_vmrow_bw[k],
4830 &v->Tdmdl_vm[k],
4831 &v->Tdmdl[k],
4832 &v->VUpdateOffsetPix[k],
4833 &v->VUpdateWidthPix[k],
4834 &v->VReadyOffsetPix[k]);
4835 }
4836
4837 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4838 CalculateUrgentBurstFactor(
4839 v->swath_width_luma_ub_this_state[k],
4840 v->swath_width_chroma_ub_this_state[k],
4841 v->DETBufferSizeInKByte[0],
4842 v->SwathHeightYThisState[k],
4843 v->SwathHeightCThisState[k],
4844 v->HTotal[k] / v->PixelClock[k],
4845 v->UrgLatency[i],
4846 v->CursorBufferSize,
4847 v->CursorWidth[k][0],
4848 v->CursorBPP[k][0],
4849 v->VRatioPreY[i][j][k],
4850 v->VRatioPreC[i][j][k],
4851 v->BytePerPixelInDETY[k],
4852 v->BytePerPixelInDETC[k],
4853 v->DETBufferSizeYThisState[k],
4854 v->DETBufferSizeCThisState[k],
4855 &v->UrgentBurstFactorCursorPre[k],
4856 &v->UrgentBurstFactorLumaPre[k],
4857 &v->UrgentBurstFactorChromaPre[k],
4858 &v->NoUrgentLatencyHidingPre[k]);
4859 }
4860
4861 v->MaximumReadBandwidthWithPrefetch = 0.0;
4862 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4863 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k])
4864 * v->VRatioPreY[i][j][k];
4865
4866 v->MaximumReadBandwidthWithPrefetch = v->MaximumReadBandwidthWithPrefetch
4867 + dml_max4(
4868 v->VActivePixelBandwidth[i][j][k],
4869 v->VActiveCursorBandwidth[i][j][k]
4870 + v->NoOfDPP[i][j][k] * (v->meta_row_bandwidth[i][j][k] + v->dpte_row_bandwidth[i][j][k]),
4871 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
4872 v->NoOfDPP[i][j][k]
4873 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] * v->UrgentBurstFactorLumaPre[k]
4874 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
4875 * v->UrgentBurstFactorChromaPre[k])
4876 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
4877 }
4878
4879 v->NotEnoughUrgentLatencyHidingPre = false;
4880 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4881 if (v->NoUrgentLatencyHidingPre[k] == true) {
4882 v->NotEnoughUrgentLatencyHidingPre = true;
4883 }
4884 }
4885
4886 v->PrefetchSupported[i][j] = true;
4887 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
4888 || v->NotEnoughUrgentLatencyHidingPre == 1) {
4889 v->PrefetchSupported[i][j] = false;
4890 }
4891 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4892 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
4893 || v->NoTimeForPrefetch[i][j][k] == true) {
4894 v->PrefetchSupported[i][j] = false;
4895 }
4896 }
4897
4898 v->DynamicMetadataSupported[i][j] = true;
4899 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4900 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
4901 v->DynamicMetadataSupported[i][j] = false;
4902 }
4903 }
4904
4905 v->VRatioInPrefetchSupported[i][j] = true;
4906 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4907 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
4908 v->VRatioInPrefetchSupported[i][j] = false;
4909 }
4910 }
4911 v->AnyLinesForVMOrRowTooLarge = false;
4912 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4913 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
4914 v->AnyLinesForVMOrRowTooLarge = true;
4915 }
4916 }
4917
4918 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
4919 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
4920 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4921 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
4922 - dml_max(
4923 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
4924 v->NoOfDPP[i][j][k]
4925 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] * v->UrgentBurstFactorLumaPre[k]
4926 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
4927 * v->UrgentBurstFactorChromaPre[k])
4928 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
4929 }
4930 v->TotImmediateFlipBytes = 0.0;
4931 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4932 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes + v->NoOfDPP[i][j][k] * (v->PDEAndMetaPTEBytesPerFrame[i][j][k]
4933 + v->MetaRowBytes[i][j][k] + v->DPTEBytesPerRow[i][j][k]);
4934 }
4935
4936 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4937 CalculateFlipSchedule(
4938 mode_lib,
4939 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4940 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4941 v->ExtraLatency,
4942 v->UrgLatency[i],
4943 v->GPUVMMaxPageTableLevels,
4944 v->HostVMEnable,
4945 v->HostVMMaxNonCachedPageTableLevels,
4946 v->GPUVMEnable,
4947 v->HostVMMinPageSize,
4948 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
4949 v->MetaRowBytes[i][j][k],
4950 v->DPTEBytesPerRow[i][j][k],
4951 v->BandwidthAvailableForImmediateFlip,
4952 v->TotImmediateFlipBytes,
4953 v->SourcePixelFormat[k],
4954 v->HTotal[k] / v->PixelClock[k],
4955 v->VRatio[k],
4956 v->VRatioChroma[k],
4957 v->Tno_bw[k],
4958 v->DCCEnable[k],
4959 v->dpte_row_height[k],
4960 v->meta_row_height[k],
4961 v->dpte_row_height_chroma[k],
4962 v->meta_row_height_chroma[k],
4963 &v->DestinationLinesToRequestVMInImmediateFlip[k],
4964 &v->DestinationLinesToRequestRowInImmediateFlip[k],
4965 &v->final_flip_bw[k],
4966 &v->ImmediateFlipSupportedForPipe[k]);
4967 }
4968 v->total_dcn_read_bw_with_flip = 0.0;
4969 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4970 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
4971 + dml_max3(
4972 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
4973 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
4974 + v->VActiveCursorBandwidth[i][j][k],
4975 v->NoOfDPP[i][j][k]
4976 * (v->final_flip_bw[k]
4977 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
4978 * v->UrgentBurstFactorLumaPre[k]
4979 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
4980 * v->UrgentBurstFactorChromaPre[k])
4981 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
4982 }
4983 v->ImmediateFlipSupportedForState[i][j] = true;
4984 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
4985 v->ImmediateFlipSupportedForState[i][j] = false;
4986 }
4987 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4988 if (v->ImmediateFlipSupportedForPipe[k] == false) {
4989 v->ImmediateFlipSupportedForState[i][j] = false;
4990 }
4991 }
4992 } else {
4993 v->ImmediateFlipSupportedForState[i][j] = false;
4994 }
4995 if (v->MaxVStartup <= 13 || v->AnyLinesForVMOrRowTooLarge == false) {
4996 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
4997 NextPrefetchModeState = NextPrefetchModeState + 1;
4998 } else {
4999 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5000 }
5001 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5002 && ((v->HostVMEnable == false && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5003 || v->ImmediateFlipSupportedForState[i][j] == true))
5004 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5005
5006 CalculateWatermarksAndDRAMSpeedChangeSupport(
5007 mode_lib,
5008 v->PrefetchModePerState[i][j],
5009 v->NumberOfActivePlanes,
5010 v->MaxLineBufferLines,
5011 v->LineBufferSize,
5012 v->DPPOutputBufferPixels,
5013 v->DETBufferSizeInKByte[0],
5014 v->WritebackInterfaceBufferSize,
5015 v->DCFCLKState[i][j],
5016 v->ReturnBWPerState[i][j],
5017 v->GPUVMEnable,
5018 v->dpte_group_bytes,
5019 v->MetaChunkSize,
5020 v->UrgLatency[i],
5021 v->ExtraLatency,
5022 v->WritebackLatency,
5023 v->WritebackChunkSize,
5024 v->SOCCLKPerState[i],
5025 v->FinalDRAMClockChangeLatency,
5026 v->SRExitTime,
5027 v->SREnterPlusExitTime,
5028 v->ProjectedDCFCLKDeepSleep[i][j],
5029 v->NoOfDPPThisState,
5030 v->DCCEnable,
5031 v->RequiredDPPCLKThisState,
5032 v->DETBufferSizeYThisState,
5033 v->DETBufferSizeCThisState,
5034 v->SwathHeightYThisState,
5035 v->SwathHeightCThisState,
5036 v->LBBitPerPixel,
5037 v->SwathWidthYThisState,
5038 v->SwathWidthCThisState,
5039 v->HRatio,
5040 v->HRatioChroma,
5041 v->vtaps,
5042 v->VTAPsChroma,
5043 v->VRatio,
5044 v->VRatioChroma,
5045 v->HTotal,
5046 v->PixelClock,
5047 v->BlendingAndTiming,
5048 v->BytePerPixelInDETY,
5049 v->BytePerPixelInDETC,
5050 v->DSTXAfterScaler,
5051 v->DSTYAfterScaler,
5052 v->WritebackEnable,
5053 v->WritebackPixelFormat,
5054 v->WritebackDestinationWidth,
5055 v->WritebackDestinationHeight,
5056 v->WritebackSourceHeight,
5057 &v->DRAMClockChangeSupport[i][j],
5058 &v->UrgentWatermark,
5059 &v->WritebackUrgentWatermark,
5060 &v->DRAMClockChangeWatermark,
5061 &v->WritebackDRAMClockChangeWatermark,
5062 &v->StutterExitWatermark,
5063 &v->StutterEnterPlusExitWatermark,
5064 &v->MinActiveDRAMClockChangeLatencySupported);
5065 }
5066 }
5067
5068 /*PTE Buffer Size Check*/
5069
5070 for (i = start_state; i < v->soc.num_states; i++) {
5071 for (j = 0; j < 2; j++) {
5072 v->PTEBufferSizeNotExceeded[i][j] = true;
5073 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5074 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5075 v->PTEBufferSizeNotExceeded[i][j] = false;
5076 }
5077 }
5078 }
5079 }
5080 /*Cursor Support Check*/
5081
5082 v->CursorSupport = true;
5083 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5084 if (v->CursorWidth[k][0] > 0.0) {
5085 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5086 v->CursorSupport = false;
5087 }
5088 }
5089 }
5090 /*Valid Pitch Check*/
5091
5092 v->PitchSupport = true;
5093 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5094 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
5095 if (v->DCCEnable[k] == true) {
5096 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
5097 } else {
5098 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5099 }
5100 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
5101 && v->SourcePixelFormat[k] != dm_rgbe && v->SourcePixelFormat[k] != dm_mono_8) {
5102 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
5103 if (v->DCCEnable[k] == true) {
5104 v->AlignedDCCMetaPitchC[k] = dml_ceil(dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]), 64.0 * v->Read256BlockWidthC[k]);
5105 } else {
5106 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5107 }
5108 } else {
5109 v->AlignedCPitch[k] = v->PitchC[k];
5110 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5111 }
5112 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k] || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k]
5113 || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5114 v->PitchSupport = false;
5115 }
5116 }
5117
5118 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5119 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k])
5120 ViewportExceedsSurface = true;
5121
5122 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5123 && v->SourcePixelFormat[k] != dm_444_8 && v->SourcePixelFormat[k] != dm_rgbe) {
5124 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k] || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5125 ViewportExceedsSurface = true;
5126 }
5127 }
5128 }
5129 /*Mode Support, Voltage State and SOC Configuration*/
5130
5131 for (i = v->soc.num_states - 1; i >= start_state; i--) {
5132 for (j = 0; j < 2; j++) {
5133 if (v->ScaleRatioAndTapsSupport == 1 && v->SourceFormatPixelAndScanSupport == 1 && v->ViewportSizeSupport[i][j] == 1
5134 && v->DIOSupport[i] == 1 && v->ODMCombine4To1SupportCheckOK[i] == 1
5135 && v->NotEnoughDSCUnits[i] == 0
5136 && v->DTBCLKRequiredMoreThanSupported[i] == 0
5137 && v->ROBSupport[i][j] == 1 && v->DISPCLK_DPPCLK_Support[i][j] == 1 && v->TotalAvailablePipesSupport[i][j] == 1
5138 && EnoughWritebackUnits == 1 && WritebackModeSupport == 1
5139 && v->WritebackLatencySupport == 1 && v->WritebackScaleRatioAndTapsSupport == 1 && v->CursorSupport == 1 && v->PitchSupport == 1
5140 && ViewportExceedsSurface == 0 && v->PrefetchSupported[i][j] == 1 && v->DynamicMetadataSupported[i][j] == 1
5141 && v->TotalVerticalActiveBandwidthSupport[i][j] == 1 && v->VRatioInPrefetchSupported[i][j] == 1
5142 && v->PTEBufferSizeNotExceeded[i][j] == 1 && v->NonsupportedDSCInputBPC == 0
5143 && ((v->HostVMEnable == 0 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5144 || v->ImmediateFlipSupportedForState[i][j] == true)) {
5145 v->ModeSupport[i][j] = true;
5146 } else {
5147 v->ModeSupport[i][j] = false;
5148 }
5149 }
5150 }
5151 {
5152 unsigned int MaximumMPCCombine = 0;
5153 for (i = v->soc.num_states; i >= start_state; i--) {
5154 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5155 v->VoltageLevel = i;
5156 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5157 if (v->ModeSupport[i][1] == true) {
5158 MaximumMPCCombine = 1;
5159 } else {
5160 MaximumMPCCombine = 0;
5161 }
5162 }
5163 }
5164 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5165 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5166 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5167 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5168 }
5169 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5170 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5171 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5172 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5173 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5174 v->maxMpcComb = MaximumMPCCombine;
5175 }
5176 }
5177
CalculateWatermarksAndDRAMSpeedChangeSupport(struct display_mode_lib * mode_lib,unsigned int PrefetchMode,unsigned int NumberOfActivePlanes,unsigned int MaxLineBufferLines,unsigned int LineBufferSize,unsigned int DPPOutputBufferPixels,unsigned int DETBufferSizeInKByte,unsigned int WritebackInterfaceBufferSize,double DCFCLK,double ReturnBW,bool GPUVMEnable,unsigned int dpte_group_bytes[],unsigned int MetaChunkSize,double UrgentLatency,double ExtraLatency,double WritebackLatency,double WritebackChunkSize,double SOCCLK,double DRAMClockChangeLatency,double SRExitTime,double SREnterPlusExitTime,double DCFCLKDeepSleep,unsigned int DPPPerPlane[],bool DCCEnable[],double DPPCLK[],unsigned int DETBufferSizeY[],unsigned int DETBufferSizeC[],unsigned int SwathHeightY[],unsigned int SwathHeightC[],unsigned int LBBitPerPixel[],double SwathWidthY[],double SwathWidthC[],double HRatio[],double HRatioChroma[],unsigned int vtaps[],unsigned int VTAPsChroma[],double VRatio[],double VRatioChroma[],unsigned int HTotal[],double PixelClock[],unsigned int BlendingAndTiming[],double BytePerPixelDETY[],double BytePerPixelDETC[],double DSTXAfterScaler[],double DSTYAfterScaler[],bool WritebackEnable[],enum source_format_class WritebackPixelFormat[],double WritebackDestinationWidth[],double WritebackDestinationHeight[],double WritebackSourceHeight[],enum clock_change_support * DRAMClockChangeSupport,double * UrgentWatermark,double * WritebackUrgentWatermark,double * DRAMClockChangeWatermark,double * WritebackDRAMClockChangeWatermark,double * StutterExitWatermark,double * StutterEnterPlusExitWatermark,double * MinActiveDRAMClockChangeLatencySupported)5178 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5179 struct display_mode_lib *mode_lib,
5180 unsigned int PrefetchMode,
5181 unsigned int NumberOfActivePlanes,
5182 unsigned int MaxLineBufferLines,
5183 unsigned int LineBufferSize,
5184 unsigned int DPPOutputBufferPixels,
5185 unsigned int DETBufferSizeInKByte,
5186 unsigned int WritebackInterfaceBufferSize,
5187 double DCFCLK,
5188 double ReturnBW,
5189 bool GPUVMEnable,
5190 unsigned int dpte_group_bytes[],
5191 unsigned int MetaChunkSize,
5192 double UrgentLatency,
5193 double ExtraLatency,
5194 double WritebackLatency,
5195 double WritebackChunkSize,
5196 double SOCCLK,
5197 double DRAMClockChangeLatency,
5198 double SRExitTime,
5199 double SREnterPlusExitTime,
5200 double DCFCLKDeepSleep,
5201 unsigned int DPPPerPlane[],
5202 bool DCCEnable[],
5203 double DPPCLK[],
5204 unsigned int DETBufferSizeY[],
5205 unsigned int DETBufferSizeC[],
5206 unsigned int SwathHeightY[],
5207 unsigned int SwathHeightC[],
5208 unsigned int LBBitPerPixel[],
5209 double SwathWidthY[],
5210 double SwathWidthC[],
5211 double HRatio[],
5212 double HRatioChroma[],
5213 unsigned int vtaps[],
5214 unsigned int VTAPsChroma[],
5215 double VRatio[],
5216 double VRatioChroma[],
5217 unsigned int HTotal[],
5218 double PixelClock[],
5219 unsigned int BlendingAndTiming[],
5220 double BytePerPixelDETY[],
5221 double BytePerPixelDETC[],
5222 double DSTXAfterScaler[],
5223 double DSTYAfterScaler[],
5224 bool WritebackEnable[],
5225 enum source_format_class WritebackPixelFormat[],
5226 double WritebackDestinationWidth[],
5227 double WritebackDestinationHeight[],
5228 double WritebackSourceHeight[],
5229 enum clock_change_support *DRAMClockChangeSupport,
5230 double *UrgentWatermark,
5231 double *WritebackUrgentWatermark,
5232 double *DRAMClockChangeWatermark,
5233 double *WritebackDRAMClockChangeWatermark,
5234 double *StutterExitWatermark,
5235 double *StutterEnterPlusExitWatermark,
5236 double *MinActiveDRAMClockChangeLatencySupported)
5237 {
5238 double EffectiveLBLatencyHidingY = 0;
5239 double EffectiveLBLatencyHidingC = 0;
5240 double LinesInDETY[DC__NUM_DPP__MAX] = { 0 };
5241 double LinesInDETC = 0;
5242 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX] = { 0 };
5243 unsigned int LinesInDETCRoundedDownToSwath = 0;
5244 double FullDETBufferingTimeY[DC__NUM_DPP__MAX] = { 0 };
5245 double FullDETBufferingTimeC = 0;
5246 double ActiveDRAMClockChangeLatencyMarginY = 0;
5247 double ActiveDRAMClockChangeLatencyMarginC = 0;
5248 double WritebackDRAMClockChangeLatencyMargin = 0;
5249 double PlaneWithMinActiveDRAMClockChangeMargin = 0;
5250 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 0;
5251 double FullDETBufferingTimeYStutterCriticalPlane = 0;
5252 double TimeToFinishSwathTransferStutterCriticalPlane = 0;
5253 double WritebackDRAMClockChangeLatencyHiding = 0;
5254 unsigned int k, j;
5255
5256 mode_lib->vba.TotalActiveDPP = 0;
5257 mode_lib->vba.TotalDCCActiveDPP = 0;
5258 for (k = 0; k < NumberOfActivePlanes; ++k) {
5259 mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP + DPPPerPlane[k];
5260 if (DCCEnable[k] == true) {
5261 mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP + DPPPerPlane[k];
5262 }
5263 }
5264
5265 *UrgentWatermark = UrgentLatency + ExtraLatency;
5266
5267 *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark;
5268
5269 mode_lib->vba.TotalActiveWriteback = 0;
5270 for (k = 0; k < NumberOfActivePlanes; ++k) {
5271 if (WritebackEnable[k] == true) {
5272 mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + 1;
5273 }
5274 }
5275
5276 if (mode_lib->vba.TotalActiveWriteback <= 1) {
5277 *WritebackUrgentWatermark = WritebackLatency;
5278 } else {
5279 *WritebackUrgentWatermark = WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5280 }
5281
5282 if (mode_lib->vba.TotalActiveWriteback <= 1) {
5283 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency;
5284 } else {
5285 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5286 }
5287
5288 for (k = 0; k < NumberOfActivePlanes; ++k) {
5289
5290 mode_lib->vba.LBLatencyHidingSourceLinesY = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (vtaps[k] - 1);
5291
5292 mode_lib->vba.LBLatencyHidingSourceLinesC = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTAPsChroma[k] - 1);
5293
5294 EffectiveLBLatencyHidingY = mode_lib->vba.LBLatencyHidingSourceLinesY / VRatio[k] * (HTotal[k] / PixelClock[k]);
5295
5296 EffectiveLBLatencyHidingC = mode_lib->vba.LBLatencyHidingSourceLinesC / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
5297
5298 LinesInDETY[k] = (double) DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k];
5299 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5300 FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
5301 if (BytePerPixelDETC[k] > 0) {
5302 LinesInDETC = mode_lib->vba.DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5303 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5304 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatioChroma[k];
5305 } else {
5306 LinesInDETC = 0;
5307 FullDETBufferingTimeC = 999999;
5308 }
5309
5310 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY[k] - *UrgentWatermark - (HTotal[k] / PixelClock[k]) * (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) - *DRAMClockChangeWatermark;
5311
5312 if (NumberOfActivePlanes > 1) {
5313 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k];
5314 }
5315
5316 if (BytePerPixelDETC[k] > 0) {
5317 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC - *UrgentWatermark - (HTotal[k] / PixelClock[k]) * (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) - *DRAMClockChangeWatermark;
5318
5319 if (NumberOfActivePlanes > 1) {
5320 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k];
5321 }
5322 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
5323 } else {
5324 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
5325 }
5326
5327 if (WritebackEnable[k] == true) {
5328
5329 WritebackDRAMClockChangeLatencyHiding = WritebackInterfaceBufferSize * 1024 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
5330 if (WritebackPixelFormat[k] == dm_444_64) {
5331 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
5332 }
5333 if (mode_lib->vba.WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave) {
5334 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding * 2;
5335 }
5336 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - mode_lib->vba.WritebackDRAMClockChangeWatermark;
5337 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
5338 }
5339 }
5340
5341 mode_lib->vba.MinActiveDRAMClockChangeMargin = 999999;
5342 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5343 for (k = 0; k < NumberOfActivePlanes; ++k) {
5344 if (mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] < mode_lib->vba.MinActiveDRAMClockChangeMargin) {
5345 mode_lib->vba.MinActiveDRAMClockChangeMargin = mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
5346 if (BlendingAndTiming[k] == k) {
5347 PlaneWithMinActiveDRAMClockChangeMargin = k;
5348 } else {
5349 for (j = 0; j < NumberOfActivePlanes; ++j) {
5350 if (BlendingAndTiming[k] == j) {
5351 PlaneWithMinActiveDRAMClockChangeMargin = j;
5352 }
5353 }
5354 }
5355 }
5356 }
5357
5358 *MinActiveDRAMClockChangeLatencySupported = mode_lib->vba.MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency;
5359
5360 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5361 for (k = 0; k < NumberOfActivePlanes; ++k) {
5362 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k)) && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) && mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5363 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
5364 }
5365 }
5366
5367 mode_lib->vba.TotalNumberOfActiveOTG = 0;
5368 for (k = 0; k < NumberOfActivePlanes; ++k) {
5369 if (BlendingAndTiming[k] == k) {
5370 mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG + 1;
5371 }
5372 }
5373
5374 if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 0) {
5375 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5376 } else if (((mode_lib->vba.SynchronizedVBlank == true || mode_lib->vba.TotalNumberOfActiveOTG == 1 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0)) {
5377 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5378 } else {
5379 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
5380 }
5381
5382 FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[0];
5383 for (k = 0; k < NumberOfActivePlanes; ++k) {
5384 if (FullDETBufferingTimeY[k] <= FullDETBufferingTimeYStutterCriticalPlane) {
5385 FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[k];
5386 TimeToFinishSwathTransferStutterCriticalPlane = (SwathHeightY[k] - (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k])) * (HTotal[k] / PixelClock[k]) / VRatio[k];
5387 }
5388 }
5389
5390 *StutterExitWatermark = SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5391 *StutterEnterPlusExitWatermark = dml_max(SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep, TimeToFinishSwathTransferStutterCriticalPlane);
5392
5393 }
5394
CalculateDCFCLKDeepSleep(struct display_mode_lib * mode_lib,unsigned int NumberOfActivePlanes,int BytePerPixelY[],int BytePerPixelC[],double VRatio[],double VRatioChroma[],double SwathWidthY[],double SwathWidthC[],unsigned int DPPPerPlane[],double HRatio[],double HRatioChroma[],double PixelClock[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double DPPCLK[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],int ReturnBusWidth,double * DCFCLKDeepSleep)5395 static void CalculateDCFCLKDeepSleep(
5396 struct display_mode_lib *mode_lib,
5397 unsigned int NumberOfActivePlanes,
5398 int BytePerPixelY[],
5399 int BytePerPixelC[],
5400 double VRatio[],
5401 double VRatioChroma[],
5402 double SwathWidthY[],
5403 double SwathWidthC[],
5404 unsigned int DPPPerPlane[],
5405 double HRatio[],
5406 double HRatioChroma[],
5407 double PixelClock[],
5408 double PSCL_THROUGHPUT[],
5409 double PSCL_THROUGHPUT_CHROMA[],
5410 double DPPCLK[],
5411 double ReadBandwidthLuma[],
5412 double ReadBandwidthChroma[],
5413 int ReturnBusWidth,
5414 double *DCFCLKDeepSleep)
5415 {
5416 double DisplayPipeLineDeliveryTimeLuma = 0;
5417 double DisplayPipeLineDeliveryTimeChroma = 0;
5418 unsigned int k;
5419 double ReadBandwidth = 0.0;
5420
5421 //double DCFCLKDeepSleepPerPlane[DC__NUM_DPP__MAX];
5422 for (k = 0; k < NumberOfActivePlanes; ++k) {
5423
5424 if (VRatio[k] <= 1) {
5425 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5426 } else {
5427 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5428 }
5429 if (BytePerPixelC[k] == 0) {
5430 DisplayPipeLineDeliveryTimeChroma = 0;
5431 } else {
5432 if (VRatioChroma[k] <= 1) {
5433 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5434 } else {
5435 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5436 }
5437 }
5438
5439 if (BytePerPixelC[k] > 0) {
5440 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(1.1 * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, 1.1 * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
5441 } else {
5442 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = 1.1 * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
5443 }
5444 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(mode_lib->vba.DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
5445
5446 }
5447
5448 for (k = 0; k < NumberOfActivePlanes; ++k) {
5449 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
5450 }
5451
5452 *DCFCLKDeepSleep = dml_max(8.0, ReadBandwidth / ReturnBusWidth);
5453
5454 for (k = 0; k < NumberOfActivePlanes; ++k) {
5455 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, mode_lib->vba.DCFCLKDeepSleepPerPlane[k]);
5456 }
5457 }
5458
/*
 * Compute the urgent burst factors for the cursor, luma and chroma buffers
 * of one plane.
 *
 * For each buffer the amount of time it can cover (lines held * LineTime /
 * vertical ratio) is compared against UrgentLatency:
 *   - if the buffer time does not exceed UrgentLatency, the factor is set to
 *     0 and *NotEnoughUrgentLatencyHiding is set;
 *   - otherwise the factor is BufferTime / (BufferTime - UrgentLatency).
 * When the relevant vertical ratio is not positive the factor is 1.
 *
 * *NotEnoughUrgentLatencyHiding is always written (cleared first).
 * *UrgentBurstFactorLuma is always written.
 * *UrgentBurstFactorCursor is written only when CursorWidth > 0 and
 * *UrgentBurstFactorChroma only when BytePerPixelInDETC > 0; otherwise the
 * caller's previous value is left untouched.
 *
 * DETBufferSizeInKByte is accepted but not read here.
 */
static void CalculateUrgentBurstFactor(
		long swath_width_luma_ub,
		long swath_width_chroma_ub,
		unsigned int DETBufferSizeInKByte,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double DETBufferSizeY,
		double DETBufferSizeC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorChroma,
		bool *NotEnoughUrgentLatencyHiding)
{
	double LinesInDETLuma = 0;
	double LinesInDETChroma = 0;
	unsigned int LinesInCursorBuffer = 0;
	double CursorBufferSizeInTime = 0;
	double DETBufferSizeInTimeLuma = 0;
	double DETBufferSizeInTimeChroma = 0;

	*NotEnoughUrgentLatencyHiding = 0;

	if (CursorWidth > 0) {
		/*
		 * Cursor lines that fit in the cursor buffer, rounded down to a power
		 * of two (assuming dml_log2/dml_floor behave as their names suggest).
		 */
		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
		if (VRatio > 0) {
			CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
			if (CursorBufferSizeInTime - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = 1;
				*UrgentBurstFactorCursor = 0;
			} else {
				*UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorCursor = 1;
		}
	}

	LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
	if (VRatio > 0) {
		/* Only whole swaths held in the DET count towards latency hiding. */
		DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
		if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
			*NotEnoughUrgentLatencyHiding = 1;
			*UrgentBurstFactorLuma = 0;
		} else {
			*UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
		}
	} else {
		*UrgentBurstFactorLuma = 1;
	}

	if (BytePerPixelInDETC > 0) {
		LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
		/*
		 * The chroma buffer drains at the chroma vertical ratio, so use
		 * VRatioC here. Previously the luma VRatio was used and the VRatioC
		 * argument was never read.
		 */
		if (VRatioC > 0) {
			DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC;
			if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = 1;
				*UrgentBurstFactorChroma = 0;
			} else {
				*UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorChroma = 1;
		}
	}
}
5533
CalculatePixelDeliveryTimes(unsigned int NumberOfActivePlanes,double VRatio[],double VRatioChroma[],double VRatioPrefetchY[],double VRatioPrefetchC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[],unsigned int DPPPerPlane[],double HRatio[],double HRatioChroma[],double PixelClock[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double DPPCLK[],int BytePerPixelC[],enum scan_direction_class SourceScan[],unsigned int NumberOfCursors[],unsigned int CursorWidth[][2],unsigned int CursorBPP[][2],unsigned int BlockWidth256BytesY[],unsigned int BlockHeight256BytesY[],unsigned int BlockWidth256BytesC[],unsigned int BlockHeight256BytesC[],double DisplayPipeLineDeliveryTimeLuma[],double DisplayPipeLineDeliveryTimeChroma[],double DisplayPipeLineDeliveryTimeLumaPrefetch[],double DisplayPipeLineDeliveryTimeChromaPrefetch[],double DisplayPipeRequestDeliveryTimeLuma[],double DisplayPipeRequestDeliveryTimeChroma[],double DisplayPipeRequestDeliveryTimeLumaPrefetch[],double DisplayPipeRequestDeliveryTimeChromaPrefetch[],double CursorRequestDeliveryTime[],double CursorRequestDeliveryTimePrefetch[])5534 static void CalculatePixelDeliveryTimes(
5535 unsigned int NumberOfActivePlanes,
5536 double VRatio[],
5537 double VRatioChroma[],
5538 double VRatioPrefetchY[],
5539 double VRatioPrefetchC[],
5540 unsigned int swath_width_luma_ub[],
5541 unsigned int swath_width_chroma_ub[],
5542 unsigned int DPPPerPlane[],
5543 double HRatio[],
5544 double HRatioChroma[],
5545 double PixelClock[],
5546 double PSCL_THROUGHPUT[],
5547 double PSCL_THROUGHPUT_CHROMA[],
5548 double DPPCLK[],
5549 int BytePerPixelC[],
5550 enum scan_direction_class SourceScan[],
5551 unsigned int NumberOfCursors[],
5552 unsigned int CursorWidth[][2],
5553 unsigned int CursorBPP[][2],
5554 unsigned int BlockWidth256BytesY[],
5555 unsigned int BlockHeight256BytesY[],
5556 unsigned int BlockWidth256BytesC[],
5557 unsigned int BlockHeight256BytesC[],
5558 double DisplayPipeLineDeliveryTimeLuma[],
5559 double DisplayPipeLineDeliveryTimeChroma[],
5560 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
5561 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
5562 double DisplayPipeRequestDeliveryTimeLuma[],
5563 double DisplayPipeRequestDeliveryTimeChroma[],
5564 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
5565 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
5566 double CursorRequestDeliveryTime[],
5567 double CursorRequestDeliveryTimePrefetch[])
5568 {
5569 double req_per_swath_ub = 0;
5570 unsigned int k;
5571
5572 for (k = 0; k < NumberOfActivePlanes; ++k) {
5573 if (VRatio[k] <= 1) {
5574 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5575 } else {
5576 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5577 }
5578
5579 if (BytePerPixelC[k] == 0) {
5580 DisplayPipeLineDeliveryTimeChroma[k] = 0;
5581 } else {
5582 if (VRatioChroma[k] <= 1) {
5583 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5584 } else {
5585 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5586 }
5587 }
5588
5589 if (VRatioPrefetchY[k] <= 1) {
5590 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5591 } else {
5592 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5593 }
5594
5595 if (BytePerPixelC[k] == 0) {
5596 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
5597 } else {
5598 if (VRatioPrefetchC[k] <= 1) {
5599 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5600 } else {
5601 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5602 }
5603 }
5604 }
5605
5606 for (k = 0; k < NumberOfActivePlanes; ++k) {
5607 if (SourceScan[k] != dm_vert) {
5608 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
5609 } else {
5610 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
5611 }
5612 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
5613 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
5614 if (BytePerPixelC[k] == 0) {
5615 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
5616 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
5617 } else {
5618 if (SourceScan[k] != dm_vert) {
5619 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
5620 } else {
5621 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
5622 }
5623 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
5624 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
5625 }
5626 }
5627
5628 for (k = 0; k < NumberOfActivePlanes; ++k) {
5629 int cursor_req_per_width = 0;
5630 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
5631 if (NumberOfCursors[k] > 0) {
5632 if (VRatio[k] <= 1) {
5633 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
5634 } else {
5635 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
5636 }
5637 if (VRatioPrefetchY[k] <= 1) {
5638 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
5639 } else {
5640 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
5641 }
5642 } else {
5643 CursorRequestDeliveryTime[k] = 0;
5644 CursorRequestDeliveryTimePrefetch[k] = 0;
5645 }
5646 }
5647 }
5648
CalculateMetaAndPTETimes(int NumberOfActivePlanes,bool GPUVMEnable,int MetaChunkSize,int MinMetaChunkSizeBytes,int HTotal[],double VRatio[],double VRatioChroma[],double DestinationLinesToRequestRowInVBlank[],double DestinationLinesToRequestRowInImmediateFlip[],bool DCCEnable[],double PixelClock[],int BytePerPixelY[],int BytePerPixelC[],enum scan_direction_class SourceScan[],int dpte_row_height[],int dpte_row_height_chroma[],int meta_row_width[],int meta_row_width_chroma[],int meta_row_height[],int meta_row_height_chroma[],int meta_req_width[],int meta_req_width_chroma[],int meta_req_height[],int meta_req_height_chroma[],int dpte_group_bytes[],int PTERequestSizeY[],int PTERequestSizeC[],int PixelPTEReqWidthY[],int PixelPTEReqHeightY[],int PixelPTEReqWidthC[],int PixelPTEReqHeightC[],int dpte_row_width_luma_ub[],int dpte_row_width_chroma_ub[],double DST_Y_PER_PTE_ROW_NOM_L[],double DST_Y_PER_PTE_ROW_NOM_C[],double DST_Y_PER_META_ROW_NOM_L[],double DST_Y_PER_META_ROW_NOM_C[],double TimePerMetaChunkNominal[],double TimePerChromaMetaChunkNominal[],double TimePerMetaChunkVBlank[],double TimePerChromaMetaChunkVBlank[],double TimePerMetaChunkFlip[],double TimePerChromaMetaChunkFlip[],double time_per_pte_group_nom_luma[],double time_per_pte_group_vblank_luma[],double time_per_pte_group_flip_luma[],double time_per_pte_group_nom_chroma[],double time_per_pte_group_vblank_chroma[],double time_per_pte_group_flip_chroma[])5649 static void CalculateMetaAndPTETimes(
5650 int NumberOfActivePlanes,
5651 bool GPUVMEnable,
5652 int MetaChunkSize,
5653 int MinMetaChunkSizeBytes,
5654 int HTotal[],
5655 double VRatio[],
5656 double VRatioChroma[],
5657 double DestinationLinesToRequestRowInVBlank[],
5658 double DestinationLinesToRequestRowInImmediateFlip[],
5659 bool DCCEnable[],
5660 double PixelClock[],
5661 int BytePerPixelY[],
5662 int BytePerPixelC[],
5663 enum scan_direction_class SourceScan[],
5664 int dpte_row_height[],
5665 int dpte_row_height_chroma[],
5666 int meta_row_width[],
5667 int meta_row_width_chroma[],
5668 int meta_row_height[],
5669 int meta_row_height_chroma[],
5670 int meta_req_width[],
5671 int meta_req_width_chroma[],
5672 int meta_req_height[],
5673 int meta_req_height_chroma[],
5674 int dpte_group_bytes[],
5675 int PTERequestSizeY[],
5676 int PTERequestSizeC[],
5677 int PixelPTEReqWidthY[],
5678 int PixelPTEReqHeightY[],
5679 int PixelPTEReqWidthC[],
5680 int PixelPTEReqHeightC[],
5681 int dpte_row_width_luma_ub[],
5682 int dpte_row_width_chroma_ub[],
5683 double DST_Y_PER_PTE_ROW_NOM_L[],
5684 double DST_Y_PER_PTE_ROW_NOM_C[],
5685 double DST_Y_PER_META_ROW_NOM_L[],
5686 double DST_Y_PER_META_ROW_NOM_C[],
5687 double TimePerMetaChunkNominal[],
5688 double TimePerChromaMetaChunkNominal[],
5689 double TimePerMetaChunkVBlank[],
5690 double TimePerChromaMetaChunkVBlank[],
5691 double TimePerMetaChunkFlip[],
5692 double TimePerChromaMetaChunkFlip[],
5693 double time_per_pte_group_nom_luma[],
5694 double time_per_pte_group_vblank_luma[],
5695 double time_per_pte_group_flip_luma[],
5696 double time_per_pte_group_nom_chroma[],
5697 double time_per_pte_group_vblank_chroma[],
5698 double time_per_pte_group_flip_chroma[])
5699 {
5700 unsigned int meta_chunk_width = 0;
5701 unsigned int min_meta_chunk_width = 0;
5702 unsigned int meta_chunk_per_row_int = 0;
5703 unsigned int meta_row_remainder = 0;
5704 unsigned int meta_chunk_threshold = 0;
5705 unsigned int meta_chunks_per_row_ub = 0;
5706 unsigned int meta_chunk_width_chroma = 0;
5707 unsigned int min_meta_chunk_width_chroma = 0;
5708 unsigned int meta_chunk_per_row_int_chroma = 0;
5709 unsigned int meta_row_remainder_chroma = 0;
5710 unsigned int meta_chunk_threshold_chroma = 0;
5711 unsigned int meta_chunks_per_row_ub_chroma = 0;
5712 unsigned int dpte_group_width_luma = 0;
5713 unsigned int dpte_groups_per_row_luma_ub = 0;
5714 unsigned int dpte_group_width_chroma = 0;
5715 unsigned int dpte_groups_per_row_chroma_ub = 0;
5716 unsigned int k;
5717
5718 for (k = 0; k < NumberOfActivePlanes; ++k) {
5719 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
5720 if (BytePerPixelC[k] == 0) {
5721 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
5722 } else {
5723 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
5724 }
5725 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
5726 if (BytePerPixelC[k] == 0) {
5727 DST_Y_PER_META_ROW_NOM_C[k] = 0;
5728 } else {
5729 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
5730 }
5731 }
5732
5733 for (k = 0; k < NumberOfActivePlanes; ++k) {
5734 if (DCCEnable[k] == true) {
5735 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
5736 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
5737 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
5738 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
5739 if (SourceScan[k] != dm_vert) {
5740 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
5741 } else {
5742 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
5743 }
5744 if (meta_row_remainder <= meta_chunk_threshold) {
5745 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
5746 } else {
5747 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
5748 }
5749 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5750 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5751 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5752 if (BytePerPixelC[k] == 0) {
5753 TimePerChromaMetaChunkNominal[k] = 0;
5754 TimePerChromaMetaChunkVBlank[k] = 0;
5755 TimePerChromaMetaChunkFlip[k] = 0;
5756 } else {
5757 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
5758 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
5759 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
5760 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
5761 if (SourceScan[k] != dm_vert) {
5762 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
5763 } else {
5764 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
5765 }
5766 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
5767 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
5768 } else {
5769 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
5770 }
5771 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5772 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5773 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5774 }
5775 } else {
5776 TimePerMetaChunkNominal[k] = 0;
5777 TimePerMetaChunkVBlank[k] = 0;
5778 TimePerMetaChunkFlip[k] = 0;
5779 TimePerChromaMetaChunkNominal[k] = 0;
5780 TimePerChromaMetaChunkVBlank[k] = 0;
5781 TimePerChromaMetaChunkFlip[k] = 0;
5782 }
5783 }
5784
5785 for (k = 0; k < NumberOfActivePlanes; ++k) {
5786 if (GPUVMEnable == true) {
5787 if (SourceScan[k] != dm_vert) {
5788 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
5789 } else {
5790 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
5791 }
5792 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
5793 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5794 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5795 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5796 if (BytePerPixelC[k] == 0) {
5797 time_per_pte_group_nom_chroma[k] = 0;
5798 time_per_pte_group_vblank_chroma[k] = 0;
5799 time_per_pte_group_flip_chroma[k] = 0;
5800 } else {
5801 if (SourceScan[k] != dm_vert) {
5802 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
5803 } else {
5804 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
5805 }
5806 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
5807 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5808 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5809 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5810 }
5811 } else {
5812 time_per_pte_group_nom_luma[k] = 0;
5813 time_per_pte_group_vblank_luma[k] = 0;
5814 time_per_pte_group_flip_luma[k] = 0;
5815 time_per_pte_group_nom_chroma[k] = 0;
5816 time_per_pte_group_vblank_chroma[k] = 0;
5817 time_per_pte_group_flip_chroma[k] = 0;
5818 }
5819 }
5820 }
5821
/*
 * CalculateVMGroupAndRequestTimes() - per-plane time budget for each VM group
 * and each VM request, for the vblank and immediate-flip windows.
 *
 * For planes where GPUVMEnable is set and either DCCEnable[k] is set or
 * GPUVMMaxPageTableLevels > 1, the routine counts
 *   - groups:   dml_ceil(bytes / vm_group_bytes[k], 1) summed over the byte
 *               counts in play, and
 *   - requests: bytes / 64 (integer division) summed over the same counts,
 * then divides DestinationLinesToRequestVM* x HTotal / PixelClock by each.
 *
 * Which byte counts are in play:
 *   - DCC off:                       dpde0 bytes only
 *   - DCC on, page table levels == 1: meta PTE bytes only
 *   - DCC on, otherwise:              both, and the group count gets one
 *                                     extra group (two when chroma is present)
 * Chroma byte counts are added only when BytePerPixelC[k] > 0.
 *
 * With GPUVMMaxPageTableLevels > 2 all four results are halved.  For every
 * other plane the four outputs are set to 0.
 *
 * dpte_row_width_luma_ub and dpte_row_width_chroma_ub are accepted but not
 * read by this function.
 */
static void CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActivePlanes,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		int dpte_row_width_luma_ub[],
		int dpte_row_width_chroma_ub[],
		int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		int meta_pte_bytes_per_frame_ub_l[],
		int meta_pte_bytes_per_frame_ub_c[],
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	unsigned int plane;

	for (plane = 0; plane < NumberOfActivePlanes; ++plane) {
		bool has_chroma;
		bool walks_dpde0;
		bool walks_meta;
		double group_sum;
		unsigned int request_sum;
		int groups_per_stage;
		int requests_per_stage;
		double vblank_time;
		double flip_time;

		if (!(GPUVMEnable == true
				&& (DCCEnable[plane] == true || GPUVMMaxPageTableLevels > 1))) {
			TimePerVMGroupVBlank[plane] = 0;
			TimePerVMGroupFlip[plane] = 0;
			TimePerVMRequestVBlank[plane] = 0;
			TimePerVMRequestFlip[plane] = 0;
			continue;
		}

		has_chroma = BytePerPixelC[plane] > 0;
		walks_meta = DCCEnable[plane];
		walks_dpde0 = !walks_meta || GPUVMMaxPageTableLevels != 1;

		/* Extra groups are only charged when both tables are walked. */
		group_sum = 0;
		if (walks_dpde0 && walks_meta)
			group_sum = has_chroma ? 2 : 1;
		request_sum = 0;

		if (walks_dpde0) {
			group_sum += dml_ceil((double) (dpde0_bytes_per_frame_ub_l[plane])
					/ (double) (vm_group_bytes[plane]), 1);
			request_sum += dpde0_bytes_per_frame_ub_l[plane] / 64;
			if (has_chroma) {
				group_sum += dml_ceil((double) (dpde0_bytes_per_frame_ub_c[plane])
						/ (double) (vm_group_bytes[plane]), 1);
				request_sum += dpde0_bytes_per_frame_ub_c[plane] / 64;
			}
		}

		if (walks_meta) {
			group_sum += dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[plane])
					/ (double) (vm_group_bytes[plane]), 1);
			/* Signed division first, then folded into the unsigned total. */
			request_sum += meta_pte_bytes_per_frame_ub_l[plane] / 64;
			if (has_chroma) {
				group_sum += dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[plane])
						/ (double) (vm_group_bytes[plane]), 1);
				request_sum += meta_pte_bytes_per_frame_ub_c[plane] / 64;
			}
		}

		/* Both counts are held as int before being used as divisors. */
		groups_per_stage = group_sum;
		requests_per_stage = request_sum;

		vblank_time = DestinationLinesToRequestVMInVBlank[plane] * HTotal[plane] / PixelClock[plane];
		flip_time = DestinationLinesToRequestVMInImmediateFlip[plane] * HTotal[plane] / PixelClock[plane];

		TimePerVMGroupVBlank[plane] = vblank_time / groups_per_stage;
		TimePerVMGroupFlip[plane] = flip_time / groups_per_stage;
		TimePerVMRequestVBlank[plane] = vblank_time / requests_per_stage;
		TimePerVMRequestFlip[plane] = flip_time / requests_per_stage;

		if (GPUVMMaxPageTableLevels > 2) {
			TimePerVMGroupVBlank[plane] /= 2;
			TimePerVMGroupFlip[plane] /= 2;
			TimePerVMRequestVBlank[plane] /= 2;
			TimePerVMRequestFlip[plane] /= 2;
		}
	}
}
5932
CalculateStutterEfficiency(int NumberOfActivePlanes,long ROBBufferSizeInKByte,double TotalDataReadBandwidth,double DCFCLK,double ReturnBW,double SRExitTime,bool SynchronizedVBlank,int DPPPerPlane[],unsigned int DETBufferSizeY[],int BytePerPixelY[],double BytePerPixelDETY[],double SwathWidthY[],int SwathHeightY[],int SwathHeightC[],double DCCRateLuma[],double DCCRateChroma[],int HTotal[],int VTotal[],double PixelClock[],double VRatio[],enum scan_direction_class SourceScan[],int BlockHeight256BytesY[],int BlockWidth256BytesY[],int BlockHeight256BytesC[],int BlockWidth256BytesC[],int DCCYMaxUncompressedBlock[],int DCCCMaxUncompressedBlock[],int VActive[],bool DCCEnable[],bool WritebackEnable[],double ReadBandwidthPlaneLuma[],double ReadBandwidthPlaneChroma[],double meta_row_bw[],double dpte_row_bw[],double * StutterEfficiencyNotIncludingVBlank,double * StutterEfficiency,double * StutterPeriodOut)5933 static void CalculateStutterEfficiency(
5934 int NumberOfActivePlanes,
5935 long ROBBufferSizeInKByte,
5936 double TotalDataReadBandwidth,
5937 double DCFCLK,
5938 double ReturnBW,
5939 double SRExitTime,
5940 bool SynchronizedVBlank,
5941 int DPPPerPlane[],
5942 unsigned int DETBufferSizeY[],
5943 int BytePerPixelY[],
5944 double BytePerPixelDETY[],
5945 double SwathWidthY[],
5946 int SwathHeightY[],
5947 int SwathHeightC[],
5948 double DCCRateLuma[],
5949 double DCCRateChroma[],
5950 int HTotal[],
5951 int VTotal[],
5952 double PixelClock[],
5953 double VRatio[],
5954 enum scan_direction_class SourceScan[],
5955 int BlockHeight256BytesY[],
5956 int BlockWidth256BytesY[],
5957 int BlockHeight256BytesC[],
5958 int BlockWidth256BytesC[],
5959 int DCCYMaxUncompressedBlock[],
5960 int DCCCMaxUncompressedBlock[],
5961 int VActive[],
5962 bool DCCEnable[],
5963 bool WritebackEnable[],
5964 double ReadBandwidthPlaneLuma[],
5965 double ReadBandwidthPlaneChroma[],
5966 double meta_row_bw[],
5967 double dpte_row_bw[],
5968 double *StutterEfficiencyNotIncludingVBlank,
5969 double *StutterEfficiency,
5970 double *StutterPeriodOut)
5971 {
5972 double FullDETBufferingTimeY[DC__NUM_DPP__MAX] = { 0 };
5973 double FrameTimeForMinFullDETBufferingTime = 0;
5974 double StutterPeriod = 0;
5975 double AverageReadBandwidth = 0;
5976 double TotalRowReadBandwidth = 0;
5977 double AverageDCCCompressionRate = 0;
5978 double PartOfBurstThatFitsInROB = 0;
5979 double StutterBurstTime = 0;
5980 int TotalActiveWriteback = 0;
5981 double VBlankTime = 0;
5982 double SmallestVBlank = 0;
5983 int BytePerPixelYCriticalPlane = 0;
5984 double SwathWidthYCriticalPlane = 0;
5985 double LinesInDETY[DC__NUM_DPP__MAX] = { 0 };
5986 double LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX] = { 0 };
5987 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
5988 double MaximumEffectiveCompressionLuma = 0;
5989 double MaximumEffectiveCompressionChroma = 0;
5990 unsigned int k;
5991
5992 for (k = 0; k < NumberOfActivePlanes; ++k) {
5993 LinesInDETY[k] = DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k];
5994 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5995 FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
5996 }
5997
5998 StutterPeriod = FullDETBufferingTimeY[0];
5999 FrameTimeForMinFullDETBufferingTime = VTotal[0] * HTotal[0] / PixelClock[0];
6000 BytePerPixelYCriticalPlane = BytePerPixelY[0];
6001 SwathWidthYCriticalPlane = SwathWidthY[0];
6002 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[0]
6003 - (LinesInDETY[0] - LinesInDETYRoundedDownToSwath[0]);
6004
6005 for (k = 0; k < NumberOfActivePlanes; ++k) {
6006 if (FullDETBufferingTimeY[k] < StutterPeriod) {
6007 StutterPeriod = FullDETBufferingTimeY[k];
6008 FrameTimeForMinFullDETBufferingTime = VTotal[k] * HTotal[k] / PixelClock[k];
6009 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6010 SwathWidthYCriticalPlane = SwathWidthY[k];
6011 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k]
6012 - (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k]);
6013 }
6014 }
6015
6016 AverageReadBandwidth = 0;
6017 TotalRowReadBandwidth = 0;
6018 for (k = 0; k < NumberOfActivePlanes; ++k) {
6019 if (DCCEnable[k] == true) {
6020 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k])
6021 || (SourceScan[k] != dm_vert
6022 && BlockHeight256BytesY[k] > SwathHeightY[k])
6023 || DCCYMaxUncompressedBlock[k] < 256) {
6024 MaximumEffectiveCompressionLuma = 2;
6025 } else {
6026 MaximumEffectiveCompressionLuma = 4;
6027 }
6028 AverageReadBandwidth = AverageReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(DCCRateLuma[k], MaximumEffectiveCompressionLuma);
6029
6030 if (ReadBandwidthPlaneChroma[k] > 0) {
6031 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6032 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k])
6033 || DCCCMaxUncompressedBlock[k] < 256) {
6034 MaximumEffectiveCompressionChroma = 2;
6035 } else {
6036 MaximumEffectiveCompressionChroma = 4;
6037 }
6038 AverageReadBandwidth = AverageReadBandwidth + ReadBandwidthPlaneChroma[k] / dml_min(DCCRateChroma[k], MaximumEffectiveCompressionChroma);
6039 }
6040 } else {
6041 AverageReadBandwidth = AverageReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
6042 }
6043 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
6044 }
6045
6046 AverageDCCCompressionRate = TotalDataReadBandwidth / AverageReadBandwidth;
6047 PartOfBurstThatFitsInROB = dml_min(StutterPeriod * TotalDataReadBandwidth, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6048 StutterBurstTime = PartOfBurstThatFitsInROB / AverageDCCCompressionRate / ReturnBW + (StutterPeriod * TotalDataReadBandwidth
6049 - PartOfBurstThatFitsInROB) / (DCFCLK * 64) + StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6050 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6051
6052 TotalActiveWriteback = 0;
6053 for (k = 0; k < NumberOfActivePlanes; ++k) {
6054 if (WritebackEnable[k] == true) {
6055 TotalActiveWriteback = TotalActiveWriteback + 1;
6056 }
6057 }
6058
6059 if (TotalActiveWriteback == 0) {
6060 *StutterEfficiencyNotIncludingVBlank = (1
6061 - (SRExitTime + StutterBurstTime) / StutterPeriod) * 100;
6062 } else {
6063 *StutterEfficiencyNotIncludingVBlank = 0;
6064 }
6065
6066 if (SynchronizedVBlank == true || NumberOfActivePlanes == 1) {
6067 SmallestVBlank = (VTotal[0] - VActive[0]) * HTotal[0] / PixelClock[0];
6068 } else {
6069 SmallestVBlank = 0;
6070 }
6071 for (k = 0; k < NumberOfActivePlanes; ++k) {
6072 if (SynchronizedVBlank == true || NumberOfActivePlanes == 1) {
6073 VBlankTime = (VTotal[k] - VActive[k]) * HTotal[k] / PixelClock[k];
6074 } else {
6075 VBlankTime = 0;
6076 }
6077 SmallestVBlank = dml_min(SmallestVBlank, VBlankTime);
6078 }
6079
6080 *StutterEfficiency = (*StutterEfficiencyNotIncludingVBlank / 100.0 * (FrameTimeForMinFullDETBufferingTime - SmallestVBlank) + SmallestVBlank) / FrameTimeForMinFullDETBufferingTime * 100;
6081
6082 if (StutterPeriodOut)
6083 *StutterPeriodOut = StutterPeriod;
6084 }
6085
CalculateSwathAndDETConfiguration(bool ForceSingleDPP,int NumberOfActivePlanes,unsigned int DETBufferSizeInKByte,double MaximumSwathWidthLuma[],double MaximumSwathWidthChroma[],enum scan_direction_class SourceScan[],enum source_format_class SourcePixelFormat[],enum dm_swizzle_mode SurfaceTiling[],int ViewportWidth[],int ViewportHeight[],int SurfaceWidthY[],int SurfaceWidthC[],int SurfaceHeightY[],int SurfaceHeightC[],int Read256BytesBlockHeightY[],int Read256BytesBlockHeightC[],int Read256BytesBlockWidthY[],int Read256BytesBlockWidthC[],enum odm_combine_mode ODMCombineEnabled[],int BlendingAndTiming[],int BytePerPixY[],int BytePerPixC[],double BytePerPixDETY[],double BytePerPixDETC[],int HActive[],double HRatio[],double HRatioChroma[],int DPPPerPlane[],int swath_width_luma_ub[],int swath_width_chroma_ub[],double SwathWidth[],double SwathWidthChroma[],int SwathHeightY[],int SwathHeightC[],unsigned int DETBufferSizeY[],unsigned int DETBufferSizeC[],bool ViewportSizeSupportPerPlane[],bool * ViewportSizeSupport)6086 static void CalculateSwathAndDETConfiguration(
6087 bool ForceSingleDPP,
6088 int NumberOfActivePlanes,
6089 unsigned int DETBufferSizeInKByte,
6090 double MaximumSwathWidthLuma[],
6091 double MaximumSwathWidthChroma[],
6092 enum scan_direction_class SourceScan[],
6093 enum source_format_class SourcePixelFormat[],
6094 enum dm_swizzle_mode SurfaceTiling[],
6095 int ViewportWidth[],
6096 int ViewportHeight[],
6097 int SurfaceWidthY[],
6098 int SurfaceWidthC[],
6099 int SurfaceHeightY[],
6100 int SurfaceHeightC[],
6101 int Read256BytesBlockHeightY[],
6102 int Read256BytesBlockHeightC[],
6103 int Read256BytesBlockWidthY[],
6104 int Read256BytesBlockWidthC[],
6105 enum odm_combine_mode ODMCombineEnabled[],
6106 int BlendingAndTiming[],
6107 int BytePerPixY[],
6108 int BytePerPixC[],
6109 double BytePerPixDETY[],
6110 double BytePerPixDETC[],
6111 int HActive[],
6112 double HRatio[],
6113 double HRatioChroma[],
6114 int DPPPerPlane[],
6115 int swath_width_luma_ub[],
6116 int swath_width_chroma_ub[],
6117 double SwathWidth[],
6118 double SwathWidthChroma[],
6119 int SwathHeightY[],
6120 int SwathHeightC[],
6121 unsigned int DETBufferSizeY[],
6122 unsigned int DETBufferSizeC[],
6123 bool ViewportSizeSupportPerPlane[],
6124 bool *ViewportSizeSupport)
6125 {
6126 int MaximumSwathHeightY[DC__NUM_DPP__MAX] = { 0 };
6127 int MaximumSwathHeightC[DC__NUM_DPP__MAX] = { 0 };
6128 int MinimumSwathHeightY = 0;
6129 int MinimumSwathHeightC = 0;
6130 long RoundedUpMaxSwathSizeBytesY = 0;
6131 long RoundedUpMaxSwathSizeBytesC = 0;
6132 long RoundedUpMinSwathSizeBytesY = 0;
6133 long RoundedUpMinSwathSizeBytesC = 0;
6134 long RoundedUpSwathSizeBytesY = 0;
6135 long RoundedUpSwathSizeBytesC = 0;
6136 double SwathWidthSingleDPP[DC__NUM_DPP__MAX] = { 0 };
6137 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX] = { 0 };
6138 int k;
6139
6140 CalculateSwathWidth(
6141 ForceSingleDPP,
6142 NumberOfActivePlanes,
6143 SourcePixelFormat,
6144 SourceScan,
6145 ViewportWidth,
6146 ViewportHeight,
6147 SurfaceWidthY,
6148 SurfaceWidthC,
6149 SurfaceHeightY,
6150 SurfaceHeightC,
6151 ODMCombineEnabled,
6152 BytePerPixY,
6153 BytePerPixC,
6154 Read256BytesBlockHeightY,
6155 Read256BytesBlockHeightC,
6156 Read256BytesBlockWidthY,
6157 Read256BytesBlockWidthC,
6158 BlendingAndTiming,
6159 HActive,
6160 HRatio,
6161 DPPPerPlane,
6162 SwathWidthSingleDPP,
6163 SwathWidthSingleDPPChroma,
6164 SwathWidth,
6165 SwathWidthChroma,
6166 MaximumSwathHeightY,
6167 MaximumSwathHeightC,
6168 swath_width_luma_ub,
6169 swath_width_chroma_ub);
6170
6171 *ViewportSizeSupport = true;
6172 for (k = 0; k < NumberOfActivePlanes; ++k) {
6173 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32
6174 || SourcePixelFormat[k] == dm_444_16
6175 || SourcePixelFormat[k] == dm_mono_16
6176 || SourcePixelFormat[k] == dm_mono_8
6177 || SourcePixelFormat[k] == dm_rgbe)) {
6178 if (SurfaceTiling[k] == dm_sw_linear
6179 || (SourcePixelFormat[k] == dm_444_64
6180 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6181 && SourceScan[k] != dm_vert)) {
6182 MinimumSwathHeightY = MaximumSwathHeightY[k];
6183 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6184 MinimumSwathHeightY = MaximumSwathHeightY[k];
6185 } else {
6186 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6187 }
6188 MinimumSwathHeightC = MaximumSwathHeightC[k];
6189 } else {
6190 if (SurfaceTiling[k] == dm_sw_linear) {
6191 MinimumSwathHeightY = MaximumSwathHeightY[k];
6192 MinimumSwathHeightC = MaximumSwathHeightC[k];
6193 } else if (SourcePixelFormat[k] == dm_rgbe_alpha
6194 && SourceScan[k] == dm_vert) {
6195 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6196 MinimumSwathHeightC = MaximumSwathHeightC[k];
6197 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6198 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6199 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6200 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6201 MinimumSwathHeightY = MaximumSwathHeightY[k];
6202 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6203 } else {
6204 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6205 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6206 }
6207 }
6208
6209 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k]
6210 * MaximumSwathHeightY[k];
6211 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k]
6212 * MinimumSwathHeightY;
6213 if (SourcePixelFormat[k] == dm_420_10) {
6214 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
6215 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
6216 }
6217 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k]
6218 * MaximumSwathHeightC[k];
6219 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k]
6220 * MinimumSwathHeightC;
6221 if (SourcePixelFormat[k] == dm_420_10) {
6222 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
6223 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
6224 }
6225
6226 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC
6227 <= DETBufferSizeInKByte * 1024 / 2) {
6228 SwathHeightY[k] = MaximumSwathHeightY[k];
6229 SwathHeightC[k] = MaximumSwathHeightC[k];
6230 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6231 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6232 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6233 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC
6234 <= DETBufferSizeInKByte * 1024 / 2) {
6235 SwathHeightY[k] = MinimumSwathHeightY;
6236 SwathHeightC[k] = MaximumSwathHeightC[k];
6237 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6238 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6239 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6240 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC
6241 <= DETBufferSizeInKByte * 1024 / 2) {
6242 SwathHeightY[k] = MaximumSwathHeightY[k];
6243 SwathHeightC[k] = MinimumSwathHeightC;
6244 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6245 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6246 } else {
6247 SwathHeightY[k] = MinimumSwathHeightY;
6248 SwathHeightC[k] = MinimumSwathHeightC;
6249 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6250 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6251 }
6252
6253 if (SwathHeightC[k] == 0) {
6254 DETBufferSizeY[k] = DETBufferSizeInKByte * 1024;
6255 DETBufferSizeC[k] = 0;
6256 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
6257 DETBufferSizeY[k] = DETBufferSizeInKByte * 1024 / 2;
6258 DETBufferSizeC[k] = DETBufferSizeInKByte * 1024 / 2;
6259 } else {
6260 DETBufferSizeY[k] = DETBufferSizeInKByte * 1024 * 2 / 3;
6261 DETBufferSizeC[k] = DETBufferSizeInKByte * 1024 / 3;
6262 }
6263
6264 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC
6265 > DETBufferSizeInKByte * 1024 / 2
6266 || SwathWidth[k] > MaximumSwathWidthLuma[k]
6267 || (SwathHeightC[k] > 0
6268 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
6269 *ViewportSizeSupport = false;
6270 ViewportSizeSupportPerPlane[k] = false;
6271 } else {
6272 ViewportSizeSupportPerPlane[k] = true;
6273 }
6274 }
6275 }
6276
CalculateSwathWidth(bool ForceSingleDPP,int NumberOfActivePlanes,enum source_format_class SourcePixelFormat[],enum scan_direction_class SourceScan[],unsigned int ViewportWidth[],unsigned int ViewportHeight[],unsigned int SurfaceWidthY[],unsigned int SurfaceWidthC[],unsigned int SurfaceHeightY[],unsigned int SurfaceHeightC[],enum odm_combine_mode ODMCombineEnabled[],int BytePerPixY[],int BytePerPixC[],int Read256BytesBlockHeightY[],int Read256BytesBlockHeightC[],int Read256BytesBlockWidthY[],int Read256BytesBlockWidthC[],int BlendingAndTiming[],unsigned int HActive[],double HRatio[],int DPPPerPlane[],double SwathWidthSingleDPPY[],double SwathWidthSingleDPPC[],double SwathWidthY[],double SwathWidthC[],int MaximumSwathHeightY[],int MaximumSwathHeightC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[])6277 static void CalculateSwathWidth(
6278 bool ForceSingleDPP,
6279 int NumberOfActivePlanes,
6280 enum source_format_class SourcePixelFormat[],
6281 enum scan_direction_class SourceScan[],
6282 unsigned int ViewportWidth[],
6283 unsigned int ViewportHeight[],
6284 unsigned int SurfaceWidthY[],
6285 unsigned int SurfaceWidthC[],
6286 unsigned int SurfaceHeightY[],
6287 unsigned int SurfaceHeightC[],
6288 enum odm_combine_mode ODMCombineEnabled[],
6289 int BytePerPixY[],
6290 int BytePerPixC[],
6291 int Read256BytesBlockHeightY[],
6292 int Read256BytesBlockHeightC[],
6293 int Read256BytesBlockWidthY[],
6294 int Read256BytesBlockWidthC[],
6295 int BlendingAndTiming[],
6296 unsigned int HActive[],
6297 double HRatio[],
6298 int DPPPerPlane[],
6299 double SwathWidthSingleDPPY[],
6300 double SwathWidthSingleDPPC[],
6301 double SwathWidthY[],
6302 double SwathWidthC[],
6303 int MaximumSwathHeightY[],
6304 int MaximumSwathHeightC[],
6305 unsigned int swath_width_luma_ub[],
6306 unsigned int swath_width_chroma_ub[])
6307 {
6308 unsigned int k, j;
6309 long surface_width_ub_l;
6310 long surface_height_ub_l;
6311 long surface_width_ub_c;
6312 long surface_height_ub_c;
6313
6314 for (k = 0; k < NumberOfActivePlanes; ++k) {
6315 enum odm_combine_mode MainPlaneODMCombine = 0;
6316
6317 if (SourceScan[k] != dm_vert) {
6318 SwathWidthSingleDPPY[k] = ViewportWidth[k];
6319 } else {
6320 SwathWidthSingleDPPY[k] = ViewportHeight[k];
6321 }
6322
6323 MainPlaneODMCombine = ODMCombineEnabled[k];
6324 for (j = 0; j < NumberOfActivePlanes; ++j) {
6325 if (BlendingAndTiming[k] == j) {
6326 MainPlaneODMCombine = ODMCombineEnabled[j];
6327 }
6328 }
6329
6330 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) {
6331 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
6332 } else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) {
6333 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
6334 } else if (DPPPerPlane[k] == 2) {
6335 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
6336 } else {
6337 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6338 }
6339
6340 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
6341 SwathWidthC[k] = SwathWidthY[k] / 2;
6342 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
6343 } else {
6344 SwathWidthC[k] = SwathWidthY[k];
6345 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
6346 }
6347
6348 if (ForceSingleDPP == true) {
6349 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6350 SwathWidthC[k] = SwathWidthSingleDPPC[k];
6351 }
6352
6353 surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6354 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6355
6356 if (SourceScan[k] != dm_vert) {
6357 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
6358 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
6359 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (long) dml_ceil(SwathWidthY[k] - 1,
6360 Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
6361 if (BytePerPixC[k] > 0) {
6362 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6363 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, (long) dml_ceil(SwathWidthC[k] - 1,
6364 Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
6365 } else {
6366 swath_width_chroma_ub[k] = 0;
6367 }
6368 } else {
6369 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
6370 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
6371 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (long) dml_ceil(SwathWidthY[k] - 1,
6372 Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
6373 if (BytePerPixC[k] > 0) {
6374 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
6375 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, (long) dml_ceil(SwathWidthC[k] - 1,
6376 Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
6377 } else {
6378 swath_width_chroma_ub[k] = 0;
6379 }
6380 }
6381 }
6382 }
6383
/*
 * CalculateExtraLatency() - extra latency as the sum of two terms:
 *
 *   (RoundTripPingLatencyCycles + 32) / DCFCLK
 *   + CalculateExtraLatencyBytes(...) / ReturnBW
 *
 * All remaining arguments are forwarded unchanged to
 * CalculateExtraLatencyBytes().
 */
static double CalculateExtraLatency(
		long RoundTripPingLatencyCycles,
		long ReorderingBytes,
		double DCFCLK,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		double ReturnBW,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	const double OutstandingBytes = CalculateExtraLatencyBytes(
			ReorderingBytes,
			TotalNumberOfActiveDPP,
			PixelChunkSizeInKByte,
			TotalNumberOfDCCActiveDPP,
			MetaChunkSize,
			GPUVMEnable,
			HostVMEnable,
			NumberOfActivePlanes,
			NumberOfDPP,
			dpte_group_bytes,
			PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
			PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
			HostVMMinPageSize,
			HostVMMaxNonCachedPageTableLevels);
	/* Round-trip term: ping cycles plus a fixed 32, expressed via DCFCLK. */
	const double RoundTripTerm = (RoundTripPingLatencyCycles + 32) / DCFCLK;
	/* Byte term: the extra bytes divided by the return bandwidth. */
	const double ByteTerm = OutstandingBytes / ReturnBW;

	return RoundTripTerm + ByteTerm;
}
6422
/*
 * CalculateExtraLatencyBytes() - byte count used by the extra-latency terms.
 *
 * The base amount is
 *   ReorderingBytes
 *   + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *      + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024
 *
 * When GPUVMEnable is set, each active plane additionally contributes
 *   NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * levels) * factor
 *
 * With both GPUVMEnable and HostVMEnable set, "factor" is the ratio
 * PixelMixedWithVMData / VMDataOnly of the two percentage arguments and
 * "levels" is HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2 (never
 * below zero) as HostVMMinPageSize is below 2048, below 1048576, or larger.
 * Otherwise factor is 1 and levels is 0.
 */
static double CalculateExtraLatencyBytes(
		long ReorderingBytes,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	/* Values that apply when host VM translation is not in play. */
	double InefficiencyFactor = 1;
	int DynamicLevels = 0;
	double TotalBytes;
	unsigned int plane;

	if (GPUVMEnable == true && HostVMEnable == true) {
		InefficiencyFactor = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
				/ PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;

		/* Larger minimum page sizes remove one or two table levels. */
		if (HostVMMinPageSize < 2048)
			DynamicLevels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize < 1048576)
			DynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			DynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	}

	TotalBytes = ReorderingBytes
			+ (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	/* Without GPUVM there is no per-plane DPTE contribution. */
	if (!GPUVMEnable)
		return TotalBytes;

	for (plane = 0; plane < NumberOfActivePlanes; ++plane)
		TotalBytes += NumberOfDPP[plane] * dpte_group_bytes[plane] * (1 + 8 * DynamicLevels) * InefficiencyFactor;

	return TotalBytes;
}
6467
6468
/*
 * CalculateUrgentLatency() - largest of the three urgent latencies, optionally
 * adjusted for the fabric clock.
 *
 * Returns dml_max3() of the three latency arguments. When
 * DoUrgentLatencyAdjustment is set, the result is increased by
 *   UrgentLatencyAdjustmentFabricClockComponent
 *   * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1)
 */
static double CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	const double WorstLatency = dml_max3(UrgentLatencyPixelDataOnly,
			UrgentLatencyPixelMixedWithVMData,
			UrgentLatencyVMDataOnly);

	if (!DoUrgentLatencyAdjustment)
		return WorstLatency;

	/* The term is positive when FabricClock is below the reference clock. */
	return WorstLatency + UrgentLatencyAdjustmentFabricClockComponent
			* (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
6486
UseMinimumDCFCLK(struct display_mode_lib * mode_lib,struct vba_vars_st * v,int MaxPrefetchMode,int ReorderingBytes)6487 static noinline_for_stack void UseMinimumDCFCLK(
6488 struct display_mode_lib *mode_lib,
6489 struct vba_vars_st *v,
6490 int MaxPrefetchMode,
6491 int ReorderingBytes)
6492 {
6493 double NormalEfficiency = 0;
6494 double PTEEfficiency = 0;
6495 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2] = { { 0 } };
6496 unsigned int i, j, k;
6497
6498 NormalEfficiency = (v->HostVMEnable == true ? v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
6499 : v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly) / 100.0;
6500 PTEEfficiency = (v->HostVMEnable == true ? v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly
6501 / v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData : 1.0);
6502 for (i = 0; i < mode_lib->soc.num_states; ++i) {
6503 for (j = 0; j <= 1; ++j) {
6504 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX] = { 0 };
6505 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX] = { 0 };
6506 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX] = { 0 };
6507 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX] = { 0 };
6508 double MinimumTWait = 0;
6509 double NonDPTEBandwidth = 0;
6510 double DPTEBandwidth = 0;
6511 double DCFCLKRequiredForAverageBandwidth = 0;
6512 double ExtraLatencyBytes = 0;
6513 double ExtraLatencyCycles = 0;
6514 double DCFCLKRequiredForPeakBandwidth = 0;
6515 int NoOfDPPState[DC__NUM_DPP__MAX] = { 0 };
6516 double MinimumTvmPlus2Tr0 = 0;
6517
6518 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
6519 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
6520 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
6521 + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
6522 }
6523
6524 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
6525 NoOfDPPState[k] = v->NoOfDPP[i][j][k];
6526 }
6527
6528 MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
6529 NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
6530 DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
6531 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
6532 DCFCLKRequiredForAverageBandwidth = dml_max3(v->ProjectedDCFCLKDeepSleep[i][j],
6533 (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth / (v->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
6534 (NonDPTEBandwidth + DPTEBandwidth / PTEEfficiency) / NormalEfficiency / v->ReturnBusWidth);
6535
6536 ExtraLatencyBytes = CalculateExtraLatencyBytes(ReorderingBytes, v->TotalNumberOfActiveDPP[i][j], v->PixelChunkSizeInKByte, v->TotalNumberOfDCCActiveDPP[i][j],
6537 v->MetaChunkSize, v->GPUVMEnable, v->HostVMEnable, v->NumberOfActivePlanes, NoOfDPPState, v->dpte_group_bytes,
6538 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
6539 v->HostVMMinPageSize, v->HostVMMaxNonCachedPageTableLevels);
6540 ExtraLatencyCycles = v->RoundTripPingLatencyCycles + 32 + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
6541 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
6542 double DCFCLKCyclesRequiredInPrefetch = { 0 };
6543 double ExpectedPrefetchBWAcceleration = { 0 };
6544 double PrefetchTime = { 0 };
6545
6546 PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
6547 + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
6548 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / PTEEfficiency
6549 / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0) + 2 * v->DPTEBytesPerRow[i][j][k] / PTEEfficiency
6550 / NormalEfficiency / v->ReturnBusWidth + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
6551 PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
6552 ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k]) / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
6553 DynamicMetadataVMExtraLatency[k] = (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
6554 v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
6555 PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - v->UrgLatency[i] * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels
6556 : v->GPUVMMaxPageTableLevels - 2) * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - DynamicMetadataVMExtraLatency[k];
6557
6558 if (PrefetchTime > 0) {
6559 double ExpectedVRatioPrefetch = { 0 };
6560 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
6561 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
6562 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
6563 if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
6564 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
6565 + NoOfDPPState[k] * DPTEBandwidth / PTEEfficiency / NormalEfficiency / v->ReturnBusWidth;
6566 }
6567 } else {
6568 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
6569 }
6570 if (v->DynamicMetadataEnable[k] == true) {
6571 double TsetupPipe = { 0 };
6572 double TdmbfPipe = { 0 };
6573 double TdmsksPipe = { 0 };
6574 double TdmecPipe = { 0 };
6575 double AllowedTimeForUrgentExtraLatency = { 0 };
6576
6577 CalculateDynamicMetadataParameters(
6578 v->MaxInterDCNTileRepeaters,
6579 v->RequiredDPPCLK[i][j][k],
6580 v->RequiredDISPCLK[i][j],
6581 v->ProjectedDCFCLKDeepSleep[i][j],
6582 v->PixelClock[k],
6583 v->HTotal[k],
6584 v->VTotal[k] - v->VActive[k],
6585 v->DynamicMetadataTransmittedBytes[k],
6586 v->DynamicMetadataLinesBeforeActiveRequired[k],
6587 v->Interlace[k],
6588 v->ProgressiveToInterlaceUnitInOPP,
6589 &TsetupPipe,
6590 &TdmbfPipe,
6591 &TdmecPipe,
6592 &TdmsksPipe);
6593 AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TsetupPipe
6594 - TdmbfPipe - TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
6595 if (AllowedTimeForUrgentExtraLatency > 0) {
6596 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(DCFCLKRequiredForPeakBandwidthPerPlane[k],
6597 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
6598 } else {
6599 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
6600 }
6601 }
6602 }
6603 DCFCLKRequiredForPeakBandwidth = 0;
6604 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
6605 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
6606 }
6607 MinimumTvmPlus2Tr0 = v->UrgLatency[i] * (v->GPUVMEnable == true ? (v->HostVMEnable == true ?
6608 (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) : 0);
6609 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
6610 double MaximumTvmPlus2Tr0PlusTsw = { 0 };
6611 MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
6612 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
6613 DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
6614 } else {
6615 DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth, 2 * ExtraLatencyCycles
6616 / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
6617 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
6618 }
6619 }
6620 v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * (1 + mode_lib->vba.PercentMarginOverMinimumRequiredDCFCLK / 100)
6621 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
6622 }
6623 }
6624 }
6625
6626