1 /*
2 * Copyright 2017 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: AMD
23 *
24 */
25
26
27 #include "../display_mode_lib.h"
28 #include "../dml_inline_defs.h"
29 #include "../display_mode_vba.h"
30 #include "display_mode_vba_21.h"
31
32
33 /*
34 * NOTE:
35 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
36 *
37 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
38 * ways. Unless there is something clearly wrong with it the code should
39 * remain as-is as it provides us with a guarantee from HW that it is correct.
40 */
41 typedef struct {
42 double DPPCLK;
43 double DISPCLK;
44 double PixelClock;
45 double DCFCLKDeepSleep;
46 unsigned int DPPPerPlane;
47 bool ScalerEnabled;
48 enum scan_direction_class SourceScan;
49 unsigned int BlockWidth256BytesY;
50 unsigned int BlockHeight256BytesY;
51 unsigned int BlockWidth256BytesC;
52 unsigned int BlockHeight256BytesC;
53 unsigned int InterlaceEnable;
54 unsigned int NumberOfCursors;
55 unsigned int VBlank;
56 unsigned int HTotal;
57 } Pipe;
58
/*
 * Host VM settings handed to CalculatePrefetchSchedule() through its
 * myHostVM argument.
 */
typedef struct {
	/* only takes effect there when GPUVMEnable is set as well */
	bool Enable;
	/* Max minus Cached gives the number of dynamic page table levels */
	unsigned int MaxPageTableLevels, CachedPageTableLevels;
} HostVM;
64
/*
 * Bits-per-pixel marker values.
 * NOTE(review): their users are outside this part of the file - confirm the
 * exact meaning against them.
 */
#define BPP_INVALID			0
#define BPP_BLENDED_PIPE		0xffffffff

/*
 * Widest image, in pixels: 5184 in general and 4096 in 4:2:0 mode. The same
 * two numbers bound sliceWidth * numSlices in dscceComputeDelay().
 */
#define DCN21_MAX_DSC_IMAGE_WIDTH	5184
#define DCN21_MAX_420_IMAGE_WIDTH	4096
69
/*
 * Forward declarations of the third and fourth stage invoked by
 * dml21_recalculate(); both work on the library context only.
 */
static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
		struct display_mode_lib *mode_lib);
/* Forward declarations of the two file-local DSC delay helpers. */
static unsigned int dscceComputeDelay(
		unsigned int bpc,
		double bpp,
		unsigned int sliceWidth,
		unsigned int numSlices,
		enum output_format_class pixelFormat);
static unsigned int dscComputeDelay(enum output_format_class pixelFormat);
80 // Super monster function with some 45 argument
81 static bool CalculatePrefetchSchedule(
82 struct display_mode_lib *mode_lib,
83 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
84 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
85 Pipe *myPipe,
86 unsigned int DSCDelay,
87 double DPPCLKDelaySubtotal,
88 double DPPCLKDelaySCL,
89 double DPPCLKDelaySCLLBOnly,
90 double DPPCLKDelayCNVCFormater,
91 double DPPCLKDelayCNVCCursor,
92 double DISPCLKDelaySubtotal,
93 unsigned int ScalerRecoutWidth,
94 enum output_format_class OutputFormat,
95 unsigned int MaxInterDCNTileRepeaters,
96 unsigned int VStartup,
97 unsigned int MaxVStartup,
98 unsigned int GPUVMPageTableLevels,
99 bool GPUVMEnable,
100 HostVM *myHostVM,
101 bool DynamicMetadataEnable,
102 int DynamicMetadataLinesBeforeActiveRequired,
103 unsigned int DynamicMetadataTransmittedBytes,
104 bool DCCEnable,
105 double UrgentLatency,
106 double UrgentExtraLatency,
107 double TCalc,
108 unsigned int PDEAndMetaPTEBytesFrame,
109 unsigned int MetaRowByte,
110 unsigned int PixelPTEBytesPerRow,
111 double PrefetchSourceLinesY,
112 unsigned int SwathWidthY,
113 double BytePerPixelDETY,
114 double VInitPreFillY,
115 unsigned int MaxNumSwathY,
116 double PrefetchSourceLinesC,
117 double BytePerPixelDETC,
118 double VInitPreFillC,
119 unsigned int MaxNumSwathC,
120 unsigned int SwathHeightY,
121 unsigned int SwathHeightC,
122 double TWait,
123 bool XFCEnabled,
124 double XFCRemoteSurfaceFlipDelay,
125 bool ProgressiveToInterlaceUnitInOPP,
126 double *DSTXAfterScaler,
127 double *DSTYAfterScaler,
128 double *DestinationLinesForPrefetch,
129 double *PrefetchBandwidth,
130 double *DestinationLinesToRequestVMInVBlank,
131 double *DestinationLinesToRequestRowInVBlank,
132 double *VRatioPrefetchY,
133 double *VRatioPrefetchC,
134 double *RequiredPrefetchPixDataBWLuma,
135 double *RequiredPrefetchPixDataBWChroma,
136 unsigned int *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata,
137 double *Tno_bw,
138 double *prefetch_vmrow_bw,
139 unsigned int *swath_width_luma_ub,
140 unsigned int *swath_width_chroma_ub,
141 unsigned int *VUpdateOffsetPix,
142 double *VUpdateWidthPix,
143 double *VReadyOffsetPix);
/*
 * Forward declarations of the file-local DFS granularity rounding helpers,
 * one rounding up and one rounding down; both take a clock and a VCO speed.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
/* Forward declaration of the file-local DCC configuration helper. */
static double CalculateDCCConfiguration(
		bool DCCEnabled,
		bool DCCProgrammingAssumesScanDirectionUnknown,
		unsigned int ViewportWidth,
		unsigned int ViewportHeight,
		unsigned int DETBufferSize,
		unsigned int RequestHeight256Byte,
		unsigned int SwathHeight,
		enum dm_swizzle_mode TilingFormat,
		unsigned int BytePerPixel,
		enum scan_direction_class ScanOrientation,
		/* pointer parameters (presumably outputs - confirm in the definition) */
		unsigned int *MaxUncompressedBlock,
		unsigned int *MaxCompressedBlock,
		unsigned int *Independent64ByteBlock);
/* Forward declaration of the file-local prefetch source line helper. */
static double CalculatePrefetchSourceLines(
		struct display_mode_lib *mode_lib,
		double VRatio,
		double vtaps,
		bool Interlace,
		bool ProgressiveToInterlaceUnitInOPP,
		unsigned int SwathHeight,
		unsigned int ViewportYStart,
		/* pointer parameters (presumably outputs - confirm in the definition) */
		double *VInitPreFill,
		unsigned int *MaxNumSwath);
/* Forward declaration of the file-local VM / row byte count helper. */
static unsigned int CalculateVMAndRowBytes(
		struct display_mode_lib *mode_lib,
		bool DCCEnable,
		unsigned int BlockHeight256Bytes,
		unsigned int BlockWidth256Bytes,
		enum source_format_class SourcePixelFormat,
		unsigned int SurfaceTiling,
		unsigned int BytePerPixel,
		enum scan_direction_class ScanDirection,
		unsigned int ViewportWidth,
		unsigned int ViewportHeight,
		unsigned int SwathWidthY,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int HostVMMaxPageTableLevels,
		unsigned int HostVMCachedPageTableLevels,
		unsigned int VMMPageSize,
		unsigned int PTEBufferSizeInRequests,
		unsigned int Pitch,
		unsigned int DCCMetaPitch,
		/* pointer parameters (presumably outputs - confirm in the definition) */
		unsigned int *MacroTileWidth,
		unsigned int *MetaRowByte,
		unsigned int *PixelPTEBytesPerRow,
		bool *PTEBufferSizeNotExceeded,
		unsigned int *dpte_row_width_ub,
		unsigned int *dpte_row_height,
		unsigned int *MetaRequestWidth,
		unsigned int *MetaRequestHeight,
		unsigned int *meta_row_width,
		unsigned int *meta_row_height,
		unsigned int *vm_group_bytes,
		unsigned int *dpte_group_bytes,
		unsigned int *PixelPTEReqWidth,
		unsigned int *PixelPTEReqHeight,
		unsigned int *PTERequestSize,
		unsigned int *DPDE0BytesFrame,
		unsigned int *MetaPTEBytesFrame);
207
/* Forward declaration of the file-local TWait helper. */
static double CalculateTWait(
		unsigned int PrefetchMode,
		double DRAMClockChangeLatency,
		double UrgentLatency,
		double SREnterPlusExitTime);
/* Forward declaration of the file-local XFC remote surface flip delay helper. */
static double CalculateRemoteSurfaceFlipDelay(
		struct display_mode_lib *mode_lib,
		double VRatio,
		double SwathWidth,
		double Bpp,
		double LineTime,
		double XFCTSlvVupdateOffset,
		double XFCTSlvVupdateWidth,
		double XFCTSlvVreadyOffset,
		double XFCXBUFLatencyTolerance,
		double XFCFillBWOverhead,
		double XFCSlvChunkSize,
		double XFCBusTransportTime,
		double TCalc,
		double TWait,
		/* pointer parameters (presumably outputs - confirm in the definition) */
		double *SrcActiveDrainRate,
		double *TInitXFill,
		double *TslvChk);
/* Forward declaration of the file-local active row bandwidth helper. */
static void CalculateActiveRowBandwidth(
		bool GPUVMEnable,
		enum source_format_class SourcePixelFormat,
		double VRatio,
		bool DCCEnable,
		double LineTime,
		unsigned int MetaRowByteLuma,
		unsigned int MetaRowByteChroma,
		unsigned int meta_row_height_luma,
		unsigned int meta_row_height_chroma,
		unsigned int PixelPTEBytesPerRowLuma,
		unsigned int PixelPTEBytesPerRowChroma,
		unsigned int dpte_row_height_luma,
		unsigned int dpte_row_height_chroma,
		/* pointer parameters (presumably outputs - confirm in the definition) */
		double *meta_row_bw,
		double *dpte_row_bw);
/* Forward declaration of the file-local flip schedule helper. */
static void CalculateFlipSchedule(
		struct display_mode_lib *mode_lib,
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
		double UrgentExtraLatency,
		double UrgentLatency,
		unsigned int GPUVMMaxPageTableLevels,
		bool HostVMEnable,
		unsigned int HostVMMaxPageTableLevels,
		unsigned int HostVMCachedPageTableLevels,
		bool GPUVMEnable,
		double PDEAndMetaPTEBytesPerFrame,
		double MetaRowBytes,
		double DPTEBytesPerRow,
		double BandwidthAvailableForImmediateFlip,
		unsigned int TotImmediateFlipBytes,
		enum source_format_class SourcePixelFormat,
		double LineTime,
		double VRatio,
		double Tno_bw,
		bool DCCEnable,
		unsigned int dpte_row_height,
		unsigned int meta_row_height,
		unsigned int dpte_row_height_chroma,
		unsigned int meta_row_height_chroma,
		/* pointer parameters (presumably outputs - confirm in the definition) */
		double *DestinationLinesToRequestVMInImmediateFlip,
		double *DestinationLinesToRequestRowInImmediateFlip,
		double *final_flip_bw,
		bool *ImmediateFlipSupportedForPipe);
/* Forward declaration of the file-local writeback delay helper. */
static double CalculateWriteBackDelay(
		enum source_format_class WritebackPixelFormat,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackLumaHTaps,
		unsigned int WritebackLumaVTaps,
		unsigned int WritebackChromaHTaps,
		unsigned int WritebackChromaVTaps,
		unsigned int WritebackDestinationWidth);
285 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
286 struct display_mode_lib *mode_lib,
287 unsigned int PrefetchMode,
288 unsigned int NumberOfActivePlanes,
289 unsigned int MaxLineBufferLines,
290 unsigned int LineBufferSize,
291 unsigned int DPPOutputBufferPixels,
292 unsigned int DETBufferSizeInKByte,
293 unsigned int WritebackInterfaceLumaBufferSize,
294 unsigned int WritebackInterfaceChromaBufferSize,
295 double DCFCLK,
296 double UrgentOutOfOrderReturn,
297 double ReturnBW,
298 bool GPUVMEnable,
299 int dpte_group_bytes[],
300 unsigned int MetaChunkSize,
301 double UrgentLatency,
302 double ExtraLatency,
303 double WritebackLatency,
304 double WritebackChunkSize,
305 double SOCCLK,
306 double DRAMClockChangeLatency,
307 double SRExitTime,
308 double SREnterPlusExitTime,
309 double DCFCLKDeepSleep,
310 int DPPPerPlane[],
311 bool DCCEnable[],
312 double DPPCLK[],
313 double SwathWidthSingleDPPY[],
314 unsigned int SwathHeightY[],
315 double ReadBandwidthPlaneLuma[],
316 unsigned int SwathHeightC[],
317 double ReadBandwidthPlaneChroma[],
318 unsigned int LBBitPerPixel[],
319 double SwathWidthY[],
320 double HRatio[],
321 unsigned int vtaps[],
322 unsigned int VTAPsChroma[],
323 double VRatio[],
324 unsigned int HTotal[],
325 double PixelClock[],
326 unsigned int BlendingAndTiming[],
327 double BytePerPixelDETY[],
328 double BytePerPixelDETC[],
329 bool WritebackEnable[],
330 enum source_format_class WritebackPixelFormat[],
331 double WritebackDestinationWidth[],
332 double WritebackDestinationHeight[],
333 double WritebackSourceHeight[],
334 enum clock_change_support *DRAMClockChangeSupport,
335 double *UrgentWatermark,
336 double *WritebackUrgentWatermark,
337 double *DRAMClockChangeWatermark,
338 double *WritebackDRAMClockChangeWatermark,
339 double *StutterExitWatermark,
340 double *StutterEnterPlusExitWatermark,
341 double *MinActiveDRAMClockChangeLatencySupported);
/* Forward declaration of the file-local DCFCLK deep sleep helper. */
static void CalculateDCFCLKDeepSleep(
		struct display_mode_lib *mode_lib,
		unsigned int NumberOfActivePlanes,
		double BytePerPixelDETY[],
		double BytePerPixelDETC[],
		double VRatio[],
		double SwathWidthY[],
		int DPPPerPlane[],
		double HRatio[],
		double PixelClock[],
		double PSCL_THROUGHPUT[],
		double PSCL_THROUGHPUT_CHROMA[],
		double DPPCLK[],
		/* pointer parameter (presumably the output - confirm in the definition) */
		double *DCFCLKDeepSleep);
/* Forward declaration of the file-local DET buffer size helper. */
static void CalculateDETBufferSize(
		unsigned int DETBufferSizeInKByte,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		/* pointer parameters (presumably outputs - confirm in the definition) */
		unsigned int *DETBufferSizeY,
		unsigned int *DETBufferSizeC);
/* Forward declaration of the file-local urgent burst factor helper. */
static void CalculateUrgentBurstFactor(
		unsigned int DETBufferSizeInKByte,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		unsigned int SwathWidthY,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioPreY,
		double VRatioPreC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		/* pointer parameters (presumably outputs - confirm in the definition) */
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorCursorPre,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorLumaPre,
		double *UrgentBurstFactorChroma,
		double *UrgentBurstFactorChromaPre,
		unsigned int *NotEnoughUrgentLatencyHiding,
		unsigned int *NotEnoughUrgentLatencyHidingPre);
385
386 static void CalculatePixelDeliveryTimes(
387 unsigned int NumberOfActivePlanes,
388 double VRatio[],
389 double VRatioPrefetchY[],
390 double VRatioPrefetchC[],
391 unsigned int swath_width_luma_ub[],
392 unsigned int swath_width_chroma_ub[],
393 int DPPPerPlane[],
394 double HRatio[],
395 double PixelClock[],
396 double PSCL_THROUGHPUT[],
397 double PSCL_THROUGHPUT_CHROMA[],
398 double DPPCLK[],
399 double BytePerPixelDETC[],
400 enum scan_direction_class SourceScan[],
401 unsigned int BlockWidth256BytesY[],
402 unsigned int BlockHeight256BytesY[],
403 unsigned int BlockWidth256BytesC[],
404 unsigned int BlockHeight256BytesC[],
405 double DisplayPipeLineDeliveryTimeLuma[],
406 double DisplayPipeLineDeliveryTimeChroma[],
407 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
408 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
409 double DisplayPipeRequestDeliveryTimeLuma[],
410 double DisplayPipeRequestDeliveryTimeChroma[],
411 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
412 double DisplayPipeRequestDeliveryTimeChromaPrefetch[]);
413
414 static void CalculateMetaAndPTETimes(
415 unsigned int NumberOfActivePlanes,
416 bool GPUVMEnable,
417 unsigned int MetaChunkSize,
418 unsigned int MinMetaChunkSizeBytes,
419 unsigned int GPUVMMaxPageTableLevels,
420 unsigned int HTotal[],
421 double VRatio[],
422 double VRatioPrefetchY[],
423 double VRatioPrefetchC[],
424 double DestinationLinesToRequestRowInVBlank[],
425 double DestinationLinesToRequestRowInImmediateFlip[],
426 double DestinationLinesToRequestVMInVBlank[],
427 double DestinationLinesToRequestVMInImmediateFlip[],
428 bool DCCEnable[],
429 double PixelClock[],
430 double BytePerPixelDETY[],
431 double BytePerPixelDETC[],
432 enum scan_direction_class SourceScan[],
433 unsigned int dpte_row_height[],
434 unsigned int dpte_row_height_chroma[],
435 unsigned int meta_row_width[],
436 unsigned int meta_row_height[],
437 unsigned int meta_req_width[],
438 unsigned int meta_req_height[],
439 int dpte_group_bytes[],
440 unsigned int PTERequestSizeY[],
441 unsigned int PTERequestSizeC[],
442 unsigned int PixelPTEReqWidthY[],
443 unsigned int PixelPTEReqHeightY[],
444 unsigned int PixelPTEReqWidthC[],
445 unsigned int PixelPTEReqHeightC[],
446 unsigned int dpte_row_width_luma_ub[],
447 unsigned int dpte_row_width_chroma_ub[],
448 unsigned int vm_group_bytes[],
449 unsigned int dpde0_bytes_per_frame_ub_l[],
450 unsigned int dpde0_bytes_per_frame_ub_c[],
451 unsigned int meta_pte_bytes_per_frame_ub_l[],
452 unsigned int meta_pte_bytes_per_frame_ub_c[],
453 double DST_Y_PER_PTE_ROW_NOM_L[],
454 double DST_Y_PER_PTE_ROW_NOM_C[],
455 double DST_Y_PER_META_ROW_NOM_L[],
456 double TimePerMetaChunkNominal[],
457 double TimePerMetaChunkVBlank[],
458 double TimePerMetaChunkFlip[],
459 double time_per_pte_group_nom_luma[],
460 double time_per_pte_group_vblank_luma[],
461 double time_per_pte_group_flip_luma[],
462 double time_per_pte_group_nom_chroma[],
463 double time_per_pte_group_vblank_chroma[],
464 double time_per_pte_group_flip_chroma[],
465 double TimePerVMGroupVBlank[],
466 double TimePerVMGroupFlip[],
467 double TimePerVMRequestVBlank[],
468 double TimePerVMRequestFlip[]);
469
/* Forward declaration of the file-local extra latency helper. */
static double CalculateExtraLatency(
		double UrgentRoundTripAndOutOfOrderLatency,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		double ReturnBW,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
		int HostVMMaxPageTableLevels,
		int HostVMCachedPageTableLevels);
486
/*
 * dml21_recalculate() - run every calculation stage on @mode_lib.
 *
 * The four stages are called back to back, in this fixed order, and each one
 * is handed the same context pointer. Nothing is returned; whatever the
 * stages produce is left for the caller in @mode_lib.
 */
void dml21_recalculate(struct display_mode_lib *mode_lib)
{
	/*
	 * The first two stages are not defined in this file
	 * (NOTE(review): presumably shared helpers from display_mode_vba.h).
	 */
	ModeSupportAndSystemConfiguration(mode_lib);
	PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);

	/* The remaining two are file-local. */
	DisplayPipeConfiguration(mode_lib);
	DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
}
494
dscceComputeDelay(unsigned int bpc,double bpp,unsigned int sliceWidth,unsigned int numSlices,enum output_format_class pixelFormat)495 static unsigned int dscceComputeDelay(
496 unsigned int bpc,
497 double bpp,
498 unsigned int sliceWidth,
499 unsigned int numSlices,
500 enum output_format_class pixelFormat)
501 {
502 // valid bpc = source bits per component in the set of {8, 10, 12}
503 // valid bpp = increments of 1/16 of a bit
504 // min = 6/7/8 in N420/N422/444, respectively
505 // max = such that compression is 1:1
506 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
507 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
508 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
509
510 // fixed value
511 unsigned int rcModelSize = 8192;
512
513 // N422/N420 operate at 2 pixels per clock
514 unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, S, ix, wx, p, l0, a, ax, l,
515 Delay, pixels;
516
517 if (pixelFormat == dm_n422 || pixelFormat == dm_420)
518 pixelsPerClock = 2;
519 // #all other modes operate at 1 pixel per clock
520 else
521 pixelsPerClock = 1;
522
523 //initial transmit delay as per PPS
524 initalXmitDelay = dml_round(rcModelSize / 2.0 / bpp / pixelsPerClock);
525
526 //compute ssm delay
527 if (bpc == 8)
528 D = 81;
529 else if (bpc == 10)
530 D = 89;
531 else
532 D = 113;
533
534 //divide by pixel per cycle to compute slice width as seen by DSC
535 w = sliceWidth / pixelsPerClock;
536
537 //422 mode has an additional cycle of delay
538 if (pixelFormat == dm_s422)
539 S = 1;
540 else
541 S = 0;
542
543 //main calculation for the dscce
544 ix = initalXmitDelay + 45;
545 wx = (w + 2) / 3;
546 p = 3 * wx - w;
547 l0 = ix / w;
548 a = ix + p * l0;
549 ax = (a + 2) / 3 + D + 6 + 1;
550 l = (ax + wx - 1) / wx;
551 if ((ix % w) == 0 && p != 0)
552 lstall = 1;
553 else
554 lstall = 0;
555 Delay = l * wx * (numSlices - 1) + ax + S + lstall + 22;
556
557 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
558 pixels = Delay * 3 * pixelsPerClock;
559 return pixels;
560 }
561
dscComputeDelay(enum output_format_class pixelFormat)562 static unsigned int dscComputeDelay(enum output_format_class pixelFormat)
563 {
564 unsigned int Delay = 0;
565
566 if (pixelFormat == dm_420) {
567 // sfr
568 Delay = Delay + 2;
569 // dsccif
570 Delay = Delay + 0;
571 // dscc - input deserializer
572 Delay = Delay + 3;
573 // dscc gets pixels every other cycle
574 Delay = Delay + 2;
575 // dscc - input cdc fifo
576 Delay = Delay + 12;
577 // dscc gets pixels every other cycle
578 Delay = Delay + 13;
579 // dscc - cdc uncertainty
580 Delay = Delay + 2;
581 // dscc - output cdc fifo
582 Delay = Delay + 7;
583 // dscc gets pixels every other cycle
584 Delay = Delay + 3;
585 // dscc - cdc uncertainty
586 Delay = Delay + 2;
587 // dscc - output serializer
588 Delay = Delay + 1;
589 // sft
590 Delay = Delay + 1;
591 } else if (pixelFormat == dm_n422) {
592 // sfr
593 Delay = Delay + 2;
594 // dsccif
595 Delay = Delay + 1;
596 // dscc - input deserializer
597 Delay = Delay + 5;
598 // dscc - input cdc fifo
599 Delay = Delay + 25;
600 // dscc - cdc uncertainty
601 Delay = Delay + 2;
602 // dscc - output cdc fifo
603 Delay = Delay + 10;
604 // dscc - cdc uncertainty
605 Delay = Delay + 2;
606 // dscc - output serializer
607 Delay = Delay + 1;
608 // sft
609 Delay = Delay + 1;
610 } else {
611 // sfr
612 Delay = Delay + 2;
613 // dsccif
614 Delay = Delay + 0;
615 // dscc - input deserializer
616 Delay = Delay + 3;
617 // dscc - input cdc fifo
618 Delay = Delay + 12;
619 // dscc - cdc uncertainty
620 Delay = Delay + 2;
621 // dscc - output cdc fifo
622 Delay = Delay + 7;
623 // dscc - output serializer
624 Delay = Delay + 1;
625 // dscc - cdc uncertainty
626 Delay = Delay + 2;
627 // sft
628 Delay = Delay + 1;
629 }
630
631 return Delay;
632 }
633
CalculatePrefetchSchedule(struct display_mode_lib * mode_lib,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,Pipe * myPipe,unsigned int DSCDelay,double DPPCLKDelaySubtotal,double DPPCLKDelaySCL,double DPPCLKDelaySCLLBOnly,double DPPCLKDelayCNVCFormater,double DPPCLKDelayCNVCCursor,double DISPCLKDelaySubtotal,unsigned int ScalerRecoutWidth,enum output_format_class OutputFormat,unsigned int MaxInterDCNTileRepeaters,unsigned int VStartup,unsigned int MaxVStartup,unsigned int GPUVMPageTableLevels,bool GPUVMEnable,HostVM * myHostVM,bool DynamicMetadataEnable,int DynamicMetadataLinesBeforeActiveRequired,unsigned int DynamicMetadataTransmittedBytes,bool DCCEnable,double UrgentLatency,double UrgentExtraLatency,double TCalc,unsigned int PDEAndMetaPTEBytesFrame,unsigned int MetaRowByte,unsigned int PixelPTEBytesPerRow,double PrefetchSourceLinesY,unsigned int SwathWidthY,double BytePerPixelDETY,double VInitPreFillY,unsigned int MaxNumSwathY,double PrefetchSourceLinesC,double BytePerPixelDETC,double VInitPreFillC,unsigned int MaxNumSwathC,unsigned int SwathHeightY,unsigned int SwathHeightC,double TWait,bool XFCEnabled,double XFCRemoteSurfaceFlipDelay,bool ProgressiveToInterlaceUnitInOPP,double * DSTXAfterScaler,double * DSTYAfterScaler,double * DestinationLinesForPrefetch,double * PrefetchBandwidth,double * DestinationLinesToRequestVMInVBlank,double * DestinationLinesToRequestRowInVBlank,double * VRatioPrefetchY,double * VRatioPrefetchC,double * RequiredPrefetchPixDataBWLuma,double * RequiredPrefetchPixDataBWChroma,unsigned int * VStartupRequiredWhenNotEnoughTimeForDynamicMetadata,double * Tno_bw,double * prefetch_vmrow_bw,unsigned int * swath_width_luma_ub,unsigned int * swath_width_chroma_ub,unsigned int * VUpdateOffsetPix,double * VUpdateWidthPix,double * VReadyOffsetPix)634 static bool CalculatePrefetchSchedule(
635 struct display_mode_lib *mode_lib,
636 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
637 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
638 Pipe *myPipe,
639 unsigned int DSCDelay,
640 double DPPCLKDelaySubtotal,
641 double DPPCLKDelaySCL,
642 double DPPCLKDelaySCLLBOnly,
643 double DPPCLKDelayCNVCFormater,
644 double DPPCLKDelayCNVCCursor,
645 double DISPCLKDelaySubtotal,
646 unsigned int ScalerRecoutWidth,
647 enum output_format_class OutputFormat,
648 unsigned int MaxInterDCNTileRepeaters,
649 unsigned int VStartup,
650 unsigned int MaxVStartup,
651 unsigned int GPUVMPageTableLevels,
652 bool GPUVMEnable,
653 HostVM *myHostVM,
654 bool DynamicMetadataEnable,
655 int DynamicMetadataLinesBeforeActiveRequired,
656 unsigned int DynamicMetadataTransmittedBytes,
657 bool DCCEnable,
658 double UrgentLatency,
659 double UrgentExtraLatency,
660 double TCalc,
661 unsigned int PDEAndMetaPTEBytesFrame,
662 unsigned int MetaRowByte,
663 unsigned int PixelPTEBytesPerRow,
664 double PrefetchSourceLinesY,
665 unsigned int SwathWidthY,
666 double BytePerPixelDETY,
667 double VInitPreFillY,
668 unsigned int MaxNumSwathY,
669 double PrefetchSourceLinesC,
670 double BytePerPixelDETC,
671 double VInitPreFillC,
672 unsigned int MaxNumSwathC,
673 unsigned int SwathHeightY,
674 unsigned int SwathHeightC,
675 double TWait,
676 bool XFCEnabled,
677 double XFCRemoteSurfaceFlipDelay,
678 bool ProgressiveToInterlaceUnitInOPP,
679 double *DSTXAfterScaler,
680 double *DSTYAfterScaler,
681 double *DestinationLinesForPrefetch,
682 double *PrefetchBandwidth,
683 double *DestinationLinesToRequestVMInVBlank,
684 double *DestinationLinesToRequestRowInVBlank,
685 double *VRatioPrefetchY,
686 double *VRatioPrefetchC,
687 double *RequiredPrefetchPixDataBWLuma,
688 double *RequiredPrefetchPixDataBWChroma,
689 unsigned int *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata,
690 double *Tno_bw,
691 double *prefetch_vmrow_bw,
692 unsigned int *swath_width_luma_ub,
693 unsigned int *swath_width_chroma_ub,
694 unsigned int *VUpdateOffsetPix,
695 double *VUpdateWidthPix,
696 double *VReadyOffsetPix)
697 {
698 bool MyError = false;
699 unsigned int DPPCycles, DISPCLKCycles;
700 double DSTTotalPixelsAfterScaler, TotalRepeaterDelayTime;
701 double Tdm, LineTime, Tsetup;
702 double dst_y_prefetch_equ;
703 double Tsw_oto;
704 double prefetch_bw_oto;
705 double Tvm_oto;
706 double Tr0_oto;
707 double Tvm_oto_lines;
708 double Tr0_oto_lines;
709 double Tsw_oto_lines;
710 double dst_y_prefetch_oto;
711 double TimeForFetchingMetaPTE = 0;
712 double TimeForFetchingRowInVBlank = 0;
713 double LinesToRequestPrefetchPixelData = 0;
714 double HostVMInefficiencyFactor;
715 unsigned int HostVMDynamicLevels;
716
717 if (GPUVMEnable == true && myHostVM->Enable == true) {
718 HostVMInefficiencyFactor =
719 PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
720 / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
721 HostVMDynamicLevels = myHostVM->MaxPageTableLevels
722 - myHostVM->CachedPageTableLevels;
723 } else {
724 HostVMInefficiencyFactor = 1;
725 HostVMDynamicLevels = 0;
726 }
727
728 if (myPipe->ScalerEnabled)
729 DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCL;
730 else
731 DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCLLBOnly;
732
733 DPPCycles = DPPCycles + DPPCLKDelayCNVCFormater + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
734
735 DISPCLKCycles = DISPCLKDelaySubtotal;
736
737 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
738 return true;
739
740 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK
741 + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;
742
743 if (myPipe->DPPPerPlane > 1)
744 *DSTXAfterScaler = *DSTXAfterScaler + ScalerRecoutWidth;
745
746 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && ProgressiveToInterlaceUnitInOPP))
747 *DSTYAfterScaler = 1;
748 else
749 *DSTYAfterScaler = 0;
750
751 DSTTotalPixelsAfterScaler = ((double) (*DSTYAfterScaler * myPipe->HTotal)) + *DSTXAfterScaler;
752 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
753 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
754
755 *VUpdateOffsetPix = dml_ceil(myPipe->HTotal / 4.0, 1);
756 TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2.0 / myPipe->DPPCLK + 3.0 / myPipe->DISPCLK);
757 *VUpdateWidthPix = (14.0 / myPipe->DCFCLKDeepSleep + 12.0 / myPipe->DPPCLK + TotalRepeaterDelayTime)
758 * myPipe->PixelClock;
759
760 *VReadyOffsetPix = dml_max(
761 150.0 / myPipe->DPPCLK,
762 TotalRepeaterDelayTime + 20.0 / myPipe->DCFCLKDeepSleep + 10.0 / myPipe->DPPCLK)
763 * myPipe->PixelClock;
764
765 Tsetup = (double) (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / myPipe->PixelClock;
766
767 LineTime = (double) myPipe->HTotal / myPipe->PixelClock;
768
769 if (DynamicMetadataEnable) {
770 double Tdmbf, Tdmec, Tdmsks;
771
772 Tdm = dml_max(0.0, UrgentExtraLatency - TCalc);
773 Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / myPipe->DISPCLK;
774 Tdmec = LineTime;
775 if (DynamicMetadataLinesBeforeActiveRequired == -1)
776 Tdmsks = myPipe->VBlank * LineTime / 2.0;
777 else
778 Tdmsks = DynamicMetadataLinesBeforeActiveRequired * LineTime;
779 if (myPipe->InterlaceEnable && !ProgressiveToInterlaceUnitInOPP)
780 Tdmsks = Tdmsks / 2;
781 if (VStartup * LineTime
782 < Tsetup + TWait + UrgentExtraLatency + Tdmbf + Tdmec + Tdmsks) {
783 MyError = true;
784 *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata = (Tsetup + TWait
785 + UrgentExtraLatency + Tdmbf + Tdmec + Tdmsks) / LineTime;
786 } else
787 *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata = 0.0;
788 } else
789 Tdm = 0;
790
791 if (GPUVMEnable) {
792 if (GPUVMPageTableLevels >= 3)
793 *Tno_bw = UrgentExtraLatency + UrgentLatency * ((GPUVMPageTableLevels - 2) * (myHostVM->MaxPageTableLevels + 1) - 1);
794 else
795 *Tno_bw = 0;
796 } else if (!DCCEnable)
797 *Tno_bw = LineTime;
798 else
799 *Tno_bw = LineTime / 4;
800
801 dst_y_prefetch_equ = VStartup - dml_max(TCalc + TWait, XFCRemoteSurfaceFlipDelay) / LineTime
802 - (Tsetup + Tdm) / LineTime
803 - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
804
805 Tsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime;
806
807 if (myPipe->SourceScan == dm_horz) {
808 *swath_width_luma_ub = dml_ceil(SwathWidthY - 1, myPipe->BlockWidth256BytesY) + myPipe->BlockWidth256BytesY;
809 if (myPipe->BlockWidth256BytesC > 0)
810 *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockWidth256BytesC) + myPipe->BlockWidth256BytesC;
811 } else {
812 *swath_width_luma_ub = dml_ceil(SwathWidthY - 1, myPipe->BlockHeight256BytesY) + myPipe->BlockHeight256BytesY;
813 if (myPipe->BlockHeight256BytesC > 0)
814 *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockHeight256BytesC) + myPipe->BlockHeight256BytesC;
815 }
816
817 prefetch_bw_oto = (PrefetchSourceLinesY * *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) + PrefetchSourceLinesC * *swath_width_chroma_ub * dml_ceil(BytePerPixelDETC, 2)) / Tsw_oto;
818
819
820 if (GPUVMEnable == true) {
821 Tvm_oto = dml_max(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
822 dml_max(UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1),
823 LineTime / 4.0));
824 } else
825 Tvm_oto = LineTime / 4.0;
826
827 if ((GPUVMEnable == true || DCCEnable == true)) {
828 Tr0_oto = dml_max(
829 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
830 dml_max(UrgentLatency * (HostVMDynamicLevels + 1), dml_max(LineTime - Tvm_oto, LineTime / 4)));
831 } else
832 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
833
834 Tvm_oto_lines = dml_ceil(4 * Tvm_oto / LineTime, 1) / 4.0;
835 Tr0_oto_lines = dml_ceil(4 * Tr0_oto / LineTime, 1) / 4.0;
836 Tsw_oto_lines = dml_ceil(4 * Tsw_oto / LineTime, 1) / 4.0;
837 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Tsw_oto_lines + 0.75;
838
839 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
840
841 if (dst_y_prefetch_oto < dst_y_prefetch_equ)
842 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
843 else
844 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
845
846 // Limit to prevent overflow in DST_Y_PREFETCH register
847 *DestinationLinesForPrefetch = dml_min(*DestinationLinesForPrefetch, 63.75);
848
849 dml_print("DML: VStartup: %d\n", VStartup);
850 dml_print("DML: TCalc: %f\n", TCalc);
851 dml_print("DML: TWait: %f\n", TWait);
852 dml_print("DML: XFCRemoteSurfaceFlipDelay: %f\n", XFCRemoteSurfaceFlipDelay);
853 dml_print("DML: LineTime: %f\n", LineTime);
854 dml_print("DML: Tsetup: %f\n", Tsetup);
855 dml_print("DML: Tdm: %f\n", Tdm);
856 dml_print("DML: DSTYAfterScaler: %f\n", *DSTYAfterScaler);
857 dml_print("DML: DSTXAfterScaler: %f\n", *DSTXAfterScaler);
858 dml_print("DML: HTotal: %d\n", myPipe->HTotal);
859
860 *PrefetchBandwidth = 0;
861 *DestinationLinesToRequestVMInVBlank = 0;
862 *DestinationLinesToRequestRowInVBlank = 0;
863 *VRatioPrefetchY = 0;
864 *VRatioPrefetchC = 0;
865 *RequiredPrefetchPixDataBWLuma = 0;
866 if (*DestinationLinesForPrefetch > 1) {
867 double PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
868 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
869 + PrefetchSourceLinesY * *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1)
870 + PrefetchSourceLinesC * *swath_width_chroma_ub * dml_ceil(BytePerPixelDETC, 2))
871 / (*DestinationLinesForPrefetch * LineTime - *Tno_bw);
872
873 double PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame *
874 HostVMInefficiencyFactor + PrefetchSourceLinesY *
875 *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) +
876 PrefetchSourceLinesC * *swath_width_chroma_ub *
877 dml_ceil(BytePerPixelDETC, 2)) /
878 (*DestinationLinesForPrefetch * LineTime - *Tno_bw - 2 *
879 UrgentLatency * (1 + HostVMDynamicLevels));
880
881 double PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow
882 * HostVMInefficiencyFactor + PrefetchSourceLinesY *
883 *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) +
884 PrefetchSourceLinesC * *swath_width_chroma_ub *
885 dml_ceil(BytePerPixelDETC, 2)) /
886 (*DestinationLinesForPrefetch * LineTime -
887 UrgentExtraLatency - UrgentLatency * (GPUVMPageTableLevels
888 * (HostVMDynamicLevels + 1) - 1));
889
890 double PrefetchBandwidth4 = (PrefetchSourceLinesY * *swath_width_luma_ub *
891 dml_ceil(BytePerPixelDETY, 1) + PrefetchSourceLinesC *
892 *swath_width_chroma_ub * dml_ceil(BytePerPixelDETC, 2)) /
893 (*DestinationLinesForPrefetch * LineTime -
894 UrgentExtraLatency - UrgentLatency * (GPUVMPageTableLevels
895 * (HostVMDynamicLevels + 1) - 1) - 2 * UrgentLatency *
896 (1 + HostVMDynamicLevels));
897
898 if (VStartup == MaxVStartup && (PrefetchBandwidth1 > 4 * prefetch_bw_oto) && (*DestinationLinesForPrefetch - dml_ceil(Tsw_oto_lines, 1) / 4.0 - 0.75) * LineTime - *Tno_bw > 0) {
899 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / ((*DestinationLinesForPrefetch - dml_ceil(Tsw_oto_lines, 1) / 4.0 - 0.75) * LineTime - *Tno_bw);
900 }
901 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1) && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= UrgentLatency * (1 + HostVMDynamicLevels)) {
902 *PrefetchBandwidth = PrefetchBandwidth1;
903 } else if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1) && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < UrgentLatency * (1 + HostVMDynamicLevels)) {
904 *PrefetchBandwidth = PrefetchBandwidth2;
905 } else if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1) && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= UrgentLatency * (1 + HostVMDynamicLevels)) {
906 *PrefetchBandwidth = PrefetchBandwidth3;
907 } else {
908 *PrefetchBandwidth = PrefetchBandwidth4;
909 }
910
911 if (GPUVMEnable) {
912 TimeForFetchingMetaPTE = dml_max(*Tno_bw + (double) PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / *PrefetchBandwidth,
913 dml_max(UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1), LineTime / 4));
914 } else {
915 // 5/30/2018 - This was an optimization requested from Sy but now NumberOfCursors is no longer a factor
916 // so if this needs to be reinstated, then it should be officially done in the VBA code as well.
917 // if (mode_lib->NumberOfCursors > 0 || XFCEnabled)
918 TimeForFetchingMetaPTE = LineTime / 4;
919 // else
920 // TimeForFetchingMetaPTE = 0.0;
921 }
922
923 if ((GPUVMEnable == true || DCCEnable == true)) {
924 TimeForFetchingRowInVBlank =
925 dml_max(
926 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
927 / *PrefetchBandwidth,
928 dml_max(
929 UrgentLatency * (1 + HostVMDynamicLevels),
930 dml_max(
931 (LineTime
932 - TimeForFetchingMetaPTE) / 2.0,
933 LineTime
934 / 4.0)));
935 } else {
936 // See note above dated 5/30/2018
937 // if (NumberOfCursors > 0 || XFCEnabled)
938 TimeForFetchingRowInVBlank = (LineTime - TimeForFetchingMetaPTE) / 2.0;
939 // else // TODO: Did someone else add this??
940 // TimeForFetchingRowInVBlank = 0.0;
941 }
942
943 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
944
945 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
946
947 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
948 // See note above dated 5/30/2018
949 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ?
950 - ((GPUVMEnable || DCCEnable) ?
951 (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) :
952 0.0); // TODO: Did someone else add this??
953
954 if (LinesToRequestPrefetchPixelData > 0) {
955
956 *VRatioPrefetchY = (double) PrefetchSourceLinesY
957 / LinesToRequestPrefetchPixelData;
958 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
959 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
960 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
961 *VRatioPrefetchY =
962 dml_max(
963 (double) PrefetchSourceLinesY
964 / LinesToRequestPrefetchPixelData,
965 (double) MaxNumSwathY
966 * SwathHeightY
967 / (LinesToRequestPrefetchPixelData
968 - (VInitPreFillY
969 - 3.0)
970 / 2.0));
971 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
972 } else {
973 MyError = true;
974 *VRatioPrefetchY = 0;
975 }
976 }
977
978 *VRatioPrefetchC = (double) PrefetchSourceLinesC
979 / LinesToRequestPrefetchPixelData;
980 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
981
982 if ((SwathHeightC > 4)) {
983 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
984 *VRatioPrefetchC =
985 dml_max(
986 *VRatioPrefetchC,
987 (double) MaxNumSwathC
988 * SwathHeightC
989 / (LinesToRequestPrefetchPixelData
990 - (VInitPreFillC
991 - 3.0)
992 / 2.0));
993 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
994 } else {
995 MyError = true;
996 *VRatioPrefetchC = 0;
997 }
998 }
999
1000 *RequiredPrefetchPixDataBWLuma = myPipe->DPPPerPlane
1001 * (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData
1002 * dml_ceil(BytePerPixelDETY, 1)
1003 * *swath_width_luma_ub / LineTime;
1004 *RequiredPrefetchPixDataBWChroma = myPipe->DPPPerPlane
1005 * (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData
1006 * dml_ceil(BytePerPixelDETC, 2)
1007 * *swath_width_chroma_ub / LineTime;
1008 } else {
1009 MyError = true;
1010 *VRatioPrefetchY = 0;
1011 *VRatioPrefetchC = 0;
1012 *RequiredPrefetchPixDataBWLuma = 0;
1013 *RequiredPrefetchPixDataBWChroma = 0;
1014 }
1015
1016 dml_print("DML: Tvm: %fus\n", TimeForFetchingMetaPTE);
1017 dml_print("DML: Tr0: %fus\n", TimeForFetchingRowInVBlank);
1018 dml_print("DML: Tsw: %fus\n", (double)(*DestinationLinesForPrefetch) * LineTime - TimeForFetchingMetaPTE - TimeForFetchingRowInVBlank);
1019 dml_print("DML: Tpre: %fus\n", (double)(*DestinationLinesForPrefetch) * LineTime);
1020 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1021
1022 } else {
1023 MyError = true;
1024 }
1025
1026 {
1027 double prefetch_vm_bw;
1028 double prefetch_row_bw;
1029
1030 if (PDEAndMetaPTEBytesFrame == 0) {
1031 prefetch_vm_bw = 0;
1032 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1033 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1034 } else {
1035 prefetch_vm_bw = 0;
1036 MyError = true;
1037 }
1038 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1039 prefetch_row_bw = 0;
1040 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1041 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1042 } else {
1043 prefetch_row_bw = 0;
1044 MyError = true;
1045 }
1046
1047 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1048 }
1049
1050 if (MyError) {
1051 *PrefetchBandwidth = 0;
1052 *DestinationLinesToRequestVMInVBlank = 0;
1053 *DestinationLinesToRequestRowInVBlank = 0;
1054 *DestinationLinesForPrefetch = 0;
1055 *VRatioPrefetchY = 0;
1056 *VRatioPrefetchC = 0;
1057 *RequiredPrefetchPixDataBWLuma = 0;
1058 *RequiredPrefetchPixDataBWChroma = 0;
1059 }
1060
1061 return MyError;
1062 }
1063
/*
 * Snap Clock onto the grid of values of the form (VCOSpeed * 4) / N.
 *
 * N is obtained by passing (VCOSpeed * 4) / Clock through dml_floor() with a
 * granularity of 1. Assuming dml_floor() rounds down to a whole number, the
 * divider can only shrink, so for positive inputs the returned clock is not
 * lower than Clock.
 *
 * No guard is applied here: Clock == 0, or a divider that rounds to 0, ends
 * up as a floating-point division by zero.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	const double ScaledVCO = VCOSpeed * 4;
	const double Divider = dml_floor(ScaledVCO / Clock, 1);

	return ScaledVCO / Divider;
}
1068
/*
 * Snap Clock onto the grid of values of the form (VCOSpeed * 4) / N.
 *
 * N is obtained by passing (VCOSpeed * 4) / Clock through dml_ceil() with a
 * granularity of 1. Assuming dml_ceil() rounds up to a whole number, the
 * divider can only grow, so for positive inputs the returned clock is not
 * higher than Clock.
 *
 * No guard is applied here: Clock == 0 leads to a floating-point division by
 * zero.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	const double ScaledVCO = VCOSpeed * 4;
	const double Divider = dml_ceil(ScaledVCO / Clock, 1);

	return ScaledVCO / Divider;
}
1073
CalculateDCCConfiguration(bool DCCEnabled,bool DCCProgrammingAssumesScanDirectionUnknown,unsigned int ViewportWidth,unsigned int ViewportHeight,unsigned int DETBufferSize,unsigned int RequestHeight256Byte,unsigned int SwathHeight,enum dm_swizzle_mode TilingFormat,unsigned int BytePerPixel,enum scan_direction_class ScanOrientation,unsigned int * MaxUncompressedBlock,unsigned int * MaxCompressedBlock,unsigned int * Independent64ByteBlock)1074 static double CalculateDCCConfiguration(
1075 bool DCCEnabled,
1076 bool DCCProgrammingAssumesScanDirectionUnknown,
1077 unsigned int ViewportWidth,
1078 unsigned int ViewportHeight,
1079 unsigned int DETBufferSize,
1080 unsigned int RequestHeight256Byte,
1081 unsigned int SwathHeight,
1082 enum dm_swizzle_mode TilingFormat,
1083 unsigned int BytePerPixel,
1084 enum scan_direction_class ScanOrientation,
1085 unsigned int *MaxUncompressedBlock,
1086 unsigned int *MaxCompressedBlock,
1087 unsigned int *Independent64ByteBlock)
1088 {
1089 double MaximumDCCCompressionSurface = 0.0;
1090 enum {
1091 REQ_256Bytes,
1092 REQ_128BytesNonContiguous,
1093 REQ_128BytesContiguous,
1094 REQ_NA
1095 } Request = REQ_NA;
1096
1097 if (DCCEnabled == true) {
1098 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1099 if (DETBufferSize >= RequestHeight256Byte * ViewportWidth * BytePerPixel
1100 && DETBufferSize
1101 >= 256 / RequestHeight256Byte
1102 * ViewportHeight) {
1103 Request = REQ_256Bytes;
1104 } else if ((DETBufferSize
1105 < RequestHeight256Byte * ViewportWidth * BytePerPixel
1106 && (BytePerPixel == 2 || BytePerPixel == 4))
1107 || (DETBufferSize
1108 < 256 / RequestHeight256Byte
1109 * ViewportHeight
1110 && BytePerPixel == 8
1111 && (TilingFormat == dm_sw_4kb_d
1112 || TilingFormat
1113 == dm_sw_4kb_d_x
1114 || TilingFormat
1115 == dm_sw_var_d
1116 || TilingFormat
1117 == dm_sw_var_d_x
1118 || TilingFormat
1119 == dm_sw_64kb_d
1120 || TilingFormat
1121 == dm_sw_64kb_d_x
1122 || TilingFormat
1123 == dm_sw_64kb_d_t
1124 || TilingFormat
1125 == dm_sw_64kb_r_x))) {
1126 Request = REQ_128BytesNonContiguous;
1127 } else {
1128 Request = REQ_128BytesContiguous;
1129 }
1130 } else {
1131 if (BytePerPixel == 1) {
1132 if (ScanOrientation == dm_vert || SwathHeight == 16) {
1133 Request = REQ_256Bytes;
1134 } else {
1135 Request = REQ_128BytesContiguous;
1136 }
1137 } else if (BytePerPixel == 2) {
1138 if ((ScanOrientation == dm_vert && SwathHeight == 16) || (ScanOrientation != dm_vert && SwathHeight == 8)) {
1139 Request = REQ_256Bytes;
1140 } else if (ScanOrientation == dm_vert) {
1141 Request = REQ_128BytesContiguous;
1142 } else {
1143 Request = REQ_128BytesNonContiguous;
1144 }
1145 } else if (BytePerPixel == 4) {
1146 if (SwathHeight == 8) {
1147 Request = REQ_256Bytes;
1148 } else if (ScanOrientation == dm_vert) {
1149 Request = REQ_128BytesContiguous;
1150 } else {
1151 Request = REQ_128BytesNonContiguous;
1152 }
1153 } else if (BytePerPixel == 8) {
1154 if (TilingFormat == dm_sw_4kb_d || TilingFormat == dm_sw_4kb_d_x
1155 || TilingFormat == dm_sw_var_d
1156 || TilingFormat == dm_sw_var_d_x
1157 || TilingFormat == dm_sw_64kb_d
1158 || TilingFormat == dm_sw_64kb_d_x
1159 || TilingFormat == dm_sw_64kb_d_t
1160 || TilingFormat == dm_sw_64kb_r_x) {
1161 if ((ScanOrientation == dm_vert && SwathHeight == 8)
1162 || (ScanOrientation != dm_vert
1163 && SwathHeight == 4)) {
1164 Request = REQ_256Bytes;
1165 } else if (ScanOrientation != dm_vert) {
1166 Request = REQ_128BytesContiguous;
1167 } else {
1168 Request = REQ_128BytesNonContiguous;
1169 }
1170 } else {
1171 if (ScanOrientation != dm_vert || SwathHeight == 8) {
1172 Request = REQ_256Bytes;
1173 } else {
1174 Request = REQ_128BytesContiguous;
1175 }
1176 }
1177 }
1178 }
1179 } else {
1180 Request = REQ_NA;
1181 }
1182
1183 if (Request == REQ_256Bytes) {
1184 *MaxUncompressedBlock = 256;
1185 *MaxCompressedBlock = 256;
1186 *Independent64ByteBlock = false;
1187 MaximumDCCCompressionSurface = 4.0;
1188 } else if (Request == REQ_128BytesContiguous) {
1189 *MaxUncompressedBlock = 128;
1190 *MaxCompressedBlock = 128;
1191 *Independent64ByteBlock = false;
1192 MaximumDCCCompressionSurface = 2.0;
1193 } else if (Request == REQ_128BytesNonContiguous) {
1194 *MaxUncompressedBlock = 256;
1195 *MaxCompressedBlock = 64;
1196 *Independent64ByteBlock = true;
1197 MaximumDCCCompressionSurface = 4.0;
1198 } else {
1199 *MaxUncompressedBlock = 0;
1200 *MaxCompressedBlock = 0;
1201 *Independent64ByteBlock = 0;
1202 MaximumDCCCompressionSurface = 0.0;
1203 }
1204
1205 return MaximumDCCCompressionSurface;
1206 }
1207
CalculatePrefetchSourceLines(struct display_mode_lib * mode_lib,double VRatio,double vtaps,bool Interlace,bool ProgressiveToInterlaceUnitInOPP,unsigned int SwathHeight,unsigned int ViewportYStart,double * VInitPreFill,unsigned int * MaxNumSwath)1208 static double CalculatePrefetchSourceLines(
1209 struct display_mode_lib *mode_lib,
1210 double VRatio,
1211 double vtaps,
1212 bool Interlace,
1213 bool ProgressiveToInterlaceUnitInOPP,
1214 unsigned int SwathHeight,
1215 unsigned int ViewportYStart,
1216 double *VInitPreFill,
1217 unsigned int *MaxNumSwath)
1218 {
1219 unsigned int MaxPartialSwath;
1220
1221 if (ProgressiveToInterlaceUnitInOPP)
1222 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1223 else
1224 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
1225
1226 if (!mode_lib->vba.IgnoreViewportPositioning) {
1227
1228 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
1229
1230 if (*VInitPreFill > 1.0)
1231 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1232 else
1233 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2)
1234 % SwathHeight;
1235 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
1236
1237 } else {
1238
1239 if (ViewportYStart != 0)
1240 dml_print(
1241 "WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1242
1243 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1244
1245 if (*VInitPreFill > 1.0)
1246 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1247 else
1248 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1)
1249 % SwathHeight;
1250 }
1251
1252 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
1253 }
1254
CalculateVMAndRowBytes(struct display_mode_lib * mode_lib,bool DCCEnable,unsigned int BlockHeight256Bytes,unsigned int BlockWidth256Bytes,enum source_format_class SourcePixelFormat,unsigned int SurfaceTiling,unsigned int BytePerPixel,enum scan_direction_class ScanDirection,unsigned int ViewportWidth,unsigned int ViewportHeight,unsigned int SwathWidth,bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxPageTableLevels,unsigned int HostVMCachedPageTableLevels,unsigned int VMMPageSize,unsigned int PTEBufferSizeInRequests,unsigned int Pitch,unsigned int DCCMetaPitch,unsigned int * MacroTileWidth,unsigned int * MetaRowByte,unsigned int * PixelPTEBytesPerRow,bool * PTEBufferSizeNotExceeded,unsigned int * dpte_row_width_ub,unsigned int * dpte_row_height,unsigned int * MetaRequestWidth,unsigned int * MetaRequestHeight,unsigned int * meta_row_width,unsigned int * meta_row_height,unsigned int * vm_group_bytes,unsigned int * dpte_group_bytes,unsigned int * PixelPTEReqWidth,unsigned int * PixelPTEReqHeight,unsigned int * PTERequestSize,unsigned int * DPDE0BytesFrame,unsigned int * MetaPTEBytesFrame)1255 static unsigned int CalculateVMAndRowBytes(
1256 struct display_mode_lib *mode_lib,
1257 bool DCCEnable,
1258 unsigned int BlockHeight256Bytes,
1259 unsigned int BlockWidth256Bytes,
1260 enum source_format_class SourcePixelFormat,
1261 unsigned int SurfaceTiling,
1262 unsigned int BytePerPixel,
1263 enum scan_direction_class ScanDirection,
1264 unsigned int ViewportWidth,
1265 unsigned int ViewportHeight,
1266 unsigned int SwathWidth,
1267 bool GPUVMEnable,
1268 bool HostVMEnable,
1269 unsigned int HostVMMaxPageTableLevels,
1270 unsigned int HostVMCachedPageTableLevels,
1271 unsigned int VMMPageSize,
1272 unsigned int PTEBufferSizeInRequests,
1273 unsigned int Pitch,
1274 unsigned int DCCMetaPitch,
1275 unsigned int *MacroTileWidth,
1276 unsigned int *MetaRowByte,
1277 unsigned int *PixelPTEBytesPerRow,
1278 bool *PTEBufferSizeNotExceeded,
1279 unsigned int *dpte_row_width_ub,
1280 unsigned int *dpte_row_height,
1281 unsigned int *MetaRequestWidth,
1282 unsigned int *MetaRequestHeight,
1283 unsigned int *meta_row_width,
1284 unsigned int *meta_row_height,
1285 unsigned int *vm_group_bytes,
1286 unsigned int *dpte_group_bytes,
1287 unsigned int *PixelPTEReqWidth,
1288 unsigned int *PixelPTEReqHeight,
1289 unsigned int *PTERequestSize,
1290 unsigned int *DPDE0BytesFrame,
1291 unsigned int *MetaPTEBytesFrame)
1292 {
1293 unsigned int MPDEBytesFrame;
1294 unsigned int DCCMetaSurfaceBytes;
1295 unsigned int MacroTileSizeBytes;
1296 unsigned int MacroTileHeight;
1297 unsigned int ExtraDPDEBytesFrame;
1298 unsigned int PDEAndMetaPTEBytesFrame;
1299 unsigned int PixelPTEReqHeightPTEs = 0;
1300
1301 if (DCCEnable == true) {
1302 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1303 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1304 if (ScanDirection == dm_horz) {
1305 *meta_row_height = *MetaRequestHeight;
1306 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth)
1307 + *MetaRequestWidth;
1308 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1309 } else {
1310 *meta_row_height = *MetaRequestWidth;
1311 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight)
1312 + *MetaRequestHeight;
1313 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1314 }
1315 if (ScanDirection == dm_horz) {
1316 DCCMetaSurfaceBytes = DCCMetaPitch
1317 * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes)
1318 + 64 * BlockHeight256Bytes) * BytePerPixel
1319 / 256;
1320 } else {
1321 DCCMetaSurfaceBytes = DCCMetaPitch
1322 * (dml_ceil(
1323 (double) ViewportHeight - 1,
1324 64 * BlockHeight256Bytes)
1325 + 64 * BlockHeight256Bytes) * BytePerPixel
1326 / 256;
1327 }
1328 if (GPUVMEnable == true) {
1329 *MetaPTEBytesFrame = (dml_ceil(
1330 (double) (DCCMetaSurfaceBytes - VMMPageSize)
1331 / (8 * VMMPageSize),
1332 1) + 1) * 64;
1333 MPDEBytesFrame = 128 * ((mode_lib->vba.GPUVMMaxPageTableLevels + 1) * (mode_lib->vba.HostVMMaxPageTableLevels + 1) - 2);
1334 } else {
1335 *MetaPTEBytesFrame = 0;
1336 MPDEBytesFrame = 0;
1337 }
1338 } else {
1339 *MetaPTEBytesFrame = 0;
1340 MPDEBytesFrame = 0;
1341 *MetaRowByte = 0;
1342 }
1343
1344 if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_l_vp) {
1345 MacroTileSizeBytes = 256;
1346 MacroTileHeight = BlockHeight256Bytes;
1347 } else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == dm_sw_4kb_s_x
1348 || SurfaceTiling == dm_sw_4kb_d || SurfaceTiling == dm_sw_4kb_d_x) {
1349 MacroTileSizeBytes = 4096;
1350 MacroTileHeight = 4 * BlockHeight256Bytes;
1351 } else if (SurfaceTiling == dm_sw_64kb_s || SurfaceTiling == dm_sw_64kb_s_t
1352 || SurfaceTiling == dm_sw_64kb_s_x || SurfaceTiling == dm_sw_64kb_d
1353 || SurfaceTiling == dm_sw_64kb_d_t || SurfaceTiling == dm_sw_64kb_d_x
1354 || SurfaceTiling == dm_sw_64kb_r_x) {
1355 MacroTileSizeBytes = 65536;
1356 MacroTileHeight = 16 * BlockHeight256Bytes;
1357 } else {
1358 MacroTileSizeBytes = 262144;
1359 MacroTileHeight = 32 * BlockHeight256Bytes;
1360 }
1361 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
1362
1363 if (GPUVMEnable == true && (mode_lib->vba.GPUVMMaxPageTableLevels + 1) * (mode_lib->vba.HostVMMaxPageTableLevels + 1) > 2) {
1364 if (ScanDirection == dm_horz) {
1365 *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
1366 } else {
1367 *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
1368 }
1369 ExtraDPDEBytesFrame = 128 * ((mode_lib->vba.GPUVMMaxPageTableLevels + 1) * (mode_lib->vba.HostVMMaxPageTableLevels + 1) - 3);
1370 } else {
1371 *DPDE0BytesFrame = 0;
1372 ExtraDPDEBytesFrame = 0;
1373 }
1374
1375 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame
1376 + ExtraDPDEBytesFrame;
1377
1378 if (HostVMEnable == true) {
1379 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * (HostVMMaxPageTableLevels - HostVMCachedPageTableLevels));
1380 }
1381
1382 if (GPUVMEnable == true) {
1383 double FractionOfPTEReturnDrop;
1384
1385 if (SurfaceTiling == dm_sw_linear) {
1386 PixelPTEReqHeightPTEs = 1;
1387 *PixelPTEReqHeight = 1;
1388 *PixelPTEReqWidth = 8.0 * VMMPageSize / BytePerPixel;
1389 *PTERequestSize = 64;
1390 FractionOfPTEReturnDrop = 0;
1391 } else if (MacroTileSizeBytes == 4096) {
1392 PixelPTEReqHeightPTEs = 1;
1393 *PixelPTEReqHeight = MacroTileHeight;
1394 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1395 *PTERequestSize = 64;
1396 if (ScanDirection == dm_horz)
1397 FractionOfPTEReturnDrop = 0;
1398 else
1399 FractionOfPTEReturnDrop = 7.0 / 8;
1400 } else if (VMMPageSize == 4096 && MacroTileSizeBytes > 4096) {
1401 PixelPTEReqHeightPTEs = 16;
1402 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
1403 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
1404 *PTERequestSize = 128;
1405 FractionOfPTEReturnDrop = 0;
1406 } else {
1407 PixelPTEReqHeightPTEs = 1;
1408 *PixelPTEReqHeight = MacroTileHeight;
1409 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1410 *PTERequestSize = 64;
1411 FractionOfPTEReturnDrop = 0;
1412 }
1413
1414 if (SurfaceTiling == dm_sw_linear) {
1415 *dpte_row_height = dml_min(128,
1416 1 << (unsigned int) dml_floor(
1417 dml_log2(
1418 (double) PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch),
1419 1));
1420 *dpte_row_width_ub = (dml_ceil((double) (Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1421 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1422 } else if (ScanDirection == dm_horz) {
1423 *dpte_row_height = *PixelPTEReqHeight;
1424 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1425 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1426 } else {
1427 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
1428 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
1429 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
1430 }
1431 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop)
1432 <= 64 * PTEBufferSizeInRequests) {
1433 *PTEBufferSizeNotExceeded = true;
1434 } else {
1435 *PTEBufferSizeNotExceeded = false;
1436 }
1437 } else {
1438 *PixelPTEBytesPerRow = 0;
1439 *PTEBufferSizeNotExceeded = true;
1440 }
1441 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %d\n", *MetaPTEBytesFrame);
1442
1443 if (HostVMEnable == true) {
1444 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * (HostVMMaxPageTableLevels - HostVMCachedPageTableLevels));
1445 }
1446
1447 if (HostVMEnable == true) {
1448 *vm_group_bytes = 512;
1449 *dpte_group_bytes = 512;
1450 } else if (GPUVMEnable == true) {
1451 *vm_group_bytes = 2048;
1452 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection != dm_horz) {
1453 *dpte_group_bytes = 512;
1454 } else {
1455 *dpte_group_bytes = 2048;
1456 }
1457 } else {
1458 *vm_group_bytes = 0;
1459 *dpte_group_bytes = 0;
1460 }
1461
1462 return PDEAndMetaPTEBytesFrame;
1463 }
1464
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib * mode_lib)1465 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
1466 struct display_mode_lib *mode_lib)
1467 {
1468 struct vba_vars_st *locals = &mode_lib->vba;
1469 unsigned int j, k;
1470
1471 mode_lib->vba.WritebackDISPCLK = 0.0;
1472 mode_lib->vba.DISPCLKWithRamping = 0;
1473 mode_lib->vba.DISPCLKWithoutRamping = 0;
1474 mode_lib->vba.GlobalDPPCLK = 0.0;
1475
1476 // DISPCLK and DPPCLK Calculation
1477 //
1478 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1479 if (mode_lib->vba.WritebackEnable[k]) {
1480 mode_lib->vba.WritebackDISPCLK =
1481 dml_max(
1482 mode_lib->vba.WritebackDISPCLK,
1483 CalculateWriteBackDISPCLK(
1484 mode_lib->vba.WritebackPixelFormat[k],
1485 mode_lib->vba.PixelClock[k],
1486 mode_lib->vba.WritebackHRatio[k],
1487 mode_lib->vba.WritebackVRatio[k],
1488 mode_lib->vba.WritebackLumaHTaps[k],
1489 mode_lib->vba.WritebackLumaVTaps[k],
1490 mode_lib->vba.WritebackChromaHTaps[k],
1491 mode_lib->vba.WritebackChromaVTaps[k],
1492 mode_lib->vba.WritebackDestinationWidth[k],
1493 mode_lib->vba.HTotal[k],
1494 mode_lib->vba.WritebackChromaLineBufferWidth));
1495 }
1496 }
1497
1498 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1499 if (mode_lib->vba.HRatio[k] > 1) {
1500 locals->PSCL_THROUGHPUT_LUMA[k] = dml_min(
1501 mode_lib->vba.MaxDCHUBToPSCLThroughput,
1502 mode_lib->vba.MaxPSCLToLBThroughput
1503 * mode_lib->vba.HRatio[k]
1504 / dml_ceil(
1505 mode_lib->vba.htaps[k]
1506 / 6.0,
1507 1));
1508 } else {
1509 locals->PSCL_THROUGHPUT_LUMA[k] = dml_min(
1510 mode_lib->vba.MaxDCHUBToPSCLThroughput,
1511 mode_lib->vba.MaxPSCLToLBThroughput);
1512 }
1513
1514 mode_lib->vba.DPPCLKUsingSingleDPPLuma =
1515 mode_lib->vba.PixelClock[k]
1516 * dml_max(
1517 mode_lib->vba.vtaps[k] / 6.0
1518 * dml_min(
1519 1.0,
1520 mode_lib->vba.HRatio[k]),
1521 dml_max(
1522 mode_lib->vba.HRatio[k]
1523 * mode_lib->vba.VRatio[k]
1524 / locals->PSCL_THROUGHPUT_LUMA[k],
1525 1.0));
1526
1527 if ((mode_lib->vba.htaps[k] > 6 || mode_lib->vba.vtaps[k] > 6)
1528 && mode_lib->vba.DPPCLKUsingSingleDPPLuma
1529 < 2 * mode_lib->vba.PixelClock[k]) {
1530 mode_lib->vba.DPPCLKUsingSingleDPPLuma = 2 * mode_lib->vba.PixelClock[k];
1531 }
1532
1533 if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8
1534 && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) {
1535 locals->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
1536 locals->DPPCLKUsingSingleDPP[k] =
1537 mode_lib->vba.DPPCLKUsingSingleDPPLuma;
1538 } else {
1539 if (mode_lib->vba.HRatio[k] > 1) {
1540 locals->PSCL_THROUGHPUT_CHROMA[k] =
1541 dml_min(
1542 mode_lib->vba.MaxDCHUBToPSCLThroughput,
1543 mode_lib->vba.MaxPSCLToLBThroughput
1544 * mode_lib->vba.HRatio[k]
1545 / 2
1546 / dml_ceil(
1547 mode_lib->vba.HTAPsChroma[k]
1548 / 6.0,
1549 1.0));
1550 } else {
1551 locals->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
1552 mode_lib->vba.MaxDCHUBToPSCLThroughput,
1553 mode_lib->vba.MaxPSCLToLBThroughput);
1554 }
1555 mode_lib->vba.DPPCLKUsingSingleDPPChroma =
1556 mode_lib->vba.PixelClock[k]
1557 * dml_max(
1558 mode_lib->vba.VTAPsChroma[k]
1559 / 6.0
1560 * dml_min(
1561 1.0,
1562 mode_lib->vba.HRatio[k]
1563 / 2),
1564 dml_max(
1565 mode_lib->vba.HRatio[k]
1566 * mode_lib->vba.VRatio[k]
1567 / 4
1568 / locals->PSCL_THROUGHPUT_CHROMA[k],
1569 1.0));
1570
1571 if ((mode_lib->vba.HTAPsChroma[k] > 6 || mode_lib->vba.VTAPsChroma[k] > 6)
1572 && mode_lib->vba.DPPCLKUsingSingleDPPChroma
1573 < 2 * mode_lib->vba.PixelClock[k]) {
1574 mode_lib->vba.DPPCLKUsingSingleDPPChroma = 2
1575 * mode_lib->vba.PixelClock[k];
1576 }
1577
1578 locals->DPPCLKUsingSingleDPP[k] = dml_max(
1579 mode_lib->vba.DPPCLKUsingSingleDPPLuma,
1580 mode_lib->vba.DPPCLKUsingSingleDPPChroma);
1581 }
1582 }
1583
1584 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1585 if (mode_lib->vba.BlendingAndTiming[k] != k)
1586 continue;
1587 if (mode_lib->vba.ODMCombineEnabled[k]) {
1588 mode_lib->vba.DISPCLKWithRamping =
1589 dml_max(
1590 mode_lib->vba.DISPCLKWithRamping,
1591 mode_lib->vba.PixelClock[k] / 2
1592 * (1
1593 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
1594 / 100)
1595 * (1
1596 + mode_lib->vba.DISPCLKRampingMargin
1597 / 100));
1598 mode_lib->vba.DISPCLKWithoutRamping =
1599 dml_max(
1600 mode_lib->vba.DISPCLKWithoutRamping,
1601 mode_lib->vba.PixelClock[k] / 2
1602 * (1
1603 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
1604 / 100));
1605 } else if (!mode_lib->vba.ODMCombineEnabled[k]) {
1606 mode_lib->vba.DISPCLKWithRamping =
1607 dml_max(
1608 mode_lib->vba.DISPCLKWithRamping,
1609 mode_lib->vba.PixelClock[k]
1610 * (1
1611 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
1612 / 100)
1613 * (1
1614 + mode_lib->vba.DISPCLKRampingMargin
1615 / 100));
1616 mode_lib->vba.DISPCLKWithoutRamping =
1617 dml_max(
1618 mode_lib->vba.DISPCLKWithoutRamping,
1619 mode_lib->vba.PixelClock[k]
1620 * (1
1621 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
1622 / 100));
1623 }
1624 }
1625
1626 mode_lib->vba.DISPCLKWithRamping = dml_max(
1627 mode_lib->vba.DISPCLKWithRamping,
1628 mode_lib->vba.WritebackDISPCLK);
1629 mode_lib->vba.DISPCLKWithoutRamping = dml_max(
1630 mode_lib->vba.DISPCLKWithoutRamping,
1631 mode_lib->vba.WritebackDISPCLK);
1632
1633 ASSERT(mode_lib->vba.DISPCLKDPPCLKVCOSpeed != 0);
1634 mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
1635 mode_lib->vba.DISPCLKWithRamping,
1636 mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
1637 mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
1638 mode_lib->vba.DISPCLKWithoutRamping,
1639 mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
1640 mode_lib->vba.MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
1641 mode_lib->vba.soc.clock_limits[mode_lib->vba.soc.num_states - 1].dispclk_mhz,
1642 mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
1643 if (mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity
1644 > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) {
1645 mode_lib->vba.DISPCLK_calculated =
1646 mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity;
1647 } else if (mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity
1648 > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) {
1649 mode_lib->vba.DISPCLK_calculated = mode_lib->vba.MaxDispclkRoundedToDFSGranularity;
1650 } else {
1651 mode_lib->vba.DISPCLK_calculated =
1652 mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity;
1653 }
1654 DTRACE(" dispclk_mhz (calculated) = %f", mode_lib->vba.DISPCLK_calculated);
1655
1656 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1657 mode_lib->vba.DPPCLK_calculated[k] = locals->DPPCLKUsingSingleDPP[k]
1658 / mode_lib->vba.DPPPerPlane[k]
1659 * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100);
1660 mode_lib->vba.GlobalDPPCLK = dml_max(
1661 mode_lib->vba.GlobalDPPCLK,
1662 mode_lib->vba.DPPCLK_calculated[k]);
1663 }
1664 mode_lib->vba.GlobalDPPCLK = RoundToDFSGranularityUp(
1665 mode_lib->vba.GlobalDPPCLK,
1666 mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
1667 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1668 mode_lib->vba.DPPCLK_calculated[k] = mode_lib->vba.GlobalDPPCLK / 255
1669 * dml_ceil(
1670 mode_lib->vba.DPPCLK_calculated[k] * 255
1671 / mode_lib->vba.GlobalDPPCLK,
1672 1);
1673 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, mode_lib->vba.DPPCLK_calculated[k]);
1674 }
1675
1676 // Urgent and B P-State/DRAM Clock Change Watermark
1677 DTRACE(" dcfclk_mhz = %f", mode_lib->vba.DCFCLK);
1678 DTRACE(" return_bw_to_dcn = %f", mode_lib->vba.ReturnBandwidthToDCN);
1679 DTRACE(" return_bus_bw = %f", mode_lib->vba.ReturnBW);
1680
1681 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1682 bool MainPlaneDoesODMCombine = false;
1683
1684 if (mode_lib->vba.SourceScan[k] == dm_horz)
1685 locals->SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportWidth[k];
1686 else
1687 locals->SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportHeight[k];
1688
1689 if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
1690 MainPlaneDoesODMCombine = true;
1691 for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j)
1692 if (mode_lib->vba.BlendingAndTiming[k] == j
1693 && mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
1694 MainPlaneDoesODMCombine = true;
1695
1696 if (MainPlaneDoesODMCombine == true)
1697 locals->SwathWidthY[k] = dml_min(
1698 (double) locals->SwathWidthSingleDPPY[k],
1699 dml_round(
1700 mode_lib->vba.HActive[k] / 2.0
1701 * mode_lib->vba.HRatio[k]));
1702 else
1703 locals->SwathWidthY[k] = locals->SwathWidthSingleDPPY[k]
1704 / mode_lib->vba.DPPPerPlane[k];
1705 }
1706
1707 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1708 if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) {
1709 locals->BytePerPixelDETY[k] = 8;
1710 locals->BytePerPixelDETC[k] = 0;
1711 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) {
1712 locals->BytePerPixelDETY[k] = 4;
1713 locals->BytePerPixelDETC[k] = 0;
1714 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16 || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16) {
1715 locals->BytePerPixelDETY[k] = 2;
1716 locals->BytePerPixelDETC[k] = 0;
1717 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8 || mode_lib->vba.SourcePixelFormat[k] == dm_mono_8) {
1718 locals->BytePerPixelDETY[k] = 1;
1719 locals->BytePerPixelDETC[k] = 0;
1720 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) {
1721 locals->BytePerPixelDETY[k] = 1;
1722 locals->BytePerPixelDETC[k] = 2;
1723 } else { // dm_420_10
1724 locals->BytePerPixelDETY[k] = 4.0 / 3.0;
1725 locals->BytePerPixelDETC[k] = 8.0 / 3.0;
1726 }
1727 }
1728
1729 mode_lib->vba.TotalDataReadBandwidth = 0.0;
1730 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1731 locals->ReadBandwidthPlaneLuma[k] = locals->SwathWidthSingleDPPY[k]
1732 * dml_ceil(locals->BytePerPixelDETY[k], 1)
1733 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k])
1734 * mode_lib->vba.VRatio[k];
1735 locals->ReadBandwidthPlaneChroma[k] = locals->SwathWidthSingleDPPY[k]
1736 / 2 * dml_ceil(locals->BytePerPixelDETC[k], 2)
1737 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k])
1738 * mode_lib->vba.VRatio[k] / 2;
1739 DTRACE(
1740 " read_bw[%i] = %fBps",
1741 k,
1742 locals->ReadBandwidthPlaneLuma[k]
1743 + locals->ReadBandwidthPlaneChroma[k]);
1744 mode_lib->vba.TotalDataReadBandwidth += locals->ReadBandwidthPlaneLuma[k]
1745 + locals->ReadBandwidthPlaneChroma[k];
1746 }
1747
1748 // DCFCLK Deep Sleep
1749 CalculateDCFCLKDeepSleep(
1750 mode_lib,
1751 mode_lib->vba.NumberOfActivePlanes,
1752 locals->BytePerPixelDETY,
1753 locals->BytePerPixelDETC,
1754 mode_lib->vba.VRatio,
1755 locals->SwathWidthY,
1756 mode_lib->vba.DPPPerPlane,
1757 mode_lib->vba.HRatio,
1758 mode_lib->vba.PixelClock,
1759 locals->PSCL_THROUGHPUT_LUMA,
1760 locals->PSCL_THROUGHPUT_CHROMA,
1761 locals->DPPCLK,
1762 &mode_lib->vba.DCFCLKDeepSleep);
1763
1764 // DSCCLK
1765 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1766 if ((mode_lib->vba.BlendingAndTiming[k] != k) || !mode_lib->vba.DSCEnabled[k]) {
1767 locals->DSCCLK_calculated[k] = 0.0;
1768 } else {
1769 if (mode_lib->vba.OutputFormat[k] == dm_420
1770 || mode_lib->vba.OutputFormat[k] == dm_n422)
1771 mode_lib->vba.DSCFormatFactor = 2;
1772 else
1773 mode_lib->vba.DSCFormatFactor = 1;
1774 if (mode_lib->vba.ODMCombineEnabled[k])
1775 locals->DSCCLK_calculated[k] =
1776 mode_lib->vba.PixelClockBackEnd[k] / 6
1777 / mode_lib->vba.DSCFormatFactor
1778 / (1
1779 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
1780 / 100);
1781 else
1782 locals->DSCCLK_calculated[k] =
1783 mode_lib->vba.PixelClockBackEnd[k] / 3
1784 / mode_lib->vba.DSCFormatFactor
1785 / (1
1786 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
1787 / 100);
1788 }
1789 }
1790
1791 // DSC Delay
1792 // TODO
1793 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1794 double bpp = mode_lib->vba.OutputBpp[k];
1795 unsigned int slices = mode_lib->vba.NumberOfDSCSlices[k];
1796
1797 if (mode_lib->vba.DSCEnabled[k] && bpp != 0) {
1798 if (!mode_lib->vba.ODMCombineEnabled[k]) {
1799 locals->DSCDelay[k] =
1800 dscceComputeDelay(
1801 mode_lib->vba.DSCInputBitPerComponent[k],
1802 bpp,
1803 dml_ceil(
1804 (double) mode_lib->vba.HActive[k]
1805 / mode_lib->vba.NumberOfDSCSlices[k],
1806 1),
1807 slices,
1808 mode_lib->vba.OutputFormat[k])
1809 + dscComputeDelay(
1810 mode_lib->vba.OutputFormat[k]);
1811 } else {
1812 locals->DSCDelay[k] =
1813 2
1814 * (dscceComputeDelay(
1815 mode_lib->vba.DSCInputBitPerComponent[k],
1816 bpp,
1817 dml_ceil(
1818 (double) mode_lib->vba.HActive[k]
1819 / mode_lib->vba.NumberOfDSCSlices[k],
1820 1),
1821 slices / 2.0,
1822 mode_lib->vba.OutputFormat[k])
1823 + dscComputeDelay(
1824 mode_lib->vba.OutputFormat[k]));
1825 }
1826 locals->DSCDelay[k] = locals->DSCDelay[k]
1827 * mode_lib->vba.PixelClock[k]
1828 / mode_lib->vba.PixelClockBackEnd[k];
1829 } else {
1830 locals->DSCDelay[k] = 0;
1831 }
1832 }
1833
1834 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k)
1835 for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) // NumberOfPlanes
1836 if (j != k && mode_lib->vba.BlendingAndTiming[k] == j
1837 && mode_lib->vba.DSCEnabled[j])
1838 locals->DSCDelay[k] = locals->DSCDelay[j];
1839
1840 // Prefetch
1841 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1842 unsigned int PDEAndMetaPTEBytesFrameY;
1843 unsigned int PixelPTEBytesPerRowY;
1844 unsigned int MetaRowByteY;
1845 unsigned int MetaRowByteC;
1846 unsigned int PDEAndMetaPTEBytesFrameC;
1847 unsigned int PixelPTEBytesPerRowC;
1848 bool PTEBufferSizeNotExceededY;
1849 bool PTEBufferSizeNotExceededC;
1850
1851 Calculate256BBlockSizes(
1852 mode_lib->vba.SourcePixelFormat[k],
1853 mode_lib->vba.SurfaceTiling[k],
1854 dml_ceil(locals->BytePerPixelDETY[k], 1),
1855 dml_ceil(locals->BytePerPixelDETC[k], 2),
1856 &locals->BlockHeight256BytesY[k],
1857 &locals->BlockHeight256BytesC[k],
1858 &locals->BlockWidth256BytesY[k],
1859 &locals->BlockWidth256BytesC[k]);
1860
1861 locals->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
1862 mode_lib,
1863 mode_lib->vba.VRatio[k],
1864 mode_lib->vba.vtaps[k],
1865 mode_lib->vba.Interlace[k],
1866 mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
1867 mode_lib->vba.SwathHeightY[k],
1868 mode_lib->vba.ViewportYStartY[k],
1869 &locals->VInitPreFillY[k],
1870 &locals->MaxNumSwathY[k]);
1871
1872 if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64
1873 && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
1874 && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
1875 && mode_lib->vba.SourcePixelFormat[k] != dm_444_8)) {
1876 PDEAndMetaPTEBytesFrameC =
1877 CalculateVMAndRowBytes(
1878 mode_lib,
1879 mode_lib->vba.DCCEnable[k],
1880 locals->BlockHeight256BytesC[k],
1881 locals->BlockWidth256BytesC[k],
1882 mode_lib->vba.SourcePixelFormat[k],
1883 mode_lib->vba.SurfaceTiling[k],
1884 dml_ceil(
1885 locals->BytePerPixelDETC[k],
1886 2),
1887 mode_lib->vba.SourceScan[k],
1888 mode_lib->vba.ViewportWidth[k] / 2,
1889 mode_lib->vba.ViewportHeight[k] / 2,
1890 locals->SwathWidthY[k] / 2,
1891 mode_lib->vba.GPUVMEnable,
1892 mode_lib->vba.HostVMEnable,
1893 mode_lib->vba.HostVMMaxPageTableLevels,
1894 mode_lib->vba.HostVMCachedPageTableLevels,
1895 mode_lib->vba.VMMPageSize,
1896 mode_lib->vba.PTEBufferSizeInRequestsChroma,
1897 mode_lib->vba.PitchC[k],
1898 mode_lib->vba.DCCMetaPitchC[k],
1899 &locals->MacroTileWidthC[k],
1900 &MetaRowByteC,
1901 &PixelPTEBytesPerRowC,
1902 &PTEBufferSizeNotExceededC,
1903 &locals->dpte_row_width_chroma_ub[k],
1904 &locals->dpte_row_height_chroma[k],
1905 &locals->meta_req_width_chroma[k],
1906 &locals->meta_req_height_chroma[k],
1907 &locals->meta_row_width_chroma[k],
1908 &locals->meta_row_height_chroma[k],
1909 &locals->vm_group_bytes_chroma,
1910 &locals->dpte_group_bytes_chroma,
1911 &locals->PixelPTEReqWidthC[k],
1912 &locals->PixelPTEReqHeightC[k],
1913 &locals->PTERequestSizeC[k],
1914 &locals->dpde0_bytes_per_frame_ub_c[k],
1915 &locals->meta_pte_bytes_per_frame_ub_c[k]);
1916
1917 locals->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
1918 mode_lib,
1919 mode_lib->vba.VRatio[k] / 2,
1920 mode_lib->vba.VTAPsChroma[k],
1921 mode_lib->vba.Interlace[k],
1922 mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
1923 mode_lib->vba.SwathHeightC[k],
1924 mode_lib->vba.ViewportYStartC[k],
1925 &locals->VInitPreFillC[k],
1926 &locals->MaxNumSwathC[k]);
1927 } else {
1928 PixelPTEBytesPerRowC = 0;
1929 PDEAndMetaPTEBytesFrameC = 0;
1930 MetaRowByteC = 0;
1931 locals->MaxNumSwathC[k] = 0;
1932 locals->PrefetchSourceLinesC[k] = 0;
1933 locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma;
1934 }
1935
1936 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
1937 mode_lib,
1938 mode_lib->vba.DCCEnable[k],
1939 locals->BlockHeight256BytesY[k],
1940 locals->BlockWidth256BytesY[k],
1941 mode_lib->vba.SourcePixelFormat[k],
1942 mode_lib->vba.SurfaceTiling[k],
1943 dml_ceil(locals->BytePerPixelDETY[k], 1),
1944 mode_lib->vba.SourceScan[k],
1945 mode_lib->vba.ViewportWidth[k],
1946 mode_lib->vba.ViewportHeight[k],
1947 locals->SwathWidthY[k],
1948 mode_lib->vba.GPUVMEnable,
1949 mode_lib->vba.HostVMEnable,
1950 mode_lib->vba.HostVMMaxPageTableLevels,
1951 mode_lib->vba.HostVMCachedPageTableLevels,
1952 mode_lib->vba.VMMPageSize,
1953 locals->PTEBufferSizeInRequestsForLuma,
1954 mode_lib->vba.PitchY[k],
1955 mode_lib->vba.DCCMetaPitchY[k],
1956 &locals->MacroTileWidthY[k],
1957 &MetaRowByteY,
1958 &PixelPTEBytesPerRowY,
1959 &PTEBufferSizeNotExceededY,
1960 &locals->dpte_row_width_luma_ub[k],
1961 &locals->dpte_row_height[k],
1962 &locals->meta_req_width[k],
1963 &locals->meta_req_height[k],
1964 &locals->meta_row_width[k],
1965 &locals->meta_row_height[k],
1966 &locals->vm_group_bytes[k],
1967 &locals->dpte_group_bytes[k],
1968 &locals->PixelPTEReqWidthY[k],
1969 &locals->PixelPTEReqHeightY[k],
1970 &locals->PTERequestSizeY[k],
1971 &locals->dpde0_bytes_per_frame_ub_l[k],
1972 &locals->meta_pte_bytes_per_frame_ub_l[k]);
1973
1974 locals->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
1975 locals->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY
1976 + PDEAndMetaPTEBytesFrameC;
1977 locals->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
1978
1979 CalculateActiveRowBandwidth(
1980 mode_lib->vba.GPUVMEnable,
1981 mode_lib->vba.SourcePixelFormat[k],
1982 mode_lib->vba.VRatio[k],
1983 mode_lib->vba.DCCEnable[k],
1984 mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
1985 MetaRowByteY,
1986 MetaRowByteC,
1987 locals->meta_row_height[k],
1988 locals->meta_row_height_chroma[k],
1989 PixelPTEBytesPerRowY,
1990 PixelPTEBytesPerRowC,
1991 locals->dpte_row_height[k],
1992 locals->dpte_row_height_chroma[k],
1993 &locals->meta_row_bw[k],
1994 &locals->dpte_row_bw[k]);
1995 }
1996
1997 mode_lib->vba.TotalDCCActiveDPP = 0;
1998 mode_lib->vba.TotalActiveDPP = 0;
1999 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2000 mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP
2001 + mode_lib->vba.DPPPerPlane[k];
2002 if (mode_lib->vba.DCCEnable[k])
2003 mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP
2004 + mode_lib->vba.DPPPerPlane[k];
2005 }
2006
2007 mode_lib->vba.UrgentOutOfOrderReturnPerChannel = dml_max3(
2008 mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2009 mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2010 mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly);
2011
2012 mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency =
2013 (mode_lib->vba.RoundTripPingLatencyCycles + 32) / mode_lib->vba.DCFCLK
2014 + mode_lib->vba.UrgentOutOfOrderReturnPerChannel
2015 * mode_lib->vba.NumberOfChannels
2016 / mode_lib->vba.ReturnBW;
2017
2018 mode_lib->vba.UrgentExtraLatency = CalculateExtraLatency(
2019 mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency,
2020 mode_lib->vba.TotalActiveDPP,
2021 mode_lib->vba.PixelChunkSizeInKByte,
2022 mode_lib->vba.TotalDCCActiveDPP,
2023 mode_lib->vba.MetaChunkSize,
2024 mode_lib->vba.ReturnBW,
2025 mode_lib->vba.GPUVMEnable,
2026 mode_lib->vba.HostVMEnable,
2027 mode_lib->vba.NumberOfActivePlanes,
2028 mode_lib->vba.DPPPerPlane,
2029 locals->dpte_group_bytes,
2030 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2031 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2032 mode_lib->vba.HostVMMaxPageTableLevels,
2033 mode_lib->vba.HostVMCachedPageTableLevels);
2034
2035
2036 mode_lib->vba.TCalc = 24.0 / mode_lib->vba.DCFCLKDeepSleep;
2037
2038 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2039 if (mode_lib->vba.BlendingAndTiming[k] == k) {
2040 if (mode_lib->vba.WritebackEnable[k] == true) {
2041 locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] =
2042 mode_lib->vba.WritebackLatency
2043 + CalculateWriteBackDelay(
2044 mode_lib->vba.WritebackPixelFormat[k],
2045 mode_lib->vba.WritebackHRatio[k],
2046 mode_lib->vba.WritebackVRatio[k],
2047 mode_lib->vba.WritebackLumaHTaps[k],
2048 mode_lib->vba.WritebackLumaVTaps[k],
2049 mode_lib->vba.WritebackChromaHTaps[k],
2050 mode_lib->vba.WritebackChromaVTaps[k],
2051 mode_lib->vba.WritebackDestinationWidth[k])
2052 / mode_lib->vba.DISPCLK;
2053 } else
2054 locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] = 0;
2055 for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) {
2056 if (mode_lib->vba.BlendingAndTiming[j] == k
2057 && mode_lib->vba.WritebackEnable[j] == true) {
2058 locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] =
2059 dml_max(
2060 locals->WritebackDelay[mode_lib->vba.VoltageLevel][k],
2061 mode_lib->vba.WritebackLatency
2062 + CalculateWriteBackDelay(
2063 mode_lib->vba.WritebackPixelFormat[j],
2064 mode_lib->vba.WritebackHRatio[j],
2065 mode_lib->vba.WritebackVRatio[j],
2066 mode_lib->vba.WritebackLumaHTaps[j],
2067 mode_lib->vba.WritebackLumaVTaps[j],
2068 mode_lib->vba.WritebackChromaHTaps[j],
2069 mode_lib->vba.WritebackChromaVTaps[j],
2070 mode_lib->vba.WritebackDestinationWidth[j])
2071 / mode_lib->vba.DISPCLK);
2072 }
2073 }
2074 }
2075 }
2076
2077 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k)
2078 for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j)
2079 if (mode_lib->vba.BlendingAndTiming[k] == j)
2080 locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] =
2081 locals->WritebackDelay[mode_lib->vba.VoltageLevel][j];
2082
2083 mode_lib->vba.VStartupLines = 13;
2084 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2085 locals->MaxVStartupLines[k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] - dml_max(1.0, dml_ceil(locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1));
2086 }
2087
2088 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k)
2089 locals->MaximumMaxVStartupLines = dml_max(locals->MaximumMaxVStartupLines, locals->MaxVStartupLines[k]);
2090
2091 // We don't really care to iterate between the various prefetch modes
2092 //mode_lib->vba.PrefetchERROR = CalculateMinAndMaxPrefetchMode(mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &mode_lib->vba.MinPrefetchMode, &mode_lib->vba.MaxPrefetchMode);
2093 mode_lib->vba.UrgentLatency = dml_max3(mode_lib->vba.UrgentLatencyPixelDataOnly, mode_lib->vba.UrgentLatencyPixelMixedWithVMData, mode_lib->vba.UrgentLatencyVMDataOnly);
2094
2095 do {
2096 double MaxTotalRDBandwidth = 0;
2097 double MaxTotalRDBandwidthNoUrgentBurst = 0;
2098 bool DestinationLineTimesForPrefetchLessThan2 = false;
2099 bool VRatioPrefetchMoreThan4 = false;
2100 double TWait = CalculateTWait(
2101 mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb],
2102 mode_lib->vba.DRAMClockChangeLatency,
2103 mode_lib->vba.UrgentLatency,
2104 mode_lib->vba.SREnterPlusExitTime);
2105
2106 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2107 Pipe myPipe;
2108 HostVM myHostVM;
2109
2110 if (mode_lib->vba.XFCEnabled[k] == true) {
2111 mode_lib->vba.XFCRemoteSurfaceFlipDelay =
2112 CalculateRemoteSurfaceFlipDelay(
2113 mode_lib,
2114 mode_lib->vba.VRatio[k],
2115 locals->SwathWidthY[k],
2116 dml_ceil(
2117 locals->BytePerPixelDETY[k],
2118 1),
2119 mode_lib->vba.HTotal[k]
2120 / mode_lib->vba.PixelClock[k],
2121 mode_lib->vba.XFCTSlvVupdateOffset,
2122 mode_lib->vba.XFCTSlvVupdateWidth,
2123 mode_lib->vba.XFCTSlvVreadyOffset,
2124 mode_lib->vba.XFCXBUFLatencyTolerance,
2125 mode_lib->vba.XFCFillBWOverhead,
2126 mode_lib->vba.XFCSlvChunkSize,
2127 mode_lib->vba.XFCBusTransportTime,
2128 mode_lib->vba.TCalc,
2129 TWait,
2130 &mode_lib->vba.SrcActiveDrainRate,
2131 &mode_lib->vba.TInitXFill,
2132 &mode_lib->vba.TslvChk);
2133 } else {
2134 mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0;
2135 }
2136
2137 myPipe.DPPCLK = locals->DPPCLK[k];
2138 myPipe.DISPCLK = mode_lib->vba.DISPCLK;
2139 myPipe.PixelClock = mode_lib->vba.PixelClock[k];
2140 myPipe.DCFCLKDeepSleep = mode_lib->vba.DCFCLKDeepSleep;
2141 myPipe.DPPPerPlane = mode_lib->vba.DPPPerPlane[k];
2142 myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k];
2143 myPipe.SourceScan = mode_lib->vba.SourceScan[k];
2144 myPipe.BlockWidth256BytesY = locals->BlockWidth256BytesY[k];
2145 myPipe.BlockHeight256BytesY = locals->BlockHeight256BytesY[k];
2146 myPipe.BlockWidth256BytesC = locals->BlockWidth256BytesC[k];
2147 myPipe.BlockHeight256BytesC = locals->BlockHeight256BytesC[k];
2148 myPipe.InterlaceEnable = mode_lib->vba.Interlace[k];
2149 myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k];
2150 myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k];
2151 myPipe.HTotal = mode_lib->vba.HTotal[k];
2152
2153
2154 myHostVM.Enable = mode_lib->vba.HostVMEnable;
2155 myHostVM.MaxPageTableLevels = mode_lib->vba.HostVMMaxPageTableLevels;
2156 myHostVM.CachedPageTableLevels = mode_lib->vba.HostVMCachedPageTableLevels;
2157
2158 mode_lib->vba.ErrorResult[k] =
2159 CalculatePrefetchSchedule(
2160 mode_lib,
2161 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2162 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2163 &myPipe,
2164 locals->DSCDelay[k],
2165 mode_lib->vba.DPPCLKDelaySubtotal,
2166 mode_lib->vba.DPPCLKDelaySCL,
2167 mode_lib->vba.DPPCLKDelaySCLLBOnly,
2168 mode_lib->vba.DPPCLKDelayCNVCFormater,
2169 mode_lib->vba.DPPCLKDelayCNVCCursor,
2170 mode_lib->vba.DISPCLKDelaySubtotal,
2171 (unsigned int) (locals->SwathWidthY[k]
2172 / mode_lib->vba.HRatio[k]),
2173 mode_lib->vba.OutputFormat[k],
2174 mode_lib->vba.MaxInterDCNTileRepeaters,
2175 dml_min(mode_lib->vba.VStartupLines, locals->MaxVStartupLines[k]),
2176 locals->MaxVStartupLines[k],
2177 mode_lib->vba.GPUVMMaxPageTableLevels,
2178 mode_lib->vba.GPUVMEnable,
2179 &myHostVM,
2180 mode_lib->vba.DynamicMetadataEnable[k],
2181 mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k],
2182 mode_lib->vba.DynamicMetadataTransmittedBytes[k],
2183 mode_lib->vba.DCCEnable[k],
2184 mode_lib->vba.UrgentLatency,
2185 mode_lib->vba.UrgentExtraLatency,
2186 mode_lib->vba.TCalc,
2187 locals->PDEAndMetaPTEBytesFrame[k],
2188 locals->MetaRowByte[k],
2189 locals->PixelPTEBytesPerRow[k],
2190 locals->PrefetchSourceLinesY[k],
2191 locals->SwathWidthY[k],
2192 locals->BytePerPixelDETY[k],
2193 locals->VInitPreFillY[k],
2194 locals->MaxNumSwathY[k],
2195 locals->PrefetchSourceLinesC[k],
2196 locals->BytePerPixelDETC[k],
2197 locals->VInitPreFillC[k],
2198 locals->MaxNumSwathC[k],
2199 mode_lib->vba.SwathHeightY[k],
2200 mode_lib->vba.SwathHeightC[k],
2201 TWait,
2202 mode_lib->vba.XFCEnabled[k],
2203 mode_lib->vba.XFCRemoteSurfaceFlipDelay,
2204 mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
2205 &locals->DSTXAfterScaler[k],
2206 &locals->DSTYAfterScaler[k],
2207 &locals->DestinationLinesForPrefetch[k],
2208 &locals->PrefetchBandwidth[k],
2209 &locals->DestinationLinesToRequestVMInVBlank[k],
2210 &locals->DestinationLinesToRequestRowInVBlank[k],
2211 &locals->VRatioPrefetchY[k],
2212 &locals->VRatioPrefetchC[k],
2213 &locals->RequiredPrefetchPixDataBWLuma[k],
2214 &locals->RequiredPrefetchPixDataBWChroma[k],
2215 &locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata,
2216 &locals->Tno_bw[k],
2217 &locals->prefetch_vmrow_bw[k],
2218 &locals->swath_width_luma_ub[k],
2219 &locals->swath_width_chroma_ub[k],
2220 &mode_lib->vba.VUpdateOffsetPix[k],
2221 &mode_lib->vba.VUpdateWidthPix[k],
2222 &mode_lib->vba.VReadyOffsetPix[k]);
2223 if (mode_lib->vba.BlendingAndTiming[k] == k) {
2224 locals->VStartup[k] = dml_min(
2225 mode_lib->vba.VStartupLines,
2226 locals->MaxVStartupLines[k]);
2227 if (locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata
2228 != 0) {
2229 locals->VStartup[k] =
2230 locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata;
2231 }
2232 } else {
2233 locals->VStartup[k] =
2234 dml_min(
2235 mode_lib->vba.VStartupLines,
2236 locals->MaxVStartupLines[mode_lib->vba.BlendingAndTiming[k]]);
2237 }
2238 }
2239
2240 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2241 unsigned int m;
2242
2243 locals->cursor_bw[k] = 0;
2244 locals->cursor_bw_pre[k] = 0;
2245 for (m = 0; m < mode_lib->vba.NumberOfCursors[k]; m++) {
2246 locals->cursor_bw[k] += mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m] / 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k];
2247 locals->cursor_bw_pre[k] += mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m] / 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * locals->VRatioPrefetchY[k];
2248 }
2249
2250 CalculateUrgentBurstFactor(
2251 mode_lib->vba.DETBufferSizeInKByte[0],
2252 mode_lib->vba.SwathHeightY[k],
2253 mode_lib->vba.SwathHeightC[k],
2254 locals->SwathWidthY[k],
2255 mode_lib->vba.HTotal[k] /
2256 mode_lib->vba.PixelClock[k],
2257 mode_lib->vba.UrgentLatency,
2258 mode_lib->vba.CursorBufferSize,
2259 mode_lib->vba.CursorWidth[k][0] + mode_lib->vba.CursorWidth[k][1],
2260 dml_max(mode_lib->vba.CursorBPP[k][0], mode_lib->vba.CursorBPP[k][1]),
2261 mode_lib->vba.VRatio[k],
2262 locals->VRatioPrefetchY[k],
2263 locals->VRatioPrefetchC[k],
2264 locals->BytePerPixelDETY[k],
2265 locals->BytePerPixelDETC[k],
2266 &locals->UrgentBurstFactorCursor[k],
2267 &locals->UrgentBurstFactorCursorPre[k],
2268 &locals->UrgentBurstFactorLuma[k],
2269 &locals->UrgentBurstFactorLumaPre[k],
2270 &locals->UrgentBurstFactorChroma[k],
2271 &locals->UrgentBurstFactorChromaPre[k],
2272 &locals->NotEnoughUrgentLatencyHiding[0][0],
2273 &locals->NotEnoughUrgentLatencyHidingPre);
2274
2275 if (mode_lib->vba.UseUrgentBurstBandwidth == false) {
2276 locals->UrgentBurstFactorLuma[k] = 1;
2277 locals->UrgentBurstFactorChroma[k] = 1;
2278 locals->UrgentBurstFactorCursor[k] = 1;
2279 locals->UrgentBurstFactorLumaPre[k] = 1;
2280 locals->UrgentBurstFactorChromaPre[k] = 1;
2281 locals->UrgentBurstFactorCursorPre[k] = 1;
2282 }
2283
2284 MaxTotalRDBandwidth = MaxTotalRDBandwidth +
2285 dml_max3(locals->prefetch_vmrow_bw[k],
2286 locals->ReadBandwidthPlaneLuma[k] * locals->UrgentBurstFactorLuma[k]
2287 + locals->ReadBandwidthPlaneChroma[k] * locals->UrgentBurstFactorChroma[k] + locals->cursor_bw[k]
2288 * locals->UrgentBurstFactorCursor[k] + locals->meta_row_bw[k] + locals->dpte_row_bw[k],
2289 locals->RequiredPrefetchPixDataBWLuma[k] * locals->UrgentBurstFactorLumaPre[k] + locals->RequiredPrefetchPixDataBWChroma[k]
2290 * locals->UrgentBurstFactorChromaPre[k] + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]);
2291
2292 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst +
2293 dml_max3(locals->prefetch_vmrow_bw[k],
2294 locals->ReadBandwidthPlaneLuma[k] + locals->ReadBandwidthPlaneChroma[k] + locals->cursor_bw[k]
2295 + locals->meta_row_bw[k] + locals->dpte_row_bw[k],
2296 locals->RequiredPrefetchPixDataBWLuma[k] + locals->RequiredPrefetchPixDataBWChroma[k] + locals->cursor_bw_pre[k]);
2297
2298 if (locals->DestinationLinesForPrefetch[k] < 2)
2299 DestinationLineTimesForPrefetchLessThan2 = true;
2300 if (locals->VRatioPrefetchY[k] > 4 || locals->VRatioPrefetchC[k] > 4)
2301 VRatioPrefetchMoreThan4 = true;
2302 }
2303 mode_lib->vba.FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / mode_lib->vba.ReturnBW;
2304
2305 if (MaxTotalRDBandwidth <= mode_lib->vba.ReturnBW && locals->NotEnoughUrgentLatencyHiding[0][0] == 0 &&
2306 locals->NotEnoughUrgentLatencyHidingPre == 0 && !VRatioPrefetchMoreThan4
2307 && !DestinationLineTimesForPrefetchLessThan2)
2308 mode_lib->vba.PrefetchModeSupported = true;
2309 else {
2310 mode_lib->vba.PrefetchModeSupported = false;
2311 dml_print(
2312 "DML: CalculatePrefetchSchedule ***failed***. Bandwidth violation. Results are NOT valid\n");
2313 }
2314
2315 if (mode_lib->vba.PrefetchModeSupported == true) {
2316 mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.ReturnBW;
2317 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2318 mode_lib->vba.BandwidthAvailableForImmediateFlip =
2319 mode_lib->vba.BandwidthAvailableForImmediateFlip
2320 - dml_max(
2321 locals->ReadBandwidthPlaneLuma[k] * locals->UrgentBurstFactorLuma[k]
2322 + locals->ReadBandwidthPlaneChroma[k] * locals->UrgentBurstFactorChroma[k]
2323 + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k],
2324 locals->RequiredPrefetchPixDataBWLuma[k] * locals->UrgentBurstFactorLumaPre[k] +
2325 locals->RequiredPrefetchPixDataBWChroma[k] * locals->UrgentBurstFactorChromaPre[k] +
2326 locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]);
2327 }
2328
2329 mode_lib->vba.TotImmediateFlipBytes = 0;
2330 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2331 mode_lib->vba.TotImmediateFlipBytes = mode_lib->vba.TotImmediateFlipBytes + locals->PDEAndMetaPTEBytesFrame[k] + locals->MetaRowByte[k] + locals->PixelPTEBytesPerRow[k];
2332 }
2333 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2334 CalculateFlipSchedule(
2335 mode_lib,
2336 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2337 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2338 mode_lib->vba.UrgentExtraLatency,
2339 mode_lib->vba.UrgentLatency,
2340 mode_lib->vba.GPUVMMaxPageTableLevels,
2341 mode_lib->vba.HostVMEnable,
2342 mode_lib->vba.HostVMMaxPageTableLevels,
2343 mode_lib->vba.HostVMCachedPageTableLevels,
2344 mode_lib->vba.GPUVMEnable,
2345 locals->PDEAndMetaPTEBytesFrame[k],
2346 locals->MetaRowByte[k],
2347 locals->PixelPTEBytesPerRow[k],
2348 mode_lib->vba.BandwidthAvailableForImmediateFlip,
2349 mode_lib->vba.TotImmediateFlipBytes,
2350 mode_lib->vba.SourcePixelFormat[k],
2351 mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
2352 mode_lib->vba.VRatio[k],
2353 locals->Tno_bw[k],
2354 mode_lib->vba.DCCEnable[k],
2355 locals->dpte_row_height[k],
2356 locals->meta_row_height[k],
2357 locals->dpte_row_height_chroma[k],
2358 locals->meta_row_height_chroma[k],
2359 &locals->DestinationLinesToRequestVMInImmediateFlip[k],
2360 &locals->DestinationLinesToRequestRowInImmediateFlip[k],
2361 &locals->final_flip_bw[k],
2362 &locals->ImmediateFlipSupportedForPipe[k]);
2363 }
2364 mode_lib->vba.total_dcn_read_bw_with_flip = 0.0;
2365 mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2366 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2367 mode_lib->vba.total_dcn_read_bw_with_flip =
2368 mode_lib->vba.total_dcn_read_bw_with_flip + dml_max3(
2369 locals->prefetch_vmrow_bw[k],
2370 locals->final_flip_bw[k] + locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k]
2371 + locals->ReadBandwidthChroma[k] * locals->UrgentBurstFactorChroma[k] + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k],
2372 locals->final_flip_bw[k] + locals->RequiredPrefetchPixDataBWLuma[k] * locals->UrgentBurstFactorLumaPre[k]
2373 + locals->RequiredPrefetchPixDataBWChroma[k] * locals->UrgentBurstFactorChromaPre[k]
2374 + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]);
2375 mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst =
2376 mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst +
2377 dml_max3(locals->prefetch_vmrow_bw[k],
2378 locals->final_flip_bw[k] + locals->ReadBandwidthPlaneLuma[k] + locals->ReadBandwidthPlaneChroma[k] + locals->cursor_bw[k],
2379 locals->final_flip_bw[k] + locals->RequiredPrefetchPixDataBWLuma[k] + locals->RequiredPrefetchPixDataBWChroma[k] + locals->cursor_bw_pre[k]);
2380
2381 }
2382 mode_lib->vba.FractionOfUrgentBandwidthImmediateFlip = mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst / mode_lib->vba.ReturnBW;
2383
2384 mode_lib->vba.ImmediateFlipSupported = true;
2385 if (mode_lib->vba.total_dcn_read_bw_with_flip > mode_lib->vba.ReturnBW) {
2386 mode_lib->vba.ImmediateFlipSupported = false;
2387 }
2388 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2389 if (locals->ImmediateFlipSupportedForPipe[k] == false) {
2390 mode_lib->vba.ImmediateFlipSupported = false;
2391 }
2392 }
2393 } else {
2394 mode_lib->vba.ImmediateFlipSupported = false;
2395 }
2396
2397 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2398 if (mode_lib->vba.ErrorResult[k]) {
2399 mode_lib->vba.PrefetchModeSupported = false;
2400 dml_print(
2401 "DML: CalculatePrefetchSchedule ***failed***. Prefetch schedule violation. Results are NOT valid\n");
2402 }
2403 }
2404
2405 mode_lib->vba.VStartupLines = mode_lib->vba.VStartupLines + 1;
2406 } while (!((mode_lib->vba.PrefetchModeSupported
2407 && ((!mode_lib->vba.ImmediateFlipSupport && !mode_lib->vba.HostVMEnable)
2408 || mode_lib->vba.ImmediateFlipSupported))
2409 || locals->MaximumMaxVStartupLines < mode_lib->vba.VStartupLines));
2410
2411 //Watermarks and NB P-State/DRAM Clock Change Support
2412 {
2413 enum clock_change_support DRAMClockChangeSupport; // dummy
2414 CalculateWatermarksAndDRAMSpeedChangeSupport(
2415 mode_lib,
2416 mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb],
2417 mode_lib->vba.NumberOfActivePlanes,
2418 mode_lib->vba.MaxLineBufferLines,
2419 mode_lib->vba.LineBufferSize,
2420 mode_lib->vba.DPPOutputBufferPixels,
2421 mode_lib->vba.DETBufferSizeInKByte[0],
2422 mode_lib->vba.WritebackInterfaceLumaBufferSize,
2423 mode_lib->vba.WritebackInterfaceChromaBufferSize,
2424 mode_lib->vba.DCFCLK,
2425 mode_lib->vba.UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels,
2426 mode_lib->vba.ReturnBW,
2427 mode_lib->vba.GPUVMEnable,
2428 locals->dpte_group_bytes,
2429 mode_lib->vba.MetaChunkSize,
2430 mode_lib->vba.UrgentLatency,
2431 mode_lib->vba.UrgentExtraLatency,
2432 mode_lib->vba.WritebackLatency,
2433 mode_lib->vba.WritebackChunkSize,
2434 mode_lib->vba.SOCCLK,
2435 mode_lib->vba.DRAMClockChangeLatency,
2436 mode_lib->vba.SRExitTime,
2437 mode_lib->vba.SREnterPlusExitTime,
2438 mode_lib->vba.DCFCLKDeepSleep,
2439 mode_lib->vba.DPPPerPlane,
2440 mode_lib->vba.DCCEnable,
2441 locals->DPPCLK,
2442 locals->SwathWidthSingleDPPY,
2443 mode_lib->vba.SwathHeightY,
2444 locals->ReadBandwidthPlaneLuma,
2445 mode_lib->vba.SwathHeightC,
2446 locals->ReadBandwidthPlaneChroma,
2447 mode_lib->vba.LBBitPerPixel,
2448 locals->SwathWidthY,
2449 mode_lib->vba.HRatio,
2450 mode_lib->vba.vtaps,
2451 mode_lib->vba.VTAPsChroma,
2452 mode_lib->vba.VRatio,
2453 mode_lib->vba.HTotal,
2454 mode_lib->vba.PixelClock,
2455 mode_lib->vba.BlendingAndTiming,
2456 locals->BytePerPixelDETY,
2457 locals->BytePerPixelDETC,
2458 mode_lib->vba.WritebackEnable,
2459 mode_lib->vba.WritebackPixelFormat,
2460 mode_lib->vba.WritebackDestinationWidth,
2461 mode_lib->vba.WritebackDestinationHeight,
2462 mode_lib->vba.WritebackSourceHeight,
2463 &DRAMClockChangeSupport,
2464 &mode_lib->vba.UrgentWatermark,
2465 &mode_lib->vba.WritebackUrgentWatermark,
2466 &mode_lib->vba.DRAMClockChangeWatermark,
2467 &mode_lib->vba.WritebackDRAMClockChangeWatermark,
2468 &mode_lib->vba.StutterExitWatermark,
2469 &mode_lib->vba.StutterEnterPlusExitWatermark,
2470 &mode_lib->vba.MinActiveDRAMClockChangeLatencySupported);
2471 }
2472
2473
2474 //Display Pipeline Delivery Time in Prefetch, Groups
2475 CalculatePixelDeliveryTimes(
2476 mode_lib->vba.NumberOfActivePlanes,
2477 mode_lib->vba.VRatio,
2478 locals->VRatioPrefetchY,
2479 locals->VRatioPrefetchC,
2480 locals->swath_width_luma_ub,
2481 locals->swath_width_chroma_ub,
2482 mode_lib->vba.DPPPerPlane,
2483 mode_lib->vba.HRatio,
2484 mode_lib->vba.PixelClock,
2485 locals->PSCL_THROUGHPUT_LUMA,
2486 locals->PSCL_THROUGHPUT_CHROMA,
2487 locals->DPPCLK,
2488 locals->BytePerPixelDETC,
2489 mode_lib->vba.SourceScan,
2490 locals->BlockWidth256BytesY,
2491 locals->BlockHeight256BytesY,
2492 locals->BlockWidth256BytesC,
2493 locals->BlockHeight256BytesC,
2494 locals->DisplayPipeLineDeliveryTimeLuma,
2495 locals->DisplayPipeLineDeliveryTimeChroma,
2496 locals->DisplayPipeLineDeliveryTimeLumaPrefetch,
2497 locals->DisplayPipeLineDeliveryTimeChromaPrefetch,
2498 locals->DisplayPipeRequestDeliveryTimeLuma,
2499 locals->DisplayPipeRequestDeliveryTimeChroma,
2500 locals->DisplayPipeRequestDeliveryTimeLumaPrefetch,
2501 locals->DisplayPipeRequestDeliveryTimeChromaPrefetch);
2502
2503 CalculateMetaAndPTETimes(
2504 mode_lib->vba.NumberOfActivePlanes,
2505 mode_lib->vba.GPUVMEnable,
2506 mode_lib->vba.MetaChunkSize,
2507 mode_lib->vba.MinMetaChunkSizeBytes,
2508 mode_lib->vba.GPUVMMaxPageTableLevels,
2509 mode_lib->vba.HTotal,
2510 mode_lib->vba.VRatio,
2511 locals->VRatioPrefetchY,
2512 locals->VRatioPrefetchC,
2513 locals->DestinationLinesToRequestRowInVBlank,
2514 locals->DestinationLinesToRequestRowInImmediateFlip,
2515 locals->DestinationLinesToRequestVMInVBlank,
2516 locals->DestinationLinesToRequestVMInImmediateFlip,
2517 mode_lib->vba.DCCEnable,
2518 mode_lib->vba.PixelClock,
2519 locals->BytePerPixelDETY,
2520 locals->BytePerPixelDETC,
2521 mode_lib->vba.SourceScan,
2522 locals->dpte_row_height,
2523 locals->dpte_row_height_chroma,
2524 locals->meta_row_width,
2525 locals->meta_row_height,
2526 locals->meta_req_width,
2527 locals->meta_req_height,
2528 locals->dpte_group_bytes,
2529 locals->PTERequestSizeY,
2530 locals->PTERequestSizeC,
2531 locals->PixelPTEReqWidthY,
2532 locals->PixelPTEReqHeightY,
2533 locals->PixelPTEReqWidthC,
2534 locals->PixelPTEReqHeightC,
2535 locals->dpte_row_width_luma_ub,
2536 locals->dpte_row_width_chroma_ub,
2537 locals->vm_group_bytes,
2538 locals->dpde0_bytes_per_frame_ub_l,
2539 locals->dpde0_bytes_per_frame_ub_c,
2540 locals->meta_pte_bytes_per_frame_ub_l,
2541 locals->meta_pte_bytes_per_frame_ub_c,
2542 locals->DST_Y_PER_PTE_ROW_NOM_L,
2543 locals->DST_Y_PER_PTE_ROW_NOM_C,
2544 locals->DST_Y_PER_META_ROW_NOM_L,
2545 locals->TimePerMetaChunkNominal,
2546 locals->TimePerMetaChunkVBlank,
2547 locals->TimePerMetaChunkFlip,
2548 locals->time_per_pte_group_nom_luma,
2549 locals->time_per_pte_group_vblank_luma,
2550 locals->time_per_pte_group_flip_luma,
2551 locals->time_per_pte_group_nom_chroma,
2552 locals->time_per_pte_group_vblank_chroma,
2553 locals->time_per_pte_group_flip_chroma,
2554 locals->TimePerVMGroupVBlank,
2555 locals->TimePerVMGroupFlip,
2556 locals->TimePerVMRequestVBlank,
2557 locals->TimePerVMRequestFlip);
2558
2559
2560 // Min TTUVBlank
2561 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2562 if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) {
2563 locals->AllowDRAMClockChangeDuringVBlank[k] = true;
2564 locals->AllowDRAMSelfRefreshDuringVBlank[k] = true;
2565 locals->MinTTUVBlank[k] = dml_max(
2566 mode_lib->vba.DRAMClockChangeWatermark,
2567 dml_max(
2568 mode_lib->vba.StutterEnterPlusExitWatermark,
2569 mode_lib->vba.UrgentWatermark));
2570 } else if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 1) {
2571 locals->AllowDRAMClockChangeDuringVBlank[k] = false;
2572 locals->AllowDRAMSelfRefreshDuringVBlank[k] = true;
2573 locals->MinTTUVBlank[k] = dml_max(
2574 mode_lib->vba.StutterEnterPlusExitWatermark,
2575 mode_lib->vba.UrgentWatermark);
2576 } else {
2577 locals->AllowDRAMClockChangeDuringVBlank[k] = false;
2578 locals->AllowDRAMSelfRefreshDuringVBlank[k] = false;
2579 locals->MinTTUVBlank[k] = mode_lib->vba.UrgentWatermark;
2580 }
2581 if (!mode_lib->vba.DynamicMetadataEnable[k])
2582 locals->MinTTUVBlank[k] = mode_lib->vba.TCalc
2583 + locals->MinTTUVBlank[k];
2584 }
2585
2586 // DCC Configuration
2587 mode_lib->vba.ActiveDPPs = 0;
2588 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2589 locals->MaximumDCCCompressionYSurface[k] = CalculateDCCConfiguration(
2590 mode_lib->vba.DCCEnable[k],
2591 false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
2592 mode_lib->vba.ViewportWidth[k],
2593 mode_lib->vba.ViewportHeight[k],
2594 mode_lib->vba.DETBufferSizeInKByte[0] * 1024,
2595 locals->BlockHeight256BytesY[k],
2596 mode_lib->vba.SwathHeightY[k],
2597 mode_lib->vba.SurfaceTiling[k],
2598 locals->BytePerPixelDETY[k],
2599 mode_lib->vba.SourceScan[k],
2600 &locals->DCCYMaxUncompressedBlock[k],
2601 &locals->DCCYMaxCompressedBlock[k],
2602 &locals->DCCYIndependent64ByteBlock[k]);
2603 }
2604
2605 //XFC Parameters:
2606 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2607 if (mode_lib->vba.XFCEnabled[k] == true) {
2608 double TWait;
2609
2610 locals->XFCSlaveVUpdateOffset[k] = mode_lib->vba.XFCTSlvVupdateOffset;
2611 locals->XFCSlaveVupdateWidth[k] = mode_lib->vba.XFCTSlvVupdateWidth;
2612 locals->XFCSlaveVReadyOffset[k] = mode_lib->vba.XFCTSlvVreadyOffset;
2613 TWait = CalculateTWait(
2614 mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb],
2615 mode_lib->vba.DRAMClockChangeLatency,
2616 mode_lib->vba.UrgentLatency,
2617 mode_lib->vba.SREnterPlusExitTime);
2618 mode_lib->vba.XFCRemoteSurfaceFlipDelay = CalculateRemoteSurfaceFlipDelay(
2619 mode_lib,
2620 mode_lib->vba.VRatio[k],
2621 locals->SwathWidthY[k],
2622 dml_ceil(locals->BytePerPixelDETY[k], 1),
2623 mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
2624 mode_lib->vba.XFCTSlvVupdateOffset,
2625 mode_lib->vba.XFCTSlvVupdateWidth,
2626 mode_lib->vba.XFCTSlvVreadyOffset,
2627 mode_lib->vba.XFCXBUFLatencyTolerance,
2628 mode_lib->vba.XFCFillBWOverhead,
2629 mode_lib->vba.XFCSlvChunkSize,
2630 mode_lib->vba.XFCBusTransportTime,
2631 mode_lib->vba.TCalc,
2632 TWait,
2633 &mode_lib->vba.SrcActiveDrainRate,
2634 &mode_lib->vba.TInitXFill,
2635 &mode_lib->vba.TslvChk);
2636 locals->XFCRemoteSurfaceFlipLatency[k] =
2637 dml_floor(
2638 mode_lib->vba.XFCRemoteSurfaceFlipDelay
2639 / (mode_lib->vba.HTotal[k]
2640 / mode_lib->vba.PixelClock[k]),
2641 1);
2642 locals->XFCTransferDelay[k] =
2643 dml_ceil(
2644 mode_lib->vba.XFCBusTransportTime
2645 / (mode_lib->vba.HTotal[k]
2646 / mode_lib->vba.PixelClock[k]),
2647 1);
2648 locals->XFCPrechargeDelay[k] =
2649 dml_ceil(
2650 (mode_lib->vba.XFCBusTransportTime
2651 + mode_lib->vba.TInitXFill
2652 + mode_lib->vba.TslvChk)
2653 / (mode_lib->vba.HTotal[k]
2654 / mode_lib->vba.PixelClock[k]),
2655 1);
2656 mode_lib->vba.InitFillLevel = mode_lib->vba.XFCXBUFLatencyTolerance
2657 * mode_lib->vba.SrcActiveDrainRate;
2658 mode_lib->vba.FinalFillMargin =
2659 (locals->DestinationLinesToRequestVMInVBlank[k]
2660 + locals->DestinationLinesToRequestRowInVBlank[k])
2661 * mode_lib->vba.HTotal[k]
2662 / mode_lib->vba.PixelClock[k]
2663 * mode_lib->vba.SrcActiveDrainRate
2664 + mode_lib->vba.XFCFillConstant;
2665 mode_lib->vba.FinalFillLevel = mode_lib->vba.XFCRemoteSurfaceFlipDelay
2666 * mode_lib->vba.SrcActiveDrainRate
2667 + mode_lib->vba.FinalFillMargin;
2668 mode_lib->vba.RemainingFillLevel = dml_max(
2669 0.0,
2670 mode_lib->vba.FinalFillLevel - mode_lib->vba.InitFillLevel);
2671 mode_lib->vba.TFinalxFill = mode_lib->vba.RemainingFillLevel
2672 / (mode_lib->vba.SrcActiveDrainRate
2673 * mode_lib->vba.XFCFillBWOverhead / 100);
2674 locals->XFCPrefetchMargin[k] =
2675 mode_lib->vba.XFCRemoteSurfaceFlipDelay
2676 + mode_lib->vba.TFinalxFill
2677 + (locals->DestinationLinesToRequestVMInVBlank[k]
2678 + locals->DestinationLinesToRequestRowInVBlank[k])
2679 * mode_lib->vba.HTotal[k]
2680 / mode_lib->vba.PixelClock[k];
2681 } else {
2682 locals->XFCSlaveVUpdateOffset[k] = 0;
2683 locals->XFCSlaveVupdateWidth[k] = 0;
2684 locals->XFCSlaveVReadyOffset[k] = 0;
2685 locals->XFCRemoteSurfaceFlipLatency[k] = 0;
2686 locals->XFCPrechargeDelay[k] = 0;
2687 locals->XFCTransferDelay[k] = 0;
2688 locals->XFCPrefetchMargin[k] = 0;
2689 }
2690 }
2691
2692 // Stutter Efficiency
2693 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2694 CalculateDETBufferSize(
2695 mode_lib->vba.DETBufferSizeInKByte[0],
2696 mode_lib->vba.SwathHeightY[k],
2697 mode_lib->vba.SwathHeightC[k],
2698 &locals->DETBufferSizeY[k],
2699 &locals->DETBufferSizeC[k]);
2700
2701 locals->LinesInDETY[k] = (double)locals->DETBufferSizeY[k]
2702 / locals->BytePerPixelDETY[k] / locals->SwathWidthY[k];
2703 locals->LinesInDETYRoundedDownToSwath[k] = dml_floor(
2704 locals->LinesInDETY[k],
2705 mode_lib->vba.SwathHeightY[k]);
2706 locals->FullDETBufferingTimeY[k] =
2707 locals->LinesInDETYRoundedDownToSwath[k]
2708 * (mode_lib->vba.HTotal[k]
2709 / mode_lib->vba.PixelClock[k])
2710 / mode_lib->vba.VRatio[k];
2711 }
2712
2713 mode_lib->vba.StutterPeriod = 999999.0;
2714 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2715 if (locals->FullDETBufferingTimeY[k] < mode_lib->vba.StutterPeriod) {
2716 mode_lib->vba.StutterPeriod = locals->FullDETBufferingTimeY[k];
2717 mode_lib->vba.FrameTimeForMinFullDETBufferingTime =
2718 (double) mode_lib->vba.VTotal[k] * mode_lib->vba.HTotal[k]
2719 / mode_lib->vba.PixelClock[k];
2720 locals->BytePerPixelYCriticalPlane = dml_ceil(locals->BytePerPixelDETY[k], 1);
2721 locals->SwathWidthYCriticalPlane = locals->SwathWidthY[k];
2722 locals->LinesToFinishSwathTransferStutterCriticalPlane =
2723 mode_lib->vba.SwathHeightY[k] - (locals->LinesInDETY[k] - locals->LinesInDETYRoundedDownToSwath[k]);
2724 }
2725 }
2726
2727 mode_lib->vba.AverageReadBandwidth = 0.0;
2728 mode_lib->vba.TotalRowReadBandwidth = 0.0;
2729 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2730 unsigned int DCCRateLimit;
2731
2732 if (mode_lib->vba.DCCEnable[k]) {
2733 if (locals->DCCYMaxCompressedBlock[k] == 256)
2734 DCCRateLimit = 4;
2735 else
2736 DCCRateLimit = 2;
2737
2738 mode_lib->vba.AverageReadBandwidth =
2739 mode_lib->vba.AverageReadBandwidth
2740 + (locals->ReadBandwidthPlaneLuma[k] + locals->ReadBandwidthPlaneChroma[k]) /
2741 dml_min(mode_lib->vba.DCCRate[k], DCCRateLimit);
2742 } else {
2743 mode_lib->vba.AverageReadBandwidth =
2744 mode_lib->vba.AverageReadBandwidth
2745 + locals->ReadBandwidthPlaneLuma[k]
2746 + locals->ReadBandwidthPlaneChroma[k];
2747 }
2748 mode_lib->vba.TotalRowReadBandwidth = mode_lib->vba.TotalRowReadBandwidth +
2749 locals->meta_row_bw[k] + locals->dpte_row_bw[k];
2750 }
2751
2752 mode_lib->vba.AverageDCCCompressionRate = mode_lib->vba.TotalDataReadBandwidth / mode_lib->vba.AverageReadBandwidth;
2753
2754 mode_lib->vba.PartOfBurstThatFitsInROB =
2755 dml_min(
2756 mode_lib->vba.StutterPeriod
2757 * mode_lib->vba.TotalDataReadBandwidth,
2758 mode_lib->vba.ROBBufferSizeInKByte * 1024
2759 * mode_lib->vba.AverageDCCCompressionRate);
2760 mode_lib->vba.StutterBurstTime = mode_lib->vba.PartOfBurstThatFitsInROB
2761 / mode_lib->vba.AverageDCCCompressionRate / mode_lib->vba.ReturnBW
2762 + (mode_lib->vba.StutterPeriod * mode_lib->vba.TotalDataReadBandwidth
2763 - mode_lib->vba.PartOfBurstThatFitsInROB)
2764 / (mode_lib->vba.DCFCLK * 64)
2765 + mode_lib->vba.StutterPeriod * mode_lib->vba.TotalRowReadBandwidth / mode_lib->vba.ReturnBW;
2766 mode_lib->vba.StutterBurstTime = dml_max(
2767 mode_lib->vba.StutterBurstTime,
2768 (locals->LinesToFinishSwathTransferStutterCriticalPlane * locals->BytePerPixelYCriticalPlane *
2769 locals->SwathWidthYCriticalPlane / mode_lib->vba.ReturnBW)
2770 );
2771
2772 mode_lib->vba.TotalActiveWriteback = 0;
2773 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2774 if (mode_lib->vba.WritebackEnable[k] == true) {
2775 mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + 1;
2776 }
2777 }
2778
2779 if (mode_lib->vba.TotalActiveWriteback == 0) {
2780 mode_lib->vba.StutterEfficiencyNotIncludingVBlank = (1
2781 - (mode_lib->vba.SRExitTime + mode_lib->vba.StutterBurstTime)
2782 / mode_lib->vba.StutterPeriod) * 100;
2783 } else {
2784 mode_lib->vba.StutterEfficiencyNotIncludingVBlank = 0;
2785 }
2786
2787 mode_lib->vba.SmallestVBlank = 999999;
2788 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2789 if (mode_lib->vba.SynchronizedVBlank || mode_lib->vba.NumberOfActivePlanes == 1) {
2790 mode_lib->vba.VBlankTime = (double) (mode_lib->vba.VTotal[k]
2791 - mode_lib->vba.VActive[k]) * mode_lib->vba.HTotal[k]
2792 / mode_lib->vba.PixelClock[k];
2793 } else {
2794 mode_lib->vba.VBlankTime = 0;
2795 }
2796 mode_lib->vba.SmallestVBlank = dml_min(
2797 mode_lib->vba.SmallestVBlank,
2798 mode_lib->vba.VBlankTime);
2799 }
2800
2801 mode_lib->vba.StutterEfficiency = (mode_lib->vba.StutterEfficiencyNotIncludingVBlank / 100
2802 * (mode_lib->vba.FrameTimeForMinFullDETBufferingTime
2803 - mode_lib->vba.SmallestVBlank)
2804 + mode_lib->vba.SmallestVBlank)
2805 / mode_lib->vba.FrameTimeForMinFullDETBufferingTime * 100;
2806 }
2807
DisplayPipeConfiguration(struct display_mode_lib * mode_lib)2808 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
2809 {
2810 // Display Pipe Configuration
2811 double BytePerPixDETY;
2812 double BytePerPixDETC;
2813 double Read256BytesBlockHeightY;
2814 double Read256BytesBlockHeightC;
2815 double Read256BytesBlockWidthY;
2816 double Read256BytesBlockWidthC;
2817 double MaximumSwathHeightY;
2818 double MaximumSwathHeightC;
2819 double MinimumSwathHeightY;
2820 double MinimumSwathHeightC;
2821 double SwathWidth;
2822 double SwathWidthGranularityY;
2823 double SwathWidthGranularityC;
2824 double RoundedUpMaxSwathSizeBytesY;
2825 double RoundedUpMaxSwathSizeBytesC;
2826 unsigned int j, k;
2827
2828 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2829 bool MainPlaneDoesODMCombine = false;
2830
2831 if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) {
2832 BytePerPixDETY = 8;
2833 BytePerPixDETC = 0;
2834 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) {
2835 BytePerPixDETY = 4;
2836 BytePerPixDETC = 0;
2837 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16) {
2838 BytePerPixDETY = 2;
2839 BytePerPixDETC = 0;
2840 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8) {
2841 BytePerPixDETY = 1;
2842 BytePerPixDETC = 0;
2843 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) {
2844 BytePerPixDETY = 1;
2845 BytePerPixDETC = 2;
2846 } else {
2847 BytePerPixDETY = 4.0 / 3.0;
2848 BytePerPixDETC = 8.0 / 3.0;
2849 }
2850
2851 if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64
2852 || mode_lib->vba.SourcePixelFormat[k] == dm_444_32
2853 || mode_lib->vba.SourcePixelFormat[k] == dm_444_16
2854 || mode_lib->vba.SourcePixelFormat[k] == dm_444_8)) {
2855 if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) {
2856 Read256BytesBlockHeightY = 1;
2857 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) {
2858 Read256BytesBlockHeightY = 4;
2859 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32
2860 || mode_lib->vba.SourcePixelFormat[k] == dm_444_16) {
2861 Read256BytesBlockHeightY = 8;
2862 } else {
2863 Read256BytesBlockHeightY = 16;
2864 }
2865 Read256BytesBlockWidthY = 256 / dml_ceil(BytePerPixDETY, 1)
2866 / Read256BytesBlockHeightY;
2867 Read256BytesBlockHeightC = 0;
2868 Read256BytesBlockWidthC = 0;
2869 } else {
2870 if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) {
2871 Read256BytesBlockHeightY = 1;
2872 Read256BytesBlockHeightC = 1;
2873 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) {
2874 Read256BytesBlockHeightY = 16;
2875 Read256BytesBlockHeightC = 8;
2876 } else {
2877 Read256BytesBlockHeightY = 8;
2878 Read256BytesBlockHeightC = 8;
2879 }
2880 Read256BytesBlockWidthY = 256 / dml_ceil(BytePerPixDETY, 1)
2881 / Read256BytesBlockHeightY;
2882 Read256BytesBlockWidthC = 256 / dml_ceil(BytePerPixDETC, 2)
2883 / Read256BytesBlockHeightC;
2884 }
2885
2886 if (mode_lib->vba.SourceScan[k] == dm_horz) {
2887 MaximumSwathHeightY = Read256BytesBlockHeightY;
2888 MaximumSwathHeightC = Read256BytesBlockHeightC;
2889 } else {
2890 MaximumSwathHeightY = Read256BytesBlockWidthY;
2891 MaximumSwathHeightC = Read256BytesBlockWidthC;
2892 }
2893
2894 if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64
2895 || mode_lib->vba.SourcePixelFormat[k] == dm_444_32
2896 || mode_lib->vba.SourcePixelFormat[k] == dm_444_16
2897 || mode_lib->vba.SourcePixelFormat[k] == dm_444_8)) {
2898 if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear
2899 || (mode_lib->vba.SourcePixelFormat[k] == dm_444_64
2900 && (mode_lib->vba.SurfaceTiling[k]
2901 == dm_sw_4kb_s
2902 || mode_lib->vba.SurfaceTiling[k]
2903 == dm_sw_4kb_s_x
2904 || mode_lib->vba.SurfaceTiling[k]
2905 == dm_sw_64kb_s
2906 || mode_lib->vba.SurfaceTiling[k]
2907 == dm_sw_64kb_s_t
2908 || mode_lib->vba.SurfaceTiling[k]
2909 == dm_sw_64kb_s_x
2910 || mode_lib->vba.SurfaceTiling[k]
2911 == dm_sw_var_s
2912 || mode_lib->vba.SurfaceTiling[k]
2913 == dm_sw_var_s_x)
2914 && mode_lib->vba.SourceScan[k] == dm_horz)) {
2915 MinimumSwathHeightY = MaximumSwathHeightY;
2916 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8
2917 && mode_lib->vba.SourceScan[k] != dm_horz) {
2918 MinimumSwathHeightY = MaximumSwathHeightY;
2919 } else {
2920 MinimumSwathHeightY = MaximumSwathHeightY / 2.0;
2921 }
2922 MinimumSwathHeightC = MaximumSwathHeightC;
2923 } else {
2924 if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) {
2925 MinimumSwathHeightY = MaximumSwathHeightY;
2926 MinimumSwathHeightC = MaximumSwathHeightC;
2927 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8
2928 && mode_lib->vba.SourceScan[k] == dm_horz) {
2929 MinimumSwathHeightY = MaximumSwathHeightY / 2.0;
2930 MinimumSwathHeightC = MaximumSwathHeightC;
2931 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10
2932 && mode_lib->vba.SourceScan[k] == dm_horz) {
2933 MinimumSwathHeightC = MaximumSwathHeightC / 2.0;
2934 MinimumSwathHeightY = MaximumSwathHeightY;
2935 } else {
2936 MinimumSwathHeightY = MaximumSwathHeightY;
2937 MinimumSwathHeightC = MaximumSwathHeightC;
2938 }
2939 }
2940
2941 if (mode_lib->vba.SourceScan[k] == dm_horz) {
2942 SwathWidth = mode_lib->vba.ViewportWidth[k];
2943 } else {
2944 SwathWidth = mode_lib->vba.ViewportHeight[k];
2945 }
2946
2947 if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2948 MainPlaneDoesODMCombine = true;
2949 }
2950 for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) {
2951 if (mode_lib->vba.BlendingAndTiming[k] == j
2952 && mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2953 MainPlaneDoesODMCombine = true;
2954 }
2955 }
2956
2957 if (MainPlaneDoesODMCombine == true) {
2958 SwathWidth = dml_min(
2959 SwathWidth,
2960 mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k]);
2961 } else {
2962 SwathWidth = SwathWidth / mode_lib->vba.DPPPerPlane[k];
2963 }
2964
2965 SwathWidthGranularityY = 256 / dml_ceil(BytePerPixDETY, 1) / MaximumSwathHeightY;
2966 RoundedUpMaxSwathSizeBytesY = (dml_ceil(
2967 (double) (SwathWidth - 1),
2968 SwathWidthGranularityY) + SwathWidthGranularityY) * BytePerPixDETY
2969 * MaximumSwathHeightY;
2970 if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) {
2971 RoundedUpMaxSwathSizeBytesY = dml_ceil(RoundedUpMaxSwathSizeBytesY, 256)
2972 + 256;
2973 }
2974 if (MaximumSwathHeightC > 0) {
2975 SwathWidthGranularityC = 256.0 / dml_ceil(BytePerPixDETC, 2)
2976 / MaximumSwathHeightC;
2977 RoundedUpMaxSwathSizeBytesC = (dml_ceil(
2978 (double) (SwathWidth / 2.0 - 1),
2979 SwathWidthGranularityC) + SwathWidthGranularityC)
2980 * BytePerPixDETC * MaximumSwathHeightC;
2981 if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) {
2982 RoundedUpMaxSwathSizeBytesC = dml_ceil(
2983 RoundedUpMaxSwathSizeBytesC,
2984 256) + 256;
2985 }
2986 } else
2987 RoundedUpMaxSwathSizeBytesC = 0.0;
2988
2989 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC
2990 <= mode_lib->vba.DETBufferSizeInKByte[0] * 1024.0 / 2.0) {
2991 mode_lib->vba.SwathHeightY[k] = MaximumSwathHeightY;
2992 mode_lib->vba.SwathHeightC[k] = MaximumSwathHeightC;
2993 } else {
2994 mode_lib->vba.SwathHeightY[k] = MinimumSwathHeightY;
2995 mode_lib->vba.SwathHeightC[k] = MinimumSwathHeightC;
2996 }
2997
2998 CalculateDETBufferSize(
2999 mode_lib->vba.DETBufferSizeInKByte[0],
3000 mode_lib->vba.SwathHeightY[k],
3001 mode_lib->vba.SwathHeightC[k],
3002 &mode_lib->vba.DETBufferSizeY[k],
3003 &mode_lib->vba.DETBufferSizeC[k]);
3004 }
3005 }
3006
/*
 * CalculateTWait - pick the TWait latency for a given prefetch mode.
 *
 * PrefetchMode 0:     max(DRAMClockChangeLatency + UrgentLatency,
 *                         max(SREnterPlusExitTime, UrgentLatency))
 * PrefetchMode 1:     max(SREnterPlusExitTime, UrgentLatency)
 * any other value:    UrgentLatency
 *
 * Returns the selected value.
 */
static double CalculateTWait(
		unsigned int PrefetchMode,
		double DRAMClockChangeLatency,
		double UrgentLatency,
		double SREnterPlusExitTime)
{
	double SelfRefreshOrUrgent;

	if (PrefetchMode > 1)
		return UrgentLatency;

	// Term shared by modes 0 and 1
	SelfRefreshOrUrgent = dml_max(SREnterPlusExitTime, UrgentLatency);
	if (PrefetchMode == 1)
		return SelfRefreshOrUrgent;

	return dml_max(DRAMClockChangeLatency + UrgentLatency, SelfRefreshOrUrgent);
}
3023
/*
 * CalculateRemoteSurfaceFlipDelay - compute the XFC remote surface flip delay.
 *
 * Outputs written through pointers:
 *   *SrcActiveDrainRate = VRatio * SwathWidth * Bpp / LineTime
 *   *TInitXFill         = XFCXBUFLatencyTolerance / (1 + XFCFillBWOverhead / 100)
 *   *TslvChk            = XFCSlvChunkSize / (drain rate * (1 + XFCFillBWOverhead / 100))
 *
 * Returns 2 * XFCBusTransportTime + the three slave V-update/V-ready terms
 * + TCalc + TWait + *TslvChk + *TInitXFill. Every intermediate is logged
 * through dml_print().
 */
static double CalculateRemoteSurfaceFlipDelay(
		struct display_mode_lib *mode_lib,
		double VRatio,
		double SwathWidth,
		double Bpp,
		double LineTime,
		double XFCTSlvVupdateOffset,
		double XFCTSlvVupdateWidth,
		double XFCTSlvVreadyOffset,
		double XFCXBUFLatencyTolerance,
		double XFCFillBWOverhead,
		double XFCSlvChunkSize,
		double XFCBusTransportTime,
		double TCalc,
		double TWait,
		double *SrcActiveDrainRate,
		double *TInitXFill,
		double *TslvChk)
{
	// XFCFillBWOverhead is used as a percentage on top of the drain rate
	const double FillOverheadFactor = 1 + XFCFillBWOverhead / 100;
	double SlaveSetupTime;
	double FillRate;
	double FlipDelay;

	*SrcActiveDrainRate = VRatio * SwathWidth * Bpp / LineTime;
	SlaveSetupTime = XFCTSlvVupdateOffset + XFCTSlvVupdateWidth + XFCTSlvVreadyOffset;
	*TInitXFill = XFCXBUFLatencyTolerance / FillOverheadFactor;
	FillRate = *SrcActiveDrainRate * FillOverheadFactor;
	*TslvChk = XFCSlvChunkSize / FillRate;

	dml_print("DML::CalculateRemoteSurfaceFlipDelay: SrcActiveDrainRate: %f\n", *SrcActiveDrainRate);
	dml_print("DML::CalculateRemoteSurfaceFlipDelay: TSlvSetup: %f\n", SlaveSetupTime);
	dml_print("DML::CalculateRemoteSurfaceFlipDelay: TInitXFill: %f\n", *TInitXFill);
	dml_print("DML::CalculateRemoteSurfaceFlipDelay: AvgfillRate: %f\n", FillRate);
	dml_print("DML::CalculateRemoteSurfaceFlipDelay: TslvChk: %f\n", *TslvChk);

	// TODO: This doesn't seem to match programming guide
	FlipDelay = 2 * XFCBusTransportTime + SlaveSetupTime + TCalc + TWait + *TslvChk + *TInitXFill;
	dml_print("DML::CalculateRemoteSurfaceFlipDelay: RemoteSurfaceFlipDelay: %f\n", FlipDelay);

	return FlipDelay;
}
3061
CalculateWriteBackDelay(enum source_format_class WritebackPixelFormat,double WritebackHRatio,double WritebackVRatio,unsigned int WritebackLumaHTaps,unsigned int WritebackLumaVTaps,unsigned int WritebackChromaHTaps,unsigned int WritebackChromaVTaps,unsigned int WritebackDestinationWidth)3062 static double CalculateWriteBackDelay(
3063 enum source_format_class WritebackPixelFormat,
3064 double WritebackHRatio,
3065 double WritebackVRatio,
3066 unsigned int WritebackLumaHTaps,
3067 unsigned int WritebackLumaVTaps,
3068 unsigned int WritebackChromaHTaps,
3069 unsigned int WritebackChromaVTaps,
3070 unsigned int WritebackDestinationWidth)
3071 {
3072 double CalculateWriteBackDelay =
3073 dml_max(
3074 dml_ceil(WritebackLumaHTaps / 4.0, 1) / WritebackHRatio,
3075 WritebackLumaVTaps * dml_ceil(1.0 / WritebackVRatio, 1)
3076 * dml_ceil(
3077 WritebackDestinationWidth
3078 / 4.0,
3079 1)
3080 + dml_ceil(1.0 / WritebackVRatio, 1)
3081 * (dml_ceil(
3082 WritebackLumaVTaps
3083 / 4.0,
3084 1) + 4));
3085
3086 if (WritebackPixelFormat != dm_444_32) {
3087 CalculateWriteBackDelay =
3088 dml_max(
3089 CalculateWriteBackDelay,
3090 dml_max(
3091 dml_ceil(
3092 WritebackChromaHTaps
3093 / 2.0,
3094 1)
3095 / (2
3096 * WritebackHRatio),
3097 WritebackChromaVTaps
3098 * dml_ceil(
3099 1
3100 / (2
3101 * WritebackVRatio),
3102 1)
3103 * dml_ceil(
3104 WritebackDestinationWidth
3105 / 2.0
3106 / 2.0,
3107 1)
3108 + dml_ceil(
3109 1
3110 / (2
3111 * WritebackVRatio),
3112 1)
3113 * (dml_ceil(
3114 WritebackChromaVTaps
3115 / 4.0,
3116 1)
3117 + 4)));
3118 }
3119 return CalculateWriteBackDelay;
3120 }
3121
CalculateActiveRowBandwidth(bool GPUVMEnable,enum source_format_class SourcePixelFormat,double VRatio,bool DCCEnable,double LineTime,unsigned int MetaRowByteLuma,unsigned int MetaRowByteChroma,unsigned int meta_row_height_luma,unsigned int meta_row_height_chroma,unsigned int PixelPTEBytesPerRowLuma,unsigned int PixelPTEBytesPerRowChroma,unsigned int dpte_row_height_luma,unsigned int dpte_row_height_chroma,double * meta_row_bw,double * dpte_row_bw)3122 static void CalculateActiveRowBandwidth(
3123 bool GPUVMEnable,
3124 enum source_format_class SourcePixelFormat,
3125 double VRatio,
3126 bool DCCEnable,
3127 double LineTime,
3128 unsigned int MetaRowByteLuma,
3129 unsigned int MetaRowByteChroma,
3130 unsigned int meta_row_height_luma,
3131 unsigned int meta_row_height_chroma,
3132 unsigned int PixelPTEBytesPerRowLuma,
3133 unsigned int PixelPTEBytesPerRowChroma,
3134 unsigned int dpte_row_height_luma,
3135 unsigned int dpte_row_height_chroma,
3136 double *meta_row_bw,
3137 double *dpte_row_bw)
3138 {
3139 if (DCCEnable != true) {
3140 *meta_row_bw = 0;
3141 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) {
3142 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime)
3143 + VRatio / 2 * MetaRowByteChroma
3144 / (meta_row_height_chroma * LineTime);
3145 } else {
3146 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
3147 }
3148
3149 if (GPUVMEnable != true) {
3150 *dpte_row_bw = 0;
3151 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) {
3152 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3153 + VRatio / 2 * PixelPTEBytesPerRowChroma
3154 / (dpte_row_height_chroma * LineTime);
3155 } else {
3156 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
3157 }
3158 }
3159
CalculateFlipSchedule(struct display_mode_lib * mode_lib,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,double UrgentExtraLatency,double UrgentLatency,unsigned int GPUVMMaxPageTableLevels,bool HostVMEnable,unsigned int HostVMMaxPageTableLevels,unsigned int HostVMCachedPageTableLevels,bool GPUVMEnable,double PDEAndMetaPTEBytesPerFrame,double MetaRowBytes,double DPTEBytesPerRow,double BandwidthAvailableForImmediateFlip,unsigned int TotImmediateFlipBytes,enum source_format_class SourcePixelFormat,double LineTime,double VRatio,double Tno_bw,bool DCCEnable,unsigned int dpte_row_height,unsigned int meta_row_height,unsigned int dpte_row_height_chroma,unsigned int meta_row_height_chroma,double * DestinationLinesToRequestVMInImmediateFlip,double * DestinationLinesToRequestRowInImmediateFlip,double * final_flip_bw,bool * ImmediateFlipSupportedForPipe)3160 static void CalculateFlipSchedule(
3161 struct display_mode_lib *mode_lib,
3162 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
3163 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
3164 double UrgentExtraLatency,
3165 double UrgentLatency,
3166 unsigned int GPUVMMaxPageTableLevels,
3167 bool HostVMEnable,
3168 unsigned int HostVMMaxPageTableLevels,
3169 unsigned int HostVMCachedPageTableLevels,
3170 bool GPUVMEnable,
3171 double PDEAndMetaPTEBytesPerFrame,
3172 double MetaRowBytes,
3173 double DPTEBytesPerRow,
3174 double BandwidthAvailableForImmediateFlip,
3175 unsigned int TotImmediateFlipBytes,
3176 enum source_format_class SourcePixelFormat,
3177 double LineTime,
3178 double VRatio,
3179 double Tno_bw,
3180 bool DCCEnable,
3181 unsigned int dpte_row_height,
3182 unsigned int meta_row_height,
3183 unsigned int dpte_row_height_chroma,
3184 unsigned int meta_row_height_chroma,
3185 double *DestinationLinesToRequestVMInImmediateFlip,
3186 double *DestinationLinesToRequestRowInImmediateFlip,
3187 double *final_flip_bw,
3188 bool *ImmediateFlipSupportedForPipe)
3189 {
3190 double min_row_time = 0.0;
3191 unsigned int HostVMDynamicLevels;
3192 double TimeForFetchingMetaPTEImmediateFlip;
3193 double TimeForFetchingRowInVBlankImmediateFlip;
3194 double ImmediateFlipBW = 1.0;
3195 double HostVMInefficiencyFactor;
3196 double VRatioClamped;
3197
3198 if (GPUVMEnable == true && HostVMEnable == true) {
3199 HostVMInefficiencyFactor =
3200 PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
3201 / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
3202 HostVMDynamicLevels = HostVMMaxPageTableLevels - HostVMCachedPageTableLevels;
3203 } else {
3204 HostVMInefficiencyFactor = 1;
3205 HostVMDynamicLevels = 0;
3206 }
3207
3208 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow)
3209 * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
3210
3211 if (GPUVMEnable == true) {
3212 TimeForFetchingMetaPTEImmediateFlip = dml_max3(
3213 Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3214 UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevels + 1) - 1),
3215 LineTime / 4.0);
3216 } else {
3217 TimeForFetchingMetaPTEImmediateFlip = 0;
3218 }
3219
3220 *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
3221 if ((GPUVMEnable == true || DCCEnable == true)) {
3222 TimeForFetchingRowInVBlankImmediateFlip = dml_max3((MetaRowBytes + DPTEBytesPerRow) * HostVMInefficiencyFactor / ImmediateFlipBW, UrgentLatency * (HostVMDynamicLevels + 1), LineTime / 4);
3223 } else {
3224 TimeForFetchingRowInVBlankImmediateFlip = 0;
3225 }
3226
3227 *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
3228 *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime), (MetaRowBytes + DPTEBytesPerRow) * HostVMInefficiencyFactor / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
3229 VRatioClamped = (VRatio < 1.0) ? 1.0 : VRatio;
3230 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) {
3231 if (GPUVMEnable == true && DCCEnable != true) {
3232 min_row_time = dml_min(
3233 dpte_row_height * LineTime / VRatioClamped,
3234 dpte_row_height_chroma * LineTime / (VRatioClamped / 2));
3235 } else if (GPUVMEnable != true && DCCEnable == true) {
3236 min_row_time = dml_min(
3237 meta_row_height * LineTime / VRatioClamped,
3238 meta_row_height_chroma * LineTime / (VRatioClamped / 2));
3239 } else {
3240 min_row_time = dml_min4(
3241 dpte_row_height * LineTime / VRatioClamped,
3242 meta_row_height * LineTime / VRatioClamped,
3243 dpte_row_height_chroma * LineTime / (VRatioClamped / 2),
3244 meta_row_height_chroma * LineTime / (VRatioClamped / 2));
3245 }
3246 } else {
3247 if (GPUVMEnable == true && DCCEnable != true) {
3248 min_row_time = dpte_row_height * LineTime / VRatioClamped;
3249 } else if (GPUVMEnable != true && DCCEnable == true) {
3250 min_row_time = meta_row_height * LineTime / VRatioClamped;
3251 } else {
3252 min_row_time = dml_min(
3253 dpte_row_height * LineTime / VRatioClamped,
3254 meta_row_height * LineTime / VRatioClamped);
3255 }
3256 }
3257
3258 if (*DestinationLinesToRequestVMInImmediateFlip >= 32
3259 || *DestinationLinesToRequestRowInImmediateFlip >= 16
3260 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3261 *ImmediateFlipSupportedForPipe = false;
3262 } else {
3263 *ImmediateFlipSupportedForPipe = true;
3264 }
3265 }
3266
TruncToValidBPP(double DecimalBPP,double DesiredBPP,bool DSCEnabled,enum output_encoder_class Output,enum output_format_class Format,unsigned int DSCInputBitPerComponent)3267 static unsigned int TruncToValidBPP(
3268 double DecimalBPP,
3269 double DesiredBPP,
3270 bool DSCEnabled,
3271 enum output_encoder_class Output,
3272 enum output_format_class Format,
3273 unsigned int DSCInputBitPerComponent)
3274 {
3275 if (Output == dm_hdmi) {
3276 if (Format == dm_420) {
3277 if (DecimalBPP >= 18 && (DesiredBPP == 0 || DesiredBPP == 18))
3278 return 18;
3279 else if (DecimalBPP >= 15 && (DesiredBPP == 0 || DesiredBPP == 15))
3280 return 15;
3281 else if (DecimalBPP >= 12 && (DesiredBPP == 0 || DesiredBPP == 12))
3282 return 12;
3283 else
3284 return BPP_INVALID;
3285 } else if (Format == dm_444) {
3286 if (DecimalBPP >= 36 && (DesiredBPP == 0 || DesiredBPP == 36))
3287 return 36;
3288 else if (DecimalBPP >= 30 && (DesiredBPP == 0 || DesiredBPP == 30))
3289 return 30;
3290 else if (DecimalBPP >= 24 && (DesiredBPP == 0 || DesiredBPP == 24))
3291 return 24;
3292 else if (DecimalBPP >= 18 && (DesiredBPP == 0 || DesiredBPP == 18))
3293 return 18;
3294 else
3295 return BPP_INVALID;
3296 } else {
3297 if (DecimalBPP / 1.5 >= 24 && (DesiredBPP == 0 || DesiredBPP == 24))
3298 return 24;
3299 else if (DecimalBPP / 1.5 >= 20 && (DesiredBPP == 0 || DesiredBPP == 20))
3300 return 20;
3301 else if (DecimalBPP / 1.5 >= 16 && (DesiredBPP == 0 || DesiredBPP == 16))
3302 return 16;
3303 else
3304 return BPP_INVALID;
3305 }
3306 } else {
3307 if (DSCEnabled) {
3308 if (Format == dm_420) {
3309 if (DesiredBPP == 0) {
3310 if (DecimalBPP < 6)
3311 return BPP_INVALID;
3312 else if (DecimalBPP >= 1.5 * DSCInputBitPerComponent - 1.0 / 16.0)
3313 return 1.5 * DSCInputBitPerComponent - 1.0 / 16.0;
3314 else
3315 return dml_floor(16 * DecimalBPP, 1) / 16.0;
3316 } else {
3317 if (DecimalBPP < 6
3318 || DesiredBPP < 6
3319 || DesiredBPP > 1.5 * DSCInputBitPerComponent - 1.0 / 16.0
3320 || DecimalBPP < DesiredBPP) {
3321 return BPP_INVALID;
3322 } else {
3323 return DesiredBPP;
3324 }
3325 }
3326 } else if (Format == dm_n422) {
3327 if (DesiredBPP == 0) {
3328 if (DecimalBPP < 7)
3329 return BPP_INVALID;
3330 else if (DecimalBPP >= 2 * DSCInputBitPerComponent - 1.0 / 16.0)
3331 return 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3332 else
3333 return dml_floor(16 * DecimalBPP, 1) / 16.0;
3334 } else {
3335 if (DecimalBPP < 7
3336 || DesiredBPP < 7
3337 || DesiredBPP > 2 * DSCInputBitPerComponent - 1.0 / 16.0
3338 || DecimalBPP < DesiredBPP) {
3339 return BPP_INVALID;
3340 } else {
3341 return DesiredBPP;
3342 }
3343 }
3344 } else {
3345 if (DesiredBPP == 0) {
3346 if (DecimalBPP < 8)
3347 return BPP_INVALID;
3348 else if (DecimalBPP >= 3 * DSCInputBitPerComponent - 1.0 / 16.0)
3349 return 3 * DSCInputBitPerComponent - 1.0 / 16.0;
3350 else
3351 return dml_floor(16 * DecimalBPP, 1) / 16.0;
3352 } else {
3353 if (DecimalBPP < 8
3354 || DesiredBPP < 8
3355 || DesiredBPP > 3 * DSCInputBitPerComponent - 1.0 / 16.0
3356 || DecimalBPP < DesiredBPP) {
3357 return BPP_INVALID;
3358 } else {
3359 return DesiredBPP;
3360 }
3361 }
3362 }
3363 } else if (Format == dm_420) {
3364 if (DecimalBPP >= 18 && (DesiredBPP == 0 || DesiredBPP == 18))
3365 return 18;
3366 else if (DecimalBPP >= 15 && (DesiredBPP == 0 || DesiredBPP == 15))
3367 return 15;
3368 else if (DecimalBPP >= 12 && (DesiredBPP == 0 || DesiredBPP == 12))
3369 return 12;
3370 else
3371 return BPP_INVALID;
3372 } else if (Format == dm_s422 || Format == dm_n422) {
3373 if (DecimalBPP >= 24 && (DesiredBPP == 0 || DesiredBPP == 24))
3374 return 24;
3375 else if (DecimalBPP >= 20 && (DesiredBPP == 0 || DesiredBPP == 20))
3376 return 20;
3377 else if (DecimalBPP >= 16 && (DesiredBPP == 0 || DesiredBPP == 16))
3378 return 16;
3379 else
3380 return BPP_INVALID;
3381 } else {
3382 if (DecimalBPP >= 36 && (DesiredBPP == 0 || DesiredBPP == 36))
3383 return 36;
3384 else if (DecimalBPP >= 30 && (DesiredBPP == 0 || DesiredBPP == 30))
3385 return 30;
3386 else if (DecimalBPP >= 24 && (DesiredBPP == 0 || DesiredBPP == 24))
3387 return 24;
3388 else if (DecimalBPP >= 18 && (DesiredBPP == 0 || DesiredBPP == 18))
3389 return 18;
3390 else
3391 return BPP_INVALID;
3392 }
3393 }
3394 }
3395
3396
CalculatePrefetchSchedulePerPlane(struct display_mode_lib * mode_lib,int i,unsigned j,unsigned k)3397 static noinline void CalculatePrefetchSchedulePerPlane(
3398 struct display_mode_lib *mode_lib,
3399 int i,
3400 unsigned j,
3401 unsigned k)
3402 {
3403 struct vba_vars_st *locals = &mode_lib->vba;
3404 Pipe myPipe;
3405 HostVM myHostVM;
3406
3407 if (mode_lib->vba.XFCEnabled[k] == true) {
3408 mode_lib->vba.XFCRemoteSurfaceFlipDelay =
3409 CalculateRemoteSurfaceFlipDelay(
3410 mode_lib,
3411 mode_lib->vba.VRatio[k],
3412 locals->SwathWidthYThisState[k],
3413 dml_ceil(locals->BytePerPixelInDETY[k], 1.0),
3414 mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
3415 mode_lib->vba.XFCTSlvVupdateOffset,
3416 mode_lib->vba.XFCTSlvVupdateWidth,
3417 mode_lib->vba.XFCTSlvVreadyOffset,
3418 mode_lib->vba.XFCXBUFLatencyTolerance,
3419 mode_lib->vba.XFCFillBWOverhead,
3420 mode_lib->vba.XFCSlvChunkSize,
3421 mode_lib->vba.XFCBusTransportTime,
3422 mode_lib->vba.TimeCalc,
3423 mode_lib->vba.TWait,
3424 &mode_lib->vba.SrcActiveDrainRate,
3425 &mode_lib->vba.TInitXFill,
3426 &mode_lib->vba.TslvChk);
3427 } else {
3428 mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0.0;
3429 }
3430
3431 myPipe.DPPCLK = locals->RequiredDPPCLK[i][j][k];
3432 myPipe.DISPCLK = locals->RequiredDISPCLK[i][j];
3433 myPipe.PixelClock = mode_lib->vba.PixelClock[k];
3434 myPipe.DCFCLKDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0];
3435 myPipe.DPPPerPlane = locals->NoOfDPP[i][j][k];
3436 myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k];
3437 myPipe.SourceScan = mode_lib->vba.SourceScan[k];
3438 myPipe.BlockWidth256BytesY = locals->Read256BlockWidthY[k];
3439 myPipe.BlockHeight256BytesY = locals->Read256BlockHeightY[k];
3440 myPipe.BlockWidth256BytesC = locals->Read256BlockWidthC[k];
3441 myPipe.BlockHeight256BytesC = locals->Read256BlockHeightC[k];
3442 myPipe.InterlaceEnable = mode_lib->vba.Interlace[k];
3443 myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k];
3444 myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k];
3445 myPipe.HTotal = mode_lib->vba.HTotal[k];
3446
3447
3448 myHostVM.Enable = mode_lib->vba.HostVMEnable;
3449 myHostVM.MaxPageTableLevels = mode_lib->vba.HostVMMaxPageTableLevels;
3450 myHostVM.CachedPageTableLevels = mode_lib->vba.HostVMCachedPageTableLevels;
3451
3452
3453 mode_lib->vba.IsErrorResult[i][j][k] = CalculatePrefetchSchedule(
3454 mode_lib,
3455 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
3456 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
3457 &myPipe,
3458 locals->DSCDelayPerState[i][k],
3459 mode_lib->vba.DPPCLKDelaySubtotal,
3460 mode_lib->vba.DPPCLKDelaySCL,
3461 mode_lib->vba.DPPCLKDelaySCLLBOnly,
3462 mode_lib->vba.DPPCLKDelayCNVCFormater,
3463 mode_lib->vba.DPPCLKDelayCNVCCursor,
3464 mode_lib->vba.DISPCLKDelaySubtotal,
3465 locals->SwathWidthYThisState[k] / mode_lib->vba.HRatio[k],
3466 mode_lib->vba.OutputFormat[k],
3467 mode_lib->vba.MaxInterDCNTileRepeaters,
3468 dml_min(mode_lib->vba.MaxVStartup, locals->MaximumVStartup[0][0][k]),
3469 locals->MaximumVStartup[0][0][k],
3470 mode_lib->vba.GPUVMMaxPageTableLevels,
3471 mode_lib->vba.GPUVMEnable,
3472 &myHostVM,
3473 mode_lib->vba.DynamicMetadataEnable[k],
3474 mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k],
3475 mode_lib->vba.DynamicMetadataTransmittedBytes[k],
3476 mode_lib->vba.DCCEnable[k],
3477 mode_lib->vba.UrgentLatency,
3478 mode_lib->vba.ExtraLatency,
3479 mode_lib->vba.TimeCalc,
3480 locals->PDEAndMetaPTEBytesPerFrame[0][0][k],
3481 locals->MetaRowBytes[0][0][k],
3482 locals->DPTEBytesPerRow[0][0][k],
3483 locals->PrefetchLinesY[0][0][k],
3484 locals->SwathWidthYThisState[k],
3485 locals->BytePerPixelInDETY[k],
3486 locals->PrefillY[k],
3487 locals->MaxNumSwY[k],
3488 locals->PrefetchLinesC[0][0][k],
3489 locals->BytePerPixelInDETC[k],
3490 locals->PrefillC[k],
3491 locals->MaxNumSwC[k],
3492 locals->SwathHeightYThisState[k],
3493 locals->SwathHeightCThisState[k],
3494 mode_lib->vba.TWait,
3495 mode_lib->vba.XFCEnabled[k],
3496 mode_lib->vba.XFCRemoteSurfaceFlipDelay,
3497 mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
3498 &locals->dst_x_after_scaler,
3499 &locals->dst_y_after_scaler,
3500 &locals->LineTimesForPrefetch[k],
3501 &locals->PrefetchBW[k],
3502 &locals->LinesForMetaPTE[k],
3503 &locals->LinesForMetaAndDPTERow[k],
3504 &locals->VRatioPreY[i][j][k],
3505 &locals->VRatioPreC[i][j][k],
3506 &locals->RequiredPrefetchPixelDataBWLuma[i][j][k],
3507 &locals->RequiredPrefetchPixelDataBWChroma[i][j][k],
3508 &locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata,
3509 &locals->Tno_bw[k],
3510 &locals->prefetch_vmrow_bw[k],
3511 locals->swath_width_luma_ub,
3512 locals->swath_width_chroma_ub,
3513 &mode_lib->vba.VUpdateOffsetPix[k],
3514 &mode_lib->vba.VUpdateWidthPix[k],
3515 &mode_lib->vba.VReadyOffsetPix[k]);
3516 }
dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib * mode_lib)3517 void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3518 {
3519 struct vba_vars_st *locals = &mode_lib->vba;
3520
3521 int i;
3522 unsigned int j, k, m;
3523
3524 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3525
3526 /*Scale Ratio, taps Support Check*/
3527
3528 mode_lib->vba.ScaleRatioAndTapsSupport = true;
3529 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3530 if (mode_lib->vba.ScalerEnabled[k] == false
3531 && ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64
3532 && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
3533 && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
3534 && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16
3535 && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8)
3536 || mode_lib->vba.HRatio[k] != 1.0
3537 || mode_lib->vba.htaps[k] != 1.0
3538 || mode_lib->vba.VRatio[k] != 1.0
3539 || mode_lib->vba.vtaps[k] != 1.0)) {
3540 mode_lib->vba.ScaleRatioAndTapsSupport = false;
3541 } else if (mode_lib->vba.vtaps[k] < 1.0 || mode_lib->vba.vtaps[k] > 8.0
3542 || mode_lib->vba.htaps[k] < 1.0 || mode_lib->vba.htaps[k] > 8.0
3543 || (mode_lib->vba.htaps[k] > 1.0
3544 && (mode_lib->vba.htaps[k] % 2) == 1)
3545 || mode_lib->vba.HRatio[k] > mode_lib->vba.MaxHSCLRatio
3546 || mode_lib->vba.VRatio[k] > mode_lib->vba.MaxVSCLRatio
3547 || mode_lib->vba.HRatio[k] > mode_lib->vba.htaps[k]
3548 || mode_lib->vba.VRatio[k] > mode_lib->vba.vtaps[k]
3549 || (mode_lib->vba.SourcePixelFormat[k] != dm_444_64
3550 && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
3551 && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
3552 && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16
3553 && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8
3554 && (mode_lib->vba.HRatio[k] / 2.0
3555 > mode_lib->vba.HTAPsChroma[k]
3556 || mode_lib->vba.VRatio[k] / 2.0
3557 > mode_lib->vba.VTAPsChroma[k]))) {
3558 mode_lib->vba.ScaleRatioAndTapsSupport = false;
3559 }
3560 }
3561 /*Source Format, Pixel Format and Scan Support Check*/
3562
3563 mode_lib->vba.SourceFormatPixelAndScanSupport = true;
3564 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3565 if ((mode_lib->vba.SurfaceTiling[k] == dm_sw_linear
3566 && mode_lib->vba.SourceScan[k] != dm_horz)
3567 || ((mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_d
3568 || mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_d_x
3569 || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d
3570 || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d_t
3571 || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d_x
3572 || mode_lib->vba.SurfaceTiling[k] == dm_sw_var_d
3573 || mode_lib->vba.SurfaceTiling[k] == dm_sw_var_d_x)
3574 && mode_lib->vba.SourcePixelFormat[k] != dm_444_64)
3575 || (mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_r_x
3576 && (mode_lib->vba.SourcePixelFormat[k] == dm_mono_8
3577 || mode_lib->vba.SourcePixelFormat[k]
3578 == dm_420_8
3579 || mode_lib->vba.SourcePixelFormat[k]
3580 == dm_420_10))
3581 || (((mode_lib->vba.SurfaceTiling[k] == dm_sw_gfx7_2d_thin_gl
3582 || mode_lib->vba.SurfaceTiling[k]
3583 == dm_sw_gfx7_2d_thin_l_vp)
3584 && !((mode_lib->vba.SourcePixelFormat[k]
3585 == dm_444_64
3586 || mode_lib->vba.SourcePixelFormat[k]
3587 == dm_444_32)
3588 && mode_lib->vba.SourceScan[k]
3589 == dm_horz
3590 && mode_lib->vba.SupportGFX7CompatibleTilingIn32bppAnd64bpp
3591 == true
3592 && mode_lib->vba.DCCEnable[k]
3593 == false))
3594 || (mode_lib->vba.DCCEnable[k] == true
3595 && (mode_lib->vba.SurfaceTiling[k]
3596 == dm_sw_linear
3597 || mode_lib->vba.SourcePixelFormat[k]
3598 == dm_420_8
3599 || mode_lib->vba.SourcePixelFormat[k]
3600 == dm_420_10)))) {
3601 mode_lib->vba.SourceFormatPixelAndScanSupport = false;
3602 }
3603 }
3604 /*Bandwidth Support Check*/
3605
3606 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3607 if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) {
3608 locals->BytePerPixelInDETY[k] = 8.0;
3609 locals->BytePerPixelInDETC[k] = 0.0;
3610 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) {
3611 locals->BytePerPixelInDETY[k] = 4.0;
3612 locals->BytePerPixelInDETC[k] = 0.0;
3613 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16
3614 || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16) {
3615 locals->BytePerPixelInDETY[k] = 2.0;
3616 locals->BytePerPixelInDETC[k] = 0.0;
3617 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_mono_8) {
3618 locals->BytePerPixelInDETY[k] = 1.0;
3619 locals->BytePerPixelInDETC[k] = 0.0;
3620 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) {
3621 locals->BytePerPixelInDETY[k] = 1.0;
3622 locals->BytePerPixelInDETC[k] = 2.0;
3623 } else {
3624 locals->BytePerPixelInDETY[k] = 4.0 / 3;
3625 locals->BytePerPixelInDETC[k] = 8.0 / 3;
3626 }
3627 if (mode_lib->vba.SourceScan[k] == dm_horz) {
3628 locals->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportWidth[k];
3629 } else {
3630 locals->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportHeight[k];
3631 }
3632 }
3633 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3634 locals->ReadBandwidthLuma[k] = locals->SwathWidthYSingleDPP[k] * dml_ceil(locals->BytePerPixelInDETY[k], 1.0)
3635 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k];
3636 locals->ReadBandwidthChroma[k] = locals->SwathWidthYSingleDPP[k] / 2 * dml_ceil(locals->BytePerPixelInDETC[k], 2.0)
3637 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k] / 2.0;
3638 locals->ReadBandwidth[k] = locals->ReadBandwidthLuma[k] + locals->ReadBandwidthChroma[k];
3639 }
3640 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3641 if (mode_lib->vba.WritebackEnable[k] == true
3642 && mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) {
3643 locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k]
3644 * mode_lib->vba.WritebackDestinationHeight[k]
3645 / (mode_lib->vba.WritebackSourceHeight[k]
3646 * mode_lib->vba.HTotal[k]
3647 / mode_lib->vba.PixelClock[k]) * 4.0;
3648 } else if (mode_lib->vba.WritebackEnable[k] == true
3649 && mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) {
3650 locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k]
3651 * mode_lib->vba.WritebackDestinationHeight[k]
3652 / (mode_lib->vba.WritebackSourceHeight[k]
3653 * mode_lib->vba.HTotal[k]
3654 / mode_lib->vba.PixelClock[k]) * 3.0;
3655 } else if (mode_lib->vba.WritebackEnable[k] == true) {
3656 locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k]
3657 * mode_lib->vba.WritebackDestinationHeight[k]
3658 / (mode_lib->vba.WritebackSourceHeight[k]
3659 * mode_lib->vba.HTotal[k]
3660 / mode_lib->vba.PixelClock[k]) * 1.5;
3661 } else {
3662 locals->WriteBandwidth[k] = 0.0;
3663 }
3664 }
3665 mode_lib->vba.DCCEnabledInAnyPlane = false;
3666 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3667 if (mode_lib->vba.DCCEnable[k] == true) {
3668 mode_lib->vba.DCCEnabledInAnyPlane = true;
3669 }
3670 }
3671 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
3672 locals->IdealSDPPortBandwidthPerState[i][0] = dml_min3(
3673 mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLKPerState[i],
3674 mode_lib->vba.DRAMSpeedPerState[i] * mode_lib->vba.NumberOfChannels
3675 * mode_lib->vba.DRAMChannelWidth,
3676 mode_lib->vba.FabricClockPerState[i]
3677 * mode_lib->vba.FabricDatapathToDCNDataReturn);
3678 if (mode_lib->vba.HostVMEnable == false) {
3679 locals->ReturnBWPerState[i][0] = locals->IdealSDPPortBandwidthPerState[i][0]
3680 * mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100.0;
3681 } else {
3682 locals->ReturnBWPerState[i][0] = locals->IdealSDPPortBandwidthPerState[i][0]
3683 * mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0;
3684 }
3685 }
3686 /*Writeback Latency support check*/
3687
3688 mode_lib->vba.WritebackLatencySupport = true;
3689 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3690 if (mode_lib->vba.WritebackEnable[k] == true) {
3691 if (mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) {
3692 if (locals->WriteBandwidth[k]
3693 > (mode_lib->vba.WritebackInterfaceLumaBufferSize
3694 + mode_lib->vba.WritebackInterfaceChromaBufferSize)
3695 / mode_lib->vba.WritebackLatency) {
3696 mode_lib->vba.WritebackLatencySupport = false;
3697 }
3698 } else {
3699 if (locals->WriteBandwidth[k]
3700 > 1.5
3701 * dml_min(
3702 mode_lib->vba.WritebackInterfaceLumaBufferSize,
3703 2.0
3704 * mode_lib->vba.WritebackInterfaceChromaBufferSize)
3705 / mode_lib->vba.WritebackLatency) {
3706 mode_lib->vba.WritebackLatencySupport = false;
3707 }
3708 }
3709 }
3710 }
3711 /*Re-ordering Buffer Support Check*/
3712
3713 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
3714 locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i] =
3715 (mode_lib->vba.RoundTripPingLatencyCycles + 32.0) / mode_lib->vba.DCFCLKPerState[i]
3716 + dml_max3(mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly,
3717 mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
3718 mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly)
3719 * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i][0];
3720 if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i][0]
3721 > locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i]) {
3722 locals->ROBSupport[i][0] = true;
3723 } else {
3724 locals->ROBSupport[i][0] = false;
3725 }
3726 }
3727 /*Writeback Mode Support Check*/
3728
3729 mode_lib->vba.TotalNumberOfActiveWriteback = 0;
3730 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3731 if (mode_lib->vba.WritebackEnable[k] == true) {
3732 if (mode_lib->vba.ActiveWritebacksPerPlane[k] == 0)
3733 mode_lib->vba.ActiveWritebacksPerPlane[k] = 1;
3734 mode_lib->vba.TotalNumberOfActiveWriteback =
3735 mode_lib->vba.TotalNumberOfActiveWriteback
3736 + mode_lib->vba.ActiveWritebacksPerPlane[k];
3737 }
3738 }
3739 mode_lib->vba.WritebackModeSupport = true;
3740 if (mode_lib->vba.TotalNumberOfActiveWriteback > mode_lib->vba.MaxNumWriteback) {
3741 mode_lib->vba.WritebackModeSupport = false;
3742 }
3743 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3744 if (mode_lib->vba.WritebackEnable[k] == true
3745 && mode_lib->vba.Writeback10bpc420Supported != true
3746 && mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) {
3747 mode_lib->vba.WritebackModeSupport = false;
3748 }
3749 }
3750 /*Writeback Scale Ratio and Taps Support Check*/
3751
3752 mode_lib->vba.WritebackScaleRatioAndTapsSupport = true;
3753 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3754 if (mode_lib->vba.WritebackEnable[k] == true) {
3755 if (mode_lib->vba.WritebackLumaAndChromaScalingSupported == false
3756 && (mode_lib->vba.WritebackHRatio[k] != 1.0
3757 || mode_lib->vba.WritebackVRatio[k] != 1.0)) {
3758 mode_lib->vba.WritebackScaleRatioAndTapsSupport = false;
3759 }
3760 if (mode_lib->vba.WritebackHRatio[k] > mode_lib->vba.WritebackMaxHSCLRatio
3761 || mode_lib->vba.WritebackVRatio[k]
3762 > mode_lib->vba.WritebackMaxVSCLRatio
3763 || mode_lib->vba.WritebackHRatio[k]
3764 < mode_lib->vba.WritebackMinHSCLRatio
3765 || mode_lib->vba.WritebackVRatio[k]
3766 < mode_lib->vba.WritebackMinVSCLRatio
3767 || mode_lib->vba.WritebackLumaHTaps[k]
3768 > mode_lib->vba.WritebackMaxHSCLTaps
3769 || mode_lib->vba.WritebackLumaVTaps[k]
3770 > mode_lib->vba.WritebackMaxVSCLTaps
3771 || mode_lib->vba.WritebackHRatio[k]
3772 > mode_lib->vba.WritebackLumaHTaps[k]
3773 || mode_lib->vba.WritebackVRatio[k]
3774 > mode_lib->vba.WritebackLumaVTaps[k]
3775 || (mode_lib->vba.WritebackLumaHTaps[k] > 2.0
3776 && ((mode_lib->vba.WritebackLumaHTaps[k] % 2)
3777 == 1))
3778 || (mode_lib->vba.WritebackPixelFormat[k] != dm_444_32
3779 && (mode_lib->vba.WritebackChromaHTaps[k]
3780 > mode_lib->vba.WritebackMaxHSCLTaps
3781 || mode_lib->vba.WritebackChromaVTaps[k]
3782 > mode_lib->vba.WritebackMaxVSCLTaps
3783 || 2.0
3784 * mode_lib->vba.WritebackHRatio[k]
3785 > mode_lib->vba.WritebackChromaHTaps[k]
3786 || 2.0
3787 * mode_lib->vba.WritebackVRatio[k]
3788 > mode_lib->vba.WritebackChromaVTaps[k]
3789 || (mode_lib->vba.WritebackChromaHTaps[k] > 2.0
3790 && ((mode_lib->vba.WritebackChromaHTaps[k] % 2) == 1))))) {
3791 mode_lib->vba.WritebackScaleRatioAndTapsSupport = false;
3792 }
3793 if (mode_lib->vba.WritebackVRatio[k] < 1.0) {
3794 mode_lib->vba.WritebackLumaVExtra =
3795 dml_max(1.0 - 2.0 / dml_ceil(1.0 / mode_lib->vba.WritebackVRatio[k], 1.0), 0.0);
3796 } else {
3797 mode_lib->vba.WritebackLumaVExtra = -1;
3798 }
3799 if ((mode_lib->vba.WritebackPixelFormat[k] == dm_444_32
3800 && mode_lib->vba.WritebackLumaVTaps[k]
3801 > (mode_lib->vba.WritebackLineBufferLumaBufferSize
3802 + mode_lib->vba.WritebackLineBufferChromaBufferSize)
3803 / 3.0
3804 / mode_lib->vba.WritebackDestinationWidth[k]
3805 - mode_lib->vba.WritebackLumaVExtra)
3806 || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_8
3807 && mode_lib->vba.WritebackLumaVTaps[k]
3808 > mode_lib->vba.WritebackLineBufferLumaBufferSize
3809 * 8.0 / 10.0 / mode_lib->vba.WritebackDestinationWidth[k]
3810 - mode_lib->vba.WritebackLumaVExtra)
3811 || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10
3812 && mode_lib->vba.WritebackLumaVTaps[k]
3813 > mode_lib->vba.WritebackLineBufferLumaBufferSize
3814 * 8.0 / 10.0
3815 / mode_lib->vba.WritebackDestinationWidth[k]
3816 - mode_lib->vba.WritebackLumaVExtra)) {
3817 mode_lib->vba.WritebackScaleRatioAndTapsSupport = false;
3818 }
3819 if (2.0 * mode_lib->vba.WritebackVRatio[k] < 1) {
3820 mode_lib->vba.WritebackChromaVExtra = 0.0;
3821 } else {
3822 mode_lib->vba.WritebackChromaVExtra = -1;
3823 }
3824 if ((mode_lib->vba.WritebackPixelFormat[k] == dm_420_8
3825 && mode_lib->vba.WritebackChromaVTaps[k]
3826 > mode_lib->vba.WritebackLineBufferChromaBufferSize
3827 * 8.0 / 10.0 / mode_lib->vba.WritebackDestinationWidth[k]
3828 - mode_lib->vba.WritebackChromaVExtra)
3829 || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10
3830 && mode_lib->vba.WritebackChromaVTaps[k]
3831 > mode_lib->vba.WritebackLineBufferChromaBufferSize
3832 * 8.0 / 10.0
3833 / mode_lib->vba.WritebackDestinationWidth[k]
3834 - mode_lib->vba.WritebackChromaVExtra)) {
3835 mode_lib->vba.WritebackScaleRatioAndTapsSupport = false;
3836 }
3837 }
3838 }
3839 /*Maximum DISPCLK/DPPCLK Support check*/
3840
3841 mode_lib->vba.WritebackRequiredDISPCLK = 0.0;
3842 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3843 if (mode_lib->vba.WritebackEnable[k] == true) {
3844 mode_lib->vba.WritebackRequiredDISPCLK =
3845 dml_max(
3846 mode_lib->vba.WritebackRequiredDISPCLK,
3847 CalculateWriteBackDISPCLK(
3848 mode_lib->vba.WritebackPixelFormat[k],
3849 mode_lib->vba.PixelClock[k],
3850 mode_lib->vba.WritebackHRatio[k],
3851 mode_lib->vba.WritebackVRatio[k],
3852 mode_lib->vba.WritebackLumaHTaps[k],
3853 mode_lib->vba.WritebackLumaVTaps[k],
3854 mode_lib->vba.WritebackChromaHTaps[k],
3855 mode_lib->vba.WritebackChromaVTaps[k],
3856 mode_lib->vba.WritebackDestinationWidth[k],
3857 mode_lib->vba.HTotal[k],
3858 mode_lib->vba.WritebackChromaLineBufferWidth));
3859 }
3860 }
3861 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3862 if (mode_lib->vba.HRatio[k] > 1.0) {
3863 locals->PSCL_FACTOR[k] = dml_min(
3864 mode_lib->vba.MaxDCHUBToPSCLThroughput,
3865 mode_lib->vba.MaxPSCLToLBThroughput
3866 * mode_lib->vba.HRatio[k]
3867 / dml_ceil(
3868 mode_lib->vba.htaps[k]
3869 / 6.0,
3870 1.0));
3871 } else {
3872 locals->PSCL_FACTOR[k] = dml_min(
3873 mode_lib->vba.MaxDCHUBToPSCLThroughput,
3874 mode_lib->vba.MaxPSCLToLBThroughput);
3875 }
3876 if (locals->BytePerPixelInDETC[k] == 0.0) {
3877 locals->PSCL_FACTOR_CHROMA[k] = 0.0;
3878 locals->MinDPPCLKUsingSingleDPP[k] =
3879 mode_lib->vba.PixelClock[k]
3880 * dml_max3(
3881 mode_lib->vba.vtaps[k] / 6.0
3882 * dml_min(
3883 1.0,
3884 mode_lib->vba.HRatio[k]),
3885 mode_lib->vba.HRatio[k]
3886 * mode_lib->vba.VRatio[k]
3887 / locals->PSCL_FACTOR[k],
3888 1.0);
3889 if ((mode_lib->vba.htaps[k] > 6.0 || mode_lib->vba.vtaps[k] > 6.0)
3890 && locals->MinDPPCLKUsingSingleDPP[k]
3891 < 2.0 * mode_lib->vba.PixelClock[k]) {
3892 locals->MinDPPCLKUsingSingleDPP[k] = 2.0
3893 * mode_lib->vba.PixelClock[k];
3894 }
3895 } else {
3896 if (mode_lib->vba.HRatio[k] / 2.0 > 1.0) {
3897 locals->PSCL_FACTOR_CHROMA[k] =
3898 dml_min(
3899 mode_lib->vba.MaxDCHUBToPSCLThroughput,
3900 mode_lib->vba.MaxPSCLToLBThroughput
3901 * mode_lib->vba.HRatio[k]
3902 / 2.0
3903 / dml_ceil(
3904 mode_lib->vba.HTAPsChroma[k]
3905 / 6.0,
3906 1.0));
3907 } else {
3908 locals->PSCL_FACTOR_CHROMA[k] = dml_min(
3909 mode_lib->vba.MaxDCHUBToPSCLThroughput,
3910 mode_lib->vba.MaxPSCLToLBThroughput);
3911 }
3912 locals->MinDPPCLKUsingSingleDPP[k] =
3913 mode_lib->vba.PixelClock[k]
3914 * dml_max5(
3915 mode_lib->vba.vtaps[k] / 6.0
3916 * dml_min(
3917 1.0,
3918 mode_lib->vba.HRatio[k]),
3919 mode_lib->vba.HRatio[k]
3920 * mode_lib->vba.VRatio[k]
3921 / locals->PSCL_FACTOR[k],
3922 mode_lib->vba.VTAPsChroma[k]
3923 / 6.0
3924 * dml_min(
3925 1.0,
3926 mode_lib->vba.HRatio[k]
3927 / 2.0),
3928 mode_lib->vba.HRatio[k]
3929 * mode_lib->vba.VRatio[k]
3930 / 4.0
3931 / locals->PSCL_FACTOR_CHROMA[k],
3932 1.0);
3933 if ((mode_lib->vba.htaps[k] > 6.0 || mode_lib->vba.vtaps[k] > 6.0
3934 || mode_lib->vba.HTAPsChroma[k] > 6.0
3935 || mode_lib->vba.VTAPsChroma[k] > 6.0)
3936 && locals->MinDPPCLKUsingSingleDPP[k]
3937 < 2.0 * mode_lib->vba.PixelClock[k]) {
3938 locals->MinDPPCLKUsingSingleDPP[k] = 2.0
3939 * mode_lib->vba.PixelClock[k];
3940 }
3941 }
3942 }
3943 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3944 Calculate256BBlockSizes(
3945 mode_lib->vba.SourcePixelFormat[k],
3946 mode_lib->vba.SurfaceTiling[k],
3947 dml_ceil(locals->BytePerPixelInDETY[k], 1.0),
3948 dml_ceil(locals->BytePerPixelInDETC[k], 2.0),
3949 &locals->Read256BlockHeightY[k],
3950 &locals->Read256BlockHeightC[k],
3951 &locals->Read256BlockWidthY[k],
3952 &locals->Read256BlockWidthC[k]);
3953 if (mode_lib->vba.SourceScan[k] == dm_horz) {
3954 locals->MaxSwathHeightY[k] = locals->Read256BlockHeightY[k];
3955 locals->MaxSwathHeightC[k] = locals->Read256BlockHeightC[k];
3956 } else {
3957 locals->MaxSwathHeightY[k] = locals->Read256BlockWidthY[k];
3958 locals->MaxSwathHeightC[k] = locals->Read256BlockWidthC[k];
3959 }
3960 if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64
3961 || mode_lib->vba.SourcePixelFormat[k] == dm_444_32
3962 || mode_lib->vba.SourcePixelFormat[k] == dm_444_16
3963 || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16
3964 || mode_lib->vba.SourcePixelFormat[k] == dm_mono_8)) {
3965 if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear
3966 || (mode_lib->vba.SourcePixelFormat[k] == dm_444_64
3967 && (mode_lib->vba.SurfaceTiling[k]
3968 == dm_sw_4kb_s
3969 || mode_lib->vba.SurfaceTiling[k]
3970 == dm_sw_4kb_s_x
3971 || mode_lib->vba.SurfaceTiling[k]
3972 == dm_sw_64kb_s
3973 || mode_lib->vba.SurfaceTiling[k]
3974 == dm_sw_64kb_s_t
3975 || mode_lib->vba.SurfaceTiling[k]
3976 == dm_sw_64kb_s_x
3977 || mode_lib->vba.SurfaceTiling[k]
3978 == dm_sw_var_s
3979 || mode_lib->vba.SurfaceTiling[k]
3980 == dm_sw_var_s_x)
3981 && mode_lib->vba.SourceScan[k] == dm_horz)) {
3982 locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k];
3983 } else {
3984 locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]
3985 / 2.0;
3986 }
3987 locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k];
3988 } else {
3989 if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) {
3990 locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k];
3991 locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k];
3992 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8
3993 && mode_lib->vba.SourceScan[k] == dm_horz) {
3994 locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]
3995 / 2.0;
3996 locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k];
3997 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10
3998 && mode_lib->vba.SourceScan[k] == dm_horz) {
3999 locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]
4000 / 2.0;
4001 locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k];
4002 } else {
4003 locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k];
4004 locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k];
4005 }
4006 }
4007 if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) {
4008 mode_lib->vba.MaximumSwathWidthSupport = 8192.0;
4009 } else {
4010 mode_lib->vba.MaximumSwathWidthSupport = 5120.0;
4011 }
4012 mode_lib->vba.MaximumSwathWidthInDETBuffer =
4013 dml_min(
4014 mode_lib->vba.MaximumSwathWidthSupport,
4015 mode_lib->vba.DETBufferSizeInKByte[0] * 1024.0 / 2.0
4016 / (locals->BytePerPixelInDETY[k]
4017 * locals->MinSwathHeightY[k]
4018 + locals->BytePerPixelInDETC[k]
4019 / 2.0
4020 * locals->MinSwathHeightC[k]));
4021 if (locals->BytePerPixelInDETC[k] == 0.0) {
4022 mode_lib->vba.MaximumSwathWidthInLineBuffer =
4023 mode_lib->vba.LineBufferSize
4024 * dml_max(mode_lib->vba.HRatio[k], 1.0)
4025 / mode_lib->vba.LBBitPerPixel[k]
4026 / (mode_lib->vba.vtaps[k]
4027 + dml_max(
4028 dml_ceil(
4029 mode_lib->vba.VRatio[k],
4030 1.0)
4031 - 2,
4032 0.0));
4033 } else {
4034 mode_lib->vba.MaximumSwathWidthInLineBuffer =
4035 dml_min(
4036 mode_lib->vba.LineBufferSize
4037 * dml_max(
4038 mode_lib->vba.HRatio[k],
4039 1.0)
4040 / mode_lib->vba.LBBitPerPixel[k]
4041 / (mode_lib->vba.vtaps[k]
4042 + dml_max(
4043 dml_ceil(
4044 mode_lib->vba.VRatio[k],
4045 1.0)
4046 - 2,
4047 0.0)),
4048 2.0 * mode_lib->vba.LineBufferSize
4049 * dml_max(
4050 mode_lib->vba.HRatio[k]
4051 / 2.0,
4052 1.0)
4053 / mode_lib->vba.LBBitPerPixel[k]
4054 / (mode_lib->vba.VTAPsChroma[k]
4055 + dml_max(
4056 dml_ceil(
4057 mode_lib->vba.VRatio[k]
4058 / 2.0,
4059 1.0)
4060 - 2,
4061 0.0)));
4062 }
4063 locals->MaximumSwathWidth[k] = dml_min(
4064 mode_lib->vba.MaximumSwathWidthInDETBuffer,
4065 mode_lib->vba.MaximumSwathWidthInLineBuffer);
4066 }
4067 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4068 double MaxMaxDispclkRoundedDown = RoundToDFSGranularityDown(
4069 mode_lib->vba.MaxDispclk[mode_lib->vba.soc.num_states],
4070 mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
4071
4072 for (j = 0; j < 2; j++) {
4073 mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(
4074 mode_lib->vba.MaxDispclk[i],
4075 mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
4076 mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(
4077 mode_lib->vba.MaxDppclk[i],
4078 mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
4079 locals->RequiredDISPCLK[i][j] = 0.0;
4080 locals->DISPCLK_DPPCLK_Support[i][j] = true;
4081 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4082 mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine =
4083 mode_lib->vba.PixelClock[k]
4084 * (1.0
4085 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
4086 / 100.0)
4087 * (1.0
4088 + mode_lib->vba.DISPCLKRampingMargin
4089 / 100.0);
4090 if (mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine >= mode_lib->vba.MaxDispclk[i]
4091 && i == mode_lib->vba.soc.num_states)
4092 mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine = mode_lib->vba.PixelClock[k]
4093 * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4094
4095 mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2
4096 * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * (1 + mode_lib->vba.DISPCLKRampingMargin / 100.0);
4097 if (mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine >= mode_lib->vba.MaxDispclk[i]
4098 && i == mode_lib->vba.soc.num_states)
4099 mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2
4100 * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4101
4102 locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4103 mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine;
4104 if (mode_lib->vba.ODMCapability) {
4105 if (locals->PlaneRequiredDISPCLKWithoutODMCombine > MaxMaxDispclkRoundedDown) {
4106 locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4107 mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
4108 } else if (locals->DSCEnabled[k] && (locals->HActive[k] > DCN21_MAX_DSC_IMAGE_WIDTH)) {
4109 locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4110 mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
4111 } else if (locals->HActive[k] > DCN21_MAX_420_IMAGE_WIDTH && locals->OutputFormat[k] == dm_420) {
4112 locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4113 mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
4114 }
4115 }
4116
4117 if (locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity
4118 && locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k]
4119 && locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4120 locals->NoOfDPP[i][j][k] = 1;
4121 locals->RequiredDPPCLK[i][j][k] =
4122 locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4123 } else {
4124 locals->NoOfDPP[i][j][k] = 2;
4125 locals->RequiredDPPCLK[i][j][k] =
4126 locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4127 }
4128 locals->RequiredDISPCLK[i][j] = dml_max(
4129 locals->RequiredDISPCLK[i][j],
4130 mode_lib->vba.PlaneRequiredDISPCLK);
4131 if ((locals->MinDPPCLKUsingSingleDPP[k] / locals->NoOfDPP[i][j][k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4132 > mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity)
4133 || (mode_lib->vba.PlaneRequiredDISPCLK > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity)) {
4134 locals->DISPCLK_DPPCLK_Support[i][j] = false;
4135 }
4136 }
4137 locals->TotalNumberOfActiveDPP[i][j] = 0.0;
4138 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++)
4139 locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + locals->NoOfDPP[i][j][k];
4140 if (j == 1) {
4141 while (locals->TotalNumberOfActiveDPP[i][j] < mode_lib->vba.MaxNumDPP
4142 && locals->TotalNumberOfActiveDPP[i][j] < 2 * mode_lib->vba.NumberOfActivePlanes) {
4143 double BWOfNonSplitPlaneOfMaximumBandwidth;
4144 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
4145
4146 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4147 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4148 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
4149 if (locals->ReadBandwidth[k] > BWOfNonSplitPlaneOfMaximumBandwidth && locals->NoOfDPP[i][j][k] == 1) {
4150 BWOfNonSplitPlaneOfMaximumBandwidth = locals->ReadBandwidth[k];
4151 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4152 }
4153 }
4154 locals->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4155 locals->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
4156 locals->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4157 * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4158 locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + 1;
4159 }
4160 }
4161 if (locals->TotalNumberOfActiveDPP[i][j] > mode_lib->vba.MaxNumDPP) {
4162 locals->RequiredDISPCLK[i][j] = 0.0;
4163 locals->DISPCLK_DPPCLK_Support[i][j] = true;
4164 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4165 locals->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4166 if (locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k]) {
4167 locals->NoOfDPP[i][j][k] = 1;
4168 locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k]
4169 * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4170 } else {
4171 locals->NoOfDPP[i][j][k] = 2;
4172 locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k]
4173 * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4174 }
4175 if (i != mode_lib->vba.soc.num_states) {
4176 mode_lib->vba.PlaneRequiredDISPCLK =
4177 mode_lib->vba.PixelClock[k]
4178 * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4179 * (1.0 + mode_lib->vba.DISPCLKRampingMargin / 100.0);
4180 } else {
4181 mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PixelClock[k]
4182 * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4183 }
4184 locals->RequiredDISPCLK[i][j] = dml_max(
4185 locals->RequiredDISPCLK[i][j],
4186 mode_lib->vba.PlaneRequiredDISPCLK);
4187 if (locals->MinDPPCLKUsingSingleDPP[k] / locals->NoOfDPP[i][j][k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4188 > mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity
4189 || mode_lib->vba.PlaneRequiredDISPCLK > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity)
4190 locals->DISPCLK_DPPCLK_Support[i][j] = false;
4191 }
4192 locals->TotalNumberOfActiveDPP[i][j] = 0.0;
4193 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++)
4194 locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + locals->NoOfDPP[i][j][k];
4195 }
4196 locals->RequiredDISPCLK[i][j] = dml_max(
4197 locals->RequiredDISPCLK[i][j],
4198 mode_lib->vba.WritebackRequiredDISPCLK);
4199 if (mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity
4200 < mode_lib->vba.WritebackRequiredDISPCLK) {
4201 locals->DISPCLK_DPPCLK_Support[i][j] = false;
4202 }
4203 }
4204 }
4205 /*Viewport Size Check*/
4206
4207 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4208 locals->ViewportSizeSupport[i][0] = true;
4209 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4210 if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4211 if (dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k]))
4212 > locals->MaximumSwathWidth[k]) {
4213 locals->ViewportSizeSupport[i][0] = false;
4214 }
4215 } else {
4216 if (locals->SwathWidthYSingleDPP[k] / 2.0 > locals->MaximumSwathWidth[k]) {
4217 locals->ViewportSizeSupport[i][0] = false;
4218 }
4219 }
4220 }
4221 }
4222 /*Total Available Pipes Support Check*/
4223
4224 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4225 for (j = 0; j < 2; j++) {
4226 if (locals->TotalNumberOfActiveDPP[i][j] <= mode_lib->vba.MaxNumDPP)
4227 locals->TotalAvailablePipesSupport[i][j] = true;
4228 else
4229 locals->TotalAvailablePipesSupport[i][j] = false;
4230 }
4231 }
4232 /*Total Available OTG Support Check*/
4233
4234 mode_lib->vba.TotalNumberOfActiveOTG = 0.0;
4235 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4236 if (mode_lib->vba.BlendingAndTiming[k] == k) {
4237 mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG
4238 + 1.0;
4239 }
4240 }
4241 if (mode_lib->vba.TotalNumberOfActiveOTG <= mode_lib->vba.MaxNumOTG) {
4242 mode_lib->vba.NumberOfOTGSupport = true;
4243 } else {
4244 mode_lib->vba.NumberOfOTGSupport = false;
4245 }
4246 /*Display IO and DSC Support Check*/
4247
4248 mode_lib->vba.NonsupportedDSCInputBPC = false;
4249 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4250 if (!(mode_lib->vba.DSCInputBitPerComponent[k] == 12.0
4251 || mode_lib->vba.DSCInputBitPerComponent[k] == 10.0
4252 || mode_lib->vba.DSCInputBitPerComponent[k] == 8.0)) {
4253 mode_lib->vba.NonsupportedDSCInputBPC = true;
4254 }
4255 }
4256 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4257 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4258 locals->RequiresDSC[i][k] = false;
4259 locals->RequiresFEC[i][k] = 0;
4260 if (mode_lib->vba.BlendingAndTiming[k] == k) {
4261 if (mode_lib->vba.Output[k] == dm_hdmi) {
4262 locals->RequiresDSC[i][k] = false;
4263 locals->RequiresFEC[i][k] = 0;
4264 locals->OutputBppPerState[i][k] = TruncToValidBPP(
4265 dml_min(600.0, mode_lib->vba.PHYCLKPerState[i]) / mode_lib->vba.PixelClockBackEnd[k] * 24,
4266 mode_lib->vba.ForcedOutputLinkBPP[k],
4267 false,
4268 mode_lib->vba.Output[k],
4269 mode_lib->vba.OutputFormat[k],
4270 mode_lib->vba.DSCInputBitPerComponent[k]);
4271 } else if (mode_lib->vba.Output[k] == dm_dp
4272 || mode_lib->vba.Output[k] == dm_edp) {
4273 if (mode_lib->vba.Output[k] == dm_edp) {
4274 mode_lib->vba.EffectiveFECOverhead = 0.0;
4275 } else {
4276 mode_lib->vba.EffectiveFECOverhead =
4277 mode_lib->vba.FECOverhead;
4278 }
4279 if (mode_lib->vba.PHYCLKPerState[i] >= 270.0) {
4280 mode_lib->vba.Outbpp = TruncToValidBPP(
4281 (1.0 - mode_lib->vba.Downspreading / 100.0) * 270.0
4282 * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
4283 mode_lib->vba.ForcedOutputLinkBPP[k],
4284 false,
4285 mode_lib->vba.Output[k],
4286 mode_lib->vba.OutputFormat[k],
4287 mode_lib->vba.DSCInputBitPerComponent[k]);
4288 mode_lib->vba.OutbppDSC = TruncToValidBPP(
4289 (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 270.0
4290 * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
4291 mode_lib->vba.ForcedOutputLinkBPP[k],
4292 true,
4293 mode_lib->vba.Output[k],
4294 mode_lib->vba.OutputFormat[k],
4295 mode_lib->vba.DSCInputBitPerComponent[k]);
4296 if (mode_lib->vba.DSCEnabled[k] == true) {
4297 locals->RequiresDSC[i][k] = true;
4298 if (mode_lib->vba.Output[k] == dm_dp) {
4299 locals->RequiresFEC[i][k] = true;
4300 } else {
4301 locals->RequiresFEC[i][k] = false;
4302 }
4303 mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC;
4304 } else {
4305 locals->RequiresDSC[i][k] = false;
4306 locals->RequiresFEC[i][k] = false;
4307 }
4308 locals->OutputBppPerState[i][k] = mode_lib->vba.Outbpp;
4309 }
4310 if (mode_lib->vba.Outbpp == BPP_INVALID && mode_lib->vba.PHYCLKPerState[i] >= 540.0) {
4311 mode_lib->vba.Outbpp = TruncToValidBPP(
4312 (1.0 - mode_lib->vba.Downspreading / 100.0) * 540.0
4313 * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
4314 mode_lib->vba.ForcedOutputLinkBPP[k],
4315 false,
4316 mode_lib->vba.Output[k],
4317 mode_lib->vba.OutputFormat[k],
4318 mode_lib->vba.DSCInputBitPerComponent[k]);
4319 mode_lib->vba.OutbppDSC = TruncToValidBPP(
4320 (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 540.0
4321 * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
4322 mode_lib->vba.ForcedOutputLinkBPP[k],
4323 true,
4324 mode_lib->vba.Output[k],
4325 mode_lib->vba.OutputFormat[k],
4326 mode_lib->vba.DSCInputBitPerComponent[k]);
4327 if (mode_lib->vba.DSCEnabled[k] == true) {
4328 locals->RequiresDSC[i][k] = true;
4329 if (mode_lib->vba.Output[k] == dm_dp) {
4330 locals->RequiresFEC[i][k] = true;
4331 } else {
4332 locals->RequiresFEC[i][k] = false;
4333 }
4334 mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC;
4335 } else {
4336 locals->RequiresDSC[i][k] = false;
4337 locals->RequiresFEC[i][k] = false;
4338 }
4339 locals->OutputBppPerState[i][k] = mode_lib->vba.Outbpp;
4340 }
4341 if (mode_lib->vba.Outbpp == BPP_INVALID
4342 && mode_lib->vba.PHYCLKPerState[i]
4343 >= 810.0) {
4344 mode_lib->vba.Outbpp = TruncToValidBPP(
4345 (1.0 - mode_lib->vba.Downspreading / 100.0) * 810.0
4346 * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
4347 mode_lib->vba.ForcedOutputLinkBPP[k],
4348 false,
4349 mode_lib->vba.Output[k],
4350 mode_lib->vba.OutputFormat[k],
4351 mode_lib->vba.DSCInputBitPerComponent[k]);
4352 mode_lib->vba.OutbppDSC = TruncToValidBPP(
4353 (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 810.0
4354 * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
4355 mode_lib->vba.ForcedOutputLinkBPP[k],
4356 true,
4357 mode_lib->vba.Output[k],
4358 mode_lib->vba.OutputFormat[k],
4359 mode_lib->vba.DSCInputBitPerComponent[k]);
4360 if (mode_lib->vba.DSCEnabled[k] == true || mode_lib->vba.Outbpp == BPP_INVALID) {
4361 locals->RequiresDSC[i][k] = true;
4362 if (mode_lib->vba.Output[k] == dm_dp) {
4363 locals->RequiresFEC[i][k] = true;
4364 } else {
4365 locals->RequiresFEC[i][k] = false;
4366 }
4367 mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC;
4368 } else {
4369 locals->RequiresDSC[i][k] = false;
4370 locals->RequiresFEC[i][k] = false;
4371 }
4372 locals->OutputBppPerState[i][k] =
4373 mode_lib->vba.Outbpp;
4374 }
4375 }
4376 } else {
4377 locals->OutputBppPerState[i][k] = BPP_BLENDED_PIPE;
4378 }
4379 }
4380 }
4381 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4382 locals->DIOSupport[i] = true;
4383 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4384 if (!mode_lib->vba.skip_dio_check[k]
4385 && (locals->OutputBppPerState[i][k] == BPP_INVALID
4386 || (mode_lib->vba.OutputFormat[k] == dm_420
4387 && mode_lib->vba.Interlace[k] == true
4388 && mode_lib->vba.ProgressiveToInterlaceUnitInOPP == true))) {
4389 locals->DIOSupport[i] = false;
4390 }
4391 }
4392 }
4393 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4394 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4395 locals->DSCCLKRequiredMoreThanSupported[i] = false;
4396 if (mode_lib->vba.BlendingAndTiming[k] == k) {
4397 if ((mode_lib->vba.Output[k] == dm_dp
4398 || mode_lib->vba.Output[k] == dm_edp)) {
4399 if (mode_lib->vba.OutputFormat[k] == dm_420
4400 || mode_lib->vba.OutputFormat[k]
4401 == dm_n422) {
4402 mode_lib->vba.DSCFormatFactor = 2;
4403 } else {
4404 mode_lib->vba.DSCFormatFactor = 1;
4405 }
4406 if (locals->RequiresDSC[i][k] == true) {
4407 if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4408 if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 / mode_lib->vba.DSCFormatFactor
4409 > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) {
4410 locals->DSCCLKRequiredMoreThanSupported[i] =
4411 true;
4412 }
4413 } else {
4414 if (mode_lib->vba.PixelClockBackEnd[k] / 3.0 / mode_lib->vba.DSCFormatFactor
4415 > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) {
4416 locals->DSCCLKRequiredMoreThanSupported[i] =
4417 true;
4418 }
4419 }
4420 }
4421 }
4422 }
4423 }
4424 }
4425 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4426 locals->NotEnoughDSCUnits[i] = false;
4427 mode_lib->vba.TotalDSCUnitsRequired = 0.0;
4428 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4429 if (locals->RequiresDSC[i][k] == true) {
4430 if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4431 mode_lib->vba.TotalDSCUnitsRequired =
4432 mode_lib->vba.TotalDSCUnitsRequired + 2.0;
4433 } else {
4434 mode_lib->vba.TotalDSCUnitsRequired =
4435 mode_lib->vba.TotalDSCUnitsRequired + 1.0;
4436 }
4437 }
4438 }
4439 if (mode_lib->vba.TotalDSCUnitsRequired > mode_lib->vba.NumberOfDSC) {
4440 locals->NotEnoughDSCUnits[i] = true;
4441 }
4442 }
4443 /*DSC Delay per state*/
4444
4445 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4446 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4447 if (mode_lib->vba.BlendingAndTiming[k] != k) {
4448 mode_lib->vba.slices = 0;
4449 } else if (locals->RequiresDSC[i][k] == 0
4450 || locals->RequiresDSC[i][k] == false) {
4451 mode_lib->vba.slices = 0;
4452 } else if (mode_lib->vba.PixelClockBackEnd[k] > 3200.0) {
4453 mode_lib->vba.slices = dml_ceil(
4454 mode_lib->vba.PixelClockBackEnd[k] / 400.0,
4455 4.0);
4456 } else if (mode_lib->vba.PixelClockBackEnd[k] > 1360.0) {
4457 mode_lib->vba.slices = 8.0;
4458 } else if (mode_lib->vba.PixelClockBackEnd[k] > 680.0) {
4459 mode_lib->vba.slices = 4.0;
4460 } else if (mode_lib->vba.PixelClockBackEnd[k] > 340.0) {
4461 mode_lib->vba.slices = 2.0;
4462 } else {
4463 mode_lib->vba.slices = 1.0;
4464 }
4465 if (locals->OutputBppPerState[i][k] == BPP_BLENDED_PIPE
4466 || locals->OutputBppPerState[i][k] == BPP_INVALID) {
4467 mode_lib->vba.bpp = 0.0;
4468 } else {
4469 mode_lib->vba.bpp = locals->OutputBppPerState[i][k];
4470 }
4471 if (locals->RequiresDSC[i][k] == true && mode_lib->vba.bpp != 0.0) {
4472 if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4473 locals->DSCDelayPerState[i][k] =
4474 dscceComputeDelay(
4475 mode_lib->vba.DSCInputBitPerComponent[k],
4476 mode_lib->vba.bpp,
4477 dml_ceil(
4478 mode_lib->vba.HActive[k]
4479 / mode_lib->vba.slices,
4480 1.0),
4481 mode_lib->vba.slices,
4482 mode_lib->vba.OutputFormat[k])
4483 + dscComputeDelay(
4484 mode_lib->vba.OutputFormat[k]);
4485 } else {
4486 locals->DSCDelayPerState[i][k] =
4487 2.0 * (dscceComputeDelay(
4488 mode_lib->vba.DSCInputBitPerComponent[k],
4489 mode_lib->vba.bpp,
4490 dml_ceil(mode_lib->vba.HActive[k] / mode_lib->vba.slices, 1.0),
4491 mode_lib->vba.slices / 2,
4492 mode_lib->vba.OutputFormat[k])
4493 + dscComputeDelay(mode_lib->vba.OutputFormat[k]));
4494 }
4495 locals->DSCDelayPerState[i][k] =
4496 locals->DSCDelayPerState[i][k] * mode_lib->vba.PixelClock[k] / mode_lib->vba.PixelClockBackEnd[k];
4497 } else {
4498 locals->DSCDelayPerState[i][k] = 0.0;
4499 }
4500 }
4501 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4502 for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) {
4503 for (j = 0; j <= mode_lib->vba.NumberOfActivePlanes - 1; j++) {
4504 if (mode_lib->vba.BlendingAndTiming[k] == m && locals->RequiresDSC[i][m] == true)
4505 locals->DSCDelayPerState[i][k] = locals->DSCDelayPerState[i][m];
4506 }
4507 }
4508 }
4509 }
4510
4511 //Prefetch Check
4512 for (i = 0; i <= mode_lib->vba.soc.num_states; ++i) {
4513 for (j = 0; j <= 1; ++j) {
4514 locals->TotalNumberOfDCCActiveDPP[i][j] = 0;
4515 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
4516 if (mode_lib->vba.DCCEnable[k] == true)
4517 locals->TotalNumberOfDCCActiveDPP[i][j] = locals->TotalNumberOfDCCActiveDPP[i][j] + locals->NoOfDPP[i][j][k];
4518 }
4519 }
4520 }
4521
4522 mode_lib->vba.UrgentLatency = dml_max3(
4523 mode_lib->vba.UrgentLatencyPixelDataOnly,
4524 mode_lib->vba.UrgentLatencyPixelMixedWithVMData,
4525 mode_lib->vba.UrgentLatencyVMDataOnly);
4526 mode_lib->vba.PrefetchERROR = CalculateMinAndMaxPrefetchMode(
4527 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
4528 &mode_lib->vba.MinPrefetchMode,
4529 &mode_lib->vba.MaxPrefetchMode);
4530
4531 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4532 for (j = 0; j < 2; j++) {
4533 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4534 locals->RequiredDPPCLKThisState[k] = locals->RequiredDPPCLK[i][j][k];
4535 locals->NoOfDPPThisState[k] = locals->NoOfDPP[i][j][k];
4536 if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4537 locals->SwathWidthYThisState[k] =
4538 dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k]));
4539 } else {
4540 locals->SwathWidthYThisState[k] = locals->SwathWidthYSingleDPP[k] / locals->NoOfDPP[i][j][k];
4541 }
4542 mode_lib->vba.SwathWidthGranularityY = 256.0
4543 / dml_ceil(locals->BytePerPixelInDETY[k], 1.0)
4544 / locals->MaxSwathHeightY[k];
4545 mode_lib->vba.RoundedUpMaxSwathSizeBytesY =
4546 (dml_ceil(locals->SwathWidthYThisState[k] - 1.0, mode_lib->vba.SwathWidthGranularityY)
4547 + mode_lib->vba.SwathWidthGranularityY) * locals->BytePerPixelInDETY[k] * locals->MaxSwathHeightY[k];
4548 if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) {
4549 mode_lib->vba.RoundedUpMaxSwathSizeBytesY = dml_ceil(
4550 mode_lib->vba.RoundedUpMaxSwathSizeBytesY,
4551 256.0) + 256;
4552 }
4553 if (locals->MaxSwathHeightC[k] > 0.0) {
4554 mode_lib->vba.SwathWidthGranularityC = 256.0 / dml_ceil(locals->BytePerPixelInDETC[k], 2.0) / locals->MaxSwathHeightC[k];
4555 mode_lib->vba.RoundedUpMaxSwathSizeBytesC = (dml_ceil(locals->SwathWidthYThisState[k] / 2.0 - 1.0, mode_lib->vba.SwathWidthGranularityC)
4556 + mode_lib->vba.SwathWidthGranularityC) * locals->BytePerPixelInDETC[k] * locals->MaxSwathHeightC[k];
4557 if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) {
4558 mode_lib->vba.RoundedUpMaxSwathSizeBytesC = dml_ceil(mode_lib->vba.RoundedUpMaxSwathSizeBytesC, 256.0) + 256;
4559 }
4560 } else {
4561 mode_lib->vba.RoundedUpMaxSwathSizeBytesC = 0.0;
4562 }
4563 if (mode_lib->vba.RoundedUpMaxSwathSizeBytesY + mode_lib->vba.RoundedUpMaxSwathSizeBytesC
4564 <= mode_lib->vba.DETBufferSizeInKByte[0] * 1024.0 / 2.0) {
4565 locals->SwathHeightYThisState[k] = locals->MaxSwathHeightY[k];
4566 locals->SwathHeightCThisState[k] = locals->MaxSwathHeightC[k];
4567 } else {
4568 locals->SwathHeightYThisState[k] =
4569 locals->MinSwathHeightY[k];
4570 locals->SwathHeightCThisState[k] =
4571 locals->MinSwathHeightC[k];
4572 }
4573 }
4574
4575 CalculateDCFCLKDeepSleep(
4576 mode_lib,
4577 mode_lib->vba.NumberOfActivePlanes,
4578 locals->BytePerPixelInDETY,
4579 locals->BytePerPixelInDETC,
4580 mode_lib->vba.VRatio,
4581 locals->SwathWidthYThisState,
4582 locals->NoOfDPPThisState,
4583 mode_lib->vba.HRatio,
4584 mode_lib->vba.PixelClock,
4585 locals->PSCL_FACTOR,
4586 locals->PSCL_FACTOR_CHROMA,
4587 locals->RequiredDPPCLKThisState,
4588 &mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0]);
4589
4590 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4591 if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64
4592 && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
4593 && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
4594 && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16
4595 && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8)) {
4596 mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4597 mode_lib,
4598 mode_lib->vba.DCCEnable[k],
4599 locals->Read256BlockHeightC[k],
4600 locals->Read256BlockWidthC[k],
4601 mode_lib->vba.SourcePixelFormat[k],
4602 mode_lib->vba.SurfaceTiling[k],
4603 dml_ceil(locals->BytePerPixelInDETC[k], 2.0),
4604 mode_lib->vba.SourceScan[k],
4605 mode_lib->vba.ViewportWidth[k] / 2.0,
4606 mode_lib->vba.ViewportHeight[k] / 2.0,
4607 locals->SwathWidthYThisState[k] / 2.0,
4608 mode_lib->vba.GPUVMEnable,
4609 mode_lib->vba.HostVMEnable,
4610 mode_lib->vba.HostVMMaxPageTableLevels,
4611 mode_lib->vba.HostVMCachedPageTableLevels,
4612 mode_lib->vba.VMMPageSize,
4613 mode_lib->vba.PTEBufferSizeInRequestsChroma,
4614 mode_lib->vba.PitchC[k],
4615 0.0,
4616 &locals->MacroTileWidthC[k],
4617 &mode_lib->vba.MetaRowBytesC,
4618 &mode_lib->vba.DPTEBytesPerRowC,
4619 &locals->PTEBufferSizeNotExceededC[i][j][k],
4620 locals->dpte_row_width_chroma_ub,
4621 &locals->dpte_row_height_chroma[k],
4622 &locals->meta_req_width_chroma[k],
4623 &locals->meta_req_height_chroma[k],
4624 &locals->meta_row_width_chroma[k],
4625 &locals->meta_row_height_chroma[k],
4626 &locals->vm_group_bytes_chroma,
4627 &locals->dpte_group_bytes_chroma,
4628 locals->PixelPTEReqWidthC,
4629 locals->PixelPTEReqHeightC,
4630 locals->PTERequestSizeC,
4631 locals->dpde0_bytes_per_frame_ub_c,
4632 locals->meta_pte_bytes_per_frame_ub_c);
4633 locals->PrefetchLinesC[0][0][k] = CalculatePrefetchSourceLines(
4634 mode_lib,
4635 mode_lib->vba.VRatio[k]/2,
4636 mode_lib->vba.VTAPsChroma[k],
4637 mode_lib->vba.Interlace[k],
4638 mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
4639 locals->SwathHeightCThisState[k],
4640 mode_lib->vba.ViewportYStartC[k],
4641 &locals->PrefillC[k],
4642 &locals->MaxNumSwC[k]);
4643 locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma;
4644 } else {
4645 mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = 0.0;
4646 mode_lib->vba.MetaRowBytesC = 0.0;
4647 mode_lib->vba.DPTEBytesPerRowC = 0.0;
4648 locals->PrefetchLinesC[0][0][k] = 0.0;
4649 locals->PTEBufferSizeNotExceededC[i][j][k] = true;
4650 locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma;
4651 }
4652 mode_lib->vba.PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4653 mode_lib,
4654 mode_lib->vba.DCCEnable[k],
4655 locals->Read256BlockHeightY[k],
4656 locals->Read256BlockWidthY[k],
4657 mode_lib->vba.SourcePixelFormat[k],
4658 mode_lib->vba.SurfaceTiling[k],
4659 dml_ceil(locals->BytePerPixelInDETY[k], 1.0),
4660 mode_lib->vba.SourceScan[k],
4661 mode_lib->vba.ViewportWidth[k],
4662 mode_lib->vba.ViewportHeight[k],
4663 locals->SwathWidthYThisState[k],
4664 mode_lib->vba.GPUVMEnable,
4665 mode_lib->vba.HostVMEnable,
4666 mode_lib->vba.HostVMMaxPageTableLevels,
4667 mode_lib->vba.HostVMCachedPageTableLevels,
4668 mode_lib->vba.VMMPageSize,
4669 locals->PTEBufferSizeInRequestsForLuma,
4670 mode_lib->vba.PitchY[k],
4671 mode_lib->vba.DCCMetaPitchY[k],
4672 &locals->MacroTileWidthY[k],
4673 &mode_lib->vba.MetaRowBytesY,
4674 &mode_lib->vba.DPTEBytesPerRowY,
4675 &locals->PTEBufferSizeNotExceededY[i][j][k],
4676 locals->dpte_row_width_luma_ub,
4677 &locals->dpte_row_height[k],
4678 &locals->meta_req_width[k],
4679 &locals->meta_req_height[k],
4680 &locals->meta_row_width[k],
4681 &locals->meta_row_height[k],
4682 &locals->vm_group_bytes[k],
4683 &locals->dpte_group_bytes[k],
4684 locals->PixelPTEReqWidthY,
4685 locals->PixelPTEReqHeightY,
4686 locals->PTERequestSizeY,
4687 locals->dpde0_bytes_per_frame_ub_l,
4688 locals->meta_pte_bytes_per_frame_ub_l);
4689 locals->PrefetchLinesY[0][0][k] = CalculatePrefetchSourceLines(
4690 mode_lib,
4691 mode_lib->vba.VRatio[k],
4692 mode_lib->vba.vtaps[k],
4693 mode_lib->vba.Interlace[k],
4694 mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
4695 locals->SwathHeightYThisState[k],
4696 mode_lib->vba.ViewportYStartY[k],
4697 &locals->PrefillY[k],
4698 &locals->MaxNumSwY[k]);
4699 locals->PDEAndMetaPTEBytesPerFrame[0][0][k] =
4700 mode_lib->vba.PDEAndMetaPTEBytesPerFrameY + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC;
4701 locals->MetaRowBytes[0][0][k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC;
4702 locals->DPTEBytesPerRow[0][0][k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC;
4703
4704 CalculateActiveRowBandwidth(
4705 mode_lib->vba.GPUVMEnable,
4706 mode_lib->vba.SourcePixelFormat[k],
4707 mode_lib->vba.VRatio[k],
4708 mode_lib->vba.DCCEnable[k],
4709 mode_lib->vba.HTotal[k] /
4710 mode_lib->vba.PixelClock[k],
4711 mode_lib->vba.MetaRowBytesY,
4712 mode_lib->vba.MetaRowBytesC,
4713 locals->meta_row_height[k],
4714 locals->meta_row_height_chroma[k],
4715 mode_lib->vba.DPTEBytesPerRowY,
4716 mode_lib->vba.DPTEBytesPerRowC,
4717 locals->dpte_row_height[k],
4718 locals->dpte_row_height_chroma[k],
4719 &locals->meta_row_bw[k],
4720 &locals->dpte_row_bw[k]);
4721 }
4722 mode_lib->vba.ExtraLatency = CalculateExtraLatency(
4723 locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i],
4724 locals->TotalNumberOfActiveDPP[i][j],
4725 mode_lib->vba.PixelChunkSizeInKByte,
4726 locals->TotalNumberOfDCCActiveDPP[i][j],
4727 mode_lib->vba.MetaChunkSize,
4728 locals->ReturnBWPerState[i][0],
4729 mode_lib->vba.GPUVMEnable,
4730 mode_lib->vba.HostVMEnable,
4731 mode_lib->vba.NumberOfActivePlanes,
4732 locals->NoOfDPPThisState,
4733 locals->dpte_group_bytes,
4734 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4735 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4736 mode_lib->vba.HostVMMaxPageTableLevels,
4737 mode_lib->vba.HostVMCachedPageTableLevels);
4738
4739 mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0];
4740 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4741 if (mode_lib->vba.BlendingAndTiming[k] == k) {
4742 if (mode_lib->vba.WritebackEnable[k] == true) {
4743 locals->WritebackDelay[i][k] = mode_lib->vba.WritebackLatency
4744 + CalculateWriteBackDelay(
4745 mode_lib->vba.WritebackPixelFormat[k],
4746 mode_lib->vba.WritebackHRatio[k],
4747 mode_lib->vba.WritebackVRatio[k],
4748 mode_lib->vba.WritebackLumaHTaps[k],
4749 mode_lib->vba.WritebackLumaVTaps[k],
4750 mode_lib->vba.WritebackChromaHTaps[k],
4751 mode_lib->vba.WritebackChromaVTaps[k],
4752 mode_lib->vba.WritebackDestinationWidth[k]) / locals->RequiredDISPCLK[i][j];
4753 } else {
4754 locals->WritebackDelay[i][k] = 0.0;
4755 }
4756 for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) {
4757 if (mode_lib->vba.BlendingAndTiming[m] == k
4758 && mode_lib->vba.WritebackEnable[m]
4759 == true) {
4760 locals->WritebackDelay[i][k] = dml_max(locals->WritebackDelay[i][k],
4761 mode_lib->vba.WritebackLatency + CalculateWriteBackDelay(
4762 mode_lib->vba.WritebackPixelFormat[m],
4763 mode_lib->vba.WritebackHRatio[m],
4764 mode_lib->vba.WritebackVRatio[m],
4765 mode_lib->vba.WritebackLumaHTaps[m],
4766 mode_lib->vba.WritebackLumaVTaps[m],
4767 mode_lib->vba.WritebackChromaHTaps[m],
4768 mode_lib->vba.WritebackChromaVTaps[m],
4769 mode_lib->vba.WritebackDestinationWidth[m]) / locals->RequiredDISPCLK[i][j]);
4770 }
4771 }
4772 }
4773 }
4774 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4775 for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) {
4776 if (mode_lib->vba.BlendingAndTiming[k] == m) {
4777 locals->WritebackDelay[i][k] = locals->WritebackDelay[i][m];
4778 }
4779 }
4780 }
4781 mode_lib->vba.MaxMaxVStartup[0][0] = 0;
4782 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4783 locals->MaximumVStartup[0][0][k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]
4784 - dml_max(1.0, dml_ceil(locals->WritebackDelay[i][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1.0));
4785 mode_lib->vba.MaxMaxVStartup[0][0] = dml_max(mode_lib->vba.MaxMaxVStartup[0][0], locals->MaximumVStartup[0][0][k]);
4786 }
4787
4788 mode_lib->vba.NextPrefetchMode = mode_lib->vba.MinPrefetchMode;
4789 mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[0][0];
4790 do {
4791 mode_lib->vba.PrefetchMode[i][j] = mode_lib->vba.NextPrefetchMode;
4792 mode_lib->vba.MaxVStartup = mode_lib->vba.NextMaxVStartup;
4793
4794 mode_lib->vba.TWait = CalculateTWait(
4795 mode_lib->vba.PrefetchMode[i][j],
4796 mode_lib->vba.DRAMClockChangeLatency,
4797 mode_lib->vba.UrgentLatency,
4798 mode_lib->vba.SREnterPlusExitTime);
4799 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++)
4800 CalculatePrefetchSchedulePerPlane(mode_lib, i, j, k);
4801
4802 mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = 0.0;
4803 mode_lib->vba.MaximumReadBandwidthWithPrefetch = 0.0;
4804 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4805 unsigned int m;
4806
4807 locals->cursor_bw[k] = 0;
4808 locals->cursor_bw_pre[k] = 0;
4809 for (m = 0; m < mode_lib->vba.NumberOfCursors[k]; m++) {
4810 locals->cursor_bw[k] = mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m]
4811 / 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k];
4812 locals->cursor_bw_pre[k] = mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m]
4813 / 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * locals->VRatioPreY[i][j][k];
4814 }
4815
4816 CalculateUrgentBurstFactor(
4817 mode_lib->vba.DETBufferSizeInKByte[0],
4818 locals->SwathHeightYThisState[k],
4819 locals->SwathHeightCThisState[k],
4820 locals->SwathWidthYThisState[k],
4821 mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
4822 mode_lib->vba.UrgentLatency,
4823 mode_lib->vba.CursorBufferSize,
4824 mode_lib->vba.CursorWidth[k][0] + mode_lib->vba.CursorWidth[k][1],
4825 dml_max(mode_lib->vba.CursorBPP[k][0], mode_lib->vba.CursorBPP[k][1]),
4826 mode_lib->vba.VRatio[k],
4827 locals->VRatioPreY[i][j][k],
4828 locals->VRatioPreC[i][j][k],
4829 locals->BytePerPixelInDETY[k],
4830 locals->BytePerPixelInDETC[k],
4831 &locals->UrgentBurstFactorCursor[k],
4832 &locals->UrgentBurstFactorCursorPre[k],
4833 &locals->UrgentBurstFactorLuma[k],
4834 &locals->UrgentBurstFactorLumaPre[k],
4835 &locals->UrgentBurstFactorChroma[k],
4836 &locals->UrgentBurstFactorChromaPre[k],
4837 &locals->NotEnoughUrgentLatencyHiding[0][0],
4838 &locals->NotEnoughUrgentLatencyHidingPre);
4839
4840 if (mode_lib->vba.UseUrgentBurstBandwidth == false) {
4841 locals->UrgentBurstFactorCursor[k] = 1;
4842 locals->UrgentBurstFactorCursorPre[k] = 1;
4843 locals->UrgentBurstFactorLuma[k] = 1;
4844 locals->UrgentBurstFactorLumaPre[k] = 1;
4845 locals->UrgentBurstFactorChroma[k] = 1;
4846 locals->UrgentBurstFactorChromaPre[k] = 1;
4847 }
4848
4849 mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = mode_lib->vba.MaximumReadBandwidthWithoutPrefetch
4850 + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k] + locals->ReadBandwidthLuma[k]
4851 * locals->UrgentBurstFactorLuma[k] + locals->ReadBandwidthChroma[k]
4852 * locals->UrgentBurstFactorChroma[k] + locals->meta_row_bw[k] + locals->dpte_row_bw[k];
4853 mode_lib->vba.MaximumReadBandwidthWithPrefetch = mode_lib->vba.MaximumReadBandwidthWithPrefetch
4854 + dml_max3(locals->prefetch_vmrow_bw[k],
4855 locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k] + locals->ReadBandwidthChroma[k]
4856 * locals->UrgentBurstFactorChroma[k] + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k]
4857 + locals->meta_row_bw[k] + locals->dpte_row_bw[k],
4858 locals->RequiredPrefetchPixelDataBWLuma[i][j][k] * locals->UrgentBurstFactorLumaPre[k]
4859 + locals->RequiredPrefetchPixelDataBWChroma[i][j][k] * locals->UrgentBurstFactorChromaPre[k]
4860 + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]);
4861 }
4862 locals->BandwidthWithoutPrefetchSupported[i][0] = true;
4863 if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i][0]
4864 || locals->NotEnoughUrgentLatencyHiding[0][0] == 1) {
4865 locals->BandwidthWithoutPrefetchSupported[i][0] = false;
4866 }
4867
4868 locals->PrefetchSupported[i][j] = true;
4869 if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i][0]
4870 || locals->NotEnoughUrgentLatencyHiding[0][0] == 1
4871 || locals->NotEnoughUrgentLatencyHidingPre == 1) {
4872 locals->PrefetchSupported[i][j] = false;
4873 }
4874 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4875 if (locals->LineTimesForPrefetch[k] < 2.0
4876 || locals->LinesForMetaPTE[k] >= 32.0
4877 || locals->LinesForMetaAndDPTERow[k] >= 16.0
4878 || mode_lib->vba.IsErrorResult[i][j][k] == true) {
4879 locals->PrefetchSupported[i][j] = false;
4880 }
4881 }
4882 locals->VRatioInPrefetchSupported[i][j] = true;
4883 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4884 if (locals->VRatioPreY[i][j][k] > 4.0
4885 || locals->VRatioPreC[i][j][k] > 4.0
4886 || mode_lib->vba.IsErrorResult[i][j][k] == true) {
4887 locals->VRatioInPrefetchSupported[i][j] = false;
4888 }
4889 }
4890 mode_lib->vba.AnyLinesForVMOrRowTooLarge = false;
4891 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
4892 if (locals->LinesForMetaAndDPTERow[k] >= 16 || locals->LinesForMetaPTE[k] >= 32) {
4893 mode_lib->vba.AnyLinesForVMOrRowTooLarge = true;
4894 }
4895 }
4896
4897 if (mode_lib->vba.MaxVStartup <= 13 || mode_lib->vba.AnyLinesForVMOrRowTooLarge == false) {
4898 mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[0][0];
4899 mode_lib->vba.NextPrefetchMode = mode_lib->vba.NextPrefetchMode + 1;
4900 } else {
4901 mode_lib->vba.NextMaxVStartup = mode_lib->vba.NextMaxVStartup - 1;
4902 }
4903 } while ((locals->PrefetchSupported[i][j] != true || locals->VRatioInPrefetchSupported[i][j] != true)
4904 && (mode_lib->vba.NextMaxVStartup != mode_lib->vba.MaxMaxVStartup[0][0]
4905 || mode_lib->vba.NextPrefetchMode <= mode_lib->vba.MaxPrefetchMode));
4906
4907 if (locals->PrefetchSupported[i][j] == true && locals->VRatioInPrefetchSupported[i][j] == true) {
4908 mode_lib->vba.BandwidthAvailableForImmediateFlip = locals->ReturnBWPerState[i][0];
4909 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4910 mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.BandwidthAvailableForImmediateFlip
4911 - dml_max(locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k]
4912 + locals->ReadBandwidthChroma[k] * locals->UrgentBurstFactorChroma[k]
4913 + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k],
4914 locals->RequiredPrefetchPixelDataBWLuma[i][j][k] * locals->UrgentBurstFactorLumaPre[k]
4915 + locals->RequiredPrefetchPixelDataBWChroma[i][j][k] * locals->UrgentBurstFactorChromaPre[k]
4916 + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]);
4917 }
4918 mode_lib->vba.TotImmediateFlipBytes = 0.0;
4919 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4920 mode_lib->vba.TotImmediateFlipBytes = mode_lib->vba.TotImmediateFlipBytes
4921 + locals->PDEAndMetaPTEBytesPerFrame[0][0][k] + locals->MetaRowBytes[0][0][k] + locals->DPTEBytesPerRow[0][0][k];
4922 }
4923
4924 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4925 CalculateFlipSchedule(
4926 mode_lib,
4927 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4928 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4929 mode_lib->vba.ExtraLatency,
4930 mode_lib->vba.UrgentLatency,
4931 mode_lib->vba.GPUVMMaxPageTableLevels,
4932 mode_lib->vba.HostVMEnable,
4933 mode_lib->vba.HostVMMaxPageTableLevels,
4934 mode_lib->vba.HostVMCachedPageTableLevels,
4935 mode_lib->vba.GPUVMEnable,
4936 locals->PDEAndMetaPTEBytesPerFrame[0][0][k],
4937 locals->MetaRowBytes[0][0][k],
4938 locals->DPTEBytesPerRow[0][0][k],
4939 mode_lib->vba.BandwidthAvailableForImmediateFlip,
4940 mode_lib->vba.TotImmediateFlipBytes,
4941 mode_lib->vba.SourcePixelFormat[k],
4942 mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
4943 mode_lib->vba.VRatio[k],
4944 locals->Tno_bw[k],
4945 mode_lib->vba.DCCEnable[k],
4946 locals->dpte_row_height[k],
4947 locals->meta_row_height[k],
4948 locals->dpte_row_height_chroma[k],
4949 locals->meta_row_height_chroma[k],
4950 &locals->DestinationLinesToRequestVMInImmediateFlip[k],
4951 &locals->DestinationLinesToRequestRowInImmediateFlip[k],
4952 &locals->final_flip_bw[k],
4953 &locals->ImmediateFlipSupportedForPipe[k]);
4954 }
4955 mode_lib->vba.total_dcn_read_bw_with_flip = 0.0;
4956 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4957 mode_lib->vba.total_dcn_read_bw_with_flip = mode_lib->vba.total_dcn_read_bw_with_flip + dml_max3(
4958 locals->prefetch_vmrow_bw[k],
4959 locals->final_flip_bw[k] + locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k]
4960 + locals->ReadBandwidthChroma[k] * locals->UrgentBurstFactorChroma[k]
4961 + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k],
4962 locals->final_flip_bw[k] + locals->RequiredPrefetchPixelDataBWLuma[i][j][k]
4963 * locals->UrgentBurstFactorLumaPre[k] + locals->RequiredPrefetchPixelDataBWChroma[i][j][k]
4964 * locals->UrgentBurstFactorChromaPre[k] + locals->cursor_bw_pre[k]
4965 * locals->UrgentBurstFactorCursorPre[k]);
4966 }
4967 locals->ImmediateFlipSupportedForState[i][j] = true;
4968 if (mode_lib->vba.total_dcn_read_bw_with_flip
4969 > locals->ReturnBWPerState[i][0]) {
4970 locals->ImmediateFlipSupportedForState[i][j] = false;
4971 }
4972 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4973 if (locals->ImmediateFlipSupportedForPipe[k] == false) {
4974 locals->ImmediateFlipSupportedForState[i][j] = false;
4975 }
4976 }
4977 } else {
4978 locals->ImmediateFlipSupportedForState[i][j] = false;
4979 }
4980 mode_lib->vba.UrgentOutOfOrderReturnPerChannel = dml_max3(
4981 mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly,
4982 mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
4983 mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly);
4984 CalculateWatermarksAndDRAMSpeedChangeSupport(
4985 mode_lib,
4986 mode_lib->vba.PrefetchMode[i][j],
4987 mode_lib->vba.NumberOfActivePlanes,
4988 mode_lib->vba.MaxLineBufferLines,
4989 mode_lib->vba.LineBufferSize,
4990 mode_lib->vba.DPPOutputBufferPixels,
4991 mode_lib->vba.DETBufferSizeInKByte[0],
4992 mode_lib->vba.WritebackInterfaceLumaBufferSize,
4993 mode_lib->vba.WritebackInterfaceChromaBufferSize,
4994 mode_lib->vba.DCFCLKPerState[i],
4995 mode_lib->vba.UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels,
4996 locals->ReturnBWPerState[i][0],
4997 mode_lib->vba.GPUVMEnable,
4998 locals->dpte_group_bytes,
4999 mode_lib->vba.MetaChunkSize,
5000 mode_lib->vba.UrgentLatency,
5001 mode_lib->vba.ExtraLatency,
5002 mode_lib->vba.WritebackLatency,
5003 mode_lib->vba.WritebackChunkSize,
5004 mode_lib->vba.SOCCLKPerState[i],
5005 mode_lib->vba.DRAMClockChangeLatency,
5006 mode_lib->vba.SRExitTime,
5007 mode_lib->vba.SREnterPlusExitTime,
5008 mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
5009 locals->NoOfDPPThisState,
5010 mode_lib->vba.DCCEnable,
5011 locals->RequiredDPPCLKThisState,
5012 locals->SwathWidthYSingleDPP,
5013 locals->SwathHeightYThisState,
5014 locals->ReadBandwidthLuma,
5015 locals->SwathHeightCThisState,
5016 locals->ReadBandwidthChroma,
5017 mode_lib->vba.LBBitPerPixel,
5018 locals->SwathWidthYThisState,
5019 mode_lib->vba.HRatio,
5020 mode_lib->vba.vtaps,
5021 mode_lib->vba.VTAPsChroma,
5022 mode_lib->vba.VRatio,
5023 mode_lib->vba.HTotal,
5024 mode_lib->vba.PixelClock,
5025 mode_lib->vba.BlendingAndTiming,
5026 locals->BytePerPixelInDETY,
5027 locals->BytePerPixelInDETC,
5028 mode_lib->vba.WritebackEnable,
5029 mode_lib->vba.WritebackPixelFormat,
5030 mode_lib->vba.WritebackDestinationWidth,
5031 mode_lib->vba.WritebackDestinationHeight,
5032 mode_lib->vba.WritebackSourceHeight,
5033 &locals->DRAMClockChangeSupport[i][j],
5034 &mode_lib->vba.UrgentWatermark,
5035 &mode_lib->vba.WritebackUrgentWatermark,
5036 &mode_lib->vba.DRAMClockChangeWatermark,
5037 &mode_lib->vba.WritebackDRAMClockChangeWatermark,
5038 &mode_lib->vba.StutterExitWatermark,
5039 &mode_lib->vba.StutterEnterPlusExitWatermark,
5040 &mode_lib->vba.MinActiveDRAMClockChangeLatencySupported);
5041 }
5042 }
5043
5044 /*Vertical Active BW support*/
5045 {
5046 double MaxTotalVActiveRDBandwidth = 0.0;
5047 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
5048 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + locals->ReadBandwidth[k];
5049 }
5050 for (i = 0; i <= mode_lib->vba.soc.num_states; ++i) {
5051 locals->MaxTotalVerticalActiveAvailableBandwidth[i][0] = dml_min(
5052 locals->IdealSDPPortBandwidthPerState[i][0] *
5053 mode_lib->vba.MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation
5054 / 100.0, mode_lib->vba.DRAMSpeedPerState[i] *
5055 mode_lib->vba.NumberOfChannels *
5056 mode_lib->vba.DRAMChannelWidth *
5057 mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation
5058 / 100.0);
5059
5060 if (MaxTotalVActiveRDBandwidth <= locals->MaxTotalVerticalActiveAvailableBandwidth[i][0]) {
5061 locals->TotalVerticalActiveBandwidthSupport[i][0] = true;
5062 } else {
5063 locals->TotalVerticalActiveBandwidthSupport[i][0] = false;
5064 }
5065 }
5066 }
5067
5068 /*PTE Buffer Size Check*/
5069
5070 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
5071 for (j = 0; j < 2; j++) {
5072 locals->PTEBufferSizeNotExceeded[i][j] = true;
5073 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
5074 if (locals->PTEBufferSizeNotExceededY[i][j][k] == false
5075 || locals->PTEBufferSizeNotExceededC[i][j][k] == false) {
5076 locals->PTEBufferSizeNotExceeded[i][j] = false;
5077 }
5078 }
5079 }
5080 }
5081 /*Cursor Support Check*/
5082
5083 mode_lib->vba.CursorSupport = true;
5084 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
5085 if (mode_lib->vba.CursorWidth[k][0] > 0.0) {
5086 for (m = 0; m < mode_lib->vba.NumberOfCursors[k]; m++) {
5087 if (mode_lib->vba.CursorBPP[k][m] == 64 && mode_lib->vba.Cursor64BppSupport == false) {
5088 mode_lib->vba.CursorSupport = false;
5089 }
5090 }
5091 }
5092 }
5093 /*Valid Pitch Check*/
5094
5095 mode_lib->vba.PitchSupport = true;
5096 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
5097 locals->AlignedYPitch[k] = dml_ceil(
5098 dml_max(mode_lib->vba.PitchY[k], mode_lib->vba.ViewportWidth[k]),
5099 locals->MacroTileWidthY[k]);
5100 if (locals->AlignedYPitch[k] > mode_lib->vba.PitchY[k]) {
5101 mode_lib->vba.PitchSupport = false;
5102 }
5103 if (mode_lib->vba.DCCEnable[k] == true) {
5104 locals->AlignedDCCMetaPitch[k] = dml_ceil(
5105 dml_max(
5106 mode_lib->vba.DCCMetaPitchY[k],
5107 mode_lib->vba.ViewportWidth[k]),
5108 64.0 * locals->Read256BlockWidthY[k]);
5109 } else {
5110 locals->AlignedDCCMetaPitch[k] = mode_lib->vba.DCCMetaPitchY[k];
5111 }
5112 if (locals->AlignedDCCMetaPitch[k] > mode_lib->vba.DCCMetaPitchY[k]) {
5113 mode_lib->vba.PitchSupport = false;
5114 }
5115 if (mode_lib->vba.SourcePixelFormat[k] != dm_444_64
5116 && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
5117 && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
5118 && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16
5119 && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8) {
5120 locals->AlignedCPitch[k] = dml_ceil(
5121 dml_max(
5122 mode_lib->vba.PitchC[k],
5123 mode_lib->vba.ViewportWidth[k] / 2.0),
5124 locals->MacroTileWidthC[k]);
5125 } else {
5126 locals->AlignedCPitch[k] = mode_lib->vba.PitchC[k];
5127 }
5128 if (locals->AlignedCPitch[k] > mode_lib->vba.PitchC[k]) {
5129 mode_lib->vba.PitchSupport = false;
5130 }
5131 }
5132 /*Mode Support, Voltage State and SOC Configuration*/
5133
5134 for (i = mode_lib->vba.soc.num_states; i >= 0; i--) {
5135 for (j = 0; j < 2; j++) {
5136 enum dm_validation_status status = DML_VALIDATION_OK;
5137
5138 if (!mode_lib->vba.ScaleRatioAndTapsSupport) {
5139 status = DML_FAIL_SCALE_RATIO_TAP;
5140 } else if (!mode_lib->vba.SourceFormatPixelAndScanSupport) {
5141 status = DML_FAIL_SOURCE_PIXEL_FORMAT;
5142 } else if (!locals->ViewportSizeSupport[i][0]) {
5143 status = DML_FAIL_VIEWPORT_SIZE;
5144 } else if (!locals->DIOSupport[i]) {
5145 status = DML_FAIL_DIO_SUPPORT;
5146 } else if (locals->NotEnoughDSCUnits[i]) {
5147 status = DML_FAIL_NOT_ENOUGH_DSC;
5148 } else if (locals->DSCCLKRequiredMoreThanSupported[i]) {
5149 status = DML_FAIL_DSC_CLK_REQUIRED;
5150 } else if (!locals->ROBSupport[i][0]) {
5151 status = DML_FAIL_REORDERING_BUFFER;
5152 } else if (!locals->DISPCLK_DPPCLK_Support[i][j]) {
5153 status = DML_FAIL_DISPCLK_DPPCLK;
5154 } else if (!locals->TotalAvailablePipesSupport[i][j]) {
5155 status = DML_FAIL_TOTAL_AVAILABLE_PIPES;
5156 } else if (!mode_lib->vba.NumberOfOTGSupport) {
5157 status = DML_FAIL_NUM_OTG;
5158 } else if (!mode_lib->vba.WritebackModeSupport) {
5159 status = DML_FAIL_WRITEBACK_MODE;
5160 } else if (!mode_lib->vba.WritebackLatencySupport) {
5161 status = DML_FAIL_WRITEBACK_LATENCY;
5162 } else if (!mode_lib->vba.WritebackScaleRatioAndTapsSupport) {
5163 status = DML_FAIL_WRITEBACK_SCALE_RATIO_TAP;
5164 } else if (!mode_lib->vba.CursorSupport) {
5165 status = DML_FAIL_CURSOR_SUPPORT;
5166 } else if (!mode_lib->vba.PitchSupport) {
5167 status = DML_FAIL_PITCH_SUPPORT;
5168 } else if (!locals->TotalVerticalActiveBandwidthSupport[i][0]) {
5169 status = DML_FAIL_TOTAL_V_ACTIVE_BW;
5170 } else if (!locals->PTEBufferSizeNotExceeded[i][j]) {
5171 status = DML_FAIL_PTE_BUFFER_SIZE;
5172 } else if (mode_lib->vba.NonsupportedDSCInputBPC) {
5173 status = DML_FAIL_DSC_INPUT_BPC;
5174 } else if ((mode_lib->vba.HostVMEnable
5175 && !locals->ImmediateFlipSupportedForState[i][j])) {
5176 status = DML_FAIL_HOST_VM_IMMEDIATE_FLIP;
5177 } else if (!locals->PrefetchSupported[i][j]) {
5178 status = DML_FAIL_PREFETCH_SUPPORT;
5179 } else if (!locals->VRatioInPrefetchSupported[i][j]) {
5180 status = DML_FAIL_V_RATIO_PREFETCH;
5181 }
5182
5183 if (status == DML_VALIDATION_OK) {
5184 locals->ModeSupport[i][j] = true;
5185 } else {
5186 locals->ModeSupport[i][j] = false;
5187 }
5188 locals->ValidationStatus[i] = status;
5189 }
5190 }
5191 {
5192 unsigned int MaximumMPCCombine = 0;
5193 mode_lib->vba.VoltageLevel = mode_lib->vba.soc.num_states + 1;
5194 for (i = mode_lib->vba.VoltageOverrideLevel; i <= mode_lib->vba.soc.num_states; i++) {
5195 if (locals->ModeSupport[i][0] == true || locals->ModeSupport[i][1] == true) {
5196 mode_lib->vba.VoltageLevel = i;
5197 if (locals->ModeSupport[i][1] == true && (locals->ModeSupport[i][0] == false
5198 || mode_lib->vba.WhenToDoMPCCombine == dm_mpc_always_when_possible
5199 || (mode_lib->vba.WhenToDoMPCCombine == dm_mpc_reduce_voltage_and_clocks
5200 && ((locals->DRAMClockChangeSupport[i][1] == dm_dram_clock_change_vactive
5201 && locals->DRAMClockChangeSupport[i][0] != dm_dram_clock_change_vactive)
5202 || (locals->DRAMClockChangeSupport[i][1] == dm_dram_clock_change_vblank
5203 && locals->DRAMClockChangeSupport[i][0] == dm_dram_clock_change_unsupported))))) {
5204 MaximumMPCCombine = 1;
5205 } else {
5206 MaximumMPCCombine = 0;
5207 }
5208 break;
5209 }
5210 }
5211 mode_lib->vba.ImmediateFlipSupport =
5212 locals->ImmediateFlipSupportedForState[mode_lib->vba.VoltageLevel][MaximumMPCCombine];
5213 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
5214 mode_lib->vba.DPPPerPlane[k] = locals->NoOfDPP[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
5215 locals->DPPCLK[k] = locals->RequiredDPPCLK[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
5216 }
5217 mode_lib->vba.DISPCLK = locals->RequiredDISPCLK[mode_lib->vba.VoltageLevel][MaximumMPCCombine];
5218 mode_lib->vba.maxMpcComb = MaximumMPCCombine;
5219 }
5220 mode_lib->vba.DCFCLK = mode_lib->vba.DCFCLKPerState[mode_lib->vba.VoltageLevel];
5221 mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel];
5222 mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel];
5223 mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel];
5224 mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel][0];
5225 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
5226 if (mode_lib->vba.BlendingAndTiming[k] == k) {
5227 mode_lib->vba.ODMCombineEnabled[k] =
5228 locals->ODMCombineEnablePerState[mode_lib->vba.VoltageLevel][k];
5229 } else {
5230 mode_lib->vba.ODMCombineEnabled[k] = dm_odm_combine_mode_disabled;
5231 }
5232 mode_lib->vba.DSCEnabled[k] =
5233 locals->RequiresDSC[mode_lib->vba.VoltageLevel][k];
5234 mode_lib->vba.OutputBpp[k] =
5235 locals->OutputBppPerState[mode_lib->vba.VoltageLevel][k];
5236 }
5237 }
5238
CalculateWatermarksAndDRAMSpeedChangeSupport(struct display_mode_lib * mode_lib,unsigned int PrefetchMode,unsigned int NumberOfActivePlanes,unsigned int MaxLineBufferLines,unsigned int LineBufferSize,unsigned int DPPOutputBufferPixels,unsigned int DETBufferSizeInKByte,unsigned int WritebackInterfaceLumaBufferSize,unsigned int WritebackInterfaceChromaBufferSize,double DCFCLK,double UrgentOutOfOrderReturn,double ReturnBW,bool GPUVMEnable,int dpte_group_bytes[],unsigned int MetaChunkSize,double UrgentLatency,double ExtraLatency,double WritebackLatency,double WritebackChunkSize,double SOCCLK,double DRAMClockChangeLatency,double SRExitTime,double SREnterPlusExitTime,double DCFCLKDeepSleep,int DPPPerPlane[],bool DCCEnable[],double DPPCLK[],double SwathWidthSingleDPPY[],unsigned int SwathHeightY[],double ReadBandwidthPlaneLuma[],unsigned int SwathHeightC[],double ReadBandwidthPlaneChroma[],unsigned int LBBitPerPixel[],double SwathWidthY[],double HRatio[],unsigned int vtaps[],unsigned int VTAPsChroma[],double VRatio[],unsigned int HTotal[],double PixelClock[],unsigned int BlendingAndTiming[],double BytePerPixelDETY[],double BytePerPixelDETC[],bool WritebackEnable[],enum source_format_class WritebackPixelFormat[],double WritebackDestinationWidth[],double WritebackDestinationHeight[],double WritebackSourceHeight[],enum clock_change_support * DRAMClockChangeSupport,double * UrgentWatermark,double * WritebackUrgentWatermark,double * DRAMClockChangeWatermark,double * WritebackDRAMClockChangeWatermark,double * StutterExitWatermark,double * StutterEnterPlusExitWatermark,double * MinActiveDRAMClockChangeLatencySupported)5239 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5240 struct display_mode_lib *mode_lib,
5241 unsigned int PrefetchMode,
5242 unsigned int NumberOfActivePlanes,
5243 unsigned int MaxLineBufferLines,
5244 unsigned int LineBufferSize,
5245 unsigned int DPPOutputBufferPixels,
5246 unsigned int DETBufferSizeInKByte,
5247 unsigned int WritebackInterfaceLumaBufferSize,
5248 unsigned int WritebackInterfaceChromaBufferSize,
5249 double DCFCLK,
5250 double UrgentOutOfOrderReturn,
5251 double ReturnBW,
5252 bool GPUVMEnable,
5253 int dpte_group_bytes[],
5254 unsigned int MetaChunkSize,
5255 double UrgentLatency,
5256 double ExtraLatency,
5257 double WritebackLatency,
5258 double WritebackChunkSize,
5259 double SOCCLK,
5260 double DRAMClockChangeLatency,
5261 double SRExitTime,
5262 double SREnterPlusExitTime,
5263 double DCFCLKDeepSleep,
5264 int DPPPerPlane[],
5265 bool DCCEnable[],
5266 double DPPCLK[],
5267 double SwathWidthSingleDPPY[],
5268 unsigned int SwathHeightY[],
5269 double ReadBandwidthPlaneLuma[],
5270 unsigned int SwathHeightC[],
5271 double ReadBandwidthPlaneChroma[],
5272 unsigned int LBBitPerPixel[],
5273 double SwathWidthY[],
5274 double HRatio[],
5275 unsigned int vtaps[],
5276 unsigned int VTAPsChroma[],
5277 double VRatio[],
5278 unsigned int HTotal[],
5279 double PixelClock[],
5280 unsigned int BlendingAndTiming[],
5281 double BytePerPixelDETY[],
5282 double BytePerPixelDETC[],
5283 bool WritebackEnable[],
5284 enum source_format_class WritebackPixelFormat[],
5285 double WritebackDestinationWidth[],
5286 double WritebackDestinationHeight[],
5287 double WritebackSourceHeight[],
5288 enum clock_change_support *DRAMClockChangeSupport,
5289 double *UrgentWatermark,
5290 double *WritebackUrgentWatermark,
5291 double *DRAMClockChangeWatermark,
5292 double *WritebackDRAMClockChangeWatermark,
5293 double *StutterExitWatermark,
5294 double *StutterEnterPlusExitWatermark,
5295 double *MinActiveDRAMClockChangeLatencySupported)
5296 {
5297 double EffectiveLBLatencyHidingY;
5298 double EffectiveLBLatencyHidingC;
5299 double DPPOutputBufferLinesY;
5300 double DPPOutputBufferLinesC;
5301 unsigned int DETBufferSizeY;
5302 unsigned int DETBufferSizeC;
5303 double LinesInDETY[DC__NUM_DPP__MAX];
5304 double LinesInDETC;
5305 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
5306 unsigned int LinesInDETCRoundedDownToSwath;
5307 double FullDETBufferingTimeY[DC__NUM_DPP__MAX];
5308 double FullDETBufferingTimeC;
5309 double ActiveDRAMClockChangeLatencyMarginY;
5310 double ActiveDRAMClockChangeLatencyMarginC;
5311 double WritebackDRAMClockChangeLatencyMargin;
5312 double PlaneWithMinActiveDRAMClockChangeMargin;
5313 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
5314 double FullDETBufferingTimeYStutterCriticalPlane = 0;
5315 double TimeToFinishSwathTransferStutterCriticalPlane = 0;
5316 unsigned int k, j;
5317
5318 mode_lib->vba.TotalActiveDPP = 0;
5319 mode_lib->vba.TotalDCCActiveDPP = 0;
5320 for (k = 0; k < NumberOfActivePlanes; ++k) {
5321 mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP + DPPPerPlane[k];
5322 if (DCCEnable[k] == true) {
5323 mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP + DPPPerPlane[k];
5324 }
5325 }
5326
5327 mode_lib->vba.TotalDataReadBandwidth = 0;
5328 for (k = 0; k < NumberOfActivePlanes; ++k) {
5329 mode_lib->vba.TotalDataReadBandwidth = mode_lib->vba.TotalDataReadBandwidth
5330 + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
5331 }
5332
5333 *UrgentWatermark = UrgentLatency + ExtraLatency;
5334
5335 *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark;
5336
5337 mode_lib->vba.TotalActiveWriteback = 0;
5338 for (k = 0; k < NumberOfActivePlanes; ++k) {
5339 if (WritebackEnable[k] == true) {
5340 mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + 1;
5341 }
5342 }
5343
5344 if (mode_lib->vba.TotalActiveWriteback <= 1) {
5345 *WritebackUrgentWatermark = WritebackLatency;
5346 } else {
5347 *WritebackUrgentWatermark = WritebackLatency
5348 + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5349 }
5350
5351 if (mode_lib->vba.TotalActiveWriteback <= 1) {
5352 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency;
5353 } else {
5354 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency
5355 + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5356 }
5357
5358 for (k = 0; k < NumberOfActivePlanes; ++k) {
5359
5360 mode_lib->vba.LBLatencyHidingSourceLinesY = dml_min((double) MaxLineBufferLines,
5361 dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1))
5362 - (vtaps[k] - 1);
5363
5364 mode_lib->vba.LBLatencyHidingSourceLinesC = dml_min((double) MaxLineBufferLines,
5365 dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / 2 / dml_max(HRatio[k] / 2, 1.0)), 1))
5366 - (VTAPsChroma[k] - 1);
5367
5368 EffectiveLBLatencyHidingY = mode_lib->vba.LBLatencyHidingSourceLinesY / VRatio[k]
5369 * (HTotal[k] / PixelClock[k]);
5370
5371 EffectiveLBLatencyHidingC = mode_lib->vba.LBLatencyHidingSourceLinesC
5372 / (VRatio[k] / 2) * (HTotal[k] / PixelClock[k]);
5373
5374 if (SwathWidthY[k] > 2 * DPPOutputBufferPixels) {
5375 DPPOutputBufferLinesY = (double) DPPOutputBufferPixels / SwathWidthY[k];
5376 } else if (SwathWidthY[k] > DPPOutputBufferPixels) {
5377 DPPOutputBufferLinesY = 0.5;
5378 } else {
5379 DPPOutputBufferLinesY = 1;
5380 }
5381
5382 if (SwathWidthY[k] / 2.0 > 2 * DPPOutputBufferPixels) {
5383 DPPOutputBufferLinesC = (double) DPPOutputBufferPixels
5384 / (SwathWidthY[k] / 2.0);
5385 } else if (SwathWidthY[k] / 2.0 > DPPOutputBufferPixels) {
5386 DPPOutputBufferLinesC = 0.5;
5387 } else {
5388 DPPOutputBufferLinesC = 1;
5389 }
5390
5391 CalculateDETBufferSize(
5392 DETBufferSizeInKByte,
5393 SwathHeightY[k],
5394 SwathHeightC[k],
5395 &DETBufferSizeY,
5396 &DETBufferSizeC);
5397
5398 LinesInDETY[k] = (double)DETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
5399 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5400 FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k]
5401 * (HTotal[k] / PixelClock[k]) / VRatio[k];
5402 if (BytePerPixelDETC[k] > 0) {
5403 LinesInDETC = (double)DETBufferSizeC / BytePerPixelDETC[k] / (SwathWidthY[k] / 2.0);
5404 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5405 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath
5406 * (HTotal[k] / PixelClock[k]) / (VRatio[k] / 2);
5407 } else {
5408 LinesInDETC = 0;
5409 FullDETBufferingTimeC = 999999;
5410 }
5411
5412 ActiveDRAMClockChangeLatencyMarginY = HTotal[k] / PixelClock[k]
5413 * DPPOutputBufferLinesY + EffectiveLBLatencyHidingY
5414 + FullDETBufferingTimeY[k] - *DRAMClockChangeWatermark;
5415
5416 if (NumberOfActivePlanes > 1) {
5417 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
5418 - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k];
5419 }
5420
5421 if (BytePerPixelDETC[k] > 0) {
5422 ActiveDRAMClockChangeLatencyMarginC = HTotal[k] / PixelClock[k]
5423 * DPPOutputBufferLinesC + EffectiveLBLatencyHidingC
5424 + FullDETBufferingTimeC - *DRAMClockChangeWatermark;
5425 if (NumberOfActivePlanes > 1) {
5426 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
5427 - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / (VRatio[k] / 2);
5428 }
5429 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(
5430 ActiveDRAMClockChangeLatencyMarginY,
5431 ActiveDRAMClockChangeLatencyMarginC);
5432 } else {
5433 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
5434 }
5435
5436 if (WritebackEnable[k] == true) {
5437 if (WritebackPixelFormat[k] == dm_444_32) {
5438 WritebackDRAMClockChangeLatencyMargin = (WritebackInterfaceLumaBufferSize
5439 + WritebackInterfaceChromaBufferSize) / (WritebackDestinationWidth[k]
5440 * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k]
5441 / PixelClock[k]) * 4) - *WritebackDRAMClockChangeWatermark;
5442 } else {
5443 WritebackDRAMClockChangeLatencyMargin = dml_min(
5444 WritebackInterfaceLumaBufferSize * 8.0 / 10,
5445 2 * WritebackInterfaceChromaBufferSize * 8.0 / 10) / (WritebackDestinationWidth[k]
5446 * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]))
5447 - *WritebackDRAMClockChangeWatermark;
5448 }
5449 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(
5450 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k],
5451 WritebackDRAMClockChangeLatencyMargin);
5452 }
5453 }
5454
5455 mode_lib->vba.MinActiveDRAMClockChangeMargin = 999999;
5456 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5457 for (k = 0; k < NumberOfActivePlanes; ++k) {
5458 if (mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k]
5459 < mode_lib->vba.MinActiveDRAMClockChangeMargin) {
5460 mode_lib->vba.MinActiveDRAMClockChangeMargin =
5461 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
5462 if (BlendingAndTiming[k] == k) {
5463 PlaneWithMinActiveDRAMClockChangeMargin = k;
5464 } else {
5465 for (j = 0; j < NumberOfActivePlanes; ++j) {
5466 if (BlendingAndTiming[k] == j) {
5467 PlaneWithMinActiveDRAMClockChangeMargin = j;
5468 }
5469 }
5470 }
5471 }
5472 }
5473
5474 *MinActiveDRAMClockChangeLatencySupported = mode_lib->vba.MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency;
5475
5476 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5477 for (k = 0; k < NumberOfActivePlanes; ++k) {
5478 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k))
5479 && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
5480 && mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k]
5481 < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5482 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank =
5483 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
5484 }
5485 }
5486
5487 mode_lib->vba.TotalNumberOfActiveOTG = 0;
5488 for (k = 0; k < NumberOfActivePlanes; ++k) {
5489 if (BlendingAndTiming[k] == k) {
5490 mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG + 1;
5491 }
5492 }
5493
5494 if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
5495 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5496 } else if (((mode_lib->vba.SynchronizedVBlank == true
5497 || mode_lib->vba.TotalNumberOfActiveOTG == 1
5498 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0)
5499 && PrefetchMode == 0)) {
5500 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5501 } else {
5502 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
5503 }
5504
5505 FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[0];
5506 for (k = 0; k < NumberOfActivePlanes; ++k) {
5507 if (FullDETBufferingTimeY[k] <= FullDETBufferingTimeYStutterCriticalPlane) {
5508 TimeToFinishSwathTransferStutterCriticalPlane = (SwathHeightY[k]
5509 - (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k]))
5510 * (HTotal[k] / PixelClock[k]) / VRatio[k];
5511 }
5512 }
5513
5514 *StutterExitWatermark = SRExitTime + mode_lib->vba.LastPixelOfLineExtraWatermark
5515 + ExtraLatency + 10 / DCFCLKDeepSleep;
5516 *StutterEnterPlusExitWatermark = dml_max(
5517 SREnterPlusExitTime + mode_lib->vba.LastPixelOfLineExtraWatermark
5518 + ExtraLatency + 10 / DCFCLKDeepSleep,
5519 TimeToFinishSwathTransferStutterCriticalPlane);
5520
5521 }
5522
CalculateDCFCLKDeepSleep(struct display_mode_lib * mode_lib,unsigned int NumberOfActivePlanes,double BytePerPixelDETY[],double BytePerPixelDETC[],double VRatio[],double SwathWidthY[],int DPPPerPlane[],double HRatio[],double PixelClock[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double DPPCLK[],double * DCFCLKDeepSleep)5523 static void CalculateDCFCLKDeepSleep(
5524 struct display_mode_lib *mode_lib,
5525 unsigned int NumberOfActivePlanes,
5526 double BytePerPixelDETY[],
5527 double BytePerPixelDETC[],
5528 double VRatio[],
5529 double SwathWidthY[],
5530 int DPPPerPlane[],
5531 double HRatio[],
5532 double PixelClock[],
5533 double PSCL_THROUGHPUT[],
5534 double PSCL_THROUGHPUT_CHROMA[],
5535 double DPPCLK[],
5536 double *DCFCLKDeepSleep)
5537 {
5538 unsigned int k;
5539 double DisplayPipeLineDeliveryTimeLuma;
5540 double DisplayPipeLineDeliveryTimeChroma;
5541 //double DCFCLKDeepSleepPerPlane[DC__NUM_DPP__MAX];
5542
5543 for (k = 0; k < NumberOfActivePlanes; ++k) {
5544 if (VRatio[k] <= 1) {
5545 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k]
5546 / HRatio[k] / PixelClock[k];
5547 } else {
5548 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k]
5549 / DPPCLK[k];
5550 }
5551 if (BytePerPixelDETC[k] == 0) {
5552 DisplayPipeLineDeliveryTimeChroma = 0;
5553 } else {
5554 if (VRatio[k] / 2 <= 1) {
5555 DisplayPipeLineDeliveryTimeChroma = SwathWidthY[k] / 2.0
5556 * DPPPerPlane[k] / (HRatio[k] / 2) / PixelClock[k];
5557 } else {
5558 DisplayPipeLineDeliveryTimeChroma = SwathWidthY[k] / 2.0
5559 / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5560 }
5561 }
5562
5563 if (BytePerPixelDETC[k] > 0) {
5564 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(
5565 1.1 * SwathWidthY[k] * dml_ceil(BytePerPixelDETY[k], 1)
5566 / 32.0 / DisplayPipeLineDeliveryTimeLuma,
5567 1.1 * SwathWidthY[k] / 2.0
5568 * dml_ceil(BytePerPixelDETC[k], 2) / 32.0
5569 / DisplayPipeLineDeliveryTimeChroma);
5570 } else {
5571 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = 1.1 * SwathWidthY[k]
5572 * dml_ceil(BytePerPixelDETY[k], 1) / 64.0
5573 / DisplayPipeLineDeliveryTimeLuma;
5574 }
5575 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(
5576 mode_lib->vba.DCFCLKDeepSleepPerPlane[k],
5577 PixelClock[k] / 16);
5578
5579 }
5580
5581 *DCFCLKDeepSleep = 8;
5582 for (k = 0; k < NumberOfActivePlanes; ++k) {
5583 *DCFCLKDeepSleep = dml_max(
5584 *DCFCLKDeepSleep,
5585 mode_lib->vba.DCFCLKDeepSleepPerPlane[k]);
5586 }
5587 }
5588
/*
 * Split the DET buffer (given in KBytes) into a luma and a chroma part, in
 * bytes.
 *
 *  - SwathHeightC == 0:            everything goes to luma, chroma gets 0.
 *  - SwathHeightY <= SwathHeightC: the buffer is split in half.
 *  - otherwise:                    luma gets 2/3, chroma gets 1/3.
 *
 * All arithmetic is unsigned integer arithmetic, so the thirds are truncated.
 */
static void CalculateDETBufferSize(
		unsigned int DETBufferSizeInKByte,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		unsigned int *DETBufferSizeY,
		unsigned int *DETBufferSizeC)
{
	const unsigned int TotalBytes = DETBufferSizeInKByte * 1024;

	if (SwathHeightC == 0) {
		*DETBufferSizeY = TotalBytes;
		*DETBufferSizeC = 0;
		return;
	}

	if (SwathHeightY > SwathHeightC) {
		*DETBufferSizeY = TotalBytes * 2 / 3;
		*DETBufferSizeC = TotalBytes / 3;
	} else {
		*DETBufferSizeY = TotalBytes / 2;
		*DETBufferSizeC = TotalBytes / 2;
	}
}
5607
/*
 * Convert the time a buffer can cover into an urgent burst factor.
 *
 * Returns BufferSizeInTime / (BufferSizeInTime - UrgentLatency). If that
 * difference is <= 0 the factor is 0 and *NotEnoughLatencyHiding is set to 1;
 * in every other case the flag is left untouched.
 */
static double UrgentBurstFactorFromBufferTime(
		double BufferSizeInTime,
		double UrgentLatency,
		unsigned int *NotEnoughLatencyHiding)
{
	double TimeLeftAfterLatency = BufferSizeInTime - UrgentLatency;

	if (TimeLeftAfterLatency <= 0) {
		*NotEnoughLatencyHiding = 1;
		return 0;
	}
	return BufferSizeInTime / TimeLeftAfterLatency;
}

/*
 * Compute urgent burst factors for the cursor buffer and for the luma and
 * chroma parts of the DET buffer, both for the active ratio (VRatio) and for
 * the prefetch ratios (VRatioPreY / VRatioPreC).
 *
 * *NotEnoughUrgentLatencyHiding and *NotEnoughUrgentLatencyHidingPre are
 * cleared on entry and set to 1 as soon as any of the corresponding buffers
 * covers no more time than UrgentLatency (that factor is then reported as 0).
 * A prefetch factor is reported as 1 when its prefetch ratio is not > 0.
 *
 * Outputs that are deliberately left unwritten:
 *  - *UrgentBurstFactorCursor and *UrgentBurstFactorCursorPre when
 *    CursorWidth == 0;
 *  - *UrgentBurstFactorChroma and *UrgentBurstFactorChromaPre when
 *    BytePerPixelInDETC is not > 0.
 */
static void CalculateUrgentBurstFactor(
		unsigned int DETBufferSizeInKByte,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		unsigned int SwathWidthY,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioPreY,
		double VRatioPreC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorCursorPre,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorLumaPre,
		double *UrgentBurstFactorChroma,
		double *UrgentBurstFactorChromaPre,
		unsigned int *NotEnoughUrgentLatencyHiding,
		unsigned int *NotEnoughUrgentLatencyHidingPre)
{
	unsigned int DETBufferSizeY;
	unsigned int DETBufferSizeC;
	double LumaLinesHeld;
	double ChromaLinesHeld;

	*NotEnoughUrgentLatencyHiding = 0;
	*NotEnoughUrgentLatencyHidingPre = 0;

	/* Cursor buffer: line count is the largest power of two that fits. */
	if (CursorWidth > 0) {
		unsigned int CursorLines = 1 << (unsigned int) dml_floor(
				dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);

		*UrgentBurstFactorCursor = UrgentBurstFactorFromBufferTime(
				CursorLines * LineTime / VRatio,
				UrgentLatency,
				NotEnoughUrgentLatencyHiding);

		if (VRatioPreY > 0) {
			*UrgentBurstFactorCursorPre = UrgentBurstFactorFromBufferTime(
					CursorLines * LineTime / VRatioPreY,
					UrgentLatency,
					NotEnoughUrgentLatencyHidingPre);
		} else {
			*UrgentBurstFactorCursorPre = 1;
		}
	}

	CalculateDETBufferSize(
			DETBufferSizeInKByte,
			SwathHeightY,
			SwathHeightC,
			&DETBufferSizeY,
			&DETBufferSizeC);

	/* Luma: lines held in the DET, rounded down to whole swaths. */
	LumaLinesHeld = dml_floor(
			(double)DETBufferSizeY / BytePerPixelInDETY / SwathWidthY,
			SwathHeightY);

	*UrgentBurstFactorLuma = UrgentBurstFactorFromBufferTime(
			LumaLinesHeld * LineTime / VRatio,
			UrgentLatency,
			NotEnoughUrgentLatencyHiding);

	if (VRatioPreY > 0) {
		*UrgentBurstFactorLumaPre = UrgentBurstFactorFromBufferTime(
				LumaLinesHeld * LineTime / VRatioPreY,
				UrgentLatency,
				NotEnoughUrgentLatencyHidingPre);
	} else {
		*UrgentBurstFactorLumaPre = 1;
	}

	/* No chroma plane: the chroma outputs are not touched. */
	if (!(BytePerPixelInDETC > 0)) {
		return;
	}

	/* SwathWidthY / 2 is an unsigned integer division. */
	ChromaLinesHeld = dml_floor(
			(double)DETBufferSizeC / BytePerPixelInDETC / (SwathWidthY / 2),
			SwathHeightC);

	*UrgentBurstFactorChroma = UrgentBurstFactorFromBufferTime(
			ChromaLinesHeld * LineTime / (VRatio / 2),
			UrgentLatency,
			NotEnoughUrgentLatencyHiding);

	if (VRatioPreC > 0) {
		*UrgentBurstFactorChromaPre = UrgentBurstFactorFromBufferTime(
				ChromaLinesHeld * LineTime / VRatioPreC,
				UrgentLatency,
				NotEnoughUrgentLatencyHidingPre);
	} else {
		*UrgentBurstFactorChromaPre = 1;
	}
}
5728
CalculatePixelDeliveryTimes(unsigned int NumberOfActivePlanes,double VRatio[],double VRatioPrefetchY[],double VRatioPrefetchC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[],int DPPPerPlane[],double HRatio[],double PixelClock[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double DPPCLK[],double BytePerPixelDETC[],enum scan_direction_class SourceScan[],unsigned int BlockWidth256BytesY[],unsigned int BlockHeight256BytesY[],unsigned int BlockWidth256BytesC[],unsigned int BlockHeight256BytesC[],double DisplayPipeLineDeliveryTimeLuma[],double DisplayPipeLineDeliveryTimeChroma[],double DisplayPipeLineDeliveryTimeLumaPrefetch[],double DisplayPipeLineDeliveryTimeChromaPrefetch[],double DisplayPipeRequestDeliveryTimeLuma[],double DisplayPipeRequestDeliveryTimeChroma[],double DisplayPipeRequestDeliveryTimeLumaPrefetch[],double DisplayPipeRequestDeliveryTimeChromaPrefetch[])5729 static void CalculatePixelDeliveryTimes(
5730 unsigned int NumberOfActivePlanes,
5731 double VRatio[],
5732 double VRatioPrefetchY[],
5733 double VRatioPrefetchC[],
5734 unsigned int swath_width_luma_ub[],
5735 unsigned int swath_width_chroma_ub[],
5736 int DPPPerPlane[],
5737 double HRatio[],
5738 double PixelClock[],
5739 double PSCL_THROUGHPUT[],
5740 double PSCL_THROUGHPUT_CHROMA[],
5741 double DPPCLK[],
5742 double BytePerPixelDETC[],
5743 enum scan_direction_class SourceScan[],
5744 unsigned int BlockWidth256BytesY[],
5745 unsigned int BlockHeight256BytesY[],
5746 unsigned int BlockWidth256BytesC[],
5747 unsigned int BlockHeight256BytesC[],
5748 double DisplayPipeLineDeliveryTimeLuma[],
5749 double DisplayPipeLineDeliveryTimeChroma[],
5750 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
5751 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
5752 double DisplayPipeRequestDeliveryTimeLuma[],
5753 double DisplayPipeRequestDeliveryTimeChroma[],
5754 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
5755 double DisplayPipeRequestDeliveryTimeChromaPrefetch[])
5756 {
5757 double req_per_swath_ub;
5758 unsigned int k;
5759
5760 for (k = 0; k < NumberOfActivePlanes; ++k) {
5761 if (VRatio[k] <= 1) {
5762 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k]
5763 / HRatio[k] / PixelClock[k];
5764 } else {
5765 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k]
5766 / PSCL_THROUGHPUT[k] / DPPCLK[k];
5767 }
5768
5769 if (BytePerPixelDETC[k] == 0) {
5770 DisplayPipeLineDeliveryTimeChroma[k] = 0;
5771 } else {
5772 if (VRatio[k] / 2 <= 1) {
5773 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k]
5774 * DPPPerPlane[k] / (HRatio[k] / 2) / PixelClock[k];
5775 } else {
5776 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k]
5777 / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5778 }
5779 }
5780
5781 if (VRatioPrefetchY[k] <= 1) {
5782 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k]
5783 * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5784 } else {
5785 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k]
5786 / PSCL_THROUGHPUT[k] / DPPCLK[k];
5787 }
5788
5789 if (BytePerPixelDETC[k] == 0) {
5790 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
5791 } else {
5792 if (VRatioPrefetchC[k] <= 1) {
5793 DisplayPipeLineDeliveryTimeChromaPrefetch[k] =
5794 swath_width_chroma_ub[k] * DPPPerPlane[k]
5795 / (HRatio[k] / 2) / PixelClock[k];
5796 } else {
5797 DisplayPipeLineDeliveryTimeChromaPrefetch[k] =
5798 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5799 }
5800 }
5801 }
5802
5803 for (k = 0; k < NumberOfActivePlanes; ++k) {
5804 if (SourceScan[k] == dm_horz) {
5805 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
5806 } else {
5807 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
5808 }
5809 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k]
5810 / req_per_swath_ub;
5811 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
5812 DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
5813 if (BytePerPixelDETC[k] == 0) {
5814 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
5815 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
5816 } else {
5817 if (SourceScan[k] == dm_horz) {
5818 req_per_swath_ub = swath_width_chroma_ub[k]
5819 / BlockWidth256BytesC[k];
5820 } else {
5821 req_per_swath_ub = swath_width_chroma_ub[k]
5822 / BlockHeight256BytesC[k];
5823 }
5824 DisplayPipeRequestDeliveryTimeChroma[k] =
5825 DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
5826 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
5827 DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
5828 }
5829 }
5830 }
5831
CalculateMetaAndPTETimes(unsigned int NumberOfActivePlanes,bool GPUVMEnable,unsigned int MetaChunkSize,unsigned int MinMetaChunkSizeBytes,unsigned int GPUVMMaxPageTableLevels,unsigned int HTotal[],double VRatio[],double VRatioPrefetchY[],double VRatioPrefetchC[],double DestinationLinesToRequestRowInVBlank[],double DestinationLinesToRequestRowInImmediateFlip[],double DestinationLinesToRequestVMInVBlank[],double DestinationLinesToRequestVMInImmediateFlip[],bool DCCEnable[],double PixelClock[],double BytePerPixelDETY[],double BytePerPixelDETC[],enum scan_direction_class SourceScan[],unsigned int dpte_row_height[],unsigned int dpte_row_height_chroma[],unsigned int meta_row_width[],unsigned int meta_row_height[],unsigned int meta_req_width[],unsigned int meta_req_height[],int dpte_group_bytes[],unsigned int PTERequestSizeY[],unsigned int PTERequestSizeC[],unsigned int PixelPTEReqWidthY[],unsigned int PixelPTEReqHeightY[],unsigned int PixelPTEReqWidthC[],unsigned int PixelPTEReqHeightC[],unsigned int dpte_row_width_luma_ub[],unsigned int dpte_row_width_chroma_ub[],unsigned int vm_group_bytes[],unsigned int dpde0_bytes_per_frame_ub_l[],unsigned int dpde0_bytes_per_frame_ub_c[],unsigned int meta_pte_bytes_per_frame_ub_l[],unsigned int meta_pte_bytes_per_frame_ub_c[],double DST_Y_PER_PTE_ROW_NOM_L[],double DST_Y_PER_PTE_ROW_NOM_C[],double DST_Y_PER_META_ROW_NOM_L[],double TimePerMetaChunkNominal[],double TimePerMetaChunkVBlank[],double TimePerMetaChunkFlip[],double time_per_pte_group_nom_luma[],double time_per_pte_group_vblank_luma[],double time_per_pte_group_flip_luma[],double time_per_pte_group_nom_chroma[],double time_per_pte_group_vblank_chroma[],double time_per_pte_group_flip_chroma[],double TimePerVMGroupVBlank[],double TimePerVMGroupFlip[],double TimePerVMRequestVBlank[],double TimePerVMRequestFlip[])5832 static void CalculateMetaAndPTETimes(
5833 unsigned int NumberOfActivePlanes,
5834 bool GPUVMEnable,
5835 unsigned int MetaChunkSize,
5836 unsigned int MinMetaChunkSizeBytes,
5837 unsigned int GPUVMMaxPageTableLevels,
5838 unsigned int HTotal[],
5839 double VRatio[],
5840 double VRatioPrefetchY[],
5841 double VRatioPrefetchC[],
5842 double DestinationLinesToRequestRowInVBlank[],
5843 double DestinationLinesToRequestRowInImmediateFlip[],
5844 double DestinationLinesToRequestVMInVBlank[],
5845 double DestinationLinesToRequestVMInImmediateFlip[],
5846 bool DCCEnable[],
5847 double PixelClock[],
5848 double BytePerPixelDETY[],
5849 double BytePerPixelDETC[],
5850 enum scan_direction_class SourceScan[],
5851 unsigned int dpte_row_height[],
5852 unsigned int dpte_row_height_chroma[],
5853 unsigned int meta_row_width[],
5854 unsigned int meta_row_height[],
5855 unsigned int meta_req_width[],
5856 unsigned int meta_req_height[],
5857 int dpte_group_bytes[],
5858 unsigned int PTERequestSizeY[],
5859 unsigned int PTERequestSizeC[],
5860 unsigned int PixelPTEReqWidthY[],
5861 unsigned int PixelPTEReqHeightY[],
5862 unsigned int PixelPTEReqWidthC[],
5863 unsigned int PixelPTEReqHeightC[],
5864 unsigned int dpte_row_width_luma_ub[],
5865 unsigned int dpte_row_width_chroma_ub[],
5866 unsigned int vm_group_bytes[],
5867 unsigned int dpde0_bytes_per_frame_ub_l[],
5868 unsigned int dpde0_bytes_per_frame_ub_c[],
5869 unsigned int meta_pte_bytes_per_frame_ub_l[],
5870 unsigned int meta_pte_bytes_per_frame_ub_c[],
5871 double DST_Y_PER_PTE_ROW_NOM_L[],
5872 double DST_Y_PER_PTE_ROW_NOM_C[],
5873 double DST_Y_PER_META_ROW_NOM_L[],
5874 double TimePerMetaChunkNominal[],
5875 double TimePerMetaChunkVBlank[],
5876 double TimePerMetaChunkFlip[],
5877 double time_per_pte_group_nom_luma[],
5878 double time_per_pte_group_vblank_luma[],
5879 double time_per_pte_group_flip_luma[],
5880 double time_per_pte_group_nom_chroma[],
5881 double time_per_pte_group_vblank_chroma[],
5882 double time_per_pte_group_flip_chroma[],
5883 double TimePerVMGroupVBlank[],
5884 double TimePerVMGroupFlip[],
5885 double TimePerVMRequestVBlank[],
5886 double TimePerVMRequestFlip[])
5887 {
5888 unsigned int meta_chunk_width;
5889 unsigned int min_meta_chunk_width;
5890 unsigned int meta_chunk_per_row_int;
5891 unsigned int meta_row_remainder;
5892 unsigned int meta_chunk_threshold;
5893 unsigned int meta_chunks_per_row_ub;
5894 unsigned int dpte_group_width_luma;
5895 unsigned int dpte_group_width_chroma;
5896 unsigned int dpte_groups_per_row_luma_ub;
5897 unsigned int dpte_groups_per_row_chroma_ub;
5898 unsigned int num_group_per_lower_vm_stage;
5899 unsigned int num_req_per_lower_vm_stage;
5900 unsigned int k;
5901
5902 for (k = 0; k < NumberOfActivePlanes; ++k) {
5903 if (GPUVMEnable == true) {
5904 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
5905 if (BytePerPixelDETC[k] == 0) {
5906 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
5907 } else {
5908 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / (VRatio[k] / 2);
5909 }
5910 } else {
5911 DST_Y_PER_PTE_ROW_NOM_L[k] = 0;
5912 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
5913 }
5914 if (DCCEnable[k] == true) {
5915 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
5916 } else {
5917 DST_Y_PER_META_ROW_NOM_L[k] = 0;
5918 }
5919 }
5920
5921 for (k = 0; k < NumberOfActivePlanes; ++k) {
5922 if (DCCEnable[k] == true) {
5923 meta_chunk_width = MetaChunkSize * 1024 * 256
5924 / dml_ceil(BytePerPixelDETY[k], 1) / meta_row_height[k];
5925 min_meta_chunk_width = MinMetaChunkSizeBytes * 256
5926 / dml_ceil(BytePerPixelDETY[k], 1) / meta_row_height[k];
5927 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
5928 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
5929 if (SourceScan[k] == dm_horz) {
5930 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
5931 } else {
5932 meta_chunk_threshold = 2 * min_meta_chunk_width
5933 - meta_req_height[k];
5934 }
5935 if (meta_row_remainder <= meta_chunk_threshold) {
5936 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
5937 } else {
5938 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
5939 }
5940 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k]
5941 / PixelClock[k] / meta_chunks_per_row_ub;
5942 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k]
5943 * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5944 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k]
5945 * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5946 } else {
5947 TimePerMetaChunkNominal[k] = 0;
5948 TimePerMetaChunkVBlank[k] = 0;
5949 TimePerMetaChunkFlip[k] = 0;
5950 }
5951 }
5952
5953 for (k = 0; k < NumberOfActivePlanes; ++k) {
5954 if (GPUVMEnable == true) {
5955 if (SourceScan[k] == dm_horz) {
5956 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k]
5957 * PixelPTEReqWidthY[k];
5958 } else {
5959 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k]
5960 * PixelPTEReqHeightY[k];
5961 }
5962 dpte_groups_per_row_luma_ub = dml_ceil(
5963 (float) dpte_row_width_luma_ub[k] / dpte_group_width_luma,
5964 1);
5965 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k]
5966 / PixelClock[k] / dpte_groups_per_row_luma_ub;
5967 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k]
5968 * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5969 time_per_pte_group_flip_luma[k] =
5970 DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k]
5971 / PixelClock[k]
5972 / dpte_groups_per_row_luma_ub;
5973 if (BytePerPixelDETC[k] == 0) {
5974 time_per_pte_group_nom_chroma[k] = 0;
5975 time_per_pte_group_vblank_chroma[k] = 0;
5976 time_per_pte_group_flip_chroma[k] = 0;
5977 } else {
5978 if (SourceScan[k] == dm_horz) {
5979 dpte_group_width_chroma = dpte_group_bytes[k]
5980 / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
5981 } else {
5982 dpte_group_width_chroma = dpte_group_bytes[k]
5983 / PTERequestSizeC[k]
5984 * PixelPTEReqHeightC[k];
5985 }
5986 dpte_groups_per_row_chroma_ub = dml_ceil(
5987 (float) dpte_row_width_chroma_ub[k]
5988 / dpte_group_width_chroma,
5989 1);
5990 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k]
5991 * HTotal[k] / PixelClock[k]
5992 / dpte_groups_per_row_chroma_ub;
5993 time_per_pte_group_vblank_chroma[k] =
5994 DestinationLinesToRequestRowInVBlank[k] * HTotal[k]
5995 / PixelClock[k]
5996 / dpte_groups_per_row_chroma_ub;
5997 time_per_pte_group_flip_chroma[k] =
5998 DestinationLinesToRequestRowInImmediateFlip[k]
5999 * HTotal[k] / PixelClock[k]
6000 / dpte_groups_per_row_chroma_ub;
6001 }
6002 } else {
6003 time_per_pte_group_nom_luma[k] = 0;
6004 time_per_pte_group_vblank_luma[k] = 0;
6005 time_per_pte_group_flip_luma[k] = 0;
6006 time_per_pte_group_nom_chroma[k] = 0;
6007 time_per_pte_group_vblank_chroma[k] = 0;
6008 time_per_pte_group_flip_chroma[k] = 0;
6009 }
6010 }
6011
6012 for (k = 0; k < NumberOfActivePlanes; ++k) {
6013 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
6014 if (DCCEnable[k] == false) {
6015 if (BytePerPixelDETC[k] > 0) {
6016 num_group_per_lower_vm_stage =
6017 dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6018 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6019 } else {
6020 num_group_per_lower_vm_stage =
6021 dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
6022 }
6023 } else {
6024 if (GPUVMMaxPageTableLevels == 1) {
6025 if (BytePerPixelDETC[k] > 0) {
6026 num_group_per_lower_vm_stage =
6027 dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6028 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6029 } else {
6030 num_group_per_lower_vm_stage =
6031 dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
6032 }
6033 } else {
6034 if (BytePerPixelDETC[k] > 0) {
6035 num_group_per_lower_vm_stage =
6036 dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6037 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
6038 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6039 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6040 } else {
6041 num_group_per_lower_vm_stage =
6042 dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6043 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
6044 }
6045 }
6046 }
6047
6048 if (DCCEnable[k] == false) {
6049 if (BytePerPixelDETC[k] > 0) {
6050 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k]
6051 / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
6052 } else {
6053 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k]
6054 / 64;
6055 }
6056 } else {
6057 if (GPUVMMaxPageTableLevels == 1) {
6058 if (BytePerPixelDETC[k] > 0) {
6059 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64
6060 + meta_pte_bytes_per_frame_ub_c[k] / 64;
6061 } else {
6062 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
6063 }
6064 } else {
6065 if (BytePerPixelDETC[k] > 0) {
6066 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64
6067 + dpde0_bytes_per_frame_ub_c[k] / 64
6068 + meta_pte_bytes_per_frame_ub_l[k] / 64
6069 + meta_pte_bytes_per_frame_ub_c[k] / 64;
6070 } else {
6071 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64
6072 + meta_pte_bytes_per_frame_ub_l[k] / 64;
6073 }
6074 }
6075 }
6076
6077 TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k]
6078 / PixelClock[k] / num_group_per_lower_vm_stage;
6079 TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k]
6080 * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
6081 TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k]
6082 * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
6083 TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k]
6084 * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
6085
6086 if (GPUVMMaxPageTableLevels > 2) {
6087 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
6088 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
6089 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
6090 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
6091 }
6092
6093 } else {
6094 TimePerVMGroupVBlank[k] = 0;
6095 TimePerVMGroupFlip[k] = 0;
6096 TimePerVMRequestVBlank[k] = 0;
6097 TimePerVMRequestFlip[k] = 0;
6098 }
6099 }
6100 }
6101
/*
 * Return the extra latency added on top of
 * UrgentRoundTripAndOutOfOrderLatency.
 *
 * The result is the sum of:
 *  - UrgentRoundTripAndOutOfOrderLatency;
 *  - the time to return one pixel chunk per active DPP plus one meta chunk
 *    per DCC-active DPP (both sizes in KBytes) at ReturnBW;
 *  - only when GPUVMEnable is set, for each of the NumberOfActivePlanes
 *    planes: NumberOfDPP[k] * dpte_group_bytes[k], scaled by
 *    (1 + 8 * dynamic host VM levels) and by the host VM inefficiency
 *    factor, divided by ReturnBW.
 *
 * The host VM terms only differ from 1 / 0 when both GPUVMEnable and
 * HostVMEnable are set; NumberOfDPP and dpte_group_bytes are not read when
 * GPUVMEnable is false.
 */
static double CalculateExtraLatency(
		double UrgentRoundTripAndOutOfOrderLatency,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		double ReturnBW,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
		int HostVMMaxPageTableLevels,
		int HostVMCachedPageTableLevels)
{
	double HostVMInefficiencyFactor = 1;
	int HostVMDynamicLevels = 0;
	double ExtraLatency;
	int plane;

	if (GPUVMEnable && HostVMEnable) {
		HostVMInefficiencyFactor =
				PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
						/ PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
		HostVMDynamicLevels = HostVMMaxPageTableLevels - HostVMCachedPageTableLevels;
	}

	ExtraLatency = UrgentRoundTripAndOutOfOrderLatency
			+ (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
					+ TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0
					/ ReturnBW;

	if (!GPUVMEnable) {
		return ExtraLatency;
	}

	for (plane = 0; plane < NumberOfActivePlanes; plane++) {
		ExtraLatency += NumberOfDPP[plane] * dpte_group_bytes[plane]
				* (1 + 8 * HostVMDynamicLevels)
				* HostVMInefficiencyFactor / ReturnBW;
	}

	return ExtraLatency;
}
6150
6151