1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright 2022 Advanced Micro Devices, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors: AMD
24 *
25 */
26
27 #include "dc.h"
28 #include "../display_mode_lib.h"
29 #include "display_mode_vba_314.h"
30 #include "../dml_inline_defs.h"
31
32 /*
33 * NOTE:
34 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
35 *
36 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
37 * ways. Unless there is something clearly wrong with it the code should
38 * remain as-is as it provides us with a guarantee from HW that it is correct.
39 */
40
41 #define BPP_INVALID 0
42 #define BPP_BLENDED_PIPE 0xffffffff
43 #define DCN314_MAX_DSC_IMAGE_WIDTH 5184
44 #define DCN314_MAX_FMT_420_BUFFER_WIDTH 4096
45
46 // For DML-C changes that haven't been propagated to VBA yet
47 //#define __DML_VBA_ALLOW_DELTA__
48
49 // Move these to ip parameters/constant
50
51 // The vstartup value at which DML starts checking whether the mode can be supported
52 #define __DML_VBA_MIN_VSTARTUP__ 9
53
54 // Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET)
55 #define __DML_ARB_TO_RET_DELAY__ (7 + 95)
56
57 // fudge factor for min dcfclk calculation
58 #define __DML_MIN_DCFCLK_FACTOR__ 1.15
59
60 typedef struct {
61 double DPPCLK;
62 double DISPCLK;
63 double PixelClock;
64 double DCFCLKDeepSleep;
65 unsigned int DPPPerPlane;
66 bool ScalerEnabled;
67 double VRatio;
68 double VRatioChroma;
69 enum scan_direction_class SourceScan;
70 unsigned int BlockWidth256BytesY;
71 unsigned int BlockHeight256BytesY;
72 unsigned int BlockWidth256BytesC;
73 unsigned int BlockHeight256BytesC;
74 unsigned int InterlaceEnable;
75 unsigned int NumberOfCursors;
76 unsigned int VBlank;
77 unsigned int HTotal;
78 unsigned int DCCEnable;
79 bool ODMCombineIsEnabled;
80 enum source_format_class SourcePixelFormat;
81 int BytePerPixelY;
82 int BytePerPixelC;
83 bool ProgressiveToInterlaceUnitInOPP;
84 } Pipe;
85
86 #define BPP_INVALID 0
87 #define BPP_BLENDED_PIPE 0xffffffff
88
89 static bool CalculateBytePerPixelAnd256BBlockSizes(
90 enum source_format_class SourcePixelFormat,
91 enum dm_swizzle_mode SurfaceTiling,
92 unsigned int *BytePerPixelY,
93 unsigned int *BytePerPixelC,
94 double *BytePerPixelDETY,
95 double *BytePerPixelDETC,
96 unsigned int *BlockHeight256BytesY,
97 unsigned int *BlockHeight256BytesC,
98 unsigned int *BlockWidth256BytesY,
99 unsigned int *BlockWidth256BytesC);
100 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
101 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib);
102 static unsigned int dscceComputeDelay(
103 unsigned int bpc,
104 double BPP,
105 unsigned int sliceWidth,
106 unsigned int numSlices,
107 enum output_format_class pixelFormat,
108 enum output_encoder_class Output);
109 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output);
110 static bool CalculatePrefetchSchedule(
111 struct display_mode_lib *mode_lib,
112 double HostVMInefficiencyFactor,
113 Pipe *myPipe,
114 unsigned int DSCDelay,
115 double DPPCLKDelaySubtotalPlusCNVCFormater,
116 double DPPCLKDelaySCL,
117 double DPPCLKDelaySCLLBOnly,
118 double DPPCLKDelayCNVCCursor,
119 double DISPCLKDelaySubtotal,
120 unsigned int DPP_RECOUT_WIDTH,
121 enum output_format_class OutputFormat,
122 unsigned int MaxInterDCNTileRepeaters,
123 unsigned int VStartup,
124 unsigned int MaxVStartup,
125 unsigned int GPUVMPageTableLevels,
126 bool GPUVMEnable,
127 bool HostVMEnable,
128 unsigned int HostVMMaxNonCachedPageTableLevels,
129 double HostVMMinPageSize,
130 bool DynamicMetadataEnable,
131 bool DynamicMetadataVMEnabled,
132 int DynamicMetadataLinesBeforeActiveRequired,
133 unsigned int DynamicMetadataTransmittedBytes,
134 double UrgentLatency,
135 double UrgentExtraLatency,
136 double TCalc,
137 unsigned int PDEAndMetaPTEBytesFrame,
138 unsigned int MetaRowByte,
139 unsigned int PixelPTEBytesPerRow,
140 double PrefetchSourceLinesY,
141 unsigned int SwathWidthY,
142 double VInitPreFillY,
143 unsigned int MaxNumSwathY,
144 double PrefetchSourceLinesC,
145 unsigned int SwathWidthC,
146 double VInitPreFillC,
147 unsigned int MaxNumSwathC,
148 int swath_width_luma_ub,
149 int swath_width_chroma_ub,
150 unsigned int SwathHeightY,
151 unsigned int SwathHeightC,
152 double TWait,
153 double *DSTXAfterScaler,
154 double *DSTYAfterScaler,
155 double *DestinationLinesForPrefetch,
156 double *PrefetchBandwidth,
157 double *DestinationLinesToRequestVMInVBlank,
158 double *DestinationLinesToRequestRowInVBlank,
159 double *VRatioPrefetchY,
160 double *VRatioPrefetchC,
161 double *RequiredPrefetchPixDataBWLuma,
162 double *RequiredPrefetchPixDataBWChroma,
163 bool *NotEnoughTimeForDynamicMetadata,
164 double *Tno_bw,
165 double *prefetch_vmrow_bw,
166 double *Tdmdl_vm,
167 double *Tdmdl,
168 double *TSetup,
169 int *VUpdateOffsetPix,
170 double *VUpdateWidthPix,
171 double *VReadyOffsetPix);
172 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
173 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
174 static void CalculateDCCConfiguration(
175 bool DCCEnabled,
176 bool DCCProgrammingAssumesScanDirectionUnknown,
177 enum source_format_class SourcePixelFormat,
178 unsigned int SurfaceWidthLuma,
179 unsigned int SurfaceWidthChroma,
180 unsigned int SurfaceHeightLuma,
181 unsigned int SurfaceHeightChroma,
182 double DETBufferSize,
183 unsigned int RequestHeight256ByteLuma,
184 unsigned int RequestHeight256ByteChroma,
185 enum dm_swizzle_mode TilingFormat,
186 unsigned int BytePerPixelY,
187 unsigned int BytePerPixelC,
188 double BytePerPixelDETY,
189 double BytePerPixelDETC,
190 enum scan_direction_class ScanOrientation,
191 unsigned int *MaxUncompressedBlockLuma,
192 unsigned int *MaxUncompressedBlockChroma,
193 unsigned int *MaxCompressedBlockLuma,
194 unsigned int *MaxCompressedBlockChroma,
195 unsigned int *IndependentBlockLuma,
196 unsigned int *IndependentBlockChroma);
197 static double CalculatePrefetchSourceLines(
198 struct display_mode_lib *mode_lib,
199 double VRatio,
200 double vtaps,
201 bool Interlace,
202 bool ProgressiveToInterlaceUnitInOPP,
203 unsigned int SwathHeight,
204 unsigned int ViewportYStart,
205 double *VInitPreFill,
206 unsigned int *MaxNumSwath);
207 static unsigned int CalculateVMAndRowBytes(
208 struct display_mode_lib *mode_lib,
209 bool DCCEnable,
210 unsigned int BlockHeight256Bytes,
211 unsigned int BlockWidth256Bytes,
212 enum source_format_class SourcePixelFormat,
213 unsigned int SurfaceTiling,
214 unsigned int BytePerPixel,
215 enum scan_direction_class ScanDirection,
216 unsigned int SwathWidth,
217 unsigned int ViewportHeight,
218 bool GPUVMEnable,
219 bool HostVMEnable,
220 unsigned int HostVMMaxNonCachedPageTableLevels,
221 unsigned int GPUVMMinPageSize,
222 unsigned int HostVMMinPageSize,
223 unsigned int PTEBufferSizeInRequests,
224 unsigned int Pitch,
225 unsigned int DCCMetaPitch,
226 unsigned int *MacroTileWidth,
227 unsigned int *MetaRowByte,
228 unsigned int *PixelPTEBytesPerRow,
229 bool *PTEBufferSizeNotExceeded,
230 int *dpte_row_width_ub,
231 unsigned int *dpte_row_height,
232 unsigned int *MetaRequestWidth,
233 unsigned int *MetaRequestHeight,
234 unsigned int *meta_row_width,
235 unsigned int *meta_row_height,
236 int *vm_group_bytes,
237 unsigned int *dpte_group_bytes,
238 unsigned int *PixelPTEReqWidth,
239 unsigned int *PixelPTEReqHeight,
240 unsigned int *PTERequestSize,
241 int *DPDE0BytesFrame,
242 int *MetaPTEBytesFrame);
243 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime);
244 static void CalculateRowBandwidth(
245 bool GPUVMEnable,
246 enum source_format_class SourcePixelFormat,
247 double VRatio,
248 double VRatioChroma,
249 bool DCCEnable,
250 double LineTime,
251 unsigned int MetaRowByteLuma,
252 unsigned int MetaRowByteChroma,
253 unsigned int meta_row_height_luma,
254 unsigned int meta_row_height_chroma,
255 unsigned int PixelPTEBytesPerRowLuma,
256 unsigned int PixelPTEBytesPerRowChroma,
257 unsigned int dpte_row_height_luma,
258 unsigned int dpte_row_height_chroma,
259 double *meta_row_bw,
260 double *dpte_row_bw);
261
262 static void CalculateFlipSchedule(
263 struct display_mode_lib *mode_lib,
264 unsigned int k,
265 double HostVMInefficiencyFactor,
266 double UrgentExtraLatency,
267 double UrgentLatency,
268 double PDEAndMetaPTEBytesPerFrame,
269 double MetaRowBytes,
270 double DPTEBytesPerRow);
271 static double CalculateWriteBackDelay(
272 enum source_format_class WritebackPixelFormat,
273 double WritebackHRatio,
274 double WritebackVRatio,
275 unsigned int WritebackVTaps,
276 int WritebackDestinationWidth,
277 int WritebackDestinationHeight,
278 int WritebackSourceHeight,
279 unsigned int HTotal);
280
281 static void CalculateVupdateAndDynamicMetadataParameters(
282 int MaxInterDCNTileRepeaters,
283 double DPPCLK,
284 double DISPCLK,
285 double DCFClkDeepSleep,
286 double PixelClock,
287 int HTotal,
288 int VBlank,
289 int DynamicMetadataTransmittedBytes,
290 int DynamicMetadataLinesBeforeActiveRequired,
291 int InterlaceEnable,
292 bool ProgressiveToInterlaceUnitInOPP,
293 double *TSetup,
294 double *Tdmbf,
295 double *Tdmec,
296 double *Tdmsks,
297 int *VUpdateOffsetPix,
298 double *VUpdateWidthPix,
299 double *VReadyOffsetPix);
300
301 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
302 struct display_mode_lib *mode_lib,
303 unsigned int PrefetchMode,
304 double DCFCLK,
305 double ReturnBW,
306 double UrgentLatency,
307 double ExtraLatency,
308 double SOCCLK,
309 double DCFCLKDeepSleep,
310 unsigned int DETBufferSizeY[],
311 unsigned int DETBufferSizeC[],
312 unsigned int SwathHeightY[],
313 unsigned int SwathHeightC[],
314 double SwathWidthY[],
315 double SwathWidthC[],
316 unsigned int DPPPerPlane[],
317 double BytePerPixelDETY[],
318 double BytePerPixelDETC[],
319 bool UnboundedRequestEnabled,
320 unsigned int CompressedBufferSizeInkByte,
321 enum clock_change_support *DRAMClockChangeSupport,
322 double *StutterExitWatermark,
323 double *StutterEnterPlusExitWatermark,
324 double *Z8StutterExitWatermark,
325 double *Z8StutterEnterPlusExitWatermark);
326
327 static void CalculateDCFCLKDeepSleep(
328 struct display_mode_lib *mode_lib,
329 unsigned int NumberOfActivePlanes,
330 int BytePerPixelY[],
331 int BytePerPixelC[],
332 double VRatio[],
333 double VRatioChroma[],
334 double SwathWidthY[],
335 double SwathWidthC[],
336 unsigned int DPPPerPlane[],
337 double HRatio[],
338 double HRatioChroma[],
339 double PixelClock[],
340 double PSCL_THROUGHPUT[],
341 double PSCL_THROUGHPUT_CHROMA[],
342 double DPPCLK[],
343 double ReadBandwidthLuma[],
344 double ReadBandwidthChroma[],
345 int ReturnBusWidth,
346 double *DCFCLKDeepSleep);
347
348 static void CalculateUrgentBurstFactor(
349 int swath_width_luma_ub,
350 int swath_width_chroma_ub,
351 unsigned int SwathHeightY,
352 unsigned int SwathHeightC,
353 double LineTime,
354 double UrgentLatency,
355 double CursorBufferSize,
356 unsigned int CursorWidth,
357 unsigned int CursorBPP,
358 double VRatio,
359 double VRatioC,
360 double BytePerPixelInDETY,
361 double BytePerPixelInDETC,
362 double DETBufferSizeY,
363 double DETBufferSizeC,
364 double *UrgentBurstFactorCursor,
365 double *UrgentBurstFactorLuma,
366 double *UrgentBurstFactorChroma,
367 bool *NotEnoughUrgentLatencyHiding);
368
369 static void UseMinimumDCFCLK(
370 struct display_mode_lib *mode_lib,
371 int MaxPrefetchMode,
372 int ReorderingBytes);
373
374 static void CalculatePixelDeliveryTimes(
375 unsigned int NumberOfActivePlanes,
376 double VRatio[],
377 double VRatioChroma[],
378 double VRatioPrefetchY[],
379 double VRatioPrefetchC[],
380 unsigned int swath_width_luma_ub[],
381 unsigned int swath_width_chroma_ub[],
382 unsigned int DPPPerPlane[],
383 double HRatio[],
384 double HRatioChroma[],
385 double PixelClock[],
386 double PSCL_THROUGHPUT[],
387 double PSCL_THROUGHPUT_CHROMA[],
388 double DPPCLK[],
389 int BytePerPixelC[],
390 enum scan_direction_class SourceScan[],
391 unsigned int NumberOfCursors[],
392 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
393 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
394 unsigned int BlockWidth256BytesY[],
395 unsigned int BlockHeight256BytesY[],
396 unsigned int BlockWidth256BytesC[],
397 unsigned int BlockHeight256BytesC[],
398 double DisplayPipeLineDeliveryTimeLuma[],
399 double DisplayPipeLineDeliveryTimeChroma[],
400 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
401 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
402 double DisplayPipeRequestDeliveryTimeLuma[],
403 double DisplayPipeRequestDeliveryTimeChroma[],
404 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
405 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
406 double CursorRequestDeliveryTime[],
407 double CursorRequestDeliveryTimePrefetch[]);
408
409 static void CalculateMetaAndPTETimes(
410 int NumberOfActivePlanes,
411 bool GPUVMEnable,
412 int MetaChunkSize,
413 int MinMetaChunkSizeBytes,
414 int HTotal[],
415 double VRatio[],
416 double VRatioChroma[],
417 double DestinationLinesToRequestRowInVBlank[],
418 double DestinationLinesToRequestRowInImmediateFlip[],
419 bool DCCEnable[],
420 double PixelClock[],
421 int BytePerPixelY[],
422 int BytePerPixelC[],
423 enum scan_direction_class SourceScan[],
424 int dpte_row_height[],
425 int dpte_row_height_chroma[],
426 int meta_row_width[],
427 int meta_row_width_chroma[],
428 int meta_row_height[],
429 int meta_row_height_chroma[],
430 int meta_req_width[],
431 int meta_req_width_chroma[],
432 int meta_req_height[],
433 int meta_req_height_chroma[],
434 int dpte_group_bytes[],
435 int PTERequestSizeY[],
436 int PTERequestSizeC[],
437 int PixelPTEReqWidthY[],
438 int PixelPTEReqHeightY[],
439 int PixelPTEReqWidthC[],
440 int PixelPTEReqHeightC[],
441 int dpte_row_width_luma_ub[],
442 int dpte_row_width_chroma_ub[],
443 double DST_Y_PER_PTE_ROW_NOM_L[],
444 double DST_Y_PER_PTE_ROW_NOM_C[],
445 double DST_Y_PER_META_ROW_NOM_L[],
446 double DST_Y_PER_META_ROW_NOM_C[],
447 double TimePerMetaChunkNominal[],
448 double TimePerChromaMetaChunkNominal[],
449 double TimePerMetaChunkVBlank[],
450 double TimePerChromaMetaChunkVBlank[],
451 double TimePerMetaChunkFlip[],
452 double TimePerChromaMetaChunkFlip[],
453 double time_per_pte_group_nom_luma[],
454 double time_per_pte_group_vblank_luma[],
455 double time_per_pte_group_flip_luma[],
456 double time_per_pte_group_nom_chroma[],
457 double time_per_pte_group_vblank_chroma[],
458 double time_per_pte_group_flip_chroma[]);
459
460 static void CalculateVMGroupAndRequestTimes(
461 unsigned int NumberOfActivePlanes,
462 bool GPUVMEnable,
463 unsigned int GPUVMMaxPageTableLevels,
464 unsigned int HTotal[],
465 int BytePerPixelC[],
466 double DestinationLinesToRequestVMInVBlank[],
467 double DestinationLinesToRequestVMInImmediateFlip[],
468 bool DCCEnable[],
469 double PixelClock[],
470 int dpte_row_width_luma_ub[],
471 int dpte_row_width_chroma_ub[],
472 int vm_group_bytes[],
473 unsigned int dpde0_bytes_per_frame_ub_l[],
474 unsigned int dpde0_bytes_per_frame_ub_c[],
475 int meta_pte_bytes_per_frame_ub_l[],
476 int meta_pte_bytes_per_frame_ub_c[],
477 double TimePerVMGroupVBlank[],
478 double TimePerVMGroupFlip[],
479 double TimePerVMRequestVBlank[],
480 double TimePerVMRequestFlip[]);
481
482 static void CalculateStutterEfficiency(
483 struct display_mode_lib *mode_lib,
484 int CompressedBufferSizeInkByte,
485 bool UnboundedRequestEnabled,
486 int ConfigReturnBufferSizeInKByte,
487 int MetaFIFOSizeInKEntries,
488 int ZeroSizeBufferEntries,
489 int NumberOfActivePlanes,
490 int ROBBufferSizeInKByte,
491 double TotalDataReadBandwidth,
492 double DCFCLK,
493 double ReturnBW,
494 double COMPBUF_RESERVED_SPACE_64B,
495 double COMPBUF_RESERVED_SPACE_ZS,
496 double SRExitTime,
497 double SRExitZ8Time,
498 bool SynchronizedVBlank,
499 double Z8StutterEnterPlusExitWatermark,
500 double StutterEnterPlusExitWatermark,
501 bool ProgressiveToInterlaceUnitInOPP,
502 bool Interlace[],
503 double MinTTUVBlank[],
504 int DPPPerPlane[],
505 unsigned int DETBufferSizeY[],
506 int BytePerPixelY[],
507 double BytePerPixelDETY[],
508 double SwathWidthY[],
509 int SwathHeightY[],
510 int SwathHeightC[],
511 double NetDCCRateLuma[],
512 double NetDCCRateChroma[],
513 double DCCFractionOfZeroSizeRequestsLuma[],
514 double DCCFractionOfZeroSizeRequestsChroma[],
515 int HTotal[],
516 int VTotal[],
517 double PixelClock[],
518 double VRatio[],
519 enum scan_direction_class SourceScan[],
520 int BlockHeight256BytesY[],
521 int BlockWidth256BytesY[],
522 int BlockHeight256BytesC[],
523 int BlockWidth256BytesC[],
524 int DCCYMaxUncompressedBlock[],
525 int DCCCMaxUncompressedBlock[],
526 int VActive[],
527 bool DCCEnable[],
528 bool WritebackEnable[],
529 double ReadBandwidthPlaneLuma[],
530 double ReadBandwidthPlaneChroma[],
531 double meta_row_bw[],
532 double dpte_row_bw[],
533 double *StutterEfficiencyNotIncludingVBlank,
534 double *StutterEfficiency,
535 int *NumberOfStutterBurstsPerFrame,
536 double *Z8StutterEfficiencyNotIncludingVBlank,
537 double *Z8StutterEfficiency,
538 int *Z8NumberOfStutterBurstsPerFrame,
539 double *StutterPeriod);
540
541 static void CalculateSwathAndDETConfiguration(
542 bool ForceSingleDPP,
543 int NumberOfActivePlanes,
544 unsigned int DETBufferSizeInKByte,
545 double MaximumSwathWidthLuma[],
546 double MaximumSwathWidthChroma[],
547 enum scan_direction_class SourceScan[],
548 enum source_format_class SourcePixelFormat[],
549 enum dm_swizzle_mode SurfaceTiling[],
550 int ViewportWidth[],
551 int ViewportHeight[],
552 int SurfaceWidthY[],
553 int SurfaceWidthC[],
554 int SurfaceHeightY[],
555 int SurfaceHeightC[],
556 int Read256BytesBlockHeightY[],
557 int Read256BytesBlockHeightC[],
558 int Read256BytesBlockWidthY[],
559 int Read256BytesBlockWidthC[],
560 enum odm_combine_mode ODMCombineEnabled[],
561 int BlendingAndTiming[],
562 int BytePerPixY[],
563 int BytePerPixC[],
564 double BytePerPixDETY[],
565 double BytePerPixDETC[],
566 int HActive[],
567 double HRatio[],
568 double HRatioChroma[],
569 int DPPPerPlane[],
570 int swath_width_luma_ub[],
571 int swath_width_chroma_ub[],
572 double SwathWidth[],
573 double SwathWidthChroma[],
574 int SwathHeightY[],
575 int SwathHeightC[],
576 unsigned int DETBufferSizeY[],
577 unsigned int DETBufferSizeC[],
578 bool ViewportSizeSupportPerPlane[],
579 bool *ViewportSizeSupport);
580 static void CalculateSwathWidth(
581 bool ForceSingleDPP,
582 int NumberOfActivePlanes,
583 enum source_format_class SourcePixelFormat[],
584 enum scan_direction_class SourceScan[],
585 int ViewportWidth[],
586 int ViewportHeight[],
587 int SurfaceWidthY[],
588 int SurfaceWidthC[],
589 int SurfaceHeightY[],
590 int SurfaceHeightC[],
591 enum odm_combine_mode ODMCombineEnabled[],
592 int BytePerPixY[],
593 int BytePerPixC[],
594 int Read256BytesBlockHeightY[],
595 int Read256BytesBlockHeightC[],
596 int Read256BytesBlockWidthY[],
597 int Read256BytesBlockWidthC[],
598 int BlendingAndTiming[],
599 int HActive[],
600 double HRatio[],
601 int DPPPerPlane[],
602 double SwathWidthSingleDPPY[],
603 double SwathWidthSingleDPPC[],
604 double SwathWidthY[],
605 double SwathWidthC[],
606 int MaximumSwathHeightY[],
607 int MaximumSwathHeightC[],
608 int swath_width_luma_ub[],
609 int swath_width_chroma_ub[]);
610
611 static double CalculateExtraLatency(
612 int RoundTripPingLatencyCycles,
613 int ReorderingBytes,
614 double DCFCLK,
615 int TotalNumberOfActiveDPP,
616 int PixelChunkSizeInKByte,
617 int TotalNumberOfDCCActiveDPP,
618 int MetaChunkSize,
619 double ReturnBW,
620 bool GPUVMEnable,
621 bool HostVMEnable,
622 int NumberOfActivePlanes,
623 int NumberOfDPP[],
624 int dpte_group_bytes[],
625 double HostVMInefficiencyFactor,
626 double HostVMMinPageSize,
627 int HostVMMaxNonCachedPageTableLevels);
628
629 static double CalculateExtraLatencyBytes(
630 int ReorderingBytes,
631 int TotalNumberOfActiveDPP,
632 int PixelChunkSizeInKByte,
633 int TotalNumberOfDCCActiveDPP,
634 int MetaChunkSize,
635 bool GPUVMEnable,
636 bool HostVMEnable,
637 int NumberOfActivePlanes,
638 int NumberOfDPP[],
639 int dpte_group_bytes[],
640 double HostVMInefficiencyFactor,
641 double HostVMMinPageSize,
642 int HostVMMaxNonCachedPageTableLevels);
643
644 static double CalculateUrgentLatency(
645 double UrgentLatencyPixelDataOnly,
646 double UrgentLatencyPixelMixedWithVMData,
647 double UrgentLatencyVMDataOnly,
648 bool DoUrgentLatencyAdjustment,
649 double UrgentLatencyAdjustmentFabricClockComponent,
650 double UrgentLatencyAdjustmentFabricClockReference,
651 double FabricClockSingle);
652
653 static void CalculateUnboundedRequestAndCompressedBufferSize(
654 unsigned int DETBufferSizeInKByte,
655 int ConfigReturnBufferSizeInKByte,
656 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
657 int TotalActiveDPP,
658 bool NoChromaPlanes,
659 int MaxNumDPP,
660 int CompressedBufferSegmentSizeInkByteFinal,
661 enum output_encoder_class *Output,
662 bool *UnboundedRequestEnabled,
663 int *CompressedBufferSizeInkByte);
664
665 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output);
666 static unsigned int CalculateMaxVStartup(
667 unsigned int VTotal,
668 unsigned int VActive,
669 unsigned int VBlankNom,
670 unsigned int HTotal,
671 double PixelClock,
672 bool ProgressiveTointerlaceUnitinOPP,
673 bool Interlace,
674 unsigned int VBlankNomDefaultUS,
675 double WritebackDelayTime);
676
/*
 * dml314_recalculate - run the DCN 3.14 recalculation stages on @mode_lib.
 *
 * The stages are invoked strictly in this order, each operating on the same
 * mode_lib instance:
 *   1. ModeSupportAndSystemConfiguration()
 *   2. PixelClockAdjustmentForProgressiveToInterlaceUnit()
 *   3. DisplayPipeConfiguration()
 *   4. DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation()
 *
 * When __DML_VBA_DEBUG__ is defined, a trace line is printed right before
 * stage 4. Nothing is returned; presumably all results are written back
 * into @mode_lib by the individual stages.
 */
void dml314_recalculate(struct display_mode_lib *mode_lib)
{
	/* Stages 1-3. */
	ModeSupportAndSystemConfiguration(mode_lib);
	PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
	DisplayPipeConfiguration(mode_lib);

#if defined(__DML_VBA_DEBUG__)
	dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n",
			__func__);
#endif

	/* Stage 4. */
	DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
}
687
dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum output_format_class pixelFormat,enum output_encoder_class Output)688 static unsigned int dscceComputeDelay(
689 unsigned int bpc,
690 double BPP,
691 unsigned int sliceWidth,
692 unsigned int numSlices,
693 enum output_format_class pixelFormat,
694 enum output_encoder_class Output)
695 {
696 // valid bpc = source bits per component in the set of {8, 10, 12}
697 // valid bpp = increments of 1/16 of a bit
698 // min = 6/7/8 in N420/N422/444, respectively
699 // max = such that compression is 1:1
700 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
701 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
702 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
703
704 // fixed value
705 unsigned int rcModelSize = 8192;
706
707 // N422/N420 operate at 2 pixels per clock
708 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels;
709
710 if (pixelFormat == dm_420)
711 pixelsPerClock = 2;
712 else if (pixelFormat == dm_444)
713 pixelsPerClock = 1;
714 else if (pixelFormat == dm_n422)
715 pixelsPerClock = 2;
716 // #all other modes operate at 1 pixel per clock
717 else
718 pixelsPerClock = 1;
719
720 //initial transmit delay as per PPS
721 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
722
723 //compute ssm delay
724 if (bpc == 8)
725 D = 81;
726 else if (bpc == 10)
727 D = 89;
728 else
729 D = 113;
730
731 //divide by pixel per cycle to compute slice width as seen by DSC
732 w = sliceWidth / pixelsPerClock;
733
734 //422 mode has an additional cycle of delay
735 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
736 s = 0;
737 else
738 s = 1;
739
740 //main calculation for the dscce
741 ix = initalXmitDelay + 45;
742 wx = (w + 2) / 3;
743 P = 3 * wx - w;
744 l0 = ix / w;
745 a = ix + P * l0;
746 ax = (a + 2) / 3 + D + 6 + 1;
747 L = (ax + wx - 1) / wx;
748 if ((ix % w) == 0 && P != 0)
749 lstall = 1;
750 else
751 lstall = 0;
752 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
753
754 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
755 pixels = Delay * 3 * pixelsPerClock;
756 return pixels;
757 }
758
dscComputeDelay(enum output_format_class pixelFormat,enum output_encoder_class Output)759 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
760 {
761 unsigned int Delay = 0;
762
763 if (pixelFormat == dm_420) {
764 // sfr
765 Delay = Delay + 2;
766 // dsccif
767 Delay = Delay + 0;
768 // dscc - input deserializer
769 Delay = Delay + 3;
770 // dscc gets pixels every other cycle
771 Delay = Delay + 2;
772 // dscc - input cdc fifo
773 Delay = Delay + 12;
774 // dscc gets pixels every other cycle
775 Delay = Delay + 13;
776 // dscc - cdc uncertainty
777 Delay = Delay + 2;
778 // dscc - output cdc fifo
779 Delay = Delay + 7;
780 // dscc gets pixels every other cycle
781 Delay = Delay + 3;
782 // dscc - cdc uncertainty
783 Delay = Delay + 2;
784 // dscc - output serializer
785 Delay = Delay + 1;
786 // sft
787 Delay = Delay + 1;
788 } else if (pixelFormat == dm_n422) {
789 // sfr
790 Delay = Delay + 2;
791 // dsccif
792 Delay = Delay + 1;
793 // dscc - input deserializer
794 Delay = Delay + 5;
795 // dscc - input cdc fifo
796 Delay = Delay + 25;
797 // dscc - cdc uncertainty
798 Delay = Delay + 2;
799 // dscc - output cdc fifo
800 Delay = Delay + 10;
801 // dscc - cdc uncertainty
802 Delay = Delay + 2;
803 // dscc - output serializer
804 Delay = Delay + 1;
805 // sft
806 Delay = Delay + 1;
807 } else {
808 // sfr
809 Delay = Delay + 2;
810 // dsccif
811 Delay = Delay + 0;
812 // dscc - input deserializer
813 Delay = Delay + 3;
814 // dscc - input cdc fifo
815 Delay = Delay + 12;
816 // dscc - cdc uncertainty
817 Delay = Delay + 2;
818 // dscc - output cdc fifo
819 Delay = Delay + 7;
820 // dscc - output serializer
821 Delay = Delay + 1;
822 // dscc - cdc uncertainty
823 Delay = Delay + 2;
824 // sft
825 Delay = Delay + 1;
826 }
827
828 return Delay;
829 }
830
CalculatePrefetchSchedule(struct display_mode_lib * mode_lib,double HostVMInefficiencyFactor,Pipe * myPipe,unsigned int DSCDelay,double DPPCLKDelaySubtotalPlusCNVCFormater,double DPPCLKDelaySCL,double DPPCLKDelaySCLLBOnly,double DPPCLKDelayCNVCCursor,double DISPCLKDelaySubtotal,unsigned int DPP_RECOUT_WIDTH,enum output_format_class OutputFormat,unsigned int MaxInterDCNTileRepeaters,unsigned int VStartup,unsigned int MaxVStartup,unsigned int GPUVMPageTableLevels,bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,double HostVMMinPageSize,bool DynamicMetadataEnable,bool DynamicMetadataVMEnabled,int DynamicMetadataLinesBeforeActiveRequired,unsigned int DynamicMetadataTransmittedBytes,double UrgentLatency,double UrgentExtraLatency,double TCalc,unsigned int PDEAndMetaPTEBytesFrame,unsigned int MetaRowByte,unsigned int PixelPTEBytesPerRow,double PrefetchSourceLinesY,unsigned int SwathWidthY,double VInitPreFillY,unsigned int MaxNumSwathY,double PrefetchSourceLinesC,unsigned int SwathWidthC,double VInitPreFillC,unsigned int MaxNumSwathC,int swath_width_luma_ub,int swath_width_chroma_ub,unsigned int SwathHeightY,unsigned int SwathHeightC,double TWait,double * DSTXAfterScaler,double * DSTYAfterScaler,double * DestinationLinesForPrefetch,double * PrefetchBandwidth,double * DestinationLinesToRequestVMInVBlank,double * DestinationLinesToRequestRowInVBlank,double * VRatioPrefetchY,double * VRatioPrefetchC,double * RequiredPrefetchPixDataBWLuma,double * RequiredPrefetchPixDataBWChroma,bool * NotEnoughTimeForDynamicMetadata,double * Tno_bw,double * prefetch_vmrow_bw,double * Tdmdl_vm,double * Tdmdl,double * TSetup,int * VUpdateOffsetPix,double * VUpdateWidthPix,double * VReadyOffsetPix)831 static bool CalculatePrefetchSchedule(
832 struct display_mode_lib *mode_lib,
833 double HostVMInefficiencyFactor,
834 Pipe *myPipe,
835 unsigned int DSCDelay,
836 double DPPCLKDelaySubtotalPlusCNVCFormater,
837 double DPPCLKDelaySCL,
838 double DPPCLKDelaySCLLBOnly,
839 double DPPCLKDelayCNVCCursor,
840 double DISPCLKDelaySubtotal,
841 unsigned int DPP_RECOUT_WIDTH,
842 enum output_format_class OutputFormat,
843 unsigned int MaxInterDCNTileRepeaters,
844 unsigned int VStartup,
845 unsigned int MaxVStartup,
846 unsigned int GPUVMPageTableLevels,
847 bool GPUVMEnable,
848 bool HostVMEnable,
849 unsigned int HostVMMaxNonCachedPageTableLevels,
850 double HostVMMinPageSize,
851 bool DynamicMetadataEnable,
852 bool DynamicMetadataVMEnabled,
853 int DynamicMetadataLinesBeforeActiveRequired,
854 unsigned int DynamicMetadataTransmittedBytes,
855 double UrgentLatency,
856 double UrgentExtraLatency,
857 double TCalc,
858 unsigned int PDEAndMetaPTEBytesFrame,
859 unsigned int MetaRowByte,
860 unsigned int PixelPTEBytesPerRow,
861 double PrefetchSourceLinesY,
862 unsigned int SwathWidthY,
863 double VInitPreFillY,
864 unsigned int MaxNumSwathY,
865 double PrefetchSourceLinesC,
866 unsigned int SwathWidthC,
867 double VInitPreFillC,
868 unsigned int MaxNumSwathC,
869 int swath_width_luma_ub,
870 int swath_width_chroma_ub,
871 unsigned int SwathHeightY,
872 unsigned int SwathHeightC,
873 double TWait,
874 double *DSTXAfterScaler,
875 double *DSTYAfterScaler,
876 double *DestinationLinesForPrefetch,
877 double *PrefetchBandwidth,
878 double *DestinationLinesToRequestVMInVBlank,
879 double *DestinationLinesToRequestRowInVBlank,
880 double *VRatioPrefetchY,
881 double *VRatioPrefetchC,
882 double *RequiredPrefetchPixDataBWLuma,
883 double *RequiredPrefetchPixDataBWChroma,
884 bool *NotEnoughTimeForDynamicMetadata,
885 double *Tno_bw,
886 double *prefetch_vmrow_bw,
887 double *Tdmdl_vm,
888 double *Tdmdl,
889 double *TSetup,
890 int *VUpdateOffsetPix,
891 double *VUpdateWidthPix,
892 double *VReadyOffsetPix)
893 {
894 bool MyError = false;
895 unsigned int DPPCycles, DISPCLKCycles;
896 double DSTTotalPixelsAfterScaler;
897 double LineTime;
898 double dst_y_prefetch_equ;
899 #ifdef __DML_VBA_DEBUG__
900 double Tsw_oto;
901 #endif
902 double prefetch_bw_oto;
903 double prefetch_bw_pr;
904 double Tvm_oto;
905 double Tr0_oto;
906 double Tvm_oto_lines;
907 double Tr0_oto_lines;
908 double dst_y_prefetch_oto;
909 double TimeForFetchingMetaPTE = 0;
910 double TimeForFetchingRowInVBlank = 0;
911 double LinesToRequestPrefetchPixelData = 0;
912 unsigned int HostVMDynamicLevelsTrips;
913 double trip_to_mem;
914 double Tvm_trips;
915 double Tr0_trips;
916 double Tvm_trips_rounded;
917 double Tr0_trips_rounded;
918 double Lsw_oto;
919 double Tpre_rounded;
920 double prefetch_bw_equ;
921 double Tvm_equ;
922 double Tr0_equ;
923 double Tdmbf;
924 double Tdmec;
925 double Tdmsks;
926 double prefetch_sw_bytes;
927 double bytes_pp;
928 double dep_bytes;
929 int max_vratio_pre = 4;
930 double min_Lsw;
931 double Tsw_est1 = 0;
932 double Tsw_est3 = 0;
933 double max_Tsw = 0;
934
935 if (GPUVMEnable == true && HostVMEnable == true) {
936 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
937 } else {
938 HostVMDynamicLevelsTrips = 0;
939 }
940 #ifdef __DML_VBA_DEBUG__
941 dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor);
942 #endif
943 CalculateVupdateAndDynamicMetadataParameters(
944 MaxInterDCNTileRepeaters,
945 myPipe->DPPCLK,
946 myPipe->DISPCLK,
947 myPipe->DCFCLKDeepSleep,
948 myPipe->PixelClock,
949 myPipe->HTotal,
950 myPipe->VBlank,
951 DynamicMetadataTransmittedBytes,
952 DynamicMetadataLinesBeforeActiveRequired,
953 myPipe->InterlaceEnable,
954 myPipe->ProgressiveToInterlaceUnitInOPP,
955 TSetup,
956 &Tdmbf,
957 &Tdmec,
958 &Tdmsks,
959 VUpdateOffsetPix,
960 VUpdateWidthPix,
961 VReadyOffsetPix);
962
963 LineTime = myPipe->HTotal / myPipe->PixelClock;
964 trip_to_mem = UrgentLatency;
965 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
966
967 #ifdef __DML_VBA_ALLOW_DELTA__
968 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
969 #else
970 if (DynamicMetadataVMEnabled == true) {
971 #endif
972 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
973 } else {
974 *Tdmdl = TWait + UrgentExtraLatency;
975 }
976
977 #ifdef __DML_VBA_ALLOW_DELTA__
978 if (DynamicMetadataEnable == false) {
979 *Tdmdl = 0.0;
980 }
981 #endif
982
983 if (DynamicMetadataEnable == true) {
984 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
985 *NotEnoughTimeForDynamicMetadata = true;
986 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
987 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
988 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
989 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks);
990 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
991 } else {
992 *NotEnoughTimeForDynamicMetadata = false;
993 }
994 } else {
995 *NotEnoughTimeForDynamicMetadata = false;
996 }
997
998 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
999
1000 if (myPipe->ScalerEnabled)
1001 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
1002 else
1003 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
1004
1005 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
1006
1007 DISPCLKCycles = DISPCLKDelaySubtotal;
1008
1009 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
1010 return true;
1011
1012 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;
1013
1014 #ifdef __DML_VBA_DEBUG__
1015 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
1016 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
1017 dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK);
1018 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
1019 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK);
1020 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
1021 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
1022 dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled);
1023 #endif
1024
1025 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
1026
1027 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
1028 *DSTYAfterScaler = 1;
1029 else
1030 *DSTYAfterScaler = 0;
1031
1032 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
1033 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
1034 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
1035
1036 #ifdef __DML_VBA_DEBUG__
1037 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
1038 #endif
1039
1040 MyError = false;
1041
1042 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
1043 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
1044 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
1045
1046 #ifdef __DML_VBA_ALLOW_DELTA__
1047 if (!myPipe->DCCEnable) {
1048 Tr0_trips = 0.0;
1049 Tr0_trips_rounded = 0.0;
1050 }
1051 #endif
1052
1053 if (!GPUVMEnable) {
1054 Tvm_trips = 0.0;
1055 Tvm_trips_rounded = 0.0;
1056 }
1057
1058 if (GPUVMEnable) {
1059 if (GPUVMPageTableLevels >= 3) {
1060 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
1061 } else {
1062 *Tno_bw = 0;
1063 }
1064 } else if (!myPipe->DCCEnable) {
1065 *Tno_bw = LineTime;
1066 } else {
1067 *Tno_bw = LineTime / 4;
1068 }
1069
1070 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12)
1071 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
1072 else
1073 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
1074 /*rev 99*/
1075 prefetch_bw_pr = bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane;
1076 prefetch_bw_pr = dml_min(1, myPipe->VRatio) * prefetch_bw_pr;
1077 max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime;
1078 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
1079 prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw);
1080
1081 min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre);
1082 Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4;
1083 #ifdef __DML_VBA_DEBUG__
1084 Tsw_oto = Lsw_oto * LineTime;
1085 #endif
1086
1087
1088 #ifdef __DML_VBA_DEBUG__
1089 dml_print("DML: HTotal: %d\n", myPipe->HTotal);
1090 dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto);
1091 dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY);
1092 dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub);
1093 dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY);
1094 dml_print("DML: Tsw_oto: %f\n", Tsw_oto);
1095 #endif
1096
1097 if (GPUVMEnable == true)
1098 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0);
1099 else
1100 Tvm_oto = LineTime / 4.0;
1101
1102 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1103 Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term)
1104 LineTime - Tvm_oto,
1105 LineTime / 4);
1106 } else {
1107 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
1108 }
1109
1110 #ifdef __DML_VBA_DEBUG__
1111 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
1112 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
1113 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte);
1114 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1115 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1116 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1117 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
1118 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
1119 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
1120 #endif
1121
1122 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
1123 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
1124 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
1125 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
1126 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
1127 Tpre_rounded = dst_y_prefetch_equ * LineTime;
1128
1129 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
1130
1131 if (prefetch_sw_bytes < dep_bytes)
1132 prefetch_sw_bytes = 2 * dep_bytes;
1133
1134 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
1135 dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines);
1136 dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines);
1137 dml_print("DML: Lsw_oto: %f\n", Lsw_oto);
1138 dml_print("DML: LineTime: %f\n", LineTime);
1139 dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ);
1140
1141 dml_print("DML: LineTime: %f\n", LineTime);
1142 dml_print("DML: VStartup: %d\n", VStartup);
1143 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
1144 dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup);
1145 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
1146 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
1147 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1148 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1149 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1150 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd\n", *Tdmdl_vm);
1151 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", *Tdmdl);
1152 dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler\n", *DSTXAfterScaler);
1153 dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler\n", *DSTYAfterScaler);
1154
1155 *PrefetchBandwidth = 0;
1156 *DestinationLinesToRequestVMInVBlank = 0;
1157 *DestinationLinesToRequestRowInVBlank = 0;
1158 *VRatioPrefetchY = 0;
1159 *VRatioPrefetchC = 0;
1160 *RequiredPrefetchPixDataBWLuma = 0;
1161 if (dst_y_prefetch_equ > 1) {
1162 double PrefetchBandwidth1;
1163 double PrefetchBandwidth2;
1164 double PrefetchBandwidth3;
1165 double PrefetchBandwidth4;
1166
1167 if (Tpre_rounded - *Tno_bw > 0) {
1168 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1169 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
1170 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
1171 } else {
1172 PrefetchBandwidth1 = 0;
1173 }
1174
1175 if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
1176 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1177 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
1178 }
1179
1180 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
1181 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
1182 else
1183 PrefetchBandwidth2 = 0;
1184
1185 if (Tpre_rounded - Tvm_trips_rounded > 0) {
1186 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1187 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
1188 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
1189 } else {
1190 PrefetchBandwidth3 = 0;
1191 }
1192
1193 #ifdef __DML_VBA_DEBUG__
1194 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
1195 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
1196 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
1197 #endif
1198 if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) {
1199 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1200 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
1201 }
1202
1203 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
1204 PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
1205 else
1206 PrefetchBandwidth4 = 0;
1207
1208 {
1209 bool Case1OK;
1210 bool Case2OK;
1211 bool Case3OK;
1212
1213 if (PrefetchBandwidth1 > 0) {
1214 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded
1215 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
1216 Case1OK = true;
1217 } else {
1218 Case1OK = false;
1219 }
1220 } else {
1221 Case1OK = false;
1222 }
1223
1224 if (PrefetchBandwidth2 > 0) {
1225 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded
1226 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
1227 Case2OK = true;
1228 } else {
1229 Case2OK = false;
1230 }
1231 } else {
1232 Case2OK = false;
1233 }
1234
1235 if (PrefetchBandwidth3 > 0) {
1236 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded
1237 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
1238 Case3OK = true;
1239 } else {
1240 Case3OK = false;
1241 }
1242 } else {
1243 Case3OK = false;
1244 }
1245
1246 if (Case1OK) {
1247 prefetch_bw_equ = PrefetchBandwidth1;
1248 } else if (Case2OK) {
1249 prefetch_bw_equ = PrefetchBandwidth2;
1250 } else if (Case3OK) {
1251 prefetch_bw_equ = PrefetchBandwidth3;
1252 } else {
1253 prefetch_bw_equ = PrefetchBandwidth4;
1254 }
1255
1256 #ifdef __DML_VBA_DEBUG__
1257 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
1258 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
1259 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
1260 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
1261 #endif
1262
1263 if (prefetch_bw_equ > 0) {
1264 if (GPUVMEnable == true) {
1265 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
1266 } else {
1267 Tvm_equ = LineTime / 4;
1268 }
1269
1270 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1271 Tr0_equ = dml_max4(
1272 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
1273 Tr0_trips,
1274 (LineTime - Tvm_equ) / 2,
1275 LineTime / 4);
1276 } else {
1277 Tr0_equ = (LineTime - Tvm_equ) / 2;
1278 }
1279 } else {
1280 Tvm_equ = 0;
1281 Tr0_equ = 0;
1282 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
1283 }
1284 }
1285
1286 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
1287 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
1288 TimeForFetchingMetaPTE = Tvm_oto;
1289 TimeForFetchingRowInVBlank = Tr0_oto;
1290 *PrefetchBandwidth = prefetch_bw_oto;
1291 } else {
1292 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
1293 TimeForFetchingMetaPTE = Tvm_equ;
1294 TimeForFetchingRowInVBlank = Tr0_equ;
1295 *PrefetchBandwidth = prefetch_bw_equ;
1296 }
1297
1298 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
1299
1300 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
1301
1302 #ifdef __DML_VBA_ALLOW_DELTA__
1303 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
1304 // See note above dated 5/30/2018
1305 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ?
1306 - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this??
1307 #else
1308 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
1309 #endif
1310
1311 #ifdef __DML_VBA_DEBUG__
1312 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
1313 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1314 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
1315 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1316 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1317 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1318 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
1319 #endif
1320
1321 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
1322
1323 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
1324 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1325 #ifdef __DML_VBA_DEBUG__
1326 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1327 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
1328 dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY);
1329 #endif
1330 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
1331 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
1332 *VRatioPrefetchY = dml_max(
1333 (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
1334 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
1335 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1336 } else {
1337 MyError = true;
1338 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1339 *VRatioPrefetchY = 0;
1340 }
1341 #ifdef __DML_VBA_DEBUG__
1342 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1343 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1344 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
1345 #endif
1346 }
1347
1348 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
1349 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1350
1351 #ifdef __DML_VBA_DEBUG__
1352 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1353 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
1354 dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC);
1355 #endif
1356 if ((SwathHeightC > 4) || VInitPreFillC > 3) {
1357 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
1358 *VRatioPrefetchC = dml_max(
1359 *VRatioPrefetchC,
1360 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
1361 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1362 } else {
1363 MyError = true;
1364 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1365 *VRatioPrefetchC = 0;
1366 }
1367 #ifdef __DML_VBA_DEBUG__
1368 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1369 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
1370 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
1371 #endif
1372 }
1373
1374 #ifdef __DML_VBA_DEBUG__
1375 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
1376 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
1377 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1378 #endif
1379
1380 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime;
1381
1382 #ifdef __DML_VBA_DEBUG__
1383 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma);
1384 #endif
1385
1386 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub
1387 / LineTime;
1388 } else {
1389 MyError = true;
1390 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1391 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
1392 *VRatioPrefetchY = 0;
1393 *VRatioPrefetchC = 0;
1394 *RequiredPrefetchPixDataBWLuma = 0;
1395 *RequiredPrefetchPixDataBWChroma = 0;
1396 }
1397
1398 dml_print(
1399 "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
1400 (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
1401 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
1402 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1403 dml_print(
1404 "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n",
1405 (double) LinesToRequestPrefetchPixelData * LineTime);
1406 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
1407 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
1408 dml_print(
1409 "DML: Tslack(pre): %fus - time left over in schedule\n",
1410 VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank
1411 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
1412 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1413
1414 } else {
1415 MyError = true;
1416 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1417 }
1418
1419 {
1420 double prefetch_vm_bw;
1421 double prefetch_row_bw;
1422
1423 if (PDEAndMetaPTEBytesFrame == 0) {
1424 prefetch_vm_bw = 0;
1425 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1426 #ifdef __DML_VBA_DEBUG__
1427 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1428 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1429 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1430 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1431 #endif
1432 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1433 #ifdef __DML_VBA_DEBUG__
1434 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
1435 #endif
1436 } else {
1437 prefetch_vm_bw = 0;
1438 MyError = true;
1439 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1440 }
1441
1442 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1443 prefetch_row_bw = 0;
1444 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1445 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1446
1447 #ifdef __DML_VBA_DEBUG__
1448 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1449 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1450 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1451 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
1452 #endif
1453 } else {
1454 prefetch_row_bw = 0;
1455 MyError = true;
1456 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1457 }
1458
1459 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1460 }
1461
1462 if (MyError) {
1463 *PrefetchBandwidth = 0;
1464 *DestinationLinesToRequestVMInVBlank = 0;
1465 *DestinationLinesToRequestRowInVBlank = 0;
1466 *DestinationLinesForPrefetch = 0;
1467 *VRatioPrefetchY = 0;
1468 *VRatioPrefetchC = 0;
1469 *RequiredPrefetchPixDataBWLuma = 0;
1470 *RequiredPrefetchPixDataBWChroma = 0;
1471 }
1472
1473 return MyError;
1474 }
1475
/*
 * Snap Clock onto the grid of values (4 * VCOSpeed) / k, picking the
 * divider k by rounding the ideal ratio (4 * VCOSpeed) / Clock down with
 * dml_floor(..., 1).  Presuming dml_floor() with granularity 1 yields the
 * nearest whole number at or below its argument, the smaller divider means
 * the returned clock is never below the requested Clock.
 *
 * Clock and VCOSpeed are expected to use the same unit; the result is in
 * that unit as well.
 *
 * NOTE(review): no guard exists for a ratio below 1 (Clock larger than
 * 4 * VCOSpeed) or for Clock == 0; callers are presumably expected to pass
 * values for which the divider is at least 1 - confirm against call sites.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	const double dfs_ref_clock = VCOSpeed * 4;
	const double divider = dml_floor(dfs_ref_clock / Clock, 1);

	return dfs_ref_clock / divider;
}
1480
/*
 * Snap Clock onto the grid of values (4 * VCOSpeed) / k, picking the
 * divider k by rounding the ideal ratio (4 * VCOSpeed) / Clock up with
 * dml_ceil(..., 1).  Presuming dml_ceil() with granularity 1 yields the
 * nearest whole number at or above its argument, the larger divider means
 * the returned clock never exceeds the requested Clock.
 *
 * Clock and VCOSpeed are expected to use the same unit; the result is in
 * that unit as well.
 *
 * NOTE(review): Clock == 0 is not guarded against here - confirm that
 * callers never pass it.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	const double ideal_divider = VCOSpeed * 4.0 / Clock;
	const double divider = dml_ceil(ideal_divider, 1);

	return VCOSpeed * 4 / divider;
}
1485
1486 static void CalculateDCCConfiguration(
1487 bool DCCEnabled,
1488 bool DCCProgrammingAssumesScanDirectionUnknown,
1489 enum source_format_class SourcePixelFormat,
1490 unsigned int SurfaceWidthLuma,
1491 unsigned int SurfaceWidthChroma,
1492 unsigned int SurfaceHeightLuma,
1493 unsigned int SurfaceHeightChroma,
1494 double DETBufferSize,
1495 unsigned int RequestHeight256ByteLuma,
1496 unsigned int RequestHeight256ByteChroma,
1497 enum dm_swizzle_mode TilingFormat,
1498 unsigned int BytePerPixelY,
1499 unsigned int BytePerPixelC,
1500 double BytePerPixelDETY,
1501 double BytePerPixelDETC,
1502 enum scan_direction_class ScanOrientation,
1503 unsigned int *MaxUncompressedBlockLuma,
1504 unsigned int *MaxUncompressedBlockChroma,
1505 unsigned int *MaxCompressedBlockLuma,
1506 unsigned int *MaxCompressedBlockChroma,
1507 unsigned int *IndependentBlockLuma,
1508 unsigned int *IndependentBlockChroma)
1509 {
1510 int yuv420;
1511 int horz_div_l;
1512 int horz_div_c;
1513 int vert_div_l;
1514 int vert_div_c;
1515
1516 int swath_buf_size;
1517 double detile_buf_vp_horz_limit;
1518 double detile_buf_vp_vert_limit;
1519
1520 int MAS_vp_horz_limit;
1521 int MAS_vp_vert_limit;
1522 int max_vp_horz_width;
1523 int max_vp_vert_height;
1524 int eff_surf_width_l;
1525 int eff_surf_width_c;
1526 int eff_surf_height_l;
1527 int eff_surf_height_c;
1528
1529 int full_swath_bytes_horz_wc_l;
1530 int full_swath_bytes_horz_wc_c;
1531 int full_swath_bytes_vert_wc_l;
1532 int full_swath_bytes_vert_wc_c;
1533 int req128_horz_wc_l;
1534 int req128_horz_wc_c;
1535 int req128_vert_wc_l;
1536 int req128_vert_wc_c;
1537 int segment_order_horz_contiguous_luma;
1538 int segment_order_horz_contiguous_chroma;
1539 int segment_order_vert_contiguous_luma;
1540 int segment_order_vert_contiguous_chroma;
1541
1542 typedef enum {
1543 REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
1544 } RequestType;
1545 RequestType RequestLuma;
1546 RequestType RequestChroma;
1547
1548 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
1549 horz_div_l = 1;
1550 horz_div_c = 1;
1551 vert_div_l = 1;
1552 vert_div_c = 1;
1553
1554 if (BytePerPixelY == 1)
1555 vert_div_l = 0;
1556 if (BytePerPixelC == 1)
1557 vert_div_c = 0;
1558 if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1559 horz_div_l = 0;
1560 if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1561 horz_div_c = 0;
1562
1563 if (BytePerPixelC == 0) {
1564 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1565 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
1566 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
1567 } else {
1568 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1569 detile_buf_vp_horz_limit = (double) swath_buf_size
1570 / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)
1571 + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
1572 detile_buf_vp_vert_limit = (double) swath_buf_size
1573 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
1574 }
1575
1576 if (SourcePixelFormat == dm_420_10) {
1577 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1578 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
1579 }
1580
1581 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
1582 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
1583
1584 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760;
1585 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
1586 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
1587 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
1588 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
1589 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1590 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
1591 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
1592
1593 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1594 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1595 if (BytePerPixelC > 0) {
1596 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
1597 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
1598 } else {
1599 full_swath_bytes_horz_wc_c = 0;
1600 full_swath_bytes_vert_wc_c = 0;
1601 }
1602
1603 if (SourcePixelFormat == dm_420_10) {
1604 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
1605 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
1606 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
1607 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
1608 }
1609
1610 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1611 req128_horz_wc_l = 0;
1612 req128_horz_wc_c = 0;
1613 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) {
1614 req128_horz_wc_l = 0;
1615 req128_horz_wc_c = 1;
1616 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1617 req128_horz_wc_l = 1;
1618 req128_horz_wc_c = 0;
1619 } else {
1620 req128_horz_wc_l = 1;
1621 req128_horz_wc_c = 1;
1622 }
1623
1624 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1625 req128_vert_wc_l = 0;
1626 req128_vert_wc_c = 0;
1627 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) {
1628 req128_vert_wc_l = 0;
1629 req128_vert_wc_c = 1;
1630 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1631 req128_vert_wc_l = 1;
1632 req128_vert_wc_c = 0;
1633 } else {
1634 req128_vert_wc_l = 1;
1635 req128_vert_wc_c = 1;
1636 }
1637
1638 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1639 segment_order_horz_contiguous_luma = 0;
1640 } else {
1641 segment_order_horz_contiguous_luma = 1;
1642 }
1643 if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1644 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1645 segment_order_vert_contiguous_luma = 0;
1646 } else {
1647 segment_order_vert_contiguous_luma = 1;
1648 }
1649 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1650 segment_order_horz_contiguous_chroma = 0;
1651 } else {
1652 segment_order_horz_contiguous_chroma = 1;
1653 }
1654 if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1655 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1656 segment_order_vert_contiguous_chroma = 0;
1657 } else {
1658 segment_order_vert_contiguous_chroma = 1;
1659 }
1660
1661 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1662 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1663 RequestLuma = REQ_256Bytes;
1664 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1665 RequestLuma = REQ_128BytesNonContiguous;
1666 } else {
1667 RequestLuma = REQ_128BytesContiguous;
1668 }
1669 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1670 RequestChroma = REQ_256Bytes;
1671 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
1672 RequestChroma = REQ_128BytesNonContiguous;
1673 } else {
1674 RequestChroma = REQ_128BytesContiguous;
1675 }
1676 } else if (ScanOrientation != dm_vert) {
1677 if (req128_horz_wc_l == 0) {
1678 RequestLuma = REQ_256Bytes;
1679 } else if (segment_order_horz_contiguous_luma == 0) {
1680 RequestLuma = REQ_128BytesNonContiguous;
1681 } else {
1682 RequestLuma = REQ_128BytesContiguous;
1683 }
1684 if (req128_horz_wc_c == 0) {
1685 RequestChroma = REQ_256Bytes;
1686 } else if (segment_order_horz_contiguous_chroma == 0) {
1687 RequestChroma = REQ_128BytesNonContiguous;
1688 } else {
1689 RequestChroma = REQ_128BytesContiguous;
1690 }
1691 } else {
1692 if (req128_vert_wc_l == 0) {
1693 RequestLuma = REQ_256Bytes;
1694 } else if (segment_order_vert_contiguous_luma == 0) {
1695 RequestLuma = REQ_128BytesNonContiguous;
1696 } else {
1697 RequestLuma = REQ_128BytesContiguous;
1698 }
1699 if (req128_vert_wc_c == 0) {
1700 RequestChroma = REQ_256Bytes;
1701 } else if (segment_order_vert_contiguous_chroma == 0) {
1702 RequestChroma = REQ_128BytesNonContiguous;
1703 } else {
1704 RequestChroma = REQ_128BytesContiguous;
1705 }
1706 }
1707
1708 if (RequestLuma == REQ_256Bytes) {
1709 *MaxUncompressedBlockLuma = 256;
1710 *MaxCompressedBlockLuma = 256;
1711 *IndependentBlockLuma = 0;
1712 } else if (RequestLuma == REQ_128BytesContiguous) {
1713 *MaxUncompressedBlockLuma = 256;
1714 *MaxCompressedBlockLuma = 128;
1715 *IndependentBlockLuma = 128;
1716 } else {
1717 *MaxUncompressedBlockLuma = 256;
1718 *MaxCompressedBlockLuma = 64;
1719 *IndependentBlockLuma = 64;
1720 }
1721
1722 if (RequestChroma == REQ_256Bytes) {
1723 *MaxUncompressedBlockChroma = 256;
1724 *MaxCompressedBlockChroma = 256;
1725 *IndependentBlockChroma = 0;
1726 } else if (RequestChroma == REQ_128BytesContiguous) {
1727 *MaxUncompressedBlockChroma = 256;
1728 *MaxCompressedBlockChroma = 128;
1729 *IndependentBlockChroma = 128;
1730 } else {
1731 *MaxUncompressedBlockChroma = 256;
1732 *MaxCompressedBlockChroma = 64;
1733 *IndependentBlockChroma = 64;
1734 }
1735
1736 if (DCCEnabled != true || BytePerPixelC == 0) {
1737 *MaxUncompressedBlockChroma = 0;
1738 *MaxCompressedBlockChroma = 0;
1739 *IndependentBlockChroma = 0;
1740 }
1741
1742 if (DCCEnabled != true) {
1743 *MaxUncompressedBlockLuma = 0;
1744 *MaxCompressedBlockLuma = 0;
1745 *IndependentBlockLuma = 0;
1746 }
1747 }
1748
1749 static double CalculatePrefetchSourceLines(
1750 struct display_mode_lib *mode_lib,
1751 double VRatio,
1752 double vtaps,
1753 bool Interlace,
1754 bool ProgressiveToInterlaceUnitInOPP,
1755 unsigned int SwathHeight,
1756 unsigned int ViewportYStart,
1757 double *VInitPreFill,
1758 unsigned int *MaxNumSwath)
1759 {
1760 struct vba_vars_st *v = &mode_lib->vba;
1761 unsigned int MaxPartialSwath;
1762
1763 if (ProgressiveToInterlaceUnitInOPP)
1764 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1765 else
1766 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
1767
1768 if (!v->IgnoreViewportPositioning) {
1769
1770 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
1771
1772 if (*VInitPreFill > 1.0)
1773 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1774 else
1775 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight;
1776 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
1777
1778 } else {
1779
1780 if (ViewportYStart != 0)
1781 dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1782
1783 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1784
1785 if (*VInitPreFill > 1.0)
1786 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1787 else
1788 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight;
1789 }
1790
1791 #ifdef __DML_VBA_DEBUG__
1792 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
1793 dml_print("DML::%s: vtaps = %f\n", __func__, vtaps);
1794 dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill);
1795 dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP);
1796 dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning);
1797 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
1798 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
1799 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
1800 dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath);
1801 #endif
1802 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
1803 }
1804
1805 static unsigned int CalculateVMAndRowBytes(
1806 struct display_mode_lib *mode_lib,
1807 bool DCCEnable,
1808 unsigned int BlockHeight256Bytes,
1809 unsigned int BlockWidth256Bytes,
1810 enum source_format_class SourcePixelFormat,
1811 unsigned int SurfaceTiling,
1812 unsigned int BytePerPixel,
1813 enum scan_direction_class ScanDirection,
1814 unsigned int SwathWidth,
1815 unsigned int ViewportHeight,
1816 bool GPUVMEnable,
1817 bool HostVMEnable,
1818 unsigned int HostVMMaxNonCachedPageTableLevels,
1819 unsigned int GPUVMMinPageSize,
1820 unsigned int HostVMMinPageSize,
1821 unsigned int PTEBufferSizeInRequests,
1822 unsigned int Pitch,
1823 unsigned int DCCMetaPitch,
1824 unsigned int *MacroTileWidth,
1825 unsigned int *MetaRowByte,
1826 unsigned int *PixelPTEBytesPerRow,
1827 bool *PTEBufferSizeNotExceeded,
1828 int *dpte_row_width_ub,
1829 unsigned int *dpte_row_height,
1830 unsigned int *MetaRequestWidth,
1831 unsigned int *MetaRequestHeight,
1832 unsigned int *meta_row_width,
1833 unsigned int *meta_row_height,
1834 int *vm_group_bytes,
1835 unsigned int *dpte_group_bytes,
1836 unsigned int *PixelPTEReqWidth,
1837 unsigned int *PixelPTEReqHeight,
1838 unsigned int *PTERequestSize,
1839 int *DPDE0BytesFrame,
1840 int *MetaPTEBytesFrame)
1841 {
1842 struct vba_vars_st *v = &mode_lib->vba;
1843 unsigned int MPDEBytesFrame;
1844 unsigned int DCCMetaSurfaceBytes;
1845 unsigned int MacroTileSizeBytes;
1846 unsigned int MacroTileHeight;
1847 unsigned int ExtraDPDEBytesFrame;
1848 unsigned int PDEAndMetaPTEBytesFrame;
1849 unsigned int PixelPTEReqHeightPTEs = 0;
1850 unsigned int HostVMDynamicLevels = 0;
1851 double FractionOfPTEReturnDrop;
1852
1853 if (GPUVMEnable == true && HostVMEnable == true) {
1854 if (HostVMMinPageSize < 2048) {
1855 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1856 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1857 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1858 } else {
1859 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
1860 }
1861 }
1862
1863 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1864 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1865 if (ScanDirection != dm_vert) {
1866 *meta_row_height = *MetaRequestHeight;
1867 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
1868 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1869 } else {
1870 *meta_row_height = *MetaRequestWidth;
1871 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
1872 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1873 }
1874 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1875 if (GPUVMEnable == true) {
1876 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1877 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1);
1878 } else {
1879 *MetaPTEBytesFrame = 0;
1880 MPDEBytesFrame = 0;
1881 }
1882
1883 if (DCCEnable != true) {
1884 *MetaPTEBytesFrame = 0;
1885 MPDEBytesFrame = 0;
1886 *MetaRowByte = 0;
1887 }
1888
1889 if (SurfaceTiling == dm_sw_linear) {
1890 MacroTileSizeBytes = 256;
1891 MacroTileHeight = BlockHeight256Bytes;
1892 } else {
1893 MacroTileSizeBytes = 65536;
1894 MacroTileHeight = 16 * BlockHeight256Bytes;
1895 }
1896 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
1897
1898 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) {
1899 if (ScanDirection != dm_vert) {
1900 *DPDE0BytesFrame = 64
1901 * (dml_ceil(
1902 ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1903 / (8 * 2097152),
1904 1) + 1);
1905 } else {
1906 *DPDE0BytesFrame = 64
1907 * (dml_ceil(
1908 ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1909 / (8 * 2097152),
1910 1) + 1);
1911 }
1912 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2);
1913 } else {
1914 *DPDE0BytesFrame = 0;
1915 ExtraDPDEBytesFrame = 0;
1916 }
1917
1918 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
1919
1920 #ifdef __DML_VBA_DEBUG__
1921 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
1922 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
1923 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
1924 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
1925 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1926 #endif
1927
1928 if (HostVMEnable == true) {
1929 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
1930 }
1931 #ifdef __DML_VBA_DEBUG__
1932 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1933 #endif
1934
1935 if (SurfaceTiling == dm_sw_linear) {
1936 PixelPTEReqHeightPTEs = 1;
1937 *PixelPTEReqHeight = 1;
1938 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
1939 *PTERequestSize = 64;
1940 FractionOfPTEReturnDrop = 0;
1941 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
1942 PixelPTEReqHeightPTEs = 16;
1943 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
1944 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
1945 *PTERequestSize = 128;
1946 FractionOfPTEReturnDrop = 0;
1947 } else {
1948 PixelPTEReqHeightPTEs = 1;
1949 *PixelPTEReqHeight = MacroTileHeight;
1950 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1951 *PTERequestSize = 64;
1952 FractionOfPTEReturnDrop = 0;
1953 }
1954
1955 if (SurfaceTiling == dm_sw_linear) {
1956 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
1957 *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1958 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1959 } else if (ScanDirection != dm_vert) {
1960 *dpte_row_height = *PixelPTEReqHeight;
1961 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1962 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1963 } else {
1964 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
1965 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
1966 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
1967 }
1968
1969 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) {
1970 *PTEBufferSizeNotExceeded = true;
1971 } else {
1972 *PTEBufferSizeNotExceeded = false;
1973 }
1974
1975 if (GPUVMEnable != true) {
1976 *PixelPTEBytesPerRow = 0;
1977 *PTEBufferSizeNotExceeded = true;
1978 }
1979
1980 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
1981
1982 if (HostVMEnable == true) {
1983 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
1984 }
1985
1986 if (HostVMEnable == true) {
1987 *vm_group_bytes = 512;
1988 *dpte_group_bytes = 512;
1989 } else if (GPUVMEnable == true) {
1990 *vm_group_bytes = 2048;
1991 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
1992 *dpte_group_bytes = 512;
1993 } else {
1994 *dpte_group_bytes = 2048;
1995 }
1996 } else {
1997 *vm_group_bytes = 0;
1998 *dpte_group_bytes = 0;
1999 }
2000 return PDEAndMetaPTEBytesFrame;
2001 }
2002
2003 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib)
2004 {
2005 struct vba_vars_st *v = &mode_lib->vba;
2006 unsigned int j, k;
2007 double HostVMInefficiencyFactor = 1.0;
2008 bool NoChromaPlanes = true;
2009 int ReorderBytes;
2010 double VMDataOnlyReturnBW;
2011 double MaxTotalRDBandwidth = 0;
2012 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
2013
2014 v->WritebackDISPCLK = 0.0;
2015 v->DISPCLKWithRamping = 0;
2016 v->DISPCLKWithoutRamping = 0;
2017 v->GlobalDPPCLK = 0.0;
2018 /* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */
2019 {
2020 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
2021 v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb],
2022 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
2023 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth;
2024
2025 if (v->HostVMEnable != true) {
2026 v->ReturnBW = dml_min(
2027 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2028 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
2029 } else {
2030 v->ReturnBW = dml_min(
2031 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2032 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
2033 }
2034 }
2035 /* End DAL custom code */
2036
2037 // DISPCLK and DPPCLK Calculation
2038 //
2039 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2040 if (v->WritebackEnable[k]) {
2041 v->WritebackDISPCLK = dml_max(
2042 v->WritebackDISPCLK,
2043 dml314_CalculateWriteBackDISPCLK(
2044 v->WritebackPixelFormat[k],
2045 v->PixelClock[k],
2046 v->WritebackHRatio[k],
2047 v->WritebackVRatio[k],
2048 v->WritebackHTaps[k],
2049 v->WritebackVTaps[k],
2050 v->WritebackSourceWidth[k],
2051 v->WritebackDestinationWidth[k],
2052 v->HTotal[k],
2053 v->WritebackLineBufferSize));
2054 }
2055 }
2056
2057 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2058 if (v->HRatio[k] > 1) {
2059 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
2060 v->MaxDCHUBToPSCLThroughput,
2061 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
2062 } else {
2063 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2064 }
2065
2066 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
2067 * dml_max(
2068 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
2069 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
2070
2071 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
2072 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
2073 }
2074
2075 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12
2076 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
2077 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
2078 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
2079 } else {
2080 if (v->HRatioChroma[k] > 1) {
2081 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
2082 v->MaxDCHUBToPSCLThroughput,
2083 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
2084 } else {
2085 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2086 }
2087 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
2088 * dml_max3(
2089 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
2090 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k],
2091 1.0);
2092
2093 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) {
2094 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k];
2095 }
2096
2097 v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma);
2098 }
2099 }
2100
2101 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2102 if (v->BlendingAndTiming[k] != k)
2103 continue;
2104 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
2105 v->DISPCLKWithRamping = dml_max(
2106 v->DISPCLKWithRamping,
2107 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2108 * (1 + v->DISPCLKRampingMargin / 100));
2109 v->DISPCLKWithoutRamping = dml_max(
2110 v->DISPCLKWithoutRamping,
2111 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2112 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2113 v->DISPCLKWithRamping = dml_max(
2114 v->DISPCLKWithRamping,
2115 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2116 * (1 + v->DISPCLKRampingMargin / 100));
2117 v->DISPCLKWithoutRamping = dml_max(
2118 v->DISPCLKWithoutRamping,
2119 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2120 } else {
2121 v->DISPCLKWithRamping = dml_max(
2122 v->DISPCLKWithRamping,
2123 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100));
2124 v->DISPCLKWithoutRamping = dml_max(
2125 v->DISPCLKWithoutRamping,
2126 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2127 }
2128 }
2129
2130 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK);
2131 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK);
2132
2133 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
2134 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed);
2135 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed);
2136 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
2137 v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz,
2138 v->DISPCLKDPPCLKVCOSpeed);
2139 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2140 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity;
2141 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2142 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
2143 } else {
2144 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity;
2145 }
2146 v->DISPCLK = v->DISPCLK_calculated;
2147 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2148
2149 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2150 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2151 v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]);
2152 }
2153 v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed);
2154 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2155 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1);
2156 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2157 }
2158
2159 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2160 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2161 }
2162
2163 // Urgent and B P-State/DRAM Clock Change Watermark
2164 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2165 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2166
2167 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2168 CalculateBytePerPixelAnd256BBlockSizes(
2169 v->SourcePixelFormat[k],
2170 v->SurfaceTiling[k],
2171 &v->BytePerPixelY[k],
2172 &v->BytePerPixelC[k],
2173 &v->BytePerPixelDETY[k],
2174 &v->BytePerPixelDETC[k],
2175 &v->BlockHeight256BytesY[k],
2176 &v->BlockHeight256BytesC[k],
2177 &v->BlockWidth256BytesY[k],
2178 &v->BlockWidth256BytesC[k]);
2179 }
2180
2181 CalculateSwathWidth(
2182 false,
2183 v->NumberOfActivePlanes,
2184 v->SourcePixelFormat,
2185 v->SourceScan,
2186 v->ViewportWidth,
2187 v->ViewportHeight,
2188 v->SurfaceWidthY,
2189 v->SurfaceWidthC,
2190 v->SurfaceHeightY,
2191 v->SurfaceHeightC,
2192 v->ODMCombineEnabled,
2193 v->BytePerPixelY,
2194 v->BytePerPixelC,
2195 v->BlockHeight256BytesY,
2196 v->BlockHeight256BytesC,
2197 v->BlockWidth256BytesY,
2198 v->BlockWidth256BytesC,
2199 v->BlendingAndTiming,
2200 v->HActive,
2201 v->HRatio,
2202 v->DPPPerPlane,
2203 v->SwathWidthSingleDPPY,
2204 v->SwathWidthSingleDPPC,
2205 v->SwathWidthY,
2206 v->SwathWidthC,
2207 v->dummyinteger3,
2208 v->dummyinteger4,
2209 v->swath_width_luma_ub,
2210 v->swath_width_chroma_ub);
2211
2212 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2213 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k])
2214 * v->VRatio[k];
2215 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k])
2216 * v->VRatioChroma[k];
2217 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2218 }
2219
2220 // DCFCLK Deep Sleep
2221 CalculateDCFCLKDeepSleep(
2222 mode_lib,
2223 v->NumberOfActivePlanes,
2224 v->BytePerPixelY,
2225 v->BytePerPixelC,
2226 v->VRatio,
2227 v->VRatioChroma,
2228 v->SwathWidthY,
2229 v->SwathWidthC,
2230 v->DPPPerPlane,
2231 v->HRatio,
2232 v->HRatioChroma,
2233 v->PixelClock,
2234 v->PSCL_THROUGHPUT_LUMA,
2235 v->PSCL_THROUGHPUT_CHROMA,
2236 v->DPPCLK,
2237 v->ReadBandwidthPlaneLuma,
2238 v->ReadBandwidthPlaneChroma,
2239 v->ReturnBusWidth,
2240 &v->DCFCLKDeepSleep);
2241
2242 // DSCCLK
2243 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2244 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2245 v->DSCCLK_calculated[k] = 0.0;
2246 } else {
2247 if (v->OutputFormat[k] == dm_420)
2248 v->DSCFormatFactor = 2;
2249 else if (v->OutputFormat[k] == dm_444)
2250 v->DSCFormatFactor = 1;
2251 else if (v->OutputFormat[k] == dm_n422)
2252 v->DSCFormatFactor = 2;
2253 else
2254 v->DSCFormatFactor = 1;
2255 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2256 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor
2257 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2258 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2259 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor
2260 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2261 else
2262 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor
2263 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2264 }
2265 }
2266
2267 // DSC Delay
2268 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2269 double BPP = v->OutputBpp[k];
2270
2271 if (v->DSCEnabled[k] && BPP != 0) {
2272 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2273 v->DSCDelay[k] = dscceComputeDelay(
2274 v->DSCInputBitPerComponent[k],
2275 BPP,
2276 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2277 v->NumberOfDSCSlices[k],
2278 v->OutputFormat[k],
2279 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2280 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2281 v->DSCDelay[k] = 2
2282 * (dscceComputeDelay(
2283 v->DSCInputBitPerComponent[k],
2284 BPP,
2285 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2286 v->NumberOfDSCSlices[k] / 2.0,
2287 v->OutputFormat[k],
2288 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2289 } else {
2290 v->DSCDelay[k] = 4
2291 * (dscceComputeDelay(
2292 v->DSCInputBitPerComponent[k],
2293 BPP,
2294 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2295 v->NumberOfDSCSlices[k] / 4.0,
2296 v->OutputFormat[k],
2297 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2298 }
2299 v->DSCDelay[k] = v->DSCDelay[k] + (v->HTotal[k] - v->HActive[k]) * dml_ceil((double) v->DSCDelay[k] / v->HActive[k], 1);
2300 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2301 } else {
2302 v->DSCDelay[k] = 0;
2303 }
2304 }
2305
2306 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2307 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2308 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j])
2309 v->DSCDelay[k] = v->DSCDelay[j];
2310
2311 // Prefetch
2312 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2313 unsigned int PDEAndMetaPTEBytesFrameY;
2314 unsigned int PixelPTEBytesPerRowY;
2315 unsigned int MetaRowByteY;
2316 unsigned int MetaRowByteC;
2317 unsigned int PDEAndMetaPTEBytesFrameC;
2318 unsigned int PixelPTEBytesPerRowC;
2319 bool PTEBufferSizeNotExceededY;
2320 bool PTEBufferSizeNotExceededC;
2321
2322 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2323 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2324 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2325 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2326 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2327 } else {
2328 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2329 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2330 }
2331
2332 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2333 mode_lib,
2334 v->DCCEnable[k],
2335 v->BlockHeight256BytesC[k],
2336 v->BlockWidth256BytesC[k],
2337 v->SourcePixelFormat[k],
2338 v->SurfaceTiling[k],
2339 v->BytePerPixelC[k],
2340 v->SourceScan[k],
2341 v->SwathWidthC[k],
2342 v->ViewportHeightChroma[k],
2343 v->GPUVMEnable,
2344 v->HostVMEnable,
2345 v->HostVMMaxNonCachedPageTableLevels,
2346 v->GPUVMMinPageSize,
2347 v->HostVMMinPageSize,
2348 v->PTEBufferSizeInRequestsForChroma,
2349 v->PitchC[k],
2350 v->DCCMetaPitchC[k],
2351 &v->MacroTileWidthC[k],
2352 &MetaRowByteC,
2353 &PixelPTEBytesPerRowC,
2354 &PTEBufferSizeNotExceededC,
2355 &v->dpte_row_width_chroma_ub[k],
2356 &v->dpte_row_height_chroma[k],
2357 &v->meta_req_width_chroma[k],
2358 &v->meta_req_height_chroma[k],
2359 &v->meta_row_width_chroma[k],
2360 &v->meta_row_height_chroma[k],
2361 &v->dummyinteger1,
2362 &v->dummyinteger2,
2363 &v->PixelPTEReqWidthC[k],
2364 &v->PixelPTEReqHeightC[k],
2365 &v->PTERequestSizeC[k],
2366 &v->dpde0_bytes_per_frame_ub_c[k],
2367 &v->meta_pte_bytes_per_frame_ub_c[k]);
2368
2369 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2370 mode_lib,
2371 v->VRatioChroma[k],
2372 v->VTAPsChroma[k],
2373 v->Interlace[k],
2374 v->ProgressiveToInterlaceUnitInOPP,
2375 v->SwathHeightC[k],
2376 v->ViewportYStartC[k],
2377 &v->VInitPreFillC[k],
2378 &v->MaxNumSwathC[k]);
2379 } else {
2380 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2381 v->PTEBufferSizeInRequestsForChroma = 0;
2382 PixelPTEBytesPerRowC = 0;
2383 PDEAndMetaPTEBytesFrameC = 0;
2384 MetaRowByteC = 0;
2385 v->MaxNumSwathC[k] = 0;
2386 v->PrefetchSourceLinesC[k] = 0;
2387 }
2388
2389 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2390 mode_lib,
2391 v->DCCEnable[k],
2392 v->BlockHeight256BytesY[k],
2393 v->BlockWidth256BytesY[k],
2394 v->SourcePixelFormat[k],
2395 v->SurfaceTiling[k],
2396 v->BytePerPixelY[k],
2397 v->SourceScan[k],
2398 v->SwathWidthY[k],
2399 v->ViewportHeight[k],
2400 v->GPUVMEnable,
2401 v->HostVMEnable,
2402 v->HostVMMaxNonCachedPageTableLevels,
2403 v->GPUVMMinPageSize,
2404 v->HostVMMinPageSize,
2405 v->PTEBufferSizeInRequestsForLuma,
2406 v->PitchY[k],
2407 v->DCCMetaPitchY[k],
2408 &v->MacroTileWidthY[k],
2409 &MetaRowByteY,
2410 &PixelPTEBytesPerRowY,
2411 &PTEBufferSizeNotExceededY,
2412 &v->dpte_row_width_luma_ub[k],
2413 &v->dpte_row_height[k],
2414 &v->meta_req_width[k],
2415 &v->meta_req_height[k],
2416 &v->meta_row_width[k],
2417 &v->meta_row_height[k],
2418 &v->vm_group_bytes[k],
2419 &v->dpte_group_bytes[k],
2420 &v->PixelPTEReqWidthY[k],
2421 &v->PixelPTEReqHeightY[k],
2422 &v->PTERequestSizeY[k],
2423 &v->dpde0_bytes_per_frame_ub_l[k],
2424 &v->meta_pte_bytes_per_frame_ub_l[k]);
2425
2426 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2427 mode_lib,
2428 v->VRatio[k],
2429 v->vtaps[k],
2430 v->Interlace[k],
2431 v->ProgressiveToInterlaceUnitInOPP,
2432 v->SwathHeightY[k],
2433 v->ViewportYStartY[k],
2434 &v->VInitPreFillY[k],
2435 &v->MaxNumSwathY[k]);
2436 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2437 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2438 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2439
2440 CalculateRowBandwidth(
2441 v->GPUVMEnable,
2442 v->SourcePixelFormat[k],
2443 v->VRatio[k],
2444 v->VRatioChroma[k],
2445 v->DCCEnable[k],
2446 v->HTotal[k] / v->PixelClock[k],
2447 MetaRowByteY,
2448 MetaRowByteC,
2449 v->meta_row_height[k],
2450 v->meta_row_height_chroma[k],
2451 PixelPTEBytesPerRowY,
2452 PixelPTEBytesPerRowC,
2453 v->dpte_row_height[k],
2454 v->dpte_row_height_chroma[k],
2455 &v->meta_row_bw[k],
2456 &v->dpte_row_bw[k]);
2457 }
2458
2459 v->TotalDCCActiveDPP = 0;
2460 v->TotalActiveDPP = 0;
2461 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2462 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k];
2463 if (v->DCCEnable[k])
2464 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k];
2465 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2466 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
2467 NoChromaPlanes = false;
2468 }
2469
2470 ReorderBytes = v->NumberOfChannels
2471 * dml_max3(
2472 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2473 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2474 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2475
2476 VMDataOnlyReturnBW = dml_min(
2477 dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn)
2478 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2479 v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth
2480 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
2481
2482 #ifdef __DML_VBA_DEBUG__
2483 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth);
2484 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK);
2485 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock);
2486 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn);
2487 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency);
2488 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed);
2489 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels);
2490 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth);
2491 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly);
2492 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
2493 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW);
2494 #endif
2495
2496 if (v->GPUVMEnable && v->HostVMEnable)
2497 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW;
2498
2499 v->UrgentExtraLatency = CalculateExtraLatency(
2500 v->RoundTripPingLatencyCycles,
2501 ReorderBytes,
2502 v->DCFCLK,
2503 v->TotalActiveDPP,
2504 v->PixelChunkSizeInKByte,
2505 v->TotalDCCActiveDPP,
2506 v->MetaChunkSize,
2507 v->ReturnBW,
2508 v->GPUVMEnable,
2509 v->HostVMEnable,
2510 v->NumberOfActivePlanes,
2511 v->DPPPerPlane,
2512 v->dpte_group_bytes,
2513 HostVMInefficiencyFactor,
2514 v->HostVMMinPageSize,
2515 v->HostVMMaxNonCachedPageTableLevels);
2516
2517 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2518
2519 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2520 if (v->BlendingAndTiming[k] == k) {
2521 if (v->WritebackEnable[k] == true) {
2522 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency
2523 + CalculateWriteBackDelay(
2524 v->WritebackPixelFormat[k],
2525 v->WritebackHRatio[k],
2526 v->WritebackVRatio[k],
2527 v->WritebackVTaps[k],
2528 v->WritebackDestinationWidth[k],
2529 v->WritebackDestinationHeight[k],
2530 v->WritebackSourceHeight[k],
2531 v->HTotal[k]) / v->DISPCLK;
2532 } else
2533 v->WritebackDelay[v->VoltageLevel][k] = 0;
2534 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2535 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) {
2536 v->WritebackDelay[v->VoltageLevel][k] = dml_max(
2537 v->WritebackDelay[v->VoltageLevel][k],
2538 v->WritebackLatency
2539 + CalculateWriteBackDelay(
2540 v->WritebackPixelFormat[j],
2541 v->WritebackHRatio[j],
2542 v->WritebackVRatio[j],
2543 v->WritebackVTaps[j],
2544 v->WritebackDestinationWidth[j],
2545 v->WritebackDestinationHeight[j],
2546 v->WritebackSourceHeight[j],
2547 v->HTotal[k]) / v->DISPCLK);
2548 }
2549 }
2550 }
2551 }
2552
2553 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2554 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2555 if (v->BlendingAndTiming[k] == j)
2556 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2557
2558 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2559 v->MaxVStartupLines[k] =
2560 CalculateMaxVStartup(
2561 v->VTotal[k],
2562 v->VActive[k],
2563 v->VBlankNom[k],
2564 v->HTotal[k],
2565 v->PixelClock[k],
2566 v->ProgressiveToInterlaceUnitInOPP,
2567 v->Interlace[k],
2568 v->ip.VBlankNomDefaultUS,
2569 v->WritebackDelay[v->VoltageLevel][k]);
2570
2571 #ifdef __DML_VBA_DEBUG__
2572 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
2573 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel);
2574 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]);
2575 #endif
2576 }
2577
2578 v->MaximumMaxVStartupLines = 0;
2579 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2580 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2581
2582 // VBA_DELTA
2583 // We don't really care to iterate between the various prefetch modes
2584 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode);
2585
2586 v->UrgentLatency = CalculateUrgentLatency(
2587 v->UrgentLatencyPixelDataOnly,
2588 v->UrgentLatencyPixelMixedWithVMData,
2589 v->UrgentLatencyVMDataOnly,
2590 v->DoUrgentLatencyAdjustment,
2591 v->UrgentLatencyAdjustmentFabricClockComponent,
2592 v->UrgentLatencyAdjustmentFabricClockReference,
2593 v->FabricClock);
2594
2595 v->FractionOfUrgentBandwidth = 0.0;
2596 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2597
2598 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
2599
2600 do {
2601 double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
2602 bool DestinationLineTimesForPrefetchLessThan2 = false;
2603 bool VRatioPrefetchMoreThan4 = false;
2604 double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime);
2605
2606 MaxTotalRDBandwidth = 0;
2607
2608 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines);
2609
2610 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2611 Pipe myPipe;
2612
2613 myPipe.DPPCLK = v->DPPCLK[k];
2614 myPipe.DISPCLK = v->DISPCLK;
2615 myPipe.PixelClock = v->PixelClock[k];
2616 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2617 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2618 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2619 myPipe.VRatio = v->VRatio[k];
2620 myPipe.VRatioChroma = v->VRatioChroma[k];
2621 myPipe.SourceScan = v->SourceScan[k];
2622 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2623 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2624 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2625 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2626 myPipe.InterlaceEnable = v->Interlace[k];
2627 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2628 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2629 myPipe.HTotal = v->HTotal[k];
2630 myPipe.DCCEnable = v->DCCEnable[k];
2631 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1
2632 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1;
2633 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
2634 myPipe.BytePerPixelY = v->BytePerPixelY[k];
2635 myPipe.BytePerPixelC = v->BytePerPixelC[k];
2636 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
2637 v->ErrorResult[k] = CalculatePrefetchSchedule(
2638 mode_lib,
2639 HostVMInefficiencyFactor,
2640 &myPipe,
2641 v->DSCDelay[k],
2642 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
2643 v->DPPCLKDelaySCL,
2644 v->DPPCLKDelaySCLLBOnly,
2645 v->DPPCLKDelayCNVCCursor,
2646 v->DISPCLKDelaySubtotal,
2647 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2648 v->OutputFormat[k],
2649 v->MaxInterDCNTileRepeaters,
2650 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2651 v->MaxVStartupLines[k],
2652 v->GPUVMMaxPageTableLevels,
2653 v->GPUVMEnable,
2654 v->HostVMEnable,
2655 v->HostVMMaxNonCachedPageTableLevels,
2656 v->HostVMMinPageSize,
2657 v->DynamicMetadataEnable[k],
2658 v->DynamicMetadataVMEnabled,
2659 v->DynamicMetadataLinesBeforeActiveRequired[k],
2660 v->DynamicMetadataTransmittedBytes[k],
2661 v->UrgentLatency,
2662 v->UrgentExtraLatency,
2663 v->TCalc,
2664 v->PDEAndMetaPTEBytesFrame[k],
2665 v->MetaRowByte[k],
2666 v->PixelPTEBytesPerRow[k],
2667 v->PrefetchSourceLinesY[k],
2668 v->SwathWidthY[k],
2669 v->VInitPreFillY[k],
2670 v->MaxNumSwathY[k],
2671 v->PrefetchSourceLinesC[k],
2672 v->SwathWidthC[k],
2673 v->VInitPreFillC[k],
2674 v->MaxNumSwathC[k],
2675 v->swath_width_luma_ub[k],
2676 v->swath_width_chroma_ub[k],
2677 v->SwathHeightY[k],
2678 v->SwathHeightC[k],
2679 TWait,
2680 &v->DSTXAfterScaler[k],
2681 &v->DSTYAfterScaler[k],
2682 &v->DestinationLinesForPrefetch[k],
2683 &v->PrefetchBandwidth[k],
2684 &v->DestinationLinesToRequestVMInVBlank[k],
2685 &v->DestinationLinesToRequestRowInVBlank[k],
2686 &v->VRatioPrefetchY[k],
2687 &v->VRatioPrefetchC[k],
2688 &v->RequiredPrefetchPixDataBWLuma[k],
2689 &v->RequiredPrefetchPixDataBWChroma[k],
2690 &v->NotEnoughTimeForDynamicMetadata[k],
2691 &v->Tno_bw[k],
2692 &v->prefetch_vmrow_bw[k],
2693 &v->Tdmdl_vm[k],
2694 &v->Tdmdl[k],
2695 &v->TSetup[k],
2696 &v->VUpdateOffsetPix[k],
2697 &v->VUpdateWidthPix[k],
2698 &v->VReadyOffsetPix[k]);
2699
2700 #ifdef __DML_VBA_DEBUG__
2701 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]);
2702 #endif
2703 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2704 }
2705
2706 v->NoEnoughUrgentLatencyHiding = false;
2707 v->NoEnoughUrgentLatencyHidingPre = false;
2708
2709 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2710 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2711 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2712 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2713 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k];
2714
2715 CalculateUrgentBurstFactor(
2716 v->swath_width_luma_ub[k],
2717 v->swath_width_chroma_ub[k],
2718 v->SwathHeightY[k],
2719 v->SwathHeightC[k],
2720 v->HTotal[k] / v->PixelClock[k],
2721 v->UrgentLatency,
2722 v->CursorBufferSize,
2723 v->CursorWidth[k][0],
2724 v->CursorBPP[k][0],
2725 v->VRatio[k],
2726 v->VRatioChroma[k],
2727 v->BytePerPixelDETY[k],
2728 v->BytePerPixelDETC[k],
2729 v->DETBufferSizeY[k],
2730 v->DETBufferSizeC[k],
2731 &v->UrgBurstFactorCursor[k],
2732 &v->UrgBurstFactorLuma[k],
2733 &v->UrgBurstFactorChroma[k],
2734 &v->NoUrgentLatencyHiding[k]);
2735
2736 CalculateUrgentBurstFactor(
2737 v->swath_width_luma_ub[k],
2738 v->swath_width_chroma_ub[k],
2739 v->SwathHeightY[k],
2740 v->SwathHeightC[k],
2741 v->HTotal[k] / v->PixelClock[k],
2742 v->UrgentLatency,
2743 v->CursorBufferSize,
2744 v->CursorWidth[k][0],
2745 v->CursorBPP[k][0],
2746 v->VRatioPrefetchY[k],
2747 v->VRatioPrefetchC[k],
2748 v->BytePerPixelDETY[k],
2749 v->BytePerPixelDETC[k],
2750 v->DETBufferSizeY[k],
2751 v->DETBufferSizeC[k],
2752 &v->UrgBurstFactorCursorPre[k],
2753 &v->UrgBurstFactorLumaPre[k],
2754 &v->UrgBurstFactorChromaPre[k],
2755 &v->NoUrgentLatencyHidingPre[k]);
2756
2757 MaxTotalRDBandwidth = MaxTotalRDBandwidth
2758 + dml_max3(
2759 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2760 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2761 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2762 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k]
2763 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2764 v->DPPPerPlane[k]
2765 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2766 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2767 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2768
2769 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst
2770 + dml_max3(
2771 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2772 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k]
2773 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2774 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k])
2775 + v->cursor_bw_pre[k]);
2776
2777 #ifdef __DML_VBA_DEBUG__
2778 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]);
2779 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]);
2780 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]);
2781 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]);
2782 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]);
2783
2784 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]);
2785 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]);
2786
2787 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]);
2788 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]);
2789 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]);
2790 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]);
2791 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]);
2792 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]);
2793 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]);
2794 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]);
2795 dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]);
2796 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst);
2797 #endif
2798
2799 if (v->DestinationLinesForPrefetch[k] < 2)
2800 DestinationLineTimesForPrefetchLessThan2 = true;
2801
2802 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2803 VRatioPrefetchMoreThan4 = true;
2804
2805 if (v->NoUrgentLatencyHiding[k] == true)
2806 v->NoEnoughUrgentLatencyHiding = true;
2807
2808 if (v->NoUrgentLatencyHidingPre[k] == true)
2809 v->NoEnoughUrgentLatencyHidingPre = true;
2810 }
2811
2812 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2813
2814 #ifdef __DML_VBA_DEBUG__
2815 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, MaxTotalRDBandwidthNoUrgentBurst);
2816 dml_print("DML::%s: ReturnBW=%f\n", __func__, v->ReturnBW);
2817 dml_print("DML::%s: FractionOfUrgentBandwidth=%f\n", __func__, v->FractionOfUrgentBandwidth);
2818 #endif
2819
2820 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0
2821 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2)
2822 v->PrefetchModeSupported = true;
2823 else {
2824 v->PrefetchModeSupported = false;
2825 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__);
2826 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW);
2827 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not");
2828 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2829 }
2830
2831 // PREVIOUS_ERROR
2832 // This error result check was done after the PrefetchModeSupported. So we will
2833 // still try to calculate flip schedule even prefetch mode not supported
2834 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2835 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) {
2836 v->PrefetchModeSupported = false;
2837 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__);
2838 }
2839 }
2840
2841 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2842 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2843 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2844 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
2845 - dml_max(
2846 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2847 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2848 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2849 v->DPPPerPlane[k]
2850 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2851 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2852 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2853 }
2854
2855 v->TotImmediateFlipBytes = 0;
2856 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2857 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
2858 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2859 }
2860 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2861 CalculateFlipSchedule(
2862 mode_lib,
2863 k,
2864 HostVMInefficiencyFactor,
2865 v->UrgentExtraLatency,
2866 v->UrgentLatency,
2867 v->PDEAndMetaPTEBytesFrame[k],
2868 v->MetaRowByte[k],
2869 v->PixelPTEBytesPerRow[k]);
2870 }
2871
2872 v->total_dcn_read_bw_with_flip = 0.0;
2873 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2874 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2875 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
2876 + dml_max3(
2877 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2878 v->DPPPerPlane[k] * v->final_flip_bw[k]
2879 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k]
2880 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k]
2881 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2882 v->DPPPerPlane[k]
2883 * (v->final_flip_bw[k]
2884 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2885 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2886 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2887 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst
2888 + dml_max3(
2889 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2890 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k]
2891 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
2892 v->DPPPerPlane[k]
2893 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k]
2894 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2895 }
2896 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
2897
2898 v->ImmediateFlipSupported = true;
2899 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
2900 #ifdef __DML_VBA_DEBUG__
2901 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip);
2902 #endif
2903 v->ImmediateFlipSupported = false;
2904 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
2905 }
2906 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2907 if (v->ImmediateFlipSupportedForPipe[k] == false) {
2908 #ifdef __DML_VBA_DEBUG__
2909 dml_print("DML::%s: Pipe %0d not supporting iflip\n", __func__, k);
2910 #endif
2911 v->ImmediateFlipSupported = false;
2912 }
2913 }
2914 } else {
2915 v->ImmediateFlipSupported = false;
2916 }
2917
2918 v->PrefetchAndImmediateFlipSupported =
2919 (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable
2920 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
2921 v->ImmediateFlipSupported)) ? true : false;
2922 #ifdef __DML_VBA_DEBUG__
2923 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported);
2924 dml_print("DML::%s: ImmediateFlipRequirement %d\n", __func__, v->ImmediateFlipRequirement == dm_immediate_flip_required);
2925 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported);
2926 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport);
2927 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable);
2928 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported);
2929 #endif
2930 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines);
2931
2932 v->VStartupLines = v->VStartupLines + 1;
2933 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
2934 ASSERT(v->PrefetchAndImmediateFlipSupported);
2935
2936 // Unbounded Request Enabled
2937 CalculateUnboundedRequestAndCompressedBufferSize(
2938 v->DETBufferSizeInKByte[0],
2939 v->ConfigReturnBufferSizeInKByte,
2940 v->UseUnboundedRequesting,
2941 v->TotalActiveDPP,
2942 NoChromaPlanes,
2943 v->MaxNumDPP,
2944 v->CompressedBufferSegmentSizeInkByte,
2945 v->Output,
2946 &v->UnboundedRequestEnabled,
2947 &v->CompressedBufferSizeInkByte);
2948
2949 //Watermarks and NB P-State/DRAM Clock Change Support
2950 {
2951 enum clock_change_support DRAMClockChangeSupport; // dummy
2952
2953 CalculateWatermarksAndDRAMSpeedChangeSupport(
2954 mode_lib,
2955 PrefetchMode,
2956 v->DCFCLK,
2957 v->ReturnBW,
2958 v->UrgentLatency,
2959 v->UrgentExtraLatency,
2960 v->SOCCLK,
2961 v->DCFCLKDeepSleep,
2962 v->DETBufferSizeY,
2963 v->DETBufferSizeC,
2964 v->SwathHeightY,
2965 v->SwathHeightC,
2966 v->SwathWidthY,
2967 v->SwathWidthC,
2968 v->DPPPerPlane,
2969 v->BytePerPixelDETY,
2970 v->BytePerPixelDETC,
2971 v->UnboundedRequestEnabled,
2972 v->CompressedBufferSizeInkByte,
2973 &DRAMClockChangeSupport,
2974 &v->StutterExitWatermark,
2975 &v->StutterEnterPlusExitWatermark,
2976 &v->Z8StutterExitWatermark,
2977 &v->Z8StutterEnterPlusExitWatermark);
2978
2979 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2980 if (v->WritebackEnable[k] == true) {
2981 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(
2982 0,
2983 v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
2984 } else {
2985 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
2986 }
2987 }
2988 }
2989
2990 //Display Pipeline Delivery Time in Prefetch, Groups
2991 CalculatePixelDeliveryTimes(
2992 v->NumberOfActivePlanes,
2993 v->VRatio,
2994 v->VRatioChroma,
2995 v->VRatioPrefetchY,
2996 v->VRatioPrefetchC,
2997 v->swath_width_luma_ub,
2998 v->swath_width_chroma_ub,
2999 v->DPPPerPlane,
3000 v->HRatio,
3001 v->HRatioChroma,
3002 v->PixelClock,
3003 v->PSCL_THROUGHPUT_LUMA,
3004 v->PSCL_THROUGHPUT_CHROMA,
3005 v->DPPCLK,
3006 v->BytePerPixelC,
3007 v->SourceScan,
3008 v->NumberOfCursors,
3009 v->CursorWidth,
3010 v->CursorBPP,
3011 v->BlockWidth256BytesY,
3012 v->BlockHeight256BytesY,
3013 v->BlockWidth256BytesC,
3014 v->BlockHeight256BytesC,
3015 v->DisplayPipeLineDeliveryTimeLuma,
3016 v->DisplayPipeLineDeliveryTimeChroma,
3017 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
3018 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
3019 v->DisplayPipeRequestDeliveryTimeLuma,
3020 v->DisplayPipeRequestDeliveryTimeChroma,
3021 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
3022 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
3023 v->CursorRequestDeliveryTime,
3024 v->CursorRequestDeliveryTimePrefetch);
3025
3026 CalculateMetaAndPTETimes(
3027 v->NumberOfActivePlanes,
3028 v->GPUVMEnable,
3029 v->MetaChunkSize,
3030 v->MinMetaChunkSizeBytes,
3031 v->HTotal,
3032 v->VRatio,
3033 v->VRatioChroma,
3034 v->DestinationLinesToRequestRowInVBlank,
3035 v->DestinationLinesToRequestRowInImmediateFlip,
3036 v->DCCEnable,
3037 v->PixelClock,
3038 v->BytePerPixelY,
3039 v->BytePerPixelC,
3040 v->SourceScan,
3041 v->dpte_row_height,
3042 v->dpte_row_height_chroma,
3043 v->meta_row_width,
3044 v->meta_row_width_chroma,
3045 v->meta_row_height,
3046 v->meta_row_height_chroma,
3047 v->meta_req_width,
3048 v->meta_req_width_chroma,
3049 v->meta_req_height,
3050 v->meta_req_height_chroma,
3051 v->dpte_group_bytes,
3052 v->PTERequestSizeY,
3053 v->PTERequestSizeC,
3054 v->PixelPTEReqWidthY,
3055 v->PixelPTEReqHeightY,
3056 v->PixelPTEReqWidthC,
3057 v->PixelPTEReqHeightC,
3058 v->dpte_row_width_luma_ub,
3059 v->dpte_row_width_chroma_ub,
3060 v->DST_Y_PER_PTE_ROW_NOM_L,
3061 v->DST_Y_PER_PTE_ROW_NOM_C,
3062 v->DST_Y_PER_META_ROW_NOM_L,
3063 v->DST_Y_PER_META_ROW_NOM_C,
3064 v->TimePerMetaChunkNominal,
3065 v->TimePerChromaMetaChunkNominal,
3066 v->TimePerMetaChunkVBlank,
3067 v->TimePerChromaMetaChunkVBlank,
3068 v->TimePerMetaChunkFlip,
3069 v->TimePerChromaMetaChunkFlip,
3070 v->time_per_pte_group_nom_luma,
3071 v->time_per_pte_group_vblank_luma,
3072 v->time_per_pte_group_flip_luma,
3073 v->time_per_pte_group_nom_chroma,
3074 v->time_per_pte_group_vblank_chroma,
3075 v->time_per_pte_group_flip_chroma);
3076
3077 CalculateVMGroupAndRequestTimes(
3078 v->NumberOfActivePlanes,
3079 v->GPUVMEnable,
3080 v->GPUVMMaxPageTableLevels,
3081 v->HTotal,
3082 v->BytePerPixelC,
3083 v->DestinationLinesToRequestVMInVBlank,
3084 v->DestinationLinesToRequestVMInImmediateFlip,
3085 v->DCCEnable,
3086 v->PixelClock,
3087 v->dpte_row_width_luma_ub,
3088 v->dpte_row_width_chroma_ub,
3089 v->vm_group_bytes,
3090 v->dpde0_bytes_per_frame_ub_l,
3091 v->dpde0_bytes_per_frame_ub_c,
3092 v->meta_pte_bytes_per_frame_ub_l,
3093 v->meta_pte_bytes_per_frame_ub_c,
3094 v->TimePerVMGroupVBlank,
3095 v->TimePerVMGroupFlip,
3096 v->TimePerVMRequestVBlank,
3097 v->TimePerVMRequestFlip);
3098
3099 // Min TTUVBlank
3100 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3101 if (PrefetchMode == 0) {
3102 v->AllowDRAMClockChangeDuringVBlank[k] = true;
3103 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3104 v->MinTTUVBlank[k] = dml_max(
3105 v->DRAMClockChangeWatermark,
3106 dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark));
3107 } else if (PrefetchMode == 1) {
3108 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3109 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3110 v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark);
3111 } else {
3112 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3113 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
3114 v->MinTTUVBlank[k] = v->UrgentWatermark;
3115 }
3116 if (!v->DynamicMetadataEnable[k])
3117 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k];
3118 }
3119
3120 // DCC Configuration
3121 v->ActiveDPPs = 0;
3122 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3123 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
3124 v->SourcePixelFormat[k],
3125 v->SurfaceWidthY[k],
3126 v->SurfaceWidthC[k],
3127 v->SurfaceHeightY[k],
3128 v->SurfaceHeightC[k],
3129 v->DETBufferSizeInKByte[0] * 1024,
3130 v->BlockHeight256BytesY[k],
3131 v->BlockHeight256BytesC[k],
3132 v->SurfaceTiling[k],
3133 v->BytePerPixelY[k],
3134 v->BytePerPixelC[k],
3135 v->BytePerPixelDETY[k],
3136 v->BytePerPixelDETC[k],
3137 v->SourceScan[k],
3138 &v->DCCYMaxUncompressedBlock[k],
3139 &v->DCCCMaxUncompressedBlock[k],
3140 &v->DCCYMaxCompressedBlock[k],
3141 &v->DCCCMaxCompressedBlock[k],
3142 &v->DCCYIndependentBlock[k],
3143 &v->DCCCIndependentBlock[k]);
3144 }
3145
3146 // VStartup Adjustment
3147 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3148 bool isInterlaceTiming;
3149 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k];
3150 #ifdef __DML_VBA_DEBUG__
3151 dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]);
3152 #endif
3153
3154 v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin;
3155
3156 #ifdef __DML_VBA_DEBUG__
3157 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin);
3158 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
3159 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3160 dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]);
3161 #endif
3162
3163 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin;
3164 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) {
3165 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin;
3166 }
3167
3168 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP);
3169 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]);
3170 if (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) {
3171 v->MIN_DST_Y_NEXT_START[k] = dml_floor((v->VTotal[k] - v->VFrontPorch[k] + v->VTotal[k] - v->VActive[k] - v->VStartup[k]) / 2.0, 1.0);
3172 } else {
3173 v->MIN_DST_Y_NEXT_START[k] = v->VTotal[k] - v->VFrontPorch[k] + v->VTotal[k] - v->VActive[k] - v->VStartup[k];
3174 }
3175 v->MIN_DST_Y_NEXT_START[k] += dml_floor(4.0 * v->TSetup[k] / ((double)v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0;
3176 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k])
3177 <= (isInterlaceTiming ?
3178 dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) :
3179 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) {
3180 v->VREADY_AT_OR_AFTER_VSYNC[k] = true;
3181 } else {
3182 v->VREADY_AT_OR_AFTER_VSYNC[k] = false;
3183 }
3184 #ifdef __DML_VBA_DEBUG__
3185 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]);
3186 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]);
3187 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]);
3188 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]);
3189 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]);
3190 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]);
3191 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]);
3192 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]);
3193 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3194 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]);
3195 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]);
3196 #endif
3197 }
3198
3199 {
3200 //Maximum Bandwidth Used
3201 double TotalWRBandwidth = 0;
3202 double MaxPerPlaneVActiveWRBandwidth = 0;
3203 double WRBandwidth = 0;
3204
3205 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3206 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) {
3207 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3208 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
3209 } else if (v->WritebackEnable[k] == true) {
3210 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3211 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
3212 }
3213 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
3214 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
3215 }
3216
3217 v->TotalDataReadBandwidth = 0;
3218 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3219 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k];
3220 }
3221 }
3222 // Stutter Efficiency
3223 CalculateStutterEfficiency(
3224 mode_lib,
3225 v->CompressedBufferSizeInkByte,
3226 v->UnboundedRequestEnabled,
3227 v->ConfigReturnBufferSizeInKByte,
3228 v->MetaFIFOSizeInKEntries,
3229 v->ZeroSizeBufferEntries,
3230 v->NumberOfActivePlanes,
3231 v->ROBBufferSizeInKByte,
3232 v->TotalDataReadBandwidth,
3233 v->DCFCLK,
3234 v->ReturnBW,
3235 v->COMPBUF_RESERVED_SPACE_64B,
3236 v->COMPBUF_RESERVED_SPACE_ZS,
3237 v->SRExitTime,
3238 v->SRExitZ8Time,
3239 v->SynchronizedVBlank,
3240 v->StutterEnterPlusExitWatermark,
3241 v->Z8StutterEnterPlusExitWatermark,
3242 v->ProgressiveToInterlaceUnitInOPP,
3243 v->Interlace,
3244 v->MinTTUVBlank,
3245 v->DPPPerPlane,
3246 v->DETBufferSizeY,
3247 v->BytePerPixelY,
3248 v->BytePerPixelDETY,
3249 v->SwathWidthY,
3250 v->SwathHeightY,
3251 v->SwathHeightC,
3252 v->DCCRateLuma,
3253 v->DCCRateChroma,
3254 v->DCCFractionOfZeroSizeRequestsLuma,
3255 v->DCCFractionOfZeroSizeRequestsChroma,
3256 v->HTotal,
3257 v->VTotal,
3258 v->PixelClock,
3259 v->VRatio,
3260 v->SourceScan,
3261 v->BlockHeight256BytesY,
3262 v->BlockWidth256BytesY,
3263 v->BlockHeight256BytesC,
3264 v->BlockWidth256BytesC,
3265 v->DCCYMaxUncompressedBlock,
3266 v->DCCCMaxUncompressedBlock,
3267 v->VActive,
3268 v->DCCEnable,
3269 v->WritebackEnable,
3270 v->ReadBandwidthPlaneLuma,
3271 v->ReadBandwidthPlaneChroma,
3272 v->meta_row_bw,
3273 v->dpte_row_bw,
3274 &v->StutterEfficiencyNotIncludingVBlank,
3275 &v->StutterEfficiency,
3276 &v->NumberOfStutterBurstsPerFrame,
3277 &v->Z8StutterEfficiencyNotIncludingVBlank,
3278 &v->Z8StutterEfficiency,
3279 &v->Z8NumberOfStutterBurstsPerFrame,
3280 &v->StutterPeriod);
3281 }
3282
3283 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3284 {
3285 struct vba_vars_st *v = &mode_lib->vba;
3286 // Display Pipe Configuration
3287 double BytePerPixDETY[DC__NUM_DPP__MAX];
3288 double BytePerPixDETC[DC__NUM_DPP__MAX];
3289 int BytePerPixY[DC__NUM_DPP__MAX];
3290 int BytePerPixC[DC__NUM_DPP__MAX];
3291 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX];
3292 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX];
3293 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX];
3294 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX];
3295 double dummy1[DC__NUM_DPP__MAX];
3296 double dummy2[DC__NUM_DPP__MAX];
3297 double dummy3[DC__NUM_DPP__MAX];
3298 double dummy4[DC__NUM_DPP__MAX];
3299 int dummy5[DC__NUM_DPP__MAX];
3300 int dummy6[DC__NUM_DPP__MAX];
3301 bool dummy7[DC__NUM_DPP__MAX];
3302 bool dummysinglestring;
3303
3304 unsigned int k;
3305
3306 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3307
3308 CalculateBytePerPixelAnd256BBlockSizes(
3309 v->SourcePixelFormat[k],
3310 v->SurfaceTiling[k],
3311 &BytePerPixY[k],
3312 &BytePerPixC[k],
3313 &BytePerPixDETY[k],
3314 &BytePerPixDETC[k],
3315 &Read256BytesBlockHeightY[k],
3316 &Read256BytesBlockHeightC[k],
3317 &Read256BytesBlockWidthY[k],
3318 &Read256BytesBlockWidthC[k]);
3319 }
3320
3321 CalculateSwathAndDETConfiguration(
3322 false,
3323 v->NumberOfActivePlanes,
3324 v->DETBufferSizeInKByte[0],
3325 dummy1,
3326 dummy2,
3327 v->SourceScan,
3328 v->SourcePixelFormat,
3329 v->SurfaceTiling,
3330 v->ViewportWidth,
3331 v->ViewportHeight,
3332 v->SurfaceWidthY,
3333 v->SurfaceWidthC,
3334 v->SurfaceHeightY,
3335 v->SurfaceHeightC,
3336 Read256BytesBlockHeightY,
3337 Read256BytesBlockHeightC,
3338 Read256BytesBlockWidthY,
3339 Read256BytesBlockWidthC,
3340 v->ODMCombineEnabled,
3341 v->BlendingAndTiming,
3342 BytePerPixY,
3343 BytePerPixC,
3344 BytePerPixDETY,
3345 BytePerPixDETC,
3346 v->HActive,
3347 v->HRatio,
3348 v->HRatioChroma,
3349 v->DPPPerPlane,
3350 dummy5,
3351 dummy6,
3352 dummy3,
3353 dummy4,
3354 v->SwathHeightY,
3355 v->SwathHeightC,
3356 v->DETBufferSizeY,
3357 v->DETBufferSizeC,
3358 dummy7,
3359 &dummysinglestring);
3360 }
3361
3362 static bool CalculateBytePerPixelAnd256BBlockSizes(
3363 enum source_format_class SourcePixelFormat,
3364 enum dm_swizzle_mode SurfaceTiling,
3365 unsigned int *BytePerPixelY,
3366 unsigned int *BytePerPixelC,
3367 double *BytePerPixelDETY,
3368 double *BytePerPixelDETC,
3369 unsigned int *BlockHeight256BytesY,
3370 unsigned int *BlockHeight256BytesC,
3371 unsigned int *BlockWidth256BytesY,
3372 unsigned int *BlockWidth256BytesC)
3373 {
3374 if (SourcePixelFormat == dm_444_64) {
3375 *BytePerPixelDETY = 8;
3376 *BytePerPixelDETC = 0;
3377 *BytePerPixelY = 8;
3378 *BytePerPixelC = 0;
3379 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
3380 *BytePerPixelDETY = 4;
3381 *BytePerPixelDETC = 0;
3382 *BytePerPixelY = 4;
3383 *BytePerPixelC = 0;
3384 } else if (SourcePixelFormat == dm_444_16) {
3385 *BytePerPixelDETY = 2;
3386 *BytePerPixelDETC = 0;
3387 *BytePerPixelY = 2;
3388 *BytePerPixelC = 0;
3389 } else if (SourcePixelFormat == dm_444_8) {
3390 *BytePerPixelDETY = 1;
3391 *BytePerPixelDETC = 0;
3392 *BytePerPixelY = 1;
3393 *BytePerPixelC = 0;
3394 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3395 *BytePerPixelDETY = 4;
3396 *BytePerPixelDETC = 1;
3397 *BytePerPixelY = 4;
3398 *BytePerPixelC = 1;
3399 } else if (SourcePixelFormat == dm_420_8) {
3400 *BytePerPixelDETY = 1;
3401 *BytePerPixelDETC = 2;
3402 *BytePerPixelY = 1;
3403 *BytePerPixelC = 2;
3404 } else if (SourcePixelFormat == dm_420_12) {
3405 *BytePerPixelDETY = 2;
3406 *BytePerPixelDETC = 4;
3407 *BytePerPixelY = 2;
3408 *BytePerPixelC = 4;
3409 } else {
3410 *BytePerPixelDETY = 4.0 / 3;
3411 *BytePerPixelDETC = 8.0 / 3;
3412 *BytePerPixelY = 2;
3413 *BytePerPixelC = 4;
3414 }
3415
3416 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16
3417 || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) {
3418 if (SurfaceTiling == dm_sw_linear) {
3419 *BlockHeight256BytesY = 1;
3420 } else if (SourcePixelFormat == dm_444_64) {
3421 *BlockHeight256BytesY = 4;
3422 } else if (SourcePixelFormat == dm_444_8) {
3423 *BlockHeight256BytesY = 16;
3424 } else {
3425 *BlockHeight256BytesY = 8;
3426 }
3427 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3428 *BlockHeight256BytesC = 0;
3429 *BlockWidth256BytesC = 0;
3430 } else {
3431 if (SurfaceTiling == dm_sw_linear) {
3432 *BlockHeight256BytesY = 1;
3433 *BlockHeight256BytesC = 1;
3434 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3435 *BlockHeight256BytesY = 8;
3436 *BlockHeight256BytesC = 16;
3437 } else if (SourcePixelFormat == dm_420_8) {
3438 *BlockHeight256BytesY = 16;
3439 *BlockHeight256BytesC = 8;
3440 } else {
3441 *BlockHeight256BytesY = 8;
3442 *BlockHeight256BytesC = 8;
3443 }
3444 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3445 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
3446 }
3447 return true;
3448 }
3449
/*
 * CalculateTWait - latency to wait for, selected by prefetch mode.
 *
 * PrefetchMode 0: the larger of (DRAM clock change + urgent latency) and
 *                 max(self-refresh enter+exit time, urgent latency).
 * PrefetchMode 1: max(self-refresh enter+exit time, urgent latency).
 * Any other mode: urgent latency only.
 */
static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
{
	switch (PrefetchMode) {
	case 0:
		return dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency));
	case 1:
		return dml_max(SREnterPlusExitTime, UrgentLatency);
	default:
		return UrgentLatency;
	}
}
3460
3461 double dml314_CalculateWriteBackDISPCLK(
3462 enum source_format_class WritebackPixelFormat,
3463 double PixelClock,
3464 double WritebackHRatio,
3465 double WritebackVRatio,
3466 unsigned int WritebackHTaps,
3467 unsigned int WritebackVTaps,
3468 long WritebackSourceWidth,
3469 long WritebackDestinationWidth,
3470 unsigned int HTotal,
3471 unsigned int WritebackLineBufferSize)
3472 {
3473 double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
3474
3475 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
3476 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
3477 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
3478 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB);
3479 }
3480
3481 static double CalculateWriteBackDelay(
3482 enum source_format_class WritebackPixelFormat,
3483 double WritebackHRatio,
3484 double WritebackVRatio,
3485 unsigned int WritebackVTaps,
3486 int WritebackDestinationWidth,
3487 int WritebackDestinationHeight,
3488 int WritebackSourceHeight,
3489 unsigned int HTotal)
3490 {
3491 double CalculateWriteBackDelay;
3492 double Line_length;
3493 double Output_lines_last_notclamped;
3494 double WritebackVInit;
3495
3496 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
3497 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
3498 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1);
3499 if (Output_lines_last_notclamped < 0) {
3500 CalculateWriteBackDelay = 0;
3501 } else {
3502 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80;
3503 }
3504 return CalculateWriteBackDelay;
3505 }
3506
/*
 * CalculateVupdateAndDynamicMetadataParameters - VUPDATE/VREADY pixel offsets
 * and dynamic-metadata timing terms for one pipe.
 *
 * Outputs:
 * @TSetup:           (VUpdateOffsetPix + VUpdateWidthPix + VReadyOffsetPix)
 *                    divided by PixelClock
 * @Tdmbf:            DynamicMetadataTransmittedBytes / 4 / DISPCLK
 * @Tdmec:            HTotal / PixelClock (one line time)
 * @Tdmsks:           half of VBlank worth of line times when
 *                    DynamicMetadataLinesBeforeActiveRequired is 0, otherwise
 *                    that many line times; halved again for interlaced timing
 *                    when the progressive-to-interlace unit is not in OPP
 * @VUpdateOffsetPix: ceil(HTotal / 4)
 * @VUpdateWidthPix:  rounded-up pixel count covering DCFCLK deep-sleep, DPPCLK
 *                    and inter-tile repeater delays
 * @VReadyOffsetPix:  rounded-up pixel count, the larger of 150 DPPCLK cycles
 *                    and the repeater + deep-sleep + DPPCLK delay sum
 */
static void CalculateVupdateAndDynamicMetadataParameters(
		int MaxInterDCNTileRepeaters,
		double DPPCLK,
		double DISPCLK,
		double DCFClkDeepSleep,
		double PixelClock,
		int HTotal,
		int VBlank,
		int DynamicMetadataTransmittedBytes,
		int DynamicMetadataLinesBeforeActiveRequired,
		int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
	*VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
	*Tdmec = HTotal / PixelClock;
	if (DynamicMetadataLinesBeforeActiveRequired == 0) {
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	} else {
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
	}
	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
		*Tdmsks = *Tdmsks / 2;
	}
#ifdef __DML_VBA_DEBUG__
	/*
	 * VUpdateWidthPix and VReadyOffsetPix point to doubles, so they must be
	 * printed with %f; only VUpdateOffsetPix is an int.
	 */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
#endif
}
3550
3551 static void CalculateRowBandwidth(
3552 bool GPUVMEnable,
3553 enum source_format_class SourcePixelFormat,
3554 double VRatio,
3555 double VRatioChroma,
3556 bool DCCEnable,
3557 double LineTime,
3558 unsigned int MetaRowByteLuma,
3559 unsigned int MetaRowByteChroma,
3560 unsigned int meta_row_height_luma,
3561 unsigned int meta_row_height_chroma,
3562 unsigned int PixelPTEBytesPerRowLuma,
3563 unsigned int PixelPTEBytesPerRowChroma,
3564 unsigned int dpte_row_height_luma,
3565 unsigned int dpte_row_height_chroma,
3566 double *meta_row_bw,
3567 double *dpte_row_bw)
3568 {
3569 if (DCCEnable != true) {
3570 *meta_row_bw = 0;
3571 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3572 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
3573 } else {
3574 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
3575 }
3576
3577 if (GPUVMEnable != true) {
3578 *dpte_row_bw = 0;
3579 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3580 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3581 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
3582 } else {
3583 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
3584 }
3585 }
3586
3587 static void CalculateFlipSchedule(
3588 struct display_mode_lib *mode_lib,
3589 unsigned int k,
3590 double HostVMInefficiencyFactor,
3591 double UrgentExtraLatency,
3592 double UrgentLatency,
3593 double PDEAndMetaPTEBytesPerFrame,
3594 double MetaRowBytes,
3595 double DPTEBytesPerRow)
3596 {
3597 struct vba_vars_st *v = &mode_lib->vba;
3598 double min_row_time = 0.0;
3599 unsigned int HostVMDynamicLevelsTrips;
3600 double TimeForFetchingMetaPTEImmediateFlip;
3601 double TimeForFetchingRowInVBlankImmediateFlip;
3602 double ImmediateFlipBW = 1.0;
3603 double LineTime = v->HTotal[k] / v->PixelClock[k];
3604
3605 if (v->GPUVMEnable == true && v->HostVMEnable == true) {
3606 HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
3607 } else {
3608 HostVMDynamicLevelsTrips = 0;
3609 }
3610
3611 if (v->GPUVMEnable == true || v->DCCEnable[k] == true) {
3612 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes;
3613 }
3614
3615 if (v->GPUVMEnable == true) {
3616 TimeForFetchingMetaPTEImmediateFlip = dml_max3(
3617 v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3618 UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
3619 LineTime / 4.0);
3620 } else {
3621 TimeForFetchingMetaPTEImmediateFlip = 0;
3622 }
3623
3624 v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
3625 if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
3626 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
3627 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3628 UrgentLatency * (HostVMDynamicLevelsTrips + 1),
3629 LineTime / 4);
3630 } else {
3631 TimeForFetchingRowInVBlankImmediateFlip = 0;
3632 }
3633
3634 v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
3635
3636 if (v->GPUVMEnable == true) {
3637 v->final_flip_bw[k] = dml_max(
3638 PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime),
3639 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime));
3640 } else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
3641 v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime);
3642 } else {
3643 v->final_flip_bw[k] = 0;
3644 }
3645
3646 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
3647 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
3648 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3649 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
3650 min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3651 } else {
3652 min_row_time = dml_min4(
3653 v->dpte_row_height[k] * LineTime / v->VRatio[k],
3654 v->meta_row_height[k] * LineTime / v->VRatio[k],
3655 v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k],
3656 v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3657 }
3658 } else {
3659 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
3660 min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k];
3661 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
3662 min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k];
3663 } else {
3664 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / v->VRatio[k]);
3665 }
3666 }
3667
3668 if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16
3669 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3670 v->ImmediateFlipSupportedForPipe[k] = false;
3671 } else {
3672 v->ImmediateFlipSupportedForPipe[k] = true;
3673 }
3674
3675 #ifdef __DML_VBA_DEBUG__
3676 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]);
3677 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]);
3678 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
3679 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
3680 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
3681 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]);
3682 #endif
3683
3684 }
3685
3686 static double TruncToValidBPP(
3687 double LinkBitRate,
3688 int Lanes,
3689 int HTotal,
3690 int HActive,
3691 double PixelClock,
3692 double DesiredBPP,
3693 bool DSCEnable,
3694 enum output_encoder_class Output,
3695 enum output_format_class Format,
3696 unsigned int DSCInputBitPerComponent,
3697 int DSCSlices,
3698 int AudioRate,
3699 int AudioLayout,
3700 enum odm_combine_mode ODMCombine)
3701 {
3702 double MaxLinkBPP;
3703 int MinDSCBPP;
3704 double MaxDSCBPP;
3705 int NonDSCBPP0;
3706 int NonDSCBPP1;
3707 int NonDSCBPP2;
3708
3709 if (Format == dm_420) {
3710 NonDSCBPP0 = 12;
3711 NonDSCBPP1 = 15;
3712 NonDSCBPP2 = 18;
3713 MinDSCBPP = 6;
3714 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
3715 } else if (Format == dm_444) {
3716 NonDSCBPP0 = 24;
3717 NonDSCBPP1 = 30;
3718 NonDSCBPP2 = 36;
3719 MinDSCBPP = 8;
3720 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3721 } else {
3722
3723 NonDSCBPP0 = 16;
3724 NonDSCBPP1 = 20;
3725 NonDSCBPP2 = 24;
3726
3727 if (Format == dm_n422) {
3728 MinDSCBPP = 7;
3729 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3730 } else {
3731 MinDSCBPP = 8;
3732 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
3733 }
3734 }
3735
3736 if (DSCEnable && Output == dm_dp) {
3737 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3738 } else {
3739 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
3740 }
3741
3742 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3743 MaxLinkBPP = 16;
3744 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
3745 MaxLinkBPP = 32;
3746 }
3747
3748 if (DesiredBPP == 0) {
3749 if (DSCEnable) {
3750 if (MaxLinkBPP < MinDSCBPP) {
3751 return BPP_INVALID;
3752 } else if (MaxLinkBPP >= MaxDSCBPP) {
3753 return MaxDSCBPP;
3754 } else {
3755 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
3756 }
3757 } else {
3758 if (MaxLinkBPP >= NonDSCBPP2) {
3759 return NonDSCBPP2;
3760 } else if (MaxLinkBPP >= NonDSCBPP1) {
3761 return NonDSCBPP1;
3762 } else if (MaxLinkBPP >= NonDSCBPP0) {
3763 return 16.0;
3764 } else {
3765 return BPP_INVALID;
3766 }
3767 }
3768 } else {
3769 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0))
3770 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
3771 return BPP_INVALID;
3772 } else {
3773 return DesiredBPP;
3774 }
3775 }
3776 }
3777
3778 static noinline void CalculatePrefetchSchedulePerPlane(
3779 struct display_mode_lib *mode_lib,
3780 double HostVMInefficiencyFactor,
3781 int i,
3782 unsigned int j,
3783 unsigned int k)
3784 {
3785 struct vba_vars_st *v = &mode_lib->vba;
3786 Pipe myPipe;
3787
3788 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
3789 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
3790 myPipe.PixelClock = v->PixelClock[k];
3791 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
3792 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
3793 myPipe.ScalerEnabled = v->ScalerEnabled[k];
3794 myPipe.VRatio = mode_lib->vba.VRatio[k];
3795 myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k];
3796
3797 myPipe.SourceScan = v->SourceScan[k];
3798 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
3799 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
3800 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
3801 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
3802 myPipe.InterlaceEnable = v->Interlace[k];
3803 myPipe.NumberOfCursors = v->NumberOfCursors[k];
3804 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
3805 myPipe.HTotal = v->HTotal[k];
3806 myPipe.DCCEnable = v->DCCEnable[k];
3807 myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
3808 || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
3809 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
3810 myPipe.BytePerPixelY = v->BytePerPixelY[k];
3811 myPipe.BytePerPixelC = v->BytePerPixelC[k];
3812 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
3813 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
3814 mode_lib,
3815 HostVMInefficiencyFactor,
3816 &myPipe,
3817 v->DSCDelayPerState[i][k],
3818 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
3819 v->DPPCLKDelaySCL,
3820 v->DPPCLKDelaySCLLBOnly,
3821 v->DPPCLKDelayCNVCCursor,
3822 v->DISPCLKDelaySubtotal,
3823 v->SwathWidthYThisState[k] / v->HRatio[k],
3824 v->OutputFormat[k],
3825 v->MaxInterDCNTileRepeaters,
3826 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
3827 v->MaximumVStartup[i][j][k],
3828 v->GPUVMMaxPageTableLevels,
3829 v->GPUVMEnable,
3830 v->HostVMEnable,
3831 v->HostVMMaxNonCachedPageTableLevels,
3832 v->HostVMMinPageSize,
3833 v->DynamicMetadataEnable[k],
3834 v->DynamicMetadataVMEnabled,
3835 v->DynamicMetadataLinesBeforeActiveRequired[k],
3836 v->DynamicMetadataTransmittedBytes[k],
3837 v->UrgLatency[i],
3838 v->ExtraLatency,
3839 v->TimeCalc,
3840 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
3841 v->MetaRowBytes[i][j][k],
3842 v->DPTEBytesPerRow[i][j][k],
3843 v->PrefetchLinesY[i][j][k],
3844 v->SwathWidthYThisState[k],
3845 v->PrefillY[k],
3846 v->MaxNumSwY[k],
3847 v->PrefetchLinesC[i][j][k],
3848 v->SwathWidthCThisState[k],
3849 v->PrefillC[k],
3850 v->MaxNumSwC[k],
3851 v->swath_width_luma_ub_this_state[k],
3852 v->swath_width_chroma_ub_this_state[k],
3853 v->SwathHeightYThisState[k],
3854 v->SwathHeightCThisState[k],
3855 v->TWait,
3856 &v->DSTXAfterScaler[k],
3857 &v->DSTYAfterScaler[k],
3858 &v->LineTimesForPrefetch[k],
3859 &v->PrefetchBW[k],
3860 &v->LinesForMetaPTE[k],
3861 &v->LinesForMetaAndDPTERow[k],
3862 &v->VRatioPreY[i][j][k],
3863 &v->VRatioPreC[i][j][k],
3864 &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
3865 &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
3866 &v->NoTimeForDynamicMetadata[i][j][k],
3867 &v->Tno_bw[k],
3868 &v->prefetch_vmrow_bw[k],
3869 &v->dummy7[k],
3870 &v->dummy8[k],
3871 &v->dummy13[k],
3872 &v->VUpdateOffsetPix[k],
3873 &v->VUpdateWidthPix[k],
3874 &v->VReadyOffsetPix[k]);
3875 }
3876
3877 void dml314_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3878 {
3879 struct vba_vars_st *v = &mode_lib->vba;
3880
3881 int i, j;
3882 unsigned int k, m;
3883 int ReorderingBytes;
3884 int MinPrefetchMode = 0, MaxPrefetchMode = 2;
3885 bool NoChroma = true;
3886 bool EnoughWritebackUnits = true;
3887 bool P2IWith420 = false;
3888 bool DSCOnlyIfNecessaryWithBPP = false;
3889 bool DSC422NativeNotSupported = false;
3890 double MaxTotalVActiveRDBandwidth;
3891 bool ViewportExceedsSurface = false;
3892 bool FMTBufferExceeded = false;
3893
3894 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3895
3896 CalculateMinAndMaxPrefetchMode(
3897 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
3898 &MinPrefetchMode, &MaxPrefetchMode);
3899
3900 /*Scale Ratio, taps Support Check*/
3901
3902 v->ScaleRatioAndTapsSupport = true;
3903 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3904 if (v->ScalerEnabled[k] == false
3905 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3906 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3907 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3908 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0
3909 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) {
3910 v->ScaleRatioAndTapsSupport = false;
3911 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3912 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio
3913 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k]
3914 || v->VRatio[k] > v->vtaps[k]
3915 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3916 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3917 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3918 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1
3919 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
3920 || v->HRatioChroma[k] > v->MaxHSCLRatio
3921 || v->VRatioChroma[k] > v->MaxVSCLRatio
3922 || v->HRatioChroma[k] > v->HTAPsChroma[k]
3923 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
3924 v->ScaleRatioAndTapsSupport = false;
3925 }
3926 }
3927 /*Source Format, Pixel Format and Scan Support Check*/
3928
3929 v->SourceFormatPixelAndScanSupport = true;
3930 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3931 if (v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true)) {
3932 v->SourceFormatPixelAndScanSupport = false;
3933 }
3934 }
3935 /*Bandwidth Support Check*/
3936
3937 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3938 CalculateBytePerPixelAnd256BBlockSizes(
3939 v->SourcePixelFormat[k],
3940 v->SurfaceTiling[k],
3941 &v->BytePerPixelY[k],
3942 &v->BytePerPixelC[k],
3943 &v->BytePerPixelInDETY[k],
3944 &v->BytePerPixelInDETC[k],
3945 &v->Read256BlockHeightY[k],
3946 &v->Read256BlockHeightC[k],
3947 &v->Read256BlockWidthY[k],
3948 &v->Read256BlockWidthC[k]);
3949 }
3950 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3951 if (v->SourceScan[k] != dm_vert) {
3952 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
3953 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
3954 } else {
3955 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
3956 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
3957 }
3958 }
3959 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3960 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0)
3961 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
3962 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0)
3963 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
3964 }
3965 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3966 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) {
3967 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3968 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0;
3969 } else if (v->WritebackEnable[k] == true) {
3970 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3971 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0;
3972 } else {
3973 v->WriteBandwidth[k] = 0.0;
3974 }
3975 }
3976
3977 /*Writeback Latency support check*/
3978
3979 v->WritebackLatencySupport = true;
3980 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3981 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) {
3982 v->WritebackLatencySupport = false;
3983 }
3984 }
3985
3986 /*Writeback Mode Support Check*/
3987
3988 v->TotalNumberOfActiveWriteback = 0;
3989 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3990 if (v->WritebackEnable[k] == true) {
3991 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1;
3992 }
3993 }
3994
3995 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
3996 EnoughWritebackUnits = false;
3997 }
3998
3999 /*Writeback Scale Ratio and Taps Support Check*/
4000
4001 v->WritebackScaleRatioAndTapsSupport = true;
4002 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4003 if (v->WritebackEnable[k] == true) {
4004 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio
4005 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio
4006 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio
4007 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps
4008 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps
4009 || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k]
4010 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) {
4011 v->WritebackScaleRatioAndTapsSupport = false;
4012 }
4013 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
4014 v->WritebackScaleRatioAndTapsSupport = false;
4015 }
4016 }
4017 }
4018 /*Maximum DISPCLK/DPPCLK Support check*/
4019
4020 v->WritebackRequiredDISPCLK = 0.0;
4021 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4022 if (v->WritebackEnable[k] == true) {
4023 v->WritebackRequiredDISPCLK = dml_max(
4024 v->WritebackRequiredDISPCLK,
4025 dml314_CalculateWriteBackDISPCLK(
4026 v->WritebackPixelFormat[k],
4027 v->PixelClock[k],
4028 v->WritebackHRatio[k],
4029 v->WritebackVRatio[k],
4030 v->WritebackHTaps[k],
4031 v->WritebackVTaps[k],
4032 v->WritebackSourceWidth[k],
4033 v->WritebackDestinationWidth[k],
4034 v->HTotal[k],
4035 v->WritebackLineBufferSize));
4036 }
4037 }
4038 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4039 if (v->HRatio[k] > 1.0) {
4040 v->PSCL_FACTOR[k] = dml_min(
4041 v->MaxDCHUBToPSCLThroughput,
4042 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
4043 } else {
4044 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
4045 }
4046 if (v->BytePerPixelC[k] == 0.0) {
4047 v->PSCL_FACTOR_CHROMA[k] = 0.0;
4048 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4049 * dml_max3(
4050 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4051 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4052 1.0);
4053 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4054 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4055 }
4056 } else {
4057 if (v->HRatioChroma[k] > 1.0) {
4058 v->PSCL_FACTOR_CHROMA[k] = dml_min(
4059 v->MaxDCHUBToPSCLThroughput,
4060 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
4061 } else {
4062 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
4063 }
4064 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4065 * dml_max5(
4066 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4067 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4068 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
4069 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
4070 1.0);
4071 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
4072 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4073 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4074 }
4075 }
4076 }
4077 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4078 int MaximumSwathWidthSupportLuma;
4079 int MaximumSwathWidthSupportChroma;
4080
4081 if (v->SurfaceTiling[k] == dm_sw_linear) {
4082 MaximumSwathWidthSupportLuma = 8192.0;
4083 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
4084 MaximumSwathWidthSupportLuma = 2880.0;
4085 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4086 MaximumSwathWidthSupportLuma = 3840.0;
4087 } else {
4088 MaximumSwathWidthSupportLuma = 5760.0;
4089 }
4090
4091 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
4092 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
4093 } else {
4094 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
4095 }
4096 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
4097 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
4098 if (v->BytePerPixelC[k] == 0.0) {
4099 v->MaximumSwathWidthInLineBufferChroma = 0;
4100 } else {
4101 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
4102 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
4103 }
4104 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
4105 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
4106 }
4107
4108 CalculateSwathAndDETConfiguration(
4109 true,
4110 v->NumberOfActivePlanes,
4111 v->DETBufferSizeInKByte[0],
4112 v->MaximumSwathWidthLuma,
4113 v->MaximumSwathWidthChroma,
4114 v->SourceScan,
4115 v->SourcePixelFormat,
4116 v->SurfaceTiling,
4117 v->ViewportWidth,
4118 v->ViewportHeight,
4119 v->SurfaceWidthY,
4120 v->SurfaceWidthC,
4121 v->SurfaceHeightY,
4122 v->SurfaceHeightC,
4123 v->Read256BlockHeightY,
4124 v->Read256BlockHeightC,
4125 v->Read256BlockWidthY,
4126 v->Read256BlockWidthC,
4127 v->odm_combine_dummy,
4128 v->BlendingAndTiming,
4129 v->BytePerPixelY,
4130 v->BytePerPixelC,
4131 v->BytePerPixelInDETY,
4132 v->BytePerPixelInDETC,
4133 v->HActive,
4134 v->HRatio,
4135 v->HRatioChroma,
4136 v->NoOfDPPThisState,
4137 v->swath_width_luma_ub_this_state,
4138 v->swath_width_chroma_ub_this_state,
4139 v->SwathWidthYThisState,
4140 v->SwathWidthCThisState,
4141 v->SwathHeightYThisState,
4142 v->SwathHeightCThisState,
4143 v->DETBufferSizeYThisState,
4144 v->DETBufferSizeCThisState,
4145 v->SingleDPPViewportSizeSupportPerPlane,
4146 &v->ViewportSizeSupport[0][0]);
4147
4148 for (i = 0; i < v->soc.num_states; i++) {
4149 for (j = 0; j < 2; j++) {
4150 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
4151 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
4152 v->RequiredDISPCLK[i][j] = 0.0;
4153 v->DISPCLK_DPPCLK_Support[i][j] = true;
4154 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4155 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4156 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4157 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i]
4158 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4159 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4160 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k]
4161 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4162 }
4163 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4164 * (1 + v->DISPCLKRampingMargin / 100.0);
4165 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i]
4166 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4167 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4168 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2
4169 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4170 }
4171 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4172 * (1 + v->DISPCLKRampingMargin / 100.0);
4173 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i]
4174 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4175 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4176 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4
4177 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4178 }
4179
4180 if (v->ODMCombinePolicy == dm_odm_combine_policy_none
4181 || !(v->Output[k] == dm_dp ||
4182 v->Output[k] == dm_dp2p0 ||
4183 v->Output[k] == dm_edp)) {
4184 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4185 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4186
4187 if (v->HActive[k] / 2 > DCN314_MAX_FMT_420_BUFFER_WIDTH)
4188 FMTBufferExceeded = true;
4189 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
4190 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4191 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4192 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
4193 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
4194 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4195 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4196 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
4197 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4198 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4199 } else {
4200 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4201 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4202 }
4203 if (v->DSCEnabled[k] && v->HActive[k] > DCN314_MAX_DSC_IMAGE_WIDTH
4204 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4205 if (v->HActive[k] / 2 > DCN314_MAX_DSC_IMAGE_WIDTH) {
4206 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4207 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4208 } else {
4209 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4210 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4211 }
4212 }
4213 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN314_MAX_FMT_420_BUFFER_WIDTH
4214 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4215 if (v->Output[k] == dm_hdmi) {
4216 FMTBufferExceeded = true;
4217 } else if (v->HActive[k] / 2 > DCN314_MAX_FMT_420_BUFFER_WIDTH) {
4218 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4219 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4220
4221 if (v->HActive[k] / 4 > DCN314_MAX_FMT_420_BUFFER_WIDTH)
4222 FMTBufferExceeded = true;
4223 } else {
4224 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4225 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4226 }
4227 }
4228 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4229 v->MPCCombine[i][j][k] = false;
4230 v->NoOfDPP[i][j][k] = 4;
4231 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
4232 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4233 v->MPCCombine[i][j][k] = false;
4234 v->NoOfDPP[i][j][k] = 2;
4235 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
4236 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
4237 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4238 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
4239 v->MPCCombine[i][j][k] = false;
4240 v->NoOfDPP[i][j][k] = 1;
4241 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4242 } else {
4243 v->MPCCombine[i][j][k] = true;
4244 v->NoOfDPP[i][j][k] = 2;
4245 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4246 }
4247 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4248 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4249 > v->MaxDppclkRoundedDownToDFSGranularity)
4250 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4251 v->DISPCLK_DPPCLK_Support[i][j] = false;
4252 }
4253 }
4254 v->TotalNumberOfActiveDPP[i][j] = 0;
4255 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
4256 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4257 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4258 if (v->NoOfDPP[i][j][k] == 1)
4259 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
4260 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4261 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
4262 NoChroma = false;
4263 }
4264
4265 // UPTO
4266 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never
4267 && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) {
4268 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
4269 double BWOfNonSplitPlaneOfMaximumBandwidth;
4270 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
4271
4272 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4273 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4274 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4275 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
4276 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
4277 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4278 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4279 }
4280 }
4281 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
4282 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4283 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
4284 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4285 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4286 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
4287 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
4288 }
4289 }
4290 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
4291 v->RequiredDISPCLK[i][j] = 0.0;
4292 v->DISPCLK_DPPCLK_Support[i][j] = true;
4293 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4294 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4295 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
4296 v->MPCCombine[i][j][k] = true;
4297 v->NoOfDPP[i][j][k] = 2;
4298 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4299 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4300 } else {
4301 v->MPCCombine[i][j][k] = false;
4302 v->NoOfDPP[i][j][k] = 1;
4303 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4304 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4305 }
4306 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4307 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4308 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4309 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4310 } else {
4311 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4312 }
4313 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4314 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4315 > v->MaxDppclkRoundedDownToDFSGranularity)
4316 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4317 v->DISPCLK_DPPCLK_Support[i][j] = false;
4318 }
4319 }
4320 v->TotalNumberOfActiveDPP[i][j] = 0.0;
4321 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4322 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4323 }
4324 }
4325 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
4326 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4327 v->DISPCLK_DPPCLK_Support[i][j] = false;
4328 }
4329 }
4330 }
4331
4332 /*Total Available Pipes Support Check*/
4333
4334 for (i = 0; i < v->soc.num_states; i++) {
4335 for (j = 0; j < 2; j++) {
4336 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4337 v->TotalAvailablePipesSupport[i][j] = true;
4338 } else {
4339 v->TotalAvailablePipesSupport[i][j] = false;
4340 }
4341 }
4342 }
4343 /*Display IO and DSC Support Check*/
4344
4345 v->NonsupportedDSCInputBPC = false;
4346 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4347 if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0)
4348 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) {
4349 v->NonsupportedDSCInputBPC = true;
4350 }
4351 }
4352
4353 /*Number Of DSC Slices*/
4354 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4355 if (v->BlendingAndTiming[k] == k) {
4356 if (v->PixelClockBackEnd[k] > 3200) {
4357 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
4358 } else if (v->PixelClockBackEnd[k] > 1360) {
4359 v->NumberOfDSCSlices[k] = 8;
4360 } else if (v->PixelClockBackEnd[k] > 680) {
4361 v->NumberOfDSCSlices[k] = 4;
4362 } else if (v->PixelClockBackEnd[k] > 340) {
4363 v->NumberOfDSCSlices[k] = 2;
4364 } else {
4365 v->NumberOfDSCSlices[k] = 1;
4366 }
4367 } else {
4368 v->NumberOfDSCSlices[k] = 0;
4369 }
4370 }
4371
4372 for (i = 0; i < v->soc.num_states; i++) {
4373 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4374 v->RequiresDSC[i][k] = false;
4375 v->RequiresFEC[i][k] = false;
4376 if (v->BlendingAndTiming[k] == k) {
4377 if (v->Output[k] == dm_hdmi) {
4378 v->RequiresDSC[i][k] = false;
4379 v->RequiresFEC[i][k] = false;
4380 v->OutputBppPerState[i][k] = TruncToValidBPP(
4381 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
4382 3,
4383 v->HTotal[k],
4384 v->HActive[k],
4385 v->PixelClockBackEnd[k],
4386 v->ForcedOutputLinkBPP[k],
4387 false,
4388 v->Output[k],
4389 v->OutputFormat[k],
4390 v->DSCInputBitPerComponent[k],
4391 v->NumberOfDSCSlices[k],
4392 v->AudioSampleRate[k],
4393 v->AudioSampleLayout[k],
4394 v->ODMCombineEnablePerState[i][k]);
4395 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_dp2p0) {
4396 if (v->DSCEnable[k] == true) {
4397 v->RequiresDSC[i][k] = true;
4398 v->LinkDSCEnable = true;
4399 if (v->Output[k] == dm_dp || v->Output[k] == dm_dp2p0) {
4400 v->RequiresFEC[i][k] = true;
4401 } else {
4402 v->RequiresFEC[i][k] = false;
4403 }
4404 } else {
4405 v->RequiresDSC[i][k] = false;
4406 v->LinkDSCEnable = false;
4407 if (v->Output[k] == dm_dp2p0) {
4408 v->RequiresFEC[i][k] = true;
4409 } else {
4410 v->RequiresFEC[i][k] = false;
4411 }
4412 }
4413 if (v->Output[k] == dm_dp2p0) {
4414 v->Outbpp = BPP_INVALID;
4415 if ((v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr10) &&
4416 v->PHYCLKD18PerState[k] >= 10000.0 / 18.0) {
4417 v->Outbpp = TruncToValidBPP(
4418 (1.0 - v->Downspreading / 100.0) * 10000,
4419 v->OutputLinkDPLanes[k],
4420 v->HTotal[k],
4421 v->HActive[k],
4422 v->PixelClockBackEnd[k],
4423 v->ForcedOutputLinkBPP[k],
4424 v->LinkDSCEnable,
4425 v->Output[k],
4426 v->OutputFormat[k],
4427 v->DSCInputBitPerComponent[k],
4428 v->NumberOfDSCSlices[k],
4429 v->AudioSampleRate[k],
4430 v->AudioSampleLayout[k],
4431 v->ODMCombineEnablePerState[i][k]);
4432 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 13500.0 / 18.0 &&
4433 v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) {
4434 v->RequiresDSC[i][k] = true;
4435 v->LinkDSCEnable = true;
4436 v->Outbpp = TruncToValidBPP(
4437 (1.0 - v->Downspreading / 100.0) * 10000,
4438 v->OutputLinkDPLanes[k],
4439 v->HTotal[k],
4440 v->HActive[k],
4441 v->PixelClockBackEnd[k],
4442 v->ForcedOutputLinkBPP[k],
4443 v->LinkDSCEnable,
4444 v->Output[k],
4445 v->OutputFormat[k],
4446 v->DSCInputBitPerComponent[k],
4447 v->NumberOfDSCSlices[k],
4448 v->AudioSampleRate[k],
4449 v->AudioSampleLayout[k],
4450 v->ODMCombineEnablePerState[i][k]);
4451 }
4452 v->OutputBppPerState[i][k] = v->Outbpp;
4453 // TODO: Need some other way to handle this nonsense
4454 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR10"
4455 }
4456 if (v->Outbpp == BPP_INVALID &&
4457 (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr13p5) &&
4458 v->PHYCLKD18PerState[k] >= 13500.0 / 18.0) {
4459 v->Outbpp = TruncToValidBPP(
4460 (1.0 - v->Downspreading / 100.0) * 13500,
4461 v->OutputLinkDPLanes[k],
4462 v->HTotal[k],
4463 v->HActive[k],
4464 v->PixelClockBackEnd[k],
4465 v->ForcedOutputLinkBPP[k],
4466 v->LinkDSCEnable,
4467 v->Output[k],
4468 v->OutputFormat[k],
4469 v->DSCInputBitPerComponent[k],
4470 v->NumberOfDSCSlices[k],
4471 v->AudioSampleRate[k],
4472 v->AudioSampleLayout[k],
4473 v->ODMCombineEnablePerState[i][k]);
4474 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 20000.0 / 18.0 &&
4475 v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) {
4476 v->RequiresDSC[i][k] = true;
4477 v->LinkDSCEnable = true;
4478 v->Outbpp = TruncToValidBPP(
4479 (1.0 - v->Downspreading / 100.0) * 13500,
4480 v->OutputLinkDPLanes[k],
4481 v->HTotal[k],
4482 v->HActive[k],
4483 v->PixelClockBackEnd[k],
4484 v->ForcedOutputLinkBPP[k],
4485 v->LinkDSCEnable,
4486 v->Output[k],
4487 v->OutputFormat[k],
4488 v->DSCInputBitPerComponent[k],
4489 v->NumberOfDSCSlices[k],
4490 v->AudioSampleRate[k],
4491 v->AudioSampleLayout[k],
4492 v->ODMCombineEnablePerState[i][k]);
4493 }
4494 v->OutputBppPerState[i][k] = v->Outbpp;
4495 // TODO: Need some other way to handle this nonsense
4496 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR13p5"
4497 }
4498 if (v->Outbpp == BPP_INVALID &&
4499 (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr20) &&
4500 v->PHYCLKD18PerState[k] >= 20000.0 / 18.0) {
4501 v->Outbpp = TruncToValidBPP(
4502 (1.0 - v->Downspreading / 100.0) * 20000,
4503 v->OutputLinkDPLanes[k],
4504 v->HTotal[k],
4505 v->HActive[k],
4506 v->PixelClockBackEnd[k],
4507 v->ForcedOutputLinkBPP[k],
4508 v->LinkDSCEnable,
4509 v->Output[k],
4510 v->OutputFormat[k],
4511 v->DSCInputBitPerComponent[k],
4512 v->NumberOfDSCSlices[k],
4513 v->AudioSampleRate[k],
4514 v->AudioSampleLayout[k],
4515 v->ODMCombineEnablePerState[i][k]);
4516 if (v->Outbpp == BPP_INVALID && v->DSCEnable[k] == true &&
4517 v->ForcedOutputLinkBPP[k] == 0) {
4518 v->RequiresDSC[i][k] = true;
4519 v->LinkDSCEnable = true;
4520 v->Outbpp = TruncToValidBPP(
4521 (1.0 - v->Downspreading / 100.0) * 20000,
4522 v->OutputLinkDPLanes[k],
4523 v->HTotal[k],
4524 v->HActive[k],
4525 v->PixelClockBackEnd[k],
4526 v->ForcedOutputLinkBPP[k],
4527 v->LinkDSCEnable,
4528 v->Output[k],
4529 v->OutputFormat[k],
4530 v->DSCInputBitPerComponent[k],
4531 v->NumberOfDSCSlices[k],
4532 v->AudioSampleRate[k],
4533 v->AudioSampleLayout[k],
4534 v->ODMCombineEnablePerState[i][k]);
4535 }
4536 v->OutputBppPerState[i][k] = v->Outbpp;
4537 // TODO: Need some other way to handle this nonsense
4538 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR20"
4539 }
4540 } else {
4541 v->Outbpp = BPP_INVALID;
4542 if (v->PHYCLKPerState[i] >= 270.0) {
4543 v->Outbpp = TruncToValidBPP(
4544 (1.0 - v->Downspreading / 100.0) * 2700,
4545 v->OutputLinkDPLanes[k],
4546 v->HTotal[k],
4547 v->HActive[k],
4548 v->PixelClockBackEnd[k],
4549 v->ForcedOutputLinkBPP[k],
4550 v->LinkDSCEnable,
4551 v->Output[k],
4552 v->OutputFormat[k],
4553 v->DSCInputBitPerComponent[k],
4554 v->NumberOfDSCSlices[k],
4555 v->AudioSampleRate[k],
4556 v->AudioSampleLayout[k],
4557 v->ODMCombineEnablePerState[i][k]);
4558 v->OutputBppPerState[i][k] = v->Outbpp;
4559 // TODO: Need some other way to handle this nonsense
4560 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4561 }
4562 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4563 v->Outbpp = TruncToValidBPP(
4564 (1.0 - v->Downspreading / 100.0) * 5400,
4565 v->OutputLinkDPLanes[k],
4566 v->HTotal[k],
4567 v->HActive[k],
4568 v->PixelClockBackEnd[k],
4569 v->ForcedOutputLinkBPP[k],
4570 v->LinkDSCEnable,
4571 v->Output[k],
4572 v->OutputFormat[k],
4573 v->DSCInputBitPerComponent[k],
4574 v->NumberOfDSCSlices[k],
4575 v->AudioSampleRate[k],
4576 v->AudioSampleLayout[k],
4577 v->ODMCombineEnablePerState[i][k]);
4578 v->OutputBppPerState[i][k] = v->Outbpp;
4579 // TODO: Need some other way to handle this nonsense
4580 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4581 }
4582 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4583 v->Outbpp = TruncToValidBPP(
4584 (1.0 - v->Downspreading / 100.0) * 8100,
4585 v->OutputLinkDPLanes[k],
4586 v->HTotal[k],
4587 v->HActive[k],
4588 v->PixelClockBackEnd[k],
4589 v->ForcedOutputLinkBPP[k],
4590 v->LinkDSCEnable,
4591 v->Output[k],
4592 v->OutputFormat[k],
4593 v->DSCInputBitPerComponent[k],
4594 v->NumberOfDSCSlices[k],
4595 v->AudioSampleRate[k],
4596 v->AudioSampleLayout[k],
4597 v->ODMCombineEnablePerState[i][k]);
4598 v->OutputBppPerState[i][k] = v->Outbpp;
4599 // TODO: Need some other way to handle this nonsense
4600 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4601 }
4602 }
4603 }
4604 } else {
4605 v->OutputBppPerState[i][k] = 0;
4606 }
4607 }
4608 }
4609
4610 for (i = 0; i < v->soc.num_states; i++) {
4611 v->LinkCapacitySupport[i] = true;
4612 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4613 if (v->BlendingAndTiming[k] == k
4614 && (v->Output[k] == dm_dp ||
4615 v->Output[k] == dm_edp ||
4616 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) {
4617 v->LinkCapacitySupport[i] = false;
4618 }
4619 }
4620 }
4621
4622 // UPTO 2172
4623 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4624 if (v->BlendingAndTiming[k] == k
4625 && (v->Output[k] == dm_dp ||
4626 v->Output[k] == dm_edp ||
4627 v->Output[k] == dm_hdmi)) {
4628 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) {
4629 P2IWith420 = true;
4630 }
4631 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422
4632 && !v->DSC422NativeSupport) {
4633 DSC422NativeNotSupported = true;
4634 }
4635 }
4636 }
4637
4638
4639 for (i = 0; i < v->soc.num_states; ++i) {
4640 v->ODMCombine4To1SupportCheckOK[i] = true;
4641 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4642 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4643 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp
4644 || v->Output[k] == dm_hdmi)) {
4645 v->ODMCombine4To1SupportCheckOK[i] = false;
4646 }
4647 }
4648 }
4649
4650 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4651
4652 for (i = 0; i < v->soc.num_states; i++) {
4653 v->NotEnoughDSCUnits[i] = false;
4654 v->TotalDSCUnitsRequired = 0.0;
4655 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4656 if (v->RequiresDSC[i][k] == true) {
4657 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4658 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4659 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4660 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4661 } else {
4662 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4663 }
4664 }
4665 }
4666 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4667 v->NotEnoughDSCUnits[i] = true;
4668 }
4669 }
4670 /*DSC Delay per state*/
4671
4672 for (i = 0; i < v->soc.num_states; i++) {
4673 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4674 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4675 v->BPP = 0.0;
4676 } else {
4677 v->BPP = v->OutputBppPerState[i][k];
4678 }
4679 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4680 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4681 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4682 v->DSCInputBitPerComponent[k],
4683 v->BPP,
4684 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4685 v->NumberOfDSCSlices[k],
4686 v->OutputFormat[k],
4687 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4688 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4689 v->DSCDelayPerState[i][k] = 2.0
4690 * (dscceComputeDelay(
4691 v->DSCInputBitPerComponent[k],
4692 v->BPP,
4693 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4694 v->NumberOfDSCSlices[k] / 2,
4695 v->OutputFormat[k],
4696 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4697 } else {
4698 v->DSCDelayPerState[i][k] = 4.0
4699 * (dscceComputeDelay(
4700 v->DSCInputBitPerComponent[k],
4701 v->BPP,
4702 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4703 v->NumberOfDSCSlices[k] / 4,
4704 v->OutputFormat[k],
4705 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4706 }
4707 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] + (v->HTotal[k] - v->HActive[k]) * dml_ceil((double) v->DSCDelayPerState[i][k] / v->HActive[k], 1.0);
4708 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4709 } else {
4710 v->DSCDelayPerState[i][k] = 0.0;
4711 }
4712 }
4713 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4714 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4715 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4716 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4717 }
4718 }
4719 }
4720 }
4721
4722 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4723 //
4724 for (i = 0; i < v->soc.num_states; ++i) {
4725 for (j = 0; j <= 1; ++j) {
4726 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4727 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4728 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4729 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4730 }
4731
4732 CalculateSwathAndDETConfiguration(
4733 false,
4734 v->NumberOfActivePlanes,
4735 v->DETBufferSizeInKByte[0],
4736 v->MaximumSwathWidthLuma,
4737 v->MaximumSwathWidthChroma,
4738 v->SourceScan,
4739 v->SourcePixelFormat,
4740 v->SurfaceTiling,
4741 v->ViewportWidth,
4742 v->ViewportHeight,
4743 v->SurfaceWidthY,
4744 v->SurfaceWidthC,
4745 v->SurfaceHeightY,
4746 v->SurfaceHeightC,
4747 v->Read256BlockHeightY,
4748 v->Read256BlockHeightC,
4749 v->Read256BlockWidthY,
4750 v->Read256BlockWidthC,
4751 v->ODMCombineEnableThisState,
4752 v->BlendingAndTiming,
4753 v->BytePerPixelY,
4754 v->BytePerPixelC,
4755 v->BytePerPixelInDETY,
4756 v->BytePerPixelInDETC,
4757 v->HActive,
4758 v->HRatio,
4759 v->HRatioChroma,
4760 v->NoOfDPPThisState,
4761 v->swath_width_luma_ub_this_state,
4762 v->swath_width_chroma_ub_this_state,
4763 v->SwathWidthYThisState,
4764 v->SwathWidthCThisState,
4765 v->SwathHeightYThisState,
4766 v->SwathHeightCThisState,
4767 v->DETBufferSizeYThisState,
4768 v->DETBufferSizeCThisState,
4769 v->dummystring,
4770 &v->ViewportSizeSupport[i][j]);
4771
4772 CalculateDCFCLKDeepSleep(
4773 mode_lib,
4774 v->NumberOfActivePlanes,
4775 v->BytePerPixelY,
4776 v->BytePerPixelC,
4777 v->VRatio,
4778 v->VRatioChroma,
4779 v->SwathWidthYThisState,
4780 v->SwathWidthCThisState,
4781 v->NoOfDPPThisState,
4782 v->HRatio,
4783 v->HRatioChroma,
4784 v->PixelClock,
4785 v->PSCL_FACTOR,
4786 v->PSCL_FACTOR_CHROMA,
4787 v->RequiredDPPCLKThisState,
4788 v->ReadBandwidthLuma,
4789 v->ReadBandwidthChroma,
4790 v->ReturnBusWidth,
4791 &v->ProjectedDCFCLKDeepSleep[i][j]);
4792
4793 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4794 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4795 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4796 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4797 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4798 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4799 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4800 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4801 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4802 }
4803 }
4804 }
4805
4806 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4807 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
4808 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4809 }
4810
4811 for (i = 0; i < v->soc.num_states; i++) {
4812 for (j = 0; j < 2; j++) {
4813 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX];
4814
4815 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4816 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4817 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4818 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4819 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4820 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4821 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4822 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4823 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4824 }
4825
4826 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4827 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4828 if (v->DCCEnable[k] == true) {
4829 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4830 }
4831 }
4832
4833 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4834 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4835 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4836
4837 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12)
4838 && v->SourceScan[k] != dm_vert) {
4839 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma)
4840 / 2;
4841 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4842 } else {
4843 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4844 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4845 }
4846
4847 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4848 mode_lib,
4849 v->DCCEnable[k],
4850 v->Read256BlockHeightC[k],
4851 v->Read256BlockWidthC[k],
4852 v->SourcePixelFormat[k],
4853 v->SurfaceTiling[k],
4854 v->BytePerPixelC[k],
4855 v->SourceScan[k],
4856 v->SwathWidthCThisState[k],
4857 v->ViewportHeightChroma[k],
4858 v->GPUVMEnable,
4859 v->HostVMEnable,
4860 v->HostVMMaxNonCachedPageTableLevels,
4861 v->GPUVMMinPageSize,
4862 v->HostVMMinPageSize,
4863 v->PTEBufferSizeInRequestsForChroma,
4864 v->PitchC[k],
4865 0.0,
4866 &v->MacroTileWidthC[k],
4867 &v->MetaRowBytesC,
4868 &v->DPTEBytesPerRowC,
4869 &v->PTEBufferSizeNotExceededC[i][j][k],
4870 &v->dummyinteger7,
4871 &v->dpte_row_height_chroma[k],
4872 &v->dummyinteger28,
4873 &v->dummyinteger26,
4874 &v->dummyinteger23,
4875 &v->meta_row_height_chroma[k],
4876 &v->dummyinteger8,
4877 &v->dummyinteger9,
4878 &v->dummyinteger19,
4879 &v->dummyinteger20,
4880 &v->dummyinteger17,
4881 &v->dummyinteger10,
4882 &v->dummyinteger11);
4883
4884 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4885 mode_lib,
4886 v->VRatioChroma[k],
4887 v->VTAPsChroma[k],
4888 v->Interlace[k],
4889 v->ProgressiveToInterlaceUnitInOPP,
4890 v->SwathHeightCThisState[k],
4891 v->ViewportYStartC[k],
4892 &v->PrefillC[k],
4893 &v->MaxNumSwC[k]);
4894 } else {
4895 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4896 v->PTEBufferSizeInRequestsForChroma = 0;
4897 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4898 v->MetaRowBytesC = 0.0;
4899 v->DPTEBytesPerRowC = 0.0;
4900 v->PrefetchLinesC[i][j][k] = 0.0;
4901 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4902 }
4903 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4904 mode_lib,
4905 v->DCCEnable[k],
4906 v->Read256BlockHeightY[k],
4907 v->Read256BlockWidthY[k],
4908 v->SourcePixelFormat[k],
4909 v->SurfaceTiling[k],
4910 v->BytePerPixelY[k],
4911 v->SourceScan[k],
4912 v->SwathWidthYThisState[k],
4913 v->ViewportHeight[k],
4914 v->GPUVMEnable,
4915 v->HostVMEnable,
4916 v->HostVMMaxNonCachedPageTableLevels,
4917 v->GPUVMMinPageSize,
4918 v->HostVMMinPageSize,
4919 v->PTEBufferSizeInRequestsForLuma,
4920 v->PitchY[k],
4921 v->DCCMetaPitchY[k],
4922 &v->MacroTileWidthY[k],
4923 &v->MetaRowBytesY,
4924 &v->DPTEBytesPerRowY,
4925 &v->PTEBufferSizeNotExceededY[i][j][k],
4926 &v->dummyinteger7,
4927 &v->dpte_row_height[k],
4928 &v->dummyinteger29,
4929 &v->dummyinteger27,
4930 &v->dummyinteger24,
4931 &v->meta_row_height[k],
4932 &v->dummyinteger25,
4933 &v->dpte_group_bytes[k],
4934 &v->dummyinteger21,
4935 &v->dummyinteger22,
4936 &v->dummyinteger18,
4937 &v->dummyinteger5,
4938 &v->dummyinteger6);
4939 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4940 mode_lib,
4941 v->VRatio[k],
4942 v->vtaps[k],
4943 v->Interlace[k],
4944 v->ProgressiveToInterlaceUnitInOPP,
4945 v->SwathHeightYThisState[k],
4946 v->ViewportYStartY[k],
4947 &v->PrefillY[k],
4948 &v->MaxNumSwY[k]);
4949 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4950 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4951 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4952
4953 CalculateRowBandwidth(
4954 v->GPUVMEnable,
4955 v->SourcePixelFormat[k],
4956 v->VRatio[k],
4957 v->VRatioChroma[k],
4958 v->DCCEnable[k],
4959 v->HTotal[k] / v->PixelClock[k],
4960 v->MetaRowBytesY,
4961 v->MetaRowBytesC,
4962 v->meta_row_height[k],
4963 v->meta_row_height_chroma[k],
4964 v->DPTEBytesPerRowY,
4965 v->DPTEBytesPerRowC,
4966 v->dpte_row_height[k],
4967 v->dpte_row_height_chroma[k],
4968 &v->meta_row_bandwidth[i][j][k],
4969 &v->dpte_row_bandwidth[i][j][k]);
4970 }
4971 /*
4972 * DCCMetaBufferSizeSupport(i, j) = True
4973 * For k = 0 To NumberOfActivePlanes - 1
4974 * If MetaRowBytes(i, j, k) > 24064 Then
4975 * DCCMetaBufferSizeSupport(i, j) = False
4976 * End If
4977 * Next k
4978 */
4979 v->DCCMetaBufferSizeSupport[i][j] = true;
4980 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4981 if (v->MetaRowBytes[i][j][k] > 24064)
4982 v->DCCMetaBufferSizeSupport[i][j] = false;
4983 }
4984 v->UrgLatency[i] = CalculateUrgentLatency(
4985 v->UrgentLatencyPixelDataOnly,
4986 v->UrgentLatencyPixelMixedWithVMData,
4987 v->UrgentLatencyVMDataOnly,
4988 v->DoUrgentLatencyAdjustment,
4989 v->UrgentLatencyAdjustmentFabricClockComponent,
4990 v->UrgentLatencyAdjustmentFabricClockReference,
4991 v->FabricClockPerState[i]);
4992
4993 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4994 CalculateUrgentBurstFactor(
4995 v->swath_width_luma_ub_this_state[k],
4996 v->swath_width_chroma_ub_this_state[k],
4997 v->SwathHeightYThisState[k],
4998 v->SwathHeightCThisState[k],
4999 v->HTotal[k] / v->PixelClock[k],
5000 v->UrgLatency[i],
5001 v->CursorBufferSize,
5002 v->CursorWidth[k][0],
5003 v->CursorBPP[k][0],
5004 v->VRatio[k],
5005 v->VRatioChroma[k],
5006 v->BytePerPixelInDETY[k],
5007 v->BytePerPixelInDETC[k],
5008 v->DETBufferSizeYThisState[k],
5009 v->DETBufferSizeCThisState[k],
5010 &v->UrgentBurstFactorCursor[k],
5011 &v->UrgentBurstFactorLuma[k],
5012 &v->UrgentBurstFactorChroma[k],
5013 &NotUrgentLatencyHiding[k]);
5014 }
5015
5016 v->NotEnoughUrgentLatencyHidingA[i][j] = false;
5017 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5018 if (NotUrgentLatencyHiding[k]) {
5019 v->NotEnoughUrgentLatencyHidingA[i][j] = true;
5020 }
5021 }
5022
5023 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5024 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
5025 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
5026 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
5027 }
5028
5029 v->TotalVActivePixelBandwidth[i][j] = 0;
5030 v->TotalVActiveCursorBandwidth[i][j] = 0;
5031 v->TotalMetaRowBandwidth[i][j] = 0;
5032 v->TotalDPTERowBandwidth[i][j] = 0;
5033 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5034 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
5035 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
5036 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
5037 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
5038 }
5039 }
5040 }
5041
5042 //Calculate Return BW
5043 for (i = 0; i < v->soc.num_states; ++i) {
5044 for (j = 0; j <= 1; ++j) {
5045 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5046 if (v->BlendingAndTiming[k] == k) {
5047 if (v->WritebackEnable[k] == true) {
5048 v->WritebackDelayTime[k] = v->WritebackLatency
5049 + CalculateWriteBackDelay(
5050 v->WritebackPixelFormat[k],
5051 v->WritebackHRatio[k],
5052 v->WritebackVRatio[k],
5053 v->WritebackVTaps[k],
5054 v->WritebackDestinationWidth[k],
5055 v->WritebackDestinationHeight[k],
5056 v->WritebackSourceHeight[k],
5057 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
5058 } else {
5059 v->WritebackDelayTime[k] = 0.0;
5060 }
5061 for (m = 0; m < v->NumberOfActivePlanes; m++) {
5062 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
5063 v->WritebackDelayTime[k] = dml_max(
5064 v->WritebackDelayTime[k],
5065 v->WritebackLatency
5066 + CalculateWriteBackDelay(
5067 v->WritebackPixelFormat[m],
5068 v->WritebackHRatio[m],
5069 v->WritebackVRatio[m],
5070 v->WritebackVTaps[m],
5071 v->WritebackDestinationWidth[m],
5072 v->WritebackDestinationHeight[m],
5073 v->WritebackSourceHeight[m],
5074 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
5075 }
5076 }
5077 }
5078 }
5079 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5080 for (m = 0; m < v->NumberOfActivePlanes; m++) {
5081 if (v->BlendingAndTiming[k] == m) {
5082 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
5083 }
5084 }
5085 }
5086 v->MaxMaxVStartup[i][j] = 0;
5087 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5088 v->MaximumVStartup[i][j][k] =
5089 CalculateMaxVStartup(
5090 v->VTotal[k],
5091 v->VActive[k],
5092 v->VBlankNom[k],
5093 v->HTotal[k],
5094 v->PixelClock[k],
5095 v->ProgressiveToInterlaceUnitInOPP,
5096 v->Interlace[k],
5097 v->ip.VBlankNomDefaultUS,
5098 v->WritebackDelayTime[k]);
5099 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
5100 }
5101 }
5102 }
5103
5104 ReorderingBytes = v->NumberOfChannels
5105 * dml_max3(
5106 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
5107 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
5108 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
5109
5110 for (i = 0; i < v->soc.num_states; ++i) {
5111 for (j = 0; j <= 1; ++j) {
5112 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
5113 }
5114 }
5115
5116 if (v->UseMinimumRequiredDCFCLK == true)
5117 UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes);
5118
5119 for (i = 0; i < v->soc.num_states; ++i) {
5120 for (j = 0; j <= 1; ++j) {
5121 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
5122 v->ReturnBusWidth * v->DCFCLKState[i][j],
5123 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
5124 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth;
5125 double PixelDataOnlyReturnBWPerState = dml_min(
5126 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5127 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
5128 double PixelMixedWithVMDataReturnBWPerState = dml_min(
5129 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5130 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
5131
5132 if (v->HostVMEnable != true) {
5133 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState;
5134 } else {
5135 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState;
5136 }
5137 }
5138 }
5139
5140 //Re-ordering Buffer Support Check
5141 for (i = 0; i < v->soc.num_states; ++i) {
5142 for (j = 0; j <= 1; ++j) {
5143 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
5144 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
5145 v->ROBSupport[i][j] = true;
5146 } else {
5147 v->ROBSupport[i][j] = false;
5148 }
5149 }
5150 }
5151
5152 //Vertical Active BW support check
5153
5154 MaxTotalVActiveRDBandwidth = 0;
5155 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5156 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
5157 }
5158
5159 for (i = 0; i < v->soc.num_states; ++i) {
5160 for (j = 0; j <= 1; ++j) {
5161 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
5162 dml_min(
5163 v->ReturnBusWidth * v->DCFCLKState[i][j],
5164 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5165 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
5166 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5167 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100);
5168
5169 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
5170 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
5171 } else {
5172 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
5173 }
5174 }
5175 }
5176
5177 v->UrgentLatency = CalculateUrgentLatency(
5178 v->UrgentLatencyPixelDataOnly,
5179 v->UrgentLatencyPixelMixedWithVMData,
5180 v->UrgentLatencyVMDataOnly,
5181 v->DoUrgentLatencyAdjustment,
5182 v->UrgentLatencyAdjustmentFabricClockComponent,
5183 v->UrgentLatencyAdjustmentFabricClockReference,
5184 v->FabricClock);
5185 //Prefetch Check
5186 for (i = 0; i < v->soc.num_states; ++i) {
5187 for (j = 0; j <= 1; ++j) {
5188 double VMDataOnlyReturnBWPerState;
5189 double HostVMInefficiencyFactor = 1;
5190 int NextPrefetchModeState = MinPrefetchMode;
5191 bool UnboundedRequestEnabledThisState = false;
5192 int CompressedBufferSizeInkByteThisState = 0;
5193 double dummy;
5194
5195 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
5196
5197 v->BandwidthWithoutPrefetchSupported[i][j] = true;
5198 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]
5199 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) {
5200 v->BandwidthWithoutPrefetchSupported[i][j] = false;
5201 }
5202
5203 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5204 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
5205 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
5206 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
5207 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
5208 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
5209 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
5210 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
5211 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
5212 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
5213 }
5214
5215 VMDataOnlyReturnBWPerState = dml_min(
5216 dml_min(
5217 v->ReturnBusWidth * v->DCFCLKState[i][j],
5218 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5219 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5220 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5221 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
5222 if (v->GPUVMEnable && v->HostVMEnable)
5223 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState;
5224
5225 v->ExtraLatency = CalculateExtraLatency(
5226 v->RoundTripPingLatencyCycles,
5227 ReorderingBytes,
5228 v->DCFCLKState[i][j],
5229 v->TotalNumberOfActiveDPP[i][j],
5230 v->PixelChunkSizeInKByte,
5231 v->TotalNumberOfDCCActiveDPP[i][j],
5232 v->MetaChunkSize,
5233 v->ReturnBWPerState[i][j],
5234 v->GPUVMEnable,
5235 v->HostVMEnable,
5236 v->NumberOfActivePlanes,
5237 v->NoOfDPPThisState,
5238 v->dpte_group_bytes,
5239 HostVMInefficiencyFactor,
5240 v->HostVMMinPageSize,
5241 v->HostVMMaxNonCachedPageTableLevels);
5242
5243 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5244 do {
5245 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
5246 v->MaxVStartup = v->NextMaxVStartup;
5247
5248 v->TWait = CalculateTWait(
5249 v->PrefetchModePerState[i][j],
5250 v->DRAMClockChangeLatency,
5251 v->UrgLatency[i],
5252 v->SREnterPlusExitTime);
5253
5254 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5255 CalculatePrefetchSchedulePerPlane(mode_lib,
5256 HostVMInefficiencyFactor,
5257 i, j, k);
5258 }
5259
5260 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5261 CalculateUrgentBurstFactor(
5262 v->swath_width_luma_ub_this_state[k],
5263 v->swath_width_chroma_ub_this_state[k],
5264 v->SwathHeightYThisState[k],
5265 v->SwathHeightCThisState[k],
5266 v->HTotal[k] / v->PixelClock[k],
5267 v->UrgLatency[i],
5268 v->CursorBufferSize,
5269 v->CursorWidth[k][0],
5270 v->CursorBPP[k][0],
5271 v->VRatioPreY[i][j][k],
5272 v->VRatioPreC[i][j][k],
5273 v->BytePerPixelInDETY[k],
5274 v->BytePerPixelInDETC[k],
5275 v->DETBufferSizeYThisState[k],
5276 v->DETBufferSizeCThisState[k],
5277 &v->UrgentBurstFactorCursorPre[k],
5278 &v->UrgentBurstFactorLumaPre[k],
5279 &v->UrgentBurstFactorChromaPre[k],
5280 &v->NotUrgentLatencyHidingPre[k]);
5281 }
5282
5283 v->MaximumReadBandwidthWithPrefetch = 0.0;
5284 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5285 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
5286 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k];
5287
5288 v->MaximumReadBandwidthWithPrefetch =
5289 v->MaximumReadBandwidthWithPrefetch
5290 + dml_max3(
5291 v->VActivePixelBandwidth[i][j][k]
5292 + v->VActiveCursorBandwidth[i][j][k]
5293 + v->NoOfDPP[i][j][k]
5294 * (v->meta_row_bandwidth[i][j][k]
5295 + v->dpte_row_bandwidth[i][j][k]),
5296 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5297 v->NoOfDPP[i][j][k]
5298 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5299 * v->UrgentBurstFactorLumaPre[k]
5300 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5301 * v->UrgentBurstFactorChromaPre[k])
5302 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5303 }
5304
5305 v->NotEnoughUrgentLatencyHidingPre = false;
5306 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5307 if (v->NotUrgentLatencyHidingPre[k] == true) {
5308 v->NotEnoughUrgentLatencyHidingPre = true;
5309 }
5310 }
5311
5312 v->PrefetchSupported[i][j] = true;
5313 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
5314 || v->NotEnoughUrgentLatencyHidingPre == 1) {
5315 v->PrefetchSupported[i][j] = false;
5316 }
5317 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5318 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
5319 || v->NoTimeForPrefetch[i][j][k] == true) {
5320 v->PrefetchSupported[i][j] = false;
5321 }
5322 }
5323
5324 v->DynamicMetadataSupported[i][j] = true;
5325 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5326 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
5327 v->DynamicMetadataSupported[i][j] = false;
5328 }
5329 }
5330
5331 v->VRatioInPrefetchSupported[i][j] = true;
5332 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5333 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
5334 v->VRatioInPrefetchSupported[i][j] = false;
5335 }
5336 }
5337 v->AnyLinesForVMOrRowTooLarge = false;
5338 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5339 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
5340 v->AnyLinesForVMOrRowTooLarge = true;
5341 }
5342 }
5343
5344 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5345
5346 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
5347 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
5348 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5349 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
5350 - dml_max(
5351 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
5352 v->NoOfDPP[i][j][k]
5353 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5354 * v->UrgentBurstFactorLumaPre[k]
5355 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5356 * v->UrgentBurstFactorChromaPre[k])
5357 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5358 }
5359 v->TotImmediateFlipBytes = 0.0;
5360 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5361 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
5362 + v->NoOfDPP[i][j][k] * (v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k]
5363 + v->DPTEBytesPerRow[i][j][k]);
5364 }
5365
5366 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5367 CalculateFlipSchedule(
5368 mode_lib,
5369 k,
5370 HostVMInefficiencyFactor,
5371 v->ExtraLatency,
5372 v->UrgLatency[i],
5373 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5374 v->MetaRowBytes[i][j][k],
5375 v->DPTEBytesPerRow[i][j][k]);
5376 }
5377 v->total_dcn_read_bw_with_flip = 0.0;
5378 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5379 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
5380 + dml_max3(
5381 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5382 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
5383 + v->VActiveCursorBandwidth[i][j][k],
5384 v->NoOfDPP[i][j][k]
5385 * (v->final_flip_bw[k]
5386 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5387 * v->UrgentBurstFactorLumaPre[k]
5388 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5389 * v->UrgentBurstFactorChromaPre[k])
5390 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5391 }
5392 v->ImmediateFlipSupportedForState[i][j] = true;
5393 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
5394 v->ImmediateFlipSupportedForState[i][j] = false;
5395 }
5396 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5397 if (v->ImmediateFlipSupportedForPipe[k] == false) {
5398 v->ImmediateFlipSupportedForState[i][j] = false;
5399 }
5400 }
5401 } else {
5402 v->ImmediateFlipSupportedForState[i][j] = false;
5403 }
5404
5405 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) {
5406 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5407 NextPrefetchModeState = NextPrefetchModeState + 1;
5408 } else {
5409 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5410 }
5411 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5412 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5413 && ((v->HostVMEnable == false &&
5414 v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5415 || v->ImmediateFlipSupportedForState[i][j] == true))
5416 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5417
5418 CalculateUnboundedRequestAndCompressedBufferSize(
5419 v->DETBufferSizeInKByte[0],
5420 v->ConfigReturnBufferSizeInKByte,
5421 v->UseUnboundedRequesting,
5422 v->TotalNumberOfActiveDPP[i][j],
5423 NoChroma,
5424 v->MaxNumDPP,
5425 v->CompressedBufferSegmentSizeInkByte,
5426 v->Output,
5427 &UnboundedRequestEnabledThisState,
5428 &CompressedBufferSizeInkByteThisState);
5429
5430 CalculateWatermarksAndDRAMSpeedChangeSupport(
5431 mode_lib,
5432 v->PrefetchModePerState[i][j],
5433 v->DCFCLKState[i][j],
5434 v->ReturnBWPerState[i][j],
5435 v->UrgLatency[i],
5436 v->ExtraLatency,
5437 v->SOCCLKPerState[i],
5438 v->ProjectedDCFCLKDeepSleep[i][j],
5439 v->DETBufferSizeYThisState,
5440 v->DETBufferSizeCThisState,
5441 v->SwathHeightYThisState,
5442 v->SwathHeightCThisState,
5443 v->SwathWidthYThisState,
5444 v->SwathWidthCThisState,
5445 v->NoOfDPPThisState,
5446 v->BytePerPixelInDETY,
5447 v->BytePerPixelInDETC,
5448 UnboundedRequestEnabledThisState,
5449 CompressedBufferSizeInkByteThisState,
5450 &v->DRAMClockChangeSupport[i][j],
5451 &dummy,
5452 &dummy,
5453 &dummy,
5454 &dummy);
5455 }
5456 }
5457
5458 /*PTE Buffer Size Check*/
5459 for (i = 0; i < v->soc.num_states; i++) {
5460 for (j = 0; j < 2; j++) {
5461 v->PTEBufferSizeNotExceeded[i][j] = true;
5462 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5463 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5464 v->PTEBufferSizeNotExceeded[i][j] = false;
5465 }
5466 }
5467 }
5468 }
5469
5470 /*Cursor Support Check*/
5471 v->CursorSupport = true;
5472 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5473 if (v->CursorWidth[k][0] > 0.0) {
5474 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5475 v->CursorSupport = false;
5476 }
5477 }
5478 }
5479
5480 /*Valid Pitch Check*/
5481 v->PitchSupport = true;
5482 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5483 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
5484 if (v->DCCEnable[k] == true) {
5485 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
5486 } else {
5487 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5488 }
5489 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5490 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe
5491 && v->SourcePixelFormat[k] != dm_mono_8) {
5492 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
5493 if (v->DCCEnable[k] == true) {
5494 v->AlignedDCCMetaPitchC[k] = dml_ceil(
5495 dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]),
5496 64.0 * v->Read256BlockWidthC[k]);
5497 } else {
5498 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5499 }
5500 } else {
5501 v->AlignedCPitch[k] = v->PitchC[k];
5502 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5503 }
5504 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k]
5505 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5506 v->PitchSupport = false;
5507 }
5508 }
5509
5510 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5511 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) {
5512 ViewportExceedsSurface = true;
5513 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
5514 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8
5515 && v->SourcePixelFormat[k] != dm_rgbe) {
5516 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k]
5517 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5518 ViewportExceedsSurface = true;
5519 }
5520 }
5521 }
5522 }
5523
5524 /*Mode Support, Voltage State and SOC Configuration*/
5525 for (i = v->soc.num_states - 1; i >= 0; i--) {
5526 for (j = 0; j < 2; j++) {
5527 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true
5528 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP
5529 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false
5530 && v->DTBCLKRequiredMoreThanSupported[i] == false
5531 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true
5532 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true
5533 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true
5534 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false
5535 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true
5536 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5537 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false
5538 && ((v->HostVMEnable == false
5539 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5540 || v->ImmediateFlipSupportedForState[i][j] == true)
5541 && FMTBufferExceeded == false) {
5542 v->ModeSupport[i][j] = true;
5543 } else {
5544 v->ModeSupport[i][j] = false;
5545 }
5546 }
5547 }
5548 for (i = v->soc.num_states; i >= 0; i--) {
5549 for (j = 0; j < 2; j++) {
5550 enum dm_validation_status status = DML_VALIDATION_OK;
5551
5552 if (!v->ScaleRatioAndTapsSupport) {
5553 status = DML_FAIL_SCALE_RATIO_TAP;
5554 } else if (!v->SourceFormatPixelAndScanSupport) {
5555 status = DML_FAIL_SOURCE_PIXEL_FORMAT;
5556 } else if (!v->ViewportSizeSupport[i][j]) {
5557 status = DML_FAIL_VIEWPORT_SIZE;
5558 } else if (P2IWith420) {
5559 status = DML_FAIL_P2I_WITH_420;
5560 } else if (DSCOnlyIfNecessaryWithBPP) {
5561 status = DML_FAIL_DSC_ONLY_IF_NECESSARY_WITH_BPP;
5562 } else if (DSC422NativeNotSupported) {
5563 status = DML_FAIL_NOT_DSC422_NATIVE;
5564 } else if (!v->ODMCombine4To1SupportCheckOK[i]) {
5565 status = DML_FAIL_ODM_COMBINE4TO1;
5566 } else if (v->NotEnoughDSCUnits[i]) {
5567 status = DML_FAIL_NOT_ENOUGH_DSC;
5568 } else if (!v->ROBSupport[i][j]) {
5569 status = DML_FAIL_REORDERING_BUFFER;
5570 } else if (!v->DISPCLK_DPPCLK_Support[i][j]) {
5571 status = DML_FAIL_DISPCLK_DPPCLK;
5572 } else if (!v->TotalAvailablePipesSupport[i][j]) {
5573 status = DML_FAIL_TOTAL_AVAILABLE_PIPES;
5574 } else if (!EnoughWritebackUnits) {
5575 status = DML_FAIL_ENOUGH_WRITEBACK_UNITS;
5576 } else if (!v->WritebackLatencySupport) {
5577 status = DML_FAIL_WRITEBACK_LATENCY;
5578 } else if (!v->WritebackScaleRatioAndTapsSupport) {
5579 status = DML_FAIL_WRITEBACK_SCALE_RATIO_TAP;
5580 } else if (!v->CursorSupport) {
5581 status = DML_FAIL_CURSOR_SUPPORT;
5582 } else if (!v->PitchSupport) {
5583 status = DML_FAIL_PITCH_SUPPORT;
5584 } else if (ViewportExceedsSurface) {
5585 status = DML_FAIL_VIEWPORT_EXCEEDS_SURFACE;
5586 } else if (!v->PrefetchSupported[i][j]) {
5587 status = DML_FAIL_PREFETCH_SUPPORT;
5588 } else if (!v->DynamicMetadataSupported[i][j]) {
5589 status = DML_FAIL_DYNAMIC_METADATA;
5590 } else if (!v->TotalVerticalActiveBandwidthSupport[i][j]) {
5591 status = DML_FAIL_TOTAL_V_ACTIVE_BW;
5592 } else if (!v->VRatioInPrefetchSupported[i][j]) {
5593 status = DML_FAIL_V_RATIO_PREFETCH;
5594 } else if (!v->PTEBufferSizeNotExceeded[i][j]) {
5595 status = DML_FAIL_PTE_BUFFER_SIZE;
5596 } else if (v->NonsupportedDSCInputBPC) {
5597 status = DML_FAIL_DSC_INPUT_BPC;
5598 } else if ((v->HostVMEnable
5599 && !v->ImmediateFlipSupportedForState[i][j])) {
5600 status = DML_FAIL_HOST_VM_IMMEDIATE_FLIP;
5601 } else if (FMTBufferExceeded) {
5602 status = DML_FAIL_FMT_BUFFER_EXCEEDED;
5603 }
5604 mode_lib->vba.ValidationStatus[i] = status;
5605 }
5606 }
5607
5608 {
5609 unsigned int MaximumMPCCombine = 0;
5610
5611 for (i = v->soc.num_states; i >= 0; i--) {
5612 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5613 v->VoltageLevel = i;
5614 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5615 if (v->ModeSupport[i][0] == true) {
5616 MaximumMPCCombine = 0;
5617 } else {
5618 MaximumMPCCombine = 1;
5619 }
5620 }
5621 }
5622 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5623 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5624 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5625 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5626 }
5627 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5628 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5629 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5630 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5631 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5632 v->maxMpcComb = MaximumMPCCombine;
5633 }
5634 }
5635
5636 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5637 struct display_mode_lib *mode_lib,
5638 unsigned int PrefetchMode,
5639 double DCFCLK,
5640 double ReturnBW,
5641 double UrgentLatency,
5642 double ExtraLatency,
5643 double SOCCLK,
5644 double DCFCLKDeepSleep,
5645 unsigned int DETBufferSizeY[],
5646 unsigned int DETBufferSizeC[],
5647 unsigned int SwathHeightY[],
5648 unsigned int SwathHeightC[],
5649 double SwathWidthY[],
5650 double SwathWidthC[],
5651 unsigned int DPPPerPlane[],
5652 double BytePerPixelDETY[],
5653 double BytePerPixelDETC[],
5654 bool UnboundedRequestEnabled,
5655 unsigned int CompressedBufferSizeInkByte,
5656 enum clock_change_support *DRAMClockChangeSupport,
5657 double *StutterExitWatermark,
5658 double *StutterEnterPlusExitWatermark,
5659 double *Z8StutterExitWatermark,
5660 double *Z8StutterEnterPlusExitWatermark)
5661 {
5662 struct vba_vars_st *v = &mode_lib->vba;
5663 double EffectiveLBLatencyHidingY;
5664 double EffectiveLBLatencyHidingC;
5665 double LinesInDETY[DC__NUM_DPP__MAX];
5666 double LinesInDETC;
5667 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
5668 unsigned int LinesInDETCRoundedDownToSwath;
5669 double FullDETBufferingTimeY;
5670 double FullDETBufferingTimeC;
5671 double ActiveDRAMClockChangeLatencyMarginY;
5672 double ActiveDRAMClockChangeLatencyMarginC;
5673 double WritebackDRAMClockChangeLatencyMargin;
5674 double PlaneWithMinActiveDRAMClockChangeMargin;
5675 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
5676 double WritebackDRAMClockChangeLatencyHiding;
5677 double TotalPixelBW = 0.0;
5678 int k, j;
5679
5680 v->UrgentWatermark = UrgentLatency + ExtraLatency;
5681
5682 #ifdef __DML_VBA_DEBUG__
5683 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
5684 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
5685 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark);
5686 #endif
5687
5688 v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark;
5689
5690 #ifdef __DML_VBA_DEBUG__
5691 dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency);
5692 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark);
5693 #endif
5694
5695 v->TotalActiveWriteback = 0;
5696 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5697 if (v->WritebackEnable[k] == true) {
5698 v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
5699 }
5700 }
5701
5702 if (v->TotalActiveWriteback <= 1) {
5703 v->WritebackUrgentWatermark = v->WritebackLatency;
5704 } else {
5705 v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5706 }
5707
5708 if (v->TotalActiveWriteback <= 1) {
5709 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency;
5710 } else {
5711 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5712 }
5713
5714 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5715 TotalPixelBW = TotalPixelBW
5716 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k])
5717 / (v->HTotal[k] / v->PixelClock[k]);
5718 }
5719
5720 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5721 double EffectiveDETBufferSizeY = DETBufferSizeY[k];
5722
5723 v->LBLatencyHidingSourceLinesY = dml_min(
5724 (double) v->MaxLineBufferLines,
5725 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
5726
5727 v->LBLatencyHidingSourceLinesC = dml_min(
5728 (double) v->MaxLineBufferLines,
5729 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
5730
5731 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
5732
5733 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
5734
5735 if (UnboundedRequestEnabled) {
5736 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
5737 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
5738 }
5739
5740 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
5741 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5742 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
5743 if (BytePerPixelDETC[k] > 0) {
5744 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5745 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5746 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k];
5747 } else {
5748 LinesInDETC = 0;
5749 FullDETBufferingTimeC = 999999;
5750 }
5751
5752 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
5753 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5754
5755 if (v->NumberOfActivePlanes > 1) {
5756 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
5757 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k];
5758 }
5759
5760 if (BytePerPixelDETC[k] > 0) {
5761 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
5762 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5763
5764 if (v->NumberOfActivePlanes > 1) {
5765 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
5766 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k];
5767 }
5768 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
5769 } else {
5770 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
5771 }
5772
5773 if (v->WritebackEnable[k] == true) {
5774 WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024
5775 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
5776 if (v->WritebackPixelFormat[k] == dm_444_64) {
5777 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
5778 }
5779 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
5780 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
5781 }
5782 }
5783
5784 v->MinActiveDRAMClockChangeMargin = 999999;
5785 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5786 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5787 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
5788 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
5789 if (v->BlendingAndTiming[k] == k) {
5790 PlaneWithMinActiveDRAMClockChangeMargin = k;
5791 } else {
5792 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
5793 if (v->BlendingAndTiming[k] == j) {
5794 PlaneWithMinActiveDRAMClockChangeMargin = j;
5795 }
5796 }
5797 }
5798 }
5799 }
5800
5801 v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ;
5802
5803 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5804 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5805 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
5806 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5807 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
5808 }
5809 }
5810
5811 v->TotalNumberOfActiveOTG = 0;
5812
5813 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5814 if (v->BlendingAndTiming[k] == k) {
5815 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
5816 }
5817 }
5818
5819 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
5820 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5821 } else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
5822 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
5823 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5824 } else {
5825 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
5826 }
5827
5828 *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5829 *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
5830 *Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5831 *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5832
5833 #ifdef __DML_VBA_DEBUG__
5834 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
5835 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark);
5836 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark);
5837 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark);
5838 #endif
5839 }
5840
5841 static void CalculateDCFCLKDeepSleep(
5842 struct display_mode_lib *mode_lib,
5843 unsigned int NumberOfActivePlanes,
5844 int BytePerPixelY[],
5845 int BytePerPixelC[],
5846 double VRatio[],
5847 double VRatioChroma[],
5848 double SwathWidthY[],
5849 double SwathWidthC[],
5850 unsigned int DPPPerPlane[],
5851 double HRatio[],
5852 double HRatioChroma[],
5853 double PixelClock[],
5854 double PSCL_THROUGHPUT[],
5855 double PSCL_THROUGHPUT_CHROMA[],
5856 double DPPCLK[],
5857 double ReadBandwidthLuma[],
5858 double ReadBandwidthChroma[],
5859 int ReturnBusWidth,
5860 double *DCFCLKDeepSleep)
5861 {
5862 struct vba_vars_st *v = &mode_lib->vba;
5863 double DisplayPipeLineDeliveryTimeLuma;
5864 double DisplayPipeLineDeliveryTimeChroma;
5865 double ReadBandwidth = 0.0;
5866 int k;
5867
5868 for (k = 0; k < NumberOfActivePlanes; ++k) {
5869
5870 if (VRatio[k] <= 1) {
5871 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5872 } else {
5873 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5874 }
5875 if (BytePerPixelC[k] == 0) {
5876 DisplayPipeLineDeliveryTimeChroma = 0;
5877 } else {
5878 if (VRatioChroma[k] <= 1) {
5879 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5880 } else {
5881 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5882 }
5883 }
5884
5885 if (BytePerPixelC[k] > 0) {
5886 v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
5887 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
5888 } else {
5889 v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
5890 }
5891 v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
5892
5893 }
5894
5895 for (k = 0; k < NumberOfActivePlanes; ++k) {
5896 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
5897 }
5898
5899 *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth);
5900
5901 for (k = 0; k < NumberOfActivePlanes; ++k) {
5902 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]);
5903 }
5904 }
5905
/*
 * CalculateUrgentBurstFactor - urgent burst factors for cursor, luma, chroma.
 *
 * For each of the cursor buffer and the luma / chroma DET buffers, the time
 * the buffer can cover is computed as (lines held) * LineTime / vertical
 * ratio.  If that time does not exceed UrgentLatency the corresponding factor
 * is set to 0 and *NotEnoughUrgentLatencyHiding is raised; otherwise the
 * factor is time / (time - UrgentLatency).  A vertical ratio that is not
 * greater than 0 yields a factor of 1.
 *
 * Outputs:
 *   *NotEnoughUrgentLatencyHiding - cleared on entry, set to 1 when any
 *                                   evaluated buffer cannot hide UrgentLatency.
 *   *UrgentBurstFactorCursor      - written only when CursorWidth > 0.
 *   *UrgentBurstFactorLuma        - always written.
 *   *UrgentBurstFactorChroma      - written only when BytePerPixelInDETC > 0.
 *
 * The chroma path uses VRatioC.  It previously tested and divided by the luma
 * VRatio, leaving the VRatioC parameter unused and giving a wrong chroma
 * buffering time whenever the two ratios differ.  The cursor path keeps using
 * VRatio as before.
 */
static void CalculateUrgentBurstFactor(
		int swath_width_luma_ub,
		int swath_width_chroma_ub,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double DETBufferSizeY,
		double DETBufferSizeC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorChroma,
		bool *NotEnoughUrgentLatencyHiding)
{
	double LinesInDETLuma;
	double LinesInDETChroma;
	unsigned int LinesInCursorBuffer;
	double CursorBufferSizeInTime;
	double DETBufferSizeInTimeLuma;
	double DETBufferSizeInTimeChroma;

	*NotEnoughUrgentLatencyHiding = 0;

	if (CursorWidth > 0) {
		/*
		 * CursorBufferSize is scaled by 1024 and divided by the bytes in
		 * one cursor line (CursorWidth * CursorBPP / 8); the line count is
		 * then 1 << floor(dml_log2(...)) of that quotient.
		 */
		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
		if (VRatio > 0) {
			CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
			if (CursorBufferSizeInTime - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = 1;
				*UrgentBurstFactorCursor = 0;
			} else {
				*UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorCursor = 1;
		}
	}

	/* Luma: lines held in the DET, rounded down to whole swaths. */
	LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
	if (VRatio > 0) {
		DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
		if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
			*NotEnoughUrgentLatencyHiding = 1;
			*UrgentBurstFactorLuma = 0;
		} else {
			*UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
		}
	} else {
		*UrgentBurstFactorLuma = 1;
	}

	/* Chroma: same computation, driven by the chroma vertical ratio. */
	if (BytePerPixelInDETC > 0) {
		LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
		if (VRatioC > 0) {
			DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC;
			if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = 1;
				*UrgentBurstFactorChroma = 0;
			} else {
				*UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorChroma = 1;
		}
	}
}
5979
5980 static void CalculatePixelDeliveryTimes(
5981 unsigned int NumberOfActivePlanes,
5982 double VRatio[],
5983 double VRatioChroma[],
5984 double VRatioPrefetchY[],
5985 double VRatioPrefetchC[],
5986 unsigned int swath_width_luma_ub[],
5987 unsigned int swath_width_chroma_ub[],
5988 unsigned int DPPPerPlane[],
5989 double HRatio[],
5990 double HRatioChroma[],
5991 double PixelClock[],
5992 double PSCL_THROUGHPUT[],
5993 double PSCL_THROUGHPUT_CHROMA[],
5994 double DPPCLK[],
5995 int BytePerPixelC[],
5996 enum scan_direction_class SourceScan[],
5997 unsigned int NumberOfCursors[],
5998 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
5999 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
6000 unsigned int BlockWidth256BytesY[],
6001 unsigned int BlockHeight256BytesY[],
6002 unsigned int BlockWidth256BytesC[],
6003 unsigned int BlockHeight256BytesC[],
6004 double DisplayPipeLineDeliveryTimeLuma[],
6005 double DisplayPipeLineDeliveryTimeChroma[],
6006 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
6007 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
6008 double DisplayPipeRequestDeliveryTimeLuma[],
6009 double DisplayPipeRequestDeliveryTimeChroma[],
6010 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
6011 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
6012 double CursorRequestDeliveryTime[],
6013 double CursorRequestDeliveryTimePrefetch[])
6014 {
6015 double req_per_swath_ub;
6016 int k;
6017
6018 for (k = 0; k < NumberOfActivePlanes; ++k) {
6019 if (VRatio[k] <= 1) {
6020 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
6021 } else {
6022 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
6023 }
6024
6025 if (BytePerPixelC[k] == 0) {
6026 DisplayPipeLineDeliveryTimeChroma[k] = 0;
6027 } else {
6028 if (VRatioChroma[k] <= 1) {
6029 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
6030 } else {
6031 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
6032 }
6033 }
6034
6035 if (VRatioPrefetchY[k] <= 1) {
6036 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
6037 } else {
6038 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
6039 }
6040
6041 if (BytePerPixelC[k] == 0) {
6042 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
6043 } else {
6044 if (VRatioPrefetchC[k] <= 1) {
6045 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
6046 } else {
6047 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
6048 }
6049 }
6050 }
6051
6052 for (k = 0; k < NumberOfActivePlanes; ++k) {
6053 if (SourceScan[k] != dm_vert) {
6054 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
6055 } else {
6056 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
6057 }
6058 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
6059 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
6060 if (BytePerPixelC[k] == 0) {
6061 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
6062 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
6063 } else {
6064 if (SourceScan[k] != dm_vert) {
6065 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
6066 } else {
6067 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
6068 }
6069 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
6070 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
6071 }
6072 #ifdef __DML_VBA_DEBUG__
6073 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
6074 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
6075 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
6076 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
6077 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
6078 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
6079 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
6080 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
6081 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
6082 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
6083 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
6084 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
6085 #endif
6086 }
6087
6088 for (k = 0; k < NumberOfActivePlanes; ++k) {
6089 int cursor_req_per_width;
6090
6091 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
6092 if (NumberOfCursors[k] > 0) {
6093 if (VRatio[k] <= 1) {
6094 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6095 } else {
6096 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6097 }
6098 if (VRatioPrefetchY[k] <= 1) {
6099 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6100 } else {
6101 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6102 }
6103 } else {
6104 CursorRequestDeliveryTime[k] = 0;
6105 CursorRequestDeliveryTimePrefetch[k] = 0;
6106 }
6107 #ifdef __DML_VBA_DEBUG__
6108 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]);
6109 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
6110 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
6111 #endif
6112 }
6113 }
6114
6115 static void CalculateMetaAndPTETimes(
6116 int NumberOfActivePlanes,
6117 bool GPUVMEnable,
6118 int MetaChunkSize,
6119 int MinMetaChunkSizeBytes,
6120 int HTotal[],
6121 double VRatio[],
6122 double VRatioChroma[],
6123 double DestinationLinesToRequestRowInVBlank[],
6124 double DestinationLinesToRequestRowInImmediateFlip[],
6125 bool DCCEnable[],
6126 double PixelClock[],
6127 int BytePerPixelY[],
6128 int BytePerPixelC[],
6129 enum scan_direction_class SourceScan[],
6130 int dpte_row_height[],
6131 int dpte_row_height_chroma[],
6132 int meta_row_width[],
6133 int meta_row_width_chroma[],
6134 int meta_row_height[],
6135 int meta_row_height_chroma[],
6136 int meta_req_width[],
6137 int meta_req_width_chroma[],
6138 int meta_req_height[],
6139 int meta_req_height_chroma[],
6140 int dpte_group_bytes[],
6141 int PTERequestSizeY[],
6142 int PTERequestSizeC[],
6143 int PixelPTEReqWidthY[],
6144 int PixelPTEReqHeightY[],
6145 int PixelPTEReqWidthC[],
6146 int PixelPTEReqHeightC[],
6147 int dpte_row_width_luma_ub[],
6148 int dpte_row_width_chroma_ub[],
6149 double DST_Y_PER_PTE_ROW_NOM_L[],
6150 double DST_Y_PER_PTE_ROW_NOM_C[],
6151 double DST_Y_PER_META_ROW_NOM_L[],
6152 double DST_Y_PER_META_ROW_NOM_C[],
6153 double TimePerMetaChunkNominal[],
6154 double TimePerChromaMetaChunkNominal[],
6155 double TimePerMetaChunkVBlank[],
6156 double TimePerChromaMetaChunkVBlank[],
6157 double TimePerMetaChunkFlip[],
6158 double TimePerChromaMetaChunkFlip[],
6159 double time_per_pte_group_nom_luma[],
6160 double time_per_pte_group_vblank_luma[],
6161 double time_per_pte_group_flip_luma[],
6162 double time_per_pte_group_nom_chroma[],
6163 double time_per_pte_group_vblank_chroma[],
6164 double time_per_pte_group_flip_chroma[])
6165 {
6166 unsigned int meta_chunk_width;
6167 unsigned int min_meta_chunk_width;
6168 unsigned int meta_chunk_per_row_int;
6169 unsigned int meta_row_remainder;
6170 unsigned int meta_chunk_threshold;
6171 unsigned int meta_chunks_per_row_ub;
6172 unsigned int meta_chunk_width_chroma;
6173 unsigned int min_meta_chunk_width_chroma;
6174 unsigned int meta_chunk_per_row_int_chroma;
6175 unsigned int meta_row_remainder_chroma;
6176 unsigned int meta_chunk_threshold_chroma;
6177 unsigned int meta_chunks_per_row_ub_chroma;
6178 unsigned int dpte_group_width_luma;
6179 unsigned int dpte_groups_per_row_luma_ub;
6180 unsigned int dpte_group_width_chroma;
6181 unsigned int dpte_groups_per_row_chroma_ub;
6182 int k;
6183
6184 for (k = 0; k < NumberOfActivePlanes; ++k) {
6185 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
6186 if (BytePerPixelC[k] == 0) {
6187 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
6188 } else {
6189 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
6190 }
6191 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
6192 if (BytePerPixelC[k] == 0) {
6193 DST_Y_PER_META_ROW_NOM_C[k] = 0;
6194 } else {
6195 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
6196 }
6197 }
6198
6199 for (k = 0; k < NumberOfActivePlanes; ++k) {
6200 if (DCCEnable[k] == true) {
6201 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
6202 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
6203 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
6204 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
6205 if (SourceScan[k] != dm_vert) {
6206 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
6207 } else {
6208 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
6209 }
6210 if (meta_row_remainder <= meta_chunk_threshold) {
6211 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
6212 } else {
6213 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
6214 }
6215 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6216 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6217 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6218 if (BytePerPixelC[k] == 0) {
6219 TimePerChromaMetaChunkNominal[k] = 0;
6220 TimePerChromaMetaChunkVBlank[k] = 0;
6221 TimePerChromaMetaChunkFlip[k] = 0;
6222 } else {
6223 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6224 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6225 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
6226 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
6227 if (SourceScan[k] != dm_vert) {
6228 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
6229 } else {
6230 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
6231 }
6232 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
6233 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
6234 } else {
6235 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
6236 }
6237 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6238 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6239 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6240 }
6241 } else {
6242 TimePerMetaChunkNominal[k] = 0;
6243 TimePerMetaChunkVBlank[k] = 0;
6244 TimePerMetaChunkFlip[k] = 0;
6245 TimePerChromaMetaChunkNominal[k] = 0;
6246 TimePerChromaMetaChunkVBlank[k] = 0;
6247 TimePerChromaMetaChunkFlip[k] = 0;
6248 }
6249 }
6250
6251 for (k = 0; k < NumberOfActivePlanes; ++k) {
6252 if (GPUVMEnable == true) {
6253 if (SourceScan[k] != dm_vert) {
6254 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
6255 } else {
6256 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
6257 }
6258 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
6259 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6260 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6261 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6262 if (BytePerPixelC[k] == 0) {
6263 time_per_pte_group_nom_chroma[k] = 0;
6264 time_per_pte_group_vblank_chroma[k] = 0;
6265 time_per_pte_group_flip_chroma[k] = 0;
6266 } else {
6267 if (SourceScan[k] != dm_vert) {
6268 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
6269 } else {
6270 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
6271 }
6272 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
6273 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6274 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6275 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6276 }
6277 } else {
6278 time_per_pte_group_nom_luma[k] = 0;
6279 time_per_pte_group_vblank_luma[k] = 0;
6280 time_per_pte_group_flip_luma[k] = 0;
6281 time_per_pte_group_nom_chroma[k] = 0;
6282 time_per_pte_group_vblank_chroma[k] = 0;
6283 time_per_pte_group_flip_chroma[k] = 0;
6284 }
6285 }
6286 }
6287
/*
 * CalculateVMGroupAndRequestTimes - per-plane time budget for each VM group
 * and each VM request, for the vblank and the immediate-flip case.
 *
 * A plane is evaluated only when GPUVMEnable is set and either DCCEnable[k]
 * is set or GPUVMMaxPageTableLevels > 1; otherwise all four outputs for that
 * plane are 0.
 *
 * For an evaluated plane, a group count and a request count are built from:
 *   - DCC off:                    the dpde0 byte counts only;
 *   - DCC on, one page-table level: the meta PTE byte counts only;
 *   - DCC on, more levels:          both, plus a constant (2 with chroma,
 *                                   1 without) added to the group count.
 * Group counts are ceilings of bytes / vm_group_bytes[k]; request counts are
 * bytes / 64.  Chroma terms are included only when BytePerPixelC[k] > 0.
 *
 * Each output is (destination lines) * HTotal / PixelClock / count, and is
 * halved when GPUVMMaxPageTableLevels > 2.
 */
static void CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActivePlanes,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		int dpte_row_width_luma_ub[],
		int dpte_row_width_chroma_ub[],
		int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		int meta_pte_bytes_per_frame_ub_l[],
		int meta_pte_bytes_per_frame_ub_c[],
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	int group_count;
	int request_count;
	int plane;

	for (plane = 0; plane < NumberOfActivePlanes; ++plane) {
		/* Nothing to time for this plane: report zeros and move on. */
		if (!(GPUVMEnable == true) || (!(DCCEnable[plane] == true) && !(GPUVMMaxPageTableLevels > 1))) {
			TimePerVMGroupVBlank[plane] = 0;
			TimePerVMGroupFlip[plane] = 0;
			TimePerVMRequestVBlank[plane] = 0;
			TimePerVMRequestFlip[plane] = 0;
			continue;
		}

		if (DCCEnable[plane] == false) {
			/* Page directory entries only. */
			if (BytePerPixelC[plane] > 0) {
				group_count = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[plane]) / (double) (vm_group_bytes[plane]), 1);
				request_count = dpde0_bytes_per_frame_ub_l[plane] / 64 + dpde0_bytes_per_frame_ub_c[plane] / 64;
			} else {
				group_count = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1);
				request_count = dpde0_bytes_per_frame_ub_l[plane] / 64;
			}
		} else if (GPUVMMaxPageTableLevels == 1) {
			/* Meta PTEs only. */
			if (BytePerPixelC[plane] > 0) {
				group_count = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[plane]) / (double) (vm_group_bytes[plane]), 1);
				request_count = meta_pte_bytes_per_frame_ub_l[plane] / 64 + meta_pte_bytes_per_frame_ub_c[plane] / 64;
			} else {
				group_count = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1);
				request_count = meta_pte_bytes_per_frame_ub_l[plane] / 64;
			}
		} else {
			/* Both page directory entries and meta PTEs, plus a constant. */
			if (BytePerPixelC[plane] > 0) {
				group_count = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[plane]) / (double) (vm_group_bytes[plane]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[plane]) / (double) (vm_group_bytes[plane]), 1);
				request_count = dpde0_bytes_per_frame_ub_l[plane] / 64 + dpde0_bytes_per_frame_ub_c[plane] / 64
						+ meta_pte_bytes_per_frame_ub_l[plane] / 64 + meta_pte_bytes_per_frame_ub_c[plane] / 64;
			} else {
				group_count = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1);
				request_count = dpde0_bytes_per_frame_ub_l[plane] / 64 + meta_pte_bytes_per_frame_ub_l[plane] / 64;
			}
		}

		TimePerVMGroupVBlank[plane] = DestinationLinesToRequestVMInVBlank[plane] * HTotal[plane] / PixelClock[plane] / group_count;
		TimePerVMGroupFlip[plane] = DestinationLinesToRequestVMInImmediateFlip[plane] * HTotal[plane] / PixelClock[plane] / group_count;
		TimePerVMRequestVBlank[plane] = DestinationLinesToRequestVMInVBlank[plane] * HTotal[plane] / PixelClock[plane] / request_count;
		TimePerVMRequestFlip[plane] = DestinationLinesToRequestVMInImmediateFlip[plane] * HTotal[plane] / PixelClock[plane] / request_count;

		/* More than two page-table levels: every budget is cut in half. */
		if (GPUVMMaxPageTableLevels > 2) {
			TimePerVMGroupVBlank[plane] = TimePerVMGroupVBlank[plane] / 2;
			TimePerVMGroupFlip[plane] = TimePerVMGroupFlip[plane] / 2;
			TimePerVMRequestVBlank[plane] = TimePerVMRequestVBlank[plane] / 2;
			TimePerVMRequestFlip[plane] = TimePerVMRequestFlip[plane] / 2;
		}
	}
}
6387
6388 static void CalculateStutterEfficiency(
6389 struct display_mode_lib *mode_lib,
6390 int CompressedBufferSizeInkByte,
6391 bool UnboundedRequestEnabled,
6392 int ConfigReturnBufferSizeInKByte,
6393 int MetaFIFOSizeInKEntries,
6394 int ZeroSizeBufferEntries,
6395 int NumberOfActivePlanes,
6396 int ROBBufferSizeInKByte,
6397 double TotalDataReadBandwidth,
6398 double DCFCLK,
6399 double ReturnBW,
6400 double COMPBUF_RESERVED_SPACE_64B,
6401 double COMPBUF_RESERVED_SPACE_ZS,
6402 double SRExitTime,
6403 double SRExitZ8Time,
6404 bool SynchronizedVBlank,
6405 double Z8StutterEnterPlusExitWatermark,
6406 double StutterEnterPlusExitWatermark,
6407 bool ProgressiveToInterlaceUnitInOPP,
6408 bool Interlace[],
6409 double MinTTUVBlank[],
6410 int DPPPerPlane[],
6411 unsigned int DETBufferSizeY[],
6412 int BytePerPixelY[],
6413 double BytePerPixelDETY[],
6414 double SwathWidthY[],
6415 int SwathHeightY[],
6416 int SwathHeightC[],
6417 double NetDCCRateLuma[],
6418 double NetDCCRateChroma[],
6419 double DCCFractionOfZeroSizeRequestsLuma[],
6420 double DCCFractionOfZeroSizeRequestsChroma[],
6421 int HTotal[],
6422 int VTotal[],
6423 double PixelClock[],
6424 double VRatio[],
6425 enum scan_direction_class SourceScan[],
6426 int BlockHeight256BytesY[],
6427 int BlockWidth256BytesY[],
6428 int BlockHeight256BytesC[],
6429 int BlockWidth256BytesC[],
6430 int DCCYMaxUncompressedBlock[],
6431 int DCCCMaxUncompressedBlock[],
6432 int VActive[],
6433 bool DCCEnable[],
6434 bool WritebackEnable[],
6435 double ReadBandwidthPlaneLuma[],
6436 double ReadBandwidthPlaneChroma[],
6437 double meta_row_bw[],
6438 double dpte_row_bw[],
6439 double *StutterEfficiencyNotIncludingVBlank,
6440 double *StutterEfficiency,
6441 int *NumberOfStutterBurstsPerFrame,
6442 double *Z8StutterEfficiencyNotIncludingVBlank,
6443 double *Z8StutterEfficiency,
6444 int *Z8NumberOfStutterBurstsPerFrame,
6445 double *StutterPeriod)
6446 {
6447 struct vba_vars_st *v = &mode_lib->vba;
6448
6449 double DETBufferingTimeY;
6450 double SwathWidthYCriticalPlane = 0;
6451 double VActiveTimeCriticalPlane = 0;
6452 double FrameTimeCriticalPlane = 0;
6453 int BytePerPixelYCriticalPlane = 0;
6454 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
6455 double MinTTUVBlankCriticalPlane = 0;
6456 double TotalCompressedReadBandwidth;
6457 double TotalRowReadBandwidth;
6458 double AverageDCCCompressionRate;
6459 double EffectiveCompressedBufferSize;
6460 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
6461 double StutterBurstTime;
6462 int TotalActiveWriteback;
6463 double LinesInDETY;
6464 double LinesInDETYRoundedDownToSwath;
6465 double MaximumEffectiveCompressionLuma;
6466 double MaximumEffectiveCompressionChroma;
6467 double TotalZeroSizeRequestReadBandwidth;
6468 double TotalZeroSizeCompressedReadBandwidth;
6469 double AverageDCCZeroSizeFraction;
6470 double AverageZeroSizeCompressionRate;
6471 int TotalNumberOfActiveOTG = 0;
6472 double LastStutterPeriod = 0.0;
6473 double LastZ8StutterPeriod = 0.0;
6474 int k;
6475
6476 TotalZeroSizeRequestReadBandwidth = 0;
6477 TotalZeroSizeCompressedReadBandwidth = 0;
6478 TotalRowReadBandwidth = 0;
6479 TotalCompressedReadBandwidth = 0;
6480
6481 for (k = 0; k < NumberOfActivePlanes; ++k) {
6482 if (DCCEnable[k] == true) {
6483 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k])
6484 || DCCYMaxUncompressedBlock[k] < 256) {
6485 MaximumEffectiveCompressionLuma = 2;
6486 } else {
6487 MaximumEffectiveCompressionLuma = 4;
6488 }
6489 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma);
6490 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
6491 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6492 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
6493 if (ReadBandwidthPlaneChroma[k] > 0) {
6494 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6495 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) {
6496 MaximumEffectiveCompressionChroma = 2;
6497 } else {
6498 MaximumEffectiveCompressionChroma = 4;
6499 }
6500 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
6501 + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma);
6502 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k];
6503 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6504 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
6505 }
6506 } else {
6507 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
6508 }
6509 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
6510 }
6511
6512 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
6513 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
6514
6515 #ifdef __DML_VBA_DEBUG__
6516 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
6517 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
6518 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
6519 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
6520 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
6521 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6522 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
6523 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
6524 #endif
6525
6526 if (AverageDCCZeroSizeFraction == 1) {
6527 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6528 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate;
6529 } else if (AverageDCCZeroSizeFraction > 0) {
6530 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6531 EffectiveCompressedBufferSize = dml_min(
6532 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6533 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate))
6534 + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate,
6535 (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6536 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6537 dml_print(
6538 "DML::%s: min 2 = %f\n",
6539 __func__,
6540 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
6541 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6542 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6543 } else {
6544 EffectiveCompressedBufferSize = dml_min(
6545 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6546 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate;
6547 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6548 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
6549 }
6550
6551 #ifdef __DML_VBA_DEBUG__
6552 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
6553 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
6554 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6555 #endif
6556
6557 *StutterPeriod = 0;
6558 for (k = 0; k < NumberOfActivePlanes; ++k) {
6559 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth)
6560 / BytePerPixelDETY[k] / SwathWidthY[k];
6561 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
6562 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k];
6563 #ifdef __DML_VBA_DEBUG__
6564 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]);
6565 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
6566 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]);
6567 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]);
6568 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
6569 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY);
6570 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
6571 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]);
6572 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6573 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]);
6574 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
6575 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6576 #endif
6577
6578 if (k == 0 || DETBufferingTimeY < *StutterPeriod) {
6579 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
6580
6581 *StutterPeriod = DETBufferingTimeY;
6582 FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k];
6583 VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k];
6584 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6585 SwathWidthYCriticalPlane = SwathWidthY[k];
6586 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
6587 MinTTUVBlankCriticalPlane = MinTTUVBlank[k];
6588
6589 #ifdef __DML_VBA_DEBUG__
6590 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6591 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane);
6592 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane);
6593 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6594 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane);
6595 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane);
6596 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane);
6597 #endif
6598 }
6599 }
6600
6601 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize);
6602 #ifdef __DML_VBA_DEBUG__
6603 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
6604 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6605 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth);
6606 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
6607 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6608 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
6609 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
6610 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
6611 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
6612 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
6613 #endif
6614
6615 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW
6616 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
6617 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6618 #ifdef __DML_VBA_DEBUG__
6619 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW);
6620 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth));
6621 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
6622 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
6623 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6624 #endif
6625 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6626
6627 dml_print(
6628 "DML::%s: Time to finish residue swath=%f\n",
6629 __func__,
6630 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6631
6632 TotalActiveWriteback = 0;
6633 for (k = 0; k < NumberOfActivePlanes; ++k) {
6634 if (WritebackEnable[k]) {
6635 TotalActiveWriteback = TotalActiveWriteback + 1;
6636 }
6637 }
6638
6639 if (TotalActiveWriteback == 0) {
6640 #ifdef __DML_VBA_DEBUG__
6641 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
6642 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
6643 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
6644 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6645 #endif
6646 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
6647 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
6648 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6649 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6650 } else {
6651 *StutterEfficiencyNotIncludingVBlank = 0.;
6652 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
6653 *NumberOfStutterBurstsPerFrame = 0;
6654 *Z8NumberOfStutterBurstsPerFrame = 0;
6655 }
6656 #ifdef __DML_VBA_DEBUG__
6657 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6658 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6659 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank);
6660 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
6661 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6662 #endif
6663
6664 for (k = 0; k < NumberOfActivePlanes; ++k) {
6665 if (v->BlendingAndTiming[k] == k) {
6666 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
6667 }
6668 }
6669
6670 if (*StutterEfficiencyNotIncludingVBlank > 0) {
6671 LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6672
6673 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) {
6674 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane
6675 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6676 } else {
6677 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6678 }
6679 } else {
6680 *StutterEfficiency = 0;
6681 }
6682
6683 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6684 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6685 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) {
6686 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane
6687 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6688 } else {
6689 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6690 }
6691 } else {
6692 *Z8StutterEfficiency = 0.;
6693 }
6694
6695 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6696 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6697 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6698 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6699 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6700 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6701 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6702 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6703 }
6704
6705 static void CalculateSwathAndDETConfiguration(
6706 bool ForceSingleDPP,
6707 int NumberOfActivePlanes,
6708 unsigned int DETBufferSizeInKByte,
6709 double MaximumSwathWidthLuma[],
6710 double MaximumSwathWidthChroma[],
6711 enum scan_direction_class SourceScan[],
6712 enum source_format_class SourcePixelFormat[],
6713 enum dm_swizzle_mode SurfaceTiling[],
6714 int ViewportWidth[],
6715 int ViewportHeight[],
6716 int SurfaceWidthY[],
6717 int SurfaceWidthC[],
6718 int SurfaceHeightY[],
6719 int SurfaceHeightC[],
6720 int Read256BytesBlockHeightY[],
6721 int Read256BytesBlockHeightC[],
6722 int Read256BytesBlockWidthY[],
6723 int Read256BytesBlockWidthC[],
6724 enum odm_combine_mode ODMCombineEnabled[],
6725 int BlendingAndTiming[],
6726 int BytePerPixY[],
6727 int BytePerPixC[],
6728 double BytePerPixDETY[],
6729 double BytePerPixDETC[],
6730 int HActive[],
6731 double HRatio[],
6732 double HRatioChroma[],
6733 int DPPPerPlane[],
6734 int swath_width_luma_ub[],
6735 int swath_width_chroma_ub[],
6736 double SwathWidth[],
6737 double SwathWidthChroma[],
6738 int SwathHeightY[],
6739 int SwathHeightC[],
6740 unsigned int DETBufferSizeY[],
6741 unsigned int DETBufferSizeC[],
6742 bool ViewportSizeSupportPerPlane[],
6743 bool *ViewportSizeSupport)
6744 {
6745 int MaximumSwathHeightY[DC__NUM_DPP__MAX];
6746 int MaximumSwathHeightC[DC__NUM_DPP__MAX];
6747 int MinimumSwathHeightY;
6748 int MinimumSwathHeightC;
6749 int RoundedUpMaxSwathSizeBytesY;
6750 int RoundedUpMaxSwathSizeBytesC;
6751 int RoundedUpMinSwathSizeBytesY;
6752 int RoundedUpMinSwathSizeBytesC;
6753 int RoundedUpSwathSizeBytesY;
6754 int RoundedUpSwathSizeBytesC;
6755 double SwathWidthSingleDPP[DC__NUM_DPP__MAX];
6756 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX];
6757 int k;
6758
6759 CalculateSwathWidth(
6760 ForceSingleDPP,
6761 NumberOfActivePlanes,
6762 SourcePixelFormat,
6763 SourceScan,
6764 ViewportWidth,
6765 ViewportHeight,
6766 SurfaceWidthY,
6767 SurfaceWidthC,
6768 SurfaceHeightY,
6769 SurfaceHeightC,
6770 ODMCombineEnabled,
6771 BytePerPixY,
6772 BytePerPixC,
6773 Read256BytesBlockHeightY,
6774 Read256BytesBlockHeightC,
6775 Read256BytesBlockWidthY,
6776 Read256BytesBlockWidthC,
6777 BlendingAndTiming,
6778 HActive,
6779 HRatio,
6780 DPPPerPlane,
6781 SwathWidthSingleDPP,
6782 SwathWidthSingleDPPChroma,
6783 SwathWidth,
6784 SwathWidthChroma,
6785 MaximumSwathHeightY,
6786 MaximumSwathHeightC,
6787 swath_width_luma_ub,
6788 swath_width_chroma_ub);
6789
6790 *ViewportSizeSupport = true;
6791 for (k = 0; k < NumberOfActivePlanes; ++k) {
6792 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
6793 || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) {
6794 if (SurfaceTiling[k] == dm_sw_linear
6795 || (SourcePixelFormat[k] == dm_444_64
6796 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6797 && SourceScan[k] != dm_vert)) {
6798 MinimumSwathHeightY = MaximumSwathHeightY[k];
6799 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6800 MinimumSwathHeightY = MaximumSwathHeightY[k];
6801 } else {
6802 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6803 }
6804 MinimumSwathHeightC = MaximumSwathHeightC[k];
6805 } else {
6806 if (SurfaceTiling[k] == dm_sw_linear) {
6807 MinimumSwathHeightY = MaximumSwathHeightY[k];
6808 MinimumSwathHeightC = MaximumSwathHeightC[k];
6809 } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) {
6810 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6811 MinimumSwathHeightC = MaximumSwathHeightC[k];
6812 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6813 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6814 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6815 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6816 MinimumSwathHeightY = MaximumSwathHeightY[k];
6817 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6818 } else {
6819 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6820 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6821 }
6822 }
6823
6824 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
6825 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY;
6826 if (SourcePixelFormat[k] == dm_420_10) {
6827 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
6828 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
6829 }
6830 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
6831 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC;
6832 if (SourcePixelFormat[k] == dm_420_10) {
6833 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
6834 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
6835 }
6836
6837 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6838 SwathHeightY[k] = MaximumSwathHeightY[k];
6839 SwathHeightC[k] = MaximumSwathHeightC[k];
6840 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6841 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6842 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6843 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6844 SwathHeightY[k] = MinimumSwathHeightY;
6845 SwathHeightC[k] = MaximumSwathHeightC[k];
6846 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6847 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6848 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6849 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6850 SwathHeightY[k] = MaximumSwathHeightY[k];
6851 SwathHeightC[k] = MinimumSwathHeightC;
6852 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6853 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6854 } else {
6855 SwathHeightY[k] = MinimumSwathHeightY;
6856 SwathHeightC[k] = MinimumSwathHeightC;
6857 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6858 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6859 }
6860 {
6861 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
6862
6863 if (SwathHeightC[k] == 0) {
6864 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024;
6865 DETBufferSizeC[k] = 0;
6866 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
6867 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2;
6868 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2;
6869 } else {
6870 DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024);
6871 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3;
6872 }
6873
6874 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k]
6875 || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
6876 *ViewportSizeSupport = false;
6877 ViewportSizeSupportPerPlane[k] = false;
6878 } else {
6879 ViewportSizeSupportPerPlane[k] = true;
6880 }
6881 }
6882 }
6883 }
6884
6885 static void CalculateSwathWidth(
6886 bool ForceSingleDPP,
6887 int NumberOfActivePlanes,
6888 enum source_format_class SourcePixelFormat[],
6889 enum scan_direction_class SourceScan[],
6890 int ViewportWidth[],
6891 int ViewportHeight[],
6892 int SurfaceWidthY[],
6893 int SurfaceWidthC[],
6894 int SurfaceHeightY[],
6895 int SurfaceHeightC[],
6896 enum odm_combine_mode ODMCombineEnabled[],
6897 int BytePerPixY[],
6898 int BytePerPixC[],
6899 int Read256BytesBlockHeightY[],
6900 int Read256BytesBlockHeightC[],
6901 int Read256BytesBlockWidthY[],
6902 int Read256BytesBlockWidthC[],
6903 int BlendingAndTiming[],
6904 int HActive[],
6905 double HRatio[],
6906 int DPPPerPlane[],
6907 double SwathWidthSingleDPPY[],
6908 double SwathWidthSingleDPPC[],
6909 double SwathWidthY[],
6910 double SwathWidthC[],
6911 int MaximumSwathHeightY[],
6912 int MaximumSwathHeightC[],
6913 int swath_width_luma_ub[],
6914 int swath_width_chroma_ub[])
6915 {
6916 enum odm_combine_mode MainPlaneODMCombine;
6917 int j, k;
6918
6919 #ifdef __DML_VBA_DEBUG__
6920 dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes);
6921 #endif
6922
6923 for (k = 0; k < NumberOfActivePlanes; ++k) {
6924 if (SourceScan[k] != dm_vert) {
6925 SwathWidthSingleDPPY[k] = ViewportWidth[k];
6926 } else {
6927 SwathWidthSingleDPPY[k] = ViewportHeight[k];
6928 }
6929
6930 #ifdef __DML_VBA_DEBUG__
6931 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
6932 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
6933 #endif
6934
6935 MainPlaneODMCombine = ODMCombineEnabled[k];
6936 for (j = 0; j < NumberOfActivePlanes; ++j) {
6937 if (BlendingAndTiming[k] == j) {
6938 MainPlaneODMCombine = ODMCombineEnabled[j];
6939 }
6940 }
6941
6942 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1)
6943 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
6944 else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1)
6945 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
6946 else if (DPPPerPlane[k] == 2)
6947 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
6948 else
6949 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6950
6951 #ifdef __DML_VBA_DEBUG__
6952 dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]);
6953 dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
6954 #endif
6955
6956 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
6957 SwathWidthC[k] = SwathWidthY[k] / 2;
6958 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
6959 } else {
6960 SwathWidthC[k] = SwathWidthY[k];
6961 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
6962 }
6963
6964 if (ForceSingleDPP == true) {
6965 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6966 SwathWidthC[k] = SwathWidthSingleDPPC[k];
6967 }
6968 {
6969 int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6970 int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6971
6972 #ifdef __DML_VBA_DEBUG__
6973 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
6974 #endif
6975
6976 if (SourceScan[k] != dm_vert) {
6977 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
6978 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
6979 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
6980 if (BytePerPixC[k] > 0) {
6981 int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6982
6983 swath_width_chroma_ub[k] = dml_min(
6984 surface_width_ub_c,
6985 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
6986 } else {
6987 swath_width_chroma_ub[k] = 0;
6988 }
6989 } else {
6990 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
6991 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
6992 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
6993 if (BytePerPixC[k] > 0) {
6994 int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
6995
6996 swath_width_chroma_ub[k] = dml_min(
6997 surface_height_ub_c,
6998 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
6999 } else {
7000 swath_width_chroma_ub[k] = 0;
7001 }
7002 }
7003 }
7004 }
7005 }
7006
7007 static double CalculateExtraLatency(
7008 int RoundTripPingLatencyCycles,
7009 int ReorderingBytes,
7010 double DCFCLK,
7011 int TotalNumberOfActiveDPP,
7012 int PixelChunkSizeInKByte,
7013 int TotalNumberOfDCCActiveDPP,
7014 int MetaChunkSize,
7015 double ReturnBW,
7016 bool GPUVMEnable,
7017 bool HostVMEnable,
7018 int NumberOfActivePlanes,
7019 int NumberOfDPP[],
7020 int dpte_group_bytes[],
7021 double HostVMInefficiencyFactor,
7022 double HostVMMinPageSize,
7023 int HostVMMaxNonCachedPageTableLevels)
7024 {
7025 double ExtraLatencyBytes;
7026 double ExtraLatency;
7027
7028 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7029 ReorderingBytes,
7030 TotalNumberOfActiveDPP,
7031 PixelChunkSizeInKByte,
7032 TotalNumberOfDCCActiveDPP,
7033 MetaChunkSize,
7034 GPUVMEnable,
7035 HostVMEnable,
7036 NumberOfActivePlanes,
7037 NumberOfDPP,
7038 dpte_group_bytes,
7039 HostVMInefficiencyFactor,
7040 HostVMMinPageSize,
7041 HostVMMaxNonCachedPageTableLevels);
7042
7043 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
7044
7045 #ifdef __DML_VBA_DEBUG__
7046 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
7047 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
7048 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
7049 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
7050 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
7051 #endif
7052
7053 return ExtraLatency;
7054 }
7055
/*
 * CalculateExtraLatencyBytes - byte count used by CalculateExtraLatency().
 *
 * The base amount is ReorderingBytes plus 1024 bytes for every KByte of
 * TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 * + TotalNumberOfDCCActiveDPP * MetaChunkSize.
 *
 * When GPUVMEnable is set, each plane additionally contributes
 * NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * levels)
 * * HostVMInefficiencyFactor, where "levels" is 0 unless HostVMEnable is also
 * set.  In that case it is HostVMMaxNonCachedPageTableLevels for
 * HostVMMinPageSize below 2048, that value minus 1 below 1048576, and minus 2
 * otherwise; the reduced values are clamped at 0 through dml_max().
 */
static double CalculateExtraLatencyBytes(
		int ReorderingBytes,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	const int chunk_kbytes = TotalNumberOfActiveDPP * PixelChunkSizeInKByte
			+ TotalNumberOfDCCActiveDPP * MetaChunkSize;
	double total_bytes = ReorderingBytes + chunk_kbytes * 1024.0;
	int dynamic_levels = 0;
	int requests_per_group;
	int k;

	/* Without GPUVM there is no page-table traffic to account for. */
	if (!GPUVMEnable)
		return total_bytes;

	if (HostVMEnable) {
		if (HostVMMinPageSize < 2048) {
			/* Deliberately not clamped, matching the unreduced case. */
			dynamic_levels = HostVMMaxNonCachedPageTableLevels;
		} else {
			const int levels_skipped = (HostVMMinPageSize < 1048576) ? 1 : 2;

			dynamic_levels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - levels_skipped);
		}
	}

	/* Integer multiplier, loop-invariant across planes. */
	requests_per_group = 1 + 8 * dynamic_levels;
	for (k = 0; k < NumberOfActivePlanes; ++k)
		total_bytes += NumberOfDPP[k] * dpte_group_bytes[k] * requests_per_group * HostVMInefficiencyFactor;

	return total_bytes;
}
7093
/*
 * CalculateUrgentLatency - largest of the three urgent-latency inputs,
 * optionally adjusted for the fabric clock.
 *
 * When DoUrgentLatencyAdjustment is set, the result is increased by
 * UrgentLatencyAdjustmentFabricClockComponent
 * * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 */
static double CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	const double worst_case = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);

	if (!DoUrgentLatencyAdjustment)
		return worst_case;

	return worst_case + UrgentLatencyAdjustmentFabricClockComponent
			* (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
7110
/*
 * UseMinimumDCFCLK - fill v->DCFCLKState[i][j] for every voltage state i
 * (0 .. v->soc.num_states - 1) and for j = 0 and j = 1.
 *
 * For each (i, j) the function derives one DCFCLK figure from the average
 * bandwidth and one from the peak (prefetch) bandwidth, then stores
 *
 *   min(v->DCFCLKPerState[i], 1.05 * max(average, peak))
 *
 * @mode_lib:        display mode library; every input is read from
 *                   mode_lib->vba and the result is written back to it.
 * @MaxPrefetchMode: forwarded unchanged to CalculateTWait().
 * @ReorderingBytes: forwarded unchanged to CalculateExtraLatencyBytes().
 *
 * The only member of *v that this function itself assigns is
 * DCFCLKState[i][j].
 *
 * NOTE(review): the meaning of the second index j is not visible in this
 * function; confirm against the callers before relying on it.
 */
7111 static noinline_for_stack void UseMinimumDCFCLK(
7112 struct display_mode_lib *mode_lib,
7113 int MaxPrefetchMode,
7114 int ReorderingBytes)
7115 {
7116 struct vba_vars_st *v = &mode_lib->vba;
/* dummy1..dummy3 only receive outputs of
 * CalculateVupdateAndDynamicMetadataParameters() that are never read here. */
7117 int dummy1, i, j, k;
7118 double NormalEfficiency, dummy2, dummy3;
7119 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
7120
/* Percentage -> fraction; used below as a divisor to derate bandwidth. */
7121 NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
7122 for (i = 0; i < v->soc.num_states; ++i) {
7123 for (j = 0; j <= 1; ++j) {
/* Per-plane scratch arrays, recomputed for every (i, j). */
7124 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
7125 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
7126 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
7127 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
7128 double MinimumTWait;
7129 double NonDPTEBandwidth;
7130 double DPTEBandwidth;
7131 double DCFCLKRequiredForAverageBandwidth;
7132 double ExtraLatencyBytes;
7133 double ExtraLatencyCycles;
7134 double DCFCLKRequiredForPeakBandwidth;
7135 int NoOfDPPState[DC__NUM_DPP__MAX];
7136 double MinimumTvmPlus2Tr0;
7137
/* Sum over planes of (DPP count * DPTE bytes per row) divided by
 * (15.75 * HTotal / PixelClock). */
7138 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
7139 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7140 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
7141 + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
7142 }
7143
/* Flat per-plane copy of NoOfDPP[i][j][] so it can be passed to
 * CalculateExtraLatencyBytes() as a plain int array. */
7144 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k)
7145 NoOfDPPState[k] = v->NoOfDPP[i][j][k];
7146
7147 MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
7148 NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
/* Use the prefetch/flip DPTE figure when HostVM is enabled or entry 0 of
 * ImmediateFlipRequirement demands immediate flip; otherwise use the regular
 * DPTE row bandwidth.  Only index 0 of ImmediateFlipRequirement is checked. */
7149 DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
7150 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
/* Average requirement = largest of:
 *  - the projected deep-sleep DCFCLK,
 *  - total bandwidth / bus width, scaled by the max average percentage,
 *  - (non-DPTE derated once + DPTE derated twice by NormalEfficiency)
 *    / bus width. */
7151 DCFCLKRequiredForAverageBandwidth = dml_max3(
7152 v->ProjectedDCFCLKDeepSleep[i][j],
7153 (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth
7154 / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
7155 (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth);
7156
/* The HostVMInefficiencyFactor argument is passed as the constant 1 here. */
7157 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7158 ReorderingBytes,
7159 v->TotalNumberOfActiveDPP[i][j],
7160 v->PixelChunkSizeInKByte,
7161 v->TotalNumberOfDCCActiveDPP[i][j],
7162 v->MetaChunkSize,
7163 v->GPUVMEnable,
7164 v->HostVMEnable,
7165 v->NumberOfActivePlanes,
7166 NoOfDPPState,
7167 v->dpte_group_bytes,
7168 1,
7169 v->HostVMMinPageSize,
7170 v->HostVMMaxNonCachedPageTableLevels);
/* Extra latency in cycles: fixed round-trip and arbiter-to-return delays
 * plus the extra bytes, derated, over the return bus width. */
7171 ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
/* Per-plane peak requirement. */
7172 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7173 double DCFCLKCyclesRequiredInPrefetch;
7174 double ExpectedPrefetchBWAcceleration;
7175 double PrefetchTime;
7176
/* Cycles to return the luma + chroma prefetch pixel data alone. */
7177 PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
7178 + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
/* Total prefetch cycles: twice the extra latency split across this plane's
 * DPPs, PDE/meta-PTE bytes (counted only when GPUVMMaxPageTableLevels > 2),
 * twice the DPTE row bytes, twice the meta row bytes, plus the pixel cycles.
 * NoOfDPPState[k] is a divisor and is not checked for zero here. */
7179 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
7180 + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0)
7181 + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth
7182 + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
7183 PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
/* Ratio of active (pixel + cursor) bandwidth to the plain read bandwidth. */
7184 ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k])
7185 / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
/* Non-zero only when GPUVM, this plane's dynamic metadata and
 * DynamicMetadataVMEnabled are all on. */
7186 DynamicMetadataVMExtraLatency[k] =
7187 (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
7188 v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
/* Time left for prefetch: (MaximumVStartup - 1) lines minus TWait, the
 * page-table-walk urgent latency and the dynamic metadata VM latency. */
7189 PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait
7190 - v->UrgLatency[i]
7191 * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2)
7192 * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
7193 - DynamicMetadataVMExtraLatency[k];
7194
7195 if (PrefetchTime > 0) {
7196 double ExpectedVRatioPrefetch;
7197
7198 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k]
7199 / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
/* Scale the pixel-only rate by max(1, ratio) and max(1, ratio / 4), so a
 * prefetch ratio above 1 (and again above 4) raises the requirement. */
7200 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
7201 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
/* Same condition as the DPTEBandwidth selection above: add the DPTE
 * bandwidth, derated twice, for each DPP of this plane. */
7202 if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
7203 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
7204 + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth;
7205 }
7206 } else {
/* No time left for prefetch: ask for this state's full DCFCLK. */
7207 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7208 }
7209 if (v->DynamicMetadataEnable[k] == true) {
7210 double TSetupPipe;
7211 double TdmbfPipe;
7212 double TdmsksPipe;
7213 double TdmecPipe;
7214 double AllowedTimeForUrgentExtraLatency;
7215
/* Only the four T* outputs are used; all four are subtracted below. */
7216 CalculateVupdateAndDynamicMetadataParameters(
7217 v->MaxInterDCNTileRepeaters,
7218 v->RequiredDPPCLK[i][j][k],
7219 v->RequiredDISPCLK[i][j],
7220 v->ProjectedDCFCLKDeepSleep[i][j],
7221 v->PixelClock[k],
7222 v->HTotal[k],
7223 v->VTotal[k] - v->VActive[k],
7224 v->DynamicMetadataTransmittedBytes[k],
7225 v->DynamicMetadataLinesBeforeActiveRequired[k],
7226 v->Interlace[k],
7227 v->ProgressiveToInterlaceUnitInOPP,
7228 &TSetupPipe,
7229 &TdmbfPipe,
7230 &TdmecPipe,
7231 &TdmsksPipe,
7232 &dummy1,
7233 &dummy2,
7234 &dummy3);
7235 AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
7236 - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
/* With time available, the extra latency must fit in it; otherwise fall
 * back to this state's full DCFCLK. */
7237 if (AllowedTimeForUrgentExtraLatency > 0) {
7238 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
7239 DCFCLKRequiredForPeakBandwidthPerPlane[k],
7240 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
7241 } else {
7242 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7243 }
7244 }
7245 }
/* Peak requirement starts as the sum of the per-plane requirements. */
7246 DCFCLKRequiredForPeakBandwidth = 0;
7247 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k)
7248 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
7249
/* Urgent latency times a page-table-level factor; zero when GPUVM is off. */
7250 MinimumTvmPlus2Tr0 = v->UrgLatency[i]
7251 * (v->GPUVMEnable == true ?
7252 (v->HostVMEnable == true ?
7253 (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) :
7254 0);
/* Second pass: for each plane, either raise the peak requirement so the
 * extra latency fits in the available time, or, when the time is too short,
 * set it to this state's full DCFCLK. */
7255 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7256 double MaximumTvmPlus2Tr0PlusTsw;
7257
7258 MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
7259 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
/* This assignment replaces (does not max with) the running value; the
 * final dml_min() below caps the result at DCFCLKPerState[i] anyway. */
7260 DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
7261 } else {
7262 DCFCLKRequiredForPeakBandwidth = dml_max3(
7263 DCFCLKRequiredForPeakBandwidth,
7264 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
7265 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
7266 }
7267 }
/* 5% margin over the larger requirement, capped at this state's DCFCLK. */
7268 v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
7269 }
7270 }
7271 }
7272
7273 static void CalculateUnboundedRequestAndCompressedBufferSize(
7274 unsigned int DETBufferSizeInKByte,
7275 int ConfigReturnBufferSizeInKByte,
7276 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
7277 int TotalActiveDPP,
7278 bool NoChromaPlanes,
7279 int MaxNumDPP,
7280 int CompressedBufferSegmentSizeInkByteFinal,
7281 enum output_encoder_class *Output,
7282 bool *UnboundedRequestEnabled,
7283 int *CompressedBufferSizeInkByte)
7284 {
7285 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
7286
7287 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]);
7288 *CompressedBufferSizeInkByte = (
7289 *UnboundedRequestEnabled == true ?
7290 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte :
7291 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte);
7292 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
7293
7294 #ifdef __DML_VBA_DEBUG__
7295 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
7296 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte);
7297 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
7298 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
7299 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte);
7300 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
7301 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
7302 #endif
7303 }
7304
7305 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output)
7306 {
7307 bool ret_val = false;
7308
7309 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma);
7310 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
7311 ret_val = false;
7312 return ret_val;
7313 }
7314
/*
 * CalculateMaxVStartup - compute the maximum VStartup value, in lines.
 *
 * The usable vblank is the smaller of the actual vblank (VTotal - VActive)
 * and the nominal vblank.  The nominal vblank is VBlankNom as given, or, when
 * VBlankNom is 0, VBlankNomDefaultUS converted to whole lines using the line
 * time HTotal / PixelClock.
 *
 * For interlaced timing without ProgressiveTointerlaceUnitinOPP the result is
 * half of the usable vblank, rounded down.  Otherwise the writeback delay,
 * rounded up to whole lines and at least one line, is subtracted from it.
 *
 * Return: the value computed above, capped at 1023.
 *
 * NOTE(review): the writeback subtraction is done in double and then
 * converted to unsigned int; if the delay in lines exceeds the usable vblank
 * the intermediate value is negative.  Confirm callers never reach that case.
 */
static unsigned int CalculateMaxVStartup(
		unsigned int VTotal,
		unsigned int VActive,
		unsigned int VBlankNom,
		unsigned int HTotal,
		double PixelClock,
		bool ProgressiveTointerlaceUnitinOPP,
		bool Interlace,
		unsigned int VBlankNomDefaultUS,
		double WritebackDelayTime)
{
	const double us_per_line = HTotal / PixelClock;
	const unsigned int actual_lines = VTotal - VActive;
	const unsigned int default_nom_lines = dml_floor(VBlankNomDefaultUS / us_per_line, 1.0);
	unsigned int avail_lines;
	unsigned int usable_lines;
	unsigned int max_vstartup;

	/* VBlankNom is taken as given; it is not clamped against the default. */
	if (VBlankNom == 0)
		avail_lines = default_nom_lines;
	else
		avail_lines = VBlankNom;

	usable_lines = (unsigned int) dml_min(actual_lines, avail_lines);

	if (Interlace && !ProgressiveTointerlaceUnitinOPP)
		max_vstartup = dml_floor(usable_lines / 2.0, 1.0);
	else
		max_vstartup = usable_lines - dml_max(1.0, dml_ceil(WritebackDelayTime / us_per_line, 1.0));

	return max_vstartup > 1023 ? 1023 : max_vstartup;
}
7343