1 /* SPDX-License-Identifier: MIT */
2 /*
3 * Copyright 2023 Advanced Micro Devices, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors: AMD
24 *
25 */
26
27 #include "display_mode_core.h"
28 #include "display_mode_util.h"
29 #include "display_mode_lib_defines.h"
30
31 #include "dml_assert.h"
32
33 #define DML2_MAX_FMT_420_BUFFER_WIDTH 4096
34 #define TB_BORROWED_MAX 400
35
36 // ---------------------------
37 // Declaration Begins
38 // ---------------------------
39 static void CalculateBytePerPixelAndBlockSizes(
40 enum dml_source_format_class SourcePixelFormat,
41 enum dml_swizzle_mode SurfaceTiling,
42 // Output
43 dml_uint_t *BytePerPixelY,
44 dml_uint_t *BytePerPixelC,
45 dml_float_t *BytePerPixelDETY,
46 dml_float_t *BytePerPixelDETC,
47 dml_uint_t *BlockHeight256BytesY,
48 dml_uint_t *BlockHeight256BytesC,
49 dml_uint_t *BlockWidth256BytesY,
50 dml_uint_t *BlockWidth256BytesC,
51 dml_uint_t *MacroTileHeightY,
52 dml_uint_t *MacroTileHeightC,
53 dml_uint_t *MacroTileWidthY,
54 dml_uint_t *MacroTileWidthC);
55
56 static dml_float_t CalculateWriteBackDISPCLK(
57 enum dml_source_format_class WritebackPixelFormat,
58 dml_float_t PixelClock,
59 dml_float_t WritebackHRatio,
60 dml_float_t WritebackVRatio,
61 dml_uint_t WritebackHTaps,
62 dml_uint_t WritebackVTaps,
63 dml_uint_t WritebackSourceWidth,
64 dml_uint_t WritebackDestinationWidth,
65 dml_uint_t HTotal,
66 dml_uint_t WritebackLineBufferSize,
67 dml_float_t DISPCLKDPPCLKVCOSpeed);
68
69 static void CalculateVMRowAndSwath(
70 struct display_mode_lib_scratch_st *s,
71 struct CalculateVMRowAndSwath_params_st *p);
72
73 static void CalculateOutputLink(
74 dml_float_t PHYCLKPerState,
75 dml_float_t PHYCLKD18PerState,
76 dml_float_t PHYCLKD32PerState,
77 dml_float_t Downspreading,
78 dml_bool_t IsMainSurfaceUsingTheIndicatedTiming,
79 enum dml_output_encoder_class Output,
80 enum dml_output_format_class OutputFormat,
81 dml_uint_t HTotal,
82 dml_uint_t HActive,
83 dml_float_t PixelClockBackEnd,
84 dml_float_t ForcedOutputLinkBPP,
85 dml_uint_t DSCInputBitPerComponent,
86 dml_uint_t NumberOfDSCSlices,
87 dml_float_t AudioSampleRate,
88 dml_uint_t AudioSampleLayout,
89 enum dml_odm_mode ODMModeNoDSC,
90 enum dml_odm_mode ODMModeDSC,
91 enum dml_dsc_enable DSCEnable,
92 dml_uint_t OutputLinkDPLanes,
93 enum dml_output_link_dp_rate OutputLinkDPRate,
94
95 // Output
96 dml_bool_t *RequiresDSC,
97 dml_bool_t *RequiresFEC,
98 dml_float_t *OutBpp,
99 enum dml_output_type_and_rate__type *OutputType,
100 enum dml_output_type_and_rate__rate *OutputRate,
101 dml_uint_t *RequiredSlots);
102
103 static void CalculateODMMode(
104 dml_uint_t MaximumPixelsPerLinePerDSCUnit,
105 dml_uint_t HActive,
106 enum dml_output_encoder_class Output,
107 enum dml_output_format_class OutputFormat,
108 enum dml_odm_use_policy ODMUse,
109 dml_float_t StateDispclk,
110 dml_float_t MaxDispclk,
111 dml_bool_t DSCEnable,
112 dml_uint_t TotalNumberOfActiveDPP,
113 dml_uint_t MaxNumDPP,
114 dml_float_t PixelClock,
115 dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
116 dml_float_t DISPCLKRampingMargin,
117 dml_float_t DISPCLKDPPCLKVCOSpeed,
118 dml_uint_t NumberOfDSCSlices,
119
120 // Output
121 dml_bool_t *TotalAvailablePipesSupport,
122 dml_uint_t *NumberOfDPP,
123 enum dml_odm_mode *ODMMode,
124 dml_float_t *RequiredDISPCLKPerSurface);
125
126 static dml_float_t CalculateRequiredDispclk(
127 enum dml_odm_mode ODMMode,
128 dml_float_t PixelClock,
129 dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
130 dml_float_t DISPCLKRampingMargin,
131 dml_float_t DISPCLKDPPCLKVCOSpeed,
132 dml_float_t MaxDispclkSingle);
133
134 static void CalculateSinglePipeDPPCLKAndSCLThroughput(
135 dml_float_t HRatio,
136 dml_float_t HRatioChroma,
137 dml_float_t VRatio,
138 dml_float_t VRatioChroma,
139 dml_float_t MaxDCHUBToPSCLThroughput,
140 dml_float_t MaxPSCLToLBThroughput,
141 dml_float_t PixelClock,
142 enum dml_source_format_class SourcePixelFormat,
143 dml_uint_t HTaps,
144 dml_uint_t HTapsChroma,
145 dml_uint_t VTaps,
146 dml_uint_t VTapsChroma,
147
148 // Output
149 dml_float_t *PSCL_THROUGHPUT,
150 dml_float_t *PSCL_THROUGHPUT_CHROMA,
151 dml_float_t *DPPCLKUsingSingleDPP);
152
153 static void CalculateDPPCLK(
154 dml_uint_t NumberOfActiveSurfaces,
155 dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
156 dml_float_t DISPCLKDPPCLKVCOSpeed,
157 dml_float_t DPPCLKUsingSingleDPP[],
158 dml_uint_t DPPPerSurface[],
159
160 // Output
161 dml_float_t *GlobalDPPCLK,
162 dml_float_t Dppclk[]);
163
164 static void CalculateMALLUseForStaticScreen(
165 dml_uint_t NumberOfActiveSurfaces,
166 dml_uint_t MALLAllocatedForDCNFinal,
167 enum dml_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
168 dml_uint_t SurfaceSizeInMALL[],
169 dml_bool_t one_row_per_frame_fits_in_buffer[],
170
171 // Output
172 dml_bool_t UsesMALLForStaticScreen[]);
173
174 static dml_uint_t dscceComputeDelay(
175 dml_uint_t bpc,
176 dml_float_t BPP,
177 dml_uint_t sliceWidth,
178 dml_uint_t numSlices,
179 enum dml_output_format_class pixelFormat,
180 enum dml_output_encoder_class Output);
181
182 static dml_uint_t dscComputeDelay(enum dml_output_format_class pixelFormat,
183 enum dml_output_encoder_class Output);
184
185 static dml_bool_t CalculatePrefetchSchedule(struct display_mode_lib_scratch_st *scratch,
186 struct CalculatePrefetchSchedule_params_st *p);
187
188 static dml_float_t RoundToDFSGranularity(dml_float_t Clock, dml_bool_t round_up, dml_float_t VCOSpeed);
189
190 static void CalculateDCCConfiguration(
191 dml_bool_t DCCEnabled,
192 dml_bool_t DCCProgrammingAssumesScanDirectionUnknown,
193 enum dml_source_format_class SourcePixelFormat,
194 dml_uint_t SurfaceWidthLuma,
195 dml_uint_t SurfaceWidthChroma,
196 dml_uint_t SurfaceHeightLuma,
197 dml_uint_t SurfaceHeightChroma,
198 dml_uint_t nomDETInKByte,
199 dml_uint_t RequestHeight256ByteLuma,
200 dml_uint_t RequestHeight256ByteChroma,
201 enum dml_swizzle_mode TilingFormat,
202 dml_uint_t BytePerPixelY,
203 dml_uint_t BytePerPixelC,
204 dml_float_t BytePerPixelDETY,
205 dml_float_t BytePerPixelDETC,
206 enum dml_rotation_angle SourceScan,
207 // Output
208 dml_uint_t *MaxUncompressedBlockLuma,
209 dml_uint_t *MaxUncompressedBlockChroma,
210 dml_uint_t *MaxCompressedBlockLuma,
211 dml_uint_t *MaxCompressedBlockChroma,
212 dml_uint_t *IndependentBlockLuma,
213 dml_uint_t *IndependentBlockChroma);
214
215 static dml_uint_t CalculatePrefetchSourceLines(
216 dml_float_t VRatio,
217 dml_uint_t VTaps,
218 dml_bool_t Interlace,
219 dml_bool_t ProgressiveToInterlaceUnitInOPP,
220 dml_uint_t SwathHeight,
221 enum dml_rotation_angle SourceScan,
222 dml_bool_t ViewportStationary,
223 dml_uint_t SwathWidth,
224 dml_uint_t ViewportHeight,
225 dml_uint_t ViewportXStart,
226 dml_uint_t ViewportYStart,
227
228 // Output
229 dml_uint_t *VInitPreFill,
230 dml_uint_t *MaxNumSwath);
231
232 static dml_uint_t CalculateVMAndRowBytes(
233 dml_bool_t ViewportStationary,
234 dml_bool_t DCCEnable,
235 dml_uint_t NumberOfDPPs,
236 dml_uint_t BlockHeight256Bytes,
237 dml_uint_t BlockWidth256Bytes,
238 enum dml_source_format_class SourcePixelFormat,
239 dml_uint_t SurfaceTiling,
240 dml_uint_t BytePerPixel,
241 enum dml_rotation_angle SourceScan,
242 dml_uint_t SwathWidth,
243 dml_uint_t ViewportHeight,
244 dml_uint_t ViewportXStart,
245 dml_uint_t ViewportYStart,
246 dml_bool_t GPUVMEnable,
247 dml_uint_t GPUVMMaxPageTableLevels,
248 dml_uint_t GPUVMMinPageSizeKBytes,
249 dml_uint_t PTEBufferSizeInRequests,
250 dml_uint_t Pitch,
251 dml_uint_t DCCMetaPitch,
252 dml_uint_t MacroTileWidth,
253 dml_uint_t MacroTileHeight,
254
255 // Output
256 dml_uint_t *MetaRowByte,
257 dml_uint_t *PixelPTEBytesPerRow,
258 dml_uint_t *PixelPTEBytesPerRowStorage, // for PTE buffer size check
259 dml_uint_t *dpte_row_width_ub,
260 dml_uint_t *dpte_row_height,
261 dml_uint_t *dpte_row_height_linear,
262 dml_uint_t *PixelPTEBytesPerRow_one_row_per_frame,
263 dml_uint_t *dpte_row_width_ub_one_row_per_frame,
264 dml_uint_t *dpte_row_height_one_row_per_frame,
265 dml_uint_t *MetaRequestWidth,
266 dml_uint_t *MetaRequestHeight,
267 dml_uint_t *meta_row_width,
268 dml_uint_t *meta_row_height,
269 dml_uint_t *PixelPTEReqWidth,
270 dml_uint_t *PixelPTEReqHeight,
271 dml_uint_t *PTERequestSize,
272 dml_uint_t *DPDE0BytesFrame,
273 dml_uint_t *MetaPTEBytesFrame);
274
275 static dml_float_t CalculateTWait(
276 dml_uint_t PrefetchMode,
277 enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange,
278 dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
279 dml_bool_t DRRDisplay,
280 dml_float_t DRAMClockChangeLatency,
281 dml_float_t FCLKChangeLatency,
282 dml_float_t UrgentLatency,
283 dml_float_t SREnterPlusExitTime);
284
285 static void CalculatePrefetchMode(
286 enum dml_prefetch_modes AllowForPStateChangeOrStutterInVBlank,
287 dml_uint_t *MinPrefetchMode,
288 dml_uint_t *MaxPrefetchMode);
289
290 static void CalculateRowBandwidth(
291 dml_bool_t GPUVMEnable,
292 enum dml_source_format_class SourcePixelFormat,
293 dml_float_t VRatio,
294 dml_float_t VRatioChroma,
295 dml_bool_t DCCEnable,
296 dml_float_t LineTime,
297 dml_uint_t MetaRowByteLuma,
298 dml_uint_t MetaRowByteChroma,
299 dml_uint_t meta_row_height_luma,
300 dml_uint_t meta_row_height_chroma,
301 dml_uint_t PixelPTEBytesPerRowLuma,
302 dml_uint_t PixelPTEBytesPerRowChroma,
303 dml_uint_t dpte_row_height_luma,
304 dml_uint_t dpte_row_height_chroma,
305 // Output
306 dml_float_t *meta_row_bw,
307 dml_float_t *dpte_row_bw);
308
309 static void CalculateFlipSchedule(
310 dml_float_t HostVMInefficiencyFactor,
311 dml_float_t UrgentExtraLatency,
312 dml_float_t UrgentLatency,
313 dml_uint_t GPUVMMaxPageTableLevels,
314 dml_bool_t HostVMEnable,
315 dml_uint_t HostVMMaxNonCachedPageTableLevels,
316 dml_bool_t GPUVMEnable,
317 dml_uint_t HostVMMinPageSize,
318 dml_float_t PDEAndMetaPTEBytesPerFrame,
319 dml_float_t MetaRowBytes,
320 dml_float_t DPTEBytesPerRow,
321 dml_float_t BandwidthAvailableForImmediateFlip,
322 dml_uint_t TotImmediateFlipBytes,
323 enum dml_source_format_class SourcePixelFormat,
324 dml_float_t LineTime,
325 dml_float_t VRatio,
326 dml_float_t VRatioChroma,
327 dml_float_t Tno_bw,
328 dml_bool_t DCCEnable,
329 dml_uint_t dpte_row_height,
330 dml_uint_t meta_row_height,
331 dml_uint_t dpte_row_height_chroma,
332 dml_uint_t meta_row_height_chroma,
333 dml_bool_t use_one_row_for_frame_flip,
334
335 // Output
336 dml_float_t *DestinationLinesToRequestVMInImmediateFlip,
337 dml_float_t *DestinationLinesToRequestRowInImmediateFlip,
338 dml_float_t *final_flip_bw,
339 dml_bool_t *ImmediateFlipSupportedForPipe);
340
341 static dml_float_t CalculateWriteBackDelay(
342 enum dml_source_format_class WritebackPixelFormat,
343 dml_float_t WritebackHRatio,
344 dml_float_t WritebackVRatio,
345 dml_uint_t WritebackVTaps,
346 dml_uint_t WritebackDestinationWidth,
347 dml_uint_t WritebackDestinationHeight,
348 dml_uint_t WritebackSourceHeight,
349 dml_uint_t HTotal);
350
351 static void CalculateVUpdateAndDynamicMetadataParameters(
352 dml_uint_t MaxInterDCNTileRepeaters,
353 dml_float_t Dppclk,
354 dml_float_t DISPCLK,
355 dml_float_t DCFClkDeepSleep,
356 dml_float_t PixelClock,
357 dml_uint_t HTotal,
358 dml_uint_t VBlank,
359 dml_uint_t DynamicMetadataTransmittedBytes,
360 dml_uint_t DynamicMetadataLinesBeforeActiveRequired,
361 dml_uint_t InterlaceEnable,
362 dml_bool_t ProgressiveToInterlaceUnitInOPP,
363 dml_float_t *TSetup,
364 dml_float_t *Tdmbf,
365 dml_float_t *Tdmec,
366 dml_float_t *Tdmsks,
367 dml_uint_t *VUpdateOffsetPix,
368 dml_uint_t *VUpdateWidthPix,
369 dml_uint_t *VReadyOffsetPix);
370
371 static void PixelClockAdjustmentForProgressiveToInterlaceUnit(struct dml_display_cfg_st *display_cfg, dml_bool_t ptoi_supported);
372
373 static dml_float_t TruncToValidBPP(
374 dml_float_t LinkBitRate,
375 dml_uint_t Lanes,
376 dml_uint_t HTotal,
377 dml_uint_t HActive,
378 dml_float_t PixelClock,
379 dml_float_t DesiredBPP,
380 dml_bool_t DSCEnable,
381 enum dml_output_encoder_class Output,
382 enum dml_output_format_class Format,
383 dml_uint_t DSCInputBitPerComponent,
384 dml_uint_t DSCSlices,
385 dml_uint_t AudioRate,
386 dml_uint_t AudioLayout,
387 enum dml_odm_mode ODMModeNoDSC,
388 enum dml_odm_mode ODMModeDSC,
389 // Output
390 dml_uint_t *RequiredSlotsSingle);
391
392 static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
393 struct display_mode_lib_scratch_st *s,
394 struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *p);
395
396 static void CalculateDCFCLKDeepSleep(
397 dml_uint_t NumberOfActiveSurfaces,
398 dml_uint_t BytePerPixelY[],
399 dml_uint_t BytePerPixelC[],
400 dml_float_t VRatio[],
401 dml_float_t VRatioChroma[],
402 dml_uint_t SwathWidthY[],
403 dml_uint_t SwathWidthC[],
404 dml_uint_t DPPPerSurface[],
405 dml_float_t HRatio[],
406 dml_float_t HRatioChroma[],
407 dml_float_t PixelClock[],
408 dml_float_t PSCL_THROUGHPUT[],
409 dml_float_t PSCL_THROUGHPUT_CHROMA[],
410 dml_float_t Dppclk[],
411 dml_float_t ReadBandwidthLuma[],
412 dml_float_t ReadBandwidthChroma[],
413 dml_uint_t ReturnBusWidth,
414
415 // Output
416 dml_float_t *DCFCLKDeepSleep);
417
418 static void CalculateUrgentBurstFactor(
419 enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange,
420 dml_uint_t swath_width_luma_ub,
421 dml_uint_t swath_width_chroma_ub,
422 dml_uint_t SwathHeightY,
423 dml_uint_t SwathHeightC,
424 dml_float_t LineTime,
425 dml_float_t UrgentLatency,
426 dml_float_t CursorBufferSize,
427 dml_uint_t CursorWidth,
428 dml_uint_t CursorBPP,
429 dml_float_t VRatio,
430 dml_float_t VRatioC,
431 dml_float_t BytePerPixelInDETY,
432 dml_float_t BytePerPixelInDETC,
433 dml_uint_t DETBufferSizeY,
434 dml_uint_t DETBufferSizeC,
435 // Output
436 dml_float_t *UrgentBurstFactorCursor,
437 dml_float_t *UrgentBurstFactorLuma,
438 dml_float_t *UrgentBurstFactorChroma,
439 dml_bool_t *NotEnoughUrgentLatencyHiding);
440
441 static dml_float_t RequiredDTBCLK(
442 dml_bool_t DSCEnable,
443 dml_float_t PixelClock,
444 enum dml_output_format_class OutputFormat,
445 dml_float_t OutputBpp,
446 dml_uint_t DSCSlices,
447 dml_uint_t HTotal,
448 dml_uint_t HActive,
449 dml_uint_t AudioRate,
450 dml_uint_t AudioLayoutSingle);
451
452 static void UseMinimumDCFCLK(
453 struct display_mode_lib_scratch_st *scratch,
454 struct UseMinimumDCFCLK_params_st *p);
455
456 static void CalculatePixelDeliveryTimes(
457 dml_uint_t NumberOfActiveSurfaces,
458 dml_float_t VRatio[],
459 dml_float_t VRatioChroma[],
460 dml_float_t VRatioPrefetchY[],
461 dml_float_t VRatioPrefetchC[],
462 dml_uint_t swath_width_luma_ub[],
463 dml_uint_t swath_width_chroma_ub[],
464 dml_uint_t DPPPerSurface[],
465 dml_float_t HRatio[],
466 dml_float_t HRatioChroma[],
467 dml_float_t PixelClock[],
468 dml_float_t PSCL_THROUGHPUT[],
469 dml_float_t PSCL_THROUGHPUT_CHROMA[],
470 dml_float_t Dppclk[],
471 dml_uint_t BytePerPixelC[],
472 enum dml_rotation_angle SourceScan[],
473 dml_uint_t NumberOfCursors[],
474 dml_uint_t CursorWidth[],
475 dml_uint_t CursorBPP[],
476 dml_uint_t BlockWidth256BytesY[],
477 dml_uint_t BlockHeight256BytesY[],
478 dml_uint_t BlockWidth256BytesC[],
479 dml_uint_t BlockHeight256BytesC[],
480
481 // Output
482 dml_float_t DisplayPipeLineDeliveryTimeLuma[],
483 dml_float_t DisplayPipeLineDeliveryTimeChroma[],
484 dml_float_t DisplayPipeLineDeliveryTimeLumaPrefetch[],
485 dml_float_t DisplayPipeLineDeliveryTimeChromaPrefetch[],
486 dml_float_t DisplayPipeRequestDeliveryTimeLuma[],
487 dml_float_t DisplayPipeRequestDeliveryTimeChroma[],
488 dml_float_t DisplayPipeRequestDeliveryTimeLumaPrefetch[],
489 dml_float_t DisplayPipeRequestDeliveryTimeChromaPrefetch[],
490 dml_float_t CursorRequestDeliveryTime[],
491 dml_float_t CursorRequestDeliveryTimePrefetch[]);
492
493 static void CalculateMetaAndPTETimes(
494 dml_bool_t use_one_row_for_frame[],
495 dml_uint_t NumberOfActiveSurfaces,
496 dml_bool_t GPUVMEnable,
497 dml_uint_t MetaChunkSize,
498 dml_uint_t MinMetaChunkSizeBytes,
499 dml_uint_t HTotal[],
500 dml_float_t VRatio[],
501 dml_float_t VRatioChroma[],
502 dml_float_t DestinationLinesToRequestRowInVBlank[],
503 dml_float_t DestinationLinesToRequestRowInImmediateFlip[],
504 dml_bool_t DCCEnable[],
505 dml_float_t PixelClock[],
506 dml_uint_t BytePerPixelY[],
507 dml_uint_t BytePerPixelC[],
508 enum dml_rotation_angle SourceScan[],
509 dml_uint_t dpte_row_height[],
510 dml_uint_t dpte_row_height_chroma[],
511 dml_uint_t meta_row_width[],
512 dml_uint_t meta_row_width_chroma[],
513 dml_uint_t meta_row_height[],
514 dml_uint_t meta_row_height_chroma[],
515 dml_uint_t meta_req_width[],
516 dml_uint_t meta_req_width_chroma[],
517 dml_uint_t meta_req_height[],
518 dml_uint_t meta_req_height_chroma[],
519 dml_uint_t dpte_group_bytes[],
520 dml_uint_t PTERequestSizeY[],
521 dml_uint_t PTERequestSizeC[],
522 dml_uint_t PixelPTEReqWidthY[],
523 dml_uint_t PixelPTEReqHeightY[],
524 dml_uint_t PixelPTEReqWidthC[],
525 dml_uint_t PixelPTEReqHeightC[],
526 dml_uint_t dpte_row_width_luma_ub[],
527 dml_uint_t dpte_row_width_chroma_ub[],
528
529 // Output
530 dml_float_t DST_Y_PER_PTE_ROW_NOM_L[],
531 dml_float_t DST_Y_PER_PTE_ROW_NOM_C[],
532 dml_float_t DST_Y_PER_META_ROW_NOM_L[],
533 dml_float_t DST_Y_PER_META_ROW_NOM_C[],
534 dml_float_t TimePerMetaChunkNominal[],
535 dml_float_t TimePerChromaMetaChunkNominal[],
536 dml_float_t TimePerMetaChunkVBlank[],
537 dml_float_t TimePerChromaMetaChunkVBlank[],
538 dml_float_t TimePerMetaChunkFlip[],
539 dml_float_t TimePerChromaMetaChunkFlip[],
540 dml_float_t time_per_pte_group_nom_luma[],
541 dml_float_t time_per_pte_group_vblank_luma[],
542 dml_float_t time_per_pte_group_flip_luma[],
543 dml_float_t time_per_pte_group_nom_chroma[],
544 dml_float_t time_per_pte_group_vblank_chroma[],
545 dml_float_t time_per_pte_group_flip_chroma[]);
546
547 static void CalculateVMGroupAndRequestTimes(
548 dml_uint_t NumberOfActiveSurfaces,
549 dml_bool_t GPUVMEnable,
550 dml_uint_t GPUVMMaxPageTableLevels,
551 dml_uint_t HTotal[],
552 dml_uint_t BytePerPixelC[],
553 dml_float_t DestinationLinesToRequestVMInVBlank[],
554 dml_float_t DestinationLinesToRequestVMInImmediateFlip[],
555 dml_bool_t DCCEnable[],
556 dml_float_t PixelClock[],
557 dml_uint_t dpte_row_width_luma_ub[],
558 dml_uint_t dpte_row_width_chroma_ub[],
559 dml_uint_t vm_group_bytes[],
560 dml_uint_t dpde0_bytes_per_frame_ub_l[],
561 dml_uint_t dpde0_bytes_per_frame_ub_c[],
562 dml_uint_t meta_pte_bytes_per_frame_ub_l[],
563 dml_uint_t meta_pte_bytes_per_frame_ub_c[],
564
565 // Output
566 dml_float_t TimePerVMGroupVBlank[],
567 dml_float_t TimePerVMGroupFlip[],
568 dml_float_t TimePerVMRequestVBlank[],
569 dml_float_t TimePerVMRequestFlip[]);
570
571 static void CalculateStutterEfficiency(
572 struct display_mode_lib_scratch_st *scratch,
573 struct CalculateStutterEfficiency_params_st *p);
574
575 static void CalculateSwathAndDETConfiguration(
576 struct display_mode_lib_scratch_st *scratch,
577 struct CalculateSwathAndDETConfiguration_params_st *p);
578
579 static void CalculateSwathWidth(
580 dml_bool_t ForceSingleDPP,
581 dml_uint_t NumberOfActiveSurfaces,
582 enum dml_source_format_class SourcePixelFormat[],
583 enum dml_rotation_angle SourceScan[],
584 dml_bool_t ViewportStationary[],
585 dml_uint_t ViewportWidth[],
586 dml_uint_t ViewportHeight[],
587 dml_uint_t ViewportXStart[],
588 dml_uint_t ViewportYStart[],
589 dml_uint_t ViewportXStartC[],
590 dml_uint_t ViewportYStartC[],
591 dml_uint_t SurfaceWidthY[],
592 dml_uint_t SurfaceWidthC[],
593 dml_uint_t SurfaceHeightY[],
594 dml_uint_t SurfaceHeightC[],
595 enum dml_odm_mode ODMMode[],
596 dml_uint_t BytePerPixY[],
597 dml_uint_t BytePerPixC[],
598 dml_uint_t Read256BytesBlockHeightY[],
599 dml_uint_t Read256BytesBlockHeightC[],
600 dml_uint_t Read256BytesBlockWidthY[],
601 dml_uint_t Read256BytesBlockWidthC[],
602 dml_uint_t BlendingAndTiming[],
603 dml_uint_t HActive[],
604 dml_float_t HRatio[],
605 dml_uint_t DPPPerSurface[],
606
607 // Output
608 dml_uint_t SwathWidthSingleDPPY[],
609 dml_uint_t SwathWidthSingleDPPC[],
610 dml_uint_t SwathWidthY[],
611 dml_uint_t SwathWidthC[],
612 dml_uint_t MaximumSwathHeightY[],
613 dml_uint_t MaximumSwathHeightC[],
614 dml_uint_t swath_width_luma_ub[],
615 dml_uint_t swath_width_chroma_ub[]);
616
617 static dml_float_t CalculateExtraLatency(
618 dml_uint_t RoundTripPingLatencyCycles,
619 dml_uint_t ReorderingBytes,
620 dml_float_t DCFCLK,
621 dml_uint_t TotalNumberOfActiveDPP,
622 dml_uint_t PixelChunkSizeInKByte,
623 dml_uint_t TotalNumberOfDCCActiveDPP,
624 dml_uint_t MetaChunkSize,
625 dml_float_t ReturnBW,
626 dml_bool_t GPUVMEnable,
627 dml_bool_t HostVMEnable,
628 dml_uint_t NumberOfActiveSurfaces,
629 dml_uint_t NumberOfDPP[],
630 dml_uint_t dpte_group_bytes[],
631 dml_float_t HostVMInefficiencyFactor,
632 dml_uint_t HostVMMinPageSize,
633 dml_uint_t HostVMMaxNonCachedPageTableLevels);
634
635 static dml_uint_t CalculateExtraLatencyBytes(
636 dml_uint_t ReorderingBytes,
637 dml_uint_t TotalNumberOfActiveDPP,
638 dml_uint_t PixelChunkSizeInKByte,
639 dml_uint_t TotalNumberOfDCCActiveDPP,
640 dml_uint_t MetaChunkSize,
641 dml_bool_t GPUVMEnable,
642 dml_bool_t HostVMEnable,
643 dml_uint_t NumberOfActiveSurfaces,
644 dml_uint_t NumberOfDPP[],
645 dml_uint_t dpte_group_bytes[],
646 dml_float_t HostVMInefficiencyFactor,
647 dml_uint_t HostVMMinPageSize,
648 dml_uint_t HostVMMaxNonCachedPageTableLevels);
649
650 static dml_float_t CalculateUrgentLatency(
651 dml_float_t UrgentLatencyPixelDataOnly,
652 dml_float_t UrgentLatencyPixelMixedWithVMData,
653 dml_float_t UrgentLatencyVMDataOnly,
654 dml_bool_t DoUrgentLatencyAdjustment,
655 dml_float_t UrgentLatencyAdjustmentFabricClockComponent,
656 dml_float_t UrgentLatencyAdjustmentFabricClockReference,
657 dml_float_t FabricClockSingle);
658
659 static dml_bool_t UnboundedRequest(
660 enum dml_unbounded_requesting_policy UseUnboundedRequestingFinal,
661 dml_uint_t TotalNumberOfActiveDPP,
662 dml_bool_t NoChromaOrLinear,
663 enum dml_output_encoder_class Output);
664
665 static void CalculateSurfaceSizeInMall(
666 dml_uint_t NumberOfActiveSurfaces,
667 dml_uint_t MALLAllocatedForDCN,
668 enum dml_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
669 dml_bool_t DCCEnable[],
670 dml_bool_t ViewportStationary[],
671 dml_uint_t ViewportXStartY[],
672 dml_uint_t ViewportYStartY[],
673 dml_uint_t ViewportXStartC[],
674 dml_uint_t ViewportYStartC[],
675 dml_uint_t ViewportWidthY[],
676 dml_uint_t ViewportHeightY[],
677 dml_uint_t BytesPerPixelY[],
678 dml_uint_t ViewportWidthC[],
679 dml_uint_t ViewportHeightC[],
680 dml_uint_t BytesPerPixelC[],
681 dml_uint_t SurfaceWidthY[],
682 dml_uint_t SurfaceWidthC[],
683 dml_uint_t SurfaceHeightY[],
684 dml_uint_t SurfaceHeightC[],
685 dml_uint_t Read256BytesBlockWidthY[],
686 dml_uint_t Read256BytesBlockWidthC[],
687 dml_uint_t Read256BytesBlockHeightY[],
688 dml_uint_t Read256BytesBlockHeightC[],
689 dml_uint_t ReadBlockWidthY[],
690 dml_uint_t ReadBlockWidthC[],
691 dml_uint_t ReadBlockHeightY[],
692 dml_uint_t ReadBlockHeightC[],
693
694 // Output
695 dml_uint_t SurfaceSizeInMALL[],
696 dml_bool_t *ExceededMALLSize);
697
698 static void CalculateDETBufferSize(
699 dml_uint_t DETSizeOverride[],
700 enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
701 dml_bool_t ForceSingleDPP,
702 dml_uint_t NumberOfActiveSurfaces,
703 dml_bool_t UnboundedRequestEnabled,
704 dml_uint_t nomDETInKByte,
705 dml_uint_t MaxTotalDETInKByte,
706 dml_uint_t ConfigReturnBufferSizeInKByte,
707 dml_uint_t MinCompressedBufferSizeInKByte,
708 dml_uint_t ConfigReturnBufferSegmentSizeInkByte,
709 dml_uint_t CompressedBufferSegmentSizeInkByteFinal,
710 enum dml_source_format_class SourcePixelFormat[],
711 dml_float_t ReadBandwidthLuma[],
712 dml_float_t ReadBandwidthChroma[],
713 dml_uint_t RotesY[],
714 dml_uint_t RoundedUpMaxSwathSizeBytesC[],
715 dml_uint_t DPPPerSurface[],
716 // Output
717 dml_uint_t DETBufferSizeInKByte[],
718 dml_uint_t *CompressedBufferSizeInkByte);
719
720 static void CalculateMaxDETAndMinCompressedBufferSize(
721 dml_uint_t ConfigReturnBufferSizeInKByte,
722 dml_uint_t ConfigReturnBufferSegmentSizeInKByte,
723 dml_uint_t ROBBufferSizeInKByte,
724 dml_uint_t MaxNumDPP,
725 dml_bool_t nomDETInKByteOverrideEnable,
726 dml_uint_t nomDETInKByteOverrideValue,
727
728 // Output
729 dml_uint_t *MaxTotalDETInKByte,
730 dml_uint_t *nomDETInKByte,
731 dml_uint_t *MinCompressedBufferSizeInKByte);
732
733 static dml_uint_t DSCDelayRequirement(
734 dml_bool_t DSCEnabled,
735 enum dml_odm_mode ODMMode,
736 dml_uint_t DSCInputBitPerComponent,
737 dml_float_t OutputBpp,
738 dml_uint_t HActive,
739 dml_uint_t HTotal,
740 dml_uint_t NumberOfDSCSlices,
741 enum dml_output_format_class OutputFormat,
742 enum dml_output_encoder_class Output,
743 dml_float_t PixelClock,
744 dml_float_t PixelClockBackEnd);
745
746 static dml_bool_t CalculateVActiveBandwithSupport(
747 dml_uint_t NumberOfActiveSurfaces,
748 dml_float_t ReturnBW,
749 dml_bool_t NotUrgentLatencyHiding[],
750 dml_float_t ReadBandwidthLuma[],
751 dml_float_t ReadBandwidthChroma[],
752 dml_float_t cursor_bw[],
753 dml_float_t meta_row_bandwidth[],
754 dml_float_t dpte_row_bandwidth[],
755 dml_uint_t NumberOfDPP[],
756 dml_float_t UrgentBurstFactorLuma[],
757 dml_float_t UrgentBurstFactorChroma[],
758 dml_float_t UrgentBurstFactorCursor[]);
759
760 static void CalculatePrefetchBandwithSupport(
761 dml_uint_t NumberOfActiveSurfaces,
762 dml_float_t ReturnBW,
763 enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
764 dml_bool_t NotUrgentLatencyHiding[],
765 dml_float_t ReadBandwidthLuma[],
766 dml_float_t ReadBandwidthChroma[],
767 dml_float_t PrefetchBandwidthLuma[],
768 dml_float_t PrefetchBandwidthChroma[],
769 dml_float_t cursor_bw[],
770 dml_float_t meta_row_bandwidth[],
771 dml_float_t dpte_row_bandwidth[],
772 dml_float_t cursor_bw_pre[],
773 dml_float_t prefetch_vmrow_bw[],
774 dml_uint_t NumberOfDPP[],
775 dml_float_t UrgentBurstFactorLuma[],
776 dml_float_t UrgentBurstFactorChroma[],
777 dml_float_t UrgentBurstFactorCursor[],
778 dml_float_t UrgentBurstFactorLumaPre[],
779 dml_float_t UrgentBurstFactorChromaPre[],
780 dml_float_t UrgentBurstFactorCursorPre[],
781
782 // Output
783 dml_float_t *PrefetchBandwidth,
784 dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch,
785 dml_float_t *FractionOfUrgentBandwidth,
786 dml_bool_t *PrefetchBandwidthSupport);
787
788 static dml_float_t CalculateBandwidthAvailableForImmediateFlip(
789 dml_uint_t NumberOfActiveSurfaces,
790 dml_float_t ReturnBW,
791 dml_float_t ReadBandwidthLuma[],
792 dml_float_t ReadBandwidthChroma[],
793 dml_float_t PrefetchBandwidthLuma[],
794 dml_float_t PrefetchBandwidthChroma[],
795 dml_float_t cursor_bw[],
796 dml_float_t cursor_bw_pre[],
797 dml_uint_t NumberOfDPP[],
798 dml_float_t UrgentBurstFactorLuma[],
799 dml_float_t UrgentBurstFactorChroma[],
800 dml_float_t UrgentBurstFactorCursor[],
801 dml_float_t UrgentBurstFactorLumaPre[],
802 dml_float_t UrgentBurstFactorChromaPre[],
803 dml_float_t UrgentBurstFactorCursorPre[]);
804
805 static void CalculateImmediateFlipBandwithSupport(
806 dml_uint_t NumberOfActiveSurfaces,
807 dml_float_t ReturnBW,
808 enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
809 enum dml_immediate_flip_requirement ImmediateFlipRequirement[],
810 dml_float_t final_flip_bw[],
811 dml_float_t ReadBandwidthLuma[],
812 dml_float_t ReadBandwidthChroma[],
813 dml_float_t PrefetchBandwidthLuma[],
814 dml_float_t PrefetchBandwidthChroma[],
815 dml_float_t cursor_bw[],
816 dml_float_t meta_row_bandwidth[],
817 dml_float_t dpte_row_bandwidth[],
818 dml_float_t cursor_bw_pre[],
819 dml_float_t prefetch_vmrow_bw[],
820 dml_uint_t NumberOfDPP[],
821 dml_float_t UrgentBurstFactorLuma[],
822 dml_float_t UrgentBurstFactorChroma[],
823 dml_float_t UrgentBurstFactorCursor[],
824 dml_float_t UrgentBurstFactorLumaPre[],
825 dml_float_t UrgentBurstFactorChromaPre[],
826 dml_float_t UrgentBurstFactorCursorPre[],
827
828 // Output
829 dml_float_t *TotalBandwidth,
830 dml_float_t *TotalBandwidthNotIncludingMALLPrefetch,
831 dml_float_t *FractionOfUrgentBandwidth,
832 dml_bool_t *ImmediateFlipBandwidthSupport);
833
834 // ---------------------------
835 // Declaration Ends
836 // ---------------------------
837
// Computes the delay of the DSC compression engine (dscce), expressed in pixels.
//
// Parameter notes (valid ranges as stated by the original author):
//   bpc         - source bits per component, in the set {8, 10, 12}; 8 and 10
//                 select their own ssm delay, every other value uses 113
//   BPP         - compressed bits per pixel, in increments of 1/16 of a bit;
//                 min = 6/7/8 in N420/N422/444 respectively,
//                 max = such that compression is 1:1
//   sliceWidth  - number of pixels per slice line; must be less than or equal
//                 to 5184/numSlices (or 4096/numSlices in 420 mode)
//   numSlices   - number of slices in the horizontal direction per DSC engine,
//                 in the set {1, 2, 3, 4}
//   pixelFormat - pixel/color format in the set {:N444_RGB, :S422, :N422, :N420}
//   Output      - only reported in the debug trace; it does not affect the result
//
// Returns the engine delay in cycles multiplied by 3 containers per cycle and by
// the number of pixels per clock.
//
// NOTE(review): nothing here guards sliceWidth / pixelsPerClock == 0, BPP == 0
// or numSlices == 0; the integer divisions below assume callers respect the
// ranges listed above -- confirm against the call sites.
static dml_uint_t dscceComputeDelay(
		dml_uint_t bpc,
		dml_float_t BPP,
		dml_uint_t sliceWidth,
		dml_uint_t numSlices,
		enum dml_output_format_class pixelFormat,
		enum dml_output_encoder_class Output)
{
	// rate-control model size is a fixed value
	const dml_uint_t rcModelSize = 8192;

	// N420 and N422 operate at 2 pixels per clock, every other mode at 1
	const dml_uint_t pixelsPerClock =
		(pixelFormat == dml_420 || pixelFormat == dml_n422) ? 2 : 1;

	// initial transmit delay as per PPS
	const dml_uint_t xmitDelay =
		(dml_uint_t)(dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock, 1));

	// ssm delay depends on the source bit depth
	const dml_uint_t ssmDelay = (bpc == 8) ? 81 : ((bpc == 10) ? 89 : 113);

	// slice width as seen by DSC: divide by the pixels handled per cycle
	const dml_uint_t sliceW = sliceWidth / pixelsPerClock;

	// formats other than N420 / 444 / N422 pay one additional cycle of delay
	const dml_uint_t extraCycle =
		(pixelFormat == dml_420 || pixelFormat == dml_444 || pixelFormat == dml_n422) ? 0 : 1;

	// main calculation for the dscce
	const dml_uint_t ix = xmitDelay + 45;
	const dml_uint_t sliceWThirds = (sliceW + 2) / 3;          // ceil(sliceW / 3)
	const dml_uint_t padTo3 = 3 * sliceWThirds - sliceW;       // distance of sliceW to the next multiple of 3
	const dml_uint_t wholeLines = ix / sliceW;
	const dml_uint_t a = ix + padTo3 * wholeLines;
	const dml_uint_t ax = (a + 2) / 3 + ssmDelay + 6 + 1;
	const dml_uint_t lineCount = (ax + sliceWThirds - 1) / sliceWThirds; // ceil(ax / sliceWThirds)

	// one stall cycle when ix is an exact multiple of sliceW and sliceW is not a multiple of 3
	const dml_uint_t lstall = ((ix % sliceW) == 0 && padTo3 != 0) ? 1 : 0;

	const dml_uint_t delayCycles =
		lineCount * sliceWThirds * (numSlices - 1) + ax + extraCycle + lstall + 22;

	// dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
	const dml_uint_t pixels = delayCycles * 3 * pixelsPerClock;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: bpc: %u\n", __func__, bpc);
	dml_print("DML::%s: BPP: %f\n", __func__, BPP);
	dml_print("DML::%s: sliceWidth: %u\n", __func__, sliceWidth);
	dml_print("DML::%s: numSlices: %u\n", __func__, numSlices);
	dml_print("DML::%s: pixelFormat: %u\n", __func__, pixelFormat);
	dml_print("DML::%s: Output: %u\n", __func__, Output);
	dml_print("DML::%s: pixels: %u\n", __func__, pixels);
#endif
	return pixels;
}
919
dscComputeDelay(enum dml_output_format_class pixelFormat,enum dml_output_encoder_class Output)920 static dml_uint_t dscComputeDelay(enum dml_output_format_class pixelFormat, enum dml_output_encoder_class Output)
921 {
922 dml_uint_t Delay = 0;
923
924 if (pixelFormat == dml_420) {
925 // sfr
926 Delay = Delay + 2;
927 // dsccif
928 Delay = Delay + 0;
929 // dscc - input deserializer
930 Delay = Delay + 3;
931 // dscc gets pixels every other cycle
932 Delay = Delay + 2;
933 // dscc - input cdc fifo
934 Delay = Delay + 12;
935 // dscc gets pixels every other cycle
936 Delay = Delay + 13;
937 // dscc - cdc uncertainty
938 Delay = Delay + 2;
939 // dscc - output cdc fifo
940 Delay = Delay + 7;
941 // dscc gets pixels every other cycle
942 Delay = Delay + 3;
943 // dscc - cdc uncertainty
944 Delay = Delay + 2;
945 // dscc - output serializer
946 Delay = Delay + 1;
947 // sft
948 Delay = Delay + 1;
949 } else if (pixelFormat == dml_n422) {
950 // sfr
951 Delay = Delay + 2;
952 // dsccif
953 Delay = Delay + 1;
954 // dscc - input deserializer
955 Delay = Delay + 5;
956 // dscc - input cdc fifo
957 Delay = Delay + 25;
958 // dscc - cdc uncertainty
959 Delay = Delay + 2;
960 // dscc - output cdc fifo
961 Delay = Delay + 10;
962 // dscc - cdc uncertainty
963 Delay = Delay + 2;
964 // dscc - output serializer
965 Delay = Delay + 1;
966 // sft
967 Delay = Delay + 1;
968 } else {
969 // sfr
970 Delay = Delay + 2;
971 // dsccif
972 Delay = Delay + 0;
973 // dscc - input deserializer
974 Delay = Delay + 3;
975 // dscc - input cdc fifo
976 Delay = Delay + 12;
977 // dscc - cdc uncertainty
978 Delay = Delay + 2;
979 // dscc - output cdc fifo
980 Delay = Delay + 7;
981 // dscc - output serializer
982 Delay = Delay + 1;
983 // dscc - cdc uncertainty
984 Delay = Delay + 2;
985 // sft
986 Delay = Delay + 1;
987 }
988 #ifdef __DML_VBA_DEBUG__
989 dml_print("DML::%s: pixelFormat = %u\n", __func__, pixelFormat);
990 dml_print("DML::%s: Delay = %u\n", __func__, Delay);
991 #endif
992
993 return Delay;
994 }
995
CalculatePrefetchSchedule(struct display_mode_lib_scratch_st * scratch,struct CalculatePrefetchSchedule_params_st * p)996 static dml_bool_t CalculatePrefetchSchedule(struct display_mode_lib_scratch_st *scratch,
997 struct CalculatePrefetchSchedule_params_st *p)
998 {
999 struct CalculatePrefetchSchedule_locals_st *s = &scratch->CalculatePrefetchSchedule_locals;
1000
1001 s->MyError = false;
1002 s->DPPCycles = 0;
1003 s->DISPCLKCycles = 0;
1004 s->DSTTotalPixelsAfterScaler = 0.0;
1005 s->LineTime = 0.0;
1006 s->dst_y_prefetch_equ = 0.0;
1007 s->prefetch_bw_oto = 0.0;
1008 s->Tvm_oto = 0.0;
1009 s->Tr0_oto = 0.0;
1010 s->Tvm_oto_lines = 0.0;
1011 s->Tr0_oto_lines = 0.0;
1012 s->dst_y_prefetch_oto = 0.0;
1013 s->TimeForFetchingMetaPTE = 0.0;
1014 s->TimeForFetchingRowInVBlank = 0.0;
1015 s->LinesToRequestPrefetchPixelData = 0.0;
1016 s->HostVMDynamicLevelsTrips = 0;
1017 s->trip_to_mem = 0.0;
1018 s->Tvm_trips = 0.0;
1019 s->Tr0_trips = 0.0;
1020 s->Tvm_trips_rounded = 0.0;
1021 s->Tr0_trips_rounded = 0.0;
1022 s->max_Tsw = 0.0;
1023 s->Lsw_oto = 0.0;
1024 s->Tpre_rounded = 0.0;
1025 s->prefetch_bw_equ = 0.0;
1026 s->Tvm_equ = 0.0;
1027 s->Tr0_equ = 0.0;
1028 s->Tdmbf = 0.0;
1029 s->Tdmec = 0.0;
1030 s->Tdmsks = 0.0;
1031 s->prefetch_sw_bytes = 0.0;
1032 s->prefetch_bw_pr = 0.0;
1033 s->bytes_pp = 0.0;
1034 s->dep_bytes = 0.0;
1035 s->min_Lsw_oto = 0.0;
1036 s->Tsw_est1 = 0.0;
1037 s->Tsw_est3 = 0.0;
1038
1039 if (p->GPUVMEnable == true && p->HostVMEnable == true) {
1040 s->HostVMDynamicLevelsTrips = p->HostVMMaxNonCachedPageTableLevels;
1041 } else {
1042 s->HostVMDynamicLevelsTrips = 0;
1043 }
1044 #ifdef __DML_VBA_DEBUG__
1045 dml_print("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable);
1046 dml_print("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->GPUVMPageTableLevels);
1047 dml_print("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable);
1048 dml_print("DML::%s: VStartup = %u\n", __func__, p->VStartup);
1049 dml_print("DML::%s: MaxVStartup = %u\n", __func__, p->MaxVStartup);
1050 dml_print("DML::%s: HostVMEnable = %u\n", __func__, p->HostVMEnable);
1051 dml_print("DML::%s: HostVMInefficiencyFactor= %f\n", __func__, p->HostVMInefficiencyFactor);
1052 dml_print("DML::%s: myPipe->Dppclk = %f\n", __func__, p->myPipe->Dppclk);
1053 #endif
1054 CalculateVUpdateAndDynamicMetadataParameters(
1055 p->MaxInterDCNTileRepeaters,
1056 p->myPipe->Dppclk,
1057 p->myPipe->Dispclk,
1058 p->myPipe->DCFClkDeepSleep,
1059 p->myPipe->PixelClock,
1060 p->myPipe->HTotal,
1061 p->myPipe->VBlank,
1062 p->DynamicMetadataTransmittedBytes,
1063 p->DynamicMetadataLinesBeforeActiveRequired,
1064 p->myPipe->InterlaceEnable,
1065 p->myPipe->ProgressiveToInterlaceUnitInOPP,
1066 p->TSetup,
1067
1068 // Output
1069 &s->Tdmbf,
1070 &s->Tdmec,
1071 &s->Tdmsks,
1072 p->VUpdateOffsetPix,
1073 p->VUpdateWidthPix,
1074 p->VReadyOffsetPix);
1075
1076 s->LineTime = p->myPipe->HTotal / p->myPipe->PixelClock;
1077 s->trip_to_mem = p->UrgentLatency;
1078 s->Tvm_trips = p->UrgentExtraLatency + s->trip_to_mem * (p->GPUVMPageTableLevels * (s->HostVMDynamicLevelsTrips + 1) - 1);
1079
1080 if (p->DynamicMetadataVMEnabled == true) {
1081 *p->Tdmdl = p->TWait + s->Tvm_trips + s->trip_to_mem;
1082 } else {
1083 *p->Tdmdl = p->TWait + p->UrgentExtraLatency;
1084 }
1085
1086 #ifdef __DML_VBA_ALLOW_DELTA__
1087 if (DynamicMetadataEnable == false) {
1088 *Tdmdl = 0.0;
1089 }
1090 #endif
1091
1092 if (p->DynamicMetadataEnable == true) {
1093 if (p->VStartup * s->LineTime < *p->TSetup + *p->Tdmdl + s->Tdmbf + s->Tdmec + s->Tdmsks) {
1094 *p->NotEnoughTimeForDynamicMetadata = true;
1095 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
1096 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf);
1097 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec);
1098 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks);
1099 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl);
1100 } else {
1101 *p->NotEnoughTimeForDynamicMetadata = false;
1102 }
1103 } else {
1104 *p->NotEnoughTimeForDynamicMetadata = false;
1105 }
1106
1107 *p->Tdmdl_vm = (p->DynamicMetadataEnable == true && p->DynamicMetadataVMEnabled == true && p->GPUVMEnable == true ? p->TWait + s->Tvm_trips : 0);
1108
1109 if (p->myPipe->ScalerEnabled)
1110 s->DPPCycles = (dml_uint_t)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCL);
1111 else
1112 s->DPPCycles = (dml_uint_t)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCLLBOnly);
1113
1114 s->DPPCycles = (dml_uint_t)(s->DPPCycles + p->myPipe->NumberOfCursors * p->DPPCLKDelayCNVCCursor);
1115
1116 s->DISPCLKCycles = (dml_uint_t)p->DISPCLKDelaySubtotal;
1117
1118 if (p->myPipe->Dppclk == 0.0 || p->myPipe->Dispclk == 0.0)
1119 return true;
1120
1121 *p->DSTXAfterScaler = (dml_uint_t) dml_round(s->DPPCycles * p->myPipe->PixelClock / p->myPipe->Dppclk + s->DISPCLKCycles * p->myPipe->PixelClock / p->myPipe->Dispclk + p->DSCDelay, 1.0);
1122 *p->DSTXAfterScaler = (dml_uint_t) dml_round(*p->DSTXAfterScaler + (p->myPipe->ODMMode != dml_odm_mode_bypass ? 18 : 0) + (p->myPipe->DPPPerSurface - 1) * p->DPP_RECOUT_WIDTH +
1123 ((p->myPipe->ODMMode == dml_odm_mode_split_1to2 || p->myPipe->ODMMode == dml_odm_mode_mso_1to2) ? (dml_float_t)p->myPipe->HActive / 2.0 : 0) +
1124 ((p->myPipe->ODMMode == dml_odm_mode_mso_1to4) ? (dml_float_t)p->myPipe->HActive * 3.0 / 4.0 : 0), 1.0);
1125
1126 #ifdef __DML_VBA_DEBUG__
1127 dml_print("DML::%s: DPPCycles = %u\n", __func__, s->DPPCycles);
1128 dml_print("DML::%s: PixelClock = %f\n", __func__, p->myPipe->PixelClock);
1129 dml_print("DML::%s: Dppclk = %f\n", __func__, p->myPipe->Dppclk);
1130 dml_print("DML::%s: DISPCLKCycles = %u\n", __func__, s->DISPCLKCycles);
1131 dml_print("DML::%s: DISPCLK = %f\n", __func__, p->myPipe->Dispclk);
1132 dml_print("DML::%s: DSCDelay = %u\n", __func__, p->DSCDelay);
1133 dml_print("DML::%s: ODMMode = %u\n", __func__, p->myPipe->ODMMode);
1134 dml_print("DML::%s: DPP_RECOUT_WIDTH = %u\n", __func__, p->DPP_RECOUT_WIDTH);
1135 dml_print("DML::%s: DSTXAfterScaler = %u\n", __func__, *p->DSTXAfterScaler);
1136 #endif
1137
1138 if (p->OutputFormat == dml_420 || (p->myPipe->InterlaceEnable && p->myPipe->ProgressiveToInterlaceUnitInOPP))
1139 *p->DSTYAfterScaler = 1;
1140 else
1141 *p->DSTYAfterScaler = 0;
1142
1143 s->DSTTotalPixelsAfterScaler = *p->DSTYAfterScaler * p->myPipe->HTotal + *p->DSTXAfterScaler;
1144 *p->DSTYAfterScaler = (dml_uint_t)(dml_floor(s->DSTTotalPixelsAfterScaler / p->myPipe->HTotal, 1));
1145 *p->DSTXAfterScaler = (dml_uint_t)(s->DSTTotalPixelsAfterScaler - ((dml_float_t) (*p->DSTYAfterScaler * p->myPipe->HTotal)));
1146 #ifdef __DML_VBA_DEBUG__
1147 dml_print("DML::%s: DSTXAfterScaler = %u (final)\n", __func__, *p->DSTXAfterScaler);
1148 dml_print("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler);
1149 #endif
1150
1151 s->MyError = false;
1152
1153 s->Tr0_trips = s->trip_to_mem * (s->HostVMDynamicLevelsTrips + 1);
1154
1155 if (p->GPUVMEnable == true) {
1156 s->Tvm_trips_rounded = dml_ceil(4.0 * s->Tvm_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
1157 s->Tr0_trips_rounded = dml_ceil(4.0 * s->Tr0_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
1158 if (p->GPUVMPageTableLevels >= 3) {
1159 *p->Tno_bw = p->UrgentExtraLatency + s->trip_to_mem * (dml_float_t) ((p->GPUVMPageTableLevels - 2) * (s->HostVMDynamicLevelsTrips + 1) - 1);
1160 } else if (p->GPUVMPageTableLevels == 1 && p->myPipe->DCCEnable != true) {
1161 s->Tr0_trips_rounded = dml_ceil(4.0 * p->UrgentExtraLatency / s->LineTime, 1.0) / 4.0 * s->LineTime;
1162 *p->Tno_bw = p->UrgentExtraLatency;
1163 } else {
1164 *p->Tno_bw = 0;
1165 }
1166 } else if (p->myPipe->DCCEnable == true) {
1167 s->Tvm_trips_rounded = s->LineTime / 4.0;
1168 s->Tr0_trips_rounded = dml_ceil(4.0 * s->Tr0_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
1169 *p->Tno_bw = 0;
1170 } else {
1171 s->Tvm_trips_rounded = s->LineTime / 4.0;
1172 s->Tr0_trips_rounded = s->LineTime / 2.0;
1173 *p->Tno_bw = 0;
1174 }
1175 s->Tvm_trips_rounded = dml_max(s->Tvm_trips_rounded, s->LineTime / 4.0);
1176 s->Tr0_trips_rounded = dml_max(s->Tr0_trips_rounded, s->LineTime / 4.0);
1177
1178 if (p->myPipe->SourcePixelFormat == dml_420_8 || p->myPipe->SourcePixelFormat == dml_420_10 || p->myPipe->SourcePixelFormat == dml_420_12) {
1179 s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC / 4;
1180 } else {
1181 s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC;
1182 }
1183
1184 s->prefetch_bw_pr = s->bytes_pp * p->myPipe->PixelClock / (dml_float_t)p->myPipe->DPPPerSurface;
1185 if (p->myPipe->VRatio < 1.0)
1186 s->prefetch_bw_pr = p->myPipe->VRatio * s->prefetch_bw_pr;
1187
1188 s->max_Tsw = (dml_max(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) * s->LineTime);
1189
1190 s->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC;
1191 s->prefetch_bw_oto = dml_max(s->prefetch_bw_pr, s->prefetch_sw_bytes / s->max_Tsw);
1192
1193 s->min_Lsw_oto = dml_max(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML_MAX_VRATIO_PRE_OTO__;
1194 s->min_Lsw_oto = dml_max(s->min_Lsw_oto, 1.0);
1195 s->Lsw_oto = dml_ceil(4.0 * dml_max(s->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, s->min_Lsw_oto), 1.0) / 4.0;
1196
1197 if (p->GPUVMEnable == true) {
1198 s->Tvm_oto = dml_max3(
1199 s->Tvm_trips,
1200 *p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->prefetch_bw_oto,
1201 s->LineTime / 4.0);
1202 } else
1203 s->Tvm_oto = s->LineTime / 4.0;
1204
1205 if ((p->GPUVMEnable == true || p->myPipe->DCCEnable == true)) {
1206 s->Tr0_oto = dml_max4(
1207 s->Tr0_trips,
1208 (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->prefetch_bw_oto,
1209 (s->LineTime - s->Tvm_oto)/2.0,
1210 s->LineTime / 4.0);
1211 #ifdef __DML_VBA_DEBUG__
1212 dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__, (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->prefetch_bw_oto);
1213 dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, s->Tr0_trips);
1214 dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime - s->Tvm_oto);
1215 dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, s->LineTime / 4);
1216 #endif
1217 } else
1218 s->Tr0_oto = (s->LineTime - s->Tvm_oto) / 2.0;
1219
1220 s->Tvm_oto_lines = dml_ceil(4.0 * s->Tvm_oto / s->LineTime, 1) / 4.0;
1221 s->Tr0_oto_lines = dml_ceil(4.0 * s->Tr0_oto / s->LineTime, 1) / 4.0;
1222 s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto;
1223
1224 s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + dml_max(p->TWait + p->TCalc, *p->Tdmdl)) / s->LineTime - (*p->DSTYAfterScaler + (dml_float_t) *p->DSTXAfterScaler / (dml_float_t)p->myPipe->HTotal);
1225 s->dst_y_prefetch_equ = dml_min(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
1226
1227 #ifdef __DML_VBA_DEBUG__
1228 dml_print("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal);
1229 dml_print("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto);
1230 dml_print("DML::%s: *Tno_bw = %f\n", __func__, *p->Tno_bw);
1231 dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, p->UrgentExtraLatency);
1232 dml_print("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem);
1233 dml_print("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY);
1234 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
1235 dml_print("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub);
1236 dml_print("DML::%s: BytePerPixelC = %u\n", __func__, p->myPipe->BytePerPixelC);
1237 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC);
1238 dml_print("DML::%s: swath_width_chroma_ub = %u\n", __func__, p->swath_width_chroma_ub);
1239 dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, s->prefetch_sw_bytes);
1240 dml_print("DML::%s: bytes_pp = %f\n", __func__, s->bytes_pp);
1241 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, p->PDEAndMetaPTEBytesFrame);
1242 dml_print("DML::%s: MetaRowByte = %u\n", __func__, p->MetaRowByte);
1243 dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
1244 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
1245 dml_print("DML::%s: Tvm_trips = %f\n", __func__, s->Tvm_trips);
1246 dml_print("DML::%s: Tr0_trips = %f\n", __func__, s->Tr0_trips);
1247 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, s->prefetch_bw_oto);
1248 dml_print("DML::%s: Tr0_oto = %f\n", __func__, s->Tr0_oto);
1249 dml_print("DML::%s: Tvm_oto = %f\n", __func__, s->Tvm_oto);
1250 dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, s->Tvm_oto_lines);
1251 dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, s->Tr0_oto_lines);
1252 dml_print("DML::%s: Lsw_oto = %f\n", __func__, s->Lsw_oto);
1253 dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto);
1254 dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, s->dst_y_prefetch_equ);
1255 #endif
1256
1257 s->dst_y_prefetch_equ = dml_floor(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0;
1258 s->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime;
1259
1260 dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ);
1261
1262 dml_print("DML::%s: LineTime: %f\n", __func__, s->LineTime);
1263 dml_print("DML::%s: VStartup: %u\n", __func__, p->VStartup);
1264 dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", __func__, p->VStartup * s->LineTime);
1265 dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *p->TSetup);
1266 dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, p->TCalc);
1267 dml_print("DML::%s: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", __func__, p->TWait);
1268 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf);
1269 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec);
1270 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks);
1271 dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd \n", __func__, *p->Tdmdl_vm);
1272 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl);
1273 dml_print("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler);
1274 dml_print("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler);
1275
1276 s->dep_bytes = dml_max(p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor, p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor);
1277
1278 if (s->prefetch_sw_bytes < s->dep_bytes) {
1279 s->prefetch_sw_bytes = 2 * s->dep_bytes;
1280 }
1281
1282 *p->DestinationLinesToRequestVMInVBlank = 0;
1283 *p->DestinationLinesToRequestRowInVBlank = 0;
1284 *p->VRatioPrefetchY = 0;
1285 *p->VRatioPrefetchC = 0;
1286 *p->RequiredPrefetchPixDataBWLuma = 0;
1287 if (s->dst_y_prefetch_equ > 1) {
1288
1289 if (s->Tpre_rounded - *p->Tno_bw > 0) {
1290 s->PrefetchBandwidth1 = (p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor + 2 * p->MetaRowByte
1291 + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor
1292 + s->prefetch_sw_bytes)
1293 / (s->Tpre_rounded - *p->Tno_bw);
1294 s->Tsw_est1 = s->prefetch_sw_bytes / s->PrefetchBandwidth1;
1295 } else
1296 s->PrefetchBandwidth1 = 0;
1297
1298 if (p->VStartup == p->MaxVStartup && (s->Tsw_est1 / s->LineTime < s->min_Lsw_oto) && s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0) {
1299 s->PrefetchBandwidth1 = (p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor + 2 * p->MetaRowByte + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) /
1300 (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw);
1301 }
1302
1303 if (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded > 0)
1304 s->PrefetchBandwidth2 = (p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor + s->prefetch_sw_bytes) /
1305 (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded);
1306 else
1307 s->PrefetchBandwidth2 = 0;
1308
1309 if (s->Tpre_rounded - s->Tvm_trips_rounded > 0) {
1310 s->PrefetchBandwidth3 = (2 * p->MetaRowByte + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + s->prefetch_sw_bytes) /
1311 (s->Tpre_rounded - s->Tvm_trips_rounded);
1312 s->Tsw_est3 = s->prefetch_sw_bytes / s->PrefetchBandwidth3;
1313 }
1314 else
1315 s->PrefetchBandwidth3 = 0;
1316
1317
1318 if (p->VStartup == p->MaxVStartup && (s->Tsw_est3 / s->LineTime < s->min_Lsw_oto) && s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded > 0) {
1319 s->PrefetchBandwidth3 = (2 * p->MetaRowByte + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded);
1320 }
1321
1322 if (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded > 0)
1323 s->PrefetchBandwidth4 = s->prefetch_sw_bytes / (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded);
1324 else
1325 s->PrefetchBandwidth4 = 0;
1326
1327 #ifdef __DML_VBA_DEBUG__
1328 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, s->Tpre_rounded);
1329 dml_print("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw);
1330 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, s->Tvm_trips_rounded);
1331 dml_print("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1);
1332 dml_print("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3);
1333 dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, s->PrefetchBandwidth1);
1334 dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, s->PrefetchBandwidth2);
1335 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, s->PrefetchBandwidth3);
1336 dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, s->PrefetchBandwidth4);
1337 #endif
1338 {
1339 dml_bool_t Case1OK;
1340 dml_bool_t Case2OK;
1341 dml_bool_t Case3OK;
1342
1343 if (s->PrefetchBandwidth1 > 0) {
1344 if (*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->PrefetchBandwidth1 >= s->Tvm_trips_rounded && (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->PrefetchBandwidth1 >= s->Tr0_trips_rounded) {
1345 Case1OK = true;
1346 } else {
1347 Case1OK = false;
1348 }
1349 } else {
1350 Case1OK = false;
1351 }
1352
1353 if (s->PrefetchBandwidth2 > 0) {
1354 if (*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->PrefetchBandwidth2 >= s->Tvm_trips_rounded && (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->PrefetchBandwidth2 < s->Tr0_trips_rounded) {
1355 Case2OK = true;
1356 } else {
1357 Case2OK = false;
1358 }
1359 } else {
1360 Case2OK = false;
1361 }
1362
1363 if (s->PrefetchBandwidth3 > 0) {
1364 if (*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->PrefetchBandwidth3 < s->Tvm_trips_rounded && (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->PrefetchBandwidth3 >= s->Tr0_trips_rounded) {
1365 Case3OK = true;
1366 } else {
1367 Case3OK = false;
1368 }
1369 } else {
1370 Case3OK = false;
1371 }
1372
1373 if (Case1OK) {
1374 s->prefetch_bw_equ = s->PrefetchBandwidth1;
1375 } else if (Case2OK) {
1376 s->prefetch_bw_equ = s->PrefetchBandwidth2;
1377 } else if (Case3OK) {
1378 s->prefetch_bw_equ = s->PrefetchBandwidth3;
1379 } else {
1380 s->prefetch_bw_equ = s->PrefetchBandwidth4;
1381 }
1382
1383 #ifdef __DML_VBA_DEBUG__
1384 dml_print("DML::%s: Case1OK: %u\n", __func__, Case1OK);
1385 dml_print("DML::%s: Case2OK: %u\n", __func__, Case2OK);
1386 dml_print("DML::%s: Case3OK: %u\n", __func__, Case3OK);
1387 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, s->prefetch_bw_equ);
1388 #endif
1389
1390 if (s->prefetch_bw_equ > 0) {
1391 if (p->GPUVMEnable == true) {
1392 s->Tvm_equ = dml_max3(*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->prefetch_bw_equ, s->Tvm_trips, s->LineTime / 4);
1393 } else {
1394 s->Tvm_equ = s->LineTime / 4;
1395 }
1396
1397 if ((p->GPUVMEnable == true || p->myPipe->DCCEnable == true)) {
1398 s->Tr0_equ = dml_max4((p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->prefetch_bw_equ, s->Tr0_trips, (s->LineTime - s->Tvm_equ) / 2, s->LineTime / 4);
1399 } else {
1400 s->Tr0_equ = (s->LineTime - s->Tvm_equ) / 2;
1401 }
1402 } else {
1403 s->Tvm_equ = 0;
1404 s->Tr0_equ = 0;
1405 dml_print("DML::%s: prefetch_bw_equ equals 0!\n", __func__);
1406 }
1407 }
1408
1409
1410 if (s->dst_y_prefetch_oto < s->dst_y_prefetch_equ) {
1411 *p->DestinationLinesForPrefetch = s->dst_y_prefetch_oto;
1412 s->TimeForFetchingMetaPTE = s->Tvm_oto;
1413 s->TimeForFetchingRowInVBlank = s->Tr0_oto;
1414
1415 *p->DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * s->TimeForFetchingMetaPTE / s->LineTime, 1.0) / 4.0;
1416 *p->DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
1417 } else {
1418 *p->DestinationLinesForPrefetch = s->dst_y_prefetch_equ;
1419 s->TimeForFetchingMetaPTE = s->Tvm_equ;
1420 s->TimeForFetchingRowInVBlank = s->Tr0_equ;
1421
1422 if (p->VStartup == p->MaxVStartup && p->EnhancedPrefetchScheduleAccelerationFinal != 0) {
1423 *p->DestinationLinesToRequestVMInVBlank = dml_floor(4.0 * s->TimeForFetchingMetaPTE / s->LineTime, 1.0) / 4.0;
1424 *p->DestinationLinesToRequestRowInVBlank = dml_floor(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
1425 } else {
1426 *p->DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * s->TimeForFetchingMetaPTE / s->LineTime, 1.0) / 4.0;
1427 *p->DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
1428 }
1429 }
1430
1431 s->LinesToRequestPrefetchPixelData = *p->DestinationLinesForPrefetch - *p->DestinationLinesToRequestVMInVBlank - 2 * *p->DestinationLinesToRequestRowInVBlank;
1432
1433 #ifdef __DML_VBA_DEBUG__
1434 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *p->DestinationLinesForPrefetch);
1435 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *p->DestinationLinesToRequestVMInVBlank);
1436 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, s->TimeForFetchingRowInVBlank);
1437 dml_print("DML::%s: LineTime = %f\n", __func__, s->LineTime);
1438 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *p->DestinationLinesToRequestRowInVBlank);
1439 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
1440 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData);
1441 #endif
1442
1443 if (s->LinesToRequestPrefetchPixelData >= 1 && s->prefetch_bw_equ > 0) {
1444 *p->VRatioPrefetchY = (dml_float_t)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData;
1445 *p->VRatioPrefetchY = dml_max(*p->VRatioPrefetchY, 1.0);
1446 #ifdef __DML_VBA_DEBUG__
1447 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY);
1448 dml_print("DML::%s: SwathHeightY = %u\n", __func__, p->SwathHeightY);
1449 dml_print("DML::%s: VInitPreFillY = %u\n", __func__, p->VInitPreFillY);
1450 #endif
1451 if ((p->SwathHeightY > 4) && (p->VInitPreFillY > 3)) {
1452 if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillY - 3.0) / 2.0) {
1453 *p->VRatioPrefetchY = dml_max(*p->VRatioPrefetchY,
1454 (dml_float_t)p->MaxNumSwathY * p->SwathHeightY / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillY - 3.0) / 2.0));
1455 } else {
1456 s->MyError = true;
1457 dml_print("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY);
1458 *p->VRatioPrefetchY = 0;
1459 }
1460 #ifdef __DML_VBA_DEBUG__
1461 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY);
1462 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
1463 dml_print("DML::%s: MaxNumSwathY = %u\n", __func__, p->MaxNumSwathY);
1464 #endif
1465 }
1466
1467 *p->VRatioPrefetchC = (dml_float_t)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData;
1468 *p->VRatioPrefetchC = dml_max(*p->VRatioPrefetchC, 1.0);
1469
1470 #ifdef __DML_VBA_DEBUG__
1471 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC);
1472 dml_print("DML::%s: SwathHeightC = %u\n", __func__, p->SwathHeightC);
1473 dml_print("DML::%s: VInitPreFillC = %u\n", __func__, p->VInitPreFillC);
1474 #endif
1475 if ((p->SwathHeightC > 4) && (p->VInitPreFillC > 3)) {
1476 if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillC - 3.0) / 2.0) {
1477 *p->VRatioPrefetchC = dml_max(*p->VRatioPrefetchC, (dml_float_t)p->MaxNumSwathC * p->SwathHeightC / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillC - 3.0) / 2.0));
1478 } else {
1479 s->MyError = true;
1480 dml_print("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC);
1481 *p->VRatioPrefetchC = 0;
1482 }
1483 #ifdef __DML_VBA_DEBUG__
1484 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC);
1485 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC);
1486 dml_print("DML::%s: MaxNumSwathC = %u\n", __func__, p->MaxNumSwathC);
1487 #endif
1488 }
1489
1490 *p->RequiredPrefetchPixDataBWLuma = (dml_float_t)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData
1491 * p->myPipe->BytePerPixelY
1492 * p->swath_width_luma_ub / s->LineTime;
1493
1494 #ifdef __DML_VBA_DEBUG__
1495 dml_print("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY);
1496 dml_print("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub);
1497 dml_print("DML::%s: LineTime = %f\n", __func__, s->LineTime);
1498 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *p->RequiredPrefetchPixDataBWLuma);
1499 #endif
1500 *p->RequiredPrefetchPixDataBWChroma = (dml_float_t)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData
1501 *p->myPipe->BytePerPixelC
1502 *p->swath_width_chroma_ub / s->LineTime;
1503 } else {
1504 s->MyError = true;
1505 dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n", __func__, s->LinesToRequestPrefetchPixelData);
1506 *p->VRatioPrefetchY = 0;
1507 *p->VRatioPrefetchC = 0;
1508 *p->RequiredPrefetchPixDataBWLuma = 0;
1509 *p->RequiredPrefetchPixDataBWChroma = 0;
1510 }
1511
1512 dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", (dml_float_t)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingMetaPTE);
1513 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", s->TimeForFetchingMetaPTE);
1514 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", s->TimeForFetchingRowInVBlank);
1515 dml_print("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (dml_float_t)s->LinesToRequestPrefetchPixelData * s->LineTime);
1516 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*p->DSTYAfterScaler + ((dml_float_t) (*p->DSTXAfterScaler) / (dml_float_t)p->myPipe->HTotal)) * s->LineTime);
1517 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
1518 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", p->VStartup * s->LineTime - s->TimeForFetchingMetaPTE - 2 * s->TimeForFetchingRowInVBlank - (*p->DSTYAfterScaler + ((dml_float_t) (*p->DSTXAfterScaler) / (dml_float_t)p->myPipe->HTotal)) * s->LineTime - p->TWait - p->TCalc - *p->TSetup);
1519 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow);
1520
1521 } else {
1522 s->MyError = true;
1523 dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ);
1524 s->TimeForFetchingMetaPTE = 0;
1525 s->TimeForFetchingRowInVBlank = 0;
1526 *p->DestinationLinesToRequestVMInVBlank = 0;
1527 *p->DestinationLinesToRequestRowInVBlank = 0;
1528 s->LinesToRequestPrefetchPixelData = 0;
1529 *p->VRatioPrefetchY = 0;
1530 *p->VRatioPrefetchC = 0;
1531 *p->RequiredPrefetchPixDataBWLuma = 0;
1532 *p->RequiredPrefetchPixDataBWChroma = 0;
1533 }
1534
1535 {
1536 dml_float_t prefetch_vm_bw;
1537 dml_float_t prefetch_row_bw;
1538
1539 if (p->PDEAndMetaPTEBytesFrame == 0) {
1540 prefetch_vm_bw = 0;
1541 } else if (*p->DestinationLinesToRequestVMInVBlank > 0) {
1542 #ifdef __DML_VBA_DEBUG__
1543 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, p->PDEAndMetaPTEBytesFrame);
1544 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
1545 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *p->DestinationLinesToRequestVMInVBlank);
1546 dml_print("DML::%s: LineTime = %f\n", __func__, s->LineTime);
1547 #endif
1548 prefetch_vm_bw = p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / (*p->DestinationLinesToRequestVMInVBlank * s->LineTime);
1549 #ifdef __DML_VBA_DEBUG__
1550 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
1551 #endif
1552 } else {
1553 prefetch_vm_bw = 0;
1554 s->MyError = true;
1555 dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n", __func__, *p->DestinationLinesToRequestVMInVBlank);
1556 }
1557
1558 if (p->MetaRowByte + p->PixelPTEBytesPerRow == 0) {
1559 prefetch_row_bw = 0;
1560 } else if (*p->DestinationLinesToRequestRowInVBlank > 0) {
1561 prefetch_row_bw = (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / (*p->DestinationLinesToRequestRowInVBlank * s->LineTime);
1562
1563 #ifdef __DML_VBA_DEBUG__
1564 dml_print("DML::%s: MetaRowByte = %u\n", __func__, p->MetaRowByte);
1565 dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
1566 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *p->DestinationLinesToRequestRowInVBlank);
1567 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
1568 #endif
1569 } else {
1570 prefetch_row_bw = 0;
1571 s->MyError = true;
1572 dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n", __func__, *p->DestinationLinesToRequestRowInVBlank);
1573 }
1574
1575 *p->prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1576 }
1577
1578 if (s->MyError) {
1579 s->TimeForFetchingMetaPTE = 0;
1580 s->TimeForFetchingRowInVBlank = 0;
1581 *p->DestinationLinesToRequestVMInVBlank = 0;
1582 *p->DestinationLinesToRequestRowInVBlank = 0;
1583 *p->DestinationLinesForPrefetch = 0;
1584 s->LinesToRequestPrefetchPixelData = 0;
1585 *p->VRatioPrefetchY = 0;
1586 *p->VRatioPrefetchC = 0;
1587 *p->RequiredPrefetchPixDataBWLuma = 0;
1588 *p->RequiredPrefetchPixDataBWChroma = 0;
1589 }
1590
1591 return s->MyError;
1592 } // CalculatePrefetchSchedule
1593
/// @brief Derive the per-plane bytes per pixel, the 256-byte block dimensions and the macro tile
///        dimensions from the source pixel format and the surface tiling mode.
/// @param SourcePixelFormat pixel format of the surface
/// @param SurfaceTiling swizzle mode of the surface
/// @param BytePerPixelY [out] whole bytes per pixel of the first (luma) plane
/// @param BytePerPixelC [out] whole bytes per pixel of the second (chroma) plane, 0 when there is none
/// @param BytePerPixelDETY [out] floating-point bytes per pixel of the first plane; equals BytePerPixelY
///        except for the fallback format, where it is 4/3
/// @param BytePerPixelDETC [out] floating-point bytes per pixel of the second plane; equals BytePerPixelC
///        except for the fallback format, where it is 8/3
/// @param BlockHeight256BytesY [out] height of a 256-byte block of the first plane
/// @param BlockHeight256BytesC [out] height of a 256-byte block of the second plane, 0 when there is none
/// @param BlockWidth256BytesY [out] 256 / BytePerPixelY / BlockHeight256BytesY
/// @param BlockWidth256BytesC [out] 256 / BytePerPixelC / BlockHeight256BytesC, 0 when there is no second plane
/// @param MacroTileHeightY [out] block height of the first plane scaled by 1, 16 or 32 depending on the tiling
/// @param MacroTileHeightC [out] same scaling applied to the second plane's block height
/// @param MacroTileWidthY [out] tile size in bytes / BytePerPixelY / MacroTileHeightY
/// @param MacroTileWidthC [out] tile size in bytes / BytePerPixelC / MacroTileHeightC, 0 when MacroTileHeightC is 0
static void CalculateBytePerPixelAndBlockSizes(
	enum dml_source_format_class SourcePixelFormat,
	enum dml_swizzle_mode SurfaceTiling,

	// Output
	dml_uint_t *BytePerPixelY,
	dml_uint_t *BytePerPixelC,
	dml_float_t *BytePerPixelDETY,
	dml_float_t *BytePerPixelDETC,
	dml_uint_t *BlockHeight256BytesY,
	dml_uint_t *BlockHeight256BytesC,
	dml_uint_t *BlockWidth256BytesY,
	dml_uint_t *BlockWidth256BytesC,
	dml_uint_t *MacroTileHeightY,
	dml_uint_t *MacroTileHeightC,
	dml_uint_t *MacroTileWidthY,
	dml_uint_t *MacroTileWidthC)
{
	dml_uint_t luma_bytes;
	dml_uint_t chroma_bytes;
	dml_uint_t luma_blk_height;
	dml_uint_t chroma_blk_height;
	dml_uint_t tile_height_scale;
	dml_uint_t tile_bytes;

	// Step 1: bytes per pixel of each plane
	switch (SourcePixelFormat) {
	case dml_444_64:
		*BytePerPixelDETY = 8;
		*BytePerPixelDETC = 0;
		*BytePerPixelY = 8;
		*BytePerPixelC = 0;
		break;
	case dml_444_32:
	case dml_rgbe:
		*BytePerPixelDETY = 4;
		*BytePerPixelDETC = 0;
		*BytePerPixelY = 4;
		*BytePerPixelC = 0;
		break;
	case dml_444_16:
	case dml_mono_16:
		*BytePerPixelDETY = 2;
		*BytePerPixelDETC = 0;
		*BytePerPixelY = 2;
		*BytePerPixelC = 0;
		break;
	case dml_444_8:
	case dml_mono_8:
		*BytePerPixelDETY = 1;
		*BytePerPixelDETC = 0;
		*BytePerPixelY = 1;
		*BytePerPixelC = 0;
		break;
	case dml_rgbe_alpha:
		*BytePerPixelDETY = 4;
		*BytePerPixelDETC = 1;
		*BytePerPixelY = 4;
		*BytePerPixelC = 1;
		break;
	case dml_420_8:
		*BytePerPixelDETY = 1;
		*BytePerPixelDETC = 2;
		*BytePerPixelY = 1;
		*BytePerPixelC = 2;
		break;
	case dml_420_12:
		*BytePerPixelDETY = 2;
		*BytePerPixelDETC = 4;
		*BytePerPixelY = 2;
		*BytePerPixelC = 4;
		break;
	default:
		// Fallback for every format not listed above (presumably dml_420_10 - confirm):
		// fractional DET sizes, whole-byte sizes of 2 and 4.
		*BytePerPixelDETY = (dml_float_t) (4.0 / 3);
		*BytePerPixelDETC = (dml_float_t) (8.0 / 3);
		*BytePerPixelY = 2;
		*BytePerPixelC = 4;
		break;
	}
	luma_bytes = *BytePerPixelY;
	chroma_bytes = *BytePerPixelC;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: SourcePixelFormat = %u\n", __func__, SourcePixelFormat);
	dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
	dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
	dml_print("DML::%s: BytePerPixelY = %u\n", __func__, *BytePerPixelY);
	dml_print("DML::%s: BytePerPixelC = %u\n", __func__, *BytePerPixelC);
#endif

	// Step 2: 256-byte block dimensions.
	// The formats given a zero chroma byte count in step 1 are exactly the single-plane
	// formats (444_*, mono_*, rgbe); they get a zero chroma block height and width.
	if (SurfaceTiling == dml_sw_linear) {
		luma_blk_height = 1;
		chroma_blk_height = (chroma_bytes == 0) ? 0 : 1;
	} else {
		switch (SourcePixelFormat) {
		case dml_444_64:
			luma_blk_height = 4;
			chroma_blk_height = 0;
			break;
		case dml_444_8:
			luma_blk_height = 16;
			chroma_blk_height = 0;
			break;
		case dml_rgbe_alpha:
			luma_blk_height = 8;
			chroma_blk_height = 16;
			break;
		case dml_420_8:
			luma_blk_height = 16;
			chroma_blk_height = 8;
			break;
		default:
			luma_blk_height = 8;
			chroma_blk_height = (chroma_bytes == 0) ? 0 : 8;
			break;
		}
	}

	*BlockHeight256BytesY = luma_blk_height;
	*BlockHeight256BytesC = chroma_blk_height;
	*BlockWidth256BytesY = 256U / luma_bytes / luma_blk_height;
	// guard: chroma_bytes and chroma_blk_height are both 0 for single-plane formats
	*BlockWidth256BytesC = (chroma_bytes == 0) ? 0 : 256U / chroma_bytes / chroma_blk_height;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: BlockWidth256BytesY = %u\n", __func__, *BlockWidth256BytesY);
	dml_print("DML::%s: BlockHeight256BytesY = %u\n", __func__, *BlockHeight256BytesY);
	dml_print("DML::%s: BlockWidth256BytesC = %u\n", __func__, *BlockWidth256BytesC);
	dml_print("DML::%s: BlockHeight256BytesC = %u\n", __func__, *BlockHeight256BytesC);
#endif

	// Step 3: macro tile dimensions. Each tiling class is described by how many
	// 256-byte block heights make up the tile height and by the tile size in bytes.
	switch (SurfaceTiling) {
	case dml_sw_linear:
		tile_height_scale = 1;
		tile_bytes = 256;
		break;
	case dml_sw_64kb_d:
	case dml_sw_64kb_d_t:
	case dml_sw_64kb_d_x:
	case dml_sw_64kb_r_x:
		tile_height_scale = 16;
		tile_bytes = 65536;
		break;
	default:
		tile_height_scale = 32;
		tile_bytes = 65536 * 4;
		break;
	}

	*MacroTileHeightY = tile_height_scale * luma_blk_height;
	*MacroTileWidthY = tile_bytes / luma_bytes / *MacroTileHeightY;
	*MacroTileHeightC = tile_height_scale * chroma_blk_height;
	// a zero chroma tile height means there is no second plane; avoid dividing by zero
	*MacroTileWidthC = (*MacroTileHeightC == 0) ? 0 : tile_bytes / chroma_bytes / *MacroTileHeightC;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: MacroTileWidthY = %u\n", __func__, *MacroTileWidthY);
	dml_print("DML::%s: MacroTileHeightY = %u\n", __func__, *MacroTileHeightY);
	dml_print("DML::%s: MacroTileWidthC = %u\n", __func__, *MacroTileWidthC);
	dml_print("DML::%s: MacroTileHeightC = %u\n", __func__, *MacroTileHeightC);
#endif
} // CalculateBytePerPixelAndBlockSizes
1738
CalculateTWait(dml_uint_t PrefetchMode,enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange,dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal,dml_bool_t DRRDisplay,dml_float_t DRAMClockChangeLatency,dml_float_t FCLKChangeLatency,dml_float_t UrgentLatency,dml_float_t SREnterPlusExitTime)1739 static noinline_for_stack dml_float_t CalculateTWait(
1740 dml_uint_t PrefetchMode,
1741 enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange,
1742 dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
1743 dml_bool_t DRRDisplay,
1744 dml_float_t DRAMClockChangeLatency,
1745 dml_float_t FCLKChangeLatency,
1746 dml_float_t UrgentLatency,
1747 dml_float_t SREnterPlusExitTime)
1748 {
1749 dml_float_t TWait = 0.0;
1750
1751 if (PrefetchMode == 0 &&
1752 !(UseMALLForPStateChange == dml_use_mall_pstate_change_full_frame) && !(UseMALLForPStateChange == dml_use_mall_pstate_change_sub_viewport) &&
1753 !(UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe) && !(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) {
1754 TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
1755 } else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe)) {
1756 TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
1757 } else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe)) {
1758 TWait = dml_max(SREnterPlusExitTime, UrgentLatency);
1759 } else {
1760 TWait = UrgentLatency;
1761 }
1762
1763 #ifdef __DML_VBA_DEBUG__
1764 dml_print("DML::%s: PrefetchMode = %u\n", __func__, PrefetchMode);
1765 dml_print("DML::%s: TWait = %f\n", __func__, TWait);
1766 #endif
1767 return TWait;
1768 } // CalculateTWait
1769
1770
1771 /// @brief Calculate the "starting point" for prefetch calculation
1772 /// if AllowForPStateChangeOrStutterInVBlank is set as a particular requirement, then the mode evalulation
1773 /// will only be done at the given mode. If no specific requirement (i.e. *_if_possible), then will just go from
1774 /// try all the prefetch mode in decreasing order of "difficulty" (start from 0 which means all power saving
1775 /// features).
CalculatePrefetchMode(enum dml_prefetch_modes AllowForPStateChangeOrStutterInVBlank,dml_uint_t * MinPrefetchMode,dml_uint_t * MaxPrefetchMode)1776 static void CalculatePrefetchMode(
1777 enum dml_prefetch_modes AllowForPStateChangeOrStutterInVBlank,
1778 dml_uint_t *MinPrefetchMode,
1779 dml_uint_t *MaxPrefetchMode)
1780 {
1781 if (AllowForPStateChangeOrStutterInVBlank == dml_prefetch_support_uclk_fclk_and_stutter_if_possible) {
1782 *MinPrefetchMode = 0; // consider all pwr saving features
1783 *MaxPrefetchMode = 3; // consider just urgent latency
1784 } else {
1785 if (AllowForPStateChangeOrStutterInVBlank == dml_prefetch_support_none) {
1786 *MinPrefetchMode = 3;
1787 } else if (AllowForPStateChangeOrStutterInVBlank == dml_prefetch_support_stutter) {
1788 *MinPrefetchMode = 2;
1789 } else if (AllowForPStateChangeOrStutterInVBlank == dml_prefetch_support_fclk_and_stutter) {
1790 *MinPrefetchMode = 1;
1791 } else if (AllowForPStateChangeOrStutterInVBlank == dml_prefetch_support_uclk_fclk_and_stutter) {
1792 *MinPrefetchMode = 0;
1793 } else {
1794 dml_print("ERROR: Invalid AllowForPStateChangeOrStutterInVBlank setting! val=%u\n", AllowForPStateChangeOrStutterInVBlank);
1795 ASSERT(0);
1796 }
1797 *MaxPrefetchMode = *MinPrefetchMode;
1798 }
1799 } // CalculatePrefetchMode
1800
/// @brief Compute the DISPCLK needed by writeback as the largest of three candidate clocks,
///        rounded up by RoundToDFSGranularity.
/// @param WritebackPixelFormat not used by the current formula
/// @param PixelClock pixel clock; every candidate scales linearly with it
/// @param WritebackHRatio horizontal ratio, divisor of the first candidate
/// @param WritebackVRatio not used by the current formula
/// @param WritebackHTaps horizontal taps, used in groups of 8 (rounded up) by the first candidate
/// @param WritebackVTaps vertical taps, used by the second and third candidates
/// @param WritebackSourceWidth divisor of the third candidate
/// @param WritebackDestinationWidth destination width, used by the second and third candidates
/// @param HTotal divisor of the second candidate
/// @param WritebackLineBufferSize line buffer size, subtracted (divided by 57) in the third candidate
/// @param DISPCLKDPPCLKVCOSpeed VCO speed handed to RoundToDFSGranularity
/// @return the rounded-up maximum of the three candidates
static dml_float_t CalculateWriteBackDISPCLK(
	enum dml_source_format_class WritebackPixelFormat,
	dml_float_t PixelClock,
	dml_float_t WritebackHRatio,
	dml_float_t WritebackVRatio,
	dml_uint_t WritebackHTaps,
	dml_uint_t WritebackVTaps,
	dml_uint_t WritebackSourceWidth,
	dml_uint_t WritebackDestinationWidth,
	dml_uint_t HTotal,
	dml_uint_t WritebackLineBufferSize,
	dml_float_t DISPCLKDPPCLKVCOSpeed)
{
	// candidate driven by the horizontal taps and ratio
	const dml_float_t clk_from_htaps = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
	// candidate driven by the vertical taps over one line (HTotal)
	const dml_float_t clk_from_vtaps = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / (dml_float_t) HTotal;
	// candidate driven by the line buffer size relative to the source width
	const dml_float_t clk_from_linebuf = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / (dml_float_t) WritebackSourceWidth;
	const dml_float_t required_clk = dml_max3(clk_from_htaps, clk_from_vtaps, clk_from_linebuf);

	// second argument 1: request the round-up variant
	return RoundToDFSGranularity(required_clk, 1, DISPCLKDPPCLKVCOSpeed);
}
1821
/// @brief Compute the writeback delay from the scaler geometry.
///        The result is 0 when the (unclamped) count of trailing output lines is negative.
/// @param WritebackPixelFormat not used by the current formula
/// @param WritebackHRatio not used by the current formula
/// @param WritebackVRatio vertical ratio
/// @param WritebackVTaps vertical taps
/// @param WritebackDestinationWidth destination width
/// @param WritebackDestinationHeight destination height
/// @param WritebackSourceHeight source height
/// @param HTotal total line length; (HTotal - WritebackDestinationWidth) is added to the delay
/// @return trailing lines * line length + (HTotal - WritebackDestinationWidth) + 80, or 0 (see above)
static dml_float_t CalculateWriteBackDelay(
	enum dml_source_format_class WritebackPixelFormat,
	dml_float_t WritebackHRatio,
	dml_float_t WritebackVRatio,
	dml_uint_t WritebackVTaps,
	dml_uint_t WritebackDestinationWidth,
	dml_uint_t WritebackDestinationHeight,
	dml_uint_t WritebackSourceHeight,
	dml_uint_t HTotal)
{
	const dml_float_t vert_init = (WritebackVRatio + WritebackVTaps + 1) / 2;
	// a line costs at least the destination width, or one group of 6 pixels per vertical tap
	const dml_float_t line_length = dml_max((dml_float_t) WritebackDestinationWidth, dml_ceil((dml_float_t)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
	const dml_float_t trailing_lines = WritebackDestinationHeight - 1 - dml_ceil(((dml_float_t)WritebackSourceHeight - (dml_float_t) vert_init) / (dml_float_t)WritebackVRatio, 1.0);

	if (trailing_lines < 0) {
		return 0;
	}

	return trailing_lines * line_length + (HTotal - WritebackDestinationWidth) + 80;
}
1847
CalculateVUpdateAndDynamicMetadataParameters(dml_uint_t MaxInterDCNTileRepeaters,dml_float_t Dppclk,dml_float_t Dispclk,dml_float_t DCFClkDeepSleep,dml_float_t PixelClock,dml_uint_t HTotal,dml_uint_t VBlank,dml_uint_t DynamicMetadataTransmittedBytes,dml_uint_t DynamicMetadataLinesBeforeActiveRequired,dml_uint_t InterlaceEnable,dml_bool_t ProgressiveToInterlaceUnitInOPP,dml_float_t * TSetup,dml_float_t * Tdmbf,dml_float_t * Tdmec,dml_float_t * Tdmsks,dml_uint_t * VUpdateOffsetPix,dml_uint_t * VUpdateWidthPix,dml_uint_t * VReadyOffsetPix)1848 static void CalculateVUpdateAndDynamicMetadataParameters(
1849 dml_uint_t MaxInterDCNTileRepeaters,
1850 dml_float_t Dppclk,
1851 dml_float_t Dispclk,
1852 dml_float_t DCFClkDeepSleep,
1853 dml_float_t PixelClock,
1854 dml_uint_t HTotal,
1855 dml_uint_t VBlank,
1856 dml_uint_t DynamicMetadataTransmittedBytes,
1857 dml_uint_t DynamicMetadataLinesBeforeActiveRequired,
1858 dml_uint_t InterlaceEnable,
1859 dml_bool_t ProgressiveToInterlaceUnitInOPP,
1860
1861 // Output
1862 dml_float_t *TSetup,
1863 dml_float_t *Tdmbf,
1864 dml_float_t *Tdmec,
1865 dml_float_t *Tdmsks,
1866 dml_uint_t *VUpdateOffsetPix,
1867 dml_uint_t *VUpdateWidthPix,
1868 dml_uint_t *VReadyOffsetPix)
1869 {
1870 dml_float_t TotalRepeaterDelayTime;
1871 TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
1872 *VUpdateWidthPix = (dml_uint_t)(dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0));
1873 *VReadyOffsetPix = (dml_uint_t)(dml_ceil(dml_max(150.0 / Dppclk, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0));
1874 *VUpdateOffsetPix = (dml_uint_t)(dml_ceil(HTotal / 4.0, 1.0));
1875 *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
1876 *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
1877 *Tdmec = HTotal / PixelClock;
1878
1879 if (DynamicMetadataLinesBeforeActiveRequired == 0) {
1880 *Tdmsks = VBlank * HTotal / PixelClock / 2.0;
1881 } else {
1882 *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
1883 }
1884 if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
1885 *Tdmsks = *Tdmsks / 2;
1886 }
1887 #ifdef __DML_VBA_DEBUG__
1888 dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %u\n", __func__, DynamicMetadataLinesBeforeActiveRequired);
1889 dml_print("DML::%s: VBlank = %u\n", __func__, VBlank);
1890 dml_print("DML::%s: HTotal = %u\n", __func__, HTotal);
1891 dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
1892 dml_print("DML::%s: Dppclk = %f\n", __func__, Dppclk);
1893 dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, DCFClkDeepSleep);
1894 dml_print("DML::%s: MaxInterDCNTileRepeaters = %u\n", __func__, MaxInterDCNTileRepeaters);
1895 dml_print("DML::%s: TotalRepeaterDelayTime = %f\n", __func__, TotalRepeaterDelayTime);
1896
1897 dml_print("DML::%s: VUpdateWidthPix = %u\n", __func__, *VUpdateWidthPix);
1898 dml_print("DML::%s: VReadyOffsetPix = %u\n", __func__, *VReadyOffsetPix);
1899 dml_print("DML::%s: VUpdateOffsetPix = %u\n", __func__, *VUpdateOffsetPix);
1900
1901 dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
1902 #endif
1903 }
1904
/// @brief Compute the bandwidth of the DCC meta rows and of the DPTE (page table) rows.
///        Each row bandwidth is VRatio * bytes per row / (row height * LineTime); formats with a
///        second plane (420_8, 420_10, 420_12, rgbe_alpha) add the same term for chroma.
/// @param GPUVMEnable when not true, dpte_row_bw is 0
/// @param SourcePixelFormat decides whether the chroma term is added
/// @param VRatio vertical ratio of the luma plane
/// @param VRatioChroma vertical ratio of the chroma plane
/// @param DCCEnable when not true, meta_row_bw is 0
/// @param LineTime time of one line
/// @param MetaRowByteLuma meta bytes per row, luma
/// @param MetaRowByteChroma meta bytes per row, chroma
/// @param meta_row_height_luma meta row height, luma
/// @param meta_row_height_chroma meta row height, chroma
/// @param PixelPTEBytesPerRowLuma PTE bytes per row, luma
/// @param PixelPTEBytesPerRowChroma PTE bytes per row, chroma
/// @param dpte_row_height_luma DPTE row height, luma
/// @param dpte_row_height_chroma DPTE row height, chroma
/// @param meta_row_bw [out] meta row bandwidth
/// @param dpte_row_bw [out] DPTE row bandwidth
static void CalculateRowBandwidth(
	dml_bool_t GPUVMEnable,
	enum dml_source_format_class SourcePixelFormat,
	dml_float_t VRatio,
	dml_float_t VRatioChroma,
	dml_bool_t DCCEnable,
	dml_float_t LineTime,
	dml_uint_t MetaRowByteLuma,
	dml_uint_t MetaRowByteChroma,
	dml_uint_t meta_row_height_luma,
	dml_uint_t meta_row_height_chroma,
	dml_uint_t PixelPTEBytesPerRowLuma,
	dml_uint_t PixelPTEBytesPerRowChroma,
	dml_uint_t dpte_row_height_luma,
	dml_uint_t dpte_row_height_chroma,
	// Output
	dml_float_t *meta_row_bw,
	dml_float_t *dpte_row_bw)
{
	const dml_bool_t has_chroma_plane = (SourcePixelFormat == dml_420_8 || SourcePixelFormat == dml_420_10 ||
			SourcePixelFormat == dml_420_12 || SourcePixelFormat == dml_rgbe_alpha);

	if (DCCEnable != true) {
		*meta_row_bw = 0;
	} else {
		dml_float_t meta_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);

		if (has_chroma_plane) {
			meta_bw += VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
		}
		*meta_row_bw = meta_bw;
	}

	if (GPUVMEnable != true) {
		*dpte_row_bw = 0;
	} else {
		dml_float_t dpte_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);

		if (has_chroma_plane) {
			dpte_bw += VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
		}
		*dpte_row_bw = dpte_bw;
	}
}
1944
1945 /// @brief Determine immediate flip schedule given bw remaining after considering the prefetch schedule
1946 /// @param BandwidthAvailableForImmediateFlip Bandwidth available for iflip for all planes
CalculateFlipSchedule(dml_float_t HostVMInefficiencyFactor,dml_float_t UrgentExtraLatency,dml_float_t UrgentLatency,dml_uint_t GPUVMMaxPageTableLevels,dml_bool_t HostVMEnable,dml_uint_t HostVMMaxNonCachedPageTableLevels,dml_bool_t GPUVMEnable,dml_uint_t HostVMMinPageSize,dml_float_t PDEAndMetaPTEBytesPerFrame,dml_float_t MetaRowBytes,dml_float_t DPTEBytesPerRow,dml_float_t BandwidthAvailableForImmediateFlip,dml_uint_t TotImmediateFlipBytes,enum dml_source_format_class SourcePixelFormat,dml_float_t LineTime,dml_float_t VRatio,dml_float_t VRatioChroma,dml_float_t Tno_bw,dml_bool_t DCCEnable,dml_uint_t dpte_row_height,dml_uint_t meta_row_height,dml_uint_t dpte_row_height_chroma,dml_uint_t meta_row_height_chroma,dml_bool_t use_one_row_for_frame_flip,dml_float_t * DestinationLinesToRequestVMInImmediateFlip,dml_float_t * DestinationLinesToRequestRowInImmediateFlip,dml_float_t * final_flip_bw,dml_bool_t * ImmediateFlipSupportedForPipe)1947 static void CalculateFlipSchedule(
1948 dml_float_t HostVMInefficiencyFactor,
1949 dml_float_t UrgentExtraLatency,
1950 dml_float_t UrgentLatency,
1951 dml_uint_t GPUVMMaxPageTableLevels,
1952 dml_bool_t HostVMEnable,
1953 dml_uint_t HostVMMaxNonCachedPageTableLevels,
1954 dml_bool_t GPUVMEnable,
1955 dml_uint_t HostVMMinPageSize,
1956 dml_float_t PDEAndMetaPTEBytesPerFrame,
1957 dml_float_t MetaRowBytes,
1958 dml_float_t DPTEBytesPerRow,
1959 dml_float_t BandwidthAvailableForImmediateFlip,
1960 dml_uint_t TotImmediateFlipBytes,
1961 enum dml_source_format_class SourcePixelFormat,
1962 dml_float_t LineTime,
1963 dml_float_t VRatio,
1964 dml_float_t VRatioChroma,
1965 dml_float_t Tno_bw,
1966 dml_bool_t DCCEnable,
1967 dml_uint_t dpte_row_height,
1968 dml_uint_t meta_row_height,
1969 dml_uint_t dpte_row_height_chroma,
1970 dml_uint_t meta_row_height_chroma,
1971 dml_bool_t use_one_row_for_frame_flip,
1972
1973 // Output
1974 dml_float_t *DestinationLinesToRequestVMInImmediateFlip,
1975 dml_float_t *DestinationLinesToRequestRowInImmediateFlip,
1976 dml_float_t *final_flip_bw,
1977 dml_bool_t *ImmediateFlipSupportedForPipe)
1978 {
1979 dml_float_t min_row_time = 0.0;
1980 dml_uint_t HostVMDynamicLevelsTrips = 0;
1981 dml_float_t TimeForFetchingMetaPTEImmediateFlip = 0;
1982 dml_float_t TimeForFetchingRowInVBlankImmediateFlip = 0;
1983 dml_float_t ImmediateFlipBW = 0; // @brief The immediate flip bandwidth for this pipe
1984
1985 if (GPUVMEnable == true && HostVMEnable == true) {
1986 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
1987 } else {
1988 HostVMDynamicLevelsTrips = 0;
1989 }
1990
1991 #ifdef __DML_VBA_DEBUG__
1992 dml_print("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes);
1993 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1994 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
1995 dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
1996 #endif
1997
1998 if (TotImmediateFlipBytes > 0) {
1999 if (use_one_row_for_frame_flip) {
2000 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2.0 * DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / (dml_float_t) TotImmediateFlipBytes;
2001 } else {
2002 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / (dml_float_t) TotImmediateFlipBytes;
2003 }
2004 if (GPUVMEnable == true) {
2005 TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
2006 UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
2007 LineTime / 4.0);
2008 } else {
2009 TimeForFetchingMetaPTEImmediateFlip = 0;
2010 }
2011 if ((GPUVMEnable == true || DCCEnable == true)) {
2012 TimeForFetchingRowInVBlankImmediateFlip = dml_max3((MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW, UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0);
2013 } else {
2014 TimeForFetchingRowInVBlankImmediateFlip = 0;
2015 }
2016
2017 *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0;
2018 *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0;
2019
2020 if (GPUVMEnable == true) {
2021 *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
2022 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
2023 } else if ((GPUVMEnable == true || DCCEnable == true)) {
2024 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
2025 } else {
2026 *final_flip_bw = 0;
2027 }
2028 } else {
2029 TimeForFetchingMetaPTEImmediateFlip = 0;
2030 TimeForFetchingRowInVBlankImmediateFlip = 0;
2031 *DestinationLinesToRequestVMInImmediateFlip = 0;
2032 *DestinationLinesToRequestRowInImmediateFlip = 0;
2033 *final_flip_bw = 0;
2034 }
2035
2036 if (SourcePixelFormat == dml_420_8 || SourcePixelFormat == dml_420_10 || SourcePixelFormat == dml_rgbe_alpha) {
2037 if (GPUVMEnable == true && DCCEnable != true) {
2038 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
2039 } else if (GPUVMEnable != true && DCCEnable == true) {
2040 min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
2041 } else {
2042 min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
2043 }
2044 } else {
2045 if (GPUVMEnable == true && DCCEnable != true) {
2046 min_row_time = dpte_row_height * LineTime / VRatio;
2047 } else if (GPUVMEnable != true && DCCEnable == true) {
2048 min_row_time = meta_row_height * LineTime / VRatio;
2049 } else {
2050 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
2051 }
2052 }
2053
2054 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
2055 *ImmediateFlipSupportedForPipe = false;
2056 } else {
2057 *ImmediateFlipSupportedForPipe = true;
2058 }
2059
2060 #ifdef __DML_VBA_DEBUG__
2061 dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable);
2062 dml_print("DML::%s: DCCEnable = %u\n", __func__, DCCEnable);
2063
2064 dml_print("DML::%s: MetaRowBytes = %f\n", __func__, MetaRowBytes);
2065 dml_print("DML::%s: DPTEBytesPerRow = %f\n", __func__, DPTEBytesPerRow);
2066 dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
2067 dml_print("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes);
2068 dml_print("DML::%s: ImmediateFlipBW = %f\n", __func__, ImmediateFlipBW);
2069 dml_print("DML::%s: PDEAndMetaPTEBytesPerFrame = %f\n", __func__, PDEAndMetaPTEBytesPerFrame);
2070 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
2071 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
2072 dml_print("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw);
2073
2074 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestVMInImmediateFlip);
2075 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestRowInImmediateFlip);
2076 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
2077 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
2078 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
2079 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, *ImmediateFlipSupportedForPipe);
2080 #endif
2081 } // CalculateFlipSchedule
2082
/// @brief Snap a clock to a value of the form (4 * VCOSpeed) / divider.
///        The divider is (4 * VCOSpeed) / Clock passed through dml_floor (round_up) or dml_ceil
///        (round down) with granularity 1.0 - presumably yielding a whole number, confirm against
///        the helper. A smaller divider gives a higher clock, hence floor for rounding up.
/// @param Clock requested clock; values <= 0 yield 0
/// @param round_up non-zero to round the clock up, zero to round it down
/// @param VCOSpeed VCO speed the dividers are applied to
/// @return the snapped clock, or 0.0 when Clock <= 0
static dml_float_t RoundToDFSGranularity(dml_float_t Clock, dml_bool_t round_up, dml_float_t VCOSpeed)
{
	dml_float_t divider;

	if (Clock <= 0.0) {
		return 0.0;
	}

	divider = round_up ? dml_floor(VCOSpeed * 4.0 / Clock, 1.0)
			   : dml_ceil(VCOSpeed * 4.0 / Clock, 1.0);

	return VCOSpeed * 4.0 / divider;
}
2094
CalculateDCCConfiguration(dml_bool_t DCCEnabled,dml_bool_t DCCProgrammingAssumesScanDirectionUnknown,enum dml_source_format_class SourcePixelFormat,dml_uint_t SurfaceWidthLuma,dml_uint_t SurfaceWidthChroma,dml_uint_t SurfaceHeightLuma,dml_uint_t SurfaceHeightChroma,dml_uint_t nomDETInKByte,dml_uint_t RequestHeight256ByteLuma,dml_uint_t RequestHeight256ByteChroma,enum dml_swizzle_mode TilingFormat,dml_uint_t BytePerPixelY,dml_uint_t BytePerPixelC,dml_float_t BytePerPixelDETY,dml_float_t BytePerPixelDETC,enum dml_rotation_angle SourceScan,dml_uint_t * MaxUncompressedBlockLuma,dml_uint_t * MaxUncompressedBlockChroma,dml_uint_t * MaxCompressedBlockLuma,dml_uint_t * MaxCompressedBlockChroma,dml_uint_t * IndependentBlockLuma,dml_uint_t * IndependentBlockChroma)2095 static void CalculateDCCConfiguration(
2096 dml_bool_t DCCEnabled,
2097 dml_bool_t DCCProgrammingAssumesScanDirectionUnknown,
2098 enum dml_source_format_class SourcePixelFormat,
2099 dml_uint_t SurfaceWidthLuma,
2100 dml_uint_t SurfaceWidthChroma,
2101 dml_uint_t SurfaceHeightLuma,
2102 dml_uint_t SurfaceHeightChroma,
2103 dml_uint_t nomDETInKByte,
2104 dml_uint_t RequestHeight256ByteLuma,
2105 dml_uint_t RequestHeight256ByteChroma,
2106 enum dml_swizzle_mode TilingFormat,
2107 dml_uint_t BytePerPixelY,
2108 dml_uint_t BytePerPixelC,
2109 dml_float_t BytePerPixelDETY,
2110 dml_float_t BytePerPixelDETC,
2111 enum dml_rotation_angle SourceScan,
2112 // Output
2113 dml_uint_t *MaxUncompressedBlockLuma,
2114 dml_uint_t *MaxUncompressedBlockChroma,
2115 dml_uint_t *MaxCompressedBlockLuma,
2116 dml_uint_t *MaxCompressedBlockChroma,
2117 dml_uint_t *IndependentBlockLuma,
2118 dml_uint_t *IndependentBlockChroma)
2119 {
2120 dml_uint_t DETBufferSizeForDCC = nomDETInKByte * 1024;
2121
2122 dml_uint_t yuv420;
2123 dml_uint_t horz_div_l;
2124 dml_uint_t horz_div_c;
2125 dml_uint_t vert_div_l;
2126 dml_uint_t vert_div_c;
2127
2128 dml_uint_t swath_buf_size;
2129 dml_float_t detile_buf_vp_horz_limit;
2130 dml_float_t detile_buf_vp_vert_limit;
2131
2132 dml_uint_t MAS_vp_horz_limit;
2133 dml_uint_t MAS_vp_vert_limit;
2134 dml_uint_t max_vp_horz_width;
2135 dml_uint_t max_vp_vert_height;
2136 dml_uint_t eff_surf_width_l;
2137 dml_uint_t eff_surf_width_c;
2138 dml_uint_t eff_surf_height_l;
2139 dml_uint_t eff_surf_height_c;
2140
2141 dml_uint_t full_swath_bytes_horz_wc_l;
2142 dml_uint_t full_swath_bytes_horz_wc_c;
2143 dml_uint_t full_swath_bytes_vert_wc_l;
2144 dml_uint_t full_swath_bytes_vert_wc_c;
2145
2146 dml_uint_t req128_horz_wc_l;
2147 dml_uint_t req128_horz_wc_c;
2148 dml_uint_t req128_vert_wc_l;
2149 dml_uint_t req128_vert_wc_c;
2150
2151 dml_uint_t segment_order_horz_contiguous_luma;
2152 dml_uint_t segment_order_horz_contiguous_chroma;
2153 dml_uint_t segment_order_vert_contiguous_luma;
2154 dml_uint_t segment_order_vert_contiguous_chroma;
2155
2156 typedef enum{
2157 REQ_256Bytes,
2158 REQ_128BytesNonContiguous,
2159 REQ_128BytesContiguous,
2160 REQ_NA
2161 } RequestType;
2162
2163 RequestType RequestLuma;
2164 RequestType RequestChroma;
2165
2166 yuv420 = ((SourcePixelFormat == dml_420_8 || SourcePixelFormat == dml_420_10 || SourcePixelFormat == dml_420_12) ? 1 : 0);
2167 horz_div_l = 1;
2168 horz_div_c = 1;
2169 vert_div_l = 1;
2170 vert_div_c = 1;
2171
2172 if (BytePerPixelY == 1)
2173 vert_div_l = 0;
2174 if (BytePerPixelC == 1)
2175 vert_div_c = 0;
2176
2177 if (BytePerPixelC == 0) {
2178 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
2179 detile_buf_vp_horz_limit = (dml_float_t) swath_buf_size / ((dml_float_t) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
2180 detile_buf_vp_vert_limit = (dml_float_t) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
2181 } else {
2182 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
2183 detile_buf_vp_horz_limit = (dml_float_t) swath_buf_size / ((dml_float_t) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l) + (dml_float_t) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
2184 detile_buf_vp_vert_limit = (dml_float_t) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
2185 }
2186
2187 if (SourcePixelFormat == dml_420_10) {
2188 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
2189 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
2190 }
2191
2192 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
2193 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
2194
2195 MAS_vp_horz_limit = SourcePixelFormat == dml_rgbe_alpha ? 3840 : 6144;
2196 MAS_vp_vert_limit = SourcePixelFormat == dml_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
2197 max_vp_horz_width = (dml_uint_t)(dml_min((dml_float_t) MAS_vp_horz_limit, detile_buf_vp_horz_limit));
2198 max_vp_vert_height = (dml_uint_t)(dml_min((dml_float_t) MAS_vp_vert_limit, detile_buf_vp_vert_limit));
2199 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
2200 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
2201 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
2202 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
2203
2204 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
2205 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
2206 if (BytePerPixelC > 0) {
2207 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
2208 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
2209 } else {
2210 full_swath_bytes_horz_wc_c = 0;
2211 full_swath_bytes_vert_wc_c = 0;
2212 }
2213
2214 if (SourcePixelFormat == dml_420_10) {
2215 full_swath_bytes_horz_wc_l = (dml_uint_t)(dml_ceil((dml_float_t) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0));
2216 full_swath_bytes_horz_wc_c = (dml_uint_t)(dml_ceil((dml_float_t) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0));
2217 full_swath_bytes_vert_wc_l = (dml_uint_t)(dml_ceil((dml_float_t) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0));
2218 full_swath_bytes_vert_wc_c = (dml_uint_t)(dml_ceil((dml_float_t) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0));
2219 }
2220
2221 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
2222 req128_horz_wc_l = 0;
2223 req128_horz_wc_c = 0;
2224 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
2225 req128_horz_wc_l = 0;
2226 req128_horz_wc_c = 1;
2227 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
2228 req128_horz_wc_l = 1;
2229 req128_horz_wc_c = 0;
2230 } else {
2231 req128_horz_wc_l = 1;
2232 req128_horz_wc_c = 1;
2233 }
2234
2235 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
2236 req128_vert_wc_l = 0;
2237 req128_vert_wc_c = 0;
2238 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
2239 req128_vert_wc_l = 0;
2240 req128_vert_wc_c = 1;
2241 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
2242 req128_vert_wc_l = 1;
2243 req128_vert_wc_c = 0;
2244 } else {
2245 req128_vert_wc_l = 1;
2246 req128_vert_wc_c = 1;
2247 }
2248
2249 if (BytePerPixelY == 2) {
2250 segment_order_horz_contiguous_luma = 0;
2251 segment_order_vert_contiguous_luma = 1;
2252 } else {
2253 segment_order_horz_contiguous_luma = 1;
2254 segment_order_vert_contiguous_luma = 0;
2255 }
2256
2257 if (BytePerPixelC == 2) {
2258 segment_order_horz_contiguous_chroma = 0;
2259 segment_order_vert_contiguous_chroma = 1;
2260 } else {
2261 segment_order_horz_contiguous_chroma = 1;
2262 segment_order_vert_contiguous_chroma = 0;
2263 }
2264 #ifdef __DML_VBA_DEBUG__
2265 dml_print("DML::%s: DCCEnabled = %u\n", __func__, DCCEnabled);
2266 dml_print("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte);
2267 dml_print("DML::%s: DETBufferSizeForDCC = %u\n", __func__, DETBufferSizeForDCC);
2268 dml_print("DML::%s: req128_horz_wc_l = %u\n", __func__, req128_horz_wc_l);
2269 dml_print("DML::%s: req128_horz_wc_c = %u\n", __func__, req128_horz_wc_c);
2270 dml_print("DML::%s: full_swath_bytes_horz_wc_l = %u\n", __func__, full_swath_bytes_horz_wc_l);
2271 dml_print("DML::%s: full_swath_bytes_vert_wc_c = %u\n", __func__, full_swath_bytes_vert_wc_c);
2272 dml_print("DML::%s: segment_order_horz_contiguous_luma = %u\n", __func__, segment_order_horz_contiguous_luma);
2273 dml_print("DML::%s: segment_order_horz_contiguous_chroma = %u\n", __func__, segment_order_horz_contiguous_chroma);
2274 #endif
2275
2276 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
2277 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
2278 RequestLuma = REQ_256Bytes;
2279 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
2280 RequestLuma = REQ_128BytesNonContiguous;
2281 } else {
2282 RequestLuma = REQ_128BytesContiguous;
2283 }
2284 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
2285 RequestChroma = REQ_256Bytes;
2286 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
2287 RequestChroma = REQ_128BytesNonContiguous;
2288 } else {
2289 RequestChroma = REQ_128BytesContiguous;
2290 }
2291 } else if (!dml_is_vertical_rotation(SourceScan)) {
2292 if (req128_horz_wc_l == 0) {
2293 RequestLuma = REQ_256Bytes;
2294 } else if (segment_order_horz_contiguous_luma == 0) {
2295 RequestLuma = REQ_128BytesNonContiguous;
2296 } else {
2297 RequestLuma = REQ_128BytesContiguous;
2298 }
2299 if (req128_horz_wc_c == 0) {
2300 RequestChroma = REQ_256Bytes;
2301 } else if (segment_order_horz_contiguous_chroma == 0) {
2302 RequestChroma = REQ_128BytesNonContiguous;
2303 } else {
2304 RequestChroma = REQ_128BytesContiguous;
2305 }
2306 } else {
2307 if (req128_vert_wc_l == 0) {
2308 RequestLuma = REQ_256Bytes;
2309 } else if (segment_order_vert_contiguous_luma == 0) {
2310 RequestLuma = REQ_128BytesNonContiguous;
2311 } else {
2312 RequestLuma = REQ_128BytesContiguous;
2313 }
2314 if (req128_vert_wc_c == 0) {
2315 RequestChroma = REQ_256Bytes;
2316 } else if (segment_order_vert_contiguous_chroma == 0) {
2317 RequestChroma = REQ_128BytesNonContiguous;
2318 } else {
2319 RequestChroma = REQ_128BytesContiguous;
2320 }
2321 }
2322
2323 if (RequestLuma == REQ_256Bytes) {
2324 *MaxUncompressedBlockLuma = 256;
2325 *MaxCompressedBlockLuma = 256;
2326 *IndependentBlockLuma = 0;
2327 } else if (RequestLuma == REQ_128BytesContiguous) {
2328 *MaxUncompressedBlockLuma = 256;
2329 *MaxCompressedBlockLuma = 128;
2330 *IndependentBlockLuma = 128;
2331 } else {
2332 *MaxUncompressedBlockLuma = 256;
2333 *MaxCompressedBlockLuma = 64;
2334 *IndependentBlockLuma = 64;
2335 }
2336
2337 if (RequestChroma == REQ_256Bytes) {
2338 *MaxUncompressedBlockChroma = 256;
2339 *MaxCompressedBlockChroma = 256;
2340 *IndependentBlockChroma = 0;
2341 } else if (RequestChroma == REQ_128BytesContiguous) {
2342 *MaxUncompressedBlockChroma = 256;
2343 *MaxCompressedBlockChroma = 128;
2344 *IndependentBlockChroma = 128;
2345 } else {
2346 *MaxUncompressedBlockChroma = 256;
2347 *MaxCompressedBlockChroma = 64;
2348 *IndependentBlockChroma = 64;
2349 }
2350
2351 if (DCCEnabled != true || BytePerPixelC == 0) {
2352 *MaxUncompressedBlockChroma = 0;
2353 *MaxCompressedBlockChroma = 0;
2354 *IndependentBlockChroma = 0;
2355 }
2356
2357 if (DCCEnabled != true) {
2358 *MaxUncompressedBlockLuma = 0;
2359 *MaxCompressedBlockLuma = 0;
2360 *IndependentBlockLuma = 0;
2361 }
2362
2363 #ifdef __DML_VBA_DEBUG__
2364 dml_print("DML::%s: MaxUncompressedBlockLuma = %u\n", __func__, *MaxUncompressedBlockLuma);
2365 dml_print("DML::%s: MaxCompressedBlockLuma = %u\n", __func__, *MaxCompressedBlockLuma);
2366 dml_print("DML::%s: IndependentBlockLuma = %u\n", __func__, *IndependentBlockLuma);
2367 dml_print("DML::%s: MaxUncompressedBlockChroma = %u\n", __func__, *MaxUncompressedBlockChroma);
2368 dml_print("DML::%s: MaxCompressedBlockChroma = %u\n", __func__, *MaxCompressedBlockChroma);
2369 dml_print("DML::%s: IndependentBlockChroma = %u\n", __func__, *IndependentBlockChroma);
2370 #endif
2371
2372 } // CalculateDCCConfiguration
2373
/*
 * CalculatePrefetchSourceLines - source lines to fetch during prefetch.
 *
 * Derives *VInitPreFill from VRatio and VTaps (plus an extra interlace term
 * when the progressive-to-interlace unit is not in OPP), then works out how
 * many whole swaths (*MaxNumSwath) and how many lines of one partial swath
 * are needed to cover it.
 *
 * When ViewportStationary is set, the viewport start along the scan
 * direction (selected by SourceScan) is taken into account so that the first
 * swath may be only partially used; otherwise a position-independent bound
 * is used.
 *
 * SwathHeight is used as a divisor and modulus and is not validated here.
 *
 * Outputs: *VInitPreFill, *MaxNumSwath.
 * Return:  *MaxNumSwath * SwathHeight + partial-swath lines.
 */
static dml_uint_t CalculatePrefetchSourceLines(
		dml_float_t VRatio,
		dml_uint_t VTaps,
		dml_bool_t Interlace,
		dml_bool_t ProgressiveToInterlaceUnitInOPP,
		dml_uint_t SwathHeight,
		enum dml_rotation_angle SourceScan,
		dml_bool_t ViewportStationary,
		dml_uint_t SwathWidth,
		dml_uint_t ViewportHeight,
		dml_uint_t ViewportXStart,
		dml_uint_t ViewportYStart,

		// Output
		dml_uint_t *VInitPreFill,
		dml_uint_t *MaxNumSwath)
{
	dml_uint_t rotated_vp_start = 0;
	dml_uint_t first_swath_lines = 0;
	dml_uint_t partial_lines = 0;
	dml_uint_t partial_swath = 0;
	dml_float_t prefill_lines;
	dml_float_t total_lines = 0;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
	dml_print("DML::%s: VTaps = %u\n", __func__, VTaps);
	dml_print("DML::%s: ViewportXStart = %u\n", __func__, ViewportXStart);
	dml_print("DML::%s: ViewportYStart = %u\n", __func__, ViewportYStart);
	dml_print("DML::%s: ViewportStationary = %u\n", __func__, ViewportStationary);
	dml_print("DML::%s: SwathHeight = %u\n", __func__, SwathHeight);
#endif

	/* Initial pre-fill; the interlace term only applies without the PtoI unit. */
	if (ProgressiveToInterlaceUnitInOPP)
		prefill_lines = (VRatio + (dml_float_t) VTaps + 1) / 2.0;
	else
		prefill_lines = (VRatio + (dml_float_t) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0;
	*VInitPreFill = (dml_uint_t)(dml_floor(prefill_lines, 1));

	if (!ViewportStationary) {
		/* Viewport may move: use a bound that does not depend on its position. */
		*MaxNumSwath = (dml_uint_t)(dml_ceil((*VInitPreFill - 1.0) / (dml_float_t) SwathHeight, 1) + 1);

		if (*VInitPreFill > 1)
			partial_lines = (*VInitPreFill - 2) % SwathHeight;
		else
			partial_lines = (*VInitPreFill + SwathHeight - 2) % SwathHeight;
	} else {
		/* Viewport start position along the scan direction. */
		switch (SourceScan) {
		case dml_rotation_180:
		case dml_rotation_180m:
			rotated_vp_start = SwathHeight - ((ViewportYStart + ViewportHeight - 1) % SwathHeight + 1);
			break;
		case dml_rotation_270:
		case dml_rotation_90m:
			rotated_vp_start = ViewportXStart;
			break;
		case dml_rotation_90:
		case dml_rotation_270m:
			rotated_vp_start = SwathHeight - ((ViewportYStart + SwathWidth - 1) % SwathHeight + 1);
			break;
		default:
			rotated_vp_start = ViewportYStart;
			break;
		}

		/* Lines of the first swath that lie at or after the viewport start. */
		first_swath_lines = SwathHeight - (rotated_vp_start % SwathHeight);

		if (first_swath_lines < *VInitPreFill)
			*MaxNumSwath = (dml_uint_t)(dml_ceil((*VInitPreFill - first_swath_lines) / (dml_float_t) SwathHeight, 1) + 1);
		else
			*MaxNumSwath = 1;

		partial_lines = (rotated_vp_start + *VInitPreFill - 1) % SwathHeight;
	}

	/* At least one line of the partial swath is always counted. */
	partial_swath = (dml_uint_t)(dml_max(1, partial_lines));
	total_lines = *MaxNumSwath * SwathHeight + partial_swath;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: vp_start_rot = %u\n", __func__, rotated_vp_start);
	dml_print("DML::%s: VInitPreFill = %u\n", __func__, *VInitPreFill);
	dml_print("DML::%s: MaxPartialSwath = %u\n", __func__, partial_swath);
	dml_print("DML::%s: MaxNumSwath = %u\n", __func__, *MaxNumSwath);
	dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, total_lines);
#endif
	return (dml_uint_t)(total_lines);

} // CalculatePrefetchSourceLines
2447
/*
 * CalculateVMAndRowBytes - per-surface VM (page table) and row byte counts.
 *
 * Computes, for one surface plane:
 *  - the meta request size (8x the 256-byte block dimensions), the meta row
 *    width/height and *MetaRowByte (forced to 0 when DCCEnable is not set);
 *  - *MetaPTEBytesFrame and *DPDE0BytesFrame (0 unless GPUVMEnable is set,
 *    *DPDE0BytesFrame additionally requires more than one page table level);
 *  - the PTE request shape (*PixelPTEReqWidth/Height, *PTERequestSize), which
 *    depends on the tiling mode and on GPUVMMinPageSizeKBytes;
 *  - the dpte row width/height and *PixelPTEBytesPerRow, both for the regular
 *    case and for the "one row per frame" case;
 *  - *PixelPTEBytesPerRowStorage, a copy of *PixelPTEBytesPerRow.
 *
 * *PixelPTEBytesPerRow is forced to 0 when GPUVMEnable is not set.
 * *dpte_row_height_linear is only written for dml_sw_linear surfaces.
 *
 * BytePerPixel, Pitch, MacroTileHeight and the PTE request dimensions are
 * used as divisors and are not validated here (apart from the ASSERT on
 * *PixelPTEReqWidth in the non-rotated tiled path).
 *
 * SourcePixelFormat is accepted but not used by this function.
 *
 * Return: *MetaPTEBytesFrame + MPDE bytes + *DPDE0BytesFrame + extra DPDE
 *         bytes (the PDE-and-meta-PTE bytes per frame).
 */
static dml_uint_t CalculateVMAndRowBytes(
		dml_bool_t ViewportStationary,
		dml_bool_t DCCEnable,
		dml_uint_t NumberOfDPPs,
		dml_uint_t BlockHeight256Bytes,
		dml_uint_t BlockWidth256Bytes,
		enum dml_source_format_class SourcePixelFormat,
		dml_uint_t SurfaceTiling,
		dml_uint_t BytePerPixel,
		enum dml_rotation_angle SourceScan,
		dml_uint_t SwathWidth,
		dml_uint_t ViewportHeight,
		dml_uint_t ViewportXStart,
		dml_uint_t ViewportYStart,
		dml_bool_t GPUVMEnable,
		dml_uint_t GPUVMMaxPageTableLevels,
		dml_uint_t GPUVMMinPageSizeKBytes,
		dml_uint_t PTEBufferSizeInRequests,
		dml_uint_t Pitch,
		dml_uint_t DCCMetaPitch,
		dml_uint_t MacroTileWidth,
		dml_uint_t MacroTileHeight,

		// Output
		dml_uint_t *MetaRowByte,
		dml_uint_t *PixelPTEBytesPerRow, // for bandwidth calculation
		dml_uint_t *PixelPTEBytesPerRowStorage, // for PTE buffer size check
		dml_uint_t *dpte_row_width_ub,
		dml_uint_t *dpte_row_height,
		dml_uint_t *dpte_row_height_linear,
		dml_uint_t *PixelPTEBytesPerRow_one_row_per_frame,
		dml_uint_t *dpte_row_width_ub_one_row_per_frame,
		dml_uint_t *dpte_row_height_one_row_per_frame,
		dml_uint_t *MetaRequestWidth,
		dml_uint_t *MetaRequestHeight,
		dml_uint_t *meta_row_width,
		dml_uint_t *meta_row_height,
		dml_uint_t *PixelPTEReqWidth,
		dml_uint_t *PixelPTEReqHeight,
		dml_uint_t *PTERequestSize,
		dml_uint_t *DPDE0BytesFrame,
		dml_uint_t *MetaPTEBytesFrame)
{
	dml_uint_t MPDEBytesFrame;
	dml_uint_t DCCMetaSurfaceBytes;
	dml_uint_t ExtraDPDEBytesFrame;
	dml_uint_t PDEAndMetaPTEBytesFrame;
	dml_uint_t MacroTileSizeBytes;
	dml_uint_t vp_height_meta_ub;
	dml_uint_t vp_height_dpte_ub;

	dml_uint_t PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this

	/* Meta request covers 8x8 blocks of 256 bytes. */
	*MetaRequestHeight = 8 * BlockHeight256Bytes;
	*MetaRequestWidth = 8 * BlockWidth256Bytes;
	if (SurfaceTiling == dml_sw_linear) {
		*meta_row_height = 32;
		*meta_row_width = (dml_uint_t)(dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth));
		/*
		 * Always give the output a defined value: previously this path
		 * left *MetaRowByte untouched, so with DCCEnable set the caller
		 * saw whatever the slot held before. Same formula as the
		 * non-rotated tiled path below.
		 */
		*MetaRowByte = (dml_uint_t)(*meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0);
	} else if (!dml_is_vertical_rotation(SourceScan)) {
		*meta_row_height = *MetaRequestHeight;
		if (ViewportStationary && NumberOfDPPs == 1) {
			*meta_row_width = (dml_uint_t)(dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth));
		} else {
			*meta_row_width = (dml_uint_t)(dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth);
		}
		*MetaRowByte = (dml_uint_t)(*meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0);
	} else {
		/* Vertical scan: request width and height swap roles. */
		*meta_row_height = *MetaRequestWidth;
		if (ViewportStationary && NumberOfDPPs == 1) {
			*meta_row_width = (dml_uint_t)(dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1, *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight));
		} else {
			*meta_row_width = (dml_uint_t)(dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight);
		}
		*MetaRowByte = (dml_uint_t)(*meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0);
	}

	/* Upper bound of the viewport height in units of 64 blocks (meta surface). */
	if (ViewportStationary && (NumberOfDPPs == 1 || !dml_is_vertical_rotation(SourceScan))) {
		vp_height_meta_ub = (dml_uint_t)(dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1, 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes));
	} else if (!dml_is_vertical_rotation(SourceScan)) {
		vp_height_meta_ub = (dml_uint_t)(dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes);
	} else {
		vp_height_meta_ub = (dml_uint_t)(dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes);
	}

	DCCMetaSurfaceBytes = (dml_uint_t)(DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0);

	if (GPUVMEnable == true) {
		*MetaPTEBytesFrame = (dml_uint_t)((dml_ceil((dml_float_t) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64);
		MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
	} else {
		*MetaPTEBytesFrame = 0;
		MPDEBytesFrame = 0;
	}

	/* Without DCC there is no meta surface to translate or fetch. */
	if (DCCEnable != true) {
		*MetaPTEBytesFrame = 0;
		MPDEBytesFrame = 0;
		*MetaRowByte = 0;
	}

	MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;

	/* Upper bound of the viewport height in macro tile units (data PTEs). */
	if (ViewportStationary && (NumberOfDPPs == 1 || !dml_is_vertical_rotation(SourceScan))) {
		vp_height_dpte_ub = (dml_uint_t)(dml_floor(ViewportYStart + ViewportHeight + MacroTileHeight - 1, MacroTileHeight) - dml_floor(ViewportYStart, MacroTileHeight));
	} else if (!dml_is_vertical_rotation(SourceScan)) {
		vp_height_dpte_ub = (dml_uint_t)(dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight);
	} else {
		vp_height_dpte_ub = (dml_uint_t)(dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight);
	}

	if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
		*DPDE0BytesFrame = (dml_uint_t)(64 * (dml_ceil((dml_float_t) (Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) / (dml_float_t) (8 * 2097152), 1) + 1));
		ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
	} else {
		*DPDE0BytesFrame = 0;
		ExtraDPDEBytesFrame = 0;
	}

	PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: DCCEnable = %u\n", __func__, DCCEnable);
	dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable);
	dml_print("DML::%s: SwModeLinear = %u\n", __func__, SurfaceTiling == dml_sw_linear);
	dml_print("DML::%s: BytePerPixel = %u\n", __func__, BytePerPixel);
	dml_print("DML::%s: GPUVMMaxPageTableLevels = %u\n", __func__, GPUVMMaxPageTableLevels);
	dml_print("DML::%s: BlockHeight256Bytes = %u\n", __func__, BlockHeight256Bytes);
	dml_print("DML::%s: BlockWidth256Bytes = %u\n", __func__, BlockWidth256Bytes);
	dml_print("DML::%s: MacroTileHeight = %u\n", __func__, MacroTileHeight);
	dml_print("DML::%s: MacroTileWidth = %u\n", __func__, MacroTileWidth);
	dml_print("DML::%s: MetaPTEBytesFrame = %u\n", __func__, *MetaPTEBytesFrame);
	dml_print("DML::%s: MPDEBytesFrame = %u\n", __func__, MPDEBytesFrame);
	dml_print("DML::%s: DPDE0BytesFrame = %u\n", __func__, *DPDE0BytesFrame);
	dml_print("DML::%s: ExtraDPDEBytesFrame= %u\n", __func__, ExtraDPDEBytesFrame);
	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, PDEAndMetaPTEBytesFrame);
	dml_print("DML::%s: ViewportHeight = %u\n", __func__, ViewportHeight);
	dml_print("DML::%s: SwathWidth = %u\n", __func__, SwathWidth);
	dml_print("DML::%s: vp_height_dpte_ub = %u\n", __func__, vp_height_dpte_ub);
#endif

	/* Shape and size of one PTE request for the pixel data. */
	if (SurfaceTiling == dml_sw_linear) {
		*PixelPTEReqHeight = 1;
		*PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
		PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
		*PTERequestSize = 64;
	} else if (GPUVMMinPageSizeKBytes == 4) {
		*PixelPTEReqHeight = 16 * BlockHeight256Bytes;
		*PixelPTEReqWidth = 16 * BlockWidth256Bytes;
		*PTERequestSize = 128;
	} else {
		*PixelPTEReqHeight = MacroTileHeight;
		*PixelPTEReqWidth = 8 * 1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
		*PTERequestSize = 64;
	}
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes);
	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame);
	dml_print("DML::%s: PixelPTEReqHeight = %u\n", __func__, *PixelPTEReqHeight);
	dml_print("DML::%s: PixelPTEReqWidth = %u\n", __func__, *PixelPTEReqWidth);
	dml_print("DML::%s: PixelPTEReqWidth_linear = %u\n", __func__, PixelPTEReqWidth_linear);
	dml_print("DML::%s: PTERequestSize = %u\n", __func__, *PTERequestSize);
	dml_print("DML::%s: Pitch = %u\n", __func__, Pitch);
#endif

	/* "One row per frame" variant: a single dpte row spans the whole viewport height. */
	*dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
	*dpte_row_width_ub_one_row_per_frame = (dml_uint_t)((dml_ceil(((dml_float_t)Pitch * (dml_float_t) *dpte_row_height_one_row_per_frame / (dml_float_t) *PixelPTEReqHeight - 1) / (dml_float_t) *PixelPTEReqWidth, 1) + 1) * (dml_float_t) *PixelPTEReqWidth);
	*PixelPTEBytesPerRow_one_row_per_frame = (dml_uint_t)((dml_float_t) *dpte_row_width_ub_one_row_per_frame / (dml_float_t) *PixelPTEReqWidth * *PTERequestSize);

	if (SurfaceTiling == dml_sw_linear) {
		/* Largest power-of-two row height that fits the PTE buffer, capped at 128. */
		*dpte_row_height = (dml_uint_t)(dml_min(128, 1 << (dml_uint_t) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)));
		dml_print("DML::%s: dpte_row_height term 1 = %u\n", __func__, PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch);
		dml_print("DML::%s: dpte_row_height term 2 = %f\n", __func__, dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
		dml_print("DML::%s: dpte_row_height term 3 = %f\n", __func__, dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
		dml_print("DML::%s: dpte_row_height term 4 = %u\n", __func__, 1 << (dml_uint_t) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
		dml_print("DML::%s: dpte_row_height = %u\n", __func__, *dpte_row_height);

		*dpte_row_width_ub = (dml_uint_t)(dml_ceil(((dml_float_t) Pitch * (dml_float_t) *dpte_row_height - 1), (dml_float_t) *PixelPTEReqWidth) + *PixelPTEReqWidth);
		*PixelPTEBytesPerRow = (dml_uint_t)((dml_float_t) *dpte_row_width_ub / (dml_float_t) *PixelPTEReqWidth * *PTERequestSize);

		// VBA_DELTA, VBA doesn't have programming value for pte row height linear.
		*dpte_row_height_linear = 1 << (dml_uint_t) dml_floor(dml_log2(PTEBufferSizeInRequests * PixelPTEReqWidth_linear / Pitch), 1);
		if (*dpte_row_height_linear > 128)
			*dpte_row_height_linear = 128;

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: dpte_row_width_ub = %u (linear)\n", __func__, *dpte_row_width_ub);
#endif

	} else if (!dml_is_vertical_rotation(SourceScan)) {
		*dpte_row_height = *PixelPTEReqHeight;

		if (GPUVMMinPageSizeKBytes > 64) {
			*dpte_row_width_ub = (dml_uint_t)((dml_ceil(((dml_float_t) Pitch * (dml_float_t) *dpte_row_height / (dml_float_t) *PixelPTEReqHeight - 1) / (dml_float_t) *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth);
		} else if (ViewportStationary && (NumberOfDPPs == 1)) {
			*dpte_row_width_ub = (dml_uint_t)(dml_floor(ViewportXStart + SwathWidth + *PixelPTEReqWidth - 1, *PixelPTEReqWidth) - dml_floor(ViewportXStart, *PixelPTEReqWidth));
		} else {
			*dpte_row_width_ub = (dml_uint_t)((dml_ceil((dml_float_t) (SwathWidth - 1) / (dml_float_t)*PixelPTEReqWidth, 1) + 1.0) * *PixelPTEReqWidth);
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: dpte_row_width_ub = %u (tiled horz)\n", __func__, *dpte_row_width_ub);
#endif

		/* Integer division below; guard against a zero request width. */
		ASSERT(*PixelPTEReqWidth);
		if (*PixelPTEReqWidth != 0)
			*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
	} else {
		*dpte_row_height = (dml_uint_t)(dml_min(*PixelPTEReqWidth, MacroTileWidth));

		if (ViewportStationary && (NumberOfDPPs == 1)) {
			*dpte_row_width_ub = (dml_uint_t)(dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1, *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight));
		} else {
			*dpte_row_width_ub = (dml_uint_t)((dml_ceil((dml_float_t) (SwathWidth - 1) / (dml_float_t) *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight);
		}

		*PixelPTEBytesPerRow = (dml_uint_t)((dml_float_t) *dpte_row_width_ub / (dml_float_t) *PixelPTEReqHeight * *PTERequestSize);
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: dpte_row_width_ub = %u (tiled vert)\n", __func__, *dpte_row_width_ub);
#endif
	}

	/* No address translation, so no PTE traffic per row. */
	if (GPUVMEnable != true)
		*PixelPTEBytesPerRow = 0;

	*PixelPTEBytesPerRowStorage = *PixelPTEBytesPerRow;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes);
	dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable);
	dml_print("DML::%s: dpte_row_height = %u\n", __func__, *dpte_row_height);
	dml_print("DML::%s: dpte_row_height_linear = %u\n", __func__, *dpte_row_height_linear);
	dml_print("DML::%s: dpte_row_width_ub = %u\n", __func__, *dpte_row_width_ub);
	dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, *PixelPTEBytesPerRow);
	dml_print("DML::%s: PixelPTEBytesPerRowStorage = %u\n", __func__, *PixelPTEBytesPerRowStorage);
	dml_print("DML::%s: PTEBufferSizeInRequests = %u\n", __func__, PTEBufferSizeInRequests);
	dml_print("DML::%s: dpte_row_height_one_row_per_frame = %u\n", __func__, *dpte_row_height_one_row_per_frame);
	dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %u\n", __func__, *dpte_row_width_ub_one_row_per_frame);
	dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %u\n", __func__, *PixelPTEBytesPerRow_one_row_per_frame);
#endif

	dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);

	return PDEAndMetaPTEBytesFrame;
} // CalculateVMAndRowBytes
2691
/*
 * PixelClockAdjustmentForProgressiveToInterlaceUnit - apply the
 * progressive-to-interlace unit effect to the per-plane pixel clocks.
 *
 * For every active plane the current timing pixel clock is first saved as
 * the back-end pixel clock. If the plane's timing is interlaced and
 * ptoi_supported is set, the timing pixel clock is then doubled in place.
 *
 * Note: display_cfg is modified.
 */
static void PixelClockAdjustmentForProgressiveToInterlaceUnit(struct dml_display_cfg_st *display_cfg, dml_bool_t ptoi_supported)
{
	const dml_uint_t plane_count = dml_get_num_active_planes(display_cfg);
	dml_uint_t plane;

	for (plane = 0; plane < plane_count; plane++) {
		/* Back end keeps running at the unmodified clock. */
		display_cfg->output.PixelClockBackEnd[plane] = display_cfg->timing.PixelClock[plane];

		if (display_cfg->timing.Interlace[plane] != 1 || ptoi_supported != true)
			continue;

		display_cfg->timing.PixelClock[plane] *= 2;
	}
}
2704
/*
 * TruncToValidBPP - pick or validate the output bits-per-pixel for a link.
 *
 * The set of candidate values depends on Format (and, for the 4:2:2 formats,
 * on whether Output is HDMI): three fixed non-DSC values, plus a
 * [min, max] range used when DSCEnable is set. The maximum BPP the link can
 * carry is derived from LinkBitRate, Lanes and PixelClock with an
 * Output-specific scaling, then adjusted for the ODM mode that matches
 * DSCEnable (ODMModeDSC or ODMModeNoDSC).
 *
 * DesiredBPP == 0 requests automatic selection: the largest candidate that
 * does not exceed the link maximum (with DSC, the link maximum truncated to
 * 1/16 steps, capped at the DSC maximum). Otherwise DesiredBPP is only
 * checked against the candidates and returned unchanged when acceptable.
 *
 * *RequiredSlots is always written as ceil(DesiredBPP / link maximum * 64).
 *
 * HTotal, HActive, DSCSlices, AudioRate and AudioLayout are accepted but not
 * used by this function.
 *
 * Return: the selected BPP, or __DML_DPP_INVALID__ when nothing fits.
 */
static dml_float_t TruncToValidBPP(
		dml_float_t LinkBitRate,
		dml_uint_t Lanes,
		dml_uint_t HTotal,
		dml_uint_t HActive,
		dml_float_t PixelClock,
		dml_float_t DesiredBPP,
		dml_bool_t DSCEnable,
		enum dml_output_encoder_class Output,
		enum dml_output_format_class Format,
		dml_uint_t DSCInputBitPerComponent,
		dml_uint_t DSCSlices,
		dml_uint_t AudioRate,
		dml_uint_t AudioLayout,
		enum dml_odm_mode ODMModeNoDSC,
		enum dml_odm_mode ODMModeDSC,

		// Output
		dml_uint_t *RequiredSlots)
{
	dml_float_t link_bpp_limit;
	dml_float_t dsc_bpp_upper;
	dml_uint_t dsc_bpp_lower;
	dml_uint_t plain_bpp_low;
	dml_uint_t plain_bpp_mid;
	dml_uint_t plain_bpp_high;
	enum dml_odm_mode odm_mode;
	dml_bool_t desired_ok;

	/* Candidate BPP values for the pixel encoding. */
	switch (Format) {
	case dml_420:
		plain_bpp_low = 12;
		plain_bpp_mid = 15;
		plain_bpp_high = 18;
		dsc_bpp_lower = 6;
		dsc_bpp_upper = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
		break;
	case dml_444:
		plain_bpp_low = 24;
		plain_bpp_mid = 30;
		plain_bpp_high = 36;
		dsc_bpp_lower = 8;
		dsc_bpp_upper = 3 * DSCInputBitPerComponent - 1.0 / 16;
		break;
	default:
		if (Output == dml_hdmi) {
			plain_bpp_low = 24;
			plain_bpp_mid = 24;
			plain_bpp_high = 24;
		} else {
			plain_bpp_low = 16;
			plain_bpp_mid = 20;
			plain_bpp_high = 24;
		}
		if (Format == dml_n422) {
			dsc_bpp_lower = 7;
			dsc_bpp_upper = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
		} else {
			dsc_bpp_lower = 8;
			dsc_bpp_upper = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
		}
		break;
	}

	/*
	 * Maximum BPP the link can transport. The scaling factors are
	 * presumably link-encoding/overhead efficiencies - TODO confirm.
	 */
	if (Output == dml_dp2p0)
		link_bpp_limit = LinkBitRate * Lanes / PixelClock * 128.0 / 132.0 * 383.0 / 384.0 * 65536.0 / 65540.0;
	else if (DSCEnable && Output == dml_dp)
		link_bpp_limit = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock * (1 - 2.4 / 100);
	else
		link_bpp_limit = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock;

	/* The same ODM adjustment applies with or without DSC; only the mode differs. */
	odm_mode = DSCEnable ? ODMModeDSC : ODMModeNoDSC;
	switch (odm_mode) {
	case dml_odm_mode_combine_4to1:
		link_bpp_limit = dml_min(link_bpp_limit, 16);
		break;
	case dml_odm_mode_combine_2to1:
		link_bpp_limit = dml_min(link_bpp_limit, 32);
		break;
	case dml_odm_mode_split_1to2:
		link_bpp_limit = 2 * link_bpp_limit;
		break;
	default:
		break;
	}

	*RequiredSlots = (dml_uint_t)(dml_ceil(DesiredBPP / link_bpp_limit * 64, 1));

	if (DesiredBPP != 0) {
		/* Explicit request: validate only, never adjust. */
		if (DSCEnable)
			desired_ok = (DesiredBPP >= dsc_bpp_lower && DesiredBPP <= dsc_bpp_upper);
		else
			desired_ok = (DesiredBPP == plain_bpp_high || DesiredBPP == plain_bpp_mid || DesiredBPP == plain_bpp_low);

		if (!desired_ok)
			return __DML_DPP_INVALID__;
		return DesiredBPP;
	}

	/* Automatic selection. */
	if (DSCEnable) {
		if (link_bpp_limit < dsc_bpp_lower)
			return __DML_DPP_INVALID__;
		if (link_bpp_limit >= dsc_bpp_upper)
			return dsc_bpp_upper;
		/* Truncate to a multiple of 1/16. */
		return dml_floor(16.0 * link_bpp_limit, 1.0) / 16.0;
	}

	if (link_bpp_limit >= plain_bpp_high)
		return plain_bpp_high;
	if (link_bpp_limit >= plain_bpp_mid)
		return plain_bpp_mid;
	if (link_bpp_limit >= plain_bpp_low)
		return plain_bpp_low;
	return __DML_DPP_INVALID__;
} // TruncToValidBPP
2820
CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(struct display_mode_lib_scratch_st * scratch,struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st * p)2821 static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
2822 struct display_mode_lib_scratch_st *scratch,
2823 struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *p)
2824 {
2825 struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals_st *s = &scratch->CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals;
2826
2827 s->TotalActiveWriteback = 0;
2828 p->Watermark->UrgentWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency;
2829 p->Watermark->USRRetrainingWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency + p->mmSOCParameters.USRRetrainingLatency + p->mmSOCParameters.SMNLatency;
2830 p->Watermark->DRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->Watermark->UrgentWatermark;
2831 p->Watermark->FCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->Watermark->UrgentWatermark;
2832 p->Watermark->StutterExitWatermark = p->mmSOCParameters.SRExitTime + p->mmSOCParameters.ExtraLatency + 10 / p->DCFClkDeepSleep;
2833 p->Watermark->StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitTime + p->mmSOCParameters.ExtraLatency + 10 / p->DCFClkDeepSleep;
2834 p->Watermark->Z8StutterExitWatermark = p->mmSOCParameters.SRExitZ8Time + p->mmSOCParameters.ExtraLatency + 10 / p->DCFClkDeepSleep;
2835 p->Watermark->Z8StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitZ8Time + p->mmSOCParameters.ExtraLatency + 10 / p->DCFClkDeepSleep;
2836
2837 #ifdef __DML_VBA_DEBUG__
2838 dml_print("DML::%s: UrgentLatency = %f\n", __func__, p->mmSOCParameters.UrgentLatency);
2839 dml_print("DML::%s: ExtraLatency = %f\n", __func__, p->mmSOCParameters.ExtraLatency);
2840 dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, p->mmSOCParameters.DRAMClockChangeLatency);
2841 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark);
2842 dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, p->Watermark->USRRetrainingWatermark);
2843 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, p->Watermark->DRAMClockChangeWatermark);
2844 dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, p->Watermark->FCLKChangeWatermark);
2845 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, p->Watermark->StutterExitWatermark);
2846 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->StutterEnterPlusExitWatermark);
2847 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, p->Watermark->Z8StutterExitWatermark);
2848 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->Z8StutterEnterPlusExitWatermark);
2849 #endif
2850
2851 s->TotalActiveWriteback = 0;
2852 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
2853 if (p->WritebackEnable[k] == true) {
2854 s->TotalActiveWriteback = s->TotalActiveWriteback + 1;
2855 }
2856 }
2857
2858 if (s->TotalActiveWriteback <= 1) {
2859 p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency;
2860 } else {
2861 p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK;
2862 }
2863 if (p->USRRetrainingRequiredFinal)
2864 p->Watermark->WritebackUrgentWatermark = p->Watermark->WritebackUrgentWatermark + p->mmSOCParameters.USRRetrainingLatency;
2865
2866 if (s->TotalActiveWriteback <= 1) {
2867 p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency;
2868 p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency;
2869 } else {
2870 p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK;
2871 p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024 / 32 / p->SOCCLK;
2872 }
2873
2874 if (p->USRRetrainingRequiredFinal)
2875 p->Watermark->WritebackDRAMClockChangeWatermark = p->Watermark->WritebackDRAMClockChangeWatermark + p->mmSOCParameters.USRRetrainingLatency;
2876
2877 if (p->USRRetrainingRequiredFinal)
2878 p->Watermark->WritebackFCLKChangeWatermark = p->Watermark->WritebackFCLKChangeWatermark + p->mmSOCParameters.USRRetrainingLatency;
2879
2880 #ifdef __DML_VBA_DEBUG__
2881 dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", __func__, p->Watermark->WritebackDRAMClockChangeWatermark);
2882 dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, p->Watermark->WritebackFCLKChangeWatermark);
2883 dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, p->Watermark->WritebackUrgentWatermark);
2884 dml_print("DML::%s: USRRetrainingRequiredFinal = %u\n", __func__, p->USRRetrainingRequiredFinal);
2885 dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, p->mmSOCParameters.USRRetrainingLatency);
2886 #endif
2887
2888 s->TotalPixelBW = 0.0;
2889 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
2890 s->TotalPixelBW = s->TotalPixelBW + p->DPPPerSurface[k]
2891 * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * p->VRatio[k] + p->SwathWidthC[k] * p->BytePerPixelDETC[k] * p->VRatioChroma[k]) / (p->HTotal[k] / p->PixelClock[k]);
2892 }
2893
2894 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
2895
2896 s->LBLatencyHidingSourceLinesY[k] = (dml_uint_t)(dml_min((dml_float_t)p->MaxLineBufferLines, dml_floor((dml_float_t)p->LineBufferSize / (dml_float_t)p->LBBitPerPixel[k] / ((dml_float_t)p->SwathWidthY[k] / dml_max(p->HRatio[k], 1.0)), 1)) - (p->VTaps[k] - 1));
2897 s->LBLatencyHidingSourceLinesC[k] = (dml_uint_t)(dml_min((dml_float_t)p->MaxLineBufferLines, dml_floor((dml_float_t)p->LineBufferSize / (dml_float_t)p->LBBitPerPixel[k] / ((dml_float_t)p->SwathWidthC[k] / dml_max(p->HRatioChroma[k], 1.0)), 1)) - (p->VTapsChroma[k] - 1));
2898
2899
2900 #ifdef __DML_VBA_DEBUG__
2901 dml_print("DML::%s: k=%u, MaxLineBufferLines = %u\n", __func__, k, p->MaxLineBufferLines);
2902 dml_print("DML::%s: k=%u, LineBufferSize = %u\n", __func__, k, p->LineBufferSize);
2903 dml_print("DML::%s: k=%u, LBBitPerPixel = %u\n", __func__, k, p->LBBitPerPixel[k]);
2904 dml_print("DML::%s: k=%u, HRatio = %f\n", __func__, k, p->HRatio[k]);
2905 dml_print("DML::%s: k=%u, VTaps = %u\n", __func__, k, p->VTaps[k]);
2906 #endif
2907
2908 s->EffectiveLBLatencyHidingY = s->LBLatencyHidingSourceLinesY[k] / p->VRatio[k] * (p->HTotal[k] / p->PixelClock[k]);
2909 s->EffectiveLBLatencyHidingC = s->LBLatencyHidingSourceLinesC[k] / p->VRatioChroma[k] * (p->HTotal[k] / p->PixelClock[k]);
2910
2911 s->EffectiveDETBufferSizeY = p->DETBufferSizeY[k];
2912 if (p->UnboundedRequestEnabled) {
2913 s->EffectiveDETBufferSizeY = s->EffectiveDETBufferSizeY + p->CompressedBufferSizeInkByte * 1024 * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * p->VRatio[k]) / (p->HTotal[k] / p->PixelClock[k]) / s->TotalPixelBW;
2914 }
2915
2916 s->LinesInDETY[k] = (dml_float_t)s->EffectiveDETBufferSizeY / p->BytePerPixelDETY[k] / p->SwathWidthY[k];
2917 s->LinesInDETYRoundedDownToSwath[k] = (dml_uint_t)(dml_floor(s->LinesInDETY[k], p->SwathHeightY[k]));
2918 s->FullDETBufferingTimeY = s->LinesInDETYRoundedDownToSwath[k] * (p->HTotal[k] / p->PixelClock[k]) / p->VRatio[k];
2919
2920 s->ActiveClockChangeLatencyHidingY = s->EffectiveLBLatencyHidingY + s->FullDETBufferingTimeY - ((dml_float_t)p->DSTXAfterScaler[k] / (dml_float_t)p->HTotal[k] + (dml_float_t)p->DSTYAfterScaler[k]) * (dml_float_t)p->HTotal[k] / p->PixelClock[k];
2921
2922 if (p->NumberOfActiveSurfaces > 1) {
2923 s->ActiveClockChangeLatencyHidingY = s->ActiveClockChangeLatencyHidingY - (1.0 - 1.0 / (dml_float_t)p->NumberOfActiveSurfaces) * (dml_float_t)p->SwathHeightY[k] * (dml_float_t)p->HTotal[k] / p->PixelClock[k] / p->VRatio[k];
2924 }
2925
2926 if (p->BytePerPixelDETC[k] > 0) {
2927 s->LinesInDETC[k] = p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k];
2928 s->LinesInDETCRoundedDownToSwath[k] = (dml_uint_t)(dml_floor(s->LinesInDETC[k], p->SwathHeightC[k]));
2929 s->FullDETBufferingTimeC = s->LinesInDETCRoundedDownToSwath[k] * (p->HTotal[k] / p->PixelClock[k]) / p->VRatioChroma[k];
2930 s->ActiveClockChangeLatencyHidingC = s->EffectiveLBLatencyHidingC + s->FullDETBufferingTimeC - ((dml_float_t)p->DSTXAfterScaler[k] / (dml_float_t)p->HTotal[k] + (dml_float_t)p->DSTYAfterScaler[k]) * (dml_float_t)p->HTotal[k] / p->PixelClock[k];
2931 if (p->NumberOfActiveSurfaces > 1) {
2932 s->ActiveClockChangeLatencyHidingC = s->ActiveClockChangeLatencyHidingC - (1.0 - 1.0 / (dml_float_t)p->NumberOfActiveSurfaces) * (dml_float_t)p->SwathHeightC[k] * (dml_float_t)p->HTotal[k] / p->PixelClock[k] / p->VRatioChroma[k];
2933 }
2934 s->ActiveClockChangeLatencyHiding = dml_min(s->ActiveClockChangeLatencyHidingY, s->ActiveClockChangeLatencyHidingC);
2935 } else {
2936 s->ActiveClockChangeLatencyHiding = s->ActiveClockChangeLatencyHidingY;
2937 }
2938
2939 s->ActiveDRAMClockChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->UrgentWatermark - p->Watermark->DRAMClockChangeWatermark;
2940 s->ActiveFCLKChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->UrgentWatermark - p->Watermark->FCLKChangeWatermark;
2941 s->USRRetrainingLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->USRRetrainingWatermark;
2942
2943 if (p->WritebackEnable[k]) {
2944 s->WritebackLatencyHiding = (dml_float_t)p->WritebackInterfaceBufferSize * 1024.0 / ((dml_float_t)p->WritebackDestinationWidth[k] * (dml_float_t)p->WritebackDestinationHeight[k] / ((dml_float_t)p->WritebackSourceHeight[k] * (dml_float_t)p->HTotal[k] / p->PixelClock[k]) * 4.0);
2945 if (p->WritebackPixelFormat[k] == dml_444_64) {
2946 s->WritebackLatencyHiding = s->WritebackLatencyHiding / 2;
2947 }
2948 s->WritebackDRAMClockChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackDRAMClockChangeWatermark;
2949
2950 s->WritebackFCLKChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackFCLKChangeWatermark;
2951
2952 s->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(s->ActiveDRAMClockChangeLatencyMargin[k], s->WritebackFCLKChangeLatencyMargin);
2953 s->ActiveFCLKChangeLatencyMargin[k] = dml_min(s->ActiveFCLKChangeLatencyMargin[k], s->WritebackDRAMClockChangeLatencyMargin);
2954 }
2955 p->MaxActiveDRAMClockChangeLatencySupported[k] = (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) ? 0 : (s->ActiveDRAMClockChangeLatencyMargin[k] + p->mmSOCParameters.DRAMClockChangeLatency);
2956 p->ActiveDRAMClockChangeLatencyMargin[k] = s->ActiveDRAMClockChangeLatencyMargin[k];
2957 }
2958
2959 *p->USRRetrainingSupport = true;
2960 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
2961 if ((p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) && (s->USRRetrainingLatencyMargin[k] < 0)) {
2962 *p->USRRetrainingSupport = false;
2963 }
2964 }
2965
2966 s->FoundCriticalSurface = false;
2967 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
2968 if ((p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) && ((!s->FoundCriticalSurface)
2969 || ((s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency) < *p->MaxActiveFCLKChangeLatencySupported))) {
2970 s->FoundCriticalSurface = true;
2971 *p->MaxActiveFCLKChangeLatencySupported = s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency;
2972 }
2973 }
2974
2975 for (dml_uint_t i = 0; i < p->NumberOfActiveSurfaces; ++i) {
2976 for (dml_uint_t j = 0; j < p->NumberOfActiveSurfaces; ++j) {
2977 if (i == j ||
2978 (p->BlendingAndTiming[i] == i && p->BlendingAndTiming[j] == i) ||
2979 (p->BlendingAndTiming[j] == j && p->BlendingAndTiming[i] == j) ||
2980 (p->BlendingAndTiming[i] == p->BlendingAndTiming[j] && p->BlendingAndTiming[i] != i) ||
2981 (p->SynchronizeTimingsFinal && p->PixelClock[i] == p->PixelClock[j] && p->HTotal[i] == p->HTotal[j] && p->VTotal[i] == p->VTotal[j] && p->VActive[i] == p->VActive[j]) ||
2982 (p->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && (p->DRRDisplay[i] || p->DRRDisplay[j]))) {
2983 s->SynchronizedSurfaces[i][j] = true;
2984 } else {
2985 s->SynchronizedSurfaces[i][j] = false;
2986 }
2987 }
2988 }
2989
2990 s->FCLKChangeSupportNumber = 0;
2991 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
2992 if ((p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) && (s->ActiveFCLKChangeLatencyMargin[k] < 0)) {
2993 if (!(p->PrefetchMode[k] <= 1)) {
2994 s->FCLKChangeSupportNumber = 3;
2995 } else if (s->FCLKChangeSupportNumber == 0) {
2996 s->FCLKChangeSupportNumber = ((p->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && p->DRRDisplay[k]) ? 2 : 1);
2997 s->LastSurfaceWithoutMargin = k;
2998 } else if (((s->FCLKChangeSupportNumber == 1) && (p->DRRDisplay[k] || (!s->SynchronizedSurfaces[s->LastSurfaceWithoutMargin][k]))) || (s->FCLKChangeSupportNumber == 2))
2999 s->FCLKChangeSupportNumber = 3;
3000 }
3001 }
3002
3003 if (s->FCLKChangeSupportNumber == 0) {
3004 *p->FCLKChangeSupport = dml_fclock_change_vactive;
3005 } else if ((s->FCLKChangeSupportNumber == 1) || (s->FCLKChangeSupportNumber == 2)) {
3006 *p->FCLKChangeSupport = dml_fclock_change_vblank;
3007 } else {
3008 *p->FCLKChangeSupport = dml_fclock_change_unsupported;
3009 }
3010
3011 s->DRAMClockChangeMethod = 0;
3012 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3013 if (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame)
3014 s->DRAMClockChangeMethod = 1;
3015 else if (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_sub_viewport)
3016 s->DRAMClockChangeMethod = 2;
3017 }
3018
3019 s->DRAMClockChangeSupportNumber = 0;
3020 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3021 if (((s->DRAMClockChangeMethod == 0) && (s->ActiveDRAMClockChangeLatencyMargin[k] < 0)) ||
3022 ((s->DRAMClockChangeMethod == 1) && (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_full_frame)) ||
3023 ((s->DRAMClockChangeMethod == 2) && (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_sub_viewport) && (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe))) {
3024 if (p->PrefetchMode[k] != 0) { // Don't need to support DRAM clock change, PrefetchMode 0 means needs DRAM clock change support
3025 s->DRAMClockChangeSupportNumber = 3;
3026 } else if (s->DRAMClockChangeSupportNumber == 0) {
3027 s->DRAMClockChangeSupportNumber = (p->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && p->DRRDisplay[k]) ? 2 : 1;
3028 s->LastSurfaceWithoutMargin = k;
3029 } else if (((s->DRAMClockChangeSupportNumber == 1) && (p->DRRDisplay[k] || !s->SynchronizedSurfaces[s->LastSurfaceWithoutMargin][k])) || (s->DRAMClockChangeSupportNumber == 2)) {
3030 s->DRAMClockChangeSupportNumber = 3;
3031 }
3032 }
3033 }
3034
3035 if (s->DRAMClockChangeMethod == 0) { // No MALL usage
3036 if (s->DRAMClockChangeSupportNumber == 0) {
3037 *p->DRAMClockChangeSupport = dml_dram_clock_change_vactive;
3038 } else if (s->DRAMClockChangeSupportNumber == 1) {
3039 *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank;
3040 } else if (s->DRAMClockChangeSupportNumber == 2) {
3041 *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_drr;
3042 } else {
3043 *p->DRAMClockChangeSupport = dml_dram_clock_change_unsupported;
3044 }
3045 } else if (s->DRAMClockChangeMethod == 1) { // Any pipe using MALL full frame
3046 if (s->DRAMClockChangeSupportNumber == 0) {
3047 *p->DRAMClockChangeSupport = dml_dram_clock_change_vactive_w_mall_full_frame;
3048 } else if (s->DRAMClockChangeSupportNumber == 1) {
3049 *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_w_mall_full_frame;
3050 } else if (s->DRAMClockChangeSupportNumber == 2) {
3051 *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_drr_w_mall_full_frame;
3052 } else {
3053 *p->DRAMClockChangeSupport = dml_dram_clock_change_unsupported;
3054 }
3055 } else { // Any pipe using MALL subviewport
3056 if (s->DRAMClockChangeSupportNumber == 0) {
3057 *p->DRAMClockChangeSupport = dml_dram_clock_change_vactive_w_mall_sub_vp;
3058 } else if (s->DRAMClockChangeSupportNumber == 1) {
3059 *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_w_mall_sub_vp;
3060 } else if (s->DRAMClockChangeSupportNumber == 2) {
3061 *p->DRAMClockChangeSupport = dml_dram_clock_change_vblank_drr_w_mall_sub_vp;
3062 } else {
3063 *p->DRAMClockChangeSupport = dml_dram_clock_change_unsupported;
3064 }
3065 }
3066
3067 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3068 s->dst_y_pstate = (dml_uint_t)(dml_ceil((p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.UrgentLatency) / (p->HTotal[k] / p->PixelClock[k]), 1));
3069 s->src_y_pstate_l = (dml_uint_t)(dml_ceil(s->dst_y_pstate * p->VRatio[k], p->SwathHeightY[k]));
3070 s->src_y_ahead_l = (dml_uint_t)(dml_floor(p->DETBufferSizeY[k] / p->BytePerPixelDETY[k] / p->SwathWidthY[k], p->SwathHeightY[k]) + s->LBLatencyHidingSourceLinesY[k]);
3071 s->sub_vp_lines_l = s->src_y_pstate_l + s->src_y_ahead_l + p->meta_row_height[k];
3072
3073 #ifdef __DML_VBA_DEBUG__
3074 dml_print("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
3075 dml_print("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]);
3076 dml_print("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]);
3077 dml_print("DML::%s: k=%u, SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]);
3078 dml_print("DML::%s: k=%u, LBLatencyHidingSourceLinesY = %u\n", __func__, k, s->LBLatencyHidingSourceLinesY[k]);
3079 dml_print("DML::%s: k=%u, dst_y_pstate = %u\n", __func__, k, s->dst_y_pstate);
3080 dml_print("DML::%s: k=%u, src_y_pstate_l = %u\n", __func__, k, s->src_y_pstate_l);
3081 dml_print("DML::%s: k=%u, src_y_ahead_l = %u\n", __func__, k, s->src_y_ahead_l);
3082 dml_print("DML::%s: k=%u, meta_row_height = %u\n", __func__, k, p->meta_row_height[k]);
3083 dml_print("DML::%s: k=%u, sub_vp_lines_l = %u\n", __func__, k, s->sub_vp_lines_l);
3084 #endif
3085 p->SubViewportLinesNeededInMALL[k] = s->sub_vp_lines_l;
3086
3087 if (p->BytePerPixelDETC[k] > 0) {
3088 s->src_y_pstate_c = (dml_uint_t)(dml_ceil(s->dst_y_pstate * p->VRatioChroma[k], p->SwathHeightC[k]));
3089 s->src_y_ahead_c = (dml_uint_t)(dml_floor(p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k], p->SwathHeightC[k]) + s->LBLatencyHidingSourceLinesC[k]);
3090 s->sub_vp_lines_c = s->src_y_pstate_c + s->src_y_ahead_c + p->meta_row_height_chroma[k];
3091 p->SubViewportLinesNeededInMALL[k] = (dml_uint_t)(dml_max(s->sub_vp_lines_l, s->sub_vp_lines_c));
3092
3093 #ifdef __DML_VBA_DEBUG__
3094 dml_print("DML::%s: k=%u, src_y_pstate_c = %u\n", __func__, k, s->src_y_pstate_c);
3095 dml_print("DML::%s: k=%u, src_y_ahead_c = %u\n", __func__, k, s->src_y_ahead_c);
3096 dml_print("DML::%s: k=%u, meta_row_height_chroma = %u\n", __func__, k, p->meta_row_height_chroma[k]);
3097 dml_print("DML::%s: k=%u, sub_vp_lines_c = %u\n", __func__, k, s->sub_vp_lines_c);
3098 #endif
3099 }
3100 }
3101
3102 #ifdef __DML_VBA_DEBUG__
3103 dml_print("DML::%s: DRAMClockChangeSupport = %u\n", __func__, *p->DRAMClockChangeSupport);
3104 dml_print("DML::%s: FCLKChangeSupport = %u\n", __func__, *p->FCLKChangeSupport);
3105 dml_print("DML::%s: MaxActiveFCLKChangeLatencySupported = %f\n", __func__, *p->MaxActiveFCLKChangeLatencySupported);
3106 dml_print("DML::%s: USRRetrainingSupport = %u\n", __func__, *p->USRRetrainingSupport);
3107 #endif
3108 } // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
3109
/*
 * CalculateDCFCLKDeepSleep
 *
 * Writes to *DCFClkDeepSleep the largest of:
 *   - 8.0,
 *   - __DML_MIN_DCFCLK_FACTOR__ * (sum of luma + chroma read bandwidth) / ReturnBusWidth,
 *   - the per-surface requirement, which is derived from the swath width, bytes per
 *     pixel and line delivery time and is never below PixelClock[k] / 16.
 *
 * All array parameters are indexed by surface, 0 .. NumberOfActiveSurfaces - 1.
 */
static void CalculateDCFCLKDeepSleep(
		dml_uint_t NumberOfActiveSurfaces,
		dml_uint_t BytePerPixelY[],
		dml_uint_t BytePerPixelC[],
		dml_float_t VRatio[],
		dml_float_t VRatioChroma[],
		dml_uint_t SwathWidthY[],
		dml_uint_t SwathWidthC[],
		dml_uint_t DPPPerSurface[],
		dml_float_t HRatio[],
		dml_float_t HRatioChroma[],
		dml_float_t PixelClock[],
		dml_float_t PSCL_THROUGHPUT[],
		dml_float_t PSCL_THROUGHPUT_CHROMA[],
		dml_float_t Dppclk[],
		dml_float_t ReadBandwidthLuma[],
		dml_float_t ReadBandwidthChroma[],
		dml_uint_t ReturnBusWidth,

		// Output
		dml_float_t *DCFClkDeepSleep)
{
	dml_float_t PerSurfaceClk[__DML_NUM_PLANES__];
	dml_float_t TotalReadBandwidth = 0.0;

	for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
		dml_float_t LumaLineTime;
		dml_float_t ChromaLineTime = 0;

		// Line delivery time: paced by the pixel clock when not down-scaling vertically,
		// otherwise by the scaler throughput at DPPCLK.
		LumaLineTime = (VRatio[k] <= 1)
				? SwathWidthY[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k]
				: SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];

		if (BytePerPixelC[k] != 0) {
			ChromaLineTime = (VRatioChroma[k] <= 1)
					? SwathWidthC[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]
					: SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
		}

		// Two-plane surfaces take the worse of luma and chroma (divisor 32.0);
		// single-plane surfaces use luma only (divisor 64.0).
		if (BytePerPixelC[k] > 0) {
			PerSurfaceClk[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / LumaLineTime,
					__DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / ChromaLineTime);
		} else {
			PerSurfaceClk[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / LumaLineTime;
		}

		// Floor the per-surface value at PixelClock / 16.
		PerSurfaceClk[k] = dml_max(PerSurfaceClk[k], PixelClock[k] / 16);

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%u, PixelClock = %f\n", __func__, k, PixelClock[k]);
		dml_print("DML::%s: k=%u, DCFClkDeepSleepPerSurface = %f\n", __func__, k, PerSurfaceClk[k]);
#endif

		// Accumulated left-to-right: (total + luma) + chroma.
		TotalReadBandwidth = TotalReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
	}

	// Bandwidth-based floor, never below 8.0.
	*DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * TotalReadBandwidth / (dml_float_t) ReturnBusWidth);

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__);
	dml_print("DML::%s: ReadBandwidth = %f\n", __func__, TotalReadBandwidth);
	dml_print("DML::%s: ReturnBusWidth = %u\n", __func__, ReturnBusWidth);
	dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
#endif

	// Raise the result to the most demanding surface.
	for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
		*DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, PerSurfaceClk[k]);
	}
	dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
} // CalculateDCFCLKDeepSleep
3186
CalculateUrgentBurstFactor(enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange,dml_uint_t swath_width_luma_ub,dml_uint_t swath_width_chroma_ub,dml_uint_t SwathHeightY,dml_uint_t SwathHeightC,dml_float_t LineTime,dml_float_t UrgentLatency,dml_float_t CursorBufferSize,dml_uint_t CursorWidth,dml_uint_t CursorBPP,dml_float_t VRatio,dml_float_t VRatioC,dml_float_t BytePerPixelInDETY,dml_float_t BytePerPixelInDETC,dml_uint_t DETBufferSizeY,dml_uint_t DETBufferSizeC,dml_float_t * UrgentBurstFactorCursor,dml_float_t * UrgentBurstFactorLuma,dml_float_t * UrgentBurstFactorChroma,dml_bool_t * NotEnoughUrgentLatencyHiding)3187 static void CalculateUrgentBurstFactor(
3188 enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange,
3189 dml_uint_t swath_width_luma_ub,
3190 dml_uint_t swath_width_chroma_ub,
3191 dml_uint_t SwathHeightY,
3192 dml_uint_t SwathHeightC,
3193 dml_float_t LineTime,
3194 dml_float_t UrgentLatency,
3195 dml_float_t CursorBufferSize,
3196 dml_uint_t CursorWidth,
3197 dml_uint_t CursorBPP,
3198 dml_float_t VRatio,
3199 dml_float_t VRatioC,
3200 dml_float_t BytePerPixelInDETY,
3201 dml_float_t BytePerPixelInDETC,
3202 dml_uint_t DETBufferSizeY,
3203 dml_uint_t DETBufferSizeC,
3204 // Output
3205 dml_float_t *UrgentBurstFactorCursor,
3206 dml_float_t *UrgentBurstFactorLuma,
3207 dml_float_t *UrgentBurstFactorChroma,
3208 dml_bool_t *NotEnoughUrgentLatencyHiding)
3209 {
3210 dml_float_t LinesInDETLuma;
3211 dml_float_t LinesInDETChroma;
3212 dml_uint_t LinesInCursorBuffer;
3213 dml_float_t CursorBufferSizeInTime;
3214 dml_float_t DETBufferSizeInTimeLuma;
3215 dml_float_t DETBufferSizeInTimeChroma;
3216
3217 *NotEnoughUrgentLatencyHiding = 0;
3218
3219 if (CursorWidth > 0) {
3220 LinesInCursorBuffer = 1 << (dml_uint_t) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
3221 if (VRatio > 0) {
3222 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
3223 if (CursorBufferSizeInTime - UrgentLatency <= 0) {
3224 *NotEnoughUrgentLatencyHiding = 1;
3225 *UrgentBurstFactorCursor = 0;
3226 } else {
3227 *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
3228 }
3229 } else {
3230 *UrgentBurstFactorCursor = 1;
3231 }
3232 }
3233
3234 LinesInDETLuma = (UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe ? 1024*1024 : DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
3235
3236 if (VRatio > 0) {
3237 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
3238 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
3239 *NotEnoughUrgentLatencyHiding = 1;
3240 *UrgentBurstFactorLuma = 0;
3241 } else {
3242 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
3243 }
3244 } else {
3245 *UrgentBurstFactorLuma = 1;
3246 }
3247
3248 if (BytePerPixelInDETC > 0) {
3249 LinesInDETChroma = (UseMALLForPStateChange == dml_use_mall_pstate_change_phantom_pipe ? 1024*1024 : DETBufferSizeC) / BytePerPixelInDETC / swath_width_chroma_ub;
3250
3251 if (VRatioC > 0) {
3252 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC;
3253 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
3254 *NotEnoughUrgentLatencyHiding = 1;
3255 *UrgentBurstFactorChroma = 0;
3256 } else {
3257 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
3258 }
3259 } else {
3260 *UrgentBurstFactorChroma = 1;
3261 }
3262 }
3263 } // CalculateUrgentBurstFactor
3264
CalculatePixelDeliveryTimes(dml_uint_t NumberOfActiveSurfaces,dml_float_t VRatio[],dml_float_t VRatioChroma[],dml_float_t VRatioPrefetchY[],dml_float_t VRatioPrefetchC[],dml_uint_t swath_width_luma_ub[],dml_uint_t swath_width_chroma_ub[],dml_uint_t DPPPerSurface[],dml_float_t HRatio[],dml_float_t HRatioChroma[],dml_float_t PixelClock[],dml_float_t PSCL_THROUGHPUT[],dml_float_t PSCL_THROUGHPUT_CHROMA[],dml_float_t Dppclk[],dml_uint_t BytePerPixelC[],enum dml_rotation_angle SourceScan[],dml_uint_t NumberOfCursors[],dml_uint_t CursorWidth[],dml_uint_t CursorBPP[],dml_uint_t BlockWidth256BytesY[],dml_uint_t BlockHeight256BytesY[],dml_uint_t BlockWidth256BytesC[],dml_uint_t BlockHeight256BytesC[],dml_float_t DisplayPipeLineDeliveryTimeLuma[],dml_float_t DisplayPipeLineDeliveryTimeChroma[],dml_float_t DisplayPipeLineDeliveryTimeLumaPrefetch[],dml_float_t DisplayPipeLineDeliveryTimeChromaPrefetch[],dml_float_t DisplayPipeRequestDeliveryTimeLuma[],dml_float_t DisplayPipeRequestDeliveryTimeChroma[],dml_float_t DisplayPipeRequestDeliveryTimeLumaPrefetch[],dml_float_t DisplayPipeRequestDeliveryTimeChromaPrefetch[],dml_float_t CursorRequestDeliveryTime[],dml_float_t CursorRequestDeliveryTimePrefetch[])3265 static void CalculatePixelDeliveryTimes(
3266 dml_uint_t NumberOfActiveSurfaces,
3267 dml_float_t VRatio[],
3268 dml_float_t VRatioChroma[],
3269 dml_float_t VRatioPrefetchY[],
3270 dml_float_t VRatioPrefetchC[],
3271 dml_uint_t swath_width_luma_ub[],
3272 dml_uint_t swath_width_chroma_ub[],
3273 dml_uint_t DPPPerSurface[],
3274 dml_float_t HRatio[],
3275 dml_float_t HRatioChroma[],
3276 dml_float_t PixelClock[],
3277 dml_float_t PSCL_THROUGHPUT[],
3278 dml_float_t PSCL_THROUGHPUT_CHROMA[],
3279 dml_float_t Dppclk[],
3280 dml_uint_t BytePerPixelC[],
3281 enum dml_rotation_angle SourceScan[],
3282 dml_uint_t NumberOfCursors[],
3283 dml_uint_t CursorWidth[],
3284 dml_uint_t CursorBPP[],
3285 dml_uint_t BlockWidth256BytesY[],
3286 dml_uint_t BlockHeight256BytesY[],
3287 dml_uint_t BlockWidth256BytesC[],
3288 dml_uint_t BlockHeight256BytesC[],
3289
3290 // Output
3291 dml_float_t DisplayPipeLineDeliveryTimeLuma[],
3292 dml_float_t DisplayPipeLineDeliveryTimeChroma[],
3293 dml_float_t DisplayPipeLineDeliveryTimeLumaPrefetch[],
3294 dml_float_t DisplayPipeLineDeliveryTimeChromaPrefetch[],
3295 dml_float_t DisplayPipeRequestDeliveryTimeLuma[],
3296 dml_float_t DisplayPipeRequestDeliveryTimeChroma[],
3297 dml_float_t DisplayPipeRequestDeliveryTimeLumaPrefetch[],
3298 dml_float_t DisplayPipeRequestDeliveryTimeChromaPrefetch[],
3299 dml_float_t CursorRequestDeliveryTime[],
3300 dml_float_t CursorRequestDeliveryTimePrefetch[])
3301 {
3302 dml_float_t req_per_swath_ub;
3303
3304 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
3305
3306 #ifdef __DML_VBA_DEBUG__
3307 dml_print("DML::%s: k=%u : HRatio = %f\n", __func__, k, HRatio[k]);
3308 dml_print("DML::%s: k=%u : VRatio = %f\n", __func__, k, VRatio[k]);
3309 dml_print("DML::%s: k=%u : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
3310 dml_print("DML::%s: k=%u : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
3311 dml_print("DML::%s: k=%u : swath_width_luma_ub = %u\n", __func__, k, swath_width_luma_ub[k]);
3312 dml_print("DML::%s: k=%u : swath_width_chroma_ub = %u\n", __func__, k, swath_width_chroma_ub[k]);
3313 dml_print("DML::%s: k=%u : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
3314 dml_print("DML::%s: k=%u : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
3315 dml_print("DML::%s: k=%u : DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]);
3316 dml_print("DML::%s: k=%u : PixelClock = %f\n", __func__, k, PixelClock[k]);
3317 dml_print("DML::%s: k=%u : Dppclk = %f\n", __func__, k, Dppclk[k]);
3318 #endif
3319
3320 if (VRatio[k] <= 1) {
3321 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
3322 } else {
3323 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
3324 }
3325
3326 if (BytePerPixelC[k] == 0) {
3327 DisplayPipeLineDeliveryTimeChroma[k] = 0;
3328 } else {
3329 if (VRatioChroma[k] <= 1) {
3330 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
3331 } else {
3332 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
3333 }
3334 }
3335
3336 if (VRatioPrefetchY[k] <= 1) {
3337 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
3338 } else {
3339 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
3340 }
3341
3342 if (BytePerPixelC[k] == 0) {
3343 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
3344 } else {
3345 if (VRatioPrefetchC[k] <= 1) {
3346 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
3347 } else {
3348 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
3349 }
3350 }
3351 #ifdef __DML_VBA_DEBUG__
3352 dml_print("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
3353 dml_print("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
3354 dml_print("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
3355 dml_print("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
3356 #endif
3357 }
3358
3359 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
3360 if (!dml_is_vertical_rotation(SourceScan[k])) {
3361 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
3362 } else {
3363 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
3364 }
3365 #ifdef __DML_VBA_DEBUG__
3366 dml_print("DML::%s: k=%u : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub);
3367 #endif
3368
3369 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
3370 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
3371 if (BytePerPixelC[k] == 0) {
3372 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
3373 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
3374 } else {
3375 if (!dml_is_vertical_rotation(SourceScan[k])) {
3376 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
3377 } else {
3378 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
3379 }
3380 #ifdef __DML_VBA_DEBUG__
3381 dml_print("DML::%s: k=%u : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub);
3382 #endif
3383 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
3384 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
3385 }
3386 #ifdef __DML_VBA_DEBUG__
3387 dml_print("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
3388 dml_print("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
3389 dml_print("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
3390 dml_print("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
3391 #endif
3392 }
3393
3394 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
3395 dml_uint_t cursor_req_per_width;
3396 cursor_req_per_width = (dml_uint_t)(dml_ceil((dml_float_t) CursorWidth[k] * (dml_float_t) CursorBPP[k] / 256.0 / 8.0, 1.0));
3397 if (NumberOfCursors[k] > 0) {
3398 if (VRatio[k] <= 1) {
3399 CursorRequestDeliveryTime[k] = (dml_float_t) CursorWidth[k] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
3400 } else {
3401 CursorRequestDeliveryTime[k] = (dml_float_t) CursorWidth[k] / PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
3402 }
3403 if (VRatioPrefetchY[k] <= 1) {
3404 CursorRequestDeliveryTimePrefetch[k] = (dml_float_t) CursorWidth[k] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
3405 } else {
3406 CursorRequestDeliveryTimePrefetch[k] = (dml_float_t) CursorWidth[k] / PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
3407 }
3408 } else {
3409 CursorRequestDeliveryTime[k] = 0;
3410 CursorRequestDeliveryTimePrefetch[k] = 0;
3411 }
3412 #ifdef __DML_VBA_DEBUG__
3413 dml_print("DML::%s: k=%u : NumberOfCursors = %u\n", __func__, k, NumberOfCursors[k]);
3414 dml_print("DML::%s: k=%u : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
3415 dml_print("DML::%s: k=%u : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
3416 #endif
3417 }
3418 } // CalculatePixelDeliveryTimes
3419
CalculateMetaAndPTETimes(dml_bool_t use_one_row_for_frame[],dml_uint_t NumberOfActiveSurfaces,dml_bool_t GPUVMEnable,dml_uint_t MetaChunkSize,dml_uint_t MinMetaChunkSizeBytes,dml_uint_t HTotal[],dml_float_t VRatio[],dml_float_t VRatioChroma[],dml_float_t DestinationLinesToRequestRowInVBlank[],dml_float_t DestinationLinesToRequestRowInImmediateFlip[],dml_bool_t DCCEnable[],dml_float_t PixelClock[],dml_uint_t BytePerPixelY[],dml_uint_t BytePerPixelC[],enum dml_rotation_angle SourceScan[],dml_uint_t dpte_row_height[],dml_uint_t dpte_row_height_chroma[],dml_uint_t meta_row_width[],dml_uint_t meta_row_width_chroma[],dml_uint_t meta_row_height[],dml_uint_t meta_row_height_chroma[],dml_uint_t meta_req_width[],dml_uint_t meta_req_width_chroma[],dml_uint_t meta_req_height[],dml_uint_t meta_req_height_chroma[],dml_uint_t dpte_group_bytes[],dml_uint_t PTERequestSizeY[],dml_uint_t PTERequestSizeC[],dml_uint_t PixelPTEReqWidthY[],dml_uint_t PixelPTEReqHeightY[],dml_uint_t PixelPTEReqWidthC[],dml_uint_t PixelPTEReqHeightC[],dml_uint_t dpte_row_width_luma_ub[],dml_uint_t dpte_row_width_chroma_ub[],dml_float_t DST_Y_PER_PTE_ROW_NOM_L[],dml_float_t DST_Y_PER_PTE_ROW_NOM_C[],dml_float_t DST_Y_PER_META_ROW_NOM_L[],dml_float_t DST_Y_PER_META_ROW_NOM_C[],dml_float_t TimePerMetaChunkNominal[],dml_float_t TimePerChromaMetaChunkNominal[],dml_float_t TimePerMetaChunkVBlank[],dml_float_t TimePerChromaMetaChunkVBlank[],dml_float_t TimePerMetaChunkFlip[],dml_float_t TimePerChromaMetaChunkFlip[],dml_float_t time_per_pte_group_nom_luma[],dml_float_t time_per_pte_group_vblank_luma[],dml_float_t time_per_pte_group_flip_luma[],dml_float_t time_per_pte_group_nom_chroma[],dml_float_t time_per_pte_group_vblank_chroma[],dml_float_t time_per_pte_group_flip_chroma[])3420 static void CalculateMetaAndPTETimes(
3421 dml_bool_t use_one_row_for_frame[],
3422 dml_uint_t NumberOfActiveSurfaces,
3423 dml_bool_t GPUVMEnable,
3424 dml_uint_t MetaChunkSize,
3425 dml_uint_t MinMetaChunkSizeBytes,
3426 dml_uint_t HTotal[],
3427 dml_float_t VRatio[],
3428 dml_float_t VRatioChroma[],
3429 dml_float_t DestinationLinesToRequestRowInVBlank[],
3430 dml_float_t DestinationLinesToRequestRowInImmediateFlip[],
3431 dml_bool_t DCCEnable[],
3432 dml_float_t PixelClock[],
3433 dml_uint_t BytePerPixelY[],
3434 dml_uint_t BytePerPixelC[],
3435 enum dml_rotation_angle SourceScan[],
3436 dml_uint_t dpte_row_height[],
3437 dml_uint_t dpte_row_height_chroma[],
3438 dml_uint_t meta_row_width[],
3439 dml_uint_t meta_row_width_chroma[],
3440 dml_uint_t meta_row_height[],
3441 dml_uint_t meta_row_height_chroma[],
3442 dml_uint_t meta_req_width[],
3443 dml_uint_t meta_req_width_chroma[],
3444 dml_uint_t meta_req_height[],
3445 dml_uint_t meta_req_height_chroma[],
3446 dml_uint_t dpte_group_bytes[],
3447 dml_uint_t PTERequestSizeY[],
3448 dml_uint_t PTERequestSizeC[],
3449 dml_uint_t PixelPTEReqWidthY[],
3450 dml_uint_t PixelPTEReqHeightY[],
3451 dml_uint_t PixelPTEReqWidthC[],
3452 dml_uint_t PixelPTEReqHeightC[],
3453 dml_uint_t dpte_row_width_luma_ub[],
3454 dml_uint_t dpte_row_width_chroma_ub[],
3455
3456 // Output
3457 dml_float_t DST_Y_PER_PTE_ROW_NOM_L[],
3458 dml_float_t DST_Y_PER_PTE_ROW_NOM_C[],
3459 dml_float_t DST_Y_PER_META_ROW_NOM_L[],
3460 dml_float_t DST_Y_PER_META_ROW_NOM_C[],
3461 dml_float_t TimePerMetaChunkNominal[],
3462 dml_float_t TimePerChromaMetaChunkNominal[],
3463 dml_float_t TimePerMetaChunkVBlank[],
3464 dml_float_t TimePerChromaMetaChunkVBlank[],
3465 dml_float_t TimePerMetaChunkFlip[],
3466 dml_float_t TimePerChromaMetaChunkFlip[],
3467 dml_float_t time_per_pte_group_nom_luma[],
3468 dml_float_t time_per_pte_group_vblank_luma[],
3469 dml_float_t time_per_pte_group_flip_luma[],
3470 dml_float_t time_per_pte_group_nom_chroma[],
3471 dml_float_t time_per_pte_group_vblank_chroma[],
3472 dml_float_t time_per_pte_group_flip_chroma[])
3473 {
3474 dml_uint_t meta_chunk_width;
3475 dml_uint_t min_meta_chunk_width;
3476 dml_uint_t meta_chunk_per_row_int;
3477 dml_uint_t meta_row_remainder;
3478 dml_uint_t meta_chunk_threshold;
3479 dml_uint_t meta_chunks_per_row_ub;
3480 dml_uint_t meta_chunk_width_chroma;
3481 dml_uint_t min_meta_chunk_width_chroma;
3482 dml_uint_t meta_chunk_per_row_int_chroma;
3483 dml_uint_t meta_row_remainder_chroma;
3484 dml_uint_t meta_chunk_threshold_chroma;
3485 dml_uint_t meta_chunks_per_row_ub_chroma;
3486 dml_uint_t dpte_group_width_luma;
3487 dml_uint_t dpte_groups_per_row_luma_ub;
3488 dml_uint_t dpte_group_width_chroma;
3489 dml_uint_t dpte_groups_per_row_chroma_ub;
3490
3491 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
3492 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
3493 if (BytePerPixelC[k] == 0) {
3494 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
3495 } else {
3496 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
3497 }
3498 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
3499 if (BytePerPixelC[k] == 0) {
3500 DST_Y_PER_META_ROW_NOM_C[k] = 0;
3501 } else {
3502 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
3503 }
3504 }
3505
3506 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
3507 if (DCCEnable[k] == true) {
3508 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
3509 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
3510 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
3511 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
3512 if (!dml_is_vertical_rotation(SourceScan[k])) {
3513 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
3514 } else {
3515 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
3516 }
3517 if (meta_row_remainder <= meta_chunk_threshold) {
3518 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
3519 } else {
3520 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
3521 }
3522 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
3523 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
3524 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
3525 if (BytePerPixelC[k] == 0) {
3526 TimePerChromaMetaChunkNominal[k] = 0;
3527 TimePerChromaMetaChunkVBlank[k] = 0;
3528 TimePerChromaMetaChunkFlip[k] = 0;
3529 } else {
3530 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
3531 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
3532 meta_chunk_per_row_int_chroma = (dml_uint_t)((dml_float_t) meta_row_width_chroma[k] / meta_chunk_width_chroma);
3533 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
3534 if (!dml_is_vertical_rotation(SourceScan[k])) {
3535 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
3536 } else {
3537 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
3538 }
3539 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
3540 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
3541 } else {
3542 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
3543 }
3544 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
3545 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
3546 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
3547 }
3548 } else {
3549 TimePerMetaChunkNominal[k] = 0;
3550 TimePerMetaChunkVBlank[k] = 0;
3551 TimePerMetaChunkFlip[k] = 0;
3552 TimePerChromaMetaChunkNominal[k] = 0;
3553 TimePerChromaMetaChunkVBlank[k] = 0;
3554 TimePerChromaMetaChunkFlip[k] = 0;
3555 }
3556 }
3557
3558 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
3559 if (GPUVMEnable == true) {
3560 if (!dml_is_vertical_rotation(SourceScan[k])) {
3561 dpte_group_width_luma = (dml_uint_t)((dml_float_t) dpte_group_bytes[k] / (dml_float_t) PTERequestSizeY[k] * PixelPTEReqWidthY[k]);
3562 } else {
3563 dpte_group_width_luma = (dml_uint_t)((dml_float_t) dpte_group_bytes[k] / (dml_float_t) PTERequestSizeY[k] * PixelPTEReqHeightY[k]);
3564 }
3565
3566 if (use_one_row_for_frame[k]) {
3567 dpte_groups_per_row_luma_ub = (dml_uint_t)(dml_ceil((dml_float_t) dpte_row_width_luma_ub[k] / (dml_float_t) dpte_group_width_luma / 2.0, 1.0));
3568 } else {
3569 dpte_groups_per_row_luma_ub = (dml_uint_t)(dml_ceil((dml_float_t) dpte_row_width_luma_ub[k] / (dml_float_t) dpte_group_width_luma, 1.0));
3570 }
3571
3572 dml_print("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, use_one_row_for_frame[k]);
3573 dml_print("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, dpte_group_bytes[k]);
3574 dml_print("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, PTERequestSizeY[k]);
3575 dml_print("DML::%s: k=%u, PixelPTEReqWidthY = %u\n", __func__, k, PixelPTEReqWidthY[k]);
3576 dml_print("DML::%s: k=%u, PixelPTEReqHeightY = %u\n", __func__, k, PixelPTEReqHeightY[k]);
3577 dml_print("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, dpte_row_width_luma_ub[k]);
3578 dml_print("DML::%s: k=%u, dpte_group_width_luma = %u\n", __func__, k, dpte_group_width_luma);
3579 dml_print("DML::%s: k=%u, dpte_groups_per_row_luma_ub = %u\n", __func__, k, dpte_groups_per_row_luma_ub);
3580
3581 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
3582 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
3583 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
3584 if (BytePerPixelC[k] == 0) {
3585 time_per_pte_group_nom_chroma[k] = 0;
3586 time_per_pte_group_vblank_chroma[k] = 0;
3587 time_per_pte_group_flip_chroma[k] = 0;
3588 } else {
3589 if (!dml_is_vertical_rotation(SourceScan[k])) {
3590 dpte_group_width_chroma = (dml_uint_t)((dml_float_t) dpte_group_bytes[k] / (dml_float_t) PTERequestSizeC[k] * PixelPTEReqWidthC[k]);
3591 } else {
3592 dpte_group_width_chroma = (dml_uint_t)((dml_float_t) dpte_group_bytes[k] / (dml_float_t) PTERequestSizeC[k] * PixelPTEReqHeightC[k]);
3593 }
3594
3595 if (use_one_row_for_frame[k]) {
3596 dpte_groups_per_row_chroma_ub = (dml_uint_t)(dml_ceil((dml_float_t) dpte_row_width_chroma_ub[k] / (dml_float_t) dpte_group_width_chroma / 2.0, 1.0));
3597 } else {
3598 dpte_groups_per_row_chroma_ub = (dml_uint_t)(dml_ceil((dml_float_t) dpte_row_width_chroma_ub[k] / (dml_float_t) dpte_group_width_chroma, 1.0));
3599 }
3600 dml_print("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, dpte_row_width_chroma_ub[k]);
3601 dml_print("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma);
3602 dml_print("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub);
3603
3604 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
3605 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
3606 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
3607 }
3608 } else {
3609 time_per_pte_group_nom_luma[k] = 0;
3610 time_per_pte_group_vblank_luma[k] = 0;
3611 time_per_pte_group_flip_luma[k] = 0;
3612 time_per_pte_group_nom_chroma[k] = 0;
3613 time_per_pte_group_vblank_chroma[k] = 0;
3614 time_per_pte_group_flip_chroma[k] = 0;
3615 }
3616 #ifdef __DML_VBA_DEBUG__
3617 dml_print("DML::%s: k=%u, DestinationLinesToRequestRowInVBlank = %f\n", __func__, k, DestinationLinesToRequestRowInVBlank[k]);
3618 dml_print("DML::%s: k=%u, DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
3619
3620 dml_print("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_L = %f\n", __func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
3621 dml_print("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_C = %f\n", __func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
3622 dml_print("DML::%s: k=%u, DST_Y_PER_META_ROW_NOM_L = %f\n", __func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
3623 dml_print("DML::%s: k=%u, DST_Y_PER_META_ROW_NOM_C = %f\n", __func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
3624 dml_print("DML::%s: k=%u, TimePerMetaChunkNominal = %f\n", __func__, k, TimePerMetaChunkNominal[k]);
3625 dml_print("DML::%s: k=%u, TimePerMetaChunkVBlank = %f\n", __func__, k, TimePerMetaChunkVBlank[k]);
3626 dml_print("DML::%s: k=%u, TimePerMetaChunkFlip = %f\n", __func__, k, TimePerMetaChunkFlip[k]);
3627 dml_print("DML::%s: k=%u, TimePerChromaMetaChunkNominal = %f\n", __func__, k, TimePerChromaMetaChunkNominal[k]);
3628 dml_print("DML::%s: k=%u, TimePerChromaMetaChunkVBlank = %f\n", __func__, k, TimePerChromaMetaChunkVBlank[k]);
3629 dml_print("DML::%s: k=%u, TimePerChromaMetaChunkFlip = %f\n", __func__, k, TimePerChromaMetaChunkFlip[k]);
3630 dml_print("DML::%s: k=%u, time_per_pte_group_nom_luma = %f\n", __func__, k, time_per_pte_group_nom_luma[k]);
3631 dml_print("DML::%s: k=%u, time_per_pte_group_vblank_luma = %f\n", __func__, k, time_per_pte_group_vblank_luma[k]);
3632 dml_print("DML::%s: k=%u, time_per_pte_group_flip_luma = %f\n", __func__, k, time_per_pte_group_flip_luma[k]);
3633 dml_print("DML::%s: k=%u, time_per_pte_group_nom_chroma = %f\n", __func__, k, time_per_pte_group_nom_chroma[k]);
3634 dml_print("DML::%s: k=%u, time_per_pte_group_vblank_chroma = %f\n", __func__, k, time_per_pte_group_vblank_chroma[k]);
3635 dml_print("DML::%s: k=%u, time_per_pte_group_flip_chroma = %f\n", __func__, k, time_per_pte_group_flip_chroma[k]);
3636 #endif
3637 }
3638 } // CalculateMetaAndPTETimes
3639
/*
 * CalculateVMGroupAndRequestTimes
 *
 * For each surface k in 0 .. NumberOfActiveSurfaces-1, writes
 * TimePerVMGroup{VBlank,Flip}[k] and TimePerVMRequest{VBlank,Flip}[k].
 *
 * The outputs are 0 unless GPUVMEnable is true and either DCCEnable[k] is
 * true or GPUVMMaxPageTableLevels > 1. Otherwise a group count and a request
 * count are derived from the dpde0 / meta-pte byte counts:
 *   - DCC off:                 dpde0 bytes only;
 *   - DCC on, one table level: meta-pte bytes only;
 *   - DCC on, more levels:     dpde0 plus meta-pte bytes, plus a constant
 *                              (2.0 with chroma, 1.0 without) on the group count.
 * Groups are counted as ceil(bytes / vm_group_bytes[k]); requests as
 * bytes / 64 (integer division). Chroma terms are added only when
 * BytePerPixelC[k] > 0. Each output is the VM request window
 * (destination lines * HTotal / PixelClock) divided by the relevant count,
 * and is halved when GPUVMMaxPageTableLevels > 2.
 *
 * dpte_row_width_luma_ub and dpte_row_width_chroma_ub are accepted but not
 * read by this function.
 */
static void CalculateVMGroupAndRequestTimes(
		dml_uint_t NumberOfActiveSurfaces,
		dml_bool_t GPUVMEnable,
		dml_uint_t GPUVMMaxPageTableLevels,
		dml_uint_t HTotal[],
		dml_uint_t BytePerPixelC[],
		dml_float_t DestinationLinesToRequestVMInVBlank[],
		dml_float_t DestinationLinesToRequestVMInImmediateFlip[],
		dml_bool_t DCCEnable[],
		dml_float_t PixelClock[],
		dml_uint_t dpte_row_width_luma_ub[],
		dml_uint_t dpte_row_width_chroma_ub[],
		dml_uint_t vm_group_bytes[],
		dml_uint_t dpde0_bytes_per_frame_ub_l[],
		dml_uint_t dpde0_bytes_per_frame_ub_c[],
		dml_uint_t meta_pte_bytes_per_frame_ub_l[],
		dml_uint_t meta_pte_bytes_per_frame_ub_c[],

		// Output
		dml_float_t TimePerVMGroupVBlank[],
		dml_float_t TimePerVMGroupFlip[],
		dml_float_t TimePerVMRequestVBlank[],
		dml_float_t TimePerVMRequestFlip[])
{
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
	dml_print("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable);
#endif
	for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%u, DCCEnable = %u\n", __func__, k, DCCEnable[k]);
		dml_print("DML::%s: k=%u, vm_group_bytes = %u\n", __func__, k, vm_group_bytes[k]);
		dml_print("DML::%s: k=%u, dpde0_bytes_per_frame_ub_l = %u\n", __func__, k, dpde0_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%u, dpde0_bytes_per_frame_ub_c = %u\n", __func__, k, dpde0_bytes_per_frame_ub_c[k]);
		dml_print("DML::%s: k=%u, meta_pte_bytes_per_frame_ub_l = %u\n", __func__, k, meta_pte_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%u, meta_pte_bytes_per_frame_ub_c = %u\n", __func__, k, meta_pte_bytes_per_frame_ub_c[k]);
#endif

		if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
			const dml_bool_t has_chroma = (BytePerPixelC[k] > 0);
			const dml_float_t group_bytes = (dml_float_t) vm_group_bytes[k];
			dml_uint_t vm_groups;
			dml_uint_t vm_requests;

			if (DCCEnable[k] == false) {
				// DCC off: only the dpde0 bytes contribute.
				if (has_chroma) {
					vm_groups = (dml_uint_t) (dml_ceil((dml_float_t) dpde0_bytes_per_frame_ub_l[k] / group_bytes, 1.0) +
							dml_ceil((dml_float_t) dpde0_bytes_per_frame_ub_c[k] / group_bytes, 1.0));
					vm_requests = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
				} else {
					vm_groups = (dml_uint_t) (dml_ceil((dml_float_t) dpde0_bytes_per_frame_ub_l[k] / group_bytes, 1.0));
					vm_requests = dpde0_bytes_per_frame_ub_l[k] / 64;
				}
			} else if (GPUVMMaxPageTableLevels == 1) {
				// DCC on with a single page-table level: only the meta-pte bytes contribute.
				if (has_chroma) {
					vm_groups = (dml_uint_t)(dml_ceil((dml_float_t) meta_pte_bytes_per_frame_ub_l[k] / group_bytes, 1.0) +
							dml_ceil((dml_float_t) meta_pte_bytes_per_frame_ub_c[k] / group_bytes, 1.0));
					vm_requests = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
				} else {
					vm_groups = (dml_uint_t)(dml_ceil((dml_float_t) meta_pte_bytes_per_frame_ub_l[k] / group_bytes, 1.0));
					vm_requests = meta_pte_bytes_per_frame_ub_l[k] / 64;
				}
			} else {
				// DCC on with more levels: dpde0 and meta-pte bytes both contribute,
				// plus a constant on the group count.
				if (has_chroma) {
					vm_groups = (dml_uint_t)(2.0 + dml_ceil((dml_float_t) dpde0_bytes_per_frame_ub_l[k] / group_bytes, 1) +
							dml_ceil((dml_float_t) dpde0_bytes_per_frame_ub_c[k] / group_bytes, 1) +
							dml_ceil((dml_float_t) meta_pte_bytes_per_frame_ub_l[k] / group_bytes, 1) +
							dml_ceil((dml_float_t) meta_pte_bytes_per_frame_ub_c[k] / group_bytes, 1));
					vm_requests = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
				} else {
					vm_groups = (dml_uint_t)(1.0 + dml_ceil((dml_float_t) dpde0_bytes_per_frame_ub_l[k] / group_bytes, 1) +
							dml_ceil((dml_float_t) meta_pte_bytes_per_frame_ub_l[k] / group_bytes, 1));
					vm_requests = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
				}
			}

			TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / vm_groups;
			TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / vm_groups;
			TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / vm_requests;
			TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / vm_requests;

			// With more than two page-table levels every per-group/per-request time is halved.
			if (GPUVMMaxPageTableLevels > 2) {
				TimePerVMGroupVBlank[k] /= 2;
				TimePerVMGroupFlip[k] /= 2;
				TimePerVMRequestVBlank[k] /= 2;
				TimePerVMRequestFlip[k] /= 2;
			}

		} else {
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%u, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
		dml_print("DML::%s: k=%u, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
		dml_print("DML::%s: k=%u, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
		dml_print("DML::%s: k=%u, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
#endif
	}
} // CalculateVMGroupAndRequestTimes
3760
CalculateStutterEfficiency(struct display_mode_lib_scratch_st * scratch,struct CalculateStutterEfficiency_params_st * p)3761 static void CalculateStutterEfficiency(struct display_mode_lib_scratch_st *scratch,
3762 struct CalculateStutterEfficiency_params_st *p)
3763 {
3764 dml_float_t DETBufferingTimeY = 0;
3765 dml_float_t SwathWidthYCriticalSurface = 0;
3766 dml_float_t SwathHeightYCriticalSurface = 0;
3767 dml_float_t VActiveTimeCriticalSurface = 0;
3768 dml_float_t FrameTimeCriticalSurface = 0;
3769 dml_uint_t BytePerPixelYCriticalSurface = 0;
3770 dml_float_t LinesToFinishSwathTransferStutterCriticalSurface = 0;
3771 dml_uint_t DETBufferSizeYCriticalSurface = 0;
3772 dml_float_t MinTTUVBlankCriticalSurface = 0;
3773 dml_uint_t BlockWidth256BytesYCriticalSurface = 0;
3774 dml_bool_t SinglePlaneCriticalSurface = 0;
3775 dml_bool_t SinglePipeCriticalSurface = 0;
3776 dml_float_t TotalCompressedReadBandwidth = 0;
3777 dml_float_t TotalRowReadBandwidth = 0;
3778 dml_float_t AverageDCCCompressionRate = 0;
3779 dml_float_t EffectiveCompressedBufferSize = 0;
3780 dml_float_t PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = 0;
3781 dml_float_t StutterBurstTime = 0;
3782 dml_uint_t TotalActiveWriteback = 0;
3783 dml_float_t LinesInDETY = 0;
3784 dml_float_t LinesInDETYRoundedDownToSwath = 0;
3785 dml_float_t MaximumEffectiveCompressionLuma = 0;
3786 dml_float_t MaximumEffectiveCompressionChroma = 0;
3787 dml_float_t TotalZeroSizeRequestReadBandwidth = 0;
3788 dml_float_t TotalZeroSizeCompressedReadBandwidth = 0;
3789 dml_float_t AverageDCCZeroSizeFraction = 0;
3790 dml_float_t AverageZeroSizeCompressionRate = 0;
3791
3792 dml_bool_t FoundCriticalSurface = false;
3793
3794 dml_uint_t TotalNumberOfActiveOTG = 0;
3795 dml_float_t SinglePixelClock = 0;
3796 dml_uint_t SingleHTotal = 0;
3797 dml_uint_t SingleVTotal = 0;
3798 dml_bool_t SameTiming = true;
3799
3800 dml_float_t LastStutterPeriod = 0.0;
3801 dml_float_t LastZ8StutterPeriod = 0.0;
3802
3803 dml_uint_t SwathSizeCriticalSurface;
3804 dml_uint_t LastChunkOfSwathSize;
3805 dml_uint_t MissingPartOfLastSwathOfDETSize;
3806
3807 TotalZeroSizeRequestReadBandwidth = 0;
3808 TotalZeroSizeCompressedReadBandwidth = 0;
3809 TotalRowReadBandwidth = 0;
3810 TotalCompressedReadBandwidth = 0;
3811
3812 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3813 if (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) {
3814 if (p->DCCEnable[k] == true) {
3815 if ((dml_is_vertical_rotation(p->SourceScan[k]) && p->BlockWidth256BytesY[k] > p->SwathHeightY[k]) || (!dml_is_vertical_rotation(p->SourceScan[k]) && p->BlockHeight256BytesY[k] > p->SwathHeightY[k]) || p->DCCYMaxUncompressedBlock[k] < 256) {
3816 MaximumEffectiveCompressionLuma = 2;
3817 } else {
3818 MaximumEffectiveCompressionLuma = 4;
3819 }
3820 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] / dml_min(p->NetDCCRateLuma[k], MaximumEffectiveCompressionLuma);
3821 #ifdef __DML_VBA_DEBUG__
3822 dml_print("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]);
3823 dml_print("DML::%s: k=%u, NetDCCRateLuma = %f\n", __func__, k, p->NetDCCRateLuma[k]);
3824 dml_print("DML::%s: k=%u, MaximumEffectiveCompressionLuma = %f\n", __func__, k, MaximumEffectiveCompressionLuma);
3825 #endif
3826 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->DCCFractionOfZeroSizeRequestsLuma[k];
3827 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
3828
3829 if (p->ReadBandwidthSurfaceChroma[k] > 0) {
3830 if ((dml_is_vertical_rotation(p->SourceScan[k]) && p->BlockWidth256BytesC[k] > p->SwathHeightC[k]) || (!dml_is_vertical_rotation(p->SourceScan[k]) && p->BlockHeight256BytesC[k] > p->SwathHeightC[k]) || p->DCCCMaxUncompressedBlock[k] < 256) {
3831 MaximumEffectiveCompressionChroma = 2;
3832 } else {
3833 MaximumEffectiveCompressionChroma = 4;
3834 }
3835 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] / dml_min(p->NetDCCRateChroma[k], MaximumEffectiveCompressionChroma);
3836 #ifdef __DML_VBA_DEBUG__
3837 dml_print("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, p->ReadBandwidthSurfaceChroma[k]);
3838 dml_print("DML::%s: k=%u, NetDCCRateChroma = %f\n", __func__, k, p->NetDCCRateChroma[k]);
3839 dml_print("DML::%s: k=%u, MaximumEffectiveCompressionChroma = %f\n", __func__, k, MaximumEffectiveCompressionChroma);
3840 #endif
3841 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->DCCFractionOfZeroSizeRequestsChroma[k];
3842 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
3843 }
3844 } else {
3845 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] + p->ReadBandwidthSurfaceChroma[k];
3846 }
3847 TotalRowReadBandwidth = TotalRowReadBandwidth + p->DPPPerSurface[k] * (p->meta_row_bw[k] + p->dpte_row_bw[k]);
3848 }
3849 }
3850
3851 AverageDCCCompressionRate = p->TotalDataReadBandwidth / TotalCompressedReadBandwidth;
3852 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / p->TotalDataReadBandwidth;
3853
3854 #ifdef __DML_VBA_DEBUG__
3855 dml_print("DML::%s: UnboundedRequestEnabled = %u\n", __func__, p->UnboundedRequestEnabled);
3856 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
3857 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
3858 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
3859 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
3860 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
3861 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
3862 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
3863 dml_print("DML::%s: CompbufReservedSpace64B = %u\n", __func__, p->CompbufReservedSpace64B);
3864 dml_print("DML::%s: CompbufReservedSpaceZs = %u\n", __func__, p->CompbufReservedSpaceZs);
3865 dml_print("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, p->CompressedBufferSizeInkByte);
3866 #endif
3867 if (AverageDCCZeroSizeFraction == 1) {
3868 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
3869 EffectiveCompressedBufferSize = (dml_float_t)p->MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + ((dml_float_t)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 * AverageZeroSizeCompressionRate;
3870 } else if (AverageDCCZeroSizeFraction > 0) {
3871 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
3872 EffectiveCompressedBufferSize = dml_min((dml_float_t)p->CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
3873 (dml_float_t)p->MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)) +
3874 dml_min(((dml_float_t)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * AverageDCCCompressionRate,
3875 ((dml_float_t)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
3876
3877 #ifdef __DML_VBA_DEBUG__
3878 dml_print("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
3879 dml_print("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
3880 dml_print("DML::%s: min 3 = %f\n", __func__, (p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
3881 dml_print("DML::%s: min 4 = %f\n", __func__, (p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
3882 #endif
3883 } else {
3884 EffectiveCompressedBufferSize = dml_min((dml_float_t)p->CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
3885 (dml_float_t)p->MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) +
3886 ((dml_float_t)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * AverageDCCCompressionRate;
3887
3888 #ifdef __DML_VBA_DEBUG__
3889 dml_print("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
3890 dml_print("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
3891 #endif
3892 }
3893
3894 #ifdef __DML_VBA_DEBUG__
3895 dml_print("DML::%s: MetaFIFOSizeInKEntries = %u\n", __func__, p->MetaFIFOSizeInKEntries);
3896 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
3897 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
3898 #endif
3899
3900 *p->StutterPeriod = 0;
3901
3902 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3903 if (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) {
3904 LinesInDETY = ((dml_float_t)p->DETBufferSizeY[k] + (p->UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * p->ReadBandwidthSurfaceLuma[k] / p->TotalDataReadBandwidth) / p->BytePerPixelDETY[k] / p->SwathWidthY[k];
3905 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, p->SwathHeightY[k]);
3906 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((dml_float_t)p->HTotal[k] / p->PixelClock[k]) / p->VRatio[k];
3907 #ifdef __DML_VBA_DEBUG__
3908 dml_print("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
3909 dml_print("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]);
3910 dml_print("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]);
3911 dml_print("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]);
3912 dml_print("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, p->TotalDataReadBandwidth);
3913 dml_print("DML::%s: k=%u, LinesInDETY = %f\n", __func__, k, LinesInDETY);
3914 dml_print("DML::%s: k=%u, LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
3915 dml_print("DML::%s: k=%u, HTotal = %u\n", __func__, k, p->HTotal[k]);
3916 dml_print("DML::%s: k=%u, PixelClock = %f\n", __func__, k, p->PixelClock[k]);
3917 dml_print("DML::%s: k=%u, VRatio = %f\n", __func__, k, p->VRatio[k]);
3918 dml_print("DML::%s: k=%u, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
3919 dml_print("DML::%s: k=%u,PixelClock = %f\n", __func__, k, p->PixelClock[k]);
3920 #endif
3921
3922 if (!FoundCriticalSurface || DETBufferingTimeY < *p->StutterPeriod) {
3923 dml_bool_t isInterlaceTiming = p->Interlace[k] && !p->ProgressiveToInterlaceUnitInOPP;
3924
3925 FoundCriticalSurface = true;
3926 *p->StutterPeriod = DETBufferingTimeY;
3927 FrameTimeCriticalSurface = (isInterlaceTiming ? dml_floor((dml_float_t)p->VTotal[k]/2.0, 1.0) : p->VTotal[k]) * (dml_float_t)p->HTotal[k] / p->PixelClock[k];
3928 VActiveTimeCriticalSurface = (isInterlaceTiming ? dml_floor((dml_float_t)p->VActive[k]/2.0, 1.0) : p->VActive[k]) * (dml_float_t)p->HTotal[k] / p->PixelClock[k];
3929 BytePerPixelYCriticalSurface = p->BytePerPixelY[k];
3930 SwathWidthYCriticalSurface = p->SwathWidthY[k];
3931 SwathHeightYCriticalSurface = p->SwathHeightY[k];
3932 BlockWidth256BytesYCriticalSurface = p->BlockWidth256BytesY[k];
3933 LinesToFinishSwathTransferStutterCriticalSurface = p->SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
3934 DETBufferSizeYCriticalSurface = p->DETBufferSizeY[k];
3935 MinTTUVBlankCriticalSurface = p->MinTTUVBlank[k];
3936 SinglePlaneCriticalSurface = (p->ReadBandwidthSurfaceChroma[k] == 0);
3937 SinglePipeCriticalSurface = (p->DPPPerSurface[k] == 1);
3938
3939 #ifdef __DML_VBA_DEBUG__
3940 dml_print("DML::%s: k=%u, FoundCriticalSurface = %u\n", __func__, k, FoundCriticalSurface);
3941 dml_print("DML::%s: k=%u, StutterPeriod = %f\n", __func__, k, *p->StutterPeriod);
3942 dml_print("DML::%s: k=%u, MinTTUVBlankCriticalSurface = %f\n", __func__, k, MinTTUVBlankCriticalSurface);
3943 dml_print("DML::%s: k=%u, FrameTimeCriticalSurface = %f\n", __func__, k, FrameTimeCriticalSurface);
3944 dml_print("DML::%s: k=%u, VActiveTimeCriticalSurface = %f\n", __func__, k, VActiveTimeCriticalSurface);
3945 dml_print("DML::%s: k=%u, BytePerPixelYCriticalSurface = %u\n", __func__, k, BytePerPixelYCriticalSurface);
3946 dml_print("DML::%s: k=%u, SwathWidthYCriticalSurface = %f\n", __func__, k, SwathWidthYCriticalSurface);
3947 dml_print("DML::%s: k=%u, SwathHeightYCriticalSurface = %f\n", __func__, k, SwathHeightYCriticalSurface);
3948 dml_print("DML::%s: k=%u, BlockWidth256BytesYCriticalSurface = %u\n", __func__, k, BlockWidth256BytesYCriticalSurface);
3949 dml_print("DML::%s: k=%u, SinglePlaneCriticalSurface = %u\n", __func__, k, SinglePlaneCriticalSurface);
3950 dml_print("DML::%s: k=%u, SinglePipeCriticalSurface = %u\n", __func__, k, SinglePipeCriticalSurface);
3951 dml_print("DML::%s: k=%u, LinesToFinishSwathTransferStutterCriticalSurface = %f\n", __func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
3952 #endif
3953 }
3954 }
3955 }
3956
3957 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*p->StutterPeriod * p->TotalDataReadBandwidth, EffectiveCompressedBufferSize);
3958 #ifdef __DML_VBA_DEBUG__
3959 dml_print("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, p->ROBBufferSizeInKByte);
3960 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
3961 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *p->StutterPeriod * p->TotalDataReadBandwidth);
3962 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, p->ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
3963 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
3964 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
3965 dml_print("DML::%s: ReturnBW = %f\n", __func__, p->ReturnBW);
3966 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, p->TotalDataReadBandwidth);
3967 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
3968 dml_print("DML::%s: DCFCLK = %f\n", __func__, p->DCFCLK);
3969 #endif
3970
3971 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / p->ReturnBW + (*p->StutterPeriod * p->TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64) + *p->StutterPeriod * TotalRowReadBandwidth / p->ReturnBW;
3972 #ifdef __DML_VBA_DEBUG__
3973 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / p->ReturnBW);
3974 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth));
3975 dml_print("DML::%s: Part 2 = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64));
3976 dml_print("DML::%s: Part 3 = %f\n", __func__, *p->StutterPeriod * TotalRowReadBandwidth / p->ReturnBW);
3977 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
3978 #endif
3979 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / p->ReturnBW);
3980
3981 dml_print("DML::%s: Time to finish residue swath=%f\n", __func__, LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / p->ReturnBW);
3982
3983 TotalActiveWriteback = 0;
3984 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3985 if (p->WritebackEnable[k]) {
3986 TotalActiveWriteback = TotalActiveWriteback + 1;
3987 }
3988 }
3989
3990 if (TotalActiveWriteback == 0) {
3991 #ifdef __DML_VBA_DEBUG__
3992 dml_print("DML::%s: SRExitTime = %f\n", __func__, p->SRExitTime);
3993 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, p->SRExitZ8Time);
3994 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
3995 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod);
3996 #endif
3997 *p->StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (p->SRExitTime + StutterBurstTime) / *p->StutterPeriod) * 100;
3998 *p->Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (p->SRExitZ8Time + StutterBurstTime) / *p->StutterPeriod) * 100;
3999 *p->NumberOfStutterBurstsPerFrame = (*p->StutterEfficiencyNotIncludingVBlank > 0 ? (dml_uint_t)(dml_ceil(VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0);
4000 *p->Z8NumberOfStutterBurstsPerFrame = (*p->Z8StutterEfficiencyNotIncludingVBlank > 0 ? (dml_uint_t)(dml_ceil(VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0);
4001 } else {
4002 *p->StutterEfficiencyNotIncludingVBlank = 0.;
4003 *p->Z8StutterEfficiencyNotIncludingVBlank = 0.;
4004 *p->NumberOfStutterBurstsPerFrame = 0;
4005 *p->Z8NumberOfStutterBurstsPerFrame = 0;
4006 }
4007 #ifdef __DML_VBA_DEBUG__
4008 dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
4009 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank);
4010 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank);
4011 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->NumberOfStutterBurstsPerFrame);
4012 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame);
4013 #endif
4014
4015 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
4016 if (p->UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) {
4017 if (p->BlendingAndTiming[k] == k) {
4018 if (TotalNumberOfActiveOTG == 0) {
4019 SinglePixelClock = p->PixelClock[k];
4020 SingleHTotal = p->HTotal[k];
4021 SingleVTotal = p->VTotal[k];
4022 } else if (SinglePixelClock != p->PixelClock[k] || SingleHTotal != p->HTotal[k] || SingleVTotal != p->VTotal[k]) {
4023 SameTiming = false;
4024 }
4025 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
4026 }
4027 }
4028 }
4029
4030 if (*p->StutterEfficiencyNotIncludingVBlank > 0) {
4031 LastStutterPeriod = VActiveTimeCriticalSurface - (*p->NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod;
4032
4033 if ((p->SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming &&
4034 LastStutterPeriod + MinTTUVBlankCriticalSurface > p->StutterEnterPlusExitWatermark) {
4035 *p->StutterEfficiency = (1 - (*p->NumberOfStutterBurstsPerFrame * p->SRExitTime + StutterBurstTime * VActiveTimeCriticalSurface / *p->StutterPeriod) / FrameTimeCriticalSurface) * 100;
4036 } else {
4037 *p->StutterEfficiency = *p->StutterEfficiencyNotIncludingVBlank;
4038 }
4039 } else {
4040 *p->StutterEfficiency = 0;
4041 }
4042
4043 if (*p->Z8StutterEfficiencyNotIncludingVBlank > 0) {
4044 LastZ8StutterPeriod = VActiveTimeCriticalSurface - (*p->NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod;
4045 if ((p->SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod + MinTTUVBlankCriticalSurface > p->Z8StutterEnterPlusExitWatermark) {
4046 *p->Z8StutterEfficiency = (1 - (*p->NumberOfStutterBurstsPerFrame * p->SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalSurface / *p->StutterPeriod) / FrameTimeCriticalSurface) * 100;
4047 } else {
4048 *p->Z8StutterEfficiency = *p->Z8StutterEfficiencyNotIncludingVBlank;
4049 }
4050 } else {
4051 *p->Z8StutterEfficiency = 0.;
4052 }
4053
4054 #ifdef __DML_VBA_DEBUG__
4055 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
4056 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Z8StutterEnterPlusExitWatermark);
4057 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
4058 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod);
4059 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *p->StutterEfficiency);
4060 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *p->Z8StutterEfficiency);
4061 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank);
4062 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame);
4063 #endif
4064
4065 SwathSizeCriticalSurface = (dml_uint_t)(BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface));
4066 LastChunkOfSwathSize = SwathSizeCriticalSurface % (p->PixelChunkSizeInKByte * 1024);
4067 MissingPartOfLastSwathOfDETSize = (dml_uint_t)(dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface) - DETBufferSizeYCriticalSurface);
4068
4069 *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!p->UnboundedRequestEnabled && (p->NumberOfActiveSurfaces == 1) && SinglePlaneCriticalSurface && SinglePipeCriticalSurface && (LastChunkOfSwathSize > 0) &&
4070 (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0) && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
4071
4072 #ifdef __DML_VBA_DEBUG__
4073 dml_print("DML::%s: SwathSizeCriticalSurface = %u\n", __func__, SwathSizeCriticalSurface);
4074 dml_print("DML::%s: DETBufferSizeYCriticalSurface = %u\n", __func__, DETBufferSizeYCriticalSurface);
4075 dml_print("DML::%s: PixelChunkSizeInKByte = %u\n", __func__, p->PixelChunkSizeInKByte);
4076 dml_print("DML::%s: LastChunkOfSwathSize = %u\n", __func__, LastChunkOfSwathSize);
4077 dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %u\n", __func__, MissingPartOfLastSwathOfDETSize);
4078 dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %u\n", __func__, *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
4079 #endif
4080 } // CalculateStutterEfficiency
4081
4082 /// \CalculateSwathAndDETConfiguration
4083 /// @brief Calculates swath width and different return buffers sizing (DET, CDB, etc.)
CalculateSwathAndDETConfiguration(struct display_mode_lib_scratch_st * scratch,struct CalculateSwathAndDETConfiguration_params_st * p)4084 static void CalculateSwathAndDETConfiguration(struct display_mode_lib_scratch_st *scratch,
4085 struct CalculateSwathAndDETConfiguration_params_st *p)
4086 {
4087 dml_uint_t MaximumSwathHeightY[__DML_NUM_PLANES__];
4088 dml_uint_t MaximumSwathHeightC[__DML_NUM_PLANES__];
4089 dml_uint_t RoundedUpMaxSwathSizeBytesY[__DML_NUM_PLANES__];
4090 dml_uint_t RoundedUpMaxSwathSizeBytesC[__DML_NUM_PLANES__];
4091 dml_uint_t RoundedUpSwathSizeBytesY[__DML_NUM_PLANES__];
4092 dml_uint_t RoundedUpSwathSizeBytesC[__DML_NUM_PLANES__];
4093 dml_uint_t SwathWidthSingleDPP[__DML_NUM_PLANES__];
4094 dml_uint_t SwathWidthSingleDPPChroma[__DML_NUM_PLANES__];
4095
4096 dml_uint_t TotalActiveDPP = 0;
4097 dml_bool_t NoChromaOrLinearSurfaces = true;
4098 dml_uint_t SurfaceDoingUnboundedRequest = 0;
4099
4100 dml_uint_t DETBufferSizeInKByteForSwathCalculation;
4101
4102 const long TTUFIFODEPTH = 8;
4103 const long MAXIMUMCOMPRESSION = 4;
4104
4105 #ifdef __DML_VBA_DEBUG__
4106 dml_print("DML::%s: ForceSingleDPP = %u\n", __func__, p->ForceSingleDPP);
4107 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
4108 dml_print("DML::%s: DPPPerSurface[%u] = %u\n", __func__, k, p->DPPPerSurface[k]);
4109 }
4110 #endif
4111 CalculateSwathWidth(p->ForceSingleDPP,
4112 p->NumberOfActiveSurfaces,
4113 p->SourcePixelFormat,
4114 p->SourceScan,
4115 p->ViewportStationary,
4116 p->ViewportWidth,
4117 p->ViewportHeight,
4118 p->ViewportXStart,
4119 p->ViewportYStart,
4120 p->ViewportXStartC,
4121 p->ViewportYStartC,
4122 p->SurfaceWidthY,
4123 p->SurfaceWidthC,
4124 p->SurfaceHeightY,
4125 p->SurfaceHeightC,
4126 p->ODMMode,
4127 p->BytePerPixY,
4128 p->BytePerPixC,
4129 p->Read256BytesBlockHeightY,
4130 p->Read256BytesBlockHeightC,
4131 p->Read256BytesBlockWidthY,
4132 p->Read256BytesBlockWidthC,
4133 p->BlendingAndTiming,
4134 p->HActive,
4135 p->HRatio,
4136 p->DPPPerSurface,
4137
4138 // Output
4139 SwathWidthSingleDPP,
4140 SwathWidthSingleDPPChroma,
4141 p->SwathWidth,
4142 p->SwathWidthChroma,
4143 MaximumSwathHeightY,
4144 MaximumSwathHeightC,
4145 p->swath_width_luma_ub,
4146 p->swath_width_chroma_ub);
4147
4148 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
4149 RoundedUpMaxSwathSizeBytesY[k] = (dml_uint_t)(p->swath_width_luma_ub[k] * p->BytePerPixDETY[k] * MaximumSwathHeightY[k]);
4150 RoundedUpMaxSwathSizeBytesC[k] = (dml_uint_t)(p->swath_width_chroma_ub[k] * p->BytePerPixDETC[k] * MaximumSwathHeightC[k]);
4151 #ifdef __DML_VBA_DEBUG__
4152 dml_print("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, p->DPPPerSurface[k]);
4153 dml_print("DML::%s: k=%u swath_width_luma_ub = %u\n", __func__, k, p->swath_width_luma_ub[k]);
4154 dml_print("DML::%s: k=%u BytePerPixDETY = %f\n", __func__, k, p->BytePerPixDETY[k]);
4155 dml_print("DML::%s: k=%u MaximumSwathHeightY = %u\n", __func__, k, MaximumSwathHeightY[k]);
4156 dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesY = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesY[k]);
4157 dml_print("DML::%s: k=%u swath_width_chroma_ub = %u\n", __func__, k, p->swath_width_chroma_ub[k]);
4158 dml_print("DML::%s: k=%u BytePerPixDETC = %f\n", __func__, k, p->BytePerPixDETC[k]);
4159 dml_print("DML::%s: k=%u MaximumSwathHeightC = %u\n", __func__, k, MaximumSwathHeightC[k]);
4160 dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesC = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesC[k]);
4161 #endif
4162 if (p->SourcePixelFormat[k] == dml_420_10) {
4163 RoundedUpMaxSwathSizeBytesY[k] = (dml_uint_t)(dml_ceil((dml_float_t) RoundedUpMaxSwathSizeBytesY[k], 256));
4164 RoundedUpMaxSwathSizeBytesC[k] = (dml_uint_t)(dml_ceil((dml_float_t) RoundedUpMaxSwathSizeBytesC[k], 256));
4165 }
4166 }
4167
4168 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
4169 TotalActiveDPP = TotalActiveDPP + (p->ForceSingleDPP ? 1 : p->DPPPerSurface[k]);
4170 if (p->DPPPerSurface[k] > 0)
4171 SurfaceDoingUnboundedRequest = k;
4172 if (p->SourcePixelFormat[k] == dml_420_8 || p->SourcePixelFormat[k] == dml_420_10 ||
4173 p->SourcePixelFormat[k] == dml_420_12 || p->SourcePixelFormat[k] == dml_rgbe_alpha
4174 || p->SurfaceTiling[k] == dml_sw_linear) {
4175 NoChromaOrLinearSurfaces = false;
4176 }
4177 }
4178
4179 *p->UnboundedRequestEnabled = UnboundedRequest(p->UseUnboundedRequestingFinal, TotalActiveDPP,
4180 NoChromaOrLinearSurfaces, p->Output[0]);
4181
4182 CalculateDETBufferSize(p->DETSizeOverride,
4183 p->UseMALLForPStateChange,
4184 p->ForceSingleDPP,
4185 p->NumberOfActiveSurfaces,
4186 *p->UnboundedRequestEnabled,
4187 p->nomDETInKByte,
4188 p->MaxTotalDETInKByte,
4189 p->ConfigReturnBufferSizeInKByte,
4190 p->MinCompressedBufferSizeInKByte,
4191 p->ConfigReturnBufferSegmentSizeInkByte,
4192 p->CompressedBufferSegmentSizeInkByteFinal,
4193 p->SourcePixelFormat,
4194 p->ReadBandwidthLuma,
4195 p->ReadBandwidthChroma,
4196 RoundedUpMaxSwathSizeBytesY,
4197 RoundedUpMaxSwathSizeBytesC,
4198 p->DPPPerSurface,
4199
4200 // Output
4201 p->DETBufferSizeInKByte, // per hubp pipe
4202 p->CompressedBufferSizeInkByte);
4203
4204 #ifdef __DML_VBA_DEBUG__
4205 dml_print("DML::%s: TotalActiveDPP = %u\n", __func__, TotalActiveDPP);
4206 dml_print("DML::%s: nomDETInKByte = %u\n", __func__, p->nomDETInKByte);
4207 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, p->ConfigReturnBufferSizeInKByte);
4208 dml_print("DML::%s: UseUnboundedRequestingFinal = %u\n", __func__, p->UseUnboundedRequestingFinal);
4209 dml_print("DML::%s: UnboundedRequestEnabled = %u\n", __func__, *p->UnboundedRequestEnabled);
4210 dml_print("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *p->CompressedBufferSizeInkByte);
4211 #endif
4212
4213 *p->ViewportSizeSupport = true;
4214 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
4215
4216 DETBufferSizeInKByteForSwathCalculation = (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe ? 1024 : p->DETBufferSizeInKByte[k]);
4217 #ifdef __DML_VBA_DEBUG__
4218 dml_print("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation = %u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation);
4219 #endif
4220
4221 if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
4222 p->SwathHeightY[k] = MaximumSwathHeightY[k];
4223 p->SwathHeightC[k] = MaximumSwathHeightC[k];
4224 RoundedUpSwathSizeBytesY[k] = RoundedUpMaxSwathSizeBytesY[k];
4225 RoundedUpSwathSizeBytesC[k] = RoundedUpMaxSwathSizeBytesC[k];
4226 } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] && RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
4227 p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
4228 p->SwathHeightC[k] = MaximumSwathHeightC[k];
4229 RoundedUpSwathSizeBytesY[k] = RoundedUpMaxSwathSizeBytesY[k] / 2;
4230 RoundedUpSwathSizeBytesC[k] = RoundedUpMaxSwathSizeBytesC[k];
4231 } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] && RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
4232 p->SwathHeightY[k] = MaximumSwathHeightY[k];
4233 p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
4234 RoundedUpSwathSizeBytesY[k] = RoundedUpMaxSwathSizeBytesY[k];
4235 RoundedUpSwathSizeBytesC[k] = RoundedUpMaxSwathSizeBytesC[k] / 2;
4236 } else {
4237 p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
4238 p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
4239 RoundedUpSwathSizeBytesY[k] = RoundedUpMaxSwathSizeBytesY[k] / 2;
4240 RoundedUpSwathSizeBytesC[k] = RoundedUpMaxSwathSizeBytesC[k] / 2;
4241 }
4242
4243 if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 > DETBufferSizeInKByteForSwathCalculation * 1024 / 2) ||
4244 p->SwathWidth[k] > p->MaximumSwathWidthLuma[k] || (p->SwathHeightC[k] > 0 && p->SwathWidthChroma[k] > p->MaximumSwathWidthChroma[k])) {
4245 *p->ViewportSizeSupport = false;
4246 p->ViewportSizeSupportPerSurface[k] = false;
4247 } else {
4248 p->ViewportSizeSupportPerSurface[k] = true;
4249 }
4250
4251 if (p->SwathHeightC[k] == 0) {
4252 #ifdef __DML_VBA_DEBUG__
4253 dml_print("DML::%s: k=%u All DET for plane0\n", __func__, k);
4254 #endif
4255 p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024;
4256 p->DETBufferSizeC[k] = 0;
4257 } else if (RoundedUpSwathSizeBytesY[k] <= 1.5 * RoundedUpSwathSizeBytesC[k]) {
4258 #ifdef __DML_VBA_DEBUG__
4259 dml_print("DML::%s: k=%u Half DET for plane0, half for plane1\n", __func__, k);
4260 #endif
4261 p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024 / 2;
4262 p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 / 2;
4263 } else {
4264 #ifdef __DML_VBA_DEBUG__
4265 dml_print("DML::%s: k=%u 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
4266 #endif
4267 p->DETBufferSizeY[k] = (dml_uint_t)(dml_floor(p->DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024));
4268 p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 - p->DETBufferSizeY[k];
4269 }
4270
4271 #ifdef __DML_VBA_DEBUG__
4272 dml_print("DML::%s: k=%u SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]);
4273 dml_print("DML::%s: k=%u SwathHeightC = %u\n", __func__, k, p->SwathHeightC[k]);
4274 dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesY = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesY[k]);
4275 dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesC = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesC[k]);
4276 dml_print("DML::%s: k=%u RoundedUpSwathSizeBytesY = %u\n", __func__, k, RoundedUpSwathSizeBytesY[k]);
4277 dml_print("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]);
4278 dml_print("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, p->DETBufferSizeInKByte[k]);
4279 dml_print("DML::%s: k=%u DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
4280 dml_print("DML::%s: k=%u DETBufferSizeC = %u\n", __func__, k, p->DETBufferSizeC[k]);
4281 dml_print("DML::%s: k=%u ViewportSizeSupportPerSurface = %u\n", __func__, k, p->ViewportSizeSupportPerSurface[k]);
4282 #endif
4283
4284 }
4285
4286 *p->compbuf_reserved_space_64b = 2 * p->PixelChunkSizeInKByte * 1024 / 64;
4287 if (*p->UnboundedRequestEnabled) {
4288 *p->compbuf_reserved_space_64b = dml_max(*p->compbuf_reserved_space_64b,
4289 (dml_float_t)(p->ROBBufferSizeInKByte * 1024/64)
4290 - (dml_float_t)(RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest] * TTUFIFODEPTH / MAXIMUMCOMPRESSION/64));
4291 }
4292 *p->compbuf_reserved_space_zs = 2 * p->PixelChunkSizeInKByte * 1024 / 256;
4293 } // CalculateSwathAndDETConfiguration
4294
CalculateSwathWidth(dml_bool_t ForceSingleDPP,dml_uint_t NumberOfActiveSurfaces,enum dml_source_format_class SourcePixelFormat[],enum dml_rotation_angle SourceScan[],dml_bool_t ViewportStationary[],dml_uint_t ViewportWidth[],dml_uint_t ViewportHeight[],dml_uint_t ViewportXStart[],dml_uint_t ViewportYStart[],dml_uint_t ViewportXStartC[],dml_uint_t ViewportYStartC[],dml_uint_t SurfaceWidthY[],dml_uint_t SurfaceWidthC[],dml_uint_t SurfaceHeightY[],dml_uint_t SurfaceHeightC[],enum dml_odm_mode ODMMode[],dml_uint_t BytePerPixY[],dml_uint_t BytePerPixC[],dml_uint_t Read256BytesBlockHeightY[],dml_uint_t Read256BytesBlockHeightC[],dml_uint_t Read256BytesBlockWidthY[],dml_uint_t Read256BytesBlockWidthC[],dml_uint_t BlendingAndTiming[],dml_uint_t HActive[],dml_float_t HRatio[],dml_uint_t DPPPerSurface[],dml_uint_t SwathWidthSingleDPPY[],dml_uint_t SwathWidthSingleDPPC[],dml_uint_t SwathWidthY[],dml_uint_t SwathWidthC[],dml_uint_t MaximumSwathHeightY[],dml_uint_t MaximumSwathHeightC[],dml_uint_t swath_width_luma_ub[],dml_uint_t swath_width_chroma_ub[])4295 static void CalculateSwathWidth(
4296 dml_bool_t ForceSingleDPP,
4297 dml_uint_t NumberOfActiveSurfaces,
4298 enum dml_source_format_class SourcePixelFormat[],
4299 enum dml_rotation_angle SourceScan[],
4300 dml_bool_t ViewportStationary[],
4301 dml_uint_t ViewportWidth[],
4302 dml_uint_t ViewportHeight[],
4303 dml_uint_t ViewportXStart[],
4304 dml_uint_t ViewportYStart[],
4305 dml_uint_t ViewportXStartC[],
4306 dml_uint_t ViewportYStartC[],
4307 dml_uint_t SurfaceWidthY[],
4308 dml_uint_t SurfaceWidthC[],
4309 dml_uint_t SurfaceHeightY[],
4310 dml_uint_t SurfaceHeightC[],
4311 enum dml_odm_mode ODMMode[],
4312 dml_uint_t BytePerPixY[],
4313 dml_uint_t BytePerPixC[],
4314 dml_uint_t Read256BytesBlockHeightY[],
4315 dml_uint_t Read256BytesBlockHeightC[],
4316 dml_uint_t Read256BytesBlockWidthY[],
4317 dml_uint_t Read256BytesBlockWidthC[],
4318 dml_uint_t BlendingAndTiming[],
4319 dml_uint_t HActive[],
4320 dml_float_t HRatio[],
4321 dml_uint_t DPPPerSurface[],
4322
4323 // Output
4324 dml_uint_t SwathWidthSingleDPPY[],
4325 dml_uint_t SwathWidthSingleDPPC[],
4326 dml_uint_t SwathWidthY[], // per-pipe
4327 dml_uint_t SwathWidthC[], // per-pipe
4328 dml_uint_t MaximumSwathHeightY[],
4329 dml_uint_t MaximumSwathHeightC[],
4330 dml_uint_t swath_width_luma_ub[], // per-pipe
4331 dml_uint_t swath_width_chroma_ub[]) // per-pipe
4332 {
4333 enum dml_odm_mode MainSurfaceODMMode;
4334 dml_uint_t surface_width_ub_l;
4335 dml_uint_t surface_height_ub_l;
4336 dml_uint_t surface_width_ub_c = 0;
4337 dml_uint_t surface_height_ub_c = 0;
4338
4339 #ifdef __DML_VBA_DEBUG__
4340 dml_print("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP);
4341 dml_print("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
4342 #endif
4343
4344 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
4345 if (!dml_is_vertical_rotation(SourceScan[k])) {
4346 SwathWidthSingleDPPY[k] = ViewportWidth[k];
4347 } else {
4348 SwathWidthSingleDPPY[k] = ViewportHeight[k];
4349 }
4350
4351 #ifdef __DML_VBA_DEBUG__
4352 dml_print("DML::%s: k=%u ViewportWidth=%u\n", __func__, k, ViewportWidth[k]);
4353 dml_print("DML::%s: k=%u ViewportHeight=%u\n", __func__, k, ViewportHeight[k]);
4354 dml_print("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]);
4355 #endif
4356
4357 MainSurfaceODMMode = ODMMode[k];
4358 for (dml_uint_t j = 0; j < NumberOfActiveSurfaces; ++j) {
4359 if (BlendingAndTiming[k] == j) {
4360 MainSurfaceODMMode = ODMMode[j];
4361 }
4362 }
4363
4364 if (ForceSingleDPP) {
4365 SwathWidthY[k] = SwathWidthSingleDPPY[k];
4366 } else {
4367 if (MainSurfaceODMMode == dml_odm_mode_combine_4to1) {
4368 SwathWidthY[k] = (dml_uint_t)(dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k], true)));
4369 } else if (MainSurfaceODMMode == dml_odm_mode_combine_2to1) {
4370 SwathWidthY[k] = (dml_uint_t)(dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k], true)));
4371 } else if (DPPPerSurface[k] == 2) {
4372 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
4373 } else {
4374 SwathWidthY[k] = SwathWidthSingleDPPY[k];
4375 }
4376 }
4377
4378 #ifdef __DML_VBA_DEBUG__
4379 dml_print("DML::%s: k=%u HActive=%u\n", __func__, k, HActive[k]);
4380 dml_print("DML::%s: k=%u HRatio=%f\n", __func__, k, HRatio[k]);
4381 dml_print("DML::%s: k=%u MainSurfaceODMMode=%u\n", __func__, k, MainSurfaceODMMode);
4382 dml_print("DML::%s: k=%u SwathWidthSingleDPPY=%u\n", __func__, k, SwathWidthSingleDPPY[k]);
4383 dml_print("DML::%s: k=%u SwathWidthY=%u\n", __func__, k, SwathWidthY[k]);
4384 #endif
4385
4386 if (SourcePixelFormat[k] == dml_420_8 || SourcePixelFormat[k] == dml_420_10 || SourcePixelFormat[k] == dml_420_12) {
4387 SwathWidthC[k] = SwathWidthY[k] / 2;
4388 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
4389 } else {
4390 SwathWidthC[k] = SwathWidthY[k];
4391 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
4392 }
4393
4394 if (ForceSingleDPP == true) {
4395 SwathWidthY[k] = SwathWidthSingleDPPY[k];
4396 SwathWidthC[k] = SwathWidthSingleDPPC[k];
4397 }
4398
4399 surface_width_ub_l = (dml_uint_t)dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
4400 surface_height_ub_l = (dml_uint_t)dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
4401
4402 if (!dml_is_vertical_rotation(SourceScan[k])) {
4403 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
4404 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
4405 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
4406 swath_width_luma_ub[k] = (dml_uint_t)(dml_min(surface_width_ub_l, dml_floor(ViewportXStart[k] + SwathWidthY[k] + Read256BytesBlockWidthY[k] - 1, Read256BytesBlockWidthY[k]) - dml_floor(ViewportXStart[k], Read256BytesBlockWidthY[k])));
4407 } else {
4408 swath_width_luma_ub[k] = (dml_uint_t)(dml_min(surface_width_ub_l, dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]));
4409 }
4410 if (BytePerPixC[k] > 0) {
4411 surface_width_ub_c = (dml_uint_t)dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
4412 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
4413 swath_width_chroma_ub[k] = (dml_uint_t)(dml_min(surface_width_ub_c, dml_floor(ViewportXStartC[k] + SwathWidthC[k] + Read256BytesBlockWidthC[k] - 1, Read256BytesBlockWidthC[k]) - dml_floor(ViewportXStartC[k], Read256BytesBlockWidthC[k])));
4414 } else {
4415 swath_width_chroma_ub[k] = (dml_uint_t)(dml_min(surface_width_ub_c, dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]));
4416 }
4417 } else {
4418 swath_width_chroma_ub[k] = 0;
4419 }
4420 } else {
4421 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
4422 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
4423
4424 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
4425 swath_width_luma_ub[k] = (dml_uint_t)(dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] + SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1, Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k])));
4426 } else {
4427 swath_width_luma_ub[k] = (dml_uint_t)(dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]));
4428 }
4429 if (BytePerPixC[k] > 0) {
4430 surface_height_ub_c = (dml_uint_t)dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
4431 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
4432 swath_width_chroma_ub[k] = (dml_uint_t)(dml_min(surface_height_ub_c, dml_floor(ViewportYStartC[k] + SwathWidthC[k] + Read256BytesBlockHeightC[k] - 1, Read256BytesBlockHeightC[k]) - dml_floor(ViewportYStartC[k], Read256BytesBlockHeightC[k])));
4433 } else {
4434 swath_width_chroma_ub[k] = (dml_uint_t)(dml_min(surface_height_ub_c, dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]));
4435 }
4436 } else {
4437 swath_width_chroma_ub[k] = 0;
4438 }
4439 }
4440
4441 #ifdef __DML_VBA_DEBUG__
4442 dml_print("DML::%s: k=%u surface_width_ub_l=%u\n", __func__, k, surface_width_ub_l);
4443 dml_print("DML::%s: k=%u surface_height_ub_l=%u\n", __func__, k, surface_height_ub_l);
4444 dml_print("DML::%s: k=%u surface_width_ub_c=%u\n", __func__, k, surface_width_ub_c);
4445 dml_print("DML::%s: k=%u surface_height_ub_c=%u\n", __func__, k, surface_height_ub_c);
4446 dml_print("DML::%s: k=%u Read256BytesBlockWidthY=%u\n", __func__, k, Read256BytesBlockWidthY[k]);
4447 dml_print("DML::%s: k=%u Read256BytesBlockHeightY=%u\n", __func__, k, Read256BytesBlockHeightY[k]);
4448 dml_print("DML::%s: k=%u Read256BytesBlockWidthC=%u\n", __func__, k, Read256BytesBlockWidthC[k]);
4449 dml_print("DML::%s: k=%u Read256BytesBlockHeightC=%u\n", __func__, k, Read256BytesBlockHeightC[k]);
4450 dml_print("DML::%s: k=%u ViewportStationary=%u\n", __func__, k, ViewportStationary[k]);
4451 dml_print("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]);
4452 dml_print("DML::%s: k=%u swath_width_luma_ub=%u\n", __func__, k, swath_width_luma_ub[k]);
4453 dml_print("DML::%s: k=%u swath_width_chroma_ub=%u\n", __func__, k, swath_width_chroma_ub[k]);
4454 dml_print("DML::%s: k=%u MaximumSwathHeightY=%u\n", __func__, k, MaximumSwathHeightY[k]);
4455 dml_print("DML::%s: k=%u MaximumSwathHeightC=%u\n", __func__, k, MaximumSwathHeightC[k]);
4456 #endif
4457
4458 }
4459 } // CalculateSwathWidth
4460
CalculateExtraLatency(dml_uint_t RoundTripPingLatencyCycles,dml_uint_t ReorderingBytes,dml_float_t DCFCLK,dml_uint_t TotalNumberOfActiveDPP,dml_uint_t PixelChunkSizeInKByte,dml_uint_t TotalNumberOfDCCActiveDPP,dml_uint_t MetaChunkSize,dml_float_t ReturnBW,dml_bool_t GPUVMEnable,dml_bool_t HostVMEnable,dml_uint_t NumberOfActiveSurfaces,dml_uint_t NumberOfDPP[],dml_uint_t dpte_group_bytes[],dml_float_t HostVMInefficiencyFactor,dml_uint_t HostVMMinPageSize,dml_uint_t HostVMMaxNonCachedPageTableLevels)4461 static noinline_for_stack dml_float_t CalculateExtraLatency(
4462 dml_uint_t RoundTripPingLatencyCycles,
4463 dml_uint_t ReorderingBytes,
4464 dml_float_t DCFCLK,
4465 dml_uint_t TotalNumberOfActiveDPP,
4466 dml_uint_t PixelChunkSizeInKByte,
4467 dml_uint_t TotalNumberOfDCCActiveDPP,
4468 dml_uint_t MetaChunkSize,
4469 dml_float_t ReturnBW,
4470 dml_bool_t GPUVMEnable,
4471 dml_bool_t HostVMEnable,
4472 dml_uint_t NumberOfActiveSurfaces,
4473 dml_uint_t NumberOfDPP[],
4474 dml_uint_t dpte_group_bytes[],
4475 dml_float_t HostVMInefficiencyFactor,
4476 dml_uint_t HostVMMinPageSize,
4477 dml_uint_t HostVMMaxNonCachedPageTableLevels)
4478 {
4479 dml_float_t ExtraLatencyBytes;
4480 dml_float_t ExtraLatency;
4481
4482 ExtraLatencyBytes = CalculateExtraLatencyBytes(
4483 ReorderingBytes,
4484 TotalNumberOfActiveDPP,
4485 PixelChunkSizeInKByte,
4486 TotalNumberOfDCCActiveDPP,
4487 MetaChunkSize,
4488 GPUVMEnable,
4489 HostVMEnable,
4490 NumberOfActiveSurfaces,
4491 NumberOfDPP,
4492 dpte_group_bytes,
4493 HostVMInefficiencyFactor,
4494 HostVMMinPageSize,
4495 HostVMMaxNonCachedPageTableLevels);
4496
4497 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
4498
4499 #ifdef __DML_VBA_DEBUG__
4500 dml_print("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles);
4501 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
4502 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
4503 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
4504 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
4505 #endif
4506
4507 return ExtraLatency;
4508 } // CalculateExtraLatency
4509
/*
 * CalculateHostVMDynamicLevels - number of host VM page table levels counted
 * as dynamic.
 *
 * Returns 0 unless both GPUVMEnable and HostVMEnable are set. Otherwise the
 * result is HostVMMaxNonCachedPageTableLevels reduced according to
 * HostVMMinPageSize and clamped at zero:
 *   HostVMMinPageSize < 2048            -> no reduction
 *   2048 <= HostVMMinPageSize < 1048576 -> reduced by 1
 *   HostVMMinPageSize >= 1048576        -> reduced by 2
 */
static dml_uint_t CalculateHostVMDynamicLevels(
		dml_bool_t GPUVMEnable,
		dml_bool_t HostVMEnable,
		dml_uint_t HostVMMinPageSize,
		dml_uint_t HostVMMaxNonCachedPageTableLevels)
{
	dml_uint_t LevelsRemoved;

	if (!(GPUVMEnable && HostVMEnable))
		return 0;

	if (HostVMMinPageSize < 2048)
		return HostVMMaxNonCachedPageTableLevels;

	LevelsRemoved = (HostVMMinPageSize < 1048576) ? 1 : 2;

	/* Subtract in floating point so the clamp sees a negative value instead of an unsigned wrap. */
	return (dml_uint_t) dml_max(0, (dml_float_t) HostVMMaxNonCachedPageTableLevels - LevelsRemoved);
}
4530
/*
 * CalculateExtraLatencyBytes - byte count used by the extra-latency estimate.
 *
 * Starts from
 *   ReorderingBytes
 *   + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *      + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024
 * and, when GPUVMEnable is true, adds for every active surface
 *   NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels)
 *   * HostVMInefficiencyFactor
 * where HostVMDynamicLevels comes from CalculateHostVMDynamicLevels().
 *
 * Return: the floating-point total converted to dml_uint_t.
 */
static dml_uint_t CalculateExtraLatencyBytes(dml_uint_t ReorderingBytes,
		dml_uint_t TotalNumberOfActiveDPP,
		dml_uint_t PixelChunkSizeInKByte,
		dml_uint_t TotalNumberOfDCCActiveDPP,
		dml_uint_t MetaChunkSize,
		dml_bool_t GPUVMEnable,
		dml_bool_t HostVMEnable,
		dml_uint_t NumberOfActiveSurfaces,
		dml_uint_t NumberOfDPP[],
		dml_uint_t dpte_group_bytes[],
		dml_float_t HostVMInefficiencyFactor,
		dml_uint_t HostVMMinPageSize,
		dml_uint_t HostVMMaxNonCachedPageTableLevels)
{
	const dml_uint_t DynamicLevels = CalculateHostVMDynamicLevels(GPUVMEnable, HostVMEnable, HostVMMinPageSize, HostVMMaxNonCachedPageTableLevels);
	/* Same for every surface, so name it once outside the loop. */
	const dml_uint_t PerGroupFactor = 1 + 8 * DynamicLevels;
	dml_float_t TotalBytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;
	dml_uint_t k;

	if (GPUVMEnable == true) {
		for (k = 0; k < NumberOfActiveSurfaces; ++k)
			TotalBytes += NumberOfDPP[k] * dpte_group_bytes[k] * PerGroupFactor * HostVMInefficiencyFactor;
	}

	return (dml_uint_t)(TotalBytes);
}
4555
/*
 * CalculateUrgentLatency - largest of the three urgent latency inputs, with
 * an optional fabric-clock dependent adjustment.
 *
 * When DoUrgentLatencyAdjustment is true the result is increased by
 *   UrgentLatencyAdjustmentFabricClockComponent
 *   * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1)
 * which is negative when FabricClock exceeds the reference.
 */
static dml_float_t CalculateUrgentLatency(
		dml_float_t UrgentLatencyPixelDataOnly,
		dml_float_t UrgentLatencyPixelMixedWithVMData,
		dml_float_t UrgentLatencyVMDataOnly,
		dml_bool_t DoUrgentLatencyAdjustment,
		dml_float_t UrgentLatencyAdjustmentFabricClockComponent,
		dml_float_t UrgentLatencyAdjustmentFabricClockReference,
		dml_float_t FabricClock)
{
	const dml_float_t WorstCase = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);

	if (DoUrgentLatencyAdjustment != true)
		return WorstCase;

	return WorstCase + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
4573
/*
 * RequiredDTBCLK - DTBCLK required for one output.
 *
 * Without DSC the result is PixelClock / 4 * OutputBpp / 24, floored at 25.0.
 *
 * With DSC the result is the largest of
 *   PixelWordRate / 4, AverageTribyteRate / 4, HActiveTribyteRate / 4, 25.0
 * multiplied by 1.002, where PixelWordRate is PixelClock (dml_444) or
 * PixelClock / 2 (any other OutputFormat) and the tribyte rates are derived
 * from the per-line compressed active (HCActive) and blank (HCBlank) counts.
 *
 * Callers are assumed to pass a non-zero DSCSlices when DSCEnable is set
 * (TODO confirm against the callers).
 */
static dml_float_t RequiredDTBCLK(
		dml_bool_t DSCEnable,
		dml_float_t PixelClock,
		enum dml_output_format_class OutputFormat,
		dml_float_t OutputBpp,
		dml_uint_t DSCSlices,
		dml_uint_t HTotal,
		dml_uint_t HActive,
		dml_uint_t AudioRate,
		dml_uint_t AudioLayout)
{
	if (DSCEnable != true) {
		return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
	} else {
		dml_float_t PixelWordRate = PixelClock / (OutputFormat == dml_444 ? 1 : 2);
		/*
		 * HActive and DSCSlices are both integers, so the quotient has
		 * to be formed in floating point. An integer division would
		 * truncate first and turn the surrounding dml_ceil() into a
		 * no-op whenever HActive is not a multiple of DSCSlices.
		 */
		dml_float_t SliceWidth = dml_ceil((dml_float_t) HActive / DSCSlices, 1);
		dml_float_t HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp * SliceWidth / 8.0, 1) / 3.0, 1);
		dml_float_t HCBlank = 64 + 32 * dml_ceil(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
		dml_float_t AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
		dml_float_t HActiveTribyteRate = PixelWordRate * HCActive / HActive;
		return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
	}
}
4596
UseMinimumDCFCLK(struct display_mode_lib_scratch_st * scratch,struct UseMinimumDCFCLK_params_st * p)4597 static void UseMinimumDCFCLK(struct display_mode_lib_scratch_st *scratch, struct UseMinimumDCFCLK_params_st *p)
4598 {
4599 struct UseMinimumDCFCLK_locals_st *s = &scratch->UseMinimumDCFCLK_locals;
4600
4601 s->NormalEfficiency = p->PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
4602 for (dml_uint_t j = 0; j < 2; ++j) {
4603
4604
4605 s->TotalMaxPrefetchFlipDPTERowBandwidth[j] = 0;
4606 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
4607 s->TotalMaxPrefetchFlipDPTERowBandwidth[j] = s->TotalMaxPrefetchFlipDPTERowBandwidth[j] + p->NoOfDPP[j][k] * p->DPTEBytesPerRow[j][k] / (15.75 * p->HTotal[k] / p->PixelClock[k]);
4608 }
4609
4610 for (dml_uint_t k = 0; k <= p->NumberOfActiveSurfaces - 1; ++k) {
4611 s->NoOfDPPState[k] = p->NoOfDPP[j][k];
4612 }
4613
4614 s->DPTEBandwidth = s->TotalMaxPrefetchFlipDPTERowBandwidth[j];
4615
4616 s->DCFCLKRequiredForAverageBandwidth = dml_max(p->ProjectedDCFCLKDeepSleep[j], s->DPTEBandwidth / s->NormalEfficiency / p->ReturnBusWidth);
4617
4618 s->ExtraLatencyBytes = CalculateExtraLatencyBytes(p->ReorderingBytes, p->TotalNumberOfActiveDPP[j], p->PixelChunkSizeInKByte, p->TotalNumberOfDCCActiveDPP[j],
4619 p->MetaChunkSize, p->GPUVMEnable, p->HostVMEnable, p->NumberOfActiveSurfaces, s->NoOfDPPState, p->dpte_group_bytes,
4620 1, p->HostVMMinPageSize, p->HostVMMaxNonCachedPageTableLevels);
4621 s->ExtraLatencyCycles = p->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + s->ExtraLatencyBytes / s->NormalEfficiency / p->ReturnBusWidth;
4622 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
4623 dml_float_t DCFCLKCyclesRequiredInPrefetch;
4624 dml_float_t PrefetchTime;
4625
4626 s->PixelDCFCLKCyclesRequiredInPrefetch[k] = (p->PrefetchLinesY[j][k] * p->swath_width_luma_ub_all_states[j][k] * p->BytePerPixelY[k] + p->PrefetchLinesC[j][k] * p->swath_width_chroma_ub_all_states[j][k] * p->BytePerPixelC[k]) / s->NormalEfficiency / p->ReturnBusWidth;
4627 DCFCLKCyclesRequiredInPrefetch = 2 * s->ExtraLatencyCycles / s->NoOfDPPState[k] + p->PDEAndMetaPTEBytesPerFrame[j][k] / s->NormalEfficiency / s->NormalEfficiency / p->ReturnBusWidth * (p->GPUVMMaxPageTableLevels > 2 ? 1 : 0) + 2 * p->DPTEBytesPerRow[j][k] / s->NormalEfficiency / s->NormalEfficiency / p->ReturnBusWidth + 2 * p->MetaRowBytes[j][k] / s->NormalEfficiency / p->ReturnBusWidth + s->PixelDCFCLKCyclesRequiredInPrefetch[k];
4628 s->PrefetchPixelLinesTime[k] = dml_max(p->PrefetchLinesY[j][k], p->PrefetchLinesC[j][k]) * p->HTotal[k] / p->PixelClock[k];
4629 s->DynamicMetadataVMExtraLatency[k] = (p->GPUVMEnable == true && p->DynamicMetadataEnable[k] == true && p->DynamicMetadataVMEnabled == true) ? p->UrgLatency * p->GPUVMMaxPageTableLevels * (p->HostVMEnable == true ? p->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
4630
4631 s->MinimumTWait = CalculateTWait(p->MaxPrefetchMode,
4632 p->UseMALLForPStateChange[k],
4633 p->SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
4634 p->DRRDisplay[k],
4635 p->DRAMClockChangeLatencyFinal,
4636 p->FCLKChangeLatency,
4637 p->UrgLatency,
4638 p->SREnterPlusExitTime);
4639
4640 PrefetchTime = (p->MaximumVStartup[j][k] - 1) * p->HTotal[k] / p->PixelClock[k] - s->MinimumTWait - p->UrgLatency * ((p->GPUVMMaxPageTableLevels <= 2 ? p->GPUVMMaxPageTableLevels : p->GPUVMMaxPageTableLevels - 2) * (p->HostVMEnable == true ? p->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - s->DynamicMetadataVMExtraLatency[k];
4641
4642 if (PrefetchTime > 0) {
4643 dml_float_t ExpectedVRatioPrefetch;
4644 ExpectedVRatioPrefetch = s->PrefetchPixelLinesTime[k] / (PrefetchTime * s->PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
4645 s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = s->NoOfDPPState[k] * s->PixelDCFCLKCyclesRequiredInPrefetch[k] / s->PrefetchPixelLinesTime[k] * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4);
4646 if (p->HostVMEnable == true || p->ImmediateFlipRequirement == true) {
4647 s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = s->DCFCLKRequiredForPeakBandwidthPerSurface[k] + s->NoOfDPPState[k] * s->DPTEBandwidth / s->NormalEfficiency / s->NormalEfficiency / p->ReturnBusWidth;
4648 }
4649 } else {
4650 s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = p->DCFCLKPerState;
4651 }
4652 if (p->DynamicMetadataEnable[k] == true) {
4653 dml_float_t TSetupPipe;
4654 dml_float_t TdmbfPipe;
4655 dml_float_t TdmsksPipe;
4656 dml_float_t TdmecPipe;
4657 dml_float_t AllowedTimeForUrgentExtraLatency;
4658
4659 CalculateVUpdateAndDynamicMetadataParameters(
4660 p->MaxInterDCNTileRepeaters,
4661 p->RequiredDPPCLKPerSurface[j][k],
4662 p->RequiredDISPCLK[j],
4663 p->ProjectedDCFCLKDeepSleep[j],
4664 p->PixelClock[k],
4665 p->HTotal[k],
4666 p->VTotal[k] - p->VActive[k],
4667 p->DynamicMetadataTransmittedBytes[k],
4668 p->DynamicMetadataLinesBeforeActiveRequired[k],
4669 p->Interlace[k],
4670 p->ProgressiveToInterlaceUnitInOPP,
4671
4672 // Output
4673 &TSetupPipe,
4674 &TdmbfPipe,
4675 &TdmecPipe,
4676 &TdmsksPipe,
4677 &s->dummy1,
4678 &s->dummy2,
4679 &s->dummy3);
4680
4681 AllowedTimeForUrgentExtraLatency = p->MaximumVStartup[j][k] * p->HTotal[k] / p->PixelClock[k] - s->MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe - TdmsksPipe - s->DynamicMetadataVMExtraLatency[k];
4682 if (AllowedTimeForUrgentExtraLatency > 0) {
4683 s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = dml_max(s->DCFCLKRequiredForPeakBandwidthPerSurface[k], s->ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
4684 } else {
4685 s->DCFCLKRequiredForPeakBandwidthPerSurface[k] = p->DCFCLKPerState;
4686 }
4687 }
4688 }
4689 s->DCFCLKRequiredForPeakBandwidth = 0;
4690 for (dml_uint_t k = 0; k <= p->NumberOfActiveSurfaces - 1; ++k) {
4691 s->DCFCLKRequiredForPeakBandwidth = s->DCFCLKRequiredForPeakBandwidth + s->DCFCLKRequiredForPeakBandwidthPerSurface[k];
4692 }
4693 s->MinimumTvmPlus2Tr0 = p->UrgLatency * (p->GPUVMEnable == true ? (p->HostVMEnable == true ? (p->GPUVMMaxPageTableLevels + 2) * (p->HostVMMaxNonCachedPageTableLevels + 1) - 1 : p->GPUVMMaxPageTableLevels + 1) : 0);
4694 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
4695 dml_float_t MaximumTvmPlus2Tr0PlusTsw;
4696 MaximumTvmPlus2Tr0PlusTsw = (p->MaximumVStartup[j][k] - 2) * p->HTotal[k] / p->PixelClock[k] - s->MinimumTWait - s->DynamicMetadataVMExtraLatency[k];
4697 if (MaximumTvmPlus2Tr0PlusTsw <= s->MinimumTvmPlus2Tr0 + s->PrefetchPixelLinesTime[k] / 4) {
4698 s->DCFCLKRequiredForPeakBandwidth = p->DCFCLKPerState;
4699 } else {
4700 s->DCFCLKRequiredForPeakBandwidth = dml_max3(s->DCFCLKRequiredForPeakBandwidth,
4701 2 * s->ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - s->MinimumTvmPlus2Tr0 - s->PrefetchPixelLinesTime[k] / 4),
4702 (2 * s->ExtraLatencyCycles + s->PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - s->MinimumTvmPlus2Tr0));
4703 }
4704 }
4705 p->DCFCLKState[j] = dml_min(p->DCFCLKPerState, 1.05 * dml_max(s->DCFCLKRequiredForAverageBandwidth, s->DCFCLKRequiredForPeakBandwidth));
4706 }
4707 }
4708
4709
/*
 * UnboundedRequest - decide whether unbounded requesting is used.
 *
 * Returns false when the policy is dml_unbounded_requesting_disable, or when
 * the policy is dml_unbounded_requesting_edp_only and Output is not dml_edp.
 * Otherwise returns true only if TotalNumberOfActiveDPP is 1 and
 * NoChromaOrLinear is set.
 */
static dml_bool_t UnboundedRequest(enum dml_unbounded_requesting_policy UseUnboundedRequestingFinal,
		dml_uint_t TotalNumberOfActiveDPP,
		dml_bool_t NoChromaOrLinear,
		enum dml_output_encoder_class Output)
{
	if (UseUnboundedRequestingFinal == dml_unbounded_requesting_disable)
		return false;

	if (UseUnboundedRequestingFinal == dml_unbounded_requesting_edp_only && Output != dml_edp)
		return false;

	return (TotalNumberOfActiveDPP == 1 && NoChromaOrLinear);
}
4724
CalculateSurfaceSizeInMall(dml_uint_t NumberOfActiveSurfaces,dml_uint_t MALLAllocatedForDCN,enum dml_use_mall_for_static_screen_mode UseMALLForStaticScreen[],dml_bool_t DCCEnable[],dml_bool_t ViewportStationary[],dml_uint_t ViewportXStartY[],dml_uint_t ViewportYStartY[],dml_uint_t ViewportXStartC[],dml_uint_t ViewportYStartC[],dml_uint_t ViewportWidthY[],dml_uint_t ViewportHeightY[],dml_uint_t BytesPerPixelY[],dml_uint_t ViewportWidthC[],dml_uint_t ViewportHeightC[],dml_uint_t BytesPerPixelC[],dml_uint_t SurfaceWidthY[],dml_uint_t SurfaceWidthC[],dml_uint_t SurfaceHeightY[],dml_uint_t SurfaceHeightC[],dml_uint_t Read256BytesBlockWidthY[],dml_uint_t Read256BytesBlockWidthC[],dml_uint_t Read256BytesBlockHeightY[],dml_uint_t Read256BytesBlockHeightC[],dml_uint_t ReadBlockWidthY[],dml_uint_t ReadBlockWidthC[],dml_uint_t ReadBlockHeightY[],dml_uint_t ReadBlockHeightC[],dml_uint_t SurfaceSizeInMALL[],dml_bool_t * ExceededMALLSize)4725 static void CalculateSurfaceSizeInMall(
4726 dml_uint_t NumberOfActiveSurfaces,
4727 dml_uint_t MALLAllocatedForDCN,
4728 enum dml_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
4729 dml_bool_t DCCEnable[],
4730 dml_bool_t ViewportStationary[],
4731 dml_uint_t ViewportXStartY[],
4732 dml_uint_t ViewportYStartY[],
4733 dml_uint_t ViewportXStartC[],
4734 dml_uint_t ViewportYStartC[],
4735 dml_uint_t ViewportWidthY[],
4736 dml_uint_t ViewportHeightY[],
4737 dml_uint_t BytesPerPixelY[],
4738 dml_uint_t ViewportWidthC[],
4739 dml_uint_t ViewportHeightC[],
4740 dml_uint_t BytesPerPixelC[],
4741 dml_uint_t SurfaceWidthY[],
4742 dml_uint_t SurfaceWidthC[],
4743 dml_uint_t SurfaceHeightY[],
4744 dml_uint_t SurfaceHeightC[],
4745 dml_uint_t Read256BytesBlockWidthY[],
4746 dml_uint_t Read256BytesBlockWidthC[],
4747 dml_uint_t Read256BytesBlockHeightY[],
4748 dml_uint_t Read256BytesBlockHeightC[],
4749 dml_uint_t ReadBlockWidthY[],
4750 dml_uint_t ReadBlockWidthC[],
4751 dml_uint_t ReadBlockHeightY[],
4752 dml_uint_t ReadBlockHeightC[],
4753
4754 // Output
4755 dml_uint_t SurfaceSizeInMALL[],
4756 dml_bool_t *ExceededMALLSize)
4757 {
4758 dml_uint_t TotalSurfaceSizeInMALL = 0;
4759
4760 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
4761 if (ViewportStationary[k]) {
4762 SurfaceSizeInMALL[k] = (dml_uint_t)(dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]), dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1, ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k], ReadBlockWidthY[k])) *
4763 dml_min(dml_ceil(SurfaceHeightY[k], ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] + ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) - dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) *
4764 BytesPerPixelY[k]);
4765
4766 if (ReadBlockWidthC[k] > 0) {
4767 SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] +
4768 dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]), dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) - dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) *
4769 dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]), dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) - dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) * BytesPerPixelC[k]);
4770 }
4771 if (DCCEnable[k] == true) {
4772 SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] +
4773 dml_min(dml_ceil(SurfaceWidthY[k], 8 * Read256BytesBlockWidthY[k]), dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 * Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k]) - dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k])) *
4774 dml_min(dml_ceil(SurfaceHeightY[k], 8 * Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] + ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 * Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8 * Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256);
4775 if (Read256BytesBlockWidthC[k] > 0) {
4776 SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] +
4777 dml_min(dml_ceil(SurfaceWidthC[k], 8 * Read256BytesBlockWidthC[k]), dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8 * Read256BytesBlockWidthC[k] - 1, 8 * Read256BytesBlockWidthC[k]) - dml_floor(ViewportXStartC[k], 8 * Read256BytesBlockWidthC[k])) *
4778 dml_min(dml_ceil(SurfaceHeightC[k], 8 * Read256BytesBlockHeightC[k]), dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + 8 * Read256BytesBlockHeightC[k] - 1, 8 * Read256BytesBlockHeightC[k]) - dml_floor(ViewportYStartC[k], 8 * Read256BytesBlockHeightC[k])) * BytesPerPixelC[k] / 256);
4779 }
4780 }
4781 } else {
4782 SurfaceSizeInMALL[k] = (dml_uint_t)(dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) * dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) * BytesPerPixelY[k]);
4783 if (ReadBlockWidthC[k] > 0) {
4784 SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] +
4785 dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
4786 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) * BytesPerPixelC[k]);
4787 }
4788 if (DCCEnable[k] == true) {
4789 SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] +
4790 dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 8 * Read256BytesBlockWidthY[k] - 1), 8 * Read256BytesBlockWidthY[k]) *
4791 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1), 8 * Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256);
4792
4793 if (Read256BytesBlockWidthC[k] > 0) {
4794 SurfaceSizeInMALL[k] = (dml_uint_t)(SurfaceSizeInMALL[k] +
4795 dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 8 * Read256BytesBlockWidthC[k] - 1), 8 * Read256BytesBlockWidthC[k]) *
4796 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 * Read256BytesBlockHeightC[k] - 1), 8 * Read256BytesBlockHeightC[k]) * BytesPerPixelC[k] / 256);
4797 }
4798 }
4799 }
4800 }
4801
4802 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
4803 if (UseMALLForStaticScreen[k] == dml_use_mall_static_screen_enable)
4804 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
4805 }
4806 *ExceededMALLSize = (TotalSurfaceSizeInMALL > MALLAllocatedForDCN * 1024 * 1024);
4807 } // CalculateSurfaceSizeInMall
4808
CalculateDETBufferSize(dml_uint_t DETSizeOverride[],enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],dml_bool_t ForceSingleDPP,dml_uint_t NumberOfActiveSurfaces,dml_bool_t UnboundedRequestEnabled,dml_uint_t nomDETInKByte,dml_uint_t MaxTotalDETInKByte,dml_uint_t ConfigReturnBufferSizeInKByte,dml_uint_t MinCompressedBufferSizeInKByte,dml_uint_t ConfigReturnBufferSegmentSizeInkByte,dml_uint_t CompressedBufferSegmentSizeInkByteFinal,enum dml_source_format_class SourcePixelFormat[],dml_float_t ReadBandwidthLuma[],dml_float_t ReadBandwidthChroma[],dml_uint_t RoundedUpMaxSwathSizeBytesY[],dml_uint_t RoundedUpMaxSwathSizeBytesC[],dml_uint_t DPPPerSurface[],dml_uint_t DETBufferSizeInKByte[],dml_uint_t * CompressedBufferSizeInkByte)4809 static void CalculateDETBufferSize(
4810 dml_uint_t DETSizeOverride[],
4811 enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
4812 dml_bool_t ForceSingleDPP,
4813 dml_uint_t NumberOfActiveSurfaces,
4814 dml_bool_t UnboundedRequestEnabled,
4815 dml_uint_t nomDETInKByte,
4816 dml_uint_t MaxTotalDETInKByte,
4817 dml_uint_t ConfigReturnBufferSizeInKByte,
4818 dml_uint_t MinCompressedBufferSizeInKByte,
4819 dml_uint_t ConfigReturnBufferSegmentSizeInkByte,
4820 dml_uint_t CompressedBufferSegmentSizeInkByteFinal,
4821 enum dml_source_format_class SourcePixelFormat[],
4822 dml_float_t ReadBandwidthLuma[],
4823 dml_float_t ReadBandwidthChroma[],
4824 dml_uint_t RoundedUpMaxSwathSizeBytesY[],
4825 dml_uint_t RoundedUpMaxSwathSizeBytesC[],
4826 dml_uint_t DPPPerSurface[],
4827 // Output
4828 dml_uint_t DETBufferSizeInKByte[],
4829 dml_uint_t *CompressedBufferSizeInkByte)
4830 {
4831 dml_uint_t DETBufferSizePoolInKByte;
4832 dml_uint_t NextDETBufferPieceInKByte;
4833 dml_bool_t DETPieceAssignedToThisSurfaceAlready[__DML_NUM_PLANES__];
4834 dml_bool_t NextPotentialSurfaceToAssignDETPieceFound;
4835 dml_uint_t NextSurfaceToAssignDETPiece;
4836 dml_float_t TotalBandwidth;
4837 dml_float_t BandwidthOfSurfacesNotAssignedDETPiece;
4838 dml_uint_t max_minDET;
4839 dml_uint_t minDET;
4840 dml_uint_t minDET_pipe;
4841
4842 #ifdef __DML_VBA_DEBUG__
4843 dml_print("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP);
4844 dml_print("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte);
4845 dml_print("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
4846 dml_print("DML::%s: UnboundedRequestEnabled = %u\n", __func__, UnboundedRequestEnabled);
4847 dml_print("DML::%s: MaxTotalDETInKByte = %u\n", __func__, MaxTotalDETInKByte);
4848 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte);
4849 dml_print("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, MinCompressedBufferSizeInKByte);
4850 dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %u\n", __func__, CompressedBufferSegmentSizeInkByteFinal);
4851 #endif
4852
4853 // Note: Will use default det size if that fits 2 swaths
4854 if (UnboundedRequestEnabled) {
4855 if (DETSizeOverride[0] > 0) {
4856 DETBufferSizeInKByte[0] = DETSizeOverride[0];
4857 } else {
4858 DETBufferSizeInKByte[0] = (dml_uint_t) dml_max(128.0, dml_ceil(2.0 * ((dml_float_t) RoundedUpMaxSwathSizeBytesY[0] + (dml_float_t) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte));
4859 }
4860 *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
4861 } else {
4862 DETBufferSizePoolInKByte = MaxTotalDETInKByte;
4863 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
4864 DETBufferSizeInKByte[k] = 0;
4865 if (SourcePixelFormat[k] == dml_420_8 || SourcePixelFormat[k] == dml_420_10 || SourcePixelFormat[k] == dml_420_12) {
4866 max_minDET = nomDETInKByte - ConfigReturnBufferSegmentSizeInkByte;
4867 } else {
4868 max_minDET = nomDETInKByte;
4869 }
4870 minDET = 128;
4871 minDET_pipe = 0;
4872
4873 // add DET resource until can hold 2 full swaths
4874 while (minDET <= max_minDET && minDET_pipe == 0) {
4875 if (2.0 * ((dml_float_t) RoundedUpMaxSwathSizeBytesY[k] + (dml_float_t) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
4876 minDET_pipe = minDET;
4877 minDET = minDET + ConfigReturnBufferSegmentSizeInkByte;
4878 }
4879
4880 #ifdef __DML_VBA_DEBUG__
4881 dml_print("DML::%s: k=%u minDET = %u\n", __func__, k, minDET);
4882 dml_print("DML::%s: k=%u max_minDET = %u\n", __func__, k, max_minDET);
4883 dml_print("DML::%s: k=%u minDET_pipe = %u\n", __func__, k, minDET_pipe);
4884 dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesY = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesY[k]);
4885 dml_print("DML::%s: k=%u RoundedUpMaxSwathSizeBytesC = %u\n", __func__, k, RoundedUpMaxSwathSizeBytesC[k]);
4886 #endif
4887
4888 if (minDET_pipe == 0) {
4889 minDET_pipe = (dml_uint_t)(dml_max(128, dml_ceil(((dml_float_t)RoundedUpMaxSwathSizeBytesY[k] + (dml_float_t)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte)));
4890 #ifdef __DML_VBA_DEBUG__
4891 dml_print("DML::%s: k=%u minDET_pipe = %u (assume each plane take half DET)\n", __func__, k, minDET_pipe);
4892 #endif
4893 }
4894
4895 if (UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) {
4896 DETBufferSizeInKByte[k] = 0;
4897 } else if (DETSizeOverride[k] > 0) {
4898 DETBufferSizeInKByte[k] = DETSizeOverride[k];
4899 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
4900 } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
4901 DETBufferSizeInKByte[k] = minDET_pipe;
4902 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
4903 }
4904
4905 #ifdef __DML_VBA_DEBUG__
4906 dml_print("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]);
4907 dml_print("DML::%s: k=%u DETSizeOverride = %u\n", __func__, k, DETSizeOverride[k]);
4908 dml_print("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]);
4909 dml_print("DML::%s: DETBufferSizePoolInKByte = %u\n", __func__, DETBufferSizePoolInKByte);
4910 #endif
4911 }
4912
4913 TotalBandwidth = 0;
4914 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
4915 if (UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe)
4916 TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
4917 }
4918 #ifdef __DML_VBA_DEBUG__
4919 dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
4920 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
4921 dml_print("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]);
4922 }
4923 dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
4924 #endif
4925 dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth);
4926 BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
4927 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
4928
4929 if (UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) {
4930 DETPieceAssignedToThisSurfaceAlready[k] = true;
4931 } else if (DETSizeOverride[k] > 0 || (((dml_float_t) (ForceSingleDPP ? 1 : DPPPerSurface[k]) * (dml_float_t) DETBufferSizeInKByte[k] / (dml_float_t) MaxTotalDETInKByte) >= ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
4932 DETPieceAssignedToThisSurfaceAlready[k] = true;
4933 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
4934 } else {
4935 DETPieceAssignedToThisSurfaceAlready[k] = false;
4936 }
4937 #ifdef __DML_VBA_DEBUG__
4938 dml_print("DML::%s: k=%u DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, k, DETPieceAssignedToThisSurfaceAlready[k]);
4939 dml_print("DML::%s: k=%u BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, BandwidthOfSurfacesNotAssignedDETPiece);
4940 #endif
4941 }
4942
4943 for (dml_uint_t j = 0; j < NumberOfActiveSurfaces; ++j) {
4944 NextPotentialSurfaceToAssignDETPieceFound = false;
4945 NextSurfaceToAssignDETPiece = 0;
4946
4947 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
4948 #ifdef __DML_VBA_DEBUG__
4949 dml_print("DML::%s: j=%u k=%u, ReadBandwidthLuma[k] = %f\n", __func__, j, k, ReadBandwidthLuma[k]);
4950 dml_print("DML::%s: j=%u k=%u, ReadBandwidthChroma[k] = %f\n", __func__, j, k, ReadBandwidthChroma[k]);
4951 dml_print("DML::%s: j=%u k=%u, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
4952 dml_print("DML::%s: j=%u k=%u, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
4953 dml_print("DML::%s: j=%u k=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, k, NextSurfaceToAssignDETPiece);
4954 #endif
4955 if (!DETPieceAssignedToThisSurfaceAlready[k] && (!NextPotentialSurfaceToAssignDETPieceFound ||
4956 ReadBandwidthLuma[k] + ReadBandwidthChroma[k] < ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
4957 NextSurfaceToAssignDETPiece = k;
4958 NextPotentialSurfaceToAssignDETPieceFound = true;
4959 }
4960 #ifdef __DML_VBA_DEBUG__
4961 dml_print("DML::%s: j=%u k=%u, DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
4962 dml_print("DML::%s: j=%u k=%u, NextPotentialSurfaceToAssignDETPieceFound = %u\n", __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
4963 #endif
4964 }
4965
4966 if (NextPotentialSurfaceToAssignDETPieceFound) {
4967 // Note: To show the banker's rounding behavior in VBA and also the fact that the DET buffer size varies due to precision issue
4968 //
4969 //dml_float_t tmp1 = ((dml_float_t) DETBufferSizePoolInKByte * (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / BandwidthOfSurfacesNotAssignedDETPiece /
4970 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
4971 //dml_float_t tmp2 = dml_round((dml_float_t) DETBufferSizePoolInKByte * (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / BandwidthOfSurfacesNotAssignedDETPiece /
4972 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
4973 //
4974 //dml_print("DML::%s: j=%u, tmp1 = %f\n", __func__, j, tmp1);
4975 //dml_print("DML::%s: j=%u, tmp2 = %f\n", __func__, j, tmp2);
4976
4977 NextDETBufferPieceInKByte = (dml_uint_t)(dml_min(
4978 dml_round((dml_float_t) DETBufferSizePoolInKByte * (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / BandwidthOfSurfacesNotAssignedDETPiece /
4979 ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte), true)
4980 * (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte,
4981 dml_floor((dml_float_t) DETBufferSizePoolInKByte, (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte)));
4982
4983 #ifdef __DML_VBA_DEBUG__
4984 dml_print("DML::%s: j=%u, DETBufferSizePoolInKByte = %u\n", __func__, j, DETBufferSizePoolInKByte);
4985 dml_print("DML::%s: j=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, NextSurfaceToAssignDETPiece);
4986 dml_print("DML::%s: j=%u, ReadBandwidthLuma[%u] = %f\n", __func__, j, NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
4987 dml_print("DML::%s: j=%u, ReadBandwidthChroma[%u] = %f\n", __func__, j, NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
4988 dml_print("DML::%s: j=%u, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
4989 dml_print("DML::%s: j=%u, NextDETBufferPieceInKByte = %u\n", __func__, j, NextDETBufferPieceInKByte);
4990 dml_print("DML::%s: j=%u, DETBufferSizeInKByte[%u] increases from %u ", __func__, j, NextSurfaceToAssignDETPiece, DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
4991 #endif
4992
4993 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] = DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] + NextDETBufferPieceInKByte / (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
4994 #ifdef __DML_VBA_DEBUG__
4995 dml_print("to %u\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
4996 #endif
4997
4998 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
4999 DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
5000 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
5001 }
5002 }
5003 *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
5004 }
5005 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / ConfigReturnBufferSegmentSizeInkByte;
5006
5007 #ifdef __DML_VBA_DEBUG__
5008 dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__);
5009 dml_print("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *CompressedBufferSizeInkByte);
5010 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
5011 dml_print("DML::%s: k=%u DETBufferSizeInKByte = %u (TotalReadBandWidth=%f)\n", __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
5012 }
5013 #endif
5014 } // CalculateDETBufferSize
5015
5016
5017 /// @brief Calculate the bound for return buffer sizing
CalculateMaxDETAndMinCompressedBufferSize(dml_uint_t ConfigReturnBufferSizeInKByte,dml_uint_t ConfigReturnBufferSegmentSizeInKByte,dml_uint_t ROBBufferSizeInKByte,dml_uint_t MaxNumDPP,dml_bool_t nomDETInKByteOverrideEnable,dml_uint_t nomDETInKByteOverrideValue,dml_uint_t * MaxTotalDETInKByte,dml_uint_t * nomDETInKByte,dml_uint_t * MinCompressedBufferSizeInKByte)5018 static void CalculateMaxDETAndMinCompressedBufferSize(
5019 dml_uint_t ConfigReturnBufferSizeInKByte,
5020 dml_uint_t ConfigReturnBufferSegmentSizeInKByte,
5021 dml_uint_t ROBBufferSizeInKByte,
5022 dml_uint_t MaxNumDPP,
5023 dml_bool_t nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
5024 dml_uint_t nomDETInKByteOverrideValue, // VBA_DELTA
5025
5026 // Output
5027 dml_uint_t *MaxTotalDETInKByte,
5028 dml_uint_t *nomDETInKByte,
5029 dml_uint_t *MinCompressedBufferSizeInKByte)
5030 {
5031 *MaxTotalDETInKByte = ConfigReturnBufferSizeInKByte - ConfigReturnBufferSegmentSizeInKByte;
5032 *nomDETInKByte = (dml_uint_t)(dml_floor((dml_float_t) *MaxTotalDETInKByte / (dml_float_t) MaxNumDPP, ConfigReturnBufferSegmentSizeInKByte));
5033 *MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte;
5034
5035 #ifdef __DML_VBA_DEBUG__
5036 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte);
5037 dml_print("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, ROBBufferSizeInKByte);
5038 dml_print("DML::%s: MaxNumDPP = %u\n", __func__, MaxNumDPP);
5039 dml_print("DML::%s: MaxTotalDETInKByte = %u\n", __func__, *MaxTotalDETInKByte);
5040 dml_print("DML::%s: nomDETInKByte = %u\n", __func__, *nomDETInKByte);
5041 dml_print("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, *MinCompressedBufferSizeInKByte);
5042 #endif
5043
5044 if (nomDETInKByteOverrideEnable) {
5045 *nomDETInKByte = nomDETInKByteOverrideValue;
5046 dml_print("DML::%s: nomDETInKByte = %u (overrided)\n", __func__, *nomDETInKByte);
5047 }
5048 } // CalculateMaxDETAndMinCompressedBufferSize
5049
5050 /// @brief Calculate all the RQ request attributes, like row height and # swath
CalculateVMRowAndSwath(struct display_mode_lib_scratch_st * scratch,struct CalculateVMRowAndSwath_params_st * p)5051 static void CalculateVMRowAndSwath(struct display_mode_lib_scratch_st *scratch,
5052 struct CalculateVMRowAndSwath_params_st *p)
5053 {
5054 struct CalculateVMRowAndSwath_locals_st *s = &scratch->CalculateVMRowAndSwath_locals;
5055
5056 s->HostVMDynamicLevels = CalculateHostVMDynamicLevels(p->GPUVMEnable, p->HostVMEnable, p->HostVMMinPageSize, p->HostVMMaxNonCachedPageTableLevels);
5057
5058 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
5059 if (p->HostVMEnable == true) {
5060 p->vm_group_bytes[k] = 512;
5061 p->dpte_group_bytes[k] = 512;
5062 } else if (p->GPUVMEnable == true) {
5063 p->vm_group_bytes[k] = 2048;
5064 if (p->GPUVMMinPageSizeKBytes[k] >= 64 && dml_is_vertical_rotation(p->myPipe[k].SourceScan)) {
5065 p->dpte_group_bytes[k] = 512;
5066 } else {
5067 p->dpte_group_bytes[k] = 2048;
5068 }
5069 } else {
5070 p->vm_group_bytes[k] = 0;
5071 p->dpte_group_bytes[k] = 0;
5072 }
5073
5074 if (p->myPipe[k].SourcePixelFormat == dml_420_8 || p->myPipe[k].SourcePixelFormat == dml_420_10 ||
5075 p->myPipe[k].SourcePixelFormat == dml_420_12 || p->myPipe[k].SourcePixelFormat == dml_rgbe_alpha) {
5076 if ((p->myPipe[k].SourcePixelFormat == dml_420_10 || p->myPipe[k].SourcePixelFormat == dml_420_12) && !dml_is_vertical_rotation(p->myPipe[k].SourceScan)) {
5077 s->PTEBufferSizeInRequestsForLuma[k] = (p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma) / 2;
5078 s->PTEBufferSizeInRequestsForChroma[k] = s->PTEBufferSizeInRequestsForLuma[k];
5079 } else {
5080 s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma;
5081 s->PTEBufferSizeInRequestsForChroma[k] = p->PTEBufferSizeInRequestsChroma;
5082 }
5083
5084 s->PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
5085 p->myPipe[k].ViewportStationary,
5086 p->myPipe[k].DCCEnable,
5087 p->myPipe[k].DPPPerSurface,
5088 p->myPipe[k].BlockHeight256BytesC,
5089 p->myPipe[k].BlockWidth256BytesC,
5090 p->myPipe[k].SourcePixelFormat,
5091 p->myPipe[k].SurfaceTiling,
5092 p->myPipe[k].BytePerPixelC,
5093 p->myPipe[k].SourceScan,
5094 p->SwathWidthC[k],
5095 p->myPipe[k].ViewportHeightChroma,
5096 p->myPipe[k].ViewportXStartC,
5097 p->myPipe[k].ViewportYStartC,
5098 p->GPUVMEnable,
5099 p->GPUVMMaxPageTableLevels,
5100 p->GPUVMMinPageSizeKBytes[k],
5101 s->PTEBufferSizeInRequestsForChroma[k],
5102 p->myPipe[k].PitchC,
5103 p->myPipe[k].DCCMetaPitchC,
5104 p->myPipe[k].BlockWidthC,
5105 p->myPipe[k].BlockHeightC,
5106
5107 // Output
5108 &s->MetaRowByteC[k],
5109 &s->PixelPTEBytesPerRowC[k],
5110 &s->PixelPTEBytesPerRowStorageC[k],
5111 &p->dpte_row_width_chroma_ub[k],
5112 &p->dpte_row_height_chroma[k],
5113 &p->dpte_row_height_linear_chroma[k],
5114 &s->PixelPTEBytesPerRowC_one_row_per_frame[k],
5115 &s->dpte_row_width_chroma_ub_one_row_per_frame[k],
5116 &s->dpte_row_height_chroma_one_row_per_frame[k],
5117 &p->meta_req_width_chroma[k],
5118 &p->meta_req_height_chroma[k],
5119 &p->meta_row_width_chroma[k],
5120 &p->meta_row_height_chroma[k],
5121 &p->PixelPTEReqWidthC[k],
5122 &p->PixelPTEReqHeightC[k],
5123 &p->PTERequestSizeC[k],
5124 &p->dpde0_bytes_per_frame_ub_c[k],
5125 &p->meta_pte_bytes_per_frame_ub_c[k]);
5126
5127 p->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines (
5128 p->myPipe[k].VRatioChroma,
5129 p->myPipe[k].VTapsChroma,
5130 p->myPipe[k].InterlaceEnable,
5131 p->myPipe[k].ProgressiveToInterlaceUnitInOPP,
5132 p->myPipe[k].SwathHeightC,
5133 p->myPipe[k].SourceScan,
5134 p->myPipe[k].ViewportStationary,
5135 p->SwathWidthC[k],
5136 p->myPipe[k].ViewportHeightChroma,
5137 p->myPipe[k].ViewportXStartC,
5138 p->myPipe[k].ViewportYStartC,
5139
5140 // Output
5141 &p->VInitPreFillC[k],
5142 &p->MaxNumSwathC[k]);
5143 } else {
5144 s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma;
5145 s->PTEBufferSizeInRequestsForChroma[k] = 0;
5146 s->PixelPTEBytesPerRowC[k] = 0;
5147 s->PixelPTEBytesPerRowStorageC[k] = 0;
5148 s->PDEAndMetaPTEBytesFrameC = 0;
5149 s->MetaRowByteC[k] = 0;
5150 p->MaxNumSwathC[k] = 0;
5151 p->PrefetchSourceLinesC[k] = 0;
5152 s->dpte_row_height_chroma_one_row_per_frame[k] = 0;
5153 s->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
5154 s->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
5155 }
5156
5157 s->PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
5158 p->myPipe[k].ViewportStationary,
5159 p->myPipe[k].DCCEnable,
5160 p->myPipe[k].DPPPerSurface,
5161 p->myPipe[k].BlockHeight256BytesY,
5162 p->myPipe[k].BlockWidth256BytesY,
5163 p->myPipe[k].SourcePixelFormat,
5164 p->myPipe[k].SurfaceTiling,
5165 p->myPipe[k].BytePerPixelY,
5166 p->myPipe[k].SourceScan,
5167 p->SwathWidthY[k],
5168 p->myPipe[k].ViewportHeight,
5169 p->myPipe[k].ViewportXStart,
5170 p->myPipe[k].ViewportYStart,
5171 p->GPUVMEnable,
5172 p->GPUVMMaxPageTableLevels,
5173 p->GPUVMMinPageSizeKBytes[k],
5174 s->PTEBufferSizeInRequestsForLuma[k],
5175 p->myPipe[k].PitchY,
5176 p->myPipe[k].DCCMetaPitchY,
5177 p->myPipe[k].BlockWidthY,
5178 p->myPipe[k].BlockHeightY,
5179
5180 // Output
5181 &s->MetaRowByteY[k],
5182 &s->PixelPTEBytesPerRowY[k],
5183 &s->PixelPTEBytesPerRowStorageY[k],
5184 &p->dpte_row_width_luma_ub[k],
5185 &p->dpte_row_height_luma[k],
5186 &p->dpte_row_height_linear_luma[k],
5187 &s->PixelPTEBytesPerRowY_one_row_per_frame[k],
5188 &s->dpte_row_width_luma_ub_one_row_per_frame[k],
5189 &s->dpte_row_height_luma_one_row_per_frame[k],
5190 &p->meta_req_width[k],
5191 &p->meta_req_height[k],
5192 &p->meta_row_width[k],
5193 &p->meta_row_height[k],
5194 &p->PixelPTEReqWidthY[k],
5195 &p->PixelPTEReqHeightY[k],
5196 &p->PTERequestSizeY[k],
5197 &p->dpde0_bytes_per_frame_ub_l[k],
5198 &p->meta_pte_bytes_per_frame_ub_l[k]);
5199
5200 p->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
5201 p->myPipe[k].VRatio,
5202 p->myPipe[k].VTaps,
5203 p->myPipe[k].InterlaceEnable,
5204 p->myPipe[k].ProgressiveToInterlaceUnitInOPP,
5205 p->myPipe[k].SwathHeightY,
5206 p->myPipe[k].SourceScan,
5207 p->myPipe[k].ViewportStationary,
5208 p->SwathWidthY[k],
5209 p->myPipe[k].ViewportHeight,
5210 p->myPipe[k].ViewportXStart,
5211 p->myPipe[k].ViewportYStart,
5212
5213 // Output
5214 &p->VInitPreFillY[k],
5215 &p->MaxNumSwathY[k]);
5216
5217 p->PDEAndMetaPTEBytesFrame[k] = (s->PDEAndMetaPTEBytesFrameY + s->PDEAndMetaPTEBytesFrameC) * (1 + 8 * s->HostVMDynamicLevels);
5218 p->MetaRowByte[k] = s->MetaRowByteY[k] + s->MetaRowByteC[k];
5219
5220 if (s->PixelPTEBytesPerRowStorageY[k] <= 64 * s->PTEBufferSizeInRequestsForLuma[k] && s->PixelPTEBytesPerRowStorageC[k] <= 64 * s->PTEBufferSizeInRequestsForChroma[k]) {
5221 p->PTEBufferSizeNotExceeded[k] = true;
5222 } else {
5223 p->PTEBufferSizeNotExceeded[k] = false;
5224 #ifdef __DML_VBA_DEBUG__
5225 dml_print("DML::%s: k=%u, PixelPTEBytesPerRowY = %u\n", __func__, k, s->PixelPTEBytesPerRowY[k]);
5226 dml_print("DML::%s: k=%u, PixelPTEBytesPerRowC = %u\n", __func__, k, s->PixelPTEBytesPerRowC[k]);
5227 dml_print("DML::%s: k=%u, PixelPTEBytesPerRowStorageY = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageY[k]);
5228 dml_print("DML::%s: k=%u, PixelPTEBytesPerRowStorageC = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageC[k]);
5229 dml_print("DML::%s: k=%u, PTEBufferSizeInRequestsForLuma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForLuma[k]);
5230 dml_print("DML::%s: k=%u, PTEBufferSizeInRequestsForChroma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForChroma[k]);
5231 dml_print("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]);
5232 #endif
5233 }
5234 s->one_row_per_frame_fits_in_buffer[k] = (s->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForLuma[k] &&
5235 s->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForChroma[k]);
5236
5237 #ifdef __DML_VBA_DEBUG__
5238 dml_print("DML::%s: k=%u, PDEAndMetaPTEBytesFrame = %u\n", __func__, k, p->PDEAndMetaPTEBytesFrame[k]);
5239 dml_print("DML::%s: k=%u, PDEAndMetaPTEBytesFrameY = %u\n", __func__, k, s->PDEAndMetaPTEBytesFrameY);
5240 dml_print("DML::%s: k=%u, PDEAndMetaPTEBytesFrameC = %u\n", __func__, k, s->PDEAndMetaPTEBytesFrameC);
5241 dml_print("DML::%s: k=%u, HostVMDynamicLevels = %u\n", __func__, k, s->HostVMDynamicLevels);
5242 dml_print("DML::%s: k=%u, one_row_per_frame_fits_in_buffer = %u\n", __func__, k, s->one_row_per_frame_fits_in_buffer[k]);
5243 dml_print("DML::%s: k=%u, PixelPTEBytesPerRowY_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowY_one_row_per_frame[k]);
5244 dml_print("DML::%s: k=%u, PixelPTEBytesPerRowC_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowC_one_row_per_frame[k]);
5245 #endif
5246 }
5247
5248 CalculateMALLUseForStaticScreen(
5249 p->NumberOfActiveSurfaces,
5250 p->MALLAllocatedForDCN,
5251 p->UseMALLForStaticScreen, // mode
5252 p->SurfaceSizeInMALL,
5253 s->one_row_per_frame_fits_in_buffer,
5254 // Output
5255 p->UsesMALLForStaticScreen); // boolen
5256
5257 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
5258 if (p->PTEBufferModeOverrideEn[k] == 1) {
5259 p->PTE_BUFFER_MODE[k] = p->PTEBufferModeOverrideVal[k];
5260 }
5261 p->PTE_BUFFER_MODE[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->UsesMALLForStaticScreen[k] || (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_sub_viewport) ||
5262 (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) || (p->GPUVMMinPageSizeKBytes[k] > 64);
5263 p->BIGK_FRAGMENT_SIZE[k] = (dml_uint_t)(dml_log2(p->GPUVMMinPageSizeKBytes[k] * 1024) - 12);
5264 }
5265
5266 for (dml_uint_t k = 0; k < p->NumberOfActiveSurfaces; ++k) {
5267 #ifdef __DML_VBA_DEBUG__
5268 dml_print("DML::%s: k=%u, SurfaceSizeInMALL = %u\n", __func__, k, p->SurfaceSizeInMALL[k]);
5269 dml_print("DML::%s: k=%u, UsesMALLForStaticScreen = %u\n", __func__, k, p->UsesMALLForStaticScreen[k]);
5270 #endif
5271 p->use_one_row_for_frame[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->UsesMALLForStaticScreen[k] || (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_sub_viewport) ||
5272 (p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe) || (p->GPUVMMinPageSizeKBytes[k] > 64 && dml_is_vertical_rotation(p->myPipe[k].SourceScan));
5273
5274 p->use_one_row_for_frame_flip[k] = p->use_one_row_for_frame[k] && !(p->UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame);
5275
5276 if (p->use_one_row_for_frame[k]) {
5277 p->dpte_row_height_luma[k] = s->dpte_row_height_luma_one_row_per_frame[k];
5278 p->dpte_row_width_luma_ub[k] = s->dpte_row_width_luma_ub_one_row_per_frame[k];
5279 s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY_one_row_per_frame[k];
5280 p->dpte_row_height_chroma[k] = s->dpte_row_height_chroma_one_row_per_frame[k];
5281 p->dpte_row_width_chroma_ub[k] = s->dpte_row_width_chroma_ub_one_row_per_frame[k];
5282 s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC_one_row_per_frame[k];
5283 p->PTEBufferSizeNotExceeded[k] = s->one_row_per_frame_fits_in_buffer[k];
5284 }
5285
5286 if (p->MetaRowByte[k] <= p->DCCMetaBufferSizeBytes) {
5287 p->DCCMetaBufferSizeNotExceeded[k] = true;
5288 } else {
5289 p->DCCMetaBufferSizeNotExceeded[k] = false;
5290
5291 #ifdef __DML_VBA_DEBUG__
5292 dml_print("DML::%s: k=%u, MetaRowByte = %u\n", __func__, k, p->MetaRowByte[k]);
5293 dml_print("DML::%s: k=%u, DCCMetaBufferSizeBytes = %u\n", __func__, k, p->DCCMetaBufferSizeBytes);
5294 dml_print("DML::%s: k=%u, DCCMetaBufferSizeNotExceeded = %u\n", __func__, k, p->DCCMetaBufferSizeNotExceeded[k]);
5295 #endif
5296 }
5297 s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY[k] * (1 + 8 * s->HostVMDynamicLevels);
5298 s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC[k] * (1 + 8 * s->HostVMDynamicLevels);
5299 p->PixelPTEBytesPerRow[k] = s->PixelPTEBytesPerRowY[k] + s->PixelPTEBytesPerRowC[k];
5300 if (p->use_one_row_for_frame[k])
5301 p->PixelPTEBytesPerRow[k] = p->PixelPTEBytesPerRow[k] / 2;
5302
5303 CalculateRowBandwidth(
5304 p->GPUVMEnable,
5305 p->myPipe[k].SourcePixelFormat,
5306 p->myPipe[k].VRatio,
5307 p->myPipe[k].VRatioChroma,
5308 p->myPipe[k].DCCEnable,
5309 p->myPipe[k].HTotal / p->myPipe[k].PixelClock,
5310 s->MetaRowByteY[k],
5311 s->MetaRowByteC[k],
5312 p->meta_row_height[k],
5313 p->meta_row_height_chroma[k],
5314 s->PixelPTEBytesPerRowY[k],
5315 s->PixelPTEBytesPerRowC[k],
5316 p->dpte_row_height_luma[k],
5317 p->dpte_row_height_chroma[k],
5318
5319 // Output
5320 &p->meta_row_bw[k],
5321 &p->dpte_row_bw[k]);
5322 #ifdef __DML_VBA_DEBUG__
5323 dml_print("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]);
5324 dml_print("DML::%s: k=%u, use_one_row_for_frame_flip = %u\n", __func__, k, p->use_one_row_for_frame_flip[k]);
5325 dml_print("DML::%s: k=%u, UseMALLForPStateChange = %u\n", __func__, k, p->UseMALLForPStateChange[k]);
5326 dml_print("DML::%s: k=%u, dpte_row_height_luma = %u\n", __func__, k, p->dpte_row_height_luma[k]);
5327 dml_print("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]);
5328 dml_print("DML::%s: k=%u, PixelPTEBytesPerRowY = %u\n", __func__, k, s->PixelPTEBytesPerRowY[k]);
5329 dml_print("DML::%s: k=%u, dpte_row_height_chroma = %u\n", __func__, k, p->dpte_row_height_chroma[k]);
5330 dml_print("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]);
5331 dml_print("DML::%s: k=%u, PixelPTEBytesPerRowC = %u\n", __func__, k, s->PixelPTEBytesPerRowC[k]);
5332 dml_print("DML::%s: k=%u, PixelPTEBytesPerRow = %u\n", __func__, k, p->PixelPTEBytesPerRow[k]);
5333 dml_print("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]);
5334 dml_print("DML::%s: k=%u, PTE_BUFFER_MODE = %u\n", __func__, k, p->PTE_BUFFER_MODE[k]);
5335 dml_print("DML::%s: k=%u, BIGK_FRAGMENT_SIZE = %u\n", __func__, k, p->BIGK_FRAGMENT_SIZE[k]);
5336 #endif
5337 }
5338 }
5339
/// @brief Determine the output bpp, link type/rate and DSC/FEC needs for one output
///
/// Nothing beyond the "unknown"/zero defaults is reported unless
/// IsMainSurfaceUsingTheIndicatedTiming is set and Output is HDMI, DP, DP2.0
/// or eDP.
///
/// HDMI asks TruncToValidBPP() for a bpp at min(600, PHYCLKPerState) * 10 on
/// 3 lanes, without DSC.
///
/// DP, DP2.0 and eDP walk their link rates from slowest to fastest. A rate is
/// tried when it is selected by OutputLinkDPRate (or the selection is
/// dml_dp_rate_na), no earlier rate produced a non-zero bpp, and the PHY clock
/// reaches that rate's threshold. If the attempt yields 0 bpp, DSC is "enable
/// if necessary", no bpp is forced, and the PHY clock is below the next rate's
/// threshold (the fastest rate has no such limit), the attempt is repeated
/// with DSC on. OutputType/OutputRate report the last rate that was tried,
/// whether or not it produced a bpp.
///
/// RequiredSlots is only written by TruncToValidBPP() on the DP paths; the
/// HDMI path hands it a throw-away variable. PHYCLKD18PerState is not used.
static void CalculateOutputLink(
		dml_float_t PHYCLKPerState,
		dml_float_t PHYCLKD18PerState,
		dml_float_t PHYCLKD32PerState,
		dml_float_t Downspreading,
		dml_bool_t IsMainSurfaceUsingTheIndicatedTiming,
		enum dml_output_encoder_class Output,
		enum dml_output_format_class OutputFormat,
		dml_uint_t HTotal,
		dml_uint_t HActive,
		dml_float_t PixelClockBackEnd,
		dml_float_t ForcedOutputLinkBPP,
		dml_uint_t DSCInputBitPerComponent,
		dml_uint_t NumberOfDSCSlices,
		dml_float_t AudioSampleRate,
		dml_uint_t AudioSampleLayout,
		enum dml_odm_mode ODMModeNoDSC,
		enum dml_odm_mode ODMModeDSC,
		enum dml_dsc_enable DSCEnable,
		dml_uint_t OutputLinkDPLanes,
		enum dml_output_link_dp_rate OutputLinkDPRate,

		// Output
		dml_bool_t *RequiresDSC,
		dml_bool_t *RequiresFEC,
		dml_float_t *OutBpp,
		enum dml_output_type_and_rate__type *OutputType,
		enum dml_output_type_and_rate__rate *OutputRate,
		dml_uint_t *RequiredSlots)
{
	// One candidate link rate; the tables below list candidates slowest first.
	struct dp_rate_step {
		enum dml_output_link_dp_rate requested;		// OutputLinkDPRate value that selects this step
		enum dml_output_type_and_rate__rate reported;	// value written to *OutputRate once the step is tried
		dml_float_t min_phyclk;				// PHY clock needed before the step is tried
		dml_uint_t link_rate;				// rate handed to TruncToValidBPP(), before downspread
	};
	enum { num_rate_steps = 3 };
	static const struct dp_rate_step uhbr_steps[num_rate_steps] = {
		{ dml_dp_rate_uhbr10,   dml_output_rate_dp_rate_uhbr10,   10000 / 32.0, 10000 },
		{ dml_dp_rate_uhbr13p5, dml_output_rate_dp_rate_uhbr13p5, 13500 / 32.0, 13500 },
		{ dml_dp_rate_uhbr20,   dml_output_rate_dp_rate_uhbr20,   20000 / 32.0, 20000 },
	};
	static const struct dp_rate_step hbr_steps[num_rate_steps] = {
		{ dml_dp_rate_hbr,  dml_output_rate_dp_rate_hbr,  270, 2700 },
		{ dml_dp_rate_hbr2, dml_output_rate_dp_rate_hbr2, 540, 5400 },
		{ dml_dp_rate_hbr3, dml_output_rate_dp_rate_hbr3, 810, 8100 }, // VBA_ERROR, vba code doesn't have hbr3 check
	};

	const struct dp_rate_step *steps;
	enum dml_output_type_and_rate__type link_type;
	dml_float_t phyclk;
	dml_bool_t LinkDSCEnable;
	dml_uint_t unused_slots;

	*RequiresDSC = false;
	*RequiresFEC = false;
	*OutBpp = 0;

	*OutputType = dml_output_type_unknown;
	*OutputRate = dml_output_rate_unknown;

	if (!IsMainSurfaceUsingTheIndicatedTiming)
		return;

	if (Output == dml_hdmi) {
		// DSC and FEC stay off; the slot count is not reported for HDMI.
		*OutBpp = TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, false, Output,
				OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &unused_slots);
		//OutputTypeAndRate = "HDMI";
		*OutputType = dml_output_type_hdmi;
		return;
	}

	if (Output != dml_dp && Output != dml_dp2p0 && Output != dml_edp)
		return;

	// Forced DSC needs FEC on DP and DP2.0; DP2.0 needs FEC even without DSC; eDP never does.
	LinkDSCEnable = (DSCEnable == dml_dsc_enable);
	*RequiresDSC = LinkDSCEnable;
	*RequiresFEC = (Output == dml_dp2p0) || (LinkDSCEnable && Output == dml_dp);

	if (Output == dml_dp2p0) {
		steps = uhbr_steps;
		phyclk = PHYCLKD32PerState;
		link_type = dml_output_type_dp2p0;
	} else { // output is dp or edp
		steps = hbr_steps;
		phyclk = PHYCLKPerState;
		link_type = (Output == dml_dp) ? dml_output_type_dp : dml_output_type_edp;
	}

	for (dml_uint_t i = 0; i < num_rate_steps; ++i) {
		const struct dp_rate_step *step = &steps[i];
		// The DSC retry is skipped when the PHY clock already reaches the next rate.
		const dml_bool_t next_rate_out_of_reach = (i + 1 == num_rate_steps) || (phyclk < steps[i + 1].min_phyclk);

		if (OutputLinkDPRate != dml_dp_rate_na && OutputLinkDPRate != step->requested)
			continue;
		// *OutBpp is still 0 on the first step, so this only filters later steps.
		if (!(*OutBpp == 0 && phyclk >= step->min_phyclk))
			continue;

		*OutBpp = TruncToValidBPP((1 - Downspreading / 100) * step->link_rate, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
				OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);

		if (*OutBpp == 0 && next_rate_out_of_reach && DSCEnable == dml_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
			// Retry with DSC. LinkDSCEnable stays set for any later step as well.
			*RequiresDSC = true;
			LinkDSCEnable = true;
			if (Output == dml_dp)
				*RequiresFEC = true;
			*OutBpp = TruncToValidBPP((1 - Downspreading / 100) * step->link_rate, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
					OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (dml_uint_t)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
		}

		//OutputTypeAndRate = Output & " <rate>";
		*OutputType = link_type;
		*OutputRate = step->reported;
	}
}
5504
5505 /// @brief Determine the ODM mode and number of DPP used per plane based on dispclk, dsc usage, odm usage policy
CalculateODMMode(dml_uint_t MaximumPixelsPerLinePerDSCUnit,dml_uint_t HActive,enum dml_output_encoder_class Output,enum dml_output_format_class OutputFormat,enum dml_odm_use_policy ODMUse,dml_float_t StateDispclk,dml_float_t MaxDispclk,dml_bool_t DSCEnable,dml_uint_t TotalNumberOfActiveDPP,dml_uint_t MaxNumDPP,dml_float_t PixelClock,dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,dml_float_t DISPCLKRampingMargin,dml_float_t DISPCLKDPPCLKVCOSpeed,dml_uint_t NumberOfDSCSlices,dml_bool_t * TotalAvailablePipesSupport,dml_uint_t * NumberOfDPP,enum dml_odm_mode * ODMMode,dml_float_t * RequiredDISPCLKPerSurface)5506 static void CalculateODMMode(
5507 dml_uint_t MaximumPixelsPerLinePerDSCUnit,
5508 dml_uint_t HActive,
5509 enum dml_output_encoder_class Output,
5510 enum dml_output_format_class OutputFormat,
5511 enum dml_odm_use_policy ODMUse,
5512 dml_float_t StateDispclk,
5513 dml_float_t MaxDispclk,
5514 dml_bool_t DSCEnable,
5515 dml_uint_t TotalNumberOfActiveDPP,
5516 dml_uint_t MaxNumDPP,
5517 dml_float_t PixelClock,
5518 dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
5519 dml_float_t DISPCLKRampingMargin,
5520 dml_float_t DISPCLKDPPCLKVCOSpeed,
5521 dml_uint_t NumberOfDSCSlices,
5522
5523 // Output
5524 dml_bool_t *TotalAvailablePipesSupport,
5525 dml_uint_t *NumberOfDPP,
5526 enum dml_odm_mode *ODMMode,
5527 dml_float_t *RequiredDISPCLKPerSurface)
5528 {
5529
5530 dml_float_t SurfaceRequiredDISPCLKWithoutODMCombine;
5531 dml_float_t SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
5532 dml_float_t SurfaceRequiredDISPCLKWithODMCombineFourToOne;
5533
5534 SurfaceRequiredDISPCLKWithoutODMCombine = CalculateRequiredDispclk(dml_odm_mode_bypass, PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, MaxDispclk);
5535 SurfaceRequiredDISPCLKWithODMCombineTwoToOne = CalculateRequiredDispclk(dml_odm_mode_combine_2to1, PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, MaxDispclk);
5536 SurfaceRequiredDISPCLKWithODMCombineFourToOne = CalculateRequiredDispclk(dml_odm_mode_combine_4to1, PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, MaxDispclk);
5537 *TotalAvailablePipesSupport = true;
5538
5539 if (OutputFormat == dml_420) {
5540 if (HActive > 4 * DML2_MAX_FMT_420_BUFFER_WIDTH)
5541 *TotalAvailablePipesSupport = false;
5542 else if (HActive > 2 * DML2_MAX_FMT_420_BUFFER_WIDTH)
5543 ODMUse = dml_odm_use_policy_combine_4to1;
5544 else if (HActive > DML2_MAX_FMT_420_BUFFER_WIDTH && ODMUse != dml_odm_use_policy_combine_4to1)
5545 ODMUse = dml_odm_use_policy_combine_2to1;
5546 if (Output == dml_hdmi && ODMUse == dml_odm_use_policy_combine_2to1)
5547 *TotalAvailablePipesSupport = false;
5548 if ((Output == dml_hdmi || Output == dml_dp || Output == dml_edp) && ODMUse == dml_odm_use_policy_combine_4to1)
5549 *TotalAvailablePipesSupport = false;
5550 }
5551
5552 if (ODMUse == dml_odm_use_policy_bypass || ODMUse == dml_odm_use_policy_combine_as_needed)
5553 *ODMMode = dml_odm_mode_bypass;
5554 else if (ODMUse == dml_odm_use_policy_combine_2to1)
5555 *ODMMode = dml_odm_mode_combine_2to1;
5556 else if (ODMUse == dml_odm_use_policy_combine_4to1)
5557 *ODMMode = dml_odm_mode_combine_4to1;
5558 else if (ODMUse == dml_odm_use_policy_split_1to2)
5559 *ODMMode = dml_odm_mode_split_1to2;
5560 else if (ODMUse == dml_odm_use_policy_mso_1to2)
5561 *ODMMode = dml_odm_mode_mso_1to2;
5562 else if (ODMUse == dml_odm_use_policy_mso_1to4)
5563 *ODMMode = dml_odm_mode_mso_1to4;
5564
5565 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
5566 *NumberOfDPP = 0;
5567
5568 if (!(Output == dml_hdmi || Output == dml_dp || Output == dml_edp) && (ODMUse == dml_odm_use_policy_combine_4to1 || (ODMUse == dml_odm_use_policy_combine_as_needed &&
5569 (SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk || (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit)) || NumberOfDSCSlices > 8)))) {
5570 if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
5571 *ODMMode = dml_odm_mode_combine_4to1;
5572 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
5573 *NumberOfDPP = 4;
5574 } else {
5575 *TotalAvailablePipesSupport = false;
5576 }
5577 } else if (Output != dml_hdmi && (ODMUse == dml_odm_use_policy_combine_2to1 || (ODMUse == dml_odm_use_policy_combine_as_needed &&
5578 ((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk && SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
5579 (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit)) || (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) {
5580 if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
5581 *ODMMode = dml_odm_mode_combine_2to1;
5582 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
5583 *NumberOfDPP = 2;
5584 } else {
5585 *TotalAvailablePipesSupport = false;
5586 }
5587 } else {
5588 if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP) {
5589 *NumberOfDPP = 1;
5590 } else {
5591 *TotalAvailablePipesSupport = false;
5592 }
5593 }
5594 }
5595
5596 /// @brief Calculate the required DISPCLK given the odm mode and pixclk
CalculateRequiredDispclk(enum dml_odm_mode ODMMode,dml_float_t PixelClock,dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,dml_float_t DISPCLKRampingMargin,dml_float_t DISPCLKDPPCLKVCOSpeed,dml_float_t MaxDispclk)5597 static dml_float_t CalculateRequiredDispclk(
5598 enum dml_odm_mode ODMMode,
5599 dml_float_t PixelClock,
5600 dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
5601 dml_float_t DISPCLKRampingMargin,
5602 dml_float_t DISPCLKDPPCLKVCOSpeed,
5603 dml_float_t MaxDispclk)
5604 {
5605 dml_float_t RequiredDispclk = 0.;
5606 dml_float_t PixelClockAfterODM;
5607
5608 dml_float_t DISPCLKWithRampingRoundedToDFSGranularity;
5609 dml_float_t DISPCLKWithoutRampingRoundedToDFSGranularity;
5610 dml_float_t MaxDispclkRoundedDownToDFSGranularity;
5611
5612 if (ODMMode == dml_odm_mode_combine_4to1) {
5613 PixelClockAfterODM = PixelClock / 4;
5614 } else if (ODMMode == dml_odm_mode_combine_2to1) {
5615 PixelClockAfterODM = PixelClock / 2;
5616 } else {
5617 PixelClockAfterODM = PixelClock;
5618 }
5619
5620 DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularity(PixelClockAfterODM * (1.0 + DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * (1 + DISPCLKRampingMargin / 100.0), 1, DISPCLKDPPCLKVCOSpeed);
5621 DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularity(PixelClockAfterODM * (1.0 + DISPCLKDPPCLKDSCCLKDownSpreading / 100.0), 1, DISPCLKDPPCLKVCOSpeed);
5622 MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed);
5623
5624 if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity) {
5625 RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity;
5626 } else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity) {
5627 RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity;
5628 } else {
5629 RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity;
5630 }
5631
5632 return RequiredDispclk;
5633 }
5634
5635 /// @brief Determine DPPCLK if there only one DPP per plane, main factor is the pixel rate and DPP scaling parameter
CalculateSinglePipeDPPCLKAndSCLThroughput(dml_float_t HRatio,dml_float_t HRatioChroma,dml_float_t VRatio,dml_float_t VRatioChroma,dml_float_t MaxDCHUBToPSCLThroughput,dml_float_t MaxPSCLToLBThroughput,dml_float_t PixelClock,enum dml_source_format_class SourcePixelFormat,dml_uint_t HTaps,dml_uint_t HTapsChroma,dml_uint_t VTaps,dml_uint_t VTapsChroma,dml_float_t * PSCL_THROUGHPUT,dml_float_t * PSCL_THROUGHPUT_CHROMA,dml_float_t * DPPCLKUsingSingleDPP)5636 static void CalculateSinglePipeDPPCLKAndSCLThroughput(
5637 dml_float_t HRatio,
5638 dml_float_t HRatioChroma,
5639 dml_float_t VRatio,
5640 dml_float_t VRatioChroma,
5641 dml_float_t MaxDCHUBToPSCLThroughput,
5642 dml_float_t MaxPSCLToLBThroughput,
5643 dml_float_t PixelClock,
5644 enum dml_source_format_class SourcePixelFormat,
5645 dml_uint_t HTaps,
5646 dml_uint_t HTapsChroma,
5647 dml_uint_t VTaps,
5648 dml_uint_t VTapsChroma,
5649
5650 // Output
5651 dml_float_t *PSCL_THROUGHPUT,
5652 dml_float_t *PSCL_THROUGHPUT_CHROMA,
5653 dml_float_t *DPPCLKUsingSingleDPP)
5654 {
5655 dml_float_t DPPCLKUsingSingleDPPLuma;
5656 dml_float_t DPPCLKUsingSingleDPPChroma;
5657
5658 if (HRatio > 1) {
5659 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / dml_ceil((dml_float_t) HTaps / 6.0, 1.0));
5660 } else {
5661 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
5662 }
5663
5664 DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio / *PSCL_THROUGHPUT, 1);
5665
5666 if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
5667 DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
5668
5669 if ((SourcePixelFormat != dml_420_8 && SourcePixelFormat != dml_420_10 && SourcePixelFormat != dml_420_12 && SourcePixelFormat != dml_rgbe_alpha)) {
5670 *PSCL_THROUGHPUT_CHROMA = 0;
5671 *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
5672 } else {
5673 if (HRatioChroma > 1) {
5674 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatioChroma / dml_ceil((dml_float_t) HTapsChroma / 6.0, 1.0));
5675 } else {
5676 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
5677 }
5678 DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma),
5679 HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
5680 if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
5681 DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
5682 *DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
5683 }
5684 }
5685
5686 /// @brief Calculate the actual dppclk freq
5687 /// @param DPPCLKUsingSingleDPP DppClk freq required if there is only 1 DPP per plane
5688 /// @param DPPPerSurface Number of DPP for each plane
CalculateDPPCLK(dml_uint_t NumberOfActiveSurfaces,dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,dml_float_t DISPCLKDPPCLKVCOSpeed,dml_float_t DPPCLKUsingSingleDPP[],dml_uint_t DPPPerSurface[],dml_float_t * GlobalDPPCLK,dml_float_t Dppclk[])5689 static void CalculateDPPCLK(
5690 dml_uint_t NumberOfActiveSurfaces,
5691 dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
5692 dml_float_t DISPCLKDPPCLKVCOSpeed,
5693 dml_float_t DPPCLKUsingSingleDPP[],
5694 dml_uint_t DPPPerSurface[],
5695
5696 // Output
5697 dml_float_t *GlobalDPPCLK,
5698 dml_float_t Dppclk[])
5699 {
5700 *GlobalDPPCLK = 0;
5701 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
5702 Dppclk[k] = DPPCLKUsingSingleDPP[k] / DPPPerSurface[k] * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
5703 *GlobalDPPCLK = dml_max(*GlobalDPPCLK, Dppclk[k]);
5704 }
5705 *GlobalDPPCLK = RoundToDFSGranularity(*GlobalDPPCLK, 1, DISPCLKDPPCLKVCOSpeed);
5706
5707 dml_print("DML::%s: GlobalDPPCLK = %f\n", __func__, *GlobalDPPCLK);
5708 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
5709 Dppclk[k] = *GlobalDPPCLK / 255.0 * dml_ceil(Dppclk[k] * 255.0 / *GlobalDPPCLK, 1.0);
5710 dml_print("DML::%s: Dppclk[%0d] = %f\n", __func__, k, Dppclk[k]);
5711 }
5712 }
5713
/// @brief Decide, per surface, whether MALL is used for static screen
///
/// Surfaces whose request is dml_use_mall_static_screen_enable are always marked
/// and their sizes summed. After that, the remaining surfaces that are not
/// dml_use_mall_static_screen_disable and have one_row_per_frame_fits_in_buffer
/// set are added one at a time, smallest SurfaceSizeInMALL first, for as long as
/// the running total stays within MALLAllocatedForDCNFinal * 1024 * 1024.
///
/// @param NumberOfActiveSurfaces Number of entries processed in each array
/// @param UseMALLForStaticScreen Per-surface request
/// @param[out] UsesMALLForStaticScreen Per-surface decision
static void CalculateMALLUseForStaticScreen(
		dml_uint_t NumberOfActiveSurfaces,
		dml_uint_t MALLAllocatedForDCNFinal,
		enum dml_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
		dml_uint_t SurfaceSizeInMALL[],
		dml_bool_t one_row_per_frame_fits_in_buffer[],

		// Output
		dml_bool_t UsesMALLForStaticScreen[])
{
	dml_uint_t SizeAlreadyInMALL = 0;
	dml_uint_t SmallestCandidate = 0;
	dml_bool_t FoundCandidate;
	dml_uint_t k;

	// Pass 1: surfaces explicitly enabled go in unconditionally.
	for (k = 0; k < NumberOfActiveSurfaces; k++) {
		UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dml_use_mall_static_screen_enable);
		if (UsesMALLForStaticScreen[k])
			SizeAlreadyInMALL = SizeAlreadyInMALL + SurfaceSizeInMALL[k];
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%u, UsesMALLForStaticScreen = %u\n", __func__, k, UsesMALLForStaticScreen[k]);
		dml_print("DML::%s: k=%u, TotalSurfaceSizeInMALL = %u\n", __func__, k, SizeAlreadyInMALL);
#endif
	}

	// Pass 2: repeatedly pick the smallest eligible surface that still fits.
	do {
		FoundCandidate = false;

		for (k = 0; k < NumberOfActiveSurfaces; k++) {
			if (SizeAlreadyInMALL + SurfaceSizeInMALL[k] > MALLAllocatedForDCNFinal * 1024 * 1024)
				continue;
			if (UsesMALLForStaticScreen[k])
				continue;
			if (UseMALLForStaticScreen[k] == dml_use_mall_static_screen_disable)
				continue;
			if (!one_row_per_frame_fits_in_buffer[k])
				continue;
			// Keep the current candidate unless this one is strictly smaller.
			if (FoundCandidate && SurfaceSizeInMALL[k] >= SurfaceSizeInMALL[SmallestCandidate])
				continue;

			FoundCandidate = true;
			SmallestCandidate = k;
			dml_print("DML::%s: k=%u, UseMALLForStaticScreen = %u (dis, en, optimize)\n", __func__, k, UseMALLForStaticScreen[k]);
		}

		if (FoundCandidate) {
			UsesMALLForStaticScreen[SmallestCandidate] = true;
			SizeAlreadyInMALL = SizeAlreadyInMALL + SurfaceSizeInMALL[SmallestCandidate];

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: SurfaceToAddToMALL = %u\n", __func__, SmallestCandidate);
			dml_print("DML::%s: TotalSurfaceSizeInMALL = %u\n", __func__, SizeAlreadyInMALL);
#endif
		}
	} while (FoundCandidate);
}
5764
5765 // @brief Calculate return bw for VM only traffic
dml_get_return_bw_mbps_vm_only(const struct soc_bounding_box_st * soc,dml_bool_t use_ideal_dram_bw_strobe,dml_bool_t HostVMEnable,dml_float_t DCFCLK,dml_float_t FabricClock,dml_float_t DRAMSpeed)5766 dml_float_t dml_get_return_bw_mbps_vm_only(
5767 const struct soc_bounding_box_st *soc,
5768 dml_bool_t use_ideal_dram_bw_strobe,
5769 dml_bool_t HostVMEnable,
5770 dml_float_t DCFCLK,
5771 dml_float_t FabricClock,
5772 dml_float_t DRAMSpeed)
5773 {
5774 dml_float_t VMDataOnlyReturnBW =
5775 dml_min3(soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
5776 FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
5777 DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes *
5778 ((use_ideal_dram_bw_strobe && !HostVMEnable) ? soc->pct_ideal_dram_bw_after_urgent_strobe : soc->pct_ideal_dram_bw_after_urgent_vm_only) / 100.0);
5779 #ifdef __DML_VBA_DEBUG__
5780 dml_print("DML::%s: use_ideal_dram_bw_strobe = %u\n", __func__, use_ideal_dram_bw_strobe);
5781 dml_print("DML::%s: HostVMEnable = %u\n", __func__, HostVMEnable);
5782 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
5783 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
5784 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
5785 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
5786 #endif
5787 return VMDataOnlyReturnBW;
5788 }
5789
5790 // Function: dml_get_return_bw_mbps
5791 // Megabyte per second
dml_get_return_bw_mbps(const struct soc_bounding_box_st * soc,dml_bool_t use_ideal_dram_bw_strobe,dml_bool_t HostVMEnable,dml_float_t DCFCLK,dml_float_t FabricClock,dml_float_t DRAMSpeed)5792 dml_float_t dml_get_return_bw_mbps(
5793 const struct soc_bounding_box_st *soc,
5794 dml_bool_t use_ideal_dram_bw_strobe,
5795 dml_bool_t HostVMEnable,
5796 dml_float_t DCFCLK,
5797 dml_float_t FabricClock,
5798 dml_float_t DRAMSpeed)
5799 {
5800 dml_float_t ReturnBW = 0.;
5801 dml_float_t IdealSDPPortBandwidth = soc->return_bus_width_bytes * DCFCLK;
5802 dml_float_t IdealFabricBandwidth = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
5803 dml_float_t IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
5804 dml_float_t PixelDataOnlyReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
5805 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
5806 IdealDRAMBandwidth * ((use_ideal_dram_bw_strobe && !HostVMEnable) ? soc->pct_ideal_dram_bw_after_urgent_strobe :
5807 soc->pct_ideal_dram_bw_after_urgent_pixel_only) / 100);
5808 dml_float_t PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
5809 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
5810 IdealDRAMBandwidth * ((use_ideal_dram_bw_strobe && !HostVMEnable) ? soc->pct_ideal_dram_bw_after_urgent_strobe :
5811 soc->pct_ideal_dram_bw_after_urgent_pixel_and_vm) / 100);
5812
5813 if (HostVMEnable != true) {
5814 ReturnBW = PixelDataOnlyReturnBW;
5815 } else {
5816 ReturnBW = PixelMixedWithVMDataReturnBW;
5817 }
5818
5819 #ifdef __DML_VBA_DEBUG__
5820 dml_print("DML::%s: use_ideal_dram_bw_strobe = %u\n", __func__, use_ideal_dram_bw_strobe);
5821 dml_print("DML::%s: HostVMEnable = %u\n", __func__, HostVMEnable);
5822 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
5823 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
5824 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
5825 dml_print("DML::%s: IdealSDPPortBandwidth = %f\n", __func__, IdealSDPPortBandwidth);
5826 dml_print("DML::%s: IdealFabricBandwidth = %f\n", __func__, IdealFabricBandwidth);
5827 dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth);
5828 dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW);
5829 dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
5830 dml_print("DML::%s: ReturnBW = %f MBps\n", __func__, ReturnBW);
5831 #endif
5832 return ReturnBW;
5833 }
5834
5835 // Function: dml_get_return_dram_bw_mbps
5836 // Megabyte per second
dml_get_return_dram_bw_mbps(const struct soc_bounding_box_st * soc,dml_bool_t use_ideal_dram_bw_strobe,dml_bool_t HostVMEnable,dml_float_t DRAMSpeed)5837 static dml_float_t dml_get_return_dram_bw_mbps(
5838 const struct soc_bounding_box_st *soc,
5839 dml_bool_t use_ideal_dram_bw_strobe,
5840 dml_bool_t HostVMEnable,
5841 dml_float_t DRAMSpeed)
5842 {
5843 dml_float_t ReturnDRAMBW = 0.;
5844 dml_float_t IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
5845 dml_float_t PixelDataOnlyReturnBW = IdealDRAMBandwidth * ((use_ideal_dram_bw_strobe && !HostVMEnable) ? soc->pct_ideal_dram_bw_after_urgent_strobe :
5846 soc->pct_ideal_dram_bw_after_urgent_pixel_only) / 100;
5847 dml_float_t PixelMixedWithVMDataReturnBW = IdealDRAMBandwidth * ((use_ideal_dram_bw_strobe && !HostVMEnable) ? soc->pct_ideal_dram_bw_after_urgent_strobe :
5848 soc->pct_ideal_dram_bw_after_urgent_pixel_and_vm) / 100;
5849
5850 if (HostVMEnable != true) {
5851 ReturnDRAMBW = PixelDataOnlyReturnBW;
5852 } else {
5853 ReturnDRAMBW = PixelMixedWithVMDataReturnBW;
5854 }
5855
5856 #ifdef __DML_VBA_DEBUG__
5857 dml_print("DML::%s: use_ideal_dram_bw_strobe = %u\n", __func__, use_ideal_dram_bw_strobe);
5858 dml_print("DML::%s: HostVMEnable = %u\n", __func__, HostVMEnable);
5859 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
5860 dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth);
5861 dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW);
5862 dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
5863 dml_print("DML::%s: ReturnDRAMBW = %f MBps\n", __func__, ReturnDRAMBW);
5864 #endif
5865 return ReturnDRAMBW;
5866 }
5867
5868 /// @brief BACKEND
DSCDelayRequirement(dml_bool_t DSCEnabled,enum dml_odm_mode ODMMode,dml_uint_t DSCInputBitPerComponent,dml_float_t OutputBpp,dml_uint_t HActive,dml_uint_t HTotal,dml_uint_t NumberOfDSCSlices,enum dml_output_format_class OutputFormat,enum dml_output_encoder_class Output,dml_float_t PixelClock,dml_float_t PixelClockBackEnd)5869 static dml_uint_t DSCDelayRequirement(
5870 dml_bool_t DSCEnabled,
5871 enum dml_odm_mode ODMMode,
5872 dml_uint_t DSCInputBitPerComponent,
5873 dml_float_t OutputBpp,
5874 dml_uint_t HActive,
5875 dml_uint_t HTotal,
5876 dml_uint_t NumberOfDSCSlices,
5877 enum dml_output_format_class OutputFormat,
5878 enum dml_output_encoder_class Output,
5879 dml_float_t PixelClock,
5880 dml_float_t PixelClockBackEnd)
5881 {
5882 dml_uint_t DSCDelayRequirement_val = 0;
5883
5884 if (DSCEnabled == true && OutputBpp != 0) {
5885 if (ODMMode == dml_odm_mode_combine_4to1) {
5886 DSCDelayRequirement_val = dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, (dml_uint_t)(dml_ceil((dml_float_t) HActive / (dml_float_t) NumberOfDSCSlices, 1.0)),
5887 (dml_uint_t) (NumberOfDSCSlices / 4.0), OutputFormat, Output) + dscComputeDelay(OutputFormat, Output);
5888 } else if (ODMMode == dml_odm_mode_combine_2to1) {
5889 DSCDelayRequirement_val = dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, (dml_uint_t)(dml_ceil((dml_float_t) HActive / (dml_float_t) NumberOfDSCSlices, 1.0)),
5890 (dml_uint_t) (NumberOfDSCSlices / 2.0), OutputFormat, Output) + dscComputeDelay(OutputFormat, Output);
5891 } else {
5892 DSCDelayRequirement_val = dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, (dml_uint_t)((dml_float_t) dml_ceil(HActive / (dml_float_t) NumberOfDSCSlices, 1.0)),
5893 NumberOfDSCSlices, OutputFormat, Output) + dscComputeDelay(OutputFormat, Output);
5894 }
5895 DSCDelayRequirement_val = (dml_uint_t)(DSCDelayRequirement_val + (HTotal - HActive) * dml_ceil((dml_float_t) DSCDelayRequirement_val / (dml_float_t) HActive, 1.0));
5896 DSCDelayRequirement_val = (dml_uint_t)(DSCDelayRequirement_val * PixelClock / PixelClockBackEnd);
5897
5898 } else {
5899 DSCDelayRequirement_val = 0;
5900 }
5901 #ifdef __DML_VBA_DEBUG__
5902 dml_print("DML::%s: DSCEnabled = %u\n", __func__, DSCEnabled);
5903 dml_print("DML::%s: ODMMode = %u\n", __func__, ODMMode);
5904 dml_print("DML::%s: OutputBpp = %f\n", __func__, OutputBpp);
5905 dml_print("DML::%s: HActive = %u\n", __func__, HActive);
5906 dml_print("DML::%s: HTotal = %u\n", __func__, HTotal);
5907 dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
5908 dml_print("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd);
5909 dml_print("DML::%s: OutputFormat = %u\n", __func__, OutputFormat);
5910 dml_print("DML::%s: DSCInputBitPerComponent = %u\n", __func__, DSCInputBitPerComponent);
5911 dml_print("DML::%s: NumberOfDSCSlices = %u\n", __func__, NumberOfDSCSlices);
5912 dml_print("DML::%s: DSCDelayRequirement_val = %u\n", __func__, DSCDelayRequirement_val);
5913 #endif
5914
5915 return DSCDelayRequirement_val;
5916 }
5917
CalculateVActiveBandwithSupport(dml_uint_t NumberOfActiveSurfaces,dml_float_t ReturnBW,dml_bool_t NotUrgentLatencyHiding[],dml_float_t ReadBandwidthLuma[],dml_float_t ReadBandwidthChroma[],dml_float_t cursor_bw[],dml_float_t meta_row_bandwidth[],dml_float_t dpte_row_bandwidth[],dml_uint_t NumberOfDPP[],dml_float_t UrgentBurstFactorLuma[],dml_float_t UrgentBurstFactorChroma[],dml_float_t UrgentBurstFactorCursor[])5918 static noinline_for_stack dml_bool_t CalculateVActiveBandwithSupport(dml_uint_t NumberOfActiveSurfaces,
5919 dml_float_t ReturnBW,
5920 dml_bool_t NotUrgentLatencyHiding[],
5921 dml_float_t ReadBandwidthLuma[],
5922 dml_float_t ReadBandwidthChroma[],
5923 dml_float_t cursor_bw[],
5924 dml_float_t meta_row_bandwidth[],
5925 dml_float_t dpte_row_bandwidth[],
5926 dml_uint_t NumberOfDPP[],
5927 dml_float_t UrgentBurstFactorLuma[],
5928 dml_float_t UrgentBurstFactorChroma[],
5929 dml_float_t UrgentBurstFactorCursor[])
5930 {
5931 dml_bool_t NotEnoughUrgentLatencyHiding = false;
5932 dml_bool_t CalculateVActiveBandwithSupport_val = false;
5933 dml_float_t VActiveBandwith = 0;
5934
5935 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
5936 if (NotUrgentLatencyHiding[k]) {
5937 NotEnoughUrgentLatencyHiding = true;
5938 }
5939 }
5940
5941 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
5942 VActiveBandwith = VActiveBandwith + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * meta_row_bandwidth[k] + NumberOfDPP[k] * dpte_row_bandwidth[k];
5943 }
5944
5945 CalculateVActiveBandwithSupport_val = (VActiveBandwith <= ReturnBW) && !NotEnoughUrgentLatencyHiding;
5946
5947 #ifdef __DML_VBA_DEBUG__
5948 dml_print("DML::%s: NotEnoughUrgentLatencyHiding = %u\n", __func__, NotEnoughUrgentLatencyHiding);
5949 dml_print("DML::%s: VActiveBandwith = %f\n", __func__, VActiveBandwith);
5950 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
5951 dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %u\n", __func__, CalculateVActiveBandwithSupport_val);
5952 #endif
5953 return CalculateVActiveBandwithSupport_val;
5954 }
5955
/// @brief Sum the per-surface prefetch-period bandwidth and compare it against ReturnBW
///
/// Each surface contributes the largest of three values:
///  - DPP count * prefetch_vmrow_bw,
///  - the active demand: read and cursor bandwidth times their urgent burst
///    factors, plus DPP count * (meta row + dpte row bandwidth),
///  - the prefetch demand: DPP count * (prefetch luma/chroma bandwidth times their
///    "Pre" burst factors), plus cursor_bw_pre times its "Pre" burst factor.
///
/// @param NumberOfActiveSurfaces Number of entries processed in each array
/// @param[out] PrefetchBandwidth Sum over all surfaces
/// @param[out] PrefetchBandwidthNotIncludingMALLPrefetch Same sum, skipping surfaces whose
///             UseMALLForPStateChange is dml_use_mall_pstate_change_phantom_pipe
/// @param[out] FractionOfUrgentBandwidth PrefetchBandwidth / ReturnBW
/// @param[out] PrefetchBandwidthSupport true when PrefetchBandwidth <= ReturnBW and no surface
///             has NotUrgentLatencyHiding set
static void CalculatePrefetchBandwithSupport(
		dml_uint_t NumberOfActiveSurfaces,
		dml_float_t ReturnBW,
		enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
		dml_bool_t NotUrgentLatencyHiding[],
		dml_float_t ReadBandwidthLuma[],
		dml_float_t ReadBandwidthChroma[],
		dml_float_t PrefetchBandwidthLuma[],
		dml_float_t PrefetchBandwidthChroma[],
		dml_float_t cursor_bw[],
		dml_float_t meta_row_bandwidth[],
		dml_float_t dpte_row_bandwidth[],
		dml_float_t cursor_bw_pre[],
		dml_float_t prefetch_vmrow_bw[],
		dml_uint_t NumberOfDPP[],
		dml_float_t UrgentBurstFactorLuma[],
		dml_float_t UrgentBurstFactorChroma[],
		dml_float_t UrgentBurstFactorCursor[],
		dml_float_t UrgentBurstFactorLumaPre[],
		dml_float_t UrgentBurstFactorChromaPre[],
		dml_float_t UrgentBurstFactorCursorPre[],

		// Output
		dml_float_t *PrefetchBandwidth,
		dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch,
		dml_float_t *FractionOfUrgentBandwidth,
		dml_bool_t *PrefetchBandwidthSupport)
{
	dml_bool_t AnySurfaceLacksLatencyHiding = false;
	dml_float_t TotalDemand = 0;
	dml_float_t TotalDemandWithoutPhantom = 0;
	dml_float_t SurfaceDemand;
	dml_uint_t k;

	for (k = 0; k < NumberOfActiveSurfaces; k++) {
		if (NotUrgentLatencyHiding[k])
			AnySurfaceLacksLatencyHiding = true;

		// The same per-surface value feeds both totals, so compute it once.
		SurfaceDemand = dml_max3(
				NumberOfDPP[k] * prefetch_vmrow_bw[k],
				ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k]
					+ cursor_bw[k] * UrgentBurstFactorCursor[k]
					+ NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]),
				NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k]
					+ PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k])
					+ cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);

		TotalDemand = TotalDemand + SurfaceDemand;

		if (UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe)
			TotalDemandWithoutPhantom = TotalDemandWithoutPhantom + SurfaceDemand;
	}

	*PrefetchBandwidth = TotalDemand;
	*PrefetchBandwidthNotIncludingMALLPrefetch = TotalDemandWithoutPhantom;
	*PrefetchBandwidthSupport = (TotalDemand <= ReturnBW) && !AnySurfaceLacksLatencyHiding;
	*FractionOfUrgentBandwidth = TotalDemand / ReturnBW;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
	dml_print("DML::%s: PrefetchBandwidth = %f\n", __func__, *PrefetchBandwidth);
	dml_print("DML::%s: FractionOfUrgentBandwidth = %f\n", __func__, *FractionOfUrgentBandwidth);
	dml_print("DML::%s: PrefetchBandwidthSupport = %u\n", __func__, *PrefetchBandwidthSupport);
#endif
}
6021
CalculateBandwidthAvailableForImmediateFlip(dml_uint_t NumberOfActiveSurfaces,dml_float_t ReturnBW,dml_float_t ReadBandwidthLuma[],dml_float_t ReadBandwidthChroma[],dml_float_t PrefetchBandwidthLuma[],dml_float_t PrefetchBandwidthChroma[],dml_float_t cursor_bw[],dml_float_t cursor_bw_pre[],dml_uint_t NumberOfDPP[],dml_float_t UrgentBurstFactorLuma[],dml_float_t UrgentBurstFactorChroma[],dml_float_t UrgentBurstFactorCursor[],dml_float_t UrgentBurstFactorLumaPre[],dml_float_t UrgentBurstFactorChromaPre[],dml_float_t UrgentBurstFactorCursorPre[])6022 static noinline_for_stack dml_float_t CalculateBandwidthAvailableForImmediateFlip(
6023 dml_uint_t NumberOfActiveSurfaces,
6024 dml_float_t ReturnBW,
6025 dml_float_t ReadBandwidthLuma[],
6026 dml_float_t ReadBandwidthChroma[],
6027 dml_float_t PrefetchBandwidthLuma[],
6028 dml_float_t PrefetchBandwidthChroma[],
6029 dml_float_t cursor_bw[],
6030 dml_float_t cursor_bw_pre[],
6031 dml_uint_t NumberOfDPP[],
6032 dml_float_t UrgentBurstFactorLuma[],
6033 dml_float_t UrgentBurstFactorChroma[],
6034 dml_float_t UrgentBurstFactorCursor[],
6035 dml_float_t UrgentBurstFactorLumaPre[],
6036 dml_float_t UrgentBurstFactorChromaPre[],
6037 dml_float_t UrgentBurstFactorCursorPre[])
6038 {
6039 dml_float_t ret_val = ReturnBW;
6040
6041 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
6042 ret_val = ret_val - dml_max(ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6043 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) +
6044 cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6045 #ifdef __DML_VBA_DEBUG__
6046 dml_print("DML::%s: k=%u\n", __func__, k);
6047 dml_print("DML::%s: NumberOfDPP = %u\n", __func__, NumberOfDPP[k]);
6048 dml_print("DML::%s: ReadBandwidthLuma = %f\n", __func__, ReadBandwidthLuma[k]);
6049 dml_print("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, UrgentBurstFactorLuma[k]);
6050 dml_print("DML::%s: ReadBandwidthChroma = %f\n", __func__, ReadBandwidthChroma[k]);
6051 dml_print("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, UrgentBurstFactorChroma[k]);
6052 dml_print("DML::%s: cursor_bw = %f\n", __func__, cursor_bw[k]);
6053 dml_print("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, UrgentBurstFactorCursor[k]);
6054
6055 dml_print("DML::%s: PrefetchBandwidthLuma = %f\n", __func__, PrefetchBandwidthLuma[k]);
6056 dml_print("DML::%s: UrgentBurstFactorLumaPre = %f\n", __func__, UrgentBurstFactorLumaPre[k]);
6057 dml_print("DML::%s: PrefetchBandwidthChroma = %f\n", __func__, PrefetchBandwidthChroma[k]);
6058 dml_print("DML::%s: UrgentBurstFactorChromaPre = %f\n", __func__, UrgentBurstFactorChromaPre[k]);
6059 dml_print("DML::%s: cursor_bw_pre = %f\n", __func__, cursor_bw_pre[k]);
6060 dml_print("DML::%s: UrgentBurstFactorCursorPre = %f\n", __func__, UrgentBurstFactorCursorPre[k]);
6061 dml_print("DML::%s: ret_val = %f\n", __func__, ret_val);
6062 #endif
6063 }
6064
6065 return ret_val;
6066 }
6067
CalculateImmediateFlipBandwithSupport(dml_uint_t NumberOfActiveSurfaces,dml_float_t ReturnBW,enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],enum dml_immediate_flip_requirement ImmediateFlipRequirement[],dml_float_t final_flip_bw[],dml_float_t ReadBandwidthLuma[],dml_float_t ReadBandwidthChroma[],dml_float_t PrefetchBandwidthLuma[],dml_float_t PrefetchBandwidthChroma[],dml_float_t cursor_bw[],dml_float_t meta_row_bandwidth[],dml_float_t dpte_row_bandwidth[],dml_float_t cursor_bw_pre[],dml_float_t prefetch_vmrow_bw[],dml_uint_t NumberOfDPP[],dml_float_t UrgentBurstFactorLuma[],dml_float_t UrgentBurstFactorChroma[],dml_float_t UrgentBurstFactorCursor[],dml_float_t UrgentBurstFactorLumaPre[],dml_float_t UrgentBurstFactorChromaPre[],dml_float_t UrgentBurstFactorCursorPre[],dml_float_t * TotalBandwidth,dml_float_t * TotalBandwidthNotIncludingMALLPrefetch,dml_float_t * FractionOfUrgentBandwidth,dml_bool_t * ImmediateFlipBandwidthSupport)6068 static void CalculateImmediateFlipBandwithSupport(
6069 dml_uint_t NumberOfActiveSurfaces,
6070 dml_float_t ReturnBW,
6071 enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
6072 enum dml_immediate_flip_requirement ImmediateFlipRequirement[],
6073 dml_float_t final_flip_bw[],
6074 dml_float_t ReadBandwidthLuma[],
6075 dml_float_t ReadBandwidthChroma[],
6076 dml_float_t PrefetchBandwidthLuma[],
6077 dml_float_t PrefetchBandwidthChroma[],
6078 dml_float_t cursor_bw[],
6079 dml_float_t meta_row_bandwidth[],
6080 dml_float_t dpte_row_bandwidth[],
6081 dml_float_t cursor_bw_pre[],
6082 dml_float_t prefetch_vmrow_bw[],
6083 dml_uint_t NumberOfDPP[],
6084 dml_float_t UrgentBurstFactorLuma[],
6085 dml_float_t UrgentBurstFactorChroma[],
6086 dml_float_t UrgentBurstFactorCursor[],
6087 dml_float_t UrgentBurstFactorLumaPre[],
6088 dml_float_t UrgentBurstFactorChromaPre[],
6089 dml_float_t UrgentBurstFactorCursorPre[],
6090
6091 // Output
6092 dml_float_t *TotalBandwidth,
6093 dml_float_t *TotalBandwidthNotIncludingMALLPrefetch,
6094 dml_float_t *FractionOfUrgentBandwidth,
6095 dml_bool_t *ImmediateFlipBandwidthSupport)
6096 {
6097 *TotalBandwidth = 0;
6098 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
6099 if (ImmediateFlipRequirement[k] != dml_immediate_flip_not_required) {
6100
6101
6102
6103 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6104 NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6105 NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6106 } else {
6107 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6108 NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6109 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6110 }
6111 #ifdef __DML_VBA_DEBUG__
6112 dml_print("DML::%s: k = %u\n", __func__, k);
6113 dml_print("DML::%s: ImmediateFlipRequirement = %u\n", __func__, ImmediateFlipRequirement[k]);
6114 dml_print("DML::%s: TotalBandwidth = %f\n", __func__, *TotalBandwidth);
6115 dml_print("DML::%s: NumberOfDPP = %u\n", __func__, NumberOfDPP[k]);
6116 dml_print("DML::%s: prefetch_vmrow_bw = %f\n", __func__, prefetch_vmrow_bw[k]);
6117 dml_print("DML::%s: final_flip_bw = %f\n", __func__, final_flip_bw[k]);
6118 dml_print("DML::%s: ReadBandwidthLuma = %f\n", __func__, ReadBandwidthLuma[k]);
6119 dml_print("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, UrgentBurstFactorLuma[k]);
6120 dml_print("DML::%s: ReadBandwidthChroma = %f\n", __func__, ReadBandwidthChroma[k]);
6121 dml_print("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, UrgentBurstFactorChroma[k]);
6122 dml_print("DML::%s: cursor_bw = %f\n", __func__, cursor_bw[k]);
6123 dml_print("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, UrgentBurstFactorCursor[k]);
6124 dml_print("DML::%s: PrefetchBandwidthLuma = %f\n", __func__, PrefetchBandwidthLuma[k]);
6125 dml_print("DML::%s: UrgentBurstFactorLumaPre = %f\n", __func__, UrgentBurstFactorLumaPre[k]);
6126 dml_print("DML::%s: PrefetchBandwidthChroma = %f\n", __func__, PrefetchBandwidthChroma[k]);
6127 dml_print("DML::%s: UrgentBurstFactorChromaPre = %f\n", __func__, UrgentBurstFactorChromaPre[k]);
6128 dml_print("DML::%s: cursor_bw_pre = %f\n", __func__, cursor_bw_pre[k]);
6129 dml_print("DML::%s: UrgentBurstFactorCursorPre = %f\n", __func__, UrgentBurstFactorCursorPre[k]);
6130 dml_print("DML::%s: meta_row_bandwidth = %f\n", __func__, meta_row_bandwidth[k]);
6131 dml_print("DML::%s: dpte_row_bandwidth = %f\n", __func__, dpte_row_bandwidth[k]);
6132 #endif
6133 }
6134
6135 *TotalBandwidthNotIncludingMALLPrefetch = 0;
6136 for (dml_uint_t k = 0; k < NumberOfActiveSurfaces; ++k) {
6137 if (UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) {
6138 if (ImmediateFlipRequirement[k] != dml_immediate_flip_not_required)
6139 *TotalBandwidthNotIncludingMALLPrefetch = *TotalBandwidthNotIncludingMALLPrefetch + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6140 NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6141 NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k])
6142 + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6143 else
6144 *TotalBandwidthNotIncludingMALLPrefetch = *TotalBandwidthNotIncludingMALLPrefetch + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6145 NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k])
6146 + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6147 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k])
6148 + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6149 }
6150 }
6151
6152 *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
6153 *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;
6154 #ifdef __DML_VBA_DEBUG__
6155 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
6156 dml_print("DML::%s: TotalBandwidth = %f\n", __func__, *TotalBandwidth);
6157 dml_print("DML::%s: ImmediateFlipBandwidthSupport = %u\n", __func__, *ImmediateFlipBandwidthSupport);
6158 #endif
6159 }
6160
/*
 * Convert num_us microseconds into a line count: the duration in ns divided
 * by the time of one line in ns, passed through dml_ceil(..., 1.0).
 *
 * The per-line time is stored in a dml_uint_t, so any fractional part is
 * dropped before the division.
 * NOTE(review): pixel_clock is presumably in MHz given the 1000x scaling -
 * confirm against callers.
 */
static dml_uint_t MicroSecToVertLines(dml_uint_t num_us, dml_uint_t h_total, dml_float_t pixel_clock)
{
	const dml_float_t scaled_h_total = 1000.0 * (h_total * 1000.0);
	const dml_float_t scaled_pixel_clock = pixel_clock * 1000.0;
	/* Implicit float -> integer conversion: whole nanoseconds only. */
	const dml_uint_t ns_per_line = scaled_h_total / scaled_pixel_clock;
	const dml_float_t duration_ns = 1000.0 * num_us;

	return dml_ceil(duration_ns / ns_per_line, 1.0);
}
6167
6168 /// @brief Calculate the maximum vstartup for mode support and mode programming consideration
6169 /// Bounded by min of actual vblank and input vblank_nom, dont want vstartup/ready to start too early if actual vbllank is huge
CalculateMaxVStartup(dml_uint_t plane_idx,dml_bool_t ptoi_supported,dml_uint_t vblank_nom_default_us,struct dml_timing_cfg_st * timing,dml_float_t write_back_delay_us)6170 static dml_uint_t CalculateMaxVStartup(
6171 dml_uint_t plane_idx,
6172 dml_bool_t ptoi_supported,
6173 dml_uint_t vblank_nom_default_us,
6174 struct dml_timing_cfg_st *timing,
6175 dml_float_t write_back_delay_us)
6176 {
6177 dml_uint_t vblank_size = 0;
6178 dml_uint_t max_vstartup_lines = 0;
6179 const dml_uint_t max_allowed_vblank_nom = 1023;
6180
6181 dml_float_t line_time_us = (dml_float_t) timing->HTotal[plane_idx] / timing->PixelClock[plane_idx];
6182 dml_uint_t vblank_actual = timing->VTotal[plane_idx] - timing->VActive[plane_idx];
6183
6184 dml_uint_t vblank_nom_default_in_line = MicroSecToVertLines(vblank_nom_default_us, timing->HTotal[plane_idx],
6185 timing->PixelClock[plane_idx]);
6186 dml_uint_t vblank_nom_input = (dml_uint_t)dml_min(vblank_actual, vblank_nom_default_in_line);
6187
6188 // vblank_nom should not be smaller than (VSync (VTotal - VActive - VFrontPorch) + 2)
6189 // + 2 is because
6190 // 1 -> VStartup_start should be 1 line before VSync
6191 // 1 -> always reserve 1 line between start of VBlank to VStartup signal
6192 dml_uint_t vblank_nom_vsync_capped = dml_max(vblank_nom_input,
6193 timing->VTotal[plane_idx] - timing->VActive[plane_idx] - timing->VFrontPorch[plane_idx] + 2);
6194 dml_uint_t vblank_nom_max_allowed_capped = dml_min(vblank_nom_vsync_capped, max_allowed_vblank_nom);
6195 dml_uint_t vblank_avail = (vblank_nom_max_allowed_capped == 0) ?
6196 vblank_nom_default_in_line : vblank_nom_max_allowed_capped;
6197
6198 vblank_size = (dml_uint_t) dml_min(vblank_actual, vblank_avail);
6199
6200 if (timing->Interlace[plane_idx] && !ptoi_supported)
6201 max_vstartup_lines = (dml_uint_t) (dml_floor(vblank_size/2.0, 1.0));
6202 else
6203 max_vstartup_lines = vblank_size - (dml_uint_t) dml_max(1.0, dml_ceil(write_back_delay_us/line_time_us, 1.0));
6204 #ifdef __DML_VBA_DEBUG__
6205 dml_print("DML::%s: plane_idx = %u\n", __func__, plane_idx);
6206 dml_print("DML::%s: VBlankNom = %u\n", __func__, timing->VBlankNom[plane_idx]);
6207 dml_print("DML::%s: vblank_nom_default_us = %u\n", __func__, vblank_nom_default_us);
6208 dml_print("DML::%s: line_time_us = %f\n", __func__, line_time_us);
6209 dml_print("DML::%s: vblank_actual = %u\n", __func__, vblank_actual);
6210 dml_print("DML::%s: vblank_avail = %u\n", __func__, vblank_avail);
6211 dml_print("DML::%s: max_vstartup_lines = %u\n", __func__, max_vstartup_lines);
6212 #endif
6213 return max_vstartup_lines;
6214 }
6215
set_calculate_prefetch_schedule_params(struct display_mode_lib_st * mode_lib,struct CalculatePrefetchSchedule_params_st * CalculatePrefetchSchedule_params,dml_uint_t j,dml_uint_t k)6216 static noinline_for_stack void set_calculate_prefetch_schedule_params(struct display_mode_lib_st *mode_lib,
6217 struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params,
6218 dml_uint_t j,
6219 dml_uint_t k)
6220 {
6221 CalculatePrefetchSchedule_params->DSCDelay = mode_lib->ms.DSCDelayPerState[k];
6222 CalculatePrefetchSchedule_params->EnhancedPrefetchScheduleAccelerationFinal = mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal;
6223 CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ms.ip.dppclk_delay_subtotal + mode_lib->ms.ip.dppclk_delay_cnvc_formatter;
6224 CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ms.ip.dppclk_delay_scl;
6225 CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ms.ip.dppclk_delay_scl_lb_only;
6226 CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ms.ip.dppclk_delay_cnvc_cursor;
6227 CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ms.ip.dispclk_delay_subtotal;
6228 CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (dml_uint_t)(mode_lib->ms.SwathWidthYThisState[k] / mode_lib->ms.cache_display_cfg.plane.HRatio[k]);
6229 CalculatePrefetchSchedule_params->OutputFormat = mode_lib->ms.cache_display_cfg.output.OutputFormat[k];
6230 CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ms.ip.max_inter_dcn_tile_repeaters;
6231 CalculatePrefetchSchedule_params->GPUVMPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
6232 CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
6233 CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
6234 CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
6235 CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
6236 CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k];
6237 CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled;
6238 CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k];
6239 CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataTransmittedBytes[k];
6240 CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->ms.UrgLatency;
6241 CalculatePrefetchSchedule_params->UrgentExtraLatency = mode_lib->ms.ExtraLatency;
6242 CalculatePrefetchSchedule_params->TCalc = mode_lib->ms.TimeCalc;
6243 CalculatePrefetchSchedule_params->PDEAndMetaPTEBytesFrame = mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k];
6244 CalculatePrefetchSchedule_params->MetaRowByte = mode_lib->ms.MetaRowBytes[j][k];
6245 CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow[j][k];
6246 CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY[j][k];
6247 CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->ms.PrefillY[k];
6248 CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->ms.MaxNumSwY[k];
6249 CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC[j][k];
6250 CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->ms.PrefillC[k];
6251 CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->ms.MaxNumSwC[k];
6252 CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub_this_state[k];
6253 CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub_this_state[k];
6254 CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->ms.SwathHeightYThisState[k];
6255 CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->ms.SwathHeightCThisState[k];
6256 CalculatePrefetchSchedule_params->TWait = mode_lib->ms.TWait;
6257 CalculatePrefetchSchedule_params->DestinationLinesForPrefetch = &mode_lib->ms.LineTimesForPrefetch[k];
6258 CalculatePrefetchSchedule_params->DestinationLinesToRequestVMInVBlank = &mode_lib->ms.LinesForMetaPTE[k];
6259 CalculatePrefetchSchedule_params->DestinationLinesToRequestRowInVBlank = &mode_lib->ms.LinesForMetaAndDPTERow[k];
6260 CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->ms.VRatioPreY[j][k];
6261 CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[j][k];
6262 CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k];
6263 CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k];
6264 CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.support.NoTimeForDynamicMetadata[j][k];
6265 CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k];
6266 }
6267
dml_prefetch_check(struct display_mode_lib_st * mode_lib)6268 static noinline_for_stack void dml_prefetch_check(struct display_mode_lib_st *mode_lib)
6269 {
6270 struct dml_core_mode_support_locals_st *s = &mode_lib->scratch.dml_core_mode_support_locals;
6271 struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params;
6272 struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params;
6273 struct DmlPipe *myPipe;
6274 dml_uint_t j, k;
6275
6276 for (j = 0; j < 2; ++j) {
6277 mode_lib->ms.TimeCalc = 24 / mode_lib->ms.ProjectedDCFCLKDeepSleep[j];
6278
6279 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
6280 mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k];
6281 mode_lib->ms.swath_width_luma_ub_this_state[k] = mode_lib->ms.swath_width_luma_ub_all_states[j][k];
6282 mode_lib->ms.swath_width_chroma_ub_this_state[k] = mode_lib->ms.swath_width_chroma_ub_all_states[j][k];
6283 mode_lib->ms.SwathWidthYThisState[k] = mode_lib->ms.SwathWidthYAllStates[j][k];
6284 mode_lib->ms.SwathWidthCThisState[k] = mode_lib->ms.SwathWidthCAllStates[j][k];
6285 mode_lib->ms.SwathHeightYThisState[k] = mode_lib->ms.SwathHeightYAllStates[j][k];
6286 mode_lib->ms.SwathHeightCThisState[k] = mode_lib->ms.SwathHeightCAllStates[j][k];
6287 mode_lib->ms.UnboundedRequestEnabledThisState = mode_lib->ms.UnboundedRequestEnabledAllStates[j];
6288 mode_lib->ms.CompressedBufferSizeInkByteThisState = mode_lib->ms.CompressedBufferSizeInkByteAllStates[j];
6289 mode_lib->ms.DETBufferSizeInKByteThisState[k] = mode_lib->ms.DETBufferSizeInKByteAllStates[j][k];
6290 mode_lib->ms.DETBufferSizeYThisState[k] = mode_lib->ms.DETBufferSizeYAllStates[j][k];
6291 mode_lib->ms.DETBufferSizeCThisState[k] = mode_lib->ms.DETBufferSizeCAllStates[j][k];
6292 }
6293
6294 mode_lib->ms.support.VActiveBandwithSupport[j] = CalculateVActiveBandwithSupport(
6295 mode_lib->ms.num_active_planes,
6296 mode_lib->ms.ReturnBWPerState[j],
6297 mode_lib->ms.NotUrgentLatencyHiding,
6298 mode_lib->ms.ReadBandwidthLuma,
6299 mode_lib->ms.ReadBandwidthChroma,
6300 mode_lib->ms.cursor_bw,
6301 mode_lib->ms.meta_row_bandwidth_this_state,
6302 mode_lib->ms.dpte_row_bandwidth_this_state,
6303 mode_lib->ms.NoOfDPPThisState,
6304 mode_lib->ms.UrgentBurstFactorLuma[j],
6305 mode_lib->ms.UrgentBurstFactorChroma[j],
6306 mode_lib->ms.UrgentBurstFactorCursor[j]);
6307
6308 s->VMDataOnlyReturnBWPerState = dml_get_return_bw_mbps_vm_only(
6309 &mode_lib->ms.soc,
6310 mode_lib->ms.state.use_ideal_dram_bw_strobe,
6311 mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
6312 mode_lib->ms.DCFCLKState[j],
6313 mode_lib->ms.state.fabricclk_mhz,
6314 mode_lib->ms.state.dram_speed_mts);
6315
6316 s->HostVMInefficiencyFactor = 1;
6317 if (mode_lib->ms.cache_display_cfg.plane.GPUVMEnable && mode_lib->ms.cache_display_cfg.plane.HostVMEnable)
6318 s->HostVMInefficiencyFactor = mode_lib->ms.ReturnBWPerState[j] / s->VMDataOnlyReturnBWPerState;
6319
6320 mode_lib->ms.ExtraLatency = CalculateExtraLatency(
6321 mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles,
6322 s->ReorderingBytes,
6323 mode_lib->ms.DCFCLKState[j],
6324 mode_lib->ms.TotalNumberOfActiveDPP[j],
6325 mode_lib->ms.ip.pixel_chunk_size_kbytes,
6326 mode_lib->ms.TotalNumberOfDCCActiveDPP[j],
6327 mode_lib->ms.ip.meta_chunk_size_kbytes,
6328 mode_lib->ms.ReturnBWPerState[j],
6329 mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
6330 mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
6331 mode_lib->ms.num_active_planes,
6332 mode_lib->ms.NoOfDPPThisState,
6333 mode_lib->ms.dpte_group_bytes,
6334 s->HostVMInefficiencyFactor,
6335 mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
6336 mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels);
6337
6338 s->NextMaxVStartup = s->MaxVStartupAllPlanes[j];
6339 s->MaxVStartup = 0;
6340 s->AllPrefetchModeTested = true;
6341 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6342 CalculatePrefetchMode(mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k], &s->MinPrefetchMode[k], &s->MaxPrefetchMode[k]);
6343 s->NextPrefetchMode[k] = s->MinPrefetchMode[k];
6344 }
6345
6346 do {
6347 s->MaxVStartup = s->NextMaxVStartup;
6348 s->AllPrefetchModeTested = true;
6349
6350 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6351 mode_lib->ms.PrefetchMode[k] = s->NextPrefetchMode[k];
6352 mode_lib->ms.TWait = CalculateTWait(
6353 mode_lib->ms.PrefetchMode[k],
6354 mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
6355 mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
6356 mode_lib->ms.cache_display_cfg.timing.DRRDisplay[k],
6357 mode_lib->ms.state.dram_clock_change_latency_us,
6358 mode_lib->ms.state.fclk_change_latency_us,
6359 mode_lib->ms.UrgLatency,
6360 mode_lib->ms.state.sr_enter_plus_exit_time_us);
6361
6362 myPipe = &s->myPipe;
6363 myPipe->Dppclk = mode_lib->ms.RequiredDPPCLKPerSurface[j][k];
6364 myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK[j];
6365 myPipe->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k];
6366 myPipe->DCFClkDeepSleep = mode_lib->ms.ProjectedDCFCLKDeepSleep[j];
6367 myPipe->DPPPerSurface = mode_lib->ms.NoOfDPP[j][k];
6368 myPipe->ScalerEnabled = mode_lib->ms.cache_display_cfg.plane.ScalerEnabled[k];
6369 myPipe->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k];
6370 myPipe->BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k];
6371 myPipe->BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k];
6372 myPipe->BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k];
6373 myPipe->BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k];
6374 myPipe->InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k];
6375 myPipe->NumberOfCursors = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k];
6376 myPipe->VBlank = mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k];
6377 myPipe->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k];
6378 myPipe->HActive = mode_lib->ms.cache_display_cfg.timing.HActive[k];
6379 myPipe->DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable[k];
6380 myPipe->ODMMode = mode_lib->ms.ODMModePerState[k];
6381 myPipe->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k];
6382 myPipe->BytePerPixelY = mode_lib->ms.BytePerPixelY[k];
6383 myPipe->BytePerPixelC = mode_lib->ms.BytePerPixelC[k];
6384 myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
6385
6386 #ifdef __DML_VBA_DEBUG__
6387 dml_print("DML::%s: Calling CalculatePrefetchSchedule for j=%u, k=%u\n", __func__, j, k);
6388 dml_print("DML::%s: MaximumVStartup = %u\n", __func__, s->MaximumVStartup[j][k]);
6389 dml_print("DML::%s: MaxVStartup = %u\n", __func__, s->MaxVStartup);
6390 dml_print("DML::%s: NextPrefetchMode = %u\n", __func__, s->NextPrefetchMode[k]);
6391 dml_print("DML::%s: AllowForPStateChangeOrStutterInVBlank = %u\n", __func__, mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k]);
6392 dml_print("DML::%s: PrefetchMode = %u\n", __func__, mode_lib->ms.PrefetchMode[k]);
6393 #endif
6394
6395 CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactor;
6396 CalculatePrefetchSchedule_params->myPipe = myPipe;
6397 CalculatePrefetchSchedule_params->VStartup = (dml_uint_t)(dml_min(s->MaxVStartup, s->MaximumVStartup[j][k]));
6398 CalculatePrefetchSchedule_params->MaxVStartup = s->MaximumVStartup[j][k];
6399 CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k];
6400 CalculatePrefetchSchedule_params->DSTYAfterScaler = &s->DSTYAfterScaler[k];
6401 CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->ms.prefetch_vmrow_bw[k];
6402 CalculatePrefetchSchedule_params->Tdmdl_vm = &s->dummy_single[0];
6403 CalculatePrefetchSchedule_params->Tdmdl = &s->dummy_single[1];
6404 CalculatePrefetchSchedule_params->TSetup = &s->dummy_single[2];
6405 CalculatePrefetchSchedule_params->VUpdateOffsetPix = &s->dummy_integer[0];
6406 CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1];
6407 CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2];
6408
6409 set_calculate_prefetch_schedule_params(mode_lib, CalculatePrefetchSchedule_params, j, k);
6410
6411 mode_lib->ms.support.NoTimeForPrefetch[j][k] =
6412 CalculatePrefetchSchedule(&mode_lib->scratch,
6413 CalculatePrefetchSchedule_params);
6414 }
6415
6416 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6417 CalculateUrgentBurstFactor(
6418 mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
6419 mode_lib->ms.swath_width_luma_ub_this_state[k],
6420 mode_lib->ms.swath_width_chroma_ub_this_state[k],
6421 mode_lib->ms.SwathHeightYThisState[k],
6422 mode_lib->ms.SwathHeightCThisState[k],
6423 mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
6424 mode_lib->ms.UrgLatency,
6425 mode_lib->ms.ip.cursor_buffer_size,
6426 mode_lib->ms.cache_display_cfg.plane.CursorWidth[k],
6427 mode_lib->ms.cache_display_cfg.plane.CursorBPP[k],
6428 mode_lib->ms.VRatioPreY[j][k],
6429 mode_lib->ms.VRatioPreC[j][k],
6430 mode_lib->ms.BytePerPixelInDETY[k],
6431 mode_lib->ms.BytePerPixelInDETC[k],
6432 mode_lib->ms.DETBufferSizeYThisState[k],
6433 mode_lib->ms.DETBufferSizeCThisState[k],
6434 /* Output */
6435 &mode_lib->ms.UrgentBurstFactorCursorPre[k],
6436 &mode_lib->ms.UrgentBurstFactorLumaPre[k],
6437 &mode_lib->ms.UrgentBurstFactorChromaPre[k],
6438 &mode_lib->ms.NotUrgentLatencyHidingPre[k]);
6439
6440 mode_lib->ms.cursor_bw_pre[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] *
6441 mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] /
6442 mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.VRatioPreY[j][k];
6443 }
6444
6445 {
6446 CalculatePrefetchBandwithSupport(
6447 mode_lib->ms.num_active_planes,
6448 mode_lib->ms.ReturnBWPerState[j],
6449 mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
6450 mode_lib->ms.NotUrgentLatencyHidingPre,
6451 mode_lib->ms.ReadBandwidthLuma,
6452 mode_lib->ms.ReadBandwidthChroma,
6453 mode_lib->ms.RequiredPrefetchPixelDataBWLuma,
6454 mode_lib->ms.RequiredPrefetchPixelDataBWChroma,
6455 mode_lib->ms.cursor_bw,
6456 mode_lib->ms.meta_row_bandwidth_this_state,
6457 mode_lib->ms.dpte_row_bandwidth_this_state,
6458 mode_lib->ms.cursor_bw_pre,
6459 mode_lib->ms.prefetch_vmrow_bw,
6460 mode_lib->ms.NoOfDPPThisState,
6461 mode_lib->ms.UrgentBurstFactorLuma[j],
6462 mode_lib->ms.UrgentBurstFactorChroma[j],
6463 mode_lib->ms.UrgentBurstFactorCursor[j],
6464 mode_lib->ms.UrgentBurstFactorLumaPre,
6465 mode_lib->ms.UrgentBurstFactorChromaPre,
6466 mode_lib->ms.UrgentBurstFactorCursorPre,
6467
6468 /* output */
6469 &s->dummy_single[0], // dml_float_t *PrefetchBandwidth
6470 &s->dummy_single[1], // dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch
6471 &mode_lib->mp.FractionOfUrgentBandwidth, // dml_float_t *FractionOfUrgentBandwidth
6472 &mode_lib->ms.support.PrefetchSupported[j]);
6473 }
6474
6475 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6476 if (mode_lib->ms.LineTimesForPrefetch[k] < 2.0
6477 || mode_lib->ms.LinesForMetaPTE[k] >= 32.0
6478 || mode_lib->ms.LinesForMetaAndDPTERow[k] >= 16.0
6479 || mode_lib->ms.support.NoTimeForPrefetch[j][k] == true) {
6480 mode_lib->ms.support.PrefetchSupported[j] = false;
6481 }
6482 }
6483
6484 mode_lib->ms.support.DynamicMetadataSupported[j] = true;
6485 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
6486 if (mode_lib->ms.support.NoTimeForDynamicMetadata[j][k] == true) {
6487 mode_lib->ms.support.DynamicMetadataSupported[j] = false;
6488 }
6489 }
6490
6491 mode_lib->ms.support.VRatioInPrefetchSupported[j] = true;
6492 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6493 if (mode_lib->ms.support.NoTimeForPrefetch[j][k] == true ||
6494 mode_lib->ms.VRatioPreY[j][k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ ||
6495 mode_lib->ms.VRatioPreC[j][k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ ||
6496 ((s->MaxVStartup < s->MaximumVStartup[j][k] || mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal == 0) &&
6497 (mode_lib->ms.VRatioPreY[j][k] > __DML_MAX_VRATIO_PRE__ || mode_lib->ms.VRatioPreC[j][k] > __DML_MAX_VRATIO_PRE__))) {
6498 mode_lib->ms.support.VRatioInPrefetchSupported[j] = false;
6499 }
6500 }
6501
6502 s->AnyLinesForVMOrRowTooLarge = false;
6503 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
6504 if (mode_lib->ms.LinesForMetaAndDPTERow[k] >= 16 || mode_lib->ms.LinesForMetaPTE[k] >= 32) {
6505 s->AnyLinesForVMOrRowTooLarge = true;
6506 }
6507 }
6508
6509 if (mode_lib->ms.support.PrefetchSupported[j] == true && mode_lib->ms.support.VRatioInPrefetchSupported[j] == true) {
6510 mode_lib->ms.BandwidthAvailableForImmediateFlip = CalculateBandwidthAvailableForImmediateFlip(
6511 mode_lib->ms.num_active_planes,
6512 mode_lib->ms.ReturnBWPerState[j],
6513 mode_lib->ms.ReadBandwidthLuma,
6514 mode_lib->ms.ReadBandwidthChroma,
6515 mode_lib->ms.RequiredPrefetchPixelDataBWLuma,
6516 mode_lib->ms.RequiredPrefetchPixelDataBWChroma,
6517 mode_lib->ms.cursor_bw,
6518 mode_lib->ms.cursor_bw_pre,
6519 mode_lib->ms.NoOfDPPThisState,
6520 mode_lib->ms.UrgentBurstFactorLuma[j],
6521 mode_lib->ms.UrgentBurstFactorChroma[j],
6522 mode_lib->ms.UrgentBurstFactorCursor[j],
6523 mode_lib->ms.UrgentBurstFactorLumaPre,
6524 mode_lib->ms.UrgentBurstFactorChromaPre,
6525 mode_lib->ms.UrgentBurstFactorCursorPre);
6526
6527 mode_lib->ms.TotImmediateFlipBytes = 0;
6528 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6529 if (!(mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_not_required)) {
6530 mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k] + mode_lib->ms.MetaRowBytes[j][k];
6531 if (mode_lib->ms.use_one_row_for_frame_flip[j][k]) {
6532 mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * (2 * mode_lib->ms.DPTEBytesPerRow[j][k]);
6533 } else {
6534 mode_lib->ms.TotImmediateFlipBytes = mode_lib->ms.TotImmediateFlipBytes + mode_lib->ms.NoOfDPP[j][k] * mode_lib->ms.DPTEBytesPerRow[j][k];
6535 }
6536 }
6537 }
6538
6539 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6540 CalculateFlipSchedule(
6541 s->HostVMInefficiencyFactor,
6542 mode_lib->ms.ExtraLatency,
6543 mode_lib->ms.UrgLatency,
6544 mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels,
6545 mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
6546 mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels,
6547 mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
6548 mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
6549 mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k],
6550 mode_lib->ms.MetaRowBytes[j][k],
6551 mode_lib->ms.DPTEBytesPerRow[j][k],
6552 mode_lib->ms.BandwidthAvailableForImmediateFlip,
6553 mode_lib->ms.TotImmediateFlipBytes,
6554 mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
6555 (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]),
6556 mode_lib->ms.cache_display_cfg.plane.VRatio[k],
6557 mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
6558 mode_lib->ms.Tno_bw[k],
6559 mode_lib->ms.cache_display_cfg.surface.DCCEnable[k],
6560 mode_lib->ms.dpte_row_height[k],
6561 mode_lib->ms.meta_row_height[k],
6562 mode_lib->ms.dpte_row_height_chroma[k],
6563 mode_lib->ms.meta_row_height_chroma[k],
6564 mode_lib->ms.use_one_row_for_frame_flip[j][k], // 24
6565
6566 /* Output */
6567 &mode_lib->ms.DestinationLinesToRequestVMInImmediateFlip[k],
6568 &mode_lib->ms.DestinationLinesToRequestRowInImmediateFlip[k],
6569 &mode_lib->ms.final_flip_bw[k],
6570 &mode_lib->ms.ImmediateFlipSupportedForPipe[k]);
6571 }
6572
6573 {
6574 CalculateImmediateFlipBandwithSupport(mode_lib->ms.num_active_planes,
6575 mode_lib->ms.ReturnBWPerState[j],
6576 mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
6577 mode_lib->ms.policy.ImmediateFlipRequirement,
6578 mode_lib->ms.final_flip_bw,
6579 mode_lib->ms.ReadBandwidthLuma,
6580 mode_lib->ms.ReadBandwidthChroma,
6581 mode_lib->ms.RequiredPrefetchPixelDataBWLuma,
6582 mode_lib->ms.RequiredPrefetchPixelDataBWChroma,
6583 mode_lib->ms.cursor_bw,
6584 mode_lib->ms.meta_row_bandwidth_this_state,
6585 mode_lib->ms.dpte_row_bandwidth_this_state,
6586 mode_lib->ms.cursor_bw_pre,
6587 mode_lib->ms.prefetch_vmrow_bw,
6588 mode_lib->ms.NoOfDPP[j], // VBA_ERROR DPPPerSurface is not assigned at this point, should use NoOfDpp here
6589 mode_lib->ms.UrgentBurstFactorLuma[j],
6590 mode_lib->ms.UrgentBurstFactorChroma[j],
6591 mode_lib->ms.UrgentBurstFactorCursor[j],
6592 mode_lib->ms.UrgentBurstFactorLumaPre,
6593 mode_lib->ms.UrgentBurstFactorChromaPre,
6594 mode_lib->ms.UrgentBurstFactorCursorPre,
6595
6596 /* output */
6597 &s->dummy_single[0], // dml_float_t *TotalBandwidth
6598 &s->dummy_single[1], // dml_float_t *TotalBandwidthNotIncludingMALLPrefetch
6599 &s->dummy_single[2], // dml_float_t *FractionOfUrgentBandwidth
6600 &mode_lib->ms.support.ImmediateFlipSupportedForState[j]); // dml_bool_t *ImmediateFlipBandwidthSupport
6601 }
6602
6603 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6604 if (!(mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_not_required) && (mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false))
6605 mode_lib->ms.support.ImmediateFlipSupportedForState[j] = false;
6606 }
6607
6608 } else { // if prefetch not support, assume iflip not supported
6609 mode_lib->ms.support.ImmediateFlipSupportedForState[j] = false;
6610 }
6611
6612 if (s->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || s->AnyLinesForVMOrRowTooLarge == false) {
6613 s->NextMaxVStartup = s->MaxVStartupAllPlanes[j];
6614 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6615 s->NextPrefetchMode[k] = s->NextPrefetchMode[k] + 1;
6616
6617 if (s->NextPrefetchMode[k] <= s->MaxPrefetchMode[k])
6618 s->AllPrefetchModeTested = false;
6619 }
6620 } else {
6621 s->NextMaxVStartup = s->NextMaxVStartup - 1;
6622 }
6623 } while (!((mode_lib->ms.support.PrefetchSupported[j] == true && mode_lib->ms.support.DynamicMetadataSupported[j] == true &&
6624 mode_lib->ms.support.VRatioInPrefetchSupported[j] == true &&
6625 // consider flip support okay when there is no hostvm and the user doesn't require an iflip, OR when the flip bw is ok
6626 // If there is hostvm, DCN needs to support iflip for invalidation
6627 ((s->ImmediateFlipRequiredFinal) || mode_lib->ms.support.ImmediateFlipSupportedForState[j] == true)) ||
6628 (s->NextMaxVStartup == s->MaxVStartupAllPlanes[j] && s->AllPrefetchModeTested)));
6629
6630 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
6631 mode_lib->ms.use_one_row_for_frame_this_state[k] = mode_lib->ms.use_one_row_for_frame[j][k];
6632 }
6633
6634 s->mSOCParameters.UrgentLatency = mode_lib->ms.UrgLatency;
6635 s->mSOCParameters.ExtraLatency = mode_lib->ms.ExtraLatency;
6636 s->mSOCParameters.WritebackLatency = mode_lib->ms.state.writeback_latency_us;
6637 s->mSOCParameters.DRAMClockChangeLatency = mode_lib->ms.state.dram_clock_change_latency_us;
6638 s->mSOCParameters.FCLKChangeLatency = mode_lib->ms.state.fclk_change_latency_us;
6639 s->mSOCParameters.SRExitTime = mode_lib->ms.state.sr_exit_time_us;
6640 s->mSOCParameters.SREnterPlusExitTime = mode_lib->ms.state.sr_enter_plus_exit_time_us;
6641 s->mSOCParameters.SRExitZ8Time = mode_lib->ms.state.sr_exit_z8_time_us;
6642 s->mSOCParameters.SREnterPlusExitZ8Time = mode_lib->ms.state.sr_enter_plus_exit_z8_time_us;
6643 s->mSOCParameters.USRRetrainingLatency = mode_lib->ms.state.usr_retraining_latency_us;
6644 s->mSOCParameters.SMNLatency = mode_lib->ms.soc.smn_latency_us;
6645
6646 CalculateWatermarks_params->USRRetrainingRequiredFinal = mode_lib->ms.policy.USRRetrainingRequiredFinal;
6647 CalculateWatermarks_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
6648 CalculateWatermarks_params->PrefetchMode = mode_lib->ms.PrefetchMode;
6649 CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
6650 CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ms.ip.max_line_buffer_lines;
6651 CalculateWatermarks_params->LineBufferSize = mode_lib->ms.ip.line_buffer_size_bits;
6652 CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ms.ip.writeback_interface_buffer_size_kbytes;
6653 CalculateWatermarks_params->DCFCLK = mode_lib->ms.DCFCLKState[j];
6654 CalculateWatermarks_params->ReturnBW = mode_lib->ms.ReturnBWPerState[j];
6655 CalculateWatermarks_params->SynchronizeTimingsFinal = mode_lib->ms.policy.SynchronizeTimingsFinal;
6656 CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal;
6657 CalculateWatermarks_params->DRRDisplay = mode_lib->ms.cache_display_cfg.timing.DRRDisplay;
6658 CalculateWatermarks_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
6659 CalculateWatermarks_params->meta_row_height = mode_lib->ms.meta_row_height;
6660 CalculateWatermarks_params->meta_row_height_chroma = mode_lib->ms.meta_row_height_chroma;
6661 CalculateWatermarks_params->mmSOCParameters = s->mSOCParameters;
6662 CalculateWatermarks_params->WritebackChunkSize = mode_lib->ms.ip.writeback_chunk_size_kbytes;
6663 CalculateWatermarks_params->SOCCLK = mode_lib->ms.state.socclk_mhz;
6664 CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->ms.ProjectedDCFCLKDeepSleep[j];
6665 CalculateWatermarks_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeYThisState;
6666 CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeCThisState;
6667 CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightYThisState;
6668 CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightCThisState;
6669 CalculateWatermarks_params->LBBitPerPixel = mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel;
6670 CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthYThisState;
6671 CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthCThisState;
6672 CalculateWatermarks_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio;
6673 CalculateWatermarks_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma;
6674 CalculateWatermarks_params->VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps;
6675 CalculateWatermarks_params->VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma;
6676 CalculateWatermarks_params->VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio;
6677 CalculateWatermarks_params->VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma;
6678 CalculateWatermarks_params->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal;
6679 CalculateWatermarks_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal;
6680 CalculateWatermarks_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive;
6681 CalculateWatermarks_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock;
6682 CalculateWatermarks_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
6683 CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPPThisState;
6684 CalculateWatermarks_params->BytePerPixelDETY = mode_lib->ms.BytePerPixelInDETY;
6685 CalculateWatermarks_params->BytePerPixelDETC = mode_lib->ms.BytePerPixelInDETC;
6686 CalculateWatermarks_params->DSTXAfterScaler = s->DSTXAfterScaler;
6687 CalculateWatermarks_params->DSTYAfterScaler = s->DSTYAfterScaler;
6688 CalculateWatermarks_params->WritebackEnable = mode_lib->ms.cache_display_cfg.writeback.WritebackEnable;
6689 CalculateWatermarks_params->WritebackPixelFormat = mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat;
6690 CalculateWatermarks_params->WritebackDestinationWidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth;
6691 CalculateWatermarks_params->WritebackDestinationHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight;
6692 CalculateWatermarks_params->WritebackSourceHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight;
6693 CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabledThisState;
6694 CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByteThisState;
6695
6696 // Output
6697 CalculateWatermarks_params->Watermark = &s->dummy_watermark; // Watermarks *Watermark
6698 CalculateWatermarks_params->DRAMClockChangeSupport = &mode_lib->ms.support.DRAMClockChangeSupport[j];
6699 CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0]; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[]
6700 CalculateWatermarks_params->SubViewportLinesNeededInMALL = &mode_lib->ms.SubViewportLinesNeededInMALL[j]; // dml_uint_t SubViewportLinesNeededInMALL[]
6701 CalculateWatermarks_params->FCLKChangeSupport = &mode_lib->ms.support.FCLKChangeSupport[j];
6702 CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // dml_float_t *MaxActiveFCLKChangeLatencySupported
6703 CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport[j];
6704 CalculateWatermarks_params->ActiveDRAMClockChangeLatencyMargin = mode_lib->ms.support.ActiveDRAMClockChangeLatencyMargin;
6705
6706 CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch,
6707 CalculateWatermarks_params);
6708
6709 } // for j
6710 }
6711
6712 /// @brief The Mode Support function.
dml_core_mode_support(struct display_mode_lib_st * mode_lib)6713 dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib)
6714 {
6715 struct dml_core_mode_support_locals_st *s = &mode_lib->scratch.dml_core_mode_support_locals;
6716 struct UseMinimumDCFCLK_params_st *UseMinimumDCFCLK_params = &mode_lib->scratch.UseMinimumDCFCLK_params;
6717 struct CalculateSwathAndDETConfiguration_params_st *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params;
6718 struct CalculateVMRowAndSwath_params_st *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params;
6719
6720 dml_uint_t j, k, m;
6721
6722 mode_lib->ms.num_active_planes = dml_get_num_active_planes(&mode_lib->ms.cache_display_cfg);
6723 dml_print("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes);
6724
6725 CalculateMaxDETAndMinCompressedBufferSize(
6726 mode_lib->ms.ip.config_return_buffer_size_in_kbytes,
6727 mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes,
6728 mode_lib->ms.ip.rob_buffer_size_kbytes,
6729 mode_lib->ms.ip.max_num_dpp,
6730 mode_lib->ms.policy.NomDETInKByteOverrideEnable, // VBA_DELTA
6731 mode_lib->ms.policy.NomDETInKByteOverrideValue, // VBA_DELTA
6732
6733 /* Output */
6734 &mode_lib->ms.MaxTotalDETInKByte,
6735 &mode_lib->ms.NomDETInKByte,
6736 &mode_lib->ms.MinCompressedBufferSizeInKByte);
6737
6738 PixelClockAdjustmentForProgressiveToInterlaceUnit(&mode_lib->ms.cache_display_cfg, mode_lib->ms.ip.ptoi_supported);
6739
6740
6741 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
6742
6743 /*Scale Ratio, taps Support Check*/
6744 mode_lib->ms.support.ScaleRatioAndTapsSupport = true;
6745 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6746 if (mode_lib->ms.cache_display_cfg.plane.ScalerEnabled[k] == false
6747 && ((mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_64
6748 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_32
6749 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_16
6750 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_16
6751 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_8
6752 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe
6753 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe_alpha)
6754 || mode_lib->ms.cache_display_cfg.plane.HRatio[k] != 1.0
6755 || mode_lib->ms.cache_display_cfg.plane.HTaps[k] != 1.0
6756 || mode_lib->ms.cache_display_cfg.plane.VRatio[k] != 1.0
6757 || mode_lib->ms.cache_display_cfg.plane.VTaps[k] != 1.0)) {
6758 mode_lib->ms.support.ScaleRatioAndTapsSupport = false;
6759 } else if (mode_lib->ms.cache_display_cfg.plane.VTaps[k] < 1.0 || mode_lib->ms.cache_display_cfg.plane.VTaps[k] > 8.0
6760 || mode_lib->ms.cache_display_cfg.plane.HTaps[k] < 1.0 || mode_lib->ms.cache_display_cfg.plane.HTaps[k] > 8.0
6761 || (mode_lib->ms.cache_display_cfg.plane.HTaps[k] > 1.0 && (mode_lib->ms.cache_display_cfg.plane.HTaps[k] % 2) == 1)
6762 || mode_lib->ms.cache_display_cfg.plane.HRatio[k] > mode_lib->ms.ip.max_hscl_ratio
6763 || mode_lib->ms.cache_display_cfg.plane.VRatio[k] > mode_lib->ms.ip.max_vscl_ratio
6764 || mode_lib->ms.cache_display_cfg.plane.HRatio[k] > mode_lib->ms.cache_display_cfg.plane.HTaps[k]
6765 || mode_lib->ms.cache_display_cfg.plane.VRatio[k] > mode_lib->ms.cache_display_cfg.plane.VTaps[k]
6766 || (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_64
6767 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_32
6768 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_16
6769 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_16
6770 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_8
6771 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe
6772 && (mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k] < 1 || mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k] > 8 || mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] < 1 || mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] > 8 ||
6773 (mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] > 1 && mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] % 2 == 1) ||
6774 mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k] > mode_lib->ms.ip.max_hscl_ratio ||
6775 mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k] > mode_lib->ms.ip.max_vscl_ratio ||
6776 mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k] > mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k] ||
6777 mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k] > mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k]))) {
6778 mode_lib->ms.support.ScaleRatioAndTapsSupport = false;
6779 }
6780 }
6781
6782 /*Source Format, Pixel Format and Scan Support Check*/
6783 mode_lib->ms.support.SourceFormatPixelAndScanSupport = true;
6784 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6785 if (mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k] == dml_sw_linear && (!(!dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k])) || mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true)) {
6786 mode_lib->ms.support.SourceFormatPixelAndScanSupport = false;
6787 }
6788 }
6789
6790 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6791 CalculateBytePerPixelAndBlockSizes(
6792 mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
6793 mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k],
6794
6795 /* Output */
6796 &mode_lib->ms.BytePerPixelY[k],
6797 &mode_lib->ms.BytePerPixelC[k],
6798 &mode_lib->ms.BytePerPixelInDETY[k],
6799 &mode_lib->ms.BytePerPixelInDETC[k],
6800 &mode_lib->ms.Read256BlockHeightY[k],
6801 &mode_lib->ms.Read256BlockHeightC[k],
6802 &mode_lib->ms.Read256BlockWidthY[k],
6803 &mode_lib->ms.Read256BlockWidthC[k],
6804 &mode_lib->ms.MacroTileHeightY[k],
6805 &mode_lib->ms.MacroTileHeightC[k],
6806 &mode_lib->ms.MacroTileWidthY[k],
6807 &mode_lib->ms.MacroTileWidthC[k]);
6808 }
6809
6810 /*Bandwidth Support Check*/
6811 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6812 if (!dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k])) {
6813 mode_lib->ms.SwathWidthYSingleDPP[k] = mode_lib->ms.cache_display_cfg.plane.ViewportWidth[k];
6814 mode_lib->ms.SwathWidthCSingleDPP[k] = mode_lib->ms.cache_display_cfg.plane.ViewportWidthChroma[k];
6815 } else {
6816 mode_lib->ms.SwathWidthYSingleDPP[k] = mode_lib->ms.cache_display_cfg.plane.ViewportHeight[k];
6817 mode_lib->ms.SwathWidthCSingleDPP[k] = mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma[k];
6818 }
6819 }
6820 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6821 mode_lib->ms.ReadBandwidthLuma[k] = mode_lib->ms.SwathWidthYSingleDPP[k] * dml_ceil(mode_lib->ms.BytePerPixelInDETY[k], 1.0) / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k];
6822 mode_lib->ms.ReadBandwidthChroma[k] = mode_lib->ms.SwathWidthYSingleDPP[k] / 2 * dml_ceil(mode_lib->ms.BytePerPixelInDETC[k], 2.0) / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k] / 2.0;
6823 }
6824 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6825 if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true
6826 && mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k] == dml_444_64) {
6827 mode_lib->ms.WriteBandwidth[k] = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k]
6828 * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k]
6829 / (mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k]
6830 * mode_lib->ms.cache_display_cfg.timing.HTotal[k]
6831 / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * 8.0;
6832 } else if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
6833 mode_lib->ms.WriteBandwidth[k] = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k]
6834 * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k]
6835 / (mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k]
6836 * mode_lib->ms.cache_display_cfg.timing.HTotal[k]
6837 / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * 4.0;
6838 } else {
6839 mode_lib->ms.WriteBandwidth[k] = 0.0;
6840 }
6841 }
6842
6843 /*Writeback Latency support check*/
6844 mode_lib->ms.support.WritebackLatencySupport = true;
6845 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6846 if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true &&
6847 (mode_lib->ms.WriteBandwidth[k] > mode_lib->ms.ip.writeback_interface_buffer_size_kbytes * 1024 / mode_lib->ms.state.writeback_latency_us)) {
6848 mode_lib->ms.support.WritebackLatencySupport = false;
6849 }
6850 }
6851
6852 /*Writeback Mode Support Check*/
6853 s->TotalNumberOfActiveWriteback = 0;
6854 for (k = 0; k <= (dml_uint_t) mode_lib->ms.num_active_planes - 1; k++) {
6855 if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
6856 s->TotalNumberOfActiveWriteback = s->TotalNumberOfActiveWriteback + 1;
6857 }
6858 }
6859
6860 mode_lib->ms.support.EnoughWritebackUnits = 1;
6861 if (s->TotalNumberOfActiveWriteback > (dml_uint_t) mode_lib->ms.ip.max_num_wb) {
6862 mode_lib->ms.support.EnoughWritebackUnits = false;
6863 }
6864
6865 /*Writeback Scale Ratio and Taps Support Check*/
6866 mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = true;
6867 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6868 if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
6869 if (mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k] > mode_lib->ms.ip.writeback_max_hscl_ratio
6870 || mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k] > mode_lib->ms.ip.writeback_max_vscl_ratio
6871 || mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k] < mode_lib->ms.ip.writeback_min_hscl_ratio
6872 || mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k] < mode_lib->ms.ip.writeback_min_vscl_ratio
6873 || mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k] > (dml_uint_t) mode_lib->ms.ip.writeback_max_hscl_taps
6874 || mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k] > (dml_uint_t) mode_lib->ms.ip.writeback_max_vscl_taps
6875 || mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k] > (dml_uint_t) mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k]
6876 || mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k] > (dml_uint_t) mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k]
6877 || (mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k] > 2.0 && ((mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k] % 2) == 1))) {
6878 mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false;
6879 }
6880 if (2.0 * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k] * (mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k] - 1) * 57 > mode_lib->ms.ip.writeback_line_buffer_buffer_size) {
6881 mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false;
6882 }
6883 }
6884 }
6885
6886 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6887 CalculateSinglePipeDPPCLKAndSCLThroughput(
6888 mode_lib->ms.cache_display_cfg.plane.HRatio[k],
6889 mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k],
6890 mode_lib->ms.cache_display_cfg.plane.VRatio[k],
6891 mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
6892 mode_lib->ms.ip.max_dchub_pscl_bw_pix_per_clk,
6893 mode_lib->ms.ip.max_pscl_lb_bw_pix_per_clk,
6894 mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
6895 mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
6896 mode_lib->ms.cache_display_cfg.plane.HTaps[k],
6897 mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k],
6898 mode_lib->ms.cache_display_cfg.plane.VTaps[k],
6899 mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k],
6900 /* Output */
6901 &mode_lib->ms.PSCL_FACTOR[k],
6902 &mode_lib->ms.PSCL_FACTOR_CHROMA[k],
6903 &mode_lib->ms.MinDPPCLKUsingSingleDPP[k]);
6904 }
6905
6906 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
6907 if (mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k] == dml_sw_linear) {
6908 s->MaximumSwathWidthSupportLuma = 8192;
6909 } else if (!dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k]) && mode_lib->ms.BytePerPixelC[k] > 0 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe_alpha) {
6910 s->MaximumSwathWidthSupportLuma = 7680;
6911 } else if (dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k]) && mode_lib->ms.BytePerPixelC[k] > 0 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe_alpha) {
6912 s->MaximumSwathWidthSupportLuma = 4320;
6913 } else if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_rgbe_alpha) {
6914 s->MaximumSwathWidthSupportLuma = 3840;
6915 } else if (dml_is_vertical_rotation(mode_lib->ms.cache_display_cfg.plane.SourceScan[k]) && mode_lib->ms.BytePerPixelY[k] == 8 && mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true) {
6916 s->MaximumSwathWidthSupportLuma = 3072;
6917 } else {
6918 s->MaximumSwathWidthSupportLuma = 6144;
6919 }
6920
6921 if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_8 || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_10 || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_12) {
6922 s->MaximumSwathWidthSupportChroma = (dml_uint_t)(s->MaximumSwathWidthSupportLuma / 2.0);
6923 } else {
6924 s->MaximumSwathWidthSupportChroma = s->MaximumSwathWidthSupportLuma;
6925 }
6926 mode_lib->ms.MaximumSwathWidthInLineBufferLuma = mode_lib->ms.ip.line_buffer_size_bits * dml_max(mode_lib->ms.cache_display_cfg.plane.HRatio[k], 1.0) / mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel[k] /
6927 (mode_lib->ms.cache_display_cfg.plane.VTaps[k] + dml_max(dml_ceil(mode_lib->ms.cache_display_cfg.plane.VRatio[k], 1.0) - 2, 0.0));
6928 if (mode_lib->ms.BytePerPixelC[k] == 0.0) {
6929 mode_lib->ms.MaximumSwathWidthInLineBufferChroma = 0;
6930 } else {
6931 mode_lib->ms.MaximumSwathWidthInLineBufferChroma =
6932 mode_lib->ms.ip.line_buffer_size_bits
6933 * dml_max(mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k], 1.0)
6934 / mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel[k]
6935 / (mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k]
6936 + dml_max(dml_ceil(mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k], 1.0) - 2, 0.0));
6937 }
6938 mode_lib->ms.MaximumSwathWidthLuma[k] = dml_min(s->MaximumSwathWidthSupportLuma, mode_lib->ms.MaximumSwathWidthInLineBufferLuma);
6939 mode_lib->ms.MaximumSwathWidthChroma[k] = dml_min(s->MaximumSwathWidthSupportChroma, mode_lib->ms.MaximumSwathWidthInLineBufferChroma);
6940 }
6941
6942 /*Number Of DSC Slices*/
6943 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
6944 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k &&
6945 mode_lib->ms.cache_display_cfg.output.DSCEnable[k] != dml_dsc_disable) {
6946 mode_lib->ms.support.NumberOfDSCSlices[k] = mode_lib->ms.cache_display_cfg.output.DSCSlices[k];
6947
6948 if (mode_lib->ms.support.NumberOfDSCSlices[k] == 0) {
6949 if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] > 4800) {
6950 mode_lib->ms.support.NumberOfDSCSlices[k] = (dml_uint_t)(dml_ceil(mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 600, 4));
6951 } else if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] > 2400) {
6952 mode_lib->ms.support.NumberOfDSCSlices[k] = 8;
6953 } else if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] > 1200) {
6954 mode_lib->ms.support.NumberOfDSCSlices[k] = 4;
6955 } else if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] > 340) {
6956 mode_lib->ms.support.NumberOfDSCSlices[k] = 2;
6957 } else {
6958 mode_lib->ms.support.NumberOfDSCSlices[k] = 1;
6959 }
6960 }
6961 } else {
6962 mode_lib->ms.support.NumberOfDSCSlices[k] = 1;
6963 }
6964 }
6965
6966 CalculateSwathAndDETConfiguration_params->DETSizeOverride = mode_lib->ms.cache_display_cfg.plane.DETSizeOverride;
6967 CalculateSwathAndDETConfiguration_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
6968 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ms.ip.config_return_buffer_size_in_kbytes;
6969 CalculateSwathAndDETConfiguration_params->ROBBufferSizeInKByte = mode_lib->ms.ip.rob_buffer_size_kbytes;
6970 CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte;
6971 CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte;
6972 CalculateSwathAndDETConfiguration_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes;
6973 CalculateSwathAndDETConfiguration_params->ForceSingleDPP = 1;
6974 CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
6975 CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte;
6976 CalculateSwathAndDETConfiguration_params->UseUnboundedRequestingFinal = mode_lib->ms.policy.UseUnboundedRequesting;
6977 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes;
6978 CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByteFinal = mode_lib->ms.ip.compressed_buffer_segment_size_in_kbytes;
6979 CalculateSwathAndDETConfiguration_params->Output = mode_lib->ms.cache_display_cfg.output.OutputEncoder;
6980 CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.ReadBandwidthLuma;
6981 CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.ReadBandwidthChroma;
6982 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = mode_lib->ms.MaximumSwathWidthLuma;
6983 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = mode_lib->ms.MaximumSwathWidthChroma;
6984 CalculateSwathAndDETConfiguration_params->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan;
6985 CalculateSwathAndDETConfiguration_params->ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary;
6986 CalculateSwathAndDETConfiguration_params->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat;
6987 CalculateSwathAndDETConfiguration_params->SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling;
6988 CalculateSwathAndDETConfiguration_params->ViewportWidth = mode_lib->ms.cache_display_cfg.plane.ViewportWidth;
6989 CalculateSwathAndDETConfiguration_params->ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight;
6990 CalculateSwathAndDETConfiguration_params->ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart;
6991 CalculateSwathAndDETConfiguration_params->ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart;
6992 CalculateSwathAndDETConfiguration_params->ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC;
6993 CalculateSwathAndDETConfiguration_params->ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC;
6994 CalculateSwathAndDETConfiguration_params->SurfaceWidthY = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY;
6995 CalculateSwathAndDETConfiguration_params->SurfaceWidthC = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC;
6996 CalculateSwathAndDETConfiguration_params->SurfaceHeightY = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY;
6997 CalculateSwathAndDETConfiguration_params->SurfaceHeightC = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC;
6998 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->ms.Read256BlockHeightY;
6999 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->ms.Read256BlockHeightC;
7000 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->ms.Read256BlockWidthY;
7001 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->ms.Read256BlockWidthC;
7002 CalculateSwathAndDETConfiguration_params->ODMMode = s->dummy_odm_mode;
7003 CalculateSwathAndDETConfiguration_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
7004 CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->ms.BytePerPixelY;
7005 CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->ms.BytePerPixelC;
7006 CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->ms.BytePerPixelInDETY;
7007 CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->ms.BytePerPixelInDETC;
7008 CalculateSwathAndDETConfiguration_params->HActive = mode_lib->ms.cache_display_cfg.timing.HActive;
7009 CalculateSwathAndDETConfiguration_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio;
7010 CalculateSwathAndDETConfiguration_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma;
7011 CalculateSwathAndDETConfiguration_params->DPPPerSurface = s->dummy_integer_array[0];
7012 CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_integer_array[1];
7013 CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_integer_array[2];
7014 CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_integer_array[3];
7015 CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_integer_array[4];
7016 CalculateSwathAndDETConfiguration_params->SwathHeightY = s->dummy_integer_array[5];
7017 CalculateSwathAndDETConfiguration_params->SwathHeightC = s->dummy_integer_array[6];
7018 CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = s->dummy_integer_array[7];
7019 CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY;
7020 CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC;
7021 CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &s->dummy_boolean[0];
7022 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &s->dummy_integer[2];
7023 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_zs = &s->dummy_integer[1];
7024 CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &s->dummy_integer[0];
7025 CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = mode_lib->ms.SingleDPPViewportSizeSupportPerSurface;
7026 CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[1];
7027
7028 CalculateSwathAndDETConfiguration(&mode_lib->scratch,
7029 CalculateSwathAndDETConfiguration_params); /* dml_bool_t *ViewportSizeSupport */
7030
7031 s->MPCCombineMethodAsNeededForPStateChangeAndVoltage = false;
7032 s->MPCCombineMethodAsPossible = false;
7033 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7034 if (mode_lib->ms.policy.MPCCombineUse[k] == dml_mpc_as_needed_for_pstate_and_voltage)
7035 s->MPCCombineMethodAsNeededForPStateChangeAndVoltage = true;
7036 if (mode_lib->ms.policy.MPCCombineUse[k] == dml_mpc_as_possible)
7037 s->MPCCombineMethodAsPossible = true;
7038 }
7039 mode_lib->ms.support.MPCCombineMethodIncompatible = s->MPCCombineMethodAsNeededForPStateChangeAndVoltage && s->MPCCombineMethodAsPossible;
7040
7041 for (j = 0; j < 2; j++) {
7042 mode_lib->ms.TotalNumberOfActiveDPP[j] = 0;
7043 mode_lib->ms.support.TotalAvailablePipesSupport[j] = true;
7044
7045 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7046 CalculateODMMode(
7047 mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit,
7048 mode_lib->ms.cache_display_cfg.timing.HActive[k],
7049 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k],
7050 mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
7051 mode_lib->ms.policy.ODMUse[k],
7052 mode_lib->ms.state.dispclk_mhz,
7053 mode_lib->ms.max_state.dispclk_mhz,
7054 false, // DSCEnable
7055 mode_lib->ms.TotalNumberOfActiveDPP[j],
7056 mode_lib->ms.ip.max_num_dpp,
7057 mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
7058 mode_lib->ms.soc.dcn_downspread_percent,
7059 mode_lib->ms.ip.dispclk_ramp_margin_percent,
7060 mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz,
7061 mode_lib->ms.support.NumberOfDSCSlices[k],
7062
7063 /* Output */
7064 &s->TotalAvailablePipesSupportNoDSC,
7065 &s->NumberOfDPPNoDSC,
7066 &s->ODMModeNoDSC,
7067 &s->RequiredDISPCLKPerSurfaceNoDSC);
7068
7069 CalculateODMMode(
7070 mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit,
7071 mode_lib->ms.cache_display_cfg.timing.HActive[k],
7072 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k],
7073 mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
7074 mode_lib->ms.policy.ODMUse[k],
7075 mode_lib->ms.state.dispclk_mhz,
7076 mode_lib->ms.max_state.dispclk_mhz,
7077 true, // DSCEnable
7078 mode_lib->ms.TotalNumberOfActiveDPP[j],
7079 mode_lib->ms.ip.max_num_dpp,
7080 mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
7081 mode_lib->ms.soc.dcn_downspread_percent,
7082 mode_lib->ms.ip.dispclk_ramp_margin_percent,
7083 mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz,
7084 mode_lib->ms.support.NumberOfDSCSlices[k],
7085
7086 /* Output */
7087 &s->TotalAvailablePipesSupportDSC,
7088 &s->NumberOfDPPDSC,
7089 &s->ODMModeDSC,
7090 &s->RequiredDISPCLKPerSurfaceDSC);
7091
7092 CalculateOutputLink(
7093 mode_lib->ms.state.phyclk_mhz,
7094 mode_lib->ms.state.phyclk_d18_mhz,
7095 mode_lib->ms.state.phyclk_d32_mhz,
7096 mode_lib->ms.soc.phy_downspread_percent,
7097 (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k),
7098 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k],
7099 mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
7100 mode_lib->ms.cache_display_cfg.timing.HTotal[k],
7101 mode_lib->ms.cache_display_cfg.timing.HActive[k],
7102 mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k],
7103 mode_lib->ms.cache_display_cfg.output.ForcedOutputLinkBPP[k],
7104 mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k],
7105 mode_lib->ms.support.NumberOfDSCSlices[k],
7106 mode_lib->ms.cache_display_cfg.output.AudioSampleRate[k],
7107 mode_lib->ms.cache_display_cfg.output.AudioSampleLayout[k],
7108 s->ODMModeNoDSC,
7109 s->ODMModeDSC,
7110 mode_lib->ms.cache_display_cfg.output.DSCEnable[k],
7111 mode_lib->ms.cache_display_cfg.output.OutputLinkDPLanes[k],
7112 mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k],
7113
7114 /* Output */
7115 &mode_lib->ms.RequiresDSC[k],
7116 &mode_lib->ms.RequiresFEC[k],
7117 &mode_lib->ms.OutputBppPerState[k],
7118 &mode_lib->ms.OutputTypePerState[k], // VBA_DELTA, VBA uses a string to represent type and rate, but DML uses enum, don't want to rely on strng
7119 &mode_lib->ms.OutputRatePerState[k],
7120 &mode_lib->ms.RequiredSlots[k]);
7121
7122 if (mode_lib->ms.RequiresDSC[k] == false) {
7123 mode_lib->ms.ODMModePerState[k] = s->ODMModeNoDSC;
7124 mode_lib->ms.RequiredDISPCLKPerSurface[j][k] = s->RequiredDISPCLKPerSurfaceNoDSC;
7125 if (!s->TotalAvailablePipesSupportNoDSC)
7126 mode_lib->ms.support.TotalAvailablePipesSupport[j] = false;
7127 mode_lib->ms.TotalNumberOfActiveDPP[j] = mode_lib->ms.TotalNumberOfActiveDPP[j] + s->NumberOfDPPNoDSC;
7128 } else {
7129 mode_lib->ms.ODMModePerState[k] = s->ODMModeDSC;
7130 mode_lib->ms.RequiredDISPCLKPerSurface[j][k] = s->RequiredDISPCLKPerSurfaceDSC;
7131 if (!s->TotalAvailablePipesSupportDSC)
7132 mode_lib->ms.support.TotalAvailablePipesSupport[j] = false;
7133 mode_lib->ms.TotalNumberOfActiveDPP[j] = mode_lib->ms.TotalNumberOfActiveDPP[j] + s->NumberOfDPPDSC;
7134 }
7135 }
7136
7137 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7138 if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_4to1) {
7139 mode_lib->ms.MPCCombine[j][k] = false;
7140 mode_lib->ms.NoOfDPP[j][k] = 4;
7141 } else if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_2to1) {
7142 mode_lib->ms.MPCCombine[j][k] = false;
7143 mode_lib->ms.NoOfDPP[j][k] = 2;
7144 } else if (mode_lib->ms.policy.MPCCombineUse[k] == dml_mpc_disabled) {
7145 mode_lib->ms.MPCCombine[j][k] = false;
7146 mode_lib->ms.NoOfDPP[j][k] = 1;
7147 } else if (RoundToDFSGranularity(mode_lib->ms.MinDPPCLKUsingSingleDPP[k] * (1 + mode_lib->ms.soc.dcn_downspread_percent / 100),
7148 1, mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz) <= mode_lib->ms.state.dppclk_mhz &&
7149 mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k] == true) {
7150 mode_lib->ms.MPCCombine[j][k] = false;
7151 mode_lib->ms.NoOfDPP[j][k] = 1;
7152 } else if (mode_lib->ms.TotalNumberOfActiveDPP[j] < (dml_uint_t) mode_lib->ms.ip.max_num_dpp) {
7153 mode_lib->ms.MPCCombine[j][k] = true;
7154 mode_lib->ms.NoOfDPP[j][k] = 2;
7155 mode_lib->ms.TotalNumberOfActiveDPP[j] = (dml_uint_t) mode_lib->ms.TotalNumberOfActiveDPP[j] + 1;
7156 } else {
7157 mode_lib->ms.MPCCombine[j][k] = false;
7158 mode_lib->ms.NoOfDPP[j][k] = 1;
7159 mode_lib->ms.support.TotalAvailablePipesSupport[j] = false;
7160 }
7161 }
7162
7163 mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] = 0;
7164 s->NoChromaOrLinear = true;
7165 for (k = 0; k < (dml_uint_t) mode_lib->ms.num_active_planes; ++k) {
7166 if (mode_lib->ms.NoOfDPP[j][k] == 1)
7167 mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] = mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] + 1;
7168 if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_8
7169 || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_10
7170 || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_420_12
7171 || mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] == dml_rgbe_alpha
7172 || mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k] == dml_sw_linear) {
7173 s->NoChromaOrLinear = false;
7174 }
7175 }
7176
7177 if (j == 1 && !UnboundedRequest(mode_lib->ms.policy.UseUnboundedRequesting,
7178 mode_lib->ms.TotalNumberOfActiveDPP[j], s->NoChromaOrLinear,
7179 mode_lib->ms.cache_display_cfg.output.OutputEncoder[0])) {
7180 while (!(mode_lib->ms.TotalNumberOfActiveDPP[j] >= (dml_uint_t) mode_lib->ms.ip.max_num_dpp || mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] == 0)) {
7181 s->BWOfNonCombinedSurfaceOfMaximumBandwidth = 0;
7182 s->NumberOfNonCombinedSurfaceOfMaximumBandwidth = 0;
7183 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7184 if (mode_lib->ms.policy.MPCCombineUse[k] != dml_mpc_disabled && mode_lib->ms.policy.MPCCombineUse[k] != dml_mpc_as_needed_for_voltage &&
7185 mode_lib->ms.ReadBandwidthLuma[k] + mode_lib->ms.ReadBandwidthChroma[k] > s->BWOfNonCombinedSurfaceOfMaximumBandwidth &&
7186 (mode_lib->ms.ODMModePerState[k] != dml_odm_mode_combine_2to1 && mode_lib->ms.ODMModePerState[k] != dml_odm_mode_combine_4to1) &&
7187 mode_lib->ms.MPCCombine[j][k] == false) {
7188 s->BWOfNonCombinedSurfaceOfMaximumBandwidth = mode_lib->ms.ReadBandwidthLuma[k] + mode_lib->ms.ReadBandwidthChroma[k];
7189 s->NumberOfNonCombinedSurfaceOfMaximumBandwidth = k;
7190 }
7191 }
7192 mode_lib->ms.MPCCombine[j][s->NumberOfNonCombinedSurfaceOfMaximumBandwidth] = true;
7193 mode_lib->ms.NoOfDPP[j][s->NumberOfNonCombinedSurfaceOfMaximumBandwidth] = 2;
7194 mode_lib->ms.TotalNumberOfActiveDPP[j] = mode_lib->ms.TotalNumberOfActiveDPP[j] + 1;
7195 mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] = mode_lib->ms.TotalNumberOfSingleDPPSurfaces[j] - 1;
7196 }
7197 }
7198
7199 //DISPCLK/DPPCLK
7200 mode_lib->ms.WritebackRequiredDISPCLK = 0;
7201 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7202 if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k]) {
7203 mode_lib->ms.WritebackRequiredDISPCLK = dml_max(mode_lib->ms.WritebackRequiredDISPCLK,
7204 CalculateWriteBackDISPCLK(mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k],
7205 mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
7206 mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k],
7207 mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k],
7208 mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k],
7209 mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k],
7210 mode_lib->ms.cache_display_cfg.writeback.WritebackSourceWidth[k],
7211 mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k],
7212 mode_lib->ms.cache_display_cfg.timing.HTotal[k],
7213 mode_lib->ms.ip.writeback_line_buffer_buffer_size,
7214 mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz));
7215 }
7216 }
7217
7218 mode_lib->ms.RequiredDISPCLK[j] = mode_lib->ms.WritebackRequiredDISPCLK;
7219 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7220 mode_lib->ms.RequiredDISPCLK[j] = dml_max(mode_lib->ms.RequiredDISPCLK[j], mode_lib->ms.RequiredDISPCLKPerSurface[j][k]);
7221 }
7222
7223 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7224 mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k];
7225 }
7226
7227 CalculateDPPCLK(mode_lib->ms.num_active_planes,
7228 mode_lib->ms.soc.dcn_downspread_percent,
7229 mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz,
7230 mode_lib->ms.MinDPPCLKUsingSingleDPP,
7231 mode_lib->ms.NoOfDPPThisState,
7232 /* Output */
7233 &mode_lib->ms.GlobalDPPCLK,
7234 mode_lib->ms.RequiredDPPCLKThisState);
7235
7236 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7237 mode_lib->ms.RequiredDPPCLKPerSurface[j][k] = mode_lib->ms.RequiredDPPCLKThisState[k];
7238 }
7239
7240 mode_lib->ms.support.DISPCLK_DPPCLK_Support[j] = !((mode_lib->ms.RequiredDISPCLK[j] > mode_lib->ms.state.dispclk_mhz) || (mode_lib->ms.GlobalDPPCLK > mode_lib->ms.state.dppclk_mhz));
7241
7242 if (mode_lib->ms.TotalNumberOfActiveDPP[j] > (dml_uint_t) mode_lib->ms.ip.max_num_dpp) {
7243 mode_lib->ms.support.TotalAvailablePipesSupport[j] = false;
7244 }
7245 } // j
7246
7247 /* Total Available OTG, HDMIFRL, DP Support Check */
7248 s->TotalNumberOfActiveOTG = 0;
7249 s->TotalNumberOfActiveHDMIFRL = 0;
7250 s->TotalNumberOfActiveDP2p0 = 0;
7251 s->TotalNumberOfActiveDP2p0Outputs = 0;
7252
7253 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7254 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
7255 s->TotalNumberOfActiveOTG = s->TotalNumberOfActiveOTG + 1;
7256 if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl)
7257 s->TotalNumberOfActiveHDMIFRL = s->TotalNumberOfActiveHDMIFRL + 1;
7258 if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0) {
7259 s->TotalNumberOfActiveDP2p0 = s->TotalNumberOfActiveDP2p0 + 1;
7260 if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k || mode_lib->ms.cache_display_cfg.output.OutputMultistreamEn[k] == false) {
7261 s->TotalNumberOfActiveDP2p0Outputs = s->TotalNumberOfActiveDP2p0Outputs + 1;
7262 }
7263 }
7264 }
7265 }
7266
7267 mode_lib->ms.support.NumberOfOTGSupport = (s->TotalNumberOfActiveOTG <= (dml_uint_t) mode_lib->ms.ip.max_num_otg);
7268 mode_lib->ms.support.NumberOfHDMIFRLSupport = (s->TotalNumberOfActiveHDMIFRL <= (dml_uint_t) mode_lib->ms.ip.max_num_hdmi_frl_outputs);
7269 mode_lib->ms.support.NumberOfDP2p0Support = (s->TotalNumberOfActiveDP2p0 <= (dml_uint_t) mode_lib->ms.ip.max_num_dp2p0_streams && s->TotalNumberOfActiveDP2p0Outputs <= (dml_uint_t) mode_lib->ms.ip.max_num_dp2p0_outputs);
7270
7271 /* Display IO and DSC Support Check */
7272 mode_lib->ms.support.NonsupportedDSCInputBPC = false;
7273 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7274 if (mode_lib->ms.cache_display_cfg.output.OutputDisabled[k] == false &&
7275 !(mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k] == 12.0
7276 || mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k] == 10.0
7277 || mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k] == 8.0
7278 || mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k] > (dml_uint_t) mode_lib->ms.ip.maximum_dsc_bits_per_component
7279 )) {
7280 mode_lib->ms.support.NonsupportedDSCInputBPC = true;
7281 }
7282 }
7283
7284 mode_lib->ms.support.ExceededMultistreamSlots = false;
7285 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7286 if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k) {
7287 s->TotalSlots = mode_lib->ms.RequiredSlots[k];
7288 for (j = 0; j < mode_lib->ms.num_active_planes; ++j) {
7289 if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[j] == k)
7290 s->TotalSlots = s->TotalSlots + mode_lib->ms.RequiredSlots[j];
7291 }
7292 if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp && s->TotalSlots > 63)
7293 mode_lib->ms.support.ExceededMultistreamSlots = true;
7294 if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0 && s->TotalSlots > 64)
7295 mode_lib->ms.support.ExceededMultistreamSlots = true;
7296 }
7297 }
7298 mode_lib->ms.support.LinkCapacitySupport = true;
7299 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7300 if (mode_lib->ms.cache_display_cfg.output.OutputDisabled[k] == false &&
7301 mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp ||
7302 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl) && mode_lib->ms.OutputBppPerState[k] == 0) {
7303 mode_lib->ms.support.LinkCapacitySupport = false;
7304 }
7305 }
7306
7307 mode_lib->ms.support.P2IWith420 = false;
7308 mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = false;
7309 mode_lib->ms.support.DSC422NativeNotSupported = false;
7310 mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = false;
7311 mode_lib->ms.support.LinkRateForMultistreamNotIndicated = false;
7312 mode_lib->ms.support.BPPForMultistreamNotIndicated = false;
7313 mode_lib->ms.support.MultistreamWithHDMIOreDP = false;
7314 mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = false;
7315 mode_lib->ms.support.NotEnoughLanesForMSO = false;
7316
7317 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7318 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp ||
7319 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl)) {
7320 if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_420 && mode_lib->ms.cache_display_cfg.timing.Interlace[k] == 1 && mode_lib->ms.ip.ptoi_supported == true)
7321 mode_lib->ms.support.P2IWith420 = true;
7322
7323 if (mode_lib->ms.cache_display_cfg.output.DSCEnable[k] == dml_dsc_enable_if_necessary && mode_lib->ms.cache_display_cfg.output.ForcedOutputLinkBPP[k] != 0)
7324 mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = true;
7325 if ((mode_lib->ms.cache_display_cfg.output.DSCEnable[k] == dml_dsc_enable || mode_lib->ms.cache_display_cfg.output.DSCEnable[k] == dml_dsc_enable_if_necessary) && mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_n422 && !mode_lib->ms.ip.dsc422_native_support)
7326 mode_lib->ms.support.DSC422NativeNotSupported = true;
7327
7328 if (((mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_hbr || mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_hbr2 || mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_hbr3) &&
7329 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] != dml_dp && mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] != dml_edp) ||
7330 ((mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_uhbr10 || mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_uhbr13p5 || mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_uhbr20) &&
7331 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] != dml_dp2p0))
7332 mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = true;
7333
7334 if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamEn[k] == 1) {
7335 if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k && mode_lib->ms.cache_display_cfg.output.OutputLinkDPRate[k] == dml_dp_rate_na)
7336 mode_lib->ms.support.LinkRateForMultistreamNotIndicated = true;
7337 if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k && mode_lib->ms.cache_display_cfg.output.ForcedOutputLinkBPP[k] == 0)
7338 mode_lib->ms.support.BPPForMultistreamNotIndicated = true;
7339 for (j = 0; j < mode_lib->ms.num_active_planes; ++j) {
7340 if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == j && mode_lib->ms.cache_display_cfg.output.ForcedOutputLinkBPP[k] == 0)
7341 mode_lib->ms.support.BPPForMultistreamNotIndicated = true;
7342 }
7343 }
7344
7345 if ((mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl)) {
7346 if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamEn[k] == 1 && mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == k)
7347 mode_lib->ms.support.MultistreamWithHDMIOreDP = true;
7348 for (j = 0; j < mode_lib->ms.num_active_planes; ++j) {
7349 if (mode_lib->ms.cache_display_cfg.output.OutputMultistreamEn[k] == 1 && mode_lib->ms.cache_display_cfg.output.OutputMultistreamId[k] == j)
7350 mode_lib->ms.support.MultistreamWithHDMIOreDP = true;
7351 }
7352 }
7353 if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] != dml_dp && (mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_split_1to2 ||
7354 mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_mso_1to2 || mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_mso_1to4))
7355 mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = true;
7356
7357 if ((mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_mso_1to2 && mode_lib->ms.cache_display_cfg.output.OutputLinkDPLanes[k] < 2) ||
7358 (mode_lib->ms.policy.ODMUse[k] == dml_odm_use_policy_mso_1to4 && mode_lib->ms.cache_display_cfg.output.OutputLinkDPLanes[k] < 4))
7359 mode_lib->ms.support.NotEnoughLanesForMSO = true;
7360 }
7361 }
7362
7363 mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = false;
7364 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7365 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k &&
7366 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl &&
7367 RequiredDTBCLK(
7368 mode_lib->ms.RequiresDSC[k],
7369 mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k],
7370 mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
7371 mode_lib->ms.OutputBppPerState[k],
7372 mode_lib->ms.support.NumberOfDSCSlices[k],
7373 mode_lib->ms.cache_display_cfg.timing.HTotal[k],
7374 mode_lib->ms.cache_display_cfg.timing.HActive[k],
7375 mode_lib->ms.cache_display_cfg.output.AudioSampleRate[k],
7376 mode_lib->ms.cache_display_cfg.output.AudioSampleLayout[k]) > mode_lib->ms.state.dtbclk_mhz) {
7377 mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = true;
7378 }
7379 }
7380
7381 mode_lib->ms.support.ODMCombineTwoToOneSupportCheckOK = true;
7382 mode_lib->ms.support.ODMCombineFourToOneSupportCheckOK = true;
7383 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7384 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_2to1 && mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi) {
7385 mode_lib->ms.support.ODMCombineTwoToOneSupportCheckOK = false;
7386 }
7387 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k && mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_4to1 && (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp ||
7388 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmi)) {
7389 mode_lib->ms.support.ODMCombineFourToOneSupportCheckOK = false;
7390 }
7391 }
7392
7393 mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = false;
7394 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7395 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
7396 if (mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp ||
7397 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_dp2p0 ||
7398 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_edp ||
7399 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl) {
7400 if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_420) {
7401 s->DSCFormatFactor = 2;
7402 } else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_444) {
7403 s->DSCFormatFactor = 1;
7404 } else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_n422 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl) {
7405 s->DSCFormatFactor = 2;
7406 } else {
7407 s->DSCFormatFactor = 1;
7408 }
7409 #ifdef __DML_VBA_DEBUG__
7410 dml_print("DML::%s: k=%u, RequiresDSC = %u\n", __func__, k, mode_lib->ms.RequiresDSC[k]);
7411 #endif
7412 if (mode_lib->ms.RequiresDSC[k] == true) {
7413 if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_4to1) {
7414 if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 12.0 / (dml_float_t)s->DSCFormatFactor > (1.0 - mode_lib->ms.soc.dcn_downspread_percent / 100.0) * mode_lib->ms.state.dscclk_mhz) {
7415 #ifdef __DML_VBA_DEBUG__
7416 dml_print("DML::%s: k=%u, PixelClockBackEnd = %f\n", __func__, k, mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k]);
7417 dml_print("DML::%s: k=%u, DSCCLKPerState = %f\n", __func__, k, mode_lib->ms.state.dscclk_mhz);
7418 dml_print("DML::%s: k=%u, DSCFormatFactor = %u\n", __func__, k, s->DSCFormatFactor);
7419 #endif
7420 mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true;
7421 }
7422 } else if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_2to1) {
7423 if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 6.0 / (dml_float_t)s->DSCFormatFactor > (1.0 - mode_lib->ms.soc.dcn_downspread_percent / 100.0) * mode_lib->ms.state.dscclk_mhz) {
7424 mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true;
7425 }
7426 } else {
7427 if (mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 3.0 / (dml_float_t)s->DSCFormatFactor > (1.0 - mode_lib->ms.soc.dcn_downspread_percent / 100.0) * mode_lib->ms.state.dscclk_mhz) {
7428 mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true;
7429 }
7430 }
7431 }
7432 }
7433 }
7434 }
7435 #ifdef __DML_VBA_DEBUG__
7436 dml_print("DML::%s: DSCCLKRequiredMoreThanSupported = %u\n", __func__, mode_lib->ms.support.DSCCLKRequiredMoreThanSupported);
7437 #endif
7438
7439 /* Check DSC Unit and Slices Support */
7440 mode_lib->ms.support.NotEnoughDSCUnits = false;
7441 mode_lib->ms.support.NotEnoughDSCSlices = false;
7442 s->TotalDSCUnitsRequired = 0;
7443 mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = true;
7444 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7445 if (mode_lib->ms.RequiresDSC[k] == true) {
7446 if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_4to1) {
7447 if (mode_lib->ms.cache_display_cfg.timing.HActive[k] > 4 * (dml_uint_t) mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit)
7448 mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false;
7449 s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + 4;
7450 if (mode_lib->ms.support.NumberOfDSCSlices[k] > 16)
7451 mode_lib->ms.support.NotEnoughDSCSlices = true;
7452 } else if (mode_lib->ms.ODMModePerState[k] == dml_odm_mode_combine_2to1) {
7453 if (mode_lib->ms.cache_display_cfg.timing.HActive[k] > 2 * (dml_uint_t) mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit)
7454 mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false;
7455 s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + 2;
7456 if (mode_lib->ms.support.NumberOfDSCSlices[k] > 8)
7457 mode_lib->ms.support.NotEnoughDSCSlices = true;
7458 } else {
7459 if (mode_lib->ms.cache_display_cfg.timing.HActive[k] > (dml_uint_t) mode_lib->ms.ip.maximum_pixels_per_line_per_dsc_unit)
7460 mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false;
7461 s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + 1;
7462 if (mode_lib->ms.support.NumberOfDSCSlices[k] > 4)
7463 mode_lib->ms.support.NotEnoughDSCSlices = true;
7464 }
7465 }
7466 }
7467 if (s->TotalDSCUnitsRequired > (dml_uint_t) mode_lib->ms.ip.num_dsc) {
7468 mode_lib->ms.support.NotEnoughDSCUnits = true;
7469 }
7470
7471 /*DSC Delay per state*/
7472 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7473 mode_lib->ms.DSCDelayPerState[k] = DSCDelayRequirement(mode_lib->ms.RequiresDSC[k],
7474 mode_lib->ms.ODMModePerState[k],
7475 mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k],
7476 mode_lib->ms.OutputBppPerState[k],
7477 mode_lib->ms.cache_display_cfg.timing.HActive[k],
7478 mode_lib->ms.cache_display_cfg.timing.HTotal[k],
7479 mode_lib->ms.support.NumberOfDSCSlices[k],
7480 mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
7481 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k],
7482 mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
7483 mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k]);
7484 }
7485
7486 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7487 for (m = 0; m <= mode_lib->ms.num_active_planes - 1; m++) {
7488 for (j = 0; j <= mode_lib->ms.num_active_planes - 1; j++) {
7489 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == m && mode_lib->ms.RequiresDSC[m] == true) {
7490 mode_lib->ms.DSCDelayPerState[k] = mode_lib->ms.DSCDelayPerState[m];
7491 }
7492 }
7493 }
7494 }
7495
7496 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
7497 //
7498 for (j = 0; j < 2; ++j) {
7499 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7500 mode_lib->ms.RequiredDPPCLKThisState[k] = mode_lib->ms.RequiredDPPCLKPerSurface[j][k];
7501 mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k];
7502 mode_lib->ms.ODMModeThisState[k] = mode_lib->ms.ODMModePerState[k];
7503 }
7504
7505 CalculateSwathAndDETConfiguration_params->DETSizeOverride = mode_lib->ms.cache_display_cfg.plane.DETSizeOverride;
7506 CalculateSwathAndDETConfiguration_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
7507 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ms.ip.config_return_buffer_size_in_kbytes;
7508 CalculateSwathAndDETConfiguration_params->ROBBufferSizeInKByte = mode_lib->ms.ip.rob_buffer_size_kbytes;
7509 CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte;
7510 CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte;
7511 CalculateSwathAndDETConfiguration_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes;
7512 CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false;
7513 CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
7514 CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte;
7515 CalculateSwathAndDETConfiguration_params->UseUnboundedRequestingFinal = mode_lib->ms.policy.UseUnboundedRequesting;
7516 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes;
7517 CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByteFinal = mode_lib->ms.ip.compressed_buffer_segment_size_in_kbytes;
7518 CalculateSwathAndDETConfiguration_params->Output = mode_lib->ms.cache_display_cfg.output.OutputEncoder;
7519 CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.ReadBandwidthLuma;
7520 CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.ReadBandwidthChroma;
7521 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = mode_lib->ms.MaximumSwathWidthLuma;
7522 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = mode_lib->ms.MaximumSwathWidthChroma;
7523 CalculateSwathAndDETConfiguration_params->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan;
7524 CalculateSwathAndDETConfiguration_params->ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary;
7525 CalculateSwathAndDETConfiguration_params->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat;
7526 CalculateSwathAndDETConfiguration_params->SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling;
7527 CalculateSwathAndDETConfiguration_params->ViewportWidth = mode_lib->ms.cache_display_cfg.plane.ViewportWidth;
7528 CalculateSwathAndDETConfiguration_params->ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight;
7529 CalculateSwathAndDETConfiguration_params->ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart;
7530 CalculateSwathAndDETConfiguration_params->ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart;
7531 CalculateSwathAndDETConfiguration_params->ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC;
7532 CalculateSwathAndDETConfiguration_params->ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC;
7533 CalculateSwathAndDETConfiguration_params->SurfaceWidthY = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY;
7534 CalculateSwathAndDETConfiguration_params->SurfaceWidthC = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC;
7535 CalculateSwathAndDETConfiguration_params->SurfaceHeightY = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY;
7536 CalculateSwathAndDETConfiguration_params->SurfaceHeightC = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC;
7537 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->ms.Read256BlockHeightY;
7538 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->ms.Read256BlockHeightC;
7539 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->ms.Read256BlockWidthY;
7540 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->ms.Read256BlockWidthC;
7541 CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->ms.ODMModeThisState;
7542 CalculateSwathAndDETConfiguration_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
7543 CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->ms.BytePerPixelY;
7544 CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->ms.BytePerPixelC;
7545 CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->ms.BytePerPixelInDETY;
7546 CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->ms.BytePerPixelInDETC;
7547 CalculateSwathAndDETConfiguration_params->HActive = mode_lib->ms.cache_display_cfg.timing.HActive;
7548 CalculateSwathAndDETConfiguration_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio;
7549 CalculateSwathAndDETConfiguration_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma;
7550 CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->ms.NoOfDPPThisState;
7551 CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub_this_state;
7552 CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub_this_state;
7553 CalculateSwathAndDETConfiguration_params->SwathWidth = mode_lib->ms.SwathWidthYThisState;
7554 CalculateSwathAndDETConfiguration_params->SwathWidthChroma = mode_lib->ms.SwathWidthCThisState;
7555 CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->ms.SwathHeightYThisState;
7556 CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->ms.SwathHeightCThisState;
7557 CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->ms.DETBufferSizeInKByteThisState;
7558 CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeYThisState;
7559 CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeCThisState;
7560 CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->ms.UnboundedRequestEnabledThisState;
7561 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &s->dummy_integer[2];
7562 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_zs = &s->dummy_integer[1];
7563 CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->ms.CompressedBufferSizeInkByteThisState;
7564 CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = s->dummy_boolean_array[0];
7565 CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &mode_lib->ms.support.ViewportSizeSupport[j];
7566
7567 CalculateSwathAndDETConfiguration(&mode_lib->scratch,
7568 CalculateSwathAndDETConfiguration_params);
7569
7570 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7571 mode_lib->ms.swath_width_luma_ub_all_states[j][k] = mode_lib->ms.swath_width_luma_ub_this_state[k];
7572 mode_lib->ms.swath_width_chroma_ub_all_states[j][k] = mode_lib->ms.swath_width_chroma_ub_this_state[k];
7573 mode_lib->ms.SwathWidthYAllStates[j][k] = mode_lib->ms.SwathWidthYThisState[k];
7574 mode_lib->ms.SwathWidthCAllStates[j][k] = mode_lib->ms.SwathWidthCThisState[k];
7575 mode_lib->ms.SwathHeightYAllStates[j][k] = mode_lib->ms.SwathHeightYThisState[k];
7576 mode_lib->ms.SwathHeightCAllStates[j][k] = mode_lib->ms.SwathHeightCThisState[k];
7577 mode_lib->ms.UnboundedRequestEnabledAllStates[j] = mode_lib->ms.UnboundedRequestEnabledThisState;
7578 mode_lib->ms.CompressedBufferSizeInkByteAllStates[j] = mode_lib->ms.CompressedBufferSizeInkByteThisState;
7579 mode_lib->ms.DETBufferSizeInKByteAllStates[j][k] = mode_lib->ms.DETBufferSizeInKByteThisState[k];
7580 mode_lib->ms.DETBufferSizeYAllStates[j][k] = mode_lib->ms.DETBufferSizeYThisState[k];
7581 mode_lib->ms.DETBufferSizeCAllStates[j][k] = mode_lib->ms.DETBufferSizeCThisState[k];
7582 }
7583 }
7584
7585 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7586 mode_lib->ms.cursor_bw[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] * mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k];
7587 }
7588
7589 CalculateSurfaceSizeInMall(
7590 mode_lib->ms.num_active_planes,
7591 mode_lib->ms.soc.mall_allocated_for_dcn_mbytes,
7592 mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen,
7593 mode_lib->ms.cache_display_cfg.surface.DCCEnable,
7594 mode_lib->ms.cache_display_cfg.plane.ViewportStationary,
7595 mode_lib->ms.cache_display_cfg.plane.ViewportXStart,
7596 mode_lib->ms.cache_display_cfg.plane.ViewportYStart,
7597 mode_lib->ms.cache_display_cfg.plane.ViewportXStartC,
7598 mode_lib->ms.cache_display_cfg.plane.ViewportYStartC,
7599 mode_lib->ms.cache_display_cfg.plane.ViewportWidth,
7600 mode_lib->ms.cache_display_cfg.plane.ViewportHeight,
7601 mode_lib->ms.BytePerPixelY,
7602 mode_lib->ms.cache_display_cfg.plane.ViewportWidthChroma,
7603 mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma,
7604 mode_lib->ms.BytePerPixelC,
7605 mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY,
7606 mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC,
7607 mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY,
7608 mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC,
7609 mode_lib->ms.Read256BlockWidthY,
7610 mode_lib->ms.Read256BlockWidthC,
7611 mode_lib->ms.Read256BlockHeightY,
7612 mode_lib->ms.Read256BlockHeightC,
7613 mode_lib->ms.MacroTileWidthY,
7614 mode_lib->ms.MacroTileWidthC,
7615 mode_lib->ms.MacroTileHeightY,
7616 mode_lib->ms.MacroTileHeightC,
7617
7618 /* Output */
7619 mode_lib->ms.SurfaceSizeInMALL,
7620 &mode_lib->ms.support.ExceededMALLSize);
7621
7622 for (j = 0; j < 2; j++) {
7623 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7624 mode_lib->ms.swath_width_luma_ub_this_state[k] = mode_lib->ms.swath_width_luma_ub_all_states[j][k];
7625 mode_lib->ms.swath_width_chroma_ub_this_state[k] = mode_lib->ms.swath_width_chroma_ub_all_states[j][k];
7626 mode_lib->ms.SwathWidthYThisState[k] = mode_lib->ms.SwathWidthYAllStates[j][k];
7627 mode_lib->ms.SwathWidthCThisState[k] = mode_lib->ms.SwathWidthCAllStates[j][k];
7628 mode_lib->ms.SwathHeightYThisState[k] = mode_lib->ms.SwathHeightYAllStates[j][k];
7629 mode_lib->ms.SwathHeightCThisState[k] = mode_lib->ms.SwathHeightCAllStates[j][k];
7630 mode_lib->ms.DETBufferSizeInKByteThisState[k] = mode_lib->ms.DETBufferSizeInKByteAllStates[j][k];
7631 mode_lib->ms.DETBufferSizeYThisState[k] = mode_lib->ms.DETBufferSizeYAllStates[j][k];
7632 mode_lib->ms.DETBufferSizeCThisState[k] = mode_lib->ms.DETBufferSizeCAllStates[j][k];
7633 mode_lib->ms.RequiredDPPCLKThisState[k] = mode_lib->ms.RequiredDPPCLKPerSurface[j][k];
7634 mode_lib->ms.NoOfDPPThisState[k] = mode_lib->ms.NoOfDPP[j][k];
7635 }
7636
7637 mode_lib->ms.TotalNumberOfDCCActiveDPP[j] = 0;
7638 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7639 if (mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true) {
7640 mode_lib->ms.TotalNumberOfDCCActiveDPP[j] = mode_lib->ms.TotalNumberOfDCCActiveDPP[j] + mode_lib->ms.NoOfDPP[j][k];
7641 }
7642 }
7643
7644 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7645 s->SurfParameters[k].PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k];
7646 s->SurfParameters[k].DPPPerSurface = mode_lib->ms.NoOfDPP[j][k];
7647 s->SurfParameters[k].SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k];
7648 s->SurfParameters[k].ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight[k];
7649 s->SurfParameters[k].ViewportHeightChroma = mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma[k];
7650 s->SurfParameters[k].BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k];
7651 s->SurfParameters[k].BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k];
7652 s->SurfParameters[k].BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k];
7653 s->SurfParameters[k].BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k];
7654 s->SurfParameters[k].BlockWidthY = mode_lib->ms.MacroTileWidthY[k];
7655 s->SurfParameters[k].BlockHeightY = mode_lib->ms.MacroTileHeightY[k];
7656 s->SurfParameters[k].BlockWidthC = mode_lib->ms.MacroTileWidthC[k];
7657 s->SurfParameters[k].BlockHeightC = mode_lib->ms.MacroTileHeightC[k];
7658 s->SurfParameters[k].InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k];
7659 s->SurfParameters[k].HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k];
7660 s->SurfParameters[k].DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable[k];
7661 s->SurfParameters[k].SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k];
7662 s->SurfParameters[k].SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k];
7663 s->SurfParameters[k].BytePerPixelY = mode_lib->ms.BytePerPixelY[k];
7664 s->SurfParameters[k].BytePerPixelC = mode_lib->ms.BytePerPixelC[k];
7665 s->SurfParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
7666 s->SurfParameters[k].VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio[k];
7667 s->SurfParameters[k].VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k];
7668 s->SurfParameters[k].VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps[k];
7669 s->SurfParameters[k].VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k];
7670 s->SurfParameters[k].PitchY = mode_lib->ms.cache_display_cfg.surface.PitchY[k];
7671 s->SurfParameters[k].DCCMetaPitchY = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k];
7672 s->SurfParameters[k].PitchC = mode_lib->ms.cache_display_cfg.surface.PitchC[k];
7673 s->SurfParameters[k].DCCMetaPitchC = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k];
7674 s->SurfParameters[k].ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary[k];
7675 s->SurfParameters[k].ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart[k];
7676 s->SurfParameters[k].ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart[k];
7677 s->SurfParameters[k].ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC[k];
7678 s->SurfParameters[k].ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC[k];
7679 s->SurfParameters[k].FORCE_ONE_ROW_FOR_FRAME = mode_lib->ms.cache_display_cfg.plane.ForceOneRowForFrame[k];
7680 s->SurfParameters[k].SwathHeightY = mode_lib->ms.SwathHeightYThisState[k];
7681 s->SurfParameters[k].SwathHeightC = mode_lib->ms.SwathHeightCThisState[k];
7682 }
7683
7684 CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
7685 CalculateVMRowAndSwath_params->myPipe = s->SurfParameters;
7686 CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->ms.SurfaceSizeInMALL;
7687 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ms.ip.dpte_buffer_size_in_pte_reqs_luma;
7688 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ms.ip.dpte_buffer_size_in_pte_reqs_chroma;
7689 CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ms.ip.dcc_meta_buffer_size_bytes;
7690 CalculateVMRowAndSwath_params->UseMALLForStaticScreen = mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen;
7691 CalculateVMRowAndSwath_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
7692 CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->ms.soc.mall_allocated_for_dcn_mbytes;
7693 CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->ms.SwathWidthYThisState;
7694 CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->ms.SwathWidthCThisState;
7695 CalculateVMRowAndSwath_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
7696 CalculateVMRowAndSwath_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
7697 CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
7698 CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
7699 CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes;
7700 CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
7701 CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn;
7702 CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode;
7703 CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceededPerState;
7704 CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = mode_lib->ms.DCCMetaBufferSizeNotExceededPerState;
7705 CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = s->dummy_integer_array[0];
7706 CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = s->dummy_integer_array[1];
7707 CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->ms.dpte_row_height;
7708 CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->ms.dpte_row_height_chroma;
7709 CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = s->dummy_integer_array[2]; // VBA_DELTA
7710 CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = s->dummy_integer_array[3]; // VBA_DELTA
7711 CalculateVMRowAndSwath_params->meta_req_width = s->dummy_integer_array[4];
7712 CalculateVMRowAndSwath_params->meta_req_width_chroma = s->dummy_integer_array[5];
7713 CalculateVMRowAndSwath_params->meta_req_height = s->dummy_integer_array[6];
7714 CalculateVMRowAndSwath_params->meta_req_height_chroma = s->dummy_integer_array[7];
7715 CalculateVMRowAndSwath_params->meta_row_width = s->dummy_integer_array[8];
7716 CalculateVMRowAndSwath_params->meta_row_width_chroma = s->dummy_integer_array[9];
7717 CalculateVMRowAndSwath_params->meta_row_height = mode_lib->ms.meta_row_height;
7718 CalculateVMRowAndSwath_params->meta_row_height_chroma = mode_lib->ms.meta_row_height_chroma;
7719 CalculateVMRowAndSwath_params->vm_group_bytes = s->dummy_integer_array[10];
7720 CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
7721 CalculateVMRowAndSwath_params->PixelPTEReqWidthY = s->dummy_integer_array[11];
7722 CalculateVMRowAndSwath_params->PixelPTEReqHeightY = s->dummy_integer_array[12];
7723 CalculateVMRowAndSwath_params->PTERequestSizeY = s->dummy_integer_array[13];
7724 CalculateVMRowAndSwath_params->PixelPTEReqWidthC = s->dummy_integer_array[14];
7725 CalculateVMRowAndSwath_params->PixelPTEReqHeightC = s->dummy_integer_array[15];
7726 CalculateVMRowAndSwath_params->PTERequestSizeC = s->dummy_integer_array[16];
7727 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = s->dummy_integer_array[17];
7728 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = s->dummy_integer_array[18];
7729 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = s->dummy_integer_array[19];
7730 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = s->dummy_integer_array[20];
7731 CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesYThisState;
7732 CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesCThisState;
7733 CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->ms.PrefillY;
7734 CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->ms.PrefillC;
7735 CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->ms.MaxNumSwY;
7736 CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->ms.MaxNumSwC;
7737 CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->ms.meta_row_bandwidth_this_state;
7738 CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->ms.dpte_row_bandwidth_this_state;
7739 CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRowThisState;
7740 CalculateVMRowAndSwath_params->PDEAndMetaPTEBytesFrame = mode_lib->ms.PDEAndMetaPTEBytesPerFrameThisState;
7741 CalculateVMRowAndSwath_params->MetaRowByte = mode_lib->ms.MetaRowBytesThisState;
7742 CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->ms.use_one_row_for_frame_this_state;
7743 CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->ms.use_one_row_for_frame_flip_this_state;
7744 CalculateVMRowAndSwath_params->UsesMALLForStaticScreen = s->dummy_boolean_array[0];
7745 CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = s->dummy_boolean_array[1];
7746 CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = s->dummy_integer_array[21];
7747
7748 CalculateVMRowAndSwath(&mode_lib->scratch,
7749 CalculateVMRowAndSwath_params);
7750
7751 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7752 mode_lib->ms.PrefetchLinesY[j][k] = mode_lib->ms.PrefetchLinesYThisState[k];
7753 mode_lib->ms.PrefetchLinesC[j][k] = mode_lib->ms.PrefetchLinesCThisState[k];
7754 mode_lib->ms.meta_row_bandwidth[j][k] = mode_lib->ms.meta_row_bandwidth_this_state[k];
7755 mode_lib->ms.dpte_row_bandwidth[j][k] = mode_lib->ms.dpte_row_bandwidth_this_state[k];
7756 mode_lib->ms.DPTEBytesPerRow[j][k] = mode_lib->ms.DPTEBytesPerRowThisState[k];
7757 mode_lib->ms.PDEAndMetaPTEBytesPerFrame[j][k] = mode_lib->ms.PDEAndMetaPTEBytesPerFrameThisState[k];
7758 mode_lib->ms.MetaRowBytes[j][k] = mode_lib->ms.MetaRowBytesThisState[k];
7759 mode_lib->ms.use_one_row_for_frame[j][k] = mode_lib->ms.use_one_row_for_frame_this_state[k];
7760 mode_lib->ms.use_one_row_for_frame_flip[j][k] = mode_lib->ms.use_one_row_for_frame_flip_this_state[k];
7761 }
7762
7763 mode_lib->ms.support.PTEBufferSizeNotExceeded[j] = true;
7764
7765 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7766 if (mode_lib->ms.PTEBufferSizeNotExceededPerState[k] == false)
7767 mode_lib->ms.support.PTEBufferSizeNotExceeded[j] = false;
7768 #ifdef __DML_VBA_DEBUG__
7769 dml_print("DML::%s: j=%u k=%u, PTEBufferSizeNotExceededPerState[%u] = %u\n", __func__, j, k, k, mode_lib->ms.PTEBufferSizeNotExceededPerState[k]);
7770 #endif
7771 }
7772 #ifdef __DML_VBA_DEBUG__
7773 dml_print("DML::%s: PTEBufferSizeNotExceeded[%u] = %u\n", __func__, j, mode_lib->ms.support.PTEBufferSizeNotExceeded[j]);
7774 #endif
7775
7776 mode_lib->ms.support.DCCMetaBufferSizeNotExceeded[j] = true;
7777 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7778 if (mode_lib->ms.DCCMetaBufferSizeNotExceededPerState[k] == false)
7779 mode_lib->ms.support.DCCMetaBufferSizeNotExceeded[j] = false;
7780 }
7781
7782 mode_lib->ms.UrgLatency = CalculateUrgentLatency(mode_lib->ms.state.urgent_latency_pixel_data_only_us,
7783 mode_lib->ms.state.urgent_latency_pixel_mixed_with_vm_data_us,
7784 mode_lib->ms.state.urgent_latency_vm_data_only_us,
7785 mode_lib->ms.soc.do_urgent_latency_adjustment,
7786 mode_lib->ms.state.urgent_latency_adjustment_fabric_clock_component_us,
7787 mode_lib->ms.state.urgent_latency_adjustment_fabric_clock_reference_mhz,
7788 mode_lib->ms.state.fabricclk_mhz);
7789
7790 /* Getter functions work at mp interface so copy the urgent latency to mp*/
7791 mode_lib->mp.UrgentLatency = mode_lib->ms.UrgLatency;
7792
7793 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7794 CalculateUrgentBurstFactor(
7795 mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
7796 mode_lib->ms.swath_width_luma_ub_this_state[k],
7797 mode_lib->ms.swath_width_chroma_ub_this_state[k],
7798 mode_lib->ms.SwathHeightYThisState[k],
7799 mode_lib->ms.SwathHeightCThisState[k],
7800 (dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
7801 mode_lib->ms.UrgLatency,
7802 mode_lib->ms.ip.cursor_buffer_size,
7803 mode_lib->ms.cache_display_cfg.plane.CursorWidth[k],
7804 mode_lib->ms.cache_display_cfg.plane.CursorBPP[k],
7805 mode_lib->ms.cache_display_cfg.plane.VRatio[k],
7806 mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
7807 mode_lib->ms.BytePerPixelInDETY[k],
7808 mode_lib->ms.BytePerPixelInDETC[k],
7809 mode_lib->ms.DETBufferSizeYThisState[k],
7810 mode_lib->ms.DETBufferSizeCThisState[k],
7811 /* Output */
7812 &mode_lib->ms.UrgentBurstFactorCursor[j][k],
7813 &mode_lib->ms.UrgentBurstFactorLuma[j][k],
7814 &mode_lib->ms.UrgentBurstFactorChroma[j][k],
7815 &mode_lib->ms.NotUrgentLatencyHiding[k]);
7816 }
7817
7818 CalculateDCFCLKDeepSleep(
7819 mode_lib->ms.num_active_planes,
7820 mode_lib->ms.BytePerPixelY,
7821 mode_lib->ms.BytePerPixelC,
7822 mode_lib->ms.cache_display_cfg.plane.VRatio,
7823 mode_lib->ms.cache_display_cfg.plane.VRatioChroma,
7824 mode_lib->ms.SwathWidthYThisState,
7825 mode_lib->ms.SwathWidthCThisState,
7826 mode_lib->ms.NoOfDPPThisState,
7827 mode_lib->ms.cache_display_cfg.plane.HRatio,
7828 mode_lib->ms.cache_display_cfg.plane.HRatioChroma,
7829 mode_lib->ms.cache_display_cfg.timing.PixelClock,
7830 mode_lib->ms.PSCL_FACTOR,
7831 mode_lib->ms.PSCL_FACTOR_CHROMA,
7832 mode_lib->ms.RequiredDPPCLKThisState,
7833 mode_lib->ms.ReadBandwidthLuma,
7834 mode_lib->ms.ReadBandwidthChroma,
7835 mode_lib->ms.soc.return_bus_width_bytes,
7836
7837 /* Output */
7838 &mode_lib->ms.ProjectedDCFCLKDeepSleep[j]);
7839 }
7840
7841 //Calculate Return BW
7842 for (j = 0; j < 2; ++j) {
7843 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7844 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
7845 if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
7846 mode_lib->ms.WritebackDelayTime[k] = mode_lib->ms.state.writeback_latency_us + CalculateWriteBackDelay(
7847 mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k],
7848 mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k],
7849 mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k],
7850 mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k],
7851 mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k],
7852 mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k],
7853 mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k],
7854 mode_lib->ms.cache_display_cfg.timing.HTotal[k]) / mode_lib->ms.RequiredDISPCLK[j];
7855 } else {
7856 mode_lib->ms.WritebackDelayTime[k] = 0.0;
7857 }
7858 for (m = 0; m <= mode_lib->ms.num_active_planes - 1; m++) {
7859 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[m] == k && mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[m] == true) {
7860 mode_lib->ms.WritebackDelayTime[k] = dml_max(mode_lib->ms.WritebackDelayTime[k],
7861 mode_lib->ms.state.writeback_latency_us + CalculateWriteBackDelay(
7862 mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[m],
7863 mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[m],
7864 mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[m],
7865 mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[m],
7866 mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[m],
7867 mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[m],
7868 mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[m],
7869 mode_lib->ms.cache_display_cfg.timing.HTotal[m]) / mode_lib->ms.RequiredDISPCLK[j]);
7870 }
7871 }
7872 }
7873 }
7874 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7875 for (m = 0; m <= mode_lib->ms.num_active_planes - 1; m++) {
7876 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == m) {
7877 mode_lib->ms.WritebackDelayTime[k] = mode_lib->ms.WritebackDelayTime[m];
7878 }
7879 }
7880 }
7881 s->MaxVStartupAllPlanes[j] = 0; // max vstartup among all planes
7882
7883 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7884 s->MaximumVStartup[j][k] = CalculateMaxVStartup(k,
7885 mode_lib->ms.ip.ptoi_supported,
7886 mode_lib->ms.ip.vblank_nom_default_us,
7887 &mode_lib->ms.cache_display_cfg.timing,
7888 mode_lib->ms.WritebackDelayTime[k]);
7889
7890 s->MaxVStartupAllPlanes[j] = (dml_uint_t)(dml_max(s->MaxVStartupAllPlanes[j], s->MaximumVStartup[j][k]));
7891 #ifdef __DML_VBA_DEBUG__
7892 dml_print("DML::%s: k=%u, MaxVStartupAllPlanes[%u] = %u\n", __func__, k, j, s->MaxVStartupAllPlanes[j]);
7893 dml_print("DML::%s: k=%u, MaximumVStartup[%u][%u] = %u\n", __func__, k, j, k, s->MaximumVStartup[j][k]);
7894 #endif
7895 }
7896 }
7897
7898 s->ReorderingBytes = (dml_uint_t)(mode_lib->ms.soc.num_chans * dml_max3(mode_lib->ms.soc.urgent_out_of_order_return_per_channel_pixel_only_bytes,
7899 mode_lib->ms.soc.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
7900 mode_lib->ms.soc.urgent_out_of_order_return_per_channel_vm_only_bytes));
7901
7902 for (j = 0; j < 2; ++j) {
7903 mode_lib->ms.DCFCLKState[j] = mode_lib->ms.state.dcfclk_mhz;
7904 }
7905
7906 /* Immediate Flip and MALL parameters */
7907 s->ImmediateFlipRequiredFinal = false;
7908 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7909 s->ImmediateFlipRequiredFinal = s->ImmediateFlipRequiredFinal || (mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_required);
7910 }
7911
7912 mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = false;
7913 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7914 mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified ||
7915 ((mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_required) &&
7916 (mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_not_required));
7917 }
7918 mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified && s->ImmediateFlipRequiredFinal;
7919
7920 mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = false;
7921 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7922 mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe =
7923 mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe || ((mode_lib->ms.cache_display_cfg.plane.HostVMEnable == true || mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_not_required) &&
7924 (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame || mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe));
7925 }
7926
7927 mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = false;
7928 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7929 mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen ||
7930 ((mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen[k] == dml_use_mall_static_screen_enable || mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen[k] == dml_use_mall_static_screen_optimize) && (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe)) ||
7931 ((mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen[k] == dml_use_mall_static_screen_disable || mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen[k] == dml_use_mall_static_screen_optimize) && (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame));
7932 }
7933
7934 s->FullFrameMALLPStateMethod = false;
7935 s->SubViewportMALLPStateMethod = false;
7936 s->PhantomPipeMALLPStateMethod = false;
7937 s->SubViewportMALLRefreshGreaterThan120Hz = false;
7938 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7939 if (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_full_frame)
7940 s->FullFrameMALLPStateMethod = true;
7941 if (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_sub_viewport) {
7942 s->SubViewportMALLPStateMethod = true;
7943 if (mode_lib->ms.cache_display_cfg.timing.RefreshRate[k] > 120)
7944 s->SubViewportMALLRefreshGreaterThan120Hz = true;
7945 }
7946 if (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] == dml_use_mall_pstate_change_phantom_pipe)
7947 s->PhantomPipeMALLPStateMethod = true;
7948 }
7949 mode_lib->ms.support.InvalidCombinationOfMALLUseForPState = (s->SubViewportMALLPStateMethod != s->PhantomPipeMALLPStateMethod)
7950 || (s->SubViewportMALLPStateMethod && s->FullFrameMALLPStateMethod) || s->SubViewportMALLRefreshGreaterThan120Hz;
7951
7952 if (mode_lib->ms.policy.UseMinimumRequiredDCFCLK == true) {
7953 UseMinimumDCFCLK_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
7954 UseMinimumDCFCLK_params->DRRDisplay = mode_lib->ms.cache_display_cfg.timing.DRRDisplay;
7955 UseMinimumDCFCLK_params->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal;
7956 UseMinimumDCFCLK_params->MaxInterDCNTileRepeaters = mode_lib->ms.ip.max_inter_dcn_tile_repeaters;
7957 UseMinimumDCFCLK_params->MaxPrefetchMode = dml_prefetch_support_stutter;
7958 UseMinimumDCFCLK_params->DRAMClockChangeLatencyFinal = mode_lib->ms.state.dram_clock_change_latency_us;
7959 UseMinimumDCFCLK_params->FCLKChangeLatency = mode_lib->ms.state.fclk_change_latency_us;
7960 UseMinimumDCFCLK_params->SREnterPlusExitTime = mode_lib->ms.state.sr_enter_plus_exit_time_us;
7961 UseMinimumDCFCLK_params->ReturnBusWidth = mode_lib->ms.soc.return_bus_width_bytes;
7962 UseMinimumDCFCLK_params->RoundTripPingLatencyCycles = mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles;
7963 UseMinimumDCFCLK_params->ReorderingBytes = s->ReorderingBytes;
7964 UseMinimumDCFCLK_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes;
7965 UseMinimumDCFCLK_params->MetaChunkSize = mode_lib->ms.ip.meta_chunk_size_kbytes;
7966 UseMinimumDCFCLK_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
7967 UseMinimumDCFCLK_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
7968 UseMinimumDCFCLK_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
7969 UseMinimumDCFCLK_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
7970 UseMinimumDCFCLK_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
7971 UseMinimumDCFCLK_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
7972 UseMinimumDCFCLK_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled;
7973 UseMinimumDCFCLK_params->ImmediateFlipRequirement = s->ImmediateFlipRequiredFinal;
7974 UseMinimumDCFCLK_params->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
7975 UseMinimumDCFCLK_params->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation = mode_lib->ms.soc.max_avg_sdp_bw_use_normal_percent;
7976 UseMinimumDCFCLK_params->PercentOfIdealSDPPortBWReceivedAfterUrgLatency = mode_lib->ms.soc.pct_ideal_sdp_bw_after_urgent;
7977 UseMinimumDCFCLK_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal;
7978 UseMinimumDCFCLK_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive;
7979 UseMinimumDCFCLK_params->DynamicMetadataTransmittedBytes = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataTransmittedBytes;
7980 UseMinimumDCFCLK_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired;
7981 UseMinimumDCFCLK_params->Interlace = mode_lib->ms.cache_display_cfg.timing.Interlace;
7982 UseMinimumDCFCLK_params->RequiredDPPCLKPerSurface = mode_lib->ms.RequiredDPPCLKPerSurface;
7983 UseMinimumDCFCLK_params->RequiredDISPCLK = mode_lib->ms.RequiredDISPCLK;
7984 UseMinimumDCFCLK_params->UrgLatency = mode_lib->ms.UrgLatency;
7985 UseMinimumDCFCLK_params->NoOfDPP = mode_lib->ms.NoOfDPP;
7986 UseMinimumDCFCLK_params->ProjectedDCFCLKDeepSleep = mode_lib->ms.ProjectedDCFCLKDeepSleep;
7987 UseMinimumDCFCLK_params->MaximumVStartup = s->MaximumVStartup;
7988 UseMinimumDCFCLK_params->TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP;
7989 UseMinimumDCFCLK_params->TotalNumberOfDCCActiveDPP = mode_lib->ms.TotalNumberOfDCCActiveDPP;
7990 UseMinimumDCFCLK_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
7991 UseMinimumDCFCLK_params->PrefetchLinesY = mode_lib->ms.PrefetchLinesY;
7992 UseMinimumDCFCLK_params->PrefetchLinesC = mode_lib->ms.PrefetchLinesC;
7993 UseMinimumDCFCLK_params->swath_width_luma_ub_all_states = mode_lib->ms.swath_width_luma_ub_all_states;
7994 UseMinimumDCFCLK_params->swath_width_chroma_ub_all_states = mode_lib->ms.swath_width_chroma_ub_all_states;
7995 UseMinimumDCFCLK_params->BytePerPixelY = mode_lib->ms.BytePerPixelY;
7996 UseMinimumDCFCLK_params->BytePerPixelC = mode_lib->ms.BytePerPixelC;
7997 UseMinimumDCFCLK_params->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal;
7998 UseMinimumDCFCLK_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock;
7999 UseMinimumDCFCLK_params->PDEAndMetaPTEBytesPerFrame = mode_lib->ms.PDEAndMetaPTEBytesPerFrame;
8000 UseMinimumDCFCLK_params->DPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow;
8001 UseMinimumDCFCLK_params->MetaRowBytes = mode_lib->ms.MetaRowBytes;
8002 UseMinimumDCFCLK_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable;
8003 UseMinimumDCFCLK_params->ReadBandwidthLuma = mode_lib->ms.ReadBandwidthLuma;
8004 UseMinimumDCFCLK_params->ReadBandwidthChroma = mode_lib->ms.ReadBandwidthChroma;
8005 UseMinimumDCFCLK_params->DCFCLKPerState = mode_lib->ms.state.dcfclk_mhz;
8006 UseMinimumDCFCLK_params->DCFCLKState = mode_lib->ms.DCFCLKState;
8007
8008 UseMinimumDCFCLK(&mode_lib->scratch,
8009 UseMinimumDCFCLK_params);
8010
8011 } // UseMinimumRequiredDCFCLK == true
8012
8013 for (j = 0; j < 2; ++j) {
8014 mode_lib->ms.ReturnBWPerState[j] = dml_get_return_bw_mbps(&mode_lib->ms.soc, mode_lib->ms.state.use_ideal_dram_bw_strobe,
8015 mode_lib->ms.cache_display_cfg.plane.HostVMEnable, mode_lib->ms.DCFCLKState[j], mode_lib->ms.state.fabricclk_mhz,
8016 mode_lib->ms.state.dram_speed_mts);
8017 mode_lib->ms.ReturnDRAMBWPerState[j] = dml_get_return_dram_bw_mbps(&mode_lib->ms.soc, mode_lib->ms.state.use_ideal_dram_bw_strobe,
8018 mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
8019 mode_lib->ms.state.dram_speed_mts);
8020 }
8021
8022 //Re-ordering Buffer Support Check
8023 for (j = 0; j < 2; ++j) {
8024 if ((mode_lib->ms.ip.rob_buffer_size_kbytes - mode_lib->ms.ip.pixel_chunk_size_kbytes) * 1024 / mode_lib->ms.ReturnBWPerState[j] >
8025 (mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles + 32) / mode_lib->ms.DCFCLKState[j] + s->ReorderingBytes / mode_lib->ms.ReturnBWPerState[j]) {
8026 mode_lib->ms.support.ROBSupport[j] = true;
8027 } else {
8028 mode_lib->ms.support.ROBSupport[j] = false;
8029 }
8030 dml_print("DML::%s: DEBUG ROBSupport[%u] = %u (%u)\n", __func__, j, mode_lib->ms.support.ROBSupport[j], __LINE__);
8031 }
8032
8033 //Vertical Active BW support check
8034 s->MaxTotalVActiveRDBandwidth = 0;
8035 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8036 s->MaxTotalVActiveRDBandwidth = s->MaxTotalVActiveRDBandwidth + mode_lib->ms.ReadBandwidthLuma[k] + mode_lib->ms.ReadBandwidthChroma[k];
8037 }
8038
8039 for (j = 0; j < 2; ++j) {
8040 mode_lib->ms.support.MaxTotalVerticalActiveAvailableBandwidth[j] = dml_min3(mode_lib->ms.soc.return_bus_width_bytes * mode_lib->ms.DCFCLKState[j] * mode_lib->ms.soc.max_avg_sdp_bw_use_normal_percent / 100.0,
8041 mode_lib->ms.state.fabricclk_mhz * mode_lib->ms.soc.fabric_datapath_to_dcn_data_return_bytes * mode_lib->ms.soc.max_avg_fabric_bw_use_normal_percent / 100.0,
8042 mode_lib->ms.state.dram_speed_mts * mode_lib->ms.soc.num_chans * mode_lib->ms.soc.dram_channel_width_bytes *
8043 ((mode_lib->ms.state.use_ideal_dram_bw_strobe && !mode_lib->ms.cache_display_cfg.plane.HostVMEnable) ?
8044 mode_lib->ms.soc.max_avg_dram_bw_use_normal_strobe_percent : mode_lib->ms.soc.max_avg_dram_bw_use_normal_percent) / 100.0);
8045
8046 if (s->MaxTotalVActiveRDBandwidth <= mode_lib->ms.support.MaxTotalVerticalActiveAvailableBandwidth[j]) {
8047 mode_lib->ms.support.TotalVerticalActiveBandwidthSupport[j] = true;
8048 } else {
8049 mode_lib->ms.support.TotalVerticalActiveBandwidthSupport[j] = false;
8050 }
8051 }
8052
8053 /* Prefetch Check */
8054 dml_prefetch_check(mode_lib);
8055
8056 // End of Prefetch Check
8057 dml_print("DML::%s: Done prefetch calculation\n", __func__);
8058
8059 /*Cursor Support Check*/
8060 mode_lib->ms.support.CursorSupport = true;
8061 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8062 if (mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] > 0.0) {
8063 if (mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] == 64 && mode_lib->ms.ip.cursor_64bpp_support == false) {
8064 mode_lib->ms.support.CursorSupport = false;
8065 }
8066 }
8067 }
8068
8069 /*Valid Pitch Check*/
8070 mode_lib->ms.support.PitchSupport = true;
8071 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8072 mode_lib->ms.support.AlignedYPitch[k] = dml_ceil(
8073 dml_max(mode_lib->ms.cache_display_cfg.surface.PitchY[k], mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY[k]),
8074 mode_lib->ms.MacroTileWidthY[k]);
8075 if (mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true) {
8076 mode_lib->ms.support.AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k], mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY[k]), 64.0 * mode_lib->ms.Read256BlockWidthY[k]);
8077 } else {
8078 mode_lib->ms.support.AlignedDCCMetaPitchY[k] = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k];
8079 }
8080 if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_64
8081 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_32
8082 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_16
8083 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_16
8084 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe
8085 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_mono_8) {
8086 mode_lib->ms.support.AlignedCPitch[k] = dml_ceil(dml_max(mode_lib->ms.cache_display_cfg.surface.PitchC[k], mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC[k]), mode_lib->ms.MacroTileWidthC[k]);
8087 if (mode_lib->ms.cache_display_cfg.surface.DCCEnable[k] == true) {
8088 mode_lib->ms.support.AlignedDCCMetaPitchC[k] = dml_ceil(dml_max(mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k], mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC[k]), 64.0 * mode_lib->ms.Read256BlockWidthC[k]);
8089 } else {
8090 mode_lib->ms.support.AlignedDCCMetaPitchC[k] = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k];
8091 }
8092 } else {
8093 mode_lib->ms.support.AlignedCPitch[k] = mode_lib->ms.cache_display_cfg.surface.PitchC[k];
8094 mode_lib->ms.support.AlignedDCCMetaPitchC[k] = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k];
8095 }
8096 if (mode_lib->ms.support.AlignedYPitch[k] > mode_lib->ms.cache_display_cfg.surface.PitchY[k] || mode_lib->ms.support.AlignedCPitch[k] > mode_lib->ms.cache_display_cfg.surface.PitchC[k] ||
8097 mode_lib->ms.support.AlignedDCCMetaPitchY[k] > mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k] || mode_lib->ms.support.AlignedDCCMetaPitchC[k] > mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k]) {
8098 mode_lib->ms.support.PitchSupport = false;
8099 }
8100 }
8101
8102 mode_lib->ms.support.ViewportExceedsSurface = false;
8103 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8104 if (mode_lib->ms.cache_display_cfg.plane.ViewportWidth[k] > mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY[k] || mode_lib->ms.cache_display_cfg.plane.ViewportHeight[k] > mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY[k]) {
8105 mode_lib->ms.support.ViewportExceedsSurface = true;
8106 if (mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_64 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_32 &&
8107 mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_16 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_444_8 && mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k] != dml_rgbe) {
8108 if (mode_lib->ms.cache_display_cfg.plane.ViewportWidthChroma[k] > mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC[k] || mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma[k] > mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC[k]) {
8109 mode_lib->ms.support.ViewportExceedsSurface = true;
8110 }
8111 }
8112 }
8113 }
8114
8115 /*Mode Support, Voltage State and SOC Configuration*/
8116 for (j = 0; j < 2; j++) { // j iterator is for the combine mode off or on
8117 dml_print("DML::%s: checking support for j=%u\n", __func__, j);
8118 dml_print("DML::%s: state_idx=%0d max_state_idx=%0d\n", __func__, mode_lib->ms.state_idx, mode_lib->ms.max_state_idx);
8119
8120 s->is_max_pwr_state = (mode_lib->ms.max_state_idx == mode_lib->ms.state_idx);
8121 s->is_max_dram_pwr_state = (mode_lib->ms.max_state.dram_speed_mts == mode_lib->ms.state.dram_speed_mts);
8122
8123 s->dram_clock_change_support = (!mode_lib->ms.policy.DRAMClockChangeRequirementFinal ||
8124 (s->is_max_dram_pwr_state && mode_lib->policy.AssumeModeSupportAtMaxPwrStateEvenDRAMClockChangeNotSupported) ||
8125 mode_lib->ms.support.DRAMClockChangeSupport[j] != dml_dram_clock_change_unsupported);
8126 s->f_clock_change_support = (!mode_lib->ms.policy.FCLKChangeRequirementFinal ||
8127 (s->is_max_pwr_state && mode_lib->policy.AssumeModeSupportAtMaxPwrStateEvenFClockChangeNotSupported) ||
8128 mode_lib->ms.support.FCLKChangeSupport[j] != dml_fclock_change_unsupported);
8129
8130 if (mode_lib->ms.support.ScaleRatioAndTapsSupport == true
8131 && mode_lib->ms.support.SourceFormatPixelAndScanSupport == true
8132 && mode_lib->ms.support.ViewportSizeSupport[j] == true
8133 && !mode_lib->ms.support.LinkRateDoesNotMatchDPVersion
8134 && !mode_lib->ms.support.LinkRateForMultistreamNotIndicated
8135 && !mode_lib->ms.support.BPPForMultistreamNotIndicated
8136 && !mode_lib->ms.support.MultistreamWithHDMIOreDP
8137 && !mode_lib->ms.support.ExceededMultistreamSlots
8138 && !mode_lib->ms.support.MSOOrODMSplitWithNonDPLink
8139 && !mode_lib->ms.support.NotEnoughLanesForMSO
8140 && mode_lib->ms.support.LinkCapacitySupport == true
8141 && !mode_lib->ms.support.P2IWith420
8142 && !mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP
8143 && !mode_lib->ms.support.DSC422NativeNotSupported
8144 && !mode_lib->ms.support.MPCCombineMethodIncompatible
8145 && mode_lib->ms.support.ODMCombineTwoToOneSupportCheckOK == true
8146 && mode_lib->ms.support.ODMCombineFourToOneSupportCheckOK == true
8147 && mode_lib->ms.support.NotEnoughDSCUnits == false
8148 && !mode_lib->ms.support.NotEnoughDSCSlices
8149 && !mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe
8150 && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen
8151 && mode_lib->ms.support.DSCCLKRequiredMoreThanSupported == false
8152 && mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport
8153 && mode_lib->ms.support.DTBCLKRequiredMoreThanSupported == false
8154 && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPState
8155 && !mode_lib->ms.support.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified
8156 && mode_lib->ms.support.ROBSupport[j] == true
8157 && mode_lib->ms.support.DISPCLK_DPPCLK_Support[j] == true
8158 && mode_lib->ms.support.TotalAvailablePipesSupport[j] == true
8159 && mode_lib->ms.support.NumberOfOTGSupport == true
8160 && mode_lib->ms.support.NumberOfHDMIFRLSupport == true
8161 && mode_lib->ms.support.NumberOfDP2p0Support == true
8162 && mode_lib->ms.support.EnoughWritebackUnits == true
8163 && mode_lib->ms.support.WritebackLatencySupport == true
8164 && mode_lib->ms.support.WritebackScaleRatioAndTapsSupport == true
8165 && mode_lib->ms.support.CursorSupport == true
8166 && mode_lib->ms.support.PitchSupport == true
8167 && mode_lib->ms.support.ViewportExceedsSurface == false
8168 && mode_lib->ms.support.PrefetchSupported[j] == true
8169 && mode_lib->ms.support.VActiveBandwithSupport[j] == true
8170 && mode_lib->ms.support.DynamicMetadataSupported[j] == true
8171 && mode_lib->ms.support.TotalVerticalActiveBandwidthSupport[j] == true
8172 && mode_lib->ms.support.VRatioInPrefetchSupported[j] == true
8173 && mode_lib->ms.support.PTEBufferSizeNotExceeded[j] == true
8174 && mode_lib->ms.support.DCCMetaBufferSizeNotExceeded[j] == true
8175 && mode_lib->ms.support.NonsupportedDSCInputBPC == false
8176 && !mode_lib->ms.support.ExceededMALLSize
8177 && ((mode_lib->ms.cache_display_cfg.plane.HostVMEnable == false && !s->ImmediateFlipRequiredFinal) || mode_lib->ms.support.ImmediateFlipSupportedForState[j])
8178 && s->dram_clock_change_support == true
8179 && s->f_clock_change_support == true
8180 && (!mode_lib->ms.policy.USRRetrainingRequiredFinal || mode_lib->ms.support.USRRetrainingSupport[j])) {
8181 dml_print("DML::%s: mode is supported\n", __func__);
8182 mode_lib->ms.support.ModeSupport[j] = true;
8183 } else {
8184 dml_print("DML::%s: mode is NOT supported\n", __func__);
8185 mode_lib->ms.support.ModeSupport[j] = false;
8186 dml_print_mode_support(mode_lib, j);
8187 }
8188 }
8189
8190 mode_lib->ms.support.MaximumMPCCombine = 0;
8191 mode_lib->ms.support.ModeIsSupported = 0;
8192 if (mode_lib->ms.support.ModeSupport[0] == true || mode_lib->ms.support.ModeSupport[1] == true) { // if the mode is supported by either no combine or mpccombine
8193 mode_lib->ms.support.ModeIsSupported = mode_lib->ms.support.ModeSupport[0] == true || mode_lib->ms.support.ModeSupport[1] == true;
8194
8195 // Determine if MPC combine is necessary, depends on if using MPC combine will help dram clock change or fclk change, etc.
8196 if ((mode_lib->ms.support.ModeSupport[0] == false && mode_lib->ms.support.ModeSupport[1] == true) || s->MPCCombineMethodAsPossible ||
8197 (s->MPCCombineMethodAsNeededForPStateChangeAndVoltage && mode_lib->ms.policy.DRAMClockChangeRequirementFinal &&
8198 (((mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vactive || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vactive_w_mall_full_frame || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vactive_w_mall_sub_vp) &&
8199 !(mode_lib->ms.support.DRAMClockChangeSupport[0] == dml_dram_clock_change_vactive || mode_lib->ms.support.DRAMClockChangeSupport[0] == dml_dram_clock_change_vactive_w_mall_full_frame || mode_lib->ms.support.DRAMClockChangeSupport[0] == dml_dram_clock_change_vactive_w_mall_sub_vp)) ||
8200 ((mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_drr
8201 || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_w_mall_full_frame || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_drr_w_mall_full_frame
8202 || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_w_mall_sub_vp || mode_lib->ms.support.DRAMClockChangeSupport[1] == dml_dram_clock_change_vblank_drr_w_mall_sub_vp
8203 ) &&
8204 mode_lib->ms.support.DRAMClockChangeSupport[0] == dml_dram_clock_change_unsupported)))
8205 || (s->MPCCombineMethodAsNeededForPStateChangeAndVoltage && mode_lib->ms.policy.FCLKChangeRequirementFinal &&
8206 ((mode_lib->ms.support.FCLKChangeSupport[1] == dml_fclock_change_vactive && mode_lib->ms.support.FCLKChangeSupport[0] != dml_fclock_change_vactive) ||
8207 (mode_lib->ms.support.FCLKChangeSupport[1] == dml_fclock_change_vblank && mode_lib->ms.support.FCLKChangeSupport[0] == dml_fclock_change_unsupported)))) {
8208 mode_lib->ms.support.MaximumMPCCombine = 1;
8209 } else {
8210 mode_lib->ms.support.MaximumMPCCombine = 0;
8211 }
8212 }
8213
8214 // Since now the mode_support work on 1 particular power state, so there is only 1 state idx (index 0).
8215 mode_lib->ms.support.ImmediateFlipSupport = mode_lib->ms.support.ImmediateFlipSupportedForState[mode_lib->ms.support.MaximumMPCCombine]; // Consider flip support if max combine support imm flip
8216 mode_lib->ms.support.UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabledAllStates[mode_lib->ms.support.MaximumMPCCombine]; // Not used, informational
8217 mode_lib->ms.support.CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByteAllStates[mode_lib->ms.support.MaximumMPCCombine]; // Not used, informational
8218
8219 dml_print("DML::%s: ModeIsSupported = %u\n", __func__, mode_lib->ms.support.ModeIsSupported);
8220 dml_print("DML::%s: MaximumMPCCombine = %u\n", __func__, mode_lib->ms.support.MaximumMPCCombine);
8221 dml_print("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport);
8222 dml_print("DML::%s: UnboundedRequestEnabled = %u\n", __func__, mode_lib->ms.support.UnboundedRequestEnabled);
8223 dml_print("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, mode_lib->ms.support.CompressedBufferSizeInkByte);
8224
8225 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8226 mode_lib->ms.support.MPCCombineEnable[k] = mode_lib->ms.MPCCombine[mode_lib->ms.support.MaximumMPCCombine][k];
8227 mode_lib->ms.support.DPPPerSurface[k] = mode_lib->ms.NoOfDPP[mode_lib->ms.support.MaximumMPCCombine][k];
8228 mode_lib->ms.SwathHeightY[k] = mode_lib->ms.SwathHeightYAllStates[mode_lib->ms.support.MaximumMPCCombine][k];
8229 mode_lib->ms.SwathHeightC[k] = mode_lib->ms.SwathHeightCAllStates[mode_lib->ms.support.MaximumMPCCombine][k];
8230 mode_lib->ms.DETBufferSizeInKByte[k] = mode_lib->ms.DETBufferSizeInKByteAllStates[mode_lib->ms.support.MaximumMPCCombine][k];
8231 mode_lib->ms.DETBufferSizeY[k] = mode_lib->ms.DETBufferSizeYAllStates[mode_lib->ms.support.MaximumMPCCombine][k];
8232 mode_lib->ms.DETBufferSizeC[k] = mode_lib->ms.DETBufferSizeCAllStates[mode_lib->ms.support.MaximumMPCCombine][k];
8233 }
8234
8235 mode_lib->ms.DRAMSpeed = mode_lib->ms.state.dram_speed_mts;
8236 mode_lib->ms.FabricClock = mode_lib->ms.state.fabricclk_mhz;
8237 mode_lib->ms.SOCCLK = mode_lib->ms.state.socclk_mhz;
8238 mode_lib->ms.DCFCLK = mode_lib->ms.DCFCLKState[mode_lib->ms.support.MaximumMPCCombine];
8239 mode_lib->ms.ReturnBW = mode_lib->ms.ReturnBWPerState[mode_lib->ms.support.MaximumMPCCombine];
8240 mode_lib->ms.ReturnDRAMBW = mode_lib->ms.ReturnDRAMBWPerState[mode_lib->ms.support.MaximumMPCCombine];
8241
8242 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8243 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
8244 mode_lib->ms.support.ODMMode[k] = mode_lib->ms.ODMModePerState[k];
8245 } else {
8246 mode_lib->ms.support.ODMMode[k] = dml_odm_mode_bypass;
8247 }
8248
8249 mode_lib->ms.support.DSCEnabled[k] = mode_lib->ms.RequiresDSC[k];
8250 mode_lib->ms.support.FECEnabled[k] = mode_lib->ms.RequiresFEC[k];
8251 mode_lib->ms.support.OutputBpp[k] = mode_lib->ms.OutputBppPerState[k];
8252 mode_lib->ms.support.OutputType[k] = mode_lib->ms.OutputTypePerState[k];
8253 mode_lib->ms.support.OutputRate[k] = mode_lib->ms.OutputRatePerState[k];
8254 mode_lib->ms.support.SubViewportLinesNeededInMALL[k] = mode_lib->ms.SubViewportLinesNeededInMALL[k];
8255 }
8256
8257 return mode_lib->ms.support.ModeIsSupported;
8258 } // dml_core_mode_support
8259
8260 /// @brief This function calculates some parameters thats are needed ahead of the mode programming function all
dml_core_mode_support_partial(struct display_mode_lib_st * mode_lib)8261 void dml_core_mode_support_partial(struct display_mode_lib_st *mode_lib)
8262 {
8263 CalculateMaxDETAndMinCompressedBufferSize(
8264 mode_lib->ms.ip.config_return_buffer_size_in_kbytes,
8265 mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes,
8266 mode_lib->ms.ip.rob_buffer_size_kbytes,
8267 mode_lib->ms.ip.max_num_dpp,
8268 mode_lib->ms.policy.NomDETInKByteOverrideEnable,
8269 mode_lib->ms.policy.NomDETInKByteOverrideValue,
8270
8271 /* Output */
8272 &mode_lib->ms.MaxTotalDETInKByte,
8273 &mode_lib->ms.NomDETInKByte,
8274 &mode_lib->ms.MinCompressedBufferSizeInKByte);
8275
8276 PixelClockAdjustmentForProgressiveToInterlaceUnit(&mode_lib->ms.cache_display_cfg, mode_lib->ms.ip.ptoi_supported);
8277
8278 mode_lib->ms.ReturnBW = dml_get_return_bw_mbps(&mode_lib->ms.soc,
8279 mode_lib->ms.state.use_ideal_dram_bw_strobe,
8280 mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
8281 mode_lib->ms.DCFCLK,
8282 mode_lib->ms.FabricClock,
8283 mode_lib->ms.DRAMSpeed);
8284 dml_print("DML::%s: ReturnBW = %f\n", __func__, mode_lib->ms.ReturnBW);
8285
8286 } // dml_core_mode_support_partial
8287
/// @brief This is the mode programming function. It is assumed the display cfg is supported at the given power state
dml_core_mode_programming(struct display_mode_lib_st * mode_lib,const struct dml_clk_cfg_st * clk_cfg)8289 void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struct dml_clk_cfg_st *clk_cfg)
8290 {
8291 struct dml_core_mode_programming_locals_st *s = &mode_lib->scratch.dml_core_mode_programming_locals;
8292 struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params;
8293 struct CalculateVMRowAndSwath_params_st *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params;
8294 struct CalculateSwathAndDETConfiguration_params_st *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params;
8295 struct CalculateStutterEfficiency_params_st *CalculateStutterEfficiency_params = &mode_lib->scratch.CalculateStutterEfficiency_params;
8296 struct CalculatePrefetchSchedule_params_st *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params;
8297
8298 struct mode_program_st *locals = &mode_lib->mp;
8299 struct DmlPipe *myPipe;
8300 dml_uint_t j = 0, k = 0;
8301 dml_float_t TWait;
8302 dml_bool_t isInterlaceTiming;
8303
8304 mode_lib->ms.num_active_planes = dml_get_num_active_planes(&mode_lib->ms.cache_display_cfg);
8305 mode_lib->mp.num_active_pipes = dml_get_num_active_pipes(&mode_lib->ms.cache_display_cfg);
8306 dml_calc_pipe_plane_mapping(&mode_lib->ms.cache_display_cfg.hw, mode_lib->mp.pipe_plane);
8307
8308 #ifdef __DML_VBA_DEBUG__
8309 dml_print("DML::%s: --- START --- \n", __func__);
8310 dml_print("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes);
8311 dml_print("DML::%s: num_active_pipes = %u\n", __func__, mode_lib->mp.num_active_pipes);
8312 #endif
8313
8314 s->DSCFormatFactor = 0;
8315
8316 // Unlike dppclk and dispclk which can be calculated in mode_programming
8317 // DCFCLK is calculated in mode_support (which is the state bbox dcfclk or min dcfclk if min dcfclk option is used in mode support calculation)
8318 if (clk_cfg->dcfclk_option != dml_use_override_freq)
8319 locals->Dcfclk = mode_lib->ms.DCFCLK;
8320 else
8321 locals->Dcfclk = clk_cfg->dcfclk_mhz;
8322
8323 #ifdef __DML_VBA_DEBUG__
8324 dml_print_dml_policy(&mode_lib->ms.policy);
8325 dml_print_soc_state_bounding_box(&mode_lib->ms.state);
8326 dml_print_soc_bounding_box(&mode_lib->ms.soc);
8327 dml_print_clk_cfg(clk_cfg);
8328
8329 dml_print("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport);
8330 dml_print("DML::%s: Using DCFCLK = %f\n", __func__, locals->Dcfclk);
8331 dml_print("DML::%s: Using SOCCLK = %f\n", __func__, mode_lib->ms.SOCCLK);
8332 #endif
8333
8334 locals->WritebackDISPCLK = 0.0;
8335 locals->GlobalDPPCLK = 0.0;
8336
8337 // DISPCLK and DPPCLK Calculation
8338 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8339 if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k]) {
8340 locals->WritebackDISPCLK =
8341 dml_max(
8342 locals->WritebackDISPCLK,
8343 CalculateWriteBackDISPCLK(
8344 mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k],
8345 mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
8346 mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k],
8347 mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k],
8348 mode_lib->ms.cache_display_cfg.writeback.WritebackHTaps[k],
8349 mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k],
8350 mode_lib->ms.cache_display_cfg.writeback.WritebackSourceWidth[k],
8351 mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k],
8352 mode_lib->ms.cache_display_cfg.timing.HTotal[k],
8353 mode_lib->ms.ip.writeback_line_buffer_buffer_size,
8354 mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz));
8355 }
8356 }
8357
8358 locals->Dispclk_calculated = locals->WritebackDISPCLK;
8359
8360 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8361 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
8362 locals->Dispclk_calculated = dml_max(locals->Dispclk_calculated, CalculateRequiredDispclk(
8363 mode_lib->ms.cache_display_cfg.hw.ODMMode[k],
8364 mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
8365 mode_lib->ms.soc.dcn_downspread_percent,
8366 mode_lib->ms.ip.dispclk_ramp_margin_percent,
8367 mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz,
8368 mode_lib->ms.max_state.dispclk_mhz));
8369 }
8370 }
8371 if (clk_cfg->dispclk_option == dml_use_required_freq)
8372 locals->Dispclk = locals->Dispclk_calculated;
8373 else if (clk_cfg->dispclk_option == dml_use_override_freq)
8374 locals->Dispclk = clk_cfg->dispclk_mhz;
8375 else
8376 locals->Dispclk = mode_lib->ms.state.dispclk_mhz;
8377 #ifdef __DML_VBA_DEBUG__
8378 dml_print("DML::%s: Using Dispclk = %f\n", __func__, locals->Dispclk);
8379 #endif
8380
8381 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8382 CalculateSinglePipeDPPCLKAndSCLThroughput(
8383 mode_lib->ms.cache_display_cfg.plane.HRatio[k],
8384 mode_lib->ms.cache_display_cfg.plane.HRatioChroma[k],
8385 mode_lib->ms.cache_display_cfg.plane.VRatio[k],
8386 mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
8387 mode_lib->ms.ip.max_dchub_pscl_bw_pix_per_clk,
8388 mode_lib->ms.ip.max_pscl_lb_bw_pix_per_clk,
8389 mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
8390 mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
8391 mode_lib->ms.cache_display_cfg.plane.HTaps[k],
8392 mode_lib->ms.cache_display_cfg.plane.HTapsChroma[k],
8393 mode_lib->ms.cache_display_cfg.plane.VTaps[k],
8394 mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k],
8395
8396 /* Output */
8397 &locals->PSCL_THROUGHPUT[k],
8398 &locals->PSCL_THROUGHPUT_CHROMA[k],
8399 &locals->DPPCLKUsingSingleDPP[k]);
8400 }
8401
8402 CalculateDPPCLK(mode_lib->ms.num_active_planes,
8403 mode_lib->ms.soc.dcn_downspread_percent,
8404 mode_lib->ms.soc.dispclk_dppclk_vco_speed_mhz,
8405 locals->DPPCLKUsingSingleDPP,
8406 mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
8407 /* Output */
8408 &locals->GlobalDPPCLK,
8409 locals->Dppclk_calculated);
8410
8411 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8412 if (clk_cfg->dppclk_option[k] == dml_use_required_freq)
8413 locals->Dppclk[k] = locals->Dppclk_calculated[k];
8414 else if (clk_cfg->dppclk_option[k] == dml_use_override_freq)
8415 locals->Dppclk[k] = clk_cfg->dppclk_mhz[k];
8416 else
8417 locals->Dppclk[k] = mode_lib->ms.state.dppclk_mhz;
8418 #ifdef __DML_VBA_DEBUG__
8419 dml_print("DML::%s: Using Dppclk[%0d] = %f\n", __func__, k, locals->Dppclk[k]);
8420 #endif
8421 }
8422
8423 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8424 CalculateBytePerPixelAndBlockSizes(
8425 mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
8426 mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k],
8427
8428 /* Output */
8429 &locals->BytePerPixelY[k],
8430 &locals->BytePerPixelC[k],
8431 &locals->BytePerPixelDETY[k],
8432 &locals->BytePerPixelDETC[k],
8433 &locals->BlockHeight256BytesY[k],
8434 &locals->BlockHeight256BytesC[k],
8435 &locals->BlockWidth256BytesY[k],
8436 &locals->BlockWidth256BytesC[k],
8437 &locals->BlockHeightY[k],
8438 &locals->BlockHeightC[k],
8439 &locals->BlockWidthY[k],
8440 &locals->BlockWidthC[k]);
8441 }
8442
8443
8444 dml_print("DML::%s: %u\n", __func__, __LINE__);
8445 CalculateSwathWidth(
8446 false, // ForceSingleDPP
8447 mode_lib->ms.num_active_planes,
8448 mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat,
8449 mode_lib->ms.cache_display_cfg.plane.SourceScan,
8450 mode_lib->ms.cache_display_cfg.plane.ViewportStationary,
8451 mode_lib->ms.cache_display_cfg.plane.ViewportWidth,
8452 mode_lib->ms.cache_display_cfg.plane.ViewportHeight,
8453 mode_lib->ms.cache_display_cfg.plane.ViewportXStart,
8454 mode_lib->ms.cache_display_cfg.plane.ViewportYStart,
8455 mode_lib->ms.cache_display_cfg.plane.ViewportXStartC,
8456 mode_lib->ms.cache_display_cfg.plane.ViewportYStartC,
8457 mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY,
8458 mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC,
8459 mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY,
8460 mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC,
8461 mode_lib->ms.cache_display_cfg.hw.ODMMode,
8462 locals->BytePerPixelY,
8463 locals->BytePerPixelC,
8464 locals->BlockHeight256BytesY,
8465 locals->BlockHeight256BytesC,
8466 locals->BlockWidth256BytesY,
8467 locals->BlockWidth256BytesC,
8468 mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming,
8469 mode_lib->ms.cache_display_cfg.timing.HActive,
8470 mode_lib->ms.cache_display_cfg.plane.HRatio,
8471 mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
8472
8473 /* Output */
8474 locals->SwathWidthSingleDPPY,
8475 locals->SwathWidthSingleDPPC,
8476 locals->SwathWidthY,
8477 locals->SwathWidthC,
8478 s->dummy_integer_array[0], // dml_uint_t MaximumSwathHeightY[]
8479 s->dummy_integer_array[1], // dml_uint_t MaximumSwathHeightC[]
8480 locals->swath_width_luma_ub,
8481 locals->swath_width_chroma_ub);
8482
8483 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8484 locals->ReadBandwidthSurfaceLuma[k] = locals->SwathWidthSingleDPPY[k] * locals->BytePerPixelY[k] / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k];
8485 locals->ReadBandwidthSurfaceChroma[k] = locals->SwathWidthSingleDPPC[k] * locals->BytePerPixelC[k] / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k];
8486 dml_print("DML::%s: ReadBandwidthSurfaceLuma[%i] = %fBps\n", __func__, k, locals->ReadBandwidthSurfaceLuma[k]);
8487 dml_print("DML::%s: ReadBandwidthSurfaceChroma[%i] = %fBps\n", __func__, k, locals->ReadBandwidthSurfaceChroma[k]);
8488 }
8489
8490 CalculateSwathAndDETConfiguration_params->DETSizeOverride = mode_lib->ms.cache_display_cfg.plane.DETSizeOverride;
8491 CalculateSwathAndDETConfiguration_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
8492 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ms.ip.config_return_buffer_size_in_kbytes;
8493 CalculateSwathAndDETConfiguration_params->ROBBufferSizeInKByte = mode_lib->ms.ip.rob_buffer_size_kbytes;
8494 CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte;
8495 CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte;
8496 CalculateSwathAndDETConfiguration_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes;
8497 CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false;
8498 CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
8499 CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte;
8500 CalculateSwathAndDETConfiguration_params->UseUnboundedRequestingFinal = mode_lib->ms.policy.UseUnboundedRequesting;
8501 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ms.ip.config_return_buffer_segment_size_in_kbytes;
8502 CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByteFinal = mode_lib->ms.ip.compressed_buffer_segment_size_in_kbytes;
8503 CalculateSwathAndDETConfiguration_params->Output = s->dummy_output_encoder_array;
8504 CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = locals->ReadBandwidthSurfaceLuma;
8505 CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = locals->ReadBandwidthSurfaceChroma;
8506 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = s->dummy_single_array[0];
8507 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = s->dummy_single_array[1];
8508 CalculateSwathAndDETConfiguration_params->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan;
8509 CalculateSwathAndDETConfiguration_params->ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary;
8510 CalculateSwathAndDETConfiguration_params->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat;
8511 CalculateSwathAndDETConfiguration_params->SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling;
8512 CalculateSwathAndDETConfiguration_params->ViewportWidth = mode_lib->ms.cache_display_cfg.plane.ViewportWidth;
8513 CalculateSwathAndDETConfiguration_params->ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight;
8514 CalculateSwathAndDETConfiguration_params->ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart;
8515 CalculateSwathAndDETConfiguration_params->ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart;
8516 CalculateSwathAndDETConfiguration_params->ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC;
8517 CalculateSwathAndDETConfiguration_params->ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC;
8518 CalculateSwathAndDETConfiguration_params->SurfaceWidthY = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY;
8519 CalculateSwathAndDETConfiguration_params->SurfaceWidthC = mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC;
8520 CalculateSwathAndDETConfiguration_params->SurfaceHeightY = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY;
8521 CalculateSwathAndDETConfiguration_params->SurfaceHeightC = mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC;
8522 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = locals->BlockHeight256BytesY;
8523 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = locals->BlockHeight256BytesC;
8524 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = locals->BlockWidth256BytesY;
8525 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = locals->BlockWidth256BytesC;
8526 CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->ms.cache_display_cfg.hw.ODMMode;
8527 CalculateSwathAndDETConfiguration_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
8528 CalculateSwathAndDETConfiguration_params->BytePerPixY = locals->BytePerPixelY;
8529 CalculateSwathAndDETConfiguration_params->BytePerPixC = locals->BytePerPixelC;
8530 CalculateSwathAndDETConfiguration_params->BytePerPixDETY = locals->BytePerPixelDETY;
8531 CalculateSwathAndDETConfiguration_params->BytePerPixDETC = locals->BytePerPixelDETC;
8532 CalculateSwathAndDETConfiguration_params->HActive = mode_lib->ms.cache_display_cfg.timing.HActive;
8533 CalculateSwathAndDETConfiguration_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio;
8534 CalculateSwathAndDETConfiguration_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma;
8535 CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface;
8536 CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_long_array[0];
8537 CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_long_array[1];
8538 CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_long_array[2];
8539 CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_long_array[3];
8540 CalculateSwathAndDETConfiguration_params->SwathHeightY = locals->SwathHeightY;
8541 CalculateSwathAndDETConfiguration_params->SwathHeightC = locals->SwathHeightC;
8542 CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = locals->DETBufferSizeInKByte;
8543 CalculateSwathAndDETConfiguration_params->DETBufferSizeY = locals->DETBufferSizeY;
8544 CalculateSwathAndDETConfiguration_params->DETBufferSizeC = locals->DETBufferSizeC;
8545 CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &locals->UnboundedRequestEnabled;
8546 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &locals->compbuf_reserved_space_64b;
8547 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_zs = &locals->compbuf_reserved_space_zs;
8548 CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &locals->CompressedBufferSizeInkByte;
8549 CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = &s->dummy_boolean_array[0][0];
8550 CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[0];
8551
8552 // VBA_DELTA
8553 // Calculate DET size, swath height here. In VBA, they are calculated in mode check stage
8554 CalculateSwathAndDETConfiguration(&mode_lib->scratch,
8555 CalculateSwathAndDETConfiguration_params);
8556
8557 // DCFCLK Deep Sleep
8558 CalculateDCFCLKDeepSleep(
8559 mode_lib->ms.num_active_planes,
8560 locals->BytePerPixelY,
8561 locals->BytePerPixelC,
8562 mode_lib->ms.cache_display_cfg.plane.VRatio,
8563 mode_lib->ms.cache_display_cfg.plane.VRatioChroma,
8564 locals->SwathWidthY,
8565 locals->SwathWidthC,
8566 mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
8567 mode_lib->ms.cache_display_cfg.plane.HRatio,
8568 mode_lib->ms.cache_display_cfg.plane.HRatioChroma,
8569 mode_lib->ms.cache_display_cfg.timing.PixelClock,
8570 locals->PSCL_THROUGHPUT,
8571 locals->PSCL_THROUGHPUT_CHROMA,
8572 locals->Dppclk,
8573 locals->ReadBandwidthSurfaceLuma,
8574 locals->ReadBandwidthSurfaceChroma,
8575 mode_lib->ms.soc.return_bus_width_bytes,
8576
8577 /* Output */
8578 &locals->DCFCLKDeepSleep);
8579
8580 // DSCCLK
8581 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8582 if ((mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] != k) || !mode_lib->ms.cache_display_cfg.hw.DSCEnabled[k]) {
8583 locals->DSCCLK_calculated[k] = 0.0;
8584 } else {
8585 if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_420)
8586 s->DSCFormatFactor = 2;
8587 else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_444)
8588 s->DSCFormatFactor = 1;
8589 else if (mode_lib->ms.cache_display_cfg.output.OutputFormat[k] == dml_n422 || mode_lib->ms.cache_display_cfg.output.OutputEncoder[k] == dml_hdmifrl)
8590 s->DSCFormatFactor = 2;
8591 else
8592 s->DSCFormatFactor = 1;
8593 if (mode_lib->ms.cache_display_cfg.hw.ODMMode[k] == dml_odm_mode_combine_4to1)
8594 locals->DSCCLK_calculated[k] = mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 12 / s->DSCFormatFactor / (1 - mode_lib->ms.soc.dcn_downspread_percent / 100);
8595 else if (mode_lib->ms.cache_display_cfg.hw.ODMMode[k] == dml_odm_mode_combine_2to1)
8596 locals->DSCCLK_calculated[k] = mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 6 / s->DSCFormatFactor / (1 - mode_lib->ms.soc.dcn_downspread_percent / 100);
8597 else
8598 locals->DSCCLK_calculated[k] = mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k] / 3 / s->DSCFormatFactor / (1 - mode_lib->ms.soc.dcn_downspread_percent / 100);
8599 }
8600 }
8601
8602 // DSC Delay
8603 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8604 locals->DSCDelay[k] = DSCDelayRequirement(mode_lib->ms.cache_display_cfg.hw.DSCEnabled[k],
8605 mode_lib->ms.cache_display_cfg.hw.ODMMode[k],
8606 mode_lib->ms.cache_display_cfg.output.DSCInputBitPerComponent[k],
8607 mode_lib->ms.cache_display_cfg.output.OutputBpp[k],
8608 mode_lib->ms.cache_display_cfg.timing.HActive[k],
8609 mode_lib->ms.cache_display_cfg.timing.HTotal[k],
8610 mode_lib->ms.cache_display_cfg.hw.NumberOfDSCSlices[k],
8611 mode_lib->ms.cache_display_cfg.output.OutputFormat[k],
8612 mode_lib->ms.cache_display_cfg.output.OutputEncoder[k],
8613 mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
8614 mode_lib->ms.cache_display_cfg.output.PixelClockBackEnd[k]);
8615 }
8616
8617 for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
8618 for (j = 0; j < mode_lib->ms.num_active_planes; ++j) // NumberOfSurfaces
8619 if (j != k && mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == j && mode_lib->ms.cache_display_cfg.hw.DSCEnabled[j])
8620 locals->DSCDelay[k] = locals->DSCDelay[j];
8621
8622 // Prefetch
8623 CalculateSurfaceSizeInMall(
8624 mode_lib->ms.num_active_planes,
8625 mode_lib->ms.soc.mall_allocated_for_dcn_mbytes,
8626 mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen,
8627 mode_lib->ms.cache_display_cfg.surface.DCCEnable,
8628 mode_lib->ms.cache_display_cfg.plane.ViewportStationary,
8629 mode_lib->ms.cache_display_cfg.plane.ViewportXStart,
8630 mode_lib->ms.cache_display_cfg.plane.ViewportYStart,
8631 mode_lib->ms.cache_display_cfg.plane.ViewportXStartC,
8632 mode_lib->ms.cache_display_cfg.plane.ViewportYStartC,
8633 mode_lib->ms.cache_display_cfg.plane.ViewportWidth,
8634 mode_lib->ms.cache_display_cfg.plane.ViewportHeight,
8635 locals->BytePerPixelY,
8636 mode_lib->ms.cache_display_cfg.plane.ViewportWidthChroma,
8637 mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma,
8638 locals->BytePerPixelC,
8639 mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY,
8640 mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC,
8641 mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY,
8642 mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC,
8643 locals->BlockWidth256BytesY,
8644 locals->BlockWidth256BytesC,
8645 locals->BlockHeight256BytesY,
8646 locals->BlockHeight256BytesC,
8647 locals->BlockWidthY,
8648 locals->BlockWidthC,
8649 locals->BlockHeightY,
8650 locals->BlockHeightC,
8651
8652 /* Output */
8653 locals->SurfaceSizeInTheMALL,
8654 &s->dummy_boolean[0]); /* dml_bool_t *ExceededMALLSize */
8655
8656 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8657 s->SurfaceParameters[k].PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k];
8658 s->SurfaceParameters[k].DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k];
8659 s->SurfaceParameters[k].SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k];
8660 s->SurfaceParameters[k].ViewportHeight = mode_lib->ms.cache_display_cfg.plane.ViewportHeight[k];
8661 s->SurfaceParameters[k].ViewportHeightChroma = mode_lib->ms.cache_display_cfg.plane.ViewportHeightChroma[k];
8662 s->SurfaceParameters[k].BlockWidth256BytesY = locals->BlockWidth256BytesY[k];
8663 s->SurfaceParameters[k].BlockHeight256BytesY = locals->BlockHeight256BytesY[k];
8664 s->SurfaceParameters[k].BlockWidth256BytesC = locals->BlockWidth256BytesC[k];
8665 s->SurfaceParameters[k].BlockHeight256BytesC = locals->BlockHeight256BytesC[k];
8666 s->SurfaceParameters[k].BlockWidthY = locals->BlockWidthY[k];
8667 s->SurfaceParameters[k].BlockHeightY = locals->BlockHeightY[k];
8668 s->SurfaceParameters[k].BlockWidthC = locals->BlockWidthC[k];
8669 s->SurfaceParameters[k].BlockHeightC = locals->BlockHeightC[k];
8670 s->SurfaceParameters[k].InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k];
8671 s->SurfaceParameters[k].HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k];
8672 s->SurfaceParameters[k].DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable[k];
8673 s->SurfaceParameters[k].SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k];
8674 s->SurfaceParameters[k].SurfaceTiling = mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k];
8675 s->SurfaceParameters[k].BytePerPixelY = locals->BytePerPixelY[k];
8676 s->SurfaceParameters[k].BytePerPixelC = locals->BytePerPixelC[k];
8677 s->SurfaceParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
8678 s->SurfaceParameters[k].VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio[k];
8679 s->SurfaceParameters[k].VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k];
8680 s->SurfaceParameters[k].VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps[k];
8681 s->SurfaceParameters[k].VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma[k];
8682 s->SurfaceParameters[k].PitchY = mode_lib->ms.cache_display_cfg.surface.PitchY[k];
8683 s->SurfaceParameters[k].DCCMetaPitchY = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchY[k];
8684 s->SurfaceParameters[k].PitchC = mode_lib->ms.cache_display_cfg.surface.PitchC[k];
8685 s->SurfaceParameters[k].DCCMetaPitchC = mode_lib->ms.cache_display_cfg.surface.DCCMetaPitchC[k];
8686 s->SurfaceParameters[k].ViewportStationary = mode_lib->ms.cache_display_cfg.plane.ViewportStationary[k];
8687 s->SurfaceParameters[k].ViewportXStart = mode_lib->ms.cache_display_cfg.plane.ViewportXStart[k];
8688 s->SurfaceParameters[k].ViewportYStart = mode_lib->ms.cache_display_cfg.plane.ViewportYStart[k];
8689 s->SurfaceParameters[k].ViewportXStartC = mode_lib->ms.cache_display_cfg.plane.ViewportXStartC[k];
8690 s->SurfaceParameters[k].ViewportYStartC = mode_lib->ms.cache_display_cfg.plane.ViewportYStartC[k];
8691 s->SurfaceParameters[k].FORCE_ONE_ROW_FOR_FRAME = mode_lib->ms.cache_display_cfg.plane.ForceOneRowForFrame[k];
8692 s->SurfaceParameters[k].SwathHeightY = locals->SwathHeightY[k];
8693 s->SurfaceParameters[k].SwathHeightC = locals->SwathHeightC[k];
8694 }
8695
8696 CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
8697 CalculateVMRowAndSwath_params->myPipe = s->SurfaceParameters;
8698 CalculateVMRowAndSwath_params->SurfaceSizeInMALL = locals->SurfaceSizeInTheMALL;
8699 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ms.ip.dpte_buffer_size_in_pte_reqs_luma;
8700 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ms.ip.dpte_buffer_size_in_pte_reqs_chroma;
8701 CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ms.ip.dcc_meta_buffer_size_bytes;
8702 CalculateVMRowAndSwath_params->UseMALLForStaticScreen = mode_lib->ms.cache_display_cfg.plane.UseMALLForStaticScreen;
8703 CalculateVMRowAndSwath_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
8704 CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->ms.soc.mall_allocated_for_dcn_mbytes;
8705 CalculateVMRowAndSwath_params->SwathWidthY = locals->SwathWidthY;
8706 CalculateVMRowAndSwath_params->SwathWidthC = locals->SwathWidthC;
8707 CalculateVMRowAndSwath_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
8708 CalculateVMRowAndSwath_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
8709 CalculateVMRowAndSwath_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
8710 CalculateVMRowAndSwath_params->GPUVMMaxPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
8711 CalculateVMRowAndSwath_params->GPUVMMinPageSizeKBytes = mode_lib->ms.cache_display_cfg.plane.GPUVMMinPageSizeKBytes;
8712 CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
8713 CalculateVMRowAndSwath_params->PTEBufferModeOverrideEn = mode_lib->ms.cache_display_cfg.plane.PTEBufferModeOverrideEn;
8714 CalculateVMRowAndSwath_params->PTEBufferModeOverrideVal = mode_lib->ms.cache_display_cfg.plane.PTEBufferMode;
8715 CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0];
8716 CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = s->dummy_boolean_array[1];
8717 CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = locals->dpte_row_width_luma_ub;
8718 CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = locals->dpte_row_width_chroma_ub;
8719 CalculateVMRowAndSwath_params->dpte_row_height_luma = locals->dpte_row_height;
8720 CalculateVMRowAndSwath_params->dpte_row_height_chroma = locals->dpte_row_height_chroma;
8721 CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = locals->dpte_row_height_linear;
8722 CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = locals->dpte_row_height_linear_chroma;
8723 CalculateVMRowAndSwath_params->meta_req_width = locals->meta_req_width;
8724 CalculateVMRowAndSwath_params->meta_req_width_chroma = locals->meta_req_width_chroma;
8725 CalculateVMRowAndSwath_params->meta_req_height = locals->meta_req_height;
8726 CalculateVMRowAndSwath_params->meta_req_height_chroma = locals->meta_req_height_chroma;
8727 CalculateVMRowAndSwath_params->meta_row_width = locals->meta_row_width;
8728 CalculateVMRowAndSwath_params->meta_row_width_chroma = locals->meta_row_width_chroma;
8729 CalculateVMRowAndSwath_params->meta_row_height = locals->meta_row_height;
8730 CalculateVMRowAndSwath_params->meta_row_height_chroma = locals->meta_row_height_chroma;
8731 CalculateVMRowAndSwath_params->vm_group_bytes = locals->vm_group_bytes;
8732 CalculateVMRowAndSwath_params->dpte_group_bytes = locals->dpte_group_bytes;
8733 CalculateVMRowAndSwath_params->PixelPTEReqWidthY = locals->PixelPTEReqWidthY;
8734 CalculateVMRowAndSwath_params->PixelPTEReqHeightY = locals->PixelPTEReqHeightY;
8735 CalculateVMRowAndSwath_params->PTERequestSizeY = locals->PTERequestSizeY;
8736 CalculateVMRowAndSwath_params->PixelPTEReqWidthC = locals->PixelPTEReqWidthC;
8737 CalculateVMRowAndSwath_params->PixelPTEReqHeightC = locals->PixelPTEReqHeightC;
8738 CalculateVMRowAndSwath_params->PTERequestSizeC = locals->PTERequestSizeC;
8739 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = locals->dpde0_bytes_per_frame_ub_l;
8740 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = locals->meta_pte_bytes_per_frame_ub_l;
8741 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = locals->dpde0_bytes_per_frame_ub_c;
8742 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = locals->meta_pte_bytes_per_frame_ub_c;
8743 CalculateVMRowAndSwath_params->PrefetchSourceLinesY = locals->PrefetchSourceLinesY;
8744 CalculateVMRowAndSwath_params->PrefetchSourceLinesC = locals->PrefetchSourceLinesC;
8745 CalculateVMRowAndSwath_params->VInitPreFillY = locals->VInitPreFillY;
8746 CalculateVMRowAndSwath_params->VInitPreFillC = locals->VInitPreFillC;
8747 CalculateVMRowAndSwath_params->MaxNumSwathY = locals->MaxNumSwathY;
8748 CalculateVMRowAndSwath_params->MaxNumSwathC = locals->MaxNumSwathC;
8749 CalculateVMRowAndSwath_params->meta_row_bw = locals->meta_row_bw;
8750 CalculateVMRowAndSwath_params->dpte_row_bw = locals->dpte_row_bw;
8751 CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = locals->PixelPTEBytesPerRow;
8752 CalculateVMRowAndSwath_params->PDEAndMetaPTEBytesFrame = locals->PDEAndMetaPTEBytesFrame;
8753 CalculateVMRowAndSwath_params->MetaRowByte = locals->MetaRowByte;
8754 CalculateVMRowAndSwath_params->use_one_row_for_frame = locals->use_one_row_for_frame;
8755 CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = locals->use_one_row_for_frame_flip;
8756 CalculateVMRowAndSwath_params->UsesMALLForStaticScreen = locals->UsesMALLForStaticScreen;
8757 CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = locals->PTE_BUFFER_MODE;
8758 CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = locals->BIGK_FRAGMENT_SIZE;
8759
8760 CalculateVMRowAndSwath(&mode_lib->scratch,
8761 CalculateVMRowAndSwath_params);
8762
8763 s->ReorderBytes = (dml_uint_t)(mode_lib->ms.soc.num_chans * dml_max3(
8764 mode_lib->ms.soc.urgent_out_of_order_return_per_channel_pixel_only_bytes,
8765 mode_lib->ms.soc.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
8766 mode_lib->ms.soc.urgent_out_of_order_return_per_channel_vm_only_bytes));
8767
8768 s->VMDataOnlyReturnBW = dml_get_return_bw_mbps_vm_only(&mode_lib->ms.soc,
8769 mode_lib->ms.state.use_ideal_dram_bw_strobe,
8770 mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
8771 locals->Dcfclk,
8772 mode_lib->ms.FabricClock,
8773 mode_lib->ms.DRAMSpeed);
8774
8775 #ifdef __DML_VBA_DEBUG__
8776 dml_print("DML::%s: locals->Dcfclk = %f\n", __func__, locals->Dcfclk);
8777 dml_print("DML::%s: mode_lib->ms.soc.return_bus_width_bytes = %u\n", __func__, mode_lib->ms.soc.return_bus_width_bytes);
8778 dml_print("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock);
8779 dml_print("DML::%s: mode_lib->ms.soc.fabric_datapath_to_dcn_data_return_bytes = %u\n", __func__, mode_lib->ms.soc.fabric_datapath_to_dcn_data_return_bytes);
8780 dml_print("DML::%s: mode_lib->ms.soc.pct_ideal_sdp_bw_after_urgent = %f\n", __func__, mode_lib->ms.soc.pct_ideal_sdp_bw_after_urgent);
8781 dml_print("DML::%s: mode_lib->ms.DRAMSpeed = %f\n", __func__, mode_lib->ms.DRAMSpeed);
8782 dml_print("DML::%s: mode_lib->ms.soc.num_chans = %u\n", __func__, mode_lib->ms.soc.num_chans);
8783 dml_print("DML::%s: mode_lib->ms.soc.dram_channel_width_bytes = %u\n", __func__, mode_lib->ms.soc.dram_channel_width_bytes);
8784 dml_print("DML::%s: mode_lib->ms.state_idx = %u\n", __func__, mode_lib->ms.state_idx);
8785 dml_print("DML::%s: mode_lib->ms.max_state_idx = %u\n", __func__, mode_lib->ms.max_state_idx);
8786 dml_print("DML::%s: mode_lib->ms.state.use_ideal_dram_bw_strobe = %u\n", __func__, mode_lib->ms.state.use_ideal_dram_bw_strobe);
8787 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, s->VMDataOnlyReturnBW);
8788 dml_print("DML::%s: ReturnBW = %f\n", __func__, mode_lib->ms.ReturnBW);
8789 #endif
8790
8791 s->HostVMInefficiencyFactor = 1.0;
8792 if (mode_lib->ms.cache_display_cfg.plane.GPUVMEnable && mode_lib->ms.cache_display_cfg.plane.HostVMEnable)
8793 s->HostVMInefficiencyFactor = mode_lib->ms.ReturnBW / s->VMDataOnlyReturnBW;
8794
8795 s->TotalDCCActiveDPP = 0;
8796 s->TotalActiveDPP = 0;
8797 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8798 s->TotalActiveDPP = s->TotalActiveDPP + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k];
8799 if (mode_lib->ms.cache_display_cfg.surface.DCCEnable[k])
8800 s->TotalDCCActiveDPP = s->TotalDCCActiveDPP + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k];
8801 }
8802
8803 locals->UrgentExtraLatency = CalculateExtraLatency(
8804 mode_lib->ms.soc.round_trip_ping_latency_dcfclk_cycles,
8805 s->ReorderBytes,
8806 locals->Dcfclk,
8807 s->TotalActiveDPP,
8808 mode_lib->ms.ip.pixel_chunk_size_kbytes,
8809 s->TotalDCCActiveDPP,
8810 mode_lib->ms.ip.meta_chunk_size_kbytes,
8811 mode_lib->ms.ReturnBW,
8812 mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
8813 mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
8814 mode_lib->ms.num_active_planes,
8815 mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
8816 locals->dpte_group_bytes,
8817 s->HostVMInefficiencyFactor,
8818 mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
8819 mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels);
8820
8821 locals->TCalc = 24.0 / locals->DCFCLKDeepSleep;
8822
8823 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8824 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == k) {
8825 if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
8826 locals->WritebackDelay[k] =
8827 mode_lib->ms.state.writeback_latency_us
8828 + CalculateWriteBackDelay(
8829 mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k],
8830 mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[k],
8831 mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[k],
8832 mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[k],
8833 mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k],
8834 mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k],
8835 mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k],
8836 mode_lib->ms.cache_display_cfg.timing.HTotal[k]) / locals->Dispclk;
8837 } else
8838 locals->WritebackDelay[k] = 0;
8839 for (j = 0; j < mode_lib->ms.num_active_planes; ++j) {
8840 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[j] == k
8841 && mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[j] == true) {
8842 locals->WritebackDelay[k] =
8843 dml_max(
8844 locals->WritebackDelay[k],
8845 mode_lib->ms.state.writeback_latency_us
8846 + CalculateWriteBackDelay(
8847 mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[j],
8848 mode_lib->ms.cache_display_cfg.writeback.WritebackHRatio[j],
8849 mode_lib->ms.cache_display_cfg.writeback.WritebackVRatio[j],
8850 mode_lib->ms.cache_display_cfg.writeback.WritebackVTaps[j],
8851 mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[j],
8852 mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[j],
8853 mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[j],
8854 mode_lib->ms.cache_display_cfg.timing.HTotal[k]) / locals->Dispclk);
8855 }
8856 }
8857 }
8858 }
8859
8860 for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
8861 for (j = 0; j < mode_lib->ms.num_active_planes; ++j)
8862 if (mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming[k] == j)
8863 locals->WritebackDelay[k] = locals->WritebackDelay[j];
8864
8865 locals->UrgentLatency = CalculateUrgentLatency(mode_lib->ms.state.urgent_latency_pixel_data_only_us,
8866 mode_lib->ms.state.urgent_latency_pixel_mixed_with_vm_data_us,
8867 mode_lib->ms.state.urgent_latency_vm_data_only_us,
8868 mode_lib->ms.soc.do_urgent_latency_adjustment,
8869 mode_lib->ms.state.urgent_latency_adjustment_fabric_clock_component_us,
8870 mode_lib->ms.state.urgent_latency_adjustment_fabric_clock_reference_mhz,
8871 mode_lib->ms.FabricClock);
8872
8873 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8874 CalculateUrgentBurstFactor(mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
8875 locals->swath_width_luma_ub[k],
8876 locals->swath_width_chroma_ub[k],
8877 locals->SwathHeightY[k],
8878 locals->SwathHeightC[k],
8879 mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
8880 locals->UrgentLatency,
8881 mode_lib->ms.ip.cursor_buffer_size,
8882 mode_lib->ms.cache_display_cfg.plane.CursorWidth[k],
8883 mode_lib->ms.cache_display_cfg.plane.CursorBPP[k],
8884 mode_lib->ms.cache_display_cfg.plane.VRatio[k],
8885 mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
8886 locals->BytePerPixelDETY[k],
8887 locals->BytePerPixelDETC[k],
8888 locals->DETBufferSizeY[k],
8889 locals->DETBufferSizeC[k],
8890
8891 /* output */
8892 &locals->UrgBurstFactorCursor[k],
8893 &locals->UrgBurstFactorLuma[k],
8894 &locals->UrgBurstFactorChroma[k],
8895 &locals->NoUrgentLatencyHiding[k]);
8896
8897 locals->cursor_bw[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] * mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 /
8898 ((dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * mode_lib->ms.cache_display_cfg.plane.VRatio[k];
8899 }
8900
8901 s->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
8902 s->MaxVStartupAllPlanes = 0;
8903
8904 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8905 s->MaxVStartupLines[k] = CalculateMaxVStartup(k,
8906 mode_lib->ms.ip.ptoi_supported,
8907 mode_lib->ms.ip.vblank_nom_default_us,
8908 &mode_lib->ms.cache_display_cfg.timing,
8909 locals->WritebackDelay[k]);
8910
8911 #ifdef __DML_VBA_DEBUG__
8912 dml_print("DML::%s: k=%u MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
8913 dml_print("DML::%s: k=%u WritebackDelay = %f\n", __func__, k, locals->WritebackDelay[k]);
8914 #endif
8915 }
8916
8917 for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
8918 s->MaxVStartupAllPlanes = (dml_uint_t)(dml_max(s->MaxVStartupAllPlanes, s->MaxVStartupLines[k]));
8919
8920 s->ImmediateFlipRequirementFinal = false;
8921 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8922 s->ImmediateFlipRequirementFinal = s->ImmediateFlipRequirementFinal || (mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_required);
8923 }
8924 #ifdef __DML_VBA_DEBUG__
8925 dml_print("DML::%s: ImmediateFlipRequirementFinal = %u\n", __func__, s->ImmediateFlipRequirementFinal);
8926 #endif
8927
8928 // The prefetch scheduling should only be calculated once as per the AllowForPStateChangeOrStutterInVBlank requirement.
8929 // If the AllowForPStateChangeOrStutterInVBlank requirement is not strict (i.e. only try those power saving features
8930 // if possible), then try to program for the best power saving features in order of difficulty (dram, fclk, stutter).
8931 s->iteration = 0;
8932 s->MaxTotalRDBandwidth = 0;
8933 s->AllPrefetchModeTested = false;
8934 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8935 CalculatePrefetchMode(mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k], &s->MinPrefetchMode[k], &s->MaxPrefetchMode[k]);
8936 s->NextPrefetchMode[k] = s->MinPrefetchMode[k];
8937 }
8938
8939 do {
8940 s->MaxTotalRDBandwidthNoUrgentBurst = 0.0;
8941 s->DestinationLineTimesForPrefetchLessThan2 = false;
8942 s->VRatioPrefetchMoreThanMax = false;
8943
8944 dml_print("DML::%s: Start one iteration: VStartupLines = %u\n", __func__, s->VStartupLines);
8945
8946 s->AllPrefetchModeTested = true;
8947 s->MaxTotalRDBandwidth = 0;
8948 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8949 locals->PrefetchMode[k] = s->NextPrefetchMode[k];
8950 TWait = CalculateTWait(
8951 locals->PrefetchMode[k],
8952 mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
8953 mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
8954 mode_lib->ms.cache_display_cfg.timing.DRRDisplay[k],
8955 mode_lib->ms.state.dram_clock_change_latency_us,
8956 mode_lib->ms.state.fclk_change_latency_us,
8957 locals->UrgentLatency,
8958 mode_lib->ms.state.sr_enter_plus_exit_time_us);
8959
8960 myPipe = &s->myPipe;
8961 myPipe->Dppclk = locals->Dppclk[k];
8962 myPipe->Dispclk = locals->Dispclk;
8963 myPipe->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock[k];
8964 myPipe->DCFClkDeepSleep = locals->DCFCLKDeepSleep;
8965 myPipe->DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k];
8966 myPipe->ScalerEnabled = mode_lib->ms.cache_display_cfg.plane.ScalerEnabled[k];
8967 myPipe->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan[k];
8968 myPipe->BlockWidth256BytesY = locals->BlockWidth256BytesY[k];
8969 myPipe->BlockHeight256BytesY = locals->BlockHeight256BytesY[k];
8970 myPipe->BlockWidth256BytesC = locals->BlockWidth256BytesC[k];
8971 myPipe->BlockHeight256BytesC = locals->BlockHeight256BytesC[k];
8972 myPipe->InterlaceEnable = mode_lib->ms.cache_display_cfg.timing.Interlace[k];
8973 myPipe->NumberOfCursors = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k];
8974 myPipe->VBlank = mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k];
8975 myPipe->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal[k];
8976 myPipe->HActive = mode_lib->ms.cache_display_cfg.timing.HActive[k];
8977 myPipe->DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable[k];
8978 myPipe->ODMMode = mode_lib->ms.cache_display_cfg.hw.ODMMode[k];
8979 myPipe->SourcePixelFormat = mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k];
8980 myPipe->BytePerPixelY = locals->BytePerPixelY[k];
8981 myPipe->BytePerPixelC = locals->BytePerPixelC[k];
8982 myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
8983
8984 #ifdef __DML_VBA_DEBUG__
8985 dml_print("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k);
8986 dml_print("DML::%s: AllowForPStateChangeOrStutterInVBlank = %u\n", __func__, mode_lib->ms.policy.AllowForPStateChangeOrStutterInVBlank[k]);
8987 dml_print("DML::%s: PrefetchMode[k] = %u (Min=%u Max=%u)\n", __func__, locals->PrefetchMode[k], s->MinPrefetchMode[k], s->MaxPrefetchMode[k]);
8988 #endif
8989
8990 CalculatePrefetchSchedule_params->EnhancedPrefetchScheduleAccelerationFinal = mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal;
8991 CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactor;
8992 CalculatePrefetchSchedule_params->myPipe = myPipe;
8993 CalculatePrefetchSchedule_params->DSCDelay = locals->DSCDelay[k];
8994 CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ms.ip.dppclk_delay_subtotal + mode_lib->ms.ip.dppclk_delay_cnvc_formatter;
8995 CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ms.ip.dppclk_delay_scl;
8996 CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ms.ip.dppclk_delay_scl_lb_only;
8997 CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ms.ip.dppclk_delay_cnvc_cursor;
8998 CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ms.ip.dispclk_delay_subtotal;
8999 CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (dml_uint_t)(locals->SwathWidthY[k] / mode_lib->ms.cache_display_cfg.plane.HRatio[k]);
9000 CalculatePrefetchSchedule_params->OutputFormat = mode_lib->ms.cache_display_cfg.output.OutputFormat[k];
9001 CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ms.ip.max_inter_dcn_tile_repeaters;
9002 CalculatePrefetchSchedule_params->VStartup = (dml_uint_t)(dml_min(s->VStartupLines, s->MaxVStartupLines[k]));
9003 CalculatePrefetchSchedule_params->MaxVStartup = s->MaxVStartupLines[k];
9004 CalculatePrefetchSchedule_params->GPUVMPageTableLevels = mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels;
9005 CalculatePrefetchSchedule_params->GPUVMEnable = mode_lib->ms.cache_display_cfg.plane.GPUVMEnable;
9006 CalculatePrefetchSchedule_params->HostVMEnable = mode_lib->ms.cache_display_cfg.plane.HostVMEnable;
9007 CalculatePrefetchSchedule_params->HostVMMaxNonCachedPageTableLevels = mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels;
9008 CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024;
9009 CalculatePrefetchSchedule_params->DynamicMetadataEnable = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k];
9010 CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ms.ip.dynamic_metadata_vm_enabled;
9011 CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataLinesBeforeActiveRequired[k];
9012 CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = mode_lib->ms.cache_display_cfg.plane.DynamicMetadataTransmittedBytes[k];
9013 CalculatePrefetchSchedule_params->UrgentLatency = locals->UrgentLatency;
9014 CalculatePrefetchSchedule_params->UrgentExtraLatency = locals->UrgentExtraLatency;
9015 CalculatePrefetchSchedule_params->TCalc = locals->TCalc;
9016 CalculatePrefetchSchedule_params->PDEAndMetaPTEBytesFrame = locals->PDEAndMetaPTEBytesFrame[k];
9017 CalculatePrefetchSchedule_params->MetaRowByte = locals->MetaRowByte[k];
9018 CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = locals->PixelPTEBytesPerRow[k];
9019 CalculatePrefetchSchedule_params->PrefetchSourceLinesY = locals->PrefetchSourceLinesY[k];
9020 CalculatePrefetchSchedule_params->VInitPreFillY = locals->VInitPreFillY[k];
9021 CalculatePrefetchSchedule_params->MaxNumSwathY = locals->MaxNumSwathY[k];
9022 CalculatePrefetchSchedule_params->PrefetchSourceLinesC = locals->PrefetchSourceLinesC[k];
9023 CalculatePrefetchSchedule_params->VInitPreFillC = locals->VInitPreFillC[k];
9024 CalculatePrefetchSchedule_params->MaxNumSwathC = locals->MaxNumSwathC[k];
9025 CalculatePrefetchSchedule_params->swath_width_luma_ub = locals->swath_width_luma_ub[k];
9026 CalculatePrefetchSchedule_params->swath_width_chroma_ub = locals->swath_width_chroma_ub[k];
9027 CalculatePrefetchSchedule_params->SwathHeightY = locals->SwathHeightY[k];
9028 CalculatePrefetchSchedule_params->SwathHeightC = locals->SwathHeightC[k];
9029 CalculatePrefetchSchedule_params->TWait = TWait;
9030 CalculatePrefetchSchedule_params->DSTXAfterScaler = &locals->DSTXAfterScaler[k];
9031 CalculatePrefetchSchedule_params->DSTYAfterScaler = &locals->DSTYAfterScaler[k];
9032 CalculatePrefetchSchedule_params->DestinationLinesForPrefetch = &locals->DestinationLinesForPrefetch[k];
9033 CalculatePrefetchSchedule_params->DestinationLinesToRequestVMInVBlank = &locals->DestinationLinesToRequestVMInVBlank[k];
9034 CalculatePrefetchSchedule_params->DestinationLinesToRequestRowInVBlank = &locals->DestinationLinesToRequestRowInVBlank[k];
9035 CalculatePrefetchSchedule_params->VRatioPrefetchY = &locals->VRatioPrefetchY[k];
9036 CalculatePrefetchSchedule_params->VRatioPrefetchC = &locals->VRatioPrefetchC[k];
9037 CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWLuma = &locals->RequiredPrefetchPixDataBWLuma[k];
9038 CalculatePrefetchSchedule_params->RequiredPrefetchPixDataBWChroma = &locals->RequiredPrefetchPixDataBWChroma[k];
9039 CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &locals->NotEnoughTimeForDynamicMetadata[k];
9040 CalculatePrefetchSchedule_params->Tno_bw = &locals->Tno_bw[k];
9041 CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &locals->prefetch_vmrow_bw[k];
9042 CalculatePrefetchSchedule_params->Tdmdl_vm = &locals->Tdmdl_vm[k];
9043 CalculatePrefetchSchedule_params->Tdmdl = &locals->Tdmdl[k];
9044 CalculatePrefetchSchedule_params->TSetup = &locals->TSetup[k];
9045 CalculatePrefetchSchedule_params->VUpdateOffsetPix = &locals->VUpdateOffsetPix[k];
9046 CalculatePrefetchSchedule_params->VUpdateWidthPix = &locals->VUpdateWidthPix[k];
9047 CalculatePrefetchSchedule_params->VReadyOffsetPix = &locals->VReadyOffsetPix[k];
9048
9049 locals->NoTimeToPrefetch[k] =
9050 CalculatePrefetchSchedule(&mode_lib->scratch,
9051 CalculatePrefetchSchedule_params);
9052
9053 #ifdef __DML_VBA_DEBUG__
9054 dml_print("DML::%s: k=%0u NoTimeToPrefetch=%0d\n", __func__, k, locals->NoTimeToPrefetch[k]);
9055 #endif
9056 locals->VStartup[k] = (dml_uint_t)(dml_min(s->VStartupLines, s->MaxVStartupLines[k]));
9057 locals->VStartupMin[k] = locals->VStartup[k];
9058 }
9059
9060 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9061 CalculateUrgentBurstFactor(
9062 mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k],
9063 locals->swath_width_luma_ub[k],
9064 locals->swath_width_chroma_ub[k],
9065 locals->SwathHeightY[k],
9066 locals->SwathHeightC[k],
9067 mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
9068 locals->UrgentLatency,
9069 mode_lib->ms.ip.cursor_buffer_size,
9070 mode_lib->ms.cache_display_cfg.plane.CursorWidth[k],
9071 mode_lib->ms.cache_display_cfg.plane.CursorBPP[k],
9072 locals->VRatioPrefetchY[k],
9073 locals->VRatioPrefetchC[k],
9074 locals->BytePerPixelDETY[k],
9075 locals->BytePerPixelDETC[k],
9076 locals->DETBufferSizeY[k],
9077 locals->DETBufferSizeC[k],
9078 /* Output */
9079 &locals->UrgBurstFactorCursorPre[k],
9080 &locals->UrgBurstFactorLumaPre[k],
9081 &locals->UrgBurstFactorChromaPre[k],
9082 &locals->NoUrgentLatencyHidingPre[k]);
9083
9084 locals->cursor_bw_pre[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] * mode_lib->ms.cache_display_cfg.plane.CursorBPP[k] / 8.0 / (mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * locals->VRatioPrefetchY[k];
9085
9086 #ifdef __DML_VBA_DEBUG__
9087 dml_print("DML::%s: k=%0u DPPPerSurface=%u\n", __func__, k, mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k]);
9088 dml_print("DML::%s: k=%0u UrgBurstFactorLuma=%f\n", __func__, k, locals->UrgBurstFactorLuma[k]);
9089 dml_print("DML::%s: k=%0u UrgBurstFactorChroma=%f\n", __func__, k, locals->UrgBurstFactorChroma[k]);
9090 dml_print("DML::%s: k=%0u UrgBurstFactorLumaPre=%f\n", __func__, k, locals->UrgBurstFactorLumaPre[k]);
9091 dml_print("DML::%s: k=%0u UrgBurstFactorChromaPre=%f\n", __func__, k, locals->UrgBurstFactorChromaPre[k]);
9092
9093 dml_print("DML::%s: k=%0u VRatioPrefetchY=%f\n", __func__, k, locals->VRatioPrefetchY[k]);
9094 dml_print("DML::%s: k=%0u VRatioY=%f\n", __func__, k, mode_lib->ms.cache_display_cfg.plane.VRatio[k]);
9095
9096 dml_print("DML::%s: k=%0u prefetch_vmrow_bw=%f\n", __func__, k, locals->prefetch_vmrow_bw[k]);
9097 dml_print("DML::%s: k=%0u ReadBandwidthSurfaceLuma=%f\n", __func__, k, locals->ReadBandwidthSurfaceLuma[k]);
9098 dml_print("DML::%s: k=%0u ReadBandwidthSurfaceChroma=%f\n", __func__, k, locals->ReadBandwidthSurfaceChroma[k]);
9099 dml_print("DML::%s: k=%0u cursor_bw=%f\n", __func__, k, locals->cursor_bw[k]);
9100 dml_print("DML::%s: k=%0u meta_row_bw=%f\n", __func__, k, locals->meta_row_bw[k]);
9101 dml_print("DML::%s: k=%0u dpte_row_bw=%f\n", __func__, k, locals->dpte_row_bw[k]);
9102 dml_print("DML::%s: k=%0u RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, locals->RequiredPrefetchPixDataBWLuma[k]);
9103 dml_print("DML::%s: k=%0u RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, locals->RequiredPrefetchPixDataBWChroma[k]);
9104 dml_print("DML::%s: k=%0u cursor_bw_pre=%f\n", __func__, k, locals->cursor_bw_pre[k]);
9105 dml_print("DML::%s: k=%0u MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, s->MaxTotalRDBandwidthNoUrgentBurst);
9106 #endif
9107 if (locals->DestinationLinesForPrefetch[k] < 2)
9108 s->DestinationLineTimesForPrefetchLessThan2 = true;
9109
9110 if (locals->VRatioPrefetchY[k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ ||
9111 locals->VRatioPrefetchC[k] > __DML_MAX_VRATIO_PRE_ENHANCE_PREFETCH_ACC__ ||
9112 ((s->VStartupLines < s->MaxVStartupLines[k] || mode_lib->ms.policy.EnhancedPrefetchScheduleAccelerationFinal == 0) &&
9113 (locals->VRatioPrefetchY[k] > __DML_MAX_VRATIO_PRE__ || locals->VRatioPrefetchC[k] > __DML_MAX_VRATIO_PRE__)))
9114 s->VRatioPrefetchMoreThanMax = true;
9115
9116 //dml_bool_t DestinationLinesToRequestVMInVBlankEqualOrMoreThan32 = false;
9117 //dml_bool_t DestinationLinesToRequestRowInVBlankEqualOrMoreThan16 = false;
9118 //if (locals->DestinationLinesToRequestVMInVBlank[k] >= 32) {
9119 // DestinationLinesToRequestVMInVBlankEqualOrMoreThan32 = true;
9120 //}
9121
9122 //if (locals->DestinationLinesToRequestRowInVBlank[k] >= 16) {
9123 // DestinationLinesToRequestRowInVBlankEqualOrMoreThan16 = true;
9124 //}
9125 }
9126
9127 locals->FractionOfUrgentBandwidth = s->MaxTotalRDBandwidthNoUrgentBurst / mode_lib->ms.ReturnBW;
9128
9129 #ifdef __DML_VBA_DEBUG__
9130 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, s->MaxTotalRDBandwidthNoUrgentBurst);
9131 dml_print("DML::%s: ReturnBW=%f \n", __func__, mode_lib->ms.ReturnBW);
9132 dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, locals->FractionOfUrgentBandwidth);
9133 #endif
9134
9135 CalculatePrefetchBandwithSupport(
9136 mode_lib->ms.num_active_planes,
9137 mode_lib->ms.ReturnBW,
9138 mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
9139 locals->NoUrgentLatencyHidingPre,
9140 locals->ReadBandwidthSurfaceLuma,
9141 locals->ReadBandwidthSurfaceChroma,
9142 locals->RequiredPrefetchPixDataBWLuma,
9143 locals->RequiredPrefetchPixDataBWChroma,
9144 locals->cursor_bw,
9145 locals->meta_row_bw,
9146 locals->dpte_row_bw,
9147 locals->cursor_bw_pre,
9148 locals->prefetch_vmrow_bw,
9149 mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
9150 locals->UrgBurstFactorLuma,
9151 locals->UrgBurstFactorChroma,
9152 locals->UrgBurstFactorCursor,
9153 locals->UrgBurstFactorLumaPre,
9154 locals->UrgBurstFactorChromaPre,
9155 locals->UrgBurstFactorCursorPre,
9156
9157 /* output */
9158 &s->MaxTotalRDBandwidth, // dml_float_t *PrefetchBandwidth
9159 &s->MaxTotalRDBandwidthNotIncludingMALLPrefetch, // dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch
9160 &s->dummy_single[0], // dml_float_t *FractionOfUrgentBandwidth
9161 &locals->PrefetchModeSupported);
9162
9163 for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
9164 s->dummy_unit_vector[k] = 1.0;
9165
9166 CalculatePrefetchBandwithSupport(mode_lib->ms.num_active_planes,
9167 mode_lib->ms.ReturnBW,
9168 mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
9169 locals->NoUrgentLatencyHidingPre,
9170 locals->ReadBandwidthSurfaceLuma,
9171 locals->ReadBandwidthSurfaceChroma,
9172 locals->RequiredPrefetchPixDataBWLuma,
9173 locals->RequiredPrefetchPixDataBWChroma,
9174 locals->cursor_bw,
9175 locals->meta_row_bw,
9176 locals->dpte_row_bw,
9177 locals->cursor_bw_pre,
9178 locals->prefetch_vmrow_bw,
9179 mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
9180 s->dummy_unit_vector,
9181 s->dummy_unit_vector,
9182 s->dummy_unit_vector,
9183 s->dummy_unit_vector,
9184 s->dummy_unit_vector,
9185 s->dummy_unit_vector,
9186
9187 /* output */
9188 &s->NonUrgentMaxTotalRDBandwidth, // dml_float_t *PrefetchBandwidth
9189 &s->NonUrgentMaxTotalRDBandwidthNotIncludingMALLPrefetch, // dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch
9190 &locals->FractionOfUrgentBandwidth,
9191 &s->dummy_boolean[0]); // dml_bool_t *PrefetchBandwidthSupport
9192
9193
9194
9195 if (s->VRatioPrefetchMoreThanMax != false || s->DestinationLineTimesForPrefetchLessThan2 != false) {
9196 dml_print("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax);
9197 dml_print("DML::%s: DestinationLineTimesForPrefetchLessThan2 = %u\n", __func__, s->DestinationLineTimesForPrefetchLessThan2);
9198 locals->PrefetchModeSupported = false;
9199 }
9200
9201 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9202 if (locals->NoTimeToPrefetch[k] == true || locals->NotEnoughTimeForDynamicMetadata[k]) {
9203 dml_print("DML::%s: k=%u, NoTimeToPrefetch = %0d\n", __func__, k, locals->NoTimeToPrefetch[k]);
9204 dml_print("DML::%s: k=%u, NotEnoughTimeForDynamicMetadata=%u\n", __func__, k, locals->NotEnoughTimeForDynamicMetadata[k]);
9205 locals->PrefetchModeSupported = false;
9206 }
9207 }
9208
9209
9210 if (locals->PrefetchModeSupported == true && mode_lib->ms.support.ImmediateFlipSupport == true) {
9211 locals->BandwidthAvailableForImmediateFlip = CalculateBandwidthAvailableForImmediateFlip(
9212 mode_lib->ms.num_active_planes,
9213 mode_lib->ms.ReturnBW,
9214 locals->ReadBandwidthSurfaceLuma,
9215 locals->ReadBandwidthSurfaceChroma,
9216 locals->RequiredPrefetchPixDataBWLuma,
9217 locals->RequiredPrefetchPixDataBWChroma,
9218 locals->cursor_bw,
9219 locals->cursor_bw_pre,
9220 mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
9221 locals->UrgBurstFactorLuma,
9222 locals->UrgBurstFactorChroma,
9223 locals->UrgBurstFactorCursor,
9224 locals->UrgBurstFactorLumaPre,
9225 locals->UrgBurstFactorChromaPre,
9226 locals->UrgBurstFactorCursorPre);
9227
9228 locals->TotImmediateFlipBytes = 0;
9229 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9230 if (mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_not_required) {
9231 locals->TotImmediateFlipBytes = locals->TotImmediateFlipBytes + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k] * (locals->PDEAndMetaPTEBytesFrame[k] + locals->MetaRowByte[k]);
9232 if (locals->use_one_row_for_frame_flip[k]) {
9233 locals->TotImmediateFlipBytes = locals->TotImmediateFlipBytes + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k] * (2 * locals->PixelPTEBytesPerRow[k]);
9234 } else {
9235 locals->TotImmediateFlipBytes = locals->TotImmediateFlipBytes + mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k] * locals->PixelPTEBytesPerRow[k];
9236 }
9237 #ifdef __DML_VBA_DEBUG__
9238 dml_print("DML::%s: k = %u\n", __func__, k);
9239 dml_print("DML::%s: DPPPerSurface = %u\n", __func__, mode_lib->ms.cache_display_cfg.hw.DPPPerSurface[k]);
9240 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, locals->PDEAndMetaPTEBytesFrame[k]);
9241 dml_print("DML::%s: MetaRowByte = %u\n", __func__, locals->MetaRowByte[k]);
9242 dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, locals->PixelPTEBytesPerRow[k]);
9243 dml_print("DML::%s: TotImmediateFlipBytes = %u\n", __func__, locals->TotImmediateFlipBytes);
9244 #endif
9245 }
9246 }
9247 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9248 CalculateFlipSchedule(
9249 s->HostVMInefficiencyFactor,
9250 locals->UrgentExtraLatency,
9251 locals->UrgentLatency,
9252 mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels,
9253 mode_lib->ms.cache_display_cfg.plane.HostVMEnable,
9254 mode_lib->ms.cache_display_cfg.plane.HostVMMaxPageTableLevels,
9255 mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
9256 mode_lib->ms.soc.hostvm_min_page_size_kbytes * 1024,
9257 locals->PDEAndMetaPTEBytesFrame[k],
9258 locals->MetaRowByte[k],
9259 locals->PixelPTEBytesPerRow[k],
9260 locals->BandwidthAvailableForImmediateFlip,
9261 locals->TotImmediateFlipBytes,
9262 mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
9263 mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k],
9264 mode_lib->ms.cache_display_cfg.plane.VRatio[k],
9265 mode_lib->ms.cache_display_cfg.plane.VRatioChroma[k],
9266 locals->Tno_bw[k],
9267 mode_lib->ms.cache_display_cfg.surface.DCCEnable[k],
9268 locals->dpte_row_height[k],
9269 locals->meta_row_height[k],
9270 locals->dpte_row_height_chroma[k],
9271 locals->meta_row_height_chroma[k],
9272 locals->use_one_row_for_frame_flip[k],
9273
9274 /* Output */
9275 &locals->DestinationLinesToRequestVMInImmediateFlip[k],
9276 &locals->DestinationLinesToRequestRowInImmediateFlip[k],
9277 &locals->final_flip_bw[k],
9278 &locals->ImmediateFlipSupportedForPipe[k]);
9279 }
9280
9281 CalculateImmediateFlipBandwithSupport(mode_lib->ms.num_active_planes,
9282 mode_lib->ms.ReturnBW,
9283 mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
9284 mode_lib->ms.policy.ImmediateFlipRequirement,
9285 locals->final_flip_bw,
9286 locals->ReadBandwidthSurfaceLuma,
9287 locals->ReadBandwidthSurfaceChroma,
9288 locals->RequiredPrefetchPixDataBWLuma,
9289 locals->RequiredPrefetchPixDataBWChroma,
9290 locals->cursor_bw,
9291 locals->meta_row_bw,
9292 locals->dpte_row_bw,
9293 locals->cursor_bw_pre,
9294 locals->prefetch_vmrow_bw,
9295 mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
9296 locals->UrgBurstFactorLuma,
9297 locals->UrgBurstFactorChroma,
9298 locals->UrgBurstFactorCursor,
9299 locals->UrgBurstFactorLumaPre,
9300 locals->UrgBurstFactorChromaPre,
9301 locals->UrgBurstFactorCursorPre,
9302
9303 /* output */
9304 &locals->total_dcn_read_bw_with_flip, // dml_float_t *TotalBandwidth
9305 &locals->total_dcn_read_bw_with_flip_not_including_MALL_prefetch, // dml_float_t TotalBandwidthNotIncludingMALLPrefetch
9306 &s->dummy_single[0], // dml_float_t *FractionOfUrgentBandwidth
9307 &locals->ImmediateFlipSupported); // dml_bool_t *ImmediateFlipBandwidthSupport
9308
9309 CalculateImmediateFlipBandwithSupport(mode_lib->ms.num_active_planes,
9310 mode_lib->ms.ReturnBW,
9311 mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
9312 mode_lib->ms.policy.ImmediateFlipRequirement,
9313 locals->final_flip_bw,
9314 locals->ReadBandwidthSurfaceLuma,
9315 locals->ReadBandwidthSurfaceChroma,
9316 locals->RequiredPrefetchPixDataBWLuma,
9317 locals->RequiredPrefetchPixDataBWChroma,
9318 locals->cursor_bw,
9319 locals->meta_row_bw,
9320 locals->dpte_row_bw,
9321 locals->cursor_bw_pre,
9322 locals->prefetch_vmrow_bw,
9323 mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
9324 s->dummy_unit_vector,
9325 s->dummy_unit_vector,
9326 s->dummy_unit_vector,
9327 s->dummy_unit_vector,
9328 s->dummy_unit_vector,
9329 s->dummy_unit_vector,
9330
9331 /* output */
9332 &locals->non_urgent_total_dcn_read_bw_with_flip, // dml_float_t *TotalBandwidth
9333 &locals->non_urgent_total_dcn_read_bw_with_flip_not_including_MALL_prefetch, // dml_float_t TotalBandwidthNotIncludingMALLPrefetch
9334 &locals->FractionOfUrgentBandwidthImmediateFlip, // dml_float_t *FractionOfUrgentBandwidth
9335 &s->dummy_boolean[0]); // dml_bool_t *ImmediateFlipBandwidthSupport
9336
9337 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9338 if (mode_lib->ms.policy.ImmediateFlipRequirement[k] != dml_immediate_flip_not_required && locals->ImmediateFlipSupportedForPipe[k] == false) {
9339 locals->ImmediateFlipSupported = false;
9340 #ifdef __DML_VBA_DEBUG__
9341 dml_print("DML::%s: Pipe %0d not supporting iflip\n", __func__, k);
9342 #endif
9343 }
9344 }
9345 } else {
9346 locals->ImmediateFlipSupported = false;
9347 locals->total_dcn_read_bw_with_flip = s->MaxTotalRDBandwidth;
9348 locals->total_dcn_read_bw_with_flip_not_including_MALL_prefetch = s->MaxTotalRDBandwidthNotIncludingMALLPrefetch;
9349 locals->non_urgent_total_dcn_read_bw_with_flip = s->NonUrgentMaxTotalRDBandwidth;
9350 locals->non_urgent_total_dcn_read_bw_with_flip_not_including_MALL_prefetch = s->NonUrgentMaxTotalRDBandwidthNotIncludingMALLPrefetch;
9351 }
9352
9353 /* consider flip support okay if the flip bw is ok, or when the user doesn't require an iflip and there is no host vm */
9354 locals->PrefetchAndImmediateFlipSupported = (locals->PrefetchModeSupported == true &&
9355 ((!mode_lib->ms.support.ImmediateFlipSupport && !mode_lib->ms.cache_display_cfg.plane.HostVMEnable && !s->ImmediateFlipRequirementFinal) ||
9356 locals->ImmediateFlipSupported)) ? true : false;
9357
9358 #ifdef __DML_VBA_DEBUG__
9359 dml_print("DML::%s: PrefetchModeSupported = %u\n", __func__, locals->PrefetchModeSupported);
9360 for (k = 0; k < mode_lib->ms.num_active_planes; ++k)
9361 dml_print("DML::%s: ImmediateFlipRequirement[%u] = %u\n", __func__, k, mode_lib->ms.policy.ImmediateFlipRequirement[k] == dml_immediate_flip_required);
9362 dml_print("DML::%s: HostVMEnable = %u\n", __func__, mode_lib->ms.cache_display_cfg.plane.HostVMEnable);
9363 dml_print("DML::%s: ImmediateFlipSupport = %u (from mode_support)\n", __func__, mode_lib->ms.support.ImmediateFlipSupport);
9364 dml_print("DML::%s: ImmediateFlipSupported = %u\n", __func__, locals->ImmediateFlipSupported);
9365 dml_print("DML::%s: PrefetchAndImmediateFlipSupported = %u\n", __func__, locals->PrefetchAndImmediateFlipSupported);
9366 #endif
9367 dml_print("DML::%s: Done one iteration: VStartupLines=%u, MaxVStartupAllPlanes=%u\n", __func__, s->VStartupLines, s->MaxVStartupAllPlanes);
9368
9369 s->VStartupLines = s->VStartupLines + 1;
9370
9371 if (s->VStartupLines > s->MaxVStartupAllPlanes) {
9372 s->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
9373
9374 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
9375 s->NextPrefetchMode[k] = s->NextPrefetchMode[k] + 1;
9376
9377 if (s->NextPrefetchMode[k] <= s->MaxPrefetchMode[k])
9378 s->AllPrefetchModeTested = false;
9379 dml_print("DML::%s: VStartupLines=%u, reaches max vstartup, try next prefetch mode=%u\n", __func__, s->VStartupLines-1, s->AllPrefetchModeTested);
9380 }
9381 } else {
9382 s->AllPrefetchModeTested = false;
9383 }
9384 s->iteration++;
9385 if (s->iteration > 2500) {
9386 dml_print("ERROR: DML::%s: Too many errors, exit now\n", __func__);
9387 ASSERT(0);
9388 }
9389 } while (!(locals->PrefetchAndImmediateFlipSupported || s->AllPrefetchModeTested));
9390
9391 if (locals->PrefetchAndImmediateFlipSupported) {
9392 dml_print("DML::%s: Good, Prefetch and flip scheduling solution found at VStartupLines=%u (MaxVStartupAllPlanes=%u)\n", __func__, s->VStartupLines-1, s->MaxVStartupAllPlanes);
9393 } else {
9394 dml_print("DML::%s: Bad, Prefetch and flip scheduling solution did NOT find solution! (MaxVStartupAllPlanes=%u)\n", __func__, s->MaxVStartupAllPlanes);
9395 }
9396
9397 //Watermarks and NB P-State/DRAM Clock Change Support
9398 {
9399 s->mmSOCParameters.UrgentLatency = locals->UrgentLatency;
9400 s->mmSOCParameters.ExtraLatency = locals->UrgentExtraLatency;
9401 s->mmSOCParameters.WritebackLatency = mode_lib->ms.state.writeback_latency_us;
9402 s->mmSOCParameters.DRAMClockChangeLatency = mode_lib->ms.state.dram_clock_change_latency_us;
9403 s->mmSOCParameters.FCLKChangeLatency = mode_lib->ms.state.fclk_change_latency_us;
9404 s->mmSOCParameters.SRExitTime = mode_lib->ms.state.sr_exit_time_us;
9405 s->mmSOCParameters.SREnterPlusExitTime = mode_lib->ms.state.sr_enter_plus_exit_time_us;
9406 s->mmSOCParameters.SRExitZ8Time = mode_lib->ms.state.sr_exit_z8_time_us;
9407 s->mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->ms.state.sr_enter_plus_exit_z8_time_us;
9408 s->mmSOCParameters.USRRetrainingLatency = mode_lib->ms.state.usr_retraining_latency_us;
9409 s->mmSOCParameters.SMNLatency = mode_lib->ms.soc.smn_latency_us;
9410
9411 CalculateWatermarks_params->USRRetrainingRequiredFinal = mode_lib->ms.policy.USRRetrainingRequiredFinal;
9412 CalculateWatermarks_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
9413 CalculateWatermarks_params->PrefetchMode = locals->PrefetchMode;
9414 CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
9415 CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ms.ip.max_line_buffer_lines;
9416 CalculateWatermarks_params->LineBufferSize = mode_lib->ms.ip.line_buffer_size_bits;
9417 CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ms.ip.writeback_interface_buffer_size_kbytes;
9418 CalculateWatermarks_params->DCFCLK = locals->Dcfclk;
9419 CalculateWatermarks_params->ReturnBW = mode_lib->ms.ReturnBW;
9420 CalculateWatermarks_params->SynchronizeTimingsFinal = mode_lib->ms.policy.SynchronizeTimingsFinal;
9421 CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = mode_lib->ms.policy.SynchronizeDRRDisplaysForUCLKPStateChangeFinal;
9422 CalculateWatermarks_params->DRRDisplay = mode_lib->ms.cache_display_cfg.timing.DRRDisplay;
9423 CalculateWatermarks_params->dpte_group_bytes = locals->dpte_group_bytes;
9424 CalculateWatermarks_params->meta_row_height = locals->meta_row_height;
9425 CalculateWatermarks_params->meta_row_height_chroma = locals->meta_row_height_chroma;
9426 CalculateWatermarks_params->mmSOCParameters = s->mmSOCParameters;
9427 CalculateWatermarks_params->WritebackChunkSize = mode_lib->ms.ip.writeback_chunk_size_kbytes;
9428 CalculateWatermarks_params->SOCCLK = mode_lib->ms.SOCCLK;
9429 CalculateWatermarks_params->DCFClkDeepSleep = locals->DCFCLKDeepSleep;
9430 CalculateWatermarks_params->DETBufferSizeY = locals->DETBufferSizeY;
9431 CalculateWatermarks_params->DETBufferSizeC = locals->DETBufferSizeC;
9432 CalculateWatermarks_params->SwathHeightY = locals->SwathHeightY;
9433 CalculateWatermarks_params->SwathHeightC = locals->SwathHeightC;
9434 CalculateWatermarks_params->LBBitPerPixel = mode_lib->ms.cache_display_cfg.plane.LBBitPerPixel;
9435 CalculateWatermarks_params->SwathWidthY = locals->SwathWidthY;
9436 CalculateWatermarks_params->SwathWidthC = locals->SwathWidthC;
9437 CalculateWatermarks_params->HRatio = mode_lib->ms.cache_display_cfg.plane.HRatio;
9438 CalculateWatermarks_params->HRatioChroma = mode_lib->ms.cache_display_cfg.plane.HRatioChroma;
9439 CalculateWatermarks_params->VTaps = mode_lib->ms.cache_display_cfg.plane.VTaps;
9440 CalculateWatermarks_params->VTapsChroma = mode_lib->ms.cache_display_cfg.plane.VTapsChroma;
9441 CalculateWatermarks_params->VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio;
9442 CalculateWatermarks_params->VRatioChroma = mode_lib->ms.cache_display_cfg.plane.VRatioChroma;
9443 CalculateWatermarks_params->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal;
9444 CalculateWatermarks_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal;
9445 CalculateWatermarks_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive;
9446 CalculateWatermarks_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock;
9447 CalculateWatermarks_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
9448 CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface;
9449 CalculateWatermarks_params->BytePerPixelDETY = locals->BytePerPixelDETY;
9450 CalculateWatermarks_params->BytePerPixelDETC = locals->BytePerPixelDETC;
9451 CalculateWatermarks_params->DSTXAfterScaler = locals->DSTXAfterScaler;
9452 CalculateWatermarks_params->DSTYAfterScaler = locals->DSTYAfterScaler;
9453 CalculateWatermarks_params->WritebackEnable = mode_lib->ms.cache_display_cfg.writeback.WritebackEnable;
9454 CalculateWatermarks_params->WritebackPixelFormat = mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat;
9455 CalculateWatermarks_params->WritebackDestinationWidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth;
9456 CalculateWatermarks_params->WritebackDestinationHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight;
9457 CalculateWatermarks_params->WritebackSourceHeight = mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight;
9458 CalculateWatermarks_params->UnboundedRequestEnabled = locals->UnboundedRequestEnabled;
9459 CalculateWatermarks_params->CompressedBufferSizeInkByte = locals->CompressedBufferSizeInkByte;
9460
9461 // Output
9462 CalculateWatermarks_params->Watermark = &locals->Watermark; // Watermarks *Watermark
9463 CalculateWatermarks_params->DRAMClockChangeSupport = &locals->DRAMClockChangeSupport;
9464 CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = locals->MaxActiveDRAMClockChangeLatencySupported; // dml_float_t *MaxActiveDRAMClockChangeLatencySupported[]
9465 CalculateWatermarks_params->SubViewportLinesNeededInMALL = locals->SubViewportLinesNeededInMALL; // dml_uint_t SubViewportLinesNeededInMALL[]
9466 CalculateWatermarks_params->FCLKChangeSupport = &locals->FCLKChangeSupport;
9467 CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &locals->MaxActiveFCLKChangeLatencySupported; // dml_float_t *MaxActiveFCLKChangeLatencySupported
9468 CalculateWatermarks_params->USRRetrainingSupport = &locals->USRRetrainingSupport;
9469
9470 CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
9471 &mode_lib->scratch,
9472 CalculateWatermarks_params);
9473
9474 /* Copy the calculated watermarks to mp.Watermark as the getter functions are
9475 * implemented by the DML team to copy the calculated values from the mp.Watermark interface.
9476 * &mode_lib->mp.Watermark and &locals->Watermark are the same address, memcpy may lead to
9477 * unexpected behavior. memmove should be used.
9478 */
9479 memmove(&mode_lib->mp.Watermark, CalculateWatermarks_params->Watermark, sizeof(struct Watermarks));
9480
9481 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9482 if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
9483 locals->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(0, locals->VStartupMin[k] * mode_lib->ms.cache_display_cfg.timing.HTotal[k] /
9484 mode_lib->ms.cache_display_cfg.timing.PixelClock[k] - locals->Watermark.WritebackDRAMClockChangeWatermark);
9485 locals->WritebackAllowFCLKChangeEndPosition[k] = dml_max(0, locals->VStartupMin[k] * mode_lib->ms.cache_display_cfg.timing.HTotal[k] /
9486 mode_lib->ms.cache_display_cfg.timing.PixelClock[k] - locals->Watermark.WritebackFCLKChangeWatermark);
9487 } else {
9488 locals->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
9489 locals->WritebackAllowFCLKChangeEndPosition[k] = 0;
9490 }
9491 }
9492 }
9493
9494 //Display Pipeline Delivery Time in Prefetch, Groups
9495 CalculatePixelDeliveryTimes(
9496 mode_lib->ms.num_active_planes,
9497 mode_lib->ms.cache_display_cfg.plane.VRatio,
9498 mode_lib->ms.cache_display_cfg.plane.VRatioChroma,
9499 locals->VRatioPrefetchY,
9500 locals->VRatioPrefetchC,
9501 locals->swath_width_luma_ub,
9502 locals->swath_width_chroma_ub,
9503 mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
9504 mode_lib->ms.cache_display_cfg.plane.HRatio,
9505 mode_lib->ms.cache_display_cfg.plane.HRatioChroma,
9506 mode_lib->ms.cache_display_cfg.timing.PixelClock,
9507 locals->PSCL_THROUGHPUT,
9508 locals->PSCL_THROUGHPUT_CHROMA,
9509 locals->Dppclk,
9510 locals->BytePerPixelC,
9511 mode_lib->ms.cache_display_cfg.plane.SourceScan,
9512 mode_lib->ms.cache_display_cfg.plane.NumberOfCursors,
9513 mode_lib->ms.cache_display_cfg.plane.CursorWidth,
9514 mode_lib->ms.cache_display_cfg.plane.CursorBPP,
9515 locals->BlockWidth256BytesY,
9516 locals->BlockHeight256BytesY,
9517 locals->BlockWidth256BytesC,
9518 locals->BlockHeight256BytesC,
9519
9520 /* Output */
9521 locals->DisplayPipeLineDeliveryTimeLuma,
9522 locals->DisplayPipeLineDeliveryTimeChroma,
9523 locals->DisplayPipeLineDeliveryTimeLumaPrefetch,
9524 locals->DisplayPipeLineDeliveryTimeChromaPrefetch,
9525 locals->DisplayPipeRequestDeliveryTimeLuma,
9526 locals->DisplayPipeRequestDeliveryTimeChroma,
9527 locals->DisplayPipeRequestDeliveryTimeLumaPrefetch,
9528 locals->DisplayPipeRequestDeliveryTimeChromaPrefetch,
9529 locals->CursorRequestDeliveryTime,
9530 locals->CursorRequestDeliveryTimePrefetch);
9531
9532 CalculateMetaAndPTETimes(
9533 locals->use_one_row_for_frame,
9534 mode_lib->ms.num_active_planes,
9535 mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
9536 mode_lib->ms.ip.meta_chunk_size_kbytes,
9537 mode_lib->ms.ip.min_meta_chunk_size_bytes,
9538 mode_lib->ms.cache_display_cfg.timing.HTotal,
9539 mode_lib->ms.cache_display_cfg.plane.VRatio,
9540 mode_lib->ms.cache_display_cfg.plane.VRatioChroma,
9541 locals->DestinationLinesToRequestRowInVBlank,
9542 locals->DestinationLinesToRequestRowInImmediateFlip,
9543 mode_lib->ms.cache_display_cfg.surface.DCCEnable,
9544 mode_lib->ms.cache_display_cfg.timing.PixelClock,
9545 locals->BytePerPixelY,
9546 locals->BytePerPixelC,
9547 mode_lib->ms.cache_display_cfg.plane.SourceScan,
9548 locals->dpte_row_height,
9549 locals->dpte_row_height_chroma,
9550 locals->meta_row_width,
9551 locals->meta_row_width_chroma,
9552 locals->meta_row_height,
9553 locals->meta_row_height_chroma,
9554 locals->meta_req_width,
9555 locals->meta_req_width_chroma,
9556 locals->meta_req_height,
9557 locals->meta_req_height_chroma,
9558 locals->dpte_group_bytes,
9559 locals->PTERequestSizeY,
9560 locals->PTERequestSizeC,
9561 locals->PixelPTEReqWidthY,
9562 locals->PixelPTEReqHeightY,
9563 locals->PixelPTEReqWidthC,
9564 locals->PixelPTEReqHeightC,
9565 locals->dpte_row_width_luma_ub,
9566 locals->dpte_row_width_chroma_ub,
9567
9568 /* Output */
9569 locals->DST_Y_PER_PTE_ROW_NOM_L,
9570 locals->DST_Y_PER_PTE_ROW_NOM_C,
9571 locals->DST_Y_PER_META_ROW_NOM_L,
9572 locals->DST_Y_PER_META_ROW_NOM_C,
9573 locals->TimePerMetaChunkNominal,
9574 locals->TimePerChromaMetaChunkNominal,
9575 locals->TimePerMetaChunkVBlank,
9576 locals->TimePerChromaMetaChunkVBlank,
9577 locals->TimePerMetaChunkFlip,
9578 locals->TimePerChromaMetaChunkFlip,
9579 locals->time_per_pte_group_nom_luma,
9580 locals->time_per_pte_group_vblank_luma,
9581 locals->time_per_pte_group_flip_luma,
9582 locals->time_per_pte_group_nom_chroma,
9583 locals->time_per_pte_group_vblank_chroma,
9584 locals->time_per_pte_group_flip_chroma);
9585
9586 CalculateVMGroupAndRequestTimes(
9587 mode_lib->ms.num_active_planes,
9588 mode_lib->ms.cache_display_cfg.plane.GPUVMEnable,
9589 mode_lib->ms.cache_display_cfg.plane.GPUVMMaxPageTableLevels,
9590 mode_lib->ms.cache_display_cfg.timing.HTotal,
9591 locals->BytePerPixelC,
9592 locals->DestinationLinesToRequestVMInVBlank,
9593 locals->DestinationLinesToRequestVMInImmediateFlip,
9594 mode_lib->ms.cache_display_cfg.surface.DCCEnable,
9595 mode_lib->ms.cache_display_cfg.timing.PixelClock,
9596 locals->dpte_row_width_luma_ub,
9597 locals->dpte_row_width_chroma_ub,
9598 locals->vm_group_bytes,
9599 locals->dpde0_bytes_per_frame_ub_l,
9600 locals->dpde0_bytes_per_frame_ub_c,
9601 locals->meta_pte_bytes_per_frame_ub_l,
9602 locals->meta_pte_bytes_per_frame_ub_c,
9603
9604 /* Output */
9605 locals->TimePerVMGroupVBlank,
9606 locals->TimePerVMGroupFlip,
9607 locals->TimePerVMRequestVBlank,
9608 locals->TimePerVMRequestFlip);
9609
9610 // Min TTUVBlank
9611 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9612 if (locals->PrefetchMode[k] == 0) {
9613 locals->MinTTUVBlank[k] = dml_max4(
9614 locals->Watermark.DRAMClockChangeWatermark,
9615 locals->Watermark.FCLKChangeWatermark,
9616 locals->Watermark.StutterEnterPlusExitWatermark,
9617 locals->Watermark.UrgentWatermark);
9618 } else if (locals->PrefetchMode[k] == 1) {
9619 locals->MinTTUVBlank[k] = dml_max3(
9620 locals->Watermark.FCLKChangeWatermark,
9621 locals->Watermark.StutterEnterPlusExitWatermark,
9622 locals->Watermark.UrgentWatermark);
9623 } else if (locals->PrefetchMode[k] == 2) {
9624 locals->MinTTUVBlank[k] = dml_max(
9625 locals->Watermark.StutterEnterPlusExitWatermark,
9626 locals->Watermark.UrgentWatermark);
9627 } else {
9628 locals->MinTTUVBlank[k] = locals->Watermark.UrgentWatermark;
9629 }
9630 if (!mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k])
9631 locals->MinTTUVBlank[k] = locals->TCalc + locals->MinTTUVBlank[k];
9632 }
9633
9634 // DCC Configuration
9635 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9636 #ifdef __DML_VBA_DEBUG__
9637 dml_print("DML::%s: Calculate DCC configuration for surface k=%u\n", __func__, k);
9638 #endif
9639 CalculateDCCConfiguration(
9640 mode_lib->ms.cache_display_cfg.surface.DCCEnable[k],
9641 mode_lib->ms.policy.DCCProgrammingAssumesScanDirectionUnknownFinal,
9642 mode_lib->ms.cache_display_cfg.surface.SourcePixelFormat[k],
9643 mode_lib->ms.cache_display_cfg.surface.SurfaceWidthY[k],
9644 mode_lib->ms.cache_display_cfg.surface.SurfaceWidthC[k],
9645 mode_lib->ms.cache_display_cfg.surface.SurfaceHeightY[k],
9646 mode_lib->ms.cache_display_cfg.surface.SurfaceHeightC[k],
9647 mode_lib->ms.NomDETInKByte,
9648 locals->BlockHeight256BytesY[k],
9649 locals->BlockHeight256BytesC[k],
9650 mode_lib->ms.cache_display_cfg.surface.SurfaceTiling[k],
9651 locals->BytePerPixelY[k],
9652 locals->BytePerPixelC[k],
9653 locals->BytePerPixelDETY[k],
9654 locals->BytePerPixelDETC[k],
9655 mode_lib->ms.cache_display_cfg.plane.SourceScan[k],
9656 /* Output */
9657 &locals->DCCYMaxUncompressedBlock[k],
9658 &locals->DCCCMaxUncompressedBlock[k],
9659 &locals->DCCYMaxCompressedBlock[k],
9660 &locals->DCCCMaxCompressedBlock[k],
9661 &locals->DCCYIndependentBlock[k],
9662 &locals->DCCCIndependentBlock[k]);
9663 }
9664
9665 // VStartup Adjustment
9666 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9667 s->Tvstartup_margin = (s->MaxVStartupLines[k] - locals->VStartupMin[k]) * mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k];
9668 #ifdef __DML_VBA_DEBUG__
9669 dml_print("DML::%s: k=%u, MinTTUVBlank = %f (before vstartup margin)\n", __func__, k, locals->MinTTUVBlank[k]);
9670 #endif
9671
9672 locals->MinTTUVBlank[k] = locals->MinTTUVBlank[k] + s->Tvstartup_margin;
9673
9674 #ifdef __DML_VBA_DEBUG__
9675 dml_print("DML::%s: k=%u, Tvstartup_margin = %f\n", __func__, k, s->Tvstartup_margin);
9676 dml_print("DML::%s: k=%u, MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
9677 dml_print("DML::%s: k=%u, MinTTUVBlank = %f\n", __func__, k, locals->MinTTUVBlank[k]);
9678 #endif
9679
9680 locals->Tdmdl[k] = locals->Tdmdl[k] + s->Tvstartup_margin;
9681 if (mode_lib->ms.cache_display_cfg.plane.DynamicMetadataEnable[k] && mode_lib->ms.ip.dynamic_metadata_vm_enabled) {
9682 locals->Tdmdl_vm[k] = locals->Tdmdl_vm[k] + s->Tvstartup_margin;
9683 }
9684
9685 isInterlaceTiming = (mode_lib->ms.cache_display_cfg.timing.Interlace[k] && !mode_lib->ms.ip.ptoi_supported);
9686
9687 // The actual positioning of the vstartup
9688 locals->VStartup[k] = (isInterlaceTiming ? (2 * s->MaxVStartupLines[k]) : s->MaxVStartupLines[k]);
9689
9690 s->dlg_vblank_start = ((isInterlaceTiming ? dml_floor((mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]) / 2.0, 1.0) :
9691 mode_lib->ms.cache_display_cfg.timing.VTotal[k]) - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]);
9692 s->LSetup = dml_floor(4.0 * locals->TSetup[k] / ((dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]), 1.0) / 4.0;
9693 s->blank_lines_remaining = (mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k]) - locals->VStartup[k];
9694
9695 if (s->blank_lines_remaining < 0) {
9696 dml_print("ERROR: Vstartup is larger than vblank!?\n");
9697 s->blank_lines_remaining = 0;
9698 ASSERT(0);
9699 }
9700 locals->MIN_DST_Y_NEXT_START[k] = s->dlg_vblank_start + s->blank_lines_remaining + s->LSetup;
9701
9702 // debug only
9703 s->old_MIN_DST_Y_NEXT_START = ((isInterlaceTiming ? dml_floor((mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]) / 2.0, 1.0) :
9704 mode_lib->ms.cache_display_cfg.timing.VTotal[k]) - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k])
9705 + dml_max(1.0, dml_ceil((dml_float_t) locals->WritebackDelay[k] / ((dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]), 1.0))
9706 + dml_floor(4.0 * locals->TSetup[k] / ((dml_float_t) mode_lib->ms.cache_display_cfg.timing.HTotal[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]), 1.0) / 4.0;
9707
9708 if (((locals->VUpdateOffsetPix[k] + locals->VUpdateWidthPix[k] + locals->VReadyOffsetPix[k]) / (double) mode_lib->ms.cache_display_cfg.timing.HTotal[k]) <=
9709 (isInterlaceTiming ?
9710 dml_floor((mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k] - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k] - locals->VStartup[k]) / 2.0, 1.0) :
9711 (int) (mode_lib->ms.cache_display_cfg.timing.VTotal[k] - mode_lib->ms.cache_display_cfg.timing.VActive[k] - mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k] - locals->VStartup[k]))) {
9712 locals->VREADY_AT_OR_AFTER_VSYNC[k] = true;
9713 } else {
9714 locals->VREADY_AT_OR_AFTER_VSYNC[k] = false;
9715 }
9716 #ifdef __DML_VBA_DEBUG__
9717 dml_print("DML::%s: k=%u, VStartup = %u (max)\n", __func__, k, locals->VStartup[k]);
9718 dml_print("DML::%s: k=%u, VStartupMin = %u (max)\n", __func__, k, locals->VStartupMin[k]);
9719 dml_print("DML::%s: k=%u, VUpdateOffsetPix = %u\n", __func__, k, locals->VUpdateOffsetPix[k]);
9720 dml_print("DML::%s: k=%u, VUpdateWidthPix = %u\n", __func__, k, locals->VUpdateWidthPix[k]);
9721 dml_print("DML::%s: k=%u, VReadyOffsetPix = %u\n", __func__, k, locals->VReadyOffsetPix[k]);
9722 dml_print("DML::%s: k=%u, HTotal = %u\n", __func__, k, mode_lib->ms.cache_display_cfg.timing.HTotal[k]);
9723 dml_print("DML::%s: k=%u, VTotal = %u\n", __func__, k, mode_lib->ms.cache_display_cfg.timing.VTotal[k]);
9724 dml_print("DML::%s: k=%u, VActive = %u\n", __func__, k, mode_lib->ms.cache_display_cfg.timing.VActive[k]);
9725 dml_print("DML::%s: k=%u, VFrontPorch = %u\n", __func__, k, mode_lib->ms.cache_display_cfg.timing.VFrontPorch[k]);
9726 dml_print("DML::%s: k=%u, TSetup = %f\n", __func__, k, locals->TSetup[k]);
9727 dml_print("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f\n", __func__, k, locals->MIN_DST_Y_NEXT_START[k]);
9728 dml_print("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f (old)\n", __func__, k, s->old_MIN_DST_Y_NEXT_START);
9729 dml_print("DML::%s: k=%u, VREADY_AT_OR_AFTER_VSYNC = %u\n", __func__, k, locals->VREADY_AT_OR_AFTER_VSYNC[k]);
9730 #endif
9731 }
9732
9733 //Maximum Bandwidth Used
9734 s->TotalWRBandwidth = 0;
9735 s->WRBandwidth = 0;
9736 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9737 if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true && mode_lib->ms.cache_display_cfg.writeback.WritebackPixelFormat[k] == dml_444_32) {
9738 s->WRBandwidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k] * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k] /
9739 (mode_lib->ms.cache_display_cfg.timing.HTotal[k] * mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * 4;
9740 } else if (mode_lib->ms.cache_display_cfg.writeback.WritebackEnable[k] == true) {
9741 s->WRBandwidth = mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationWidth[k] * mode_lib->ms.cache_display_cfg.writeback.WritebackDestinationHeight[k] /
9742 (mode_lib->ms.cache_display_cfg.timing.HTotal[k] * mode_lib->ms.cache_display_cfg.writeback.WritebackSourceHeight[k] / mode_lib->ms.cache_display_cfg.timing.PixelClock[k]) * 8;
9743 }
9744 s->TotalWRBandwidth = s->TotalWRBandwidth + s->WRBandwidth;
9745 }
9746
9747 locals->TotalDataReadBandwidth = 0;
9748 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9749 locals->TotalDataReadBandwidth = locals->TotalDataReadBandwidth + locals->ReadBandwidthSurfaceLuma[k] + locals->ReadBandwidthSurfaceChroma[k];
9750
9751 #ifdef __DML_VBA_DEBUG__
9752 dml_print("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, locals->TotalDataReadBandwidth);
9753 dml_print("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, locals->ReadBandwidthSurfaceLuma[k]);
9754 dml_print("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, locals->ReadBandwidthSurfaceChroma[k]);
9755 #endif
9756 }
9757
9758 locals->TotalDataReadBandwidthNotIncludingMALLPrefetch = 0;
9759 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9760 if (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[k] != dml_use_mall_pstate_change_phantom_pipe) {
9761 locals->TotalDataReadBandwidthNotIncludingMALLPrefetch = locals->TotalDataReadBandwidthNotIncludingMALLPrefetch
9762 + locals->ReadBandwidthSurfaceLuma[k] + locals->ReadBandwidthSurfaceChroma[k];
9763 }
9764 }
9765
9766 CalculateStutterEfficiency_params->CompressedBufferSizeInkByte = locals->CompressedBufferSizeInkByte;
9767 CalculateStutterEfficiency_params->UseMALLForPStateChange = mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange;
9768 CalculateStutterEfficiency_params->UnboundedRequestEnabled = locals->UnboundedRequestEnabled;
9769 CalculateStutterEfficiency_params->MetaFIFOSizeInKEntries = mode_lib->ms.ip.meta_fifo_size_in_kentries;
9770 CalculateStutterEfficiency_params->ZeroSizeBufferEntries = mode_lib->ms.ip.zero_size_buffer_entries;
9771 CalculateStutterEfficiency_params->PixelChunkSizeInKByte = mode_lib->ms.ip.pixel_chunk_size_kbytes;
9772 CalculateStutterEfficiency_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
9773 CalculateStutterEfficiency_params->ROBBufferSizeInKByte = mode_lib->ms.ip.rob_buffer_size_kbytes;
9774 CalculateStutterEfficiency_params->TotalDataReadBandwidth = locals->TotalDataReadBandwidth;
9775 CalculateStutterEfficiency_params->DCFCLK = locals->Dcfclk;
9776 CalculateStutterEfficiency_params->ReturnBW = mode_lib->ms.ReturnBW;
9777 CalculateStutterEfficiency_params->CompbufReservedSpace64B = locals->compbuf_reserved_space_64b;
9778 CalculateStutterEfficiency_params->CompbufReservedSpaceZs = locals->compbuf_reserved_space_zs;
9779 CalculateStutterEfficiency_params->SRExitTime = mode_lib->ms.state.sr_exit_time_us;
9780 CalculateStutterEfficiency_params->SRExitZ8Time = mode_lib->ms.state.sr_exit_z8_time_us;
9781 CalculateStutterEfficiency_params->SynchronizeTimingsFinal = mode_lib->ms.policy.SynchronizeTimingsFinal;
9782 CalculateStutterEfficiency_params->BlendingAndTiming = mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming;
9783 CalculateStutterEfficiency_params->StutterEnterPlusExitWatermark = locals->Watermark.StutterEnterPlusExitWatermark;
9784 CalculateStutterEfficiency_params->Z8StutterEnterPlusExitWatermark = locals->Watermark.Z8StutterEnterPlusExitWatermark;
9785 CalculateStutterEfficiency_params->ProgressiveToInterlaceUnitInOPP = mode_lib->ms.ip.ptoi_supported;
9786 CalculateStutterEfficiency_params->Interlace = mode_lib->ms.cache_display_cfg.timing.Interlace;
9787 CalculateStutterEfficiency_params->MinTTUVBlank = locals->MinTTUVBlank;
9788 CalculateStutterEfficiency_params->DPPPerSurface = mode_lib->ms.cache_display_cfg.hw.DPPPerSurface;
9789 CalculateStutterEfficiency_params->DETBufferSizeY = locals->DETBufferSizeY;
9790 CalculateStutterEfficiency_params->BytePerPixelY = locals->BytePerPixelY;
9791 CalculateStutterEfficiency_params->BytePerPixelDETY = locals->BytePerPixelDETY;
9792 CalculateStutterEfficiency_params->SwathWidthY = locals->SwathWidthY;
9793 CalculateStutterEfficiency_params->SwathHeightY = locals->SwathHeightY;
9794 CalculateStutterEfficiency_params->SwathHeightC = locals->SwathHeightC;
9795 CalculateStutterEfficiency_params->NetDCCRateLuma = mode_lib->ms.cache_display_cfg.surface.DCCRateLuma;
9796 CalculateStutterEfficiency_params->NetDCCRateChroma = mode_lib->ms.cache_display_cfg.surface.DCCRateChroma;
9797 CalculateStutterEfficiency_params->DCCFractionOfZeroSizeRequestsLuma = mode_lib->ms.cache_display_cfg.surface.DCCFractionOfZeroSizeRequestsLuma;
9798 CalculateStutterEfficiency_params->DCCFractionOfZeroSizeRequestsChroma = mode_lib->ms.cache_display_cfg.surface.DCCFractionOfZeroSizeRequestsChroma;
9799 CalculateStutterEfficiency_params->HTotal = mode_lib->ms.cache_display_cfg.timing.HTotal;
9800 CalculateStutterEfficiency_params->VTotal = mode_lib->ms.cache_display_cfg.timing.VTotal;
9801 CalculateStutterEfficiency_params->PixelClock = mode_lib->ms.cache_display_cfg.timing.PixelClock;
9802 CalculateStutterEfficiency_params->VRatio = mode_lib->ms.cache_display_cfg.plane.VRatio;
9803 CalculateStutterEfficiency_params->SourceScan = mode_lib->ms.cache_display_cfg.plane.SourceScan;
9804 CalculateStutterEfficiency_params->BlockHeight256BytesY = locals->BlockHeight256BytesY;
9805 CalculateStutterEfficiency_params->BlockWidth256BytesY = locals->BlockWidth256BytesY;
9806 CalculateStutterEfficiency_params->BlockHeight256BytesC = locals->BlockHeight256BytesC;
9807 CalculateStutterEfficiency_params->BlockWidth256BytesC = locals->BlockWidth256BytesC;
9808 CalculateStutterEfficiency_params->DCCYMaxUncompressedBlock = locals->DCCYMaxUncompressedBlock;
9809 CalculateStutterEfficiency_params->DCCCMaxUncompressedBlock = locals->DCCCMaxUncompressedBlock;
9810 CalculateStutterEfficiency_params->VActive = mode_lib->ms.cache_display_cfg.timing.VActive;
9811 CalculateStutterEfficiency_params->DCCEnable = mode_lib->ms.cache_display_cfg.surface.DCCEnable;
9812 CalculateStutterEfficiency_params->WritebackEnable = mode_lib->ms.cache_display_cfg.writeback.WritebackEnable;
9813 CalculateStutterEfficiency_params->ReadBandwidthSurfaceLuma = locals->ReadBandwidthSurfaceLuma;
9814 CalculateStutterEfficiency_params->ReadBandwidthSurfaceChroma = locals->ReadBandwidthSurfaceChroma;
9815 CalculateStutterEfficiency_params->meta_row_bw = locals->meta_row_bw;
9816 CalculateStutterEfficiency_params->dpte_row_bw = locals->dpte_row_bw;
9817 CalculateStutterEfficiency_params->StutterEfficiencyNotIncludingVBlank = &locals->StutterEfficiencyNotIncludingVBlank;
9818 CalculateStutterEfficiency_params->StutterEfficiency = &locals->StutterEfficiency;
9819 CalculateStutterEfficiency_params->NumberOfStutterBurstsPerFrame = &locals->NumberOfStutterBurstsPerFrame;
9820 CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &locals->Z8StutterEfficiencyNotIncludingVBlank;
9821 CalculateStutterEfficiency_params->Z8StutterEfficiency = &locals->Z8StutterEfficiency;
9822 CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &locals->Z8NumberOfStutterBurstsPerFrame;
9823 CalculateStutterEfficiency_params->StutterPeriod = &locals->StutterPeriod;
9824 CalculateStutterEfficiency_params->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = &locals->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE;
9825
9826 // Stutter Efficiency
9827 CalculateStutterEfficiency(&mode_lib->scratch,
9828 CalculateStutterEfficiency_params);
9829
9830 #ifdef __DML_VBA_ALLOW_DELTA__
9831 {
9832 dml_float_t dummy_single[2];
9833 dml_uint_t dummy_integer[1];
9834 dml_bool_t dummy_boolean[1];
9835
9836 // Calculate z8 stutter eff assuming 0 reserved space
9837 CalculateStutterEfficiency(
9838 locals->CompressedBufferSizeInkByte,
9839 mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange,
9840 locals->UnboundedRequestEnabled,
9841 mode_lib->ms.ip.meta_fifo_size_in_kentries,
9842 mode_lib->ms.ip.zero_size_buffer_entries,
9843 mode_lib->ms.ip.pixel_chunk_size_kbytes,
9844 mode_lib->ms.num_active_planes,
9845 mode_lib->ms.ip.rob_buffer_size_kbytes,
9846 locals->TotalDataReadBandwidth,
9847 locals->Dcfclk,
9848 mode_lib->ms.ReturnBW,
9849 0, //mode_lib->ms.ip.compbuf_reserved_space_64b,
9850 0, //mode_lib->ms.ip.compbuf_reserved_space_zs,
9851 mode_lib->ms.state.sr_exit_time_us,
9852 mode_lib->ms.state.sr_exit_z8_time_us,
9853 mode_lib->ms.policy.SynchronizeTimingsFinal,
9854 mode_lib->ms.cache_display_cfg.plane.BlendingAndTiming,
9855 locals->Watermark.StutterEnterPlusExitWatermark,
9856 locals->Watermark.Z8StutterEnterPlusExitWatermark,
9857 mode_lib->ms.ip.ptoi_supported,
9858 mode_lib->ms.cache_display_cfg.timing.Interlace,
9859 locals->MinTTUVBlank,
9860 mode_lib->ms.cache_display_cfg.hw.DPPPerSurface,
9861 mode_lib->ms.DETBufferSizeY,
9862 locals->BytePerPixelY,
9863 locals->BytePerPixelDETY,
9864 locals->SwathWidthY,
9865 mode_lib->ms.SwathHeightY,
9866 mode_lib->ms.SwathHeightC,
9867 mode_lib->ms.cache_display_cfg.surface.DCCRateLuma,
9868 mode_lib->ms.cache_display_cfg.surface.DCCRateChroma,
9869 mode_lib->ms.cache_display_cfg.surface.DCCFractionOfZeroSizeRequestsLuma,
9870 mode_lib->ms.cache_display_cfg.surface.DCCFractionOfZeroSizeRequestsChroma,
9871 mode_lib->ms.cache_display_cfg.timing.HTotal,
9872 mode_lib->ms.cache_display_cfg.timing.VTotal,
9873 mode_lib->ms.cache_display_cfg.timing.PixelClock,
9874 mode_lib->ms.cache_display_cfg.plane.VRatio,
9875 mode_lib->ms.cache_display_cfg.plane.SourceScan,
9876 locals->BlockHeight256BytesY,
9877 locals->BlockWidth256BytesY,
9878 locals->BlockHeight256BytesC,
9879 locals->BlockWidth256BytesC,
9880 locals->DCCYMaxUncompressedBlock,
9881 locals->DCCCMaxUncompressedBlock,
9882 mode_lib->ms.cache_display_cfg.timing.VActive,
9883 mode_lib->ms.cache_display_cfg.surface.DCCEnable,
9884 mode_lib->ms.cache_display_cfg.writeback.WritebackEnable,
9885 locals->ReadBandwidthSurfaceLuma,
9886 locals->ReadBandwidthSurfaceChroma,
9887 locals->meta_row_bw,
9888 locals->dpte_row_bw,
9889
9890 /* Output */
9891 &dummy_single[0],
9892 &dummy_single[1],
9893 &dummy_integer[0],
9894 &locals->Z8StutterEfficiencyNotIncludingVBlankBestCase,
9895 &locals->Z8StutterEfficiencyBestCase,
9896 &locals->Z8NumberOfStutterBurstsPerFrameBestCase,
9897 &locals->StutterPeriodBestCase,
9898 &dummy_boolean[0]);
9899 }
9900 #else
9901 locals->Z8StutterEfficiencyNotIncludingVBlankBestCase = locals->Z8StutterEfficiencyNotIncludingVBlank;
9902 locals->Z8StutterEfficiencyBestCase = locals->Z8StutterEfficiency;
9903 locals->Z8NumberOfStutterBurstsPerFrameBestCase = locals->Z8NumberOfStutterBurstsPerFrame;
9904 locals->StutterPeriodBestCase = locals->StutterPeriod;
9905 #endif
9906
9907 #ifdef __DML_VBA_DEBUG__
9908 dml_print("DML::%s: --- END --- \n", __func__);
9909 #endif
9910 } // dml_core_mode_programming
9911
9912 /// Function: dml_core_get_row_heights
9913 /// @brief Get row height for DPTE and META with minimal input.
dml_core_get_row_heights(dml_uint_t * dpte_row_height,dml_uint_t * meta_row_height,const struct display_mode_lib_st * mode_lib,dml_bool_t is_plane1,enum dml_source_format_class SourcePixelFormat,enum dml_swizzle_mode SurfaceTiling,enum dml_rotation_angle ScanDirection,dml_uint_t pitch,dml_uint_t GPUVMMinPageSizeKBytes)9914 void dml_core_get_row_heights(
9915 dml_uint_t *dpte_row_height,
9916 dml_uint_t *meta_row_height,
9917 const struct display_mode_lib_st *mode_lib,
9918 dml_bool_t is_plane1,
9919 enum dml_source_format_class SourcePixelFormat,
9920 enum dml_swizzle_mode SurfaceTiling,
9921 enum dml_rotation_angle ScanDirection,
9922 dml_uint_t pitch,
9923 dml_uint_t GPUVMMinPageSizeKBytes)
9924 {
9925 dml_uint_t BytePerPixelY;
9926 dml_uint_t BytePerPixelC;
9927 dml_float_t BytePerPixelInDETY;
9928 dml_float_t BytePerPixelInDETC;
9929 dml_uint_t BlockHeight256BytesY;
9930 dml_uint_t BlockHeight256BytesC;
9931 dml_uint_t BlockWidth256BytesY;
9932 dml_uint_t BlockWidth256BytesC;
9933 dml_uint_t MacroTileWidthY;
9934 dml_uint_t MacroTileWidthC;
9935 dml_uint_t MacroTileHeightY;
9936 dml_uint_t MacroTileHeightC;
9937
9938 dml_uint_t BytePerPixel;
9939 dml_uint_t BlockHeight256Bytes;
9940 dml_uint_t BlockWidth256Bytes;
9941 dml_uint_t MacroTileWidth;
9942 dml_uint_t MacroTileHeight;
9943 dml_uint_t PTEBufferSizeInRequests;
9944
9945 dml_uint_t dummy_integer[16];
9946
9947 CalculateBytePerPixelAndBlockSizes(
9948 SourcePixelFormat,
9949 SurfaceTiling,
9950
9951 /* Output */
9952 &BytePerPixelY,
9953 &BytePerPixelC,
9954 &BytePerPixelInDETY,
9955 &BytePerPixelInDETC,
9956 &BlockHeight256BytesY,
9957 &BlockHeight256BytesC,
9958 &BlockWidth256BytesY,
9959 &BlockWidth256BytesC,
9960 &MacroTileHeightY,
9961 &MacroTileHeightC,
9962 &MacroTileWidthY,
9963 &MacroTileWidthC);
9964
9965 BytePerPixel = is_plane1 ? BytePerPixelC : BytePerPixelY;
9966 BlockHeight256Bytes = is_plane1 ? BlockHeight256BytesC : BlockHeight256BytesY;
9967 BlockWidth256Bytes = is_plane1 ? BlockWidth256BytesC : BlockWidth256BytesY;
9968 MacroTileWidth = is_plane1 ? MacroTileWidthC : MacroTileWidthY;
9969 MacroTileHeight = is_plane1 ? MacroTileHeightC : MacroTileHeightY;
9970 PTEBufferSizeInRequests = is_plane1 ? mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma : mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
9971 #ifdef __DML_RQ_DLG_CALC_DEBUG__
9972 dml_print("DML_DLG: %s: is_plane1 = %u\n", __func__, is_plane1);
9973 dml_print("DML_DLG: %s: BytePerPixel = %u\n", __func__, BytePerPixel);
9974 dml_print("DML_DLG: %s: BlockHeight256Bytes = %u\n", __func__, BlockHeight256Bytes);
9975 dml_print("DML_DLG: %s: BlockWidth256Bytes = %u\n", __func__, BlockWidth256Bytes);
9976 dml_print("DML_DLG: %s: MacroTileWidth = %u\n", __func__, MacroTileWidth);
9977 dml_print("DML_DLG: %s: MacroTileHeight = %u\n", __func__, MacroTileHeight);
9978 dml_print("DML_DLG: %s: PTEBufferSizeInRequests = %u\n", __func__, PTEBufferSizeInRequests);
9979 dml_print("DML_DLG: %s: dpte_buffer_size_in_pte_reqs_luma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma);
9980 dml_print("DML_DLG: %s: dpte_buffer_size_in_pte_reqs_chroma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma);
9981 dml_print("DML_DLG: %s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes);
9982 #endif
9983
9984 // just supply with enough parameters to calculate meta and dte
9985 CalculateVMAndRowBytes(
9986 0, // dml_bool_t ViewportStationary,
9987 1, // dml_bool_t DCCEnable,
9988 1, // dml_uint_t NumberOfDPPs,
9989 BlockHeight256Bytes,
9990 BlockWidth256Bytes,
9991 SourcePixelFormat,
9992 SurfaceTiling,
9993 BytePerPixel,
9994 ScanDirection,
9995 0, // dml_uint_t SwathWidth,
9996 0, // dml_uint_t ViewportHeight, (Note: DML calculates one_row_for_frame height regardless, would need test input if that height is useful)
9997 0, // dml_uint_t ViewportXStart,
9998 0, // dml_uint_t ViewportYStart,
9999 1, // dml_bool_t GPUVMEnable,
10000 4, // dml_uint_t GPUVMMaxPageTableLevels,
10001 GPUVMMinPageSizeKBytes,
10002 PTEBufferSizeInRequests,
10003 pitch,
10004 0, // dml_uint_t DCCMetaPitch,
10005 MacroTileWidth,
10006 MacroTileHeight,
10007
10008 // /* Output */
10009 &dummy_integer[0], // dml_uint_t *MetaRowByte,
10010 &dummy_integer[1], // dml_uint_t *PixelPTEBytesPerRow,
10011 &dummy_integer[2], // dml_uint_t *PixelPTEBytesPerRowStorage,
10012 &dummy_integer[3], // dml_uint_t *dpte_row_width_ub,
10013 dpte_row_height,
10014 &dummy_integer[4], // dml_uint_t *dpte_row_height_linear
10015 &dummy_integer[5], // dml_uint_t *PixelPTEBytesPerRow_one_row_per_frame,
10016 &dummy_integer[6], // dml_uint_t *dpte_row_width_ub_one_row_per_frame,
10017 &dummy_integer[7], // dml_uint_t *dpte_row_height_one_row_per_frame,
10018 &dummy_integer[8], // dml_uint_t *MetaRequestWidth,
10019 &dummy_integer[9], // dml_uint_t *MetaRequestHeight,
10020 &dummy_integer[10], // dml_uint_t *meta_row_width,
10021 meta_row_height,
10022 &dummy_integer[11], // dml_uint_t *PixelPTEReqWidth,
10023 &dummy_integer[12], // dml_uint_t *PixelPTEReqHeight,
10024 &dummy_integer[13], // dml_uint_t *PTERequestSize,
10025 &dummy_integer[14], // dml_uint_t *DPDE0BytesFrame,
10026 &dummy_integer[15]); // dml_uint_t *MetaPTEBytesFrame)
10027
10028 #ifdef __DML_RQ_DLG_CALC_DEBUG__
10029 dml_print("DML_DLG: %s: dpte_row_height = %u\n", __func__, *dpte_row_height);
10030 dml_print("DML_DLG: %s: meta_row_height = %u\n", __func__, *meta_row_height);
10031 #endif
10032 }
10033
/// @brief Return, by value, the SoC bounding box entry for one power state.
/// @param states    State table; its num_states and state_array members are read.
/// @param state_idx Index of the requested state.
/// @return states->state_array[state_idx] when state_idx < num_states. For an invalid index the
///         error is logged, ASSERT(0) is raised, and the last valid entry (entry 0 when the
///         table reports no states) is returned instead of reading at the invalid index.
static struct soc_state_bounding_box_st dml_get_soc_state_bounding_box(
	const struct soc_states_st *states,
	dml_uint_t state_idx)
{
	const dml_uint_t num_states = (dml_uint_t)states->num_states;

	dml_print("DML::%s: state_idx=%u (num_states=%u)\n", __func__, state_idx, states->num_states);

	if (state_idx >= num_states) {
		dml_print("DML::%s: ERROR: Invalid state_idx=%u! num_states=%u\n", __func__, state_idx, states->num_states);
		ASSERT(0);
		/*
		 * Do not rely on ASSERT stopping execution (presumably it can be non-fatal or
		 * compiled out, depending on dml_assert.h); clamp so the access below never
		 * uses the invalid index.
		 * NOTE(review): assumes state_array always holds at least one element, so
		 * index 0 is addressable even when num_states is 0 - confirm against the
		 * struct soc_states_st definition.
		 */
		state_idx = (num_states > 0) ? num_states - 1 : 0;
	}
	return states->state_array[state_idx];
}
10046
10047 /// @brief Copy the parameters to a calculation struct, it actually only need when the DML needs to have
10048 /// the intelligence to re-calculate when any of display cfg, bbox, or policy changes since last calculated.
10049 ///
cache_ip_soc_cfg(struct display_mode_lib_st * mode_lib,dml_uint_t state_idx)10050 static void cache_ip_soc_cfg(struct display_mode_lib_st *mode_lib,
10051 dml_uint_t state_idx)
10052 {
10053 mode_lib->ms.state_idx = state_idx;
10054 mode_lib->ms.max_state_idx = mode_lib->states.num_states - 1;
10055 mode_lib->ms.soc = mode_lib->soc;
10056 mode_lib->ms.ip = mode_lib->ip;
10057 mode_lib->ms.policy = mode_lib->policy;
10058 mode_lib->ms.state = dml_get_soc_state_bounding_box(&mode_lib->states, state_idx);
10059 mode_lib->ms.max_state = dml_get_soc_state_bounding_box(&mode_lib->states, mode_lib->states.num_states - 1);
10060 }
10061
cache_display_cfg(struct display_mode_lib_st * mode_lib,const struct dml_display_cfg_st * display_cfg)10062 static void cache_display_cfg(struct display_mode_lib_st *mode_lib,
10063 const struct dml_display_cfg_st *display_cfg)
10064 {
10065 mode_lib->ms.cache_display_cfg = *display_cfg;
10066 }
10067
fetch_socbb_params(struct display_mode_lib_st * mode_lib)10068 static void fetch_socbb_params(struct display_mode_lib_st *mode_lib)
10069 {
10070 struct soc_state_bounding_box_st *state = &mode_lib->ms.state;
10071
10072 // Default values, SOCCLK, DRAMSpeed, and FabricClock will be reassigned to the same state value in mode_check step
10073 // If UseMinimumRequiredDCFCLK is used, the DCFCLK will be the min dcflk for the mode support
10074 mode_lib->ms.SOCCLK = (dml_float_t)state->socclk_mhz;
10075 mode_lib->ms.DRAMSpeed = (dml_float_t)state->dram_speed_mts;
10076 mode_lib->ms.FabricClock = (dml_float_t)state->fabricclk_mhz;
10077 mode_lib->ms.DCFCLK = (dml_float_t)state->dcfclk_mhz;
10078 }
10079
10080 /// @brief Use display_cfg directly for mode_support calculation
10081 /// Calculated values and informational output are stored in mode_lib.vba data struct
10082 /// The display configuration is described with pipes struct and num_pipes
10083 /// This function is used when physical resource mapping is not finalized (for example,
10084 /// don't know how many pipes to represent a surface)
10085 /// @param mode_lib Contains the bounding box and policy setting.
10086 /// @param state_idx Power state index
10087 /// @param display_cfg Display configurations. A display
dml_mode_support(struct display_mode_lib_st * mode_lib,dml_uint_t state_idx,const struct dml_display_cfg_st * display_cfg)10088 dml_bool_t dml_mode_support(
10089 struct display_mode_lib_st *mode_lib,
10090 dml_uint_t state_idx,
10091 const struct dml_display_cfg_st *display_cfg)
10092 {
10093 dml_bool_t is_mode_support;
10094
10095 dml_print("DML::%s: ------------- START ----------\n", __func__);
10096 cache_ip_soc_cfg(mode_lib, state_idx);
10097 cache_display_cfg(mode_lib, display_cfg);
10098
10099 fetch_socbb_params(mode_lib);
10100
10101 dml_print("DML::%s: state_idx = %u\n", __func__, state_idx);
10102
10103 is_mode_support = dml_core_mode_support(mode_lib);
10104
10105 dml_print("DML::%s: is_mode_support = %u\n", __func__, is_mode_support);
10106 dml_print("DML::%s: ------------- DONE ----------\n", __func__);
10107 return is_mode_support;
10108 }
10109
10110 /// @Brief A function to calculate the programming values for DCN DCHUB (Assume mode is supported)
10111 /// The output will be stored in the mode_lib.mp (mode_program_st) data struct and those can be accessed via the getter functions
10112 /// Calculated values include: watermarks, dlg, rq reg, different clock frequency
10113 /// This function returns 1 when there is no error.
10114 /// Note: In this function, it is assumed that DCFCLK, SOCCLK freq are the state values, and mode_program will just use the DML calculated DPPCLK and DISPCLK
10115 /// @param mode_lib mode_lib data struct that house all the input/output/bbox and calculation values.
10116 /// @param state_idx Power state idx chosen
10117 /// @param display_cfg Display Configuration
10118 /// @param call_standalone Calling mode_programming without calling mode support. Some of the "support" struct member will be pre-calculated before doing mode programming
10119 /// TODO: Add clk_cfg input, could be useful for standalone mode
dml_mode_programming(struct display_mode_lib_st * mode_lib,dml_uint_t state_idx,const struct dml_display_cfg_st * display_cfg,bool call_standalone)10120 dml_bool_t dml_mode_programming(
10121 struct display_mode_lib_st *mode_lib,
10122 dml_uint_t state_idx,
10123 const struct dml_display_cfg_st *display_cfg,
10124 bool call_standalone)
10125 {
10126 struct dml_clk_cfg_st clk_cfg;
10127 memset(&clk_cfg, 0, sizeof(clk_cfg));
10128
10129 clk_cfg.dcfclk_option = dml_use_required_freq;
10130 clk_cfg.dispclk_option = dml_use_required_freq;
10131 for (dml_uint_t k = 0; k < __DML_NUM_PLANES__; ++k)
10132 clk_cfg.dppclk_option[k] = dml_use_required_freq;
10133
10134 dml_print("DML::%s: ------------- START ----------\n", __func__);
10135 dml_print("DML::%s: state_idx = %u\n", __func__, state_idx);
10136 dml_print("DML::%s: call_standalone = %u\n", __func__, call_standalone);
10137
10138 cache_ip_soc_cfg(mode_lib, state_idx);
10139 cache_display_cfg(mode_lib, display_cfg);
10140
10141 fetch_socbb_params(mode_lib);
10142 if (call_standalone) {
10143 mode_lib->ms.support.ImmediateFlipSupport = 1; // assume mode support say immediate flip ok at max state/combine
10144 dml_core_mode_support_partial(mode_lib);
10145 }
10146
10147 dml_core_mode_programming(mode_lib, &clk_cfg);
10148
10149 dml_print("DML::%s: ------------- DONE ----------\n", __func__);
10150 dml_print("DML::%s: PrefetchAndImmediateFlipSupported = %0d\n", __func__, mode_lib->mp.PrefetchAndImmediateFlipSupported);
10151 return mode_lib->mp.PrefetchAndImmediateFlipSupported;
10152 }
10153
/// @brief Find the lowest power state in [start_state_idx, end_state_idx] for which
///        dml_mode_support() reports the display configuration as supported.
///        States are tried in increasing index order and the search stops at the first hit.
///        An invalid range (end before start, or end not below states.num_states) is
///        asserted and rejected without evaluating any state; this also keeps the
///        inclusive loop bound below the maximum dml_uint_t value so the loop terminates.
/// @param lowest_state_idx Output. Set to the first supported state index, or left at
///        end_state_idx when no state is supported or the range is invalid
/// @param mode_lib Library context passed to dml_mode_support()
/// @param display_cfg Display configuration to evaluate
/// @param start_state_idx First (lowest) state index to try, 0-based
/// @param end_state_idx Last (highest) state index to try, inclusive, 0-based
/// @return 1 if a supported state was found, 0 otherwise
static dml_uint_t mode_support_pwr_states(
	dml_uint_t *lowest_state_idx,
	struct display_mode_lib_st *mode_lib,
	const struct dml_display_cfg_st *display_cfg,
	dml_uint_t start_state_idx,
	dml_uint_t end_state_idx)
{
	dml_uint_t state_idx = 0;
	dml_bool_t mode_is_supported = 0;

	*lowest_state_idx = end_state_idx;

	if (end_state_idx < start_state_idx) {
		ASSERT(0);
		return mode_is_supported;
	}

	if (end_state_idx >= mode_lib->states.num_states) { // idx is 0-based
		ASSERT(0);
		return mode_is_supported;
	}

	for (state_idx = start_state_idx; state_idx <= end_state_idx; state_idx++) {
		if (dml_mode_support(mode_lib, state_idx, display_cfg)) {
			dml_print("DML::%s: Mode is supported at power state_idx = %u\n", __func__, state_idx);
			mode_is_supported = 1;
			*lowest_state_idx = state_idx;
			break;
		}
	}

	return mode_is_supported;
}
10182
dml_mode_support_ex(struct dml_mode_support_ex_params_st * in_out_params)10183 dml_uint_t dml_mode_support_ex(struct dml_mode_support_ex_params_st *in_out_params)
10184 {
10185 dml_uint_t result;
10186
10187 result = mode_support_pwr_states(&in_out_params->out_lowest_state_idx,
10188 in_out_params->mode_lib,
10189 in_out_params->in_display_cfg,
10190 0,
10191 in_out_params->mode_lib->states.num_states - 1);
10192
10193 if (result)
10194 *in_out_params->out_evaluation_info = in_out_params->mode_lib->ms.support;
10195
10196 return result;
10197 }
10198
dml_get_is_phantom_pipe(struct display_mode_lib_st * mode_lib,dml_uint_t pipe_idx)10199 dml_bool_t dml_get_is_phantom_pipe(struct display_mode_lib_st *mode_lib, dml_uint_t pipe_idx)
10200 {
10201 dml_uint_t plane_idx = mode_lib->mp.pipe_plane[pipe_idx];
10202 dml_print("DML::%s: pipe_idx=%d UseMALLForPStateChange=%0d\n", __func__, pipe_idx, mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[plane_idx]);
10203 return (mode_lib->ms.cache_display_cfg.plane.UseMALLForPStateChange[plane_idx] == dml_use_mall_pstate_change_phantom_pipe);
10204 }
10205
/// Define a getter `type dml_get_<variable>(mode_lib, surface_idx)` for a per-plane array.
/// surface_idx is translated through mode_lib->mp.pipe_plane[] and the resulting plane
/// index selects the element of internal_var, which is returned cast to `type`.
/// internal_var is an array expression written in terms of `mode_lib`; it is parenthesized
/// in the expansion so the subscript always applies to the whole argument.
/// Neither surface_idx nor the plane index is range-checked.
#define dml_get_per_surface_var_func(variable, type, internal_var) type dml_get_##variable(struct display_mode_lib_st *mode_lib, dml_uint_t surface_idx) \
{ \
	dml_uint_t plane_idx; \
	plane_idx = mode_lib->mp.pipe_plane[surface_idx]; \
	return (type) (internal_var)[plane_idx]; \
}
10212
/// Define a getter `type dml_get_<var>(mode_lib)` that returns internal_var cast to `type`.
/// internal_var is an expression written in terms of `mode_lib`; it is parenthesized in the
/// expansion so the cast always applies to the whole argument.
#define dml_get_var_func(var, type, internal_var) type dml_get_##var(struct display_mode_lib_st *mode_lib) \
{ \
	return (type) (internal_var); \
}
10217
// Getters that take only mode_lib. Each line expands dml_get_var_func() into a function
// `<type> dml_get_<name>(struct display_mode_lib_st *mode_lib)` returning the listed
// mode_lib field cast to <type>.
dml_get_var_func(wm_urgent, dml_float_t, mode_lib->mp.Watermark.UrgentWatermark);
dml_get_var_func(wm_stutter_exit, dml_float_t, mode_lib->mp.Watermark.StutterExitWatermark);
dml_get_var_func(wm_stutter_enter_exit, dml_float_t, mode_lib->mp.Watermark.StutterEnterPlusExitWatermark);
// NOTE(review): wm_memory_trip reads mp.UrgentLatency, the same field as
// dml_get_urgent_latency below, not a member of mp.Watermark - confirm this is intended.
dml_get_var_func(wm_memory_trip, dml_float_t, mode_lib->mp.UrgentLatency);
dml_get_var_func(wm_fclk_change, dml_float_t, mode_lib->mp.Watermark.FCLKChangeWatermark);
dml_get_var_func(wm_usr_retraining, dml_float_t, mode_lib->mp.Watermark.USRRetrainingWatermark);
dml_get_var_func(wm_dram_clock_change, dml_float_t, mode_lib->mp.Watermark.DRAMClockChangeWatermark);
dml_get_var_func(wm_z8_stutter_enter_exit, dml_float_t, mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark);
dml_get_var_func(wm_z8_stutter, dml_float_t, mode_lib->mp.Watermark.Z8StutterExitWatermark);
dml_get_var_func(fraction_of_urgent_bandwidth, dml_float_t, mode_lib->mp.FractionOfUrgentBandwidth);
dml_get_var_func(fraction_of_urgent_bandwidth_imm_flip, dml_float_t, mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip);
dml_get_var_func(urgent_latency, dml_float_t, mode_lib->mp.UrgentLatency);
dml_get_var_func(clk_dcf_deepsleep, dml_float_t, mode_lib->mp.DCFCLKDeepSleep);
dml_get_var_func(wm_writeback_dram_clock_change, dml_float_t, mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark);
dml_get_var_func(wm_writeback_urgent, dml_float_t, mode_lib->mp.Watermark.WritebackUrgentWatermark);
dml_get_var_func(stutter_efficiency, dml_float_t, mode_lib->mp.StutterEfficiency);
dml_get_var_func(stutter_efficiency_no_vblank, dml_float_t, mode_lib->mp.StutterEfficiencyNotIncludingVBlank);
dml_get_var_func(stutter_efficiency_z8, dml_float_t, mode_lib->mp.Z8StutterEfficiency);
dml_get_var_func(stutter_num_bursts_z8, dml_float_t, mode_lib->mp.Z8NumberOfStutterBurstsPerFrame);
dml_get_var_func(stutter_period, dml_float_t, mode_lib->mp.StutterPeriod);
dml_get_var_func(stutter_efficiency_z8_bestcase, dml_float_t, mode_lib->mp.Z8StutterEfficiencyBestCase);
dml_get_var_func(stutter_num_bursts_z8_bestcase, dml_float_t, mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase);
dml_get_var_func(stutter_period_bestcase, dml_float_t, mode_lib->mp.StutterPeriodBestCase);
dml_get_var_func(urgent_extra_latency, dml_float_t, mode_lib->mp.UrgentExtraLatency);
dml_get_var_func(fclk_change_latency, dml_float_t, mode_lib->mp.MaxActiveFCLKChangeLatencySupported);
dml_get_var_func(dispclk_calculated, dml_float_t, mode_lib->mp.Dispclk_calculated);
dml_get_var_func(total_data_read_bw, dml_float_t, mode_lib->mp.TotalDataReadBandwidth);
// The two bandwidth getters below read from mode_lib->ms, not mode_lib->mp.
dml_get_var_func(return_bw, dml_float_t, mode_lib->ms.ReturnBW);
dml_get_var_func(return_dram_bw, dml_float_t, mode_lib->ms.ReturnDRAMBW);
dml_get_var_func(tcalc, dml_float_t, mode_lib->mp.TCalc);
dml_get_var_func(comp_buffer_size_kbytes, dml_uint_t, mode_lib->mp.CompressedBufferSizeInkByte);
// Chunk-size getters: these return values from the cached IP parameters (mode_lib->ms.ip).
dml_get_var_func(pixel_chunk_size_in_kbyte, dml_uint_t, mode_lib->ms.ip.pixel_chunk_size_kbytes);
dml_get_var_func(alpha_pixel_chunk_size_in_kbyte, dml_uint_t, mode_lib->ms.ip.alpha_pixel_chunk_size_kbytes);
dml_get_var_func(meta_chunk_size_in_kbyte, dml_uint_t, mode_lib->ms.ip.meta_chunk_size_kbytes);
dml_get_var_func(min_pixel_chunk_size_in_byte, dml_uint_t, mode_lib->ms.ip.min_pixel_chunk_size_bytes);
dml_get_var_func(min_meta_chunk_size_in_byte, dml_uint_t, mode_lib->ms.ip.min_meta_chunk_size_bytes);
dml_get_var_func(total_immediate_flip_bytes, dml_uint_t, mode_lib->mp.TotImmediateFlipBytes);
10255
// Per-surface getters. Each line expands dml_get_per_surface_var_func() into a function
// `<type> dml_get_<name>(struct display_mode_lib_st *mode_lib, dml_uint_t surface_idx)`.
// The generated function maps surface_idx to a plane index through mode_lib->mp.pipe_plane[]
// and returns that element of the listed array cast to <type>.
// NOTE(review): the refcyc_per_*_in_us getters return the stored array values unchanged;
// the names suggest microseconds - confirm units against the code that fills these arrays.
dml_get_per_surface_var_func(dsc_delay, dml_uint_t, mode_lib->mp.DSCDelay); // this is the dsc latency
dml_get_per_surface_var_func(dppclk_calculated, dml_float_t, mode_lib->mp.Dppclk_calculated);
dml_get_per_surface_var_func(dscclk_calculated, dml_float_t, mode_lib->mp.DSCCLK_calculated);
dml_get_per_surface_var_func(min_ttu_vblank_in_us, dml_float_t, mode_lib->mp.MinTTUVBlank);
dml_get_per_surface_var_func(vratio_prefetch_l, dml_float_t, mode_lib->mp.VRatioPrefetchY);
dml_get_per_surface_var_func(vratio_prefetch_c, dml_float_t, mode_lib->mp.VRatioPrefetchC);
dml_get_per_surface_var_func(dst_x_after_scaler, dml_uint_t, mode_lib->mp.DSTXAfterScaler);
dml_get_per_surface_var_func(dst_y_after_scaler, dml_uint_t, mode_lib->mp.DSTYAfterScaler);
dml_get_per_surface_var_func(dst_y_per_vm_vblank, dml_float_t, mode_lib->mp.DestinationLinesToRequestVMInVBlank);
dml_get_per_surface_var_func(dst_y_per_row_vblank, dml_float_t, mode_lib->mp.DestinationLinesToRequestRowInVBlank);
dml_get_per_surface_var_func(dst_y_prefetch, dml_float_t, mode_lib->mp.DestinationLinesForPrefetch);
dml_get_per_surface_var_func(dst_y_per_vm_flip, dml_float_t, mode_lib->mp.DestinationLinesToRequestVMInImmediateFlip);
dml_get_per_surface_var_func(dst_y_per_row_flip, dml_float_t, mode_lib->mp.DestinationLinesToRequestRowInImmediateFlip);
dml_get_per_surface_var_func(dst_y_per_pte_row_nom_l, dml_float_t, mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L);
dml_get_per_surface_var_func(dst_y_per_pte_row_nom_c, dml_float_t, mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C);
dml_get_per_surface_var_func(dst_y_per_meta_row_nom_l, dml_float_t, mode_lib->mp.DST_Y_PER_META_ROW_NOM_L);
dml_get_per_surface_var_func(dst_y_per_meta_row_nom_c, dml_float_t, mode_lib->mp.DST_Y_PER_META_ROW_NOM_C);
dml_get_per_surface_var_func(refcyc_per_vm_group_vblank_in_us, dml_float_t, mode_lib->mp.TimePerVMGroupVBlank);
dml_get_per_surface_var_func(refcyc_per_vm_group_flip_in_us, dml_float_t, mode_lib->mp.TimePerVMGroupFlip);
dml_get_per_surface_var_func(refcyc_per_vm_req_vblank_in_us, dml_float_t, mode_lib->mp.TimePerVMRequestVBlank);
dml_get_per_surface_var_func(refcyc_per_vm_req_flip_in_us, dml_float_t, mode_lib->mp.TimePerVMRequestFlip);
dml_get_per_surface_var_func(refcyc_per_vm_dmdata_in_us, dml_float_t, mode_lib->mp.Tdmdl_vm);
dml_get_per_surface_var_func(dmdata_dl_delta_in_us, dml_float_t, mode_lib->mp.Tdmdl);
dml_get_per_surface_var_func(refcyc_per_line_delivery_l_in_us, dml_float_t, mode_lib->mp.DisplayPipeLineDeliveryTimeLuma);
dml_get_per_surface_var_func(refcyc_per_line_delivery_c_in_us, dml_float_t, mode_lib->mp.DisplayPipeLineDeliveryTimeChroma);
dml_get_per_surface_var_func(refcyc_per_line_delivery_pre_l_in_us, dml_float_t, mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch);
dml_get_per_surface_var_func(refcyc_per_line_delivery_pre_c_in_us, dml_float_t, mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch);
dml_get_per_surface_var_func(refcyc_per_req_delivery_l_in_us, dml_float_t, mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma);
dml_get_per_surface_var_func(refcyc_per_req_delivery_c_in_us, dml_float_t, mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma);
dml_get_per_surface_var_func(refcyc_per_req_delivery_pre_l_in_us, dml_float_t, mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch);
dml_get_per_surface_var_func(refcyc_per_req_delivery_pre_c_in_us, dml_float_t, mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch);
dml_get_per_surface_var_func(refcyc_per_cursor_req_delivery_in_us, dml_float_t, mode_lib->mp.CursorRequestDeliveryTime);
dml_get_per_surface_var_func(refcyc_per_cursor_req_delivery_pre_in_us, dml_float_t, mode_lib->mp.CursorRequestDeliveryTimePrefetch);
dml_get_per_surface_var_func(refcyc_per_meta_chunk_nom_l_in_us, dml_float_t, mode_lib->mp.TimePerMetaChunkNominal);
dml_get_per_surface_var_func(refcyc_per_meta_chunk_nom_c_in_us, dml_float_t, mode_lib->mp.TimePerChromaMetaChunkNominal);
dml_get_per_surface_var_func(refcyc_per_meta_chunk_vblank_l_in_us, dml_float_t, mode_lib->mp.TimePerMetaChunkVBlank);
dml_get_per_surface_var_func(refcyc_per_meta_chunk_vblank_c_in_us, dml_float_t, mode_lib->mp.TimePerChromaMetaChunkVBlank);
dml_get_per_surface_var_func(refcyc_per_meta_chunk_flip_l_in_us, dml_float_t, mode_lib->mp.TimePerMetaChunkFlip);
dml_get_per_surface_var_func(refcyc_per_meta_chunk_flip_c_in_us, dml_float_t, mode_lib->mp.TimePerChromaMetaChunkFlip);
dml_get_per_surface_var_func(refcyc_per_pte_group_nom_l_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_nom_luma);
dml_get_per_surface_var_func(refcyc_per_pte_group_nom_c_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_nom_chroma);
dml_get_per_surface_var_func(refcyc_per_pte_group_vblank_l_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_vblank_luma);
dml_get_per_surface_var_func(refcyc_per_pte_group_vblank_c_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_vblank_chroma);
dml_get_per_surface_var_func(refcyc_per_pte_group_flip_l_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_flip_luma);
dml_get_per_surface_var_func(refcyc_per_pte_group_flip_c_in_us, dml_float_t, mode_lib->mp.time_per_pte_group_flip_chroma);
dml_get_per_surface_var_func(dpte_group_size_in_bytes, dml_uint_t, mode_lib->mp.dpte_group_bytes);
dml_get_per_surface_var_func(vm_group_size_in_bytes, dml_uint_t, mode_lib->mp.vm_group_bytes);
// The swath height getters read from mode_lib->ms, not mode_lib->mp.
dml_get_per_surface_var_func(swath_height_l, dml_uint_t, mode_lib->ms.SwathHeightY);
dml_get_per_surface_var_func(swath_height_c, dml_uint_t, mode_lib->ms.SwathHeightC);
dml_get_per_surface_var_func(dpte_row_height_l, dml_uint_t, mode_lib->mp.dpte_row_height);
dml_get_per_surface_var_func(dpte_row_height_c, dml_uint_t, mode_lib->mp.dpte_row_height_chroma);
dml_get_per_surface_var_func(dpte_row_height_linear_l, dml_uint_t, mode_lib->mp.dpte_row_height_linear);
dml_get_per_surface_var_func(dpte_row_height_linear_c, dml_uint_t, mode_lib->mp.dpte_row_height_linear_chroma);
dml_get_per_surface_var_func(meta_row_height_l, dml_uint_t, mode_lib->mp.meta_row_height);
dml_get_per_surface_var_func(meta_row_height_c, dml_uint_t, mode_lib->mp.meta_row_height_chroma);

dml_get_per_surface_var_func(vstartup_calculated, dml_uint_t, mode_lib->mp.VStartup);
dml_get_per_surface_var_func(vupdate_offset, dml_uint_t, mode_lib->mp.VUpdateOffsetPix);
dml_get_per_surface_var_func(vupdate_width, dml_uint_t, mode_lib->mp.VUpdateWidthPix);
dml_get_per_surface_var_func(vready_offset, dml_uint_t, mode_lib->mp.VReadyOffsetPix);
dml_get_per_surface_var_func(vready_at_or_after_vsync, dml_uint_t, mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC);
dml_get_per_surface_var_func(min_dst_y_next_start, dml_uint_t, mode_lib->mp.MIN_DST_Y_NEXT_START);
// The stored DET buffer size getters read from mode_lib->ms, not mode_lib->mp.
dml_get_per_surface_var_func(det_stored_buffer_size_l_bytes, dml_uint_t, mode_lib->ms.DETBufferSizeY);
dml_get_per_surface_var_func(det_stored_buffer_size_c_bytes, dml_uint_t, mode_lib->ms.DETBufferSizeC);
dml_get_per_surface_var_func(use_mall_for_static_screen, dml_uint_t, mode_lib->mp.UsesMALLForStaticScreen);
dml_get_per_surface_var_func(surface_size_for_mall, dml_uint_t, mode_lib->mp.SurfaceSizeInTheMALL);
dml_get_per_surface_var_func(dcc_max_uncompressed_block_l, dml_uint_t, mode_lib->mp.DCCYMaxUncompressedBlock);
dml_get_per_surface_var_func(dcc_max_compressed_block_l, dml_uint_t, mode_lib->mp.DCCYMaxCompressedBlock);
dml_get_per_surface_var_func(dcc_independent_block_l, dml_uint_t, mode_lib->mp.DCCYIndependentBlock);
dml_get_per_surface_var_func(dcc_max_uncompressed_block_c, dml_uint_t, mode_lib->mp.DCCCMaxUncompressedBlock);
dml_get_per_surface_var_func(dcc_max_compressed_block_c, dml_uint_t, mode_lib->mp.DCCCMaxCompressedBlock);
dml_get_per_surface_var_func(dcc_independent_block_c, dml_uint_t, mode_lib->mp.DCCCIndependentBlock);
dml_get_per_surface_var_func(max_active_dram_clock_change_latency_supported, dml_uint_t, mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported);
dml_get_per_surface_var_func(pte_buffer_mode, dml_uint_t, mode_lib->mp.PTE_BUFFER_MODE);
dml_get_per_surface_var_func(bigk_fragment_size, dml_uint_t, mode_lib->mp.BIGK_FRAGMENT_SIZE);
dml_get_per_surface_var_func(dpte_bytes_per_row, dml_uint_t, mode_lib->mp.PixelPTEBytesPerRow);
dml_get_per_surface_var_func(meta_bytes_per_row, dml_uint_t, mode_lib->mp.MetaRowByte);
// Reads from mode_lib->ms, not mode_lib->mp.
dml_get_per_surface_var_func(det_buffer_size_kbytes, dml_uint_t, mode_lib->ms.DETBufferSizeInKByte);
10334