/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.diplomacy._
import freechips.rocketchip.diplomacy.{BundleBridgeSource, LazyModule, LazyModuleImp}
import freechips.rocketchip.interrupts.{IntSinkNode, IntSinkPortSimple}
import freechips.rocketchip.tile.HasFPUParameters
import freechips.rocketchip.tilelink._
import device.MsiInfoBundle
import utils._
import utility._
import system.SoCParamsKey
import xiangshan._
import xiangshan.ExceptionNO._
import xiangshan.frontend.HasInstrMMIOConst
import xiangshan.backend.Bundles.{DynInst, MemExuInput, MemExuOutput}
import xiangshan.backend.ctrlblock.{DebugLSIO, LsTopdownInfo}
import xiangshan.backend.exu.MemExeUnit
import xiangshan.backend.fu._
import xiangshan.backend.fu.FuType._
import xiangshan.backend.fu.NewCSR.{CsrTriggerBundle, TriggerUtil}
import xiangshan.backend.fu.util.{HasCSRConst, SdtrigExt}
import xiangshan.backend.{BackendToTopBundle, TopToBackendBundle}
import xiangshan.backend.rob.{RobDebugRollingIO, RobPtr, RobLsqIO}
import xiangshan.backend.datapath.NewPipelineConnect
import xiangshan.backend.trace.{Itype, TraceCoreInterface}
import xiangshan.backend.Bundles._
import xiangshan.mem._
import xiangshan.mem.mdp._
import xiangshan.mem.Bundles._
import xiangshan.mem.prefetch.{BasePrefecher, L1Prefetcher, SMSParams, SMSPrefetcher}
import xiangshan.cache._
import xiangshan.cache.mmu._
import coupledL2.{PrefetchRecv}

trait HasMemBlockParameters extends HasXSParameter {
  // number of memory units
  val LduCnt  = backendParams.LduCnt
  val StaCnt  = backendParams.StaCnt
  val StdCnt  = backendParams.StdCnt
  val HyuCnt  = backendParams.HyuCnt
  val VlduCnt = backendParams.VlduCnt
  val VstuCnt = backendParams.VstuCnt

  val LdExuCnt  = LduCnt + HyuCnt
  val StAddrCnt = StaCnt + HyuCnt
  val StDataCnt = StdCnt
  val MemExuCnt = LduCnt + HyuCnt + StaCnt + StdCnt
  val MemAddrExtCnt = LdExuCnt + StaCnt
  val MemVExuCnt = VlduCnt + VstuCnt

  val AtomicWBPort   = 0
  val MisalignWBPort = 1
  val UncacheWBPort  = 2
  val NCWBPorts = Seq(1, 2)
}

abstract class MemBlockBundle(implicit val p: Parameters) extends Bundle with HasMemBlockParameters

class Std(cfg: FuConfig)(implicit p: Parameters) extends FuncUnit(cfg) {
  io.in.ready := io.out.ready
  io.out.valid := io.in.valid
  io.out.bits := 0.U.asTypeOf(io.out.bits)
  io.out.bits.res.data := io.in.bits.data.src(0)
  io.out.bits.ctrl.robIdx := io.in.bits.ctrl.robIdx
}

class ooo_to_mem(implicit p: Parameters) extends MemBlockBundle {
  val backendToTopBypass = Flipped(new BackendToTopBundle)

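  // per-port hints from the backend for the load-to-load fast path
  // (consumed by the l2l_fwd_* / ld_fast_* connections in MemBlockInlinedImp below)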
  val loadFastMatch = Vec(LdExuCnt, Input(UInt(LdExuCnt.W)))
  val loadFastFuOpType = Vec(LdExuCnt, Input(FuOpType()))
  val loadFastImm = Vec(LdExuCnt, Input(UInt(12.W)))
  val sfence = Input(new SfenceBundle)
  val tlbCsr = Input(new TlbCsrBundle)
  val lsqio = new Bundle {
    val lcommit = Input(UInt(log2Up(CommitWidth + 1).W))
    val scommit = Input(UInt(log2Up(CommitWidth + 1).W))
    val pendingMMIOld = Input(Bool())
    val pendingld = Input(Bool())
    val pendingst = Input(Bool())
    val pendingVst = Input(Bool())
    val commit = Input(Bool())
    val pendingPtr = Input(new RobPtr)
    val pendingPtrNext = Input(new RobPtr)
  }

  val isStoreException = Input(Bool())
  val isVlsException = Input(Bool())
  val csrCtrl = Flipped(new CustomCSRCtrlIO)
  val enqLsq = new LsqEnqIO
  val flushSb = Input(Bool())

  val storePc = Vec(StaCnt, Input(UInt(VAddrBits.W))) // for hw prefetch
  val hybridPc = Vec(HyuCnt, Input(UInt(VAddrBits.W))) // for hw prefetch

  val issueLda = MixedVec(Seq.fill(LduCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueSta = MixedVec(Seq.fill(StaCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueStd = MixedVec(Seq.fill(StdCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueHya = MixedVec(Seq.fill(HyuCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueVldu = MixedVec(Seq.fill(VlduCnt)(Flipped(DecoupledIO(new MemExuInput(isVector = true)))))

  def issueUops = issueLda ++ issueSta ++ issueStd ++ issueHya ++ issueVldu
}

class mem_to_ooo(implicit p: Parameters) extends MemBlockBundle {
  val topToBackendBypass = new TopToBackendBundle

  val otherFastWakeup = Vec(LdExuCnt, ValidIO(new DynInst))
  val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize + 1).W))
  val sqCancelCnt = Output(UInt(log2Up(StoreQueueSize + 1).W))
  val sqDeq = Output(UInt(log2Ceil(EnsbufferWidth + 1).W))
  val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
  // used by the VLSU issue queue: a vector store waits for all stores before it,
  // and a vector load waits for all loads before it
  val sqDeqPtr = Output(new SqPtr)
  val lqDeqPtr = Output(new LqPtr)
  val stIn = Vec(StAddrCnt, ValidIO(new MemExuInput))
  val stIssuePtr = Output(new SqPtr())

  val memoryViolation = ValidIO(new Redirect)
  val sbIsEmpty = Output(Bool())

  val lsTopdownInfo = Vec(LdExuCnt, Output(new LsTopdownInfo))

  val lsqio = new Bundle {
    val vaddr = Output(UInt(XLEN.W))
    val vstart = Output(UInt((log2Up(VLEN) + 1).W))
    val vl = Output(UInt((log2Up(VLEN) + 1).W))
    val gpaddr = Output(UInt(XLEN.W))
    val isForVSnonLeafPTE = Output(Bool())
    val mmio = Output(Vec(LoadPipelineWidth, Bool()))
    val uop = Output(Vec(LoadPipelineWidth, new DynInst))
    val lqCanAccept = Output(Bool())
    val sqCanAccept = Output(Bool())
  }

  val storeDebugInfo = Vec(EnsbufferWidth, new Bundle {
    val robidx = Output(new RobPtr)
    val pc = Input(UInt(VAddrBits.W))
  })

  val writebackLda = Vec(LduCnt, DecoupledIO(new MemExuOutput))
  val writebackSta = Vec(StaCnt, DecoupledIO(new MemExuOutput))
  val writebackStd = Vec(StdCnt, DecoupledIO(new MemExuOutput))
  val writebackHyuLda = Vec(HyuCnt, DecoupledIO(new MemExuOutput))
  val writebackHyuSta = Vec(HyuCnt, DecoupledIO(new MemExuOutput))
  val writebackVldu = Vec(VlduCnt, DecoupledIO(new MemExuOutput(isVector = true)))
  def writeBack: Seq[DecoupledIO[MemExuOutput]] = {
    writebackSta ++
      writebackHyuLda ++ writebackHyuSta ++
      writebackLda ++
      writebackVldu ++
      writebackStd
  }

  val ldaIqFeedback = Vec(LduCnt, new MemRSFeedbackIO)
  val staIqFeedback = Vec(StaCnt, new MemRSFeedbackIO)
  val hyuIqFeedback = Vec(HyuCnt, new MemRSFeedbackIO)
  val vstuIqFeedback = Vec(VstuCnt, new MemRSFeedbackIO(isVector = true))
  val vlduIqFeedback = Vec(VlduCnt, new MemRSFeedbackIO(isVector = true))
  val ldCancel = Vec(backendParams.LdExuCnt, new LoadCancelIO)
  val wakeup = Vec(backendParams.LdExuCnt, Valid(new DynInst))

  val s3_delayed_load_error = Vec(LdExuCnt, Output(Bool()))
}

class MemCoreTopDownIO extends Bundle {
  val robHeadMissInDCache = Output(Bool())
  val robHeadTlbReplay = Output(Bool())
  val robHeadTlbMiss = Output(Bool())
  val robHeadLoadVio = Output(Bool())
  val robHeadLoadMSHR = Output(Bool())
}

class fetch_to_mem(implicit p: Parameters) extends XSBundle {
  val itlb = Flipped(new TlbPtwIO())
}

// triple buffer applied in i-mmio path (two at MemBlock, one at L2Top)
class InstrUncacheBuffer()(implicit p: Parameters) extends LazyModule with HasInstrMMIOConst {
  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
  lazy val module = new InstrUncacheBufferImpl

  class InstrUncacheBufferImpl extends LazyModuleImp(this) {
    (node.in zip node.out) foreach { case ((in, edgeIn), (out, edgeOut)) =>
      out.a <> BufferParams.default(BufferParams.default(in.a))
      in.d <> BufferParams.default(BufferParams.default(out.d))

      // only a.valid, a.ready, a.address can change
      // hoping that the rest would be optimized to keep MemBlock port unchanged after adding buffer
      out.a.bits.data := 0.U
      out.a.bits.mask := Fill(mmioBusBytes, 1.U(1.W))
      out.a.bits.opcode := 4.U // Get
      out.a.bits.size := log2Ceil(mmioBusBytes).U
      out.a.bits.source := 0.U
    }
  }
}

// triple buffer applied in L1I$-L2 path (two at MemBlock, one at L2Top)
class ICacheBuffer()(implicit p: Parameters) extends LazyModule {
  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
  lazy val module = new ICacheBufferImpl

  class ICacheBufferImpl extends LazyModuleImp(this) {
    (node.in zip node.out) foreach { case ((in, edgeIn), (out, edgeOut)) =>
      out.a <> BufferParams.default(BufferParams.default(in.a))
      in.d <> BufferParams.default(BufferParams.default(out.d))
    }
  }
}

class ICacheCtrlBuffer()(implicit p: Parameters) extends LazyModule {
  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
  lazy val module = new ICacheCtrlBufferImpl

  class ICacheCtrlBufferImpl extends LazyModuleImp(this) {
    (node.in zip node.out) foreach { case ((in, edgeIn), (out, edgeOut)) =>
      out.a <> BufferParams.default(BufferParams.default(in.a))
      in.d <> BufferParams.default(BufferParams.default(out.d))
    }
  }
}

// Frontend bus goes through MemBlock
class FrontendBridge()(implicit p: Parameters) extends LazyModule {
  val icache_node = LazyModule(new ICacheBuffer()).suggestName("icache").node // to keep IO port name
  val icachectrl_node = LazyModule(new ICacheCtrlBuffer()).suggestName("icachectrl").node
  val instr_uncache_node = LazyModule(new InstrUncacheBuffer()).suggestName("instr_uncache").node
  lazy val module = new LazyModuleImp(this) {
  }
}

class MemBlockInlined()(implicit p: Parameters) extends LazyModule
  with HasXSParameter {
  override def shouldBeInlined: Boolean = true

  val dcache = LazyModule(new DCacheWrapper())
  val uncache = LazyModule(new Uncache())
  val uncache_port = TLTempNode()
  val uncache_xbar = TLXbar()
  val ptw = LazyModule(new L2TLBWrapper())
  val ptw_to_l2_buffer = if (!coreParams.softPTW) LazyModule(new TLBuffer) else null
  val l1d_to_l2_buffer = if (coreParams.dcacheParametersOpt.nonEmpty) LazyModule(new TLBuffer) else null
  val dcache_port = TLNameNode("dcache_client") // to keep dcache-L2 port name
  val l2_pf_sender_opt = coreParams.prefetcher.map(_ =>
    BundleBridgeSource(() => new PrefetchRecv)
  )
  val l3_pf_sender_opt = if (p(SoCParamsKey).L3CacheParamsOpt.nonEmpty) coreParams.prefetcher.map(_ =>
    BundleBridgeSource(() => new huancun.PrefetchRecv)
  ) else None
  val frontendBridge = LazyModule(new FrontendBridge)
  // interrupt sinks
  val clint_int_sink = IntSinkNode(IntSinkPortSimple(1, 2))
  val debug_int_sink = IntSinkNode(IntSinkPortSimple(1, 1))
  val plic_int_sink = IntSinkNode(IntSinkPortSimple(2, 1))
  val nmi_int_sink = IntSinkNode(IntSinkPortSimple(1, (new NonmaskableInterruptIO).elements.size))

  if (!coreParams.softPTW) {
    ptw_to_l2_buffer.node := ptw.node
  }
  uncache_xbar := TLBuffer() := uncache.clientNode
  if (dcache.uncacheNode.isDefined) {
    dcache.uncacheNode.get := TLBuffer.chainNode(2) := uncache_xbar
  }
  uncache_port := TLBuffer.chainNode(2) := uncache_xbar

  lazy val module = new MemBlockInlinedImp(this)
}

class MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer)
  with HasXSParameter
  with HasFPUParameters
  with HasPerfEvents
  with HasL1PrefetchSourceParameter
  with HasCircularQueuePtrHelper
  with HasMemBlockParameters
  with HasTlbConst
  with HasCSRConst
  with SdtrigExt
{
  val io = IO(new Bundle {
    val hartId = Input(UInt(hartIdLen.W))
    val redirect = Flipped(ValidIO(new Redirect))

    val ooo_to_mem = new ooo_to_mem
    val mem_to_ooo = new mem_to_ooo
    val fetch_to_mem = new fetch_to_mem

    val ifetchPrefetch = Vec(LduCnt, ValidIO(new SoftIfetchPrefetchBundle))

    // misc
    val error = ValidIO(new L1CacheErrorInfo)
    val memInfo = new Bundle {
      val sqFull = Output(Bool())
      val lqFull = Output(Bool())
      val dcacheMSHRFull = Output(Bool())
    }
    val debug_ls = new DebugLSIO
    val l2_hint = Input(Valid(new L2ToL1Hint()))
    val l2PfqBusy = Input(Bool())
    val l2_tlb_req = Flipped(new TlbRequestIO(nRespDups = 2))
    val l2_pmp_resp = new PMPRespBundle
    val l2_flush_done = Input(Bool())

    val debugTopDown = new Bundle {
      val robHeadVaddr = Flipped(Valid(UInt(VAddrBits.W)))
      val toCore = new MemCoreTopDownIO
    }
    val debugRolling = Flipped(new RobDebugRollingIO)

    // All the signals from/to frontend/backend to/from bus will go through MemBlock
    val fromTopToBackend = Input(new Bundle {
      val msiInfo = ValidIO(new MsiInfoBundle)
      val clintTime = ValidIO(UInt(64.W))
    })
    val inner_hartId = Output(UInt(hartIdLen.W))
    val inner_reset_vector = Output(UInt(PAddrBits.W))
    val outer_reset_vector = Input(UInt(PAddrBits.W))
    val outer_cpu_halt = Output(Bool())
    val outer_l2_flush_en = Output(Bool())
    val outer_power_down_en = Output(Bool())
    val outer_cpu_critical_error = Output(Bool())
    val inner_beu_errors_icache = Input(new L1BusErrorUnitInfo)
    val outer_beu_errors_icache = Output(new L1BusErrorUnitInfo)
    val inner_hc_perfEvents = Output(Vec(numPCntHc * coreParams.L2NBanks + 1, new PerfEvent))
    val outer_hc_perfEvents = Input(Vec(numPCntHc * coreParams.L2NBanks + 1, new PerfEvent))

    // reset signals of frontend & backend are generated in memblock
    val reset_backend = Output(Reset())
    // Reset signal from frontend.
    val resetInFrontendBypass = new Bundle {
      val fromFrontend = Input(Bool())
      val toL2Top = Output(Bool())
    }
    val traceCoreInterfaceBypass = new Bundle {
      val fromBackend = Flipped(new TraceCoreInterface(hasOffset = true))
      val toL2Top = new TraceCoreInterface
    }

    val topDownInfo = new Bundle {
      val fromL2Top = Input(new TopDownFromL2Top)
      val toBackend = Flipped(new TopDownInfo)
    }
  })

  dontTouch(io.inner_hartId)
  dontTouch(io.inner_reset_vector)
  dontTouch(io.outer_reset_vector)
  dontTouch(io.outer_cpu_halt)
  dontTouch(io.outer_l2_flush_en)
  dontTouch(io.outer_power_down_en)
  dontTouch(io.outer_cpu_critical_error)
  dontTouch(io.inner_beu_errors_icache)
  dontTouch(io.outer_beu_errors_icache)
  dontTouch(io.inner_hc_perfEvents)
  dontTouch(io.outer_hc_perfEvents)

  val redirect = RegNextWithEnable(io.redirect)

  private val dcache = outer.dcache.module
  val uncache = outer.uncache.module

  //val delayedDcacheRefill = RegNext(dcache.io.lsu.lsq)

  val csrCtrl = DelayN(io.ooo_to_mem.csrCtrl, 2)
  dcache.io.l2_pf_store_only := RegNext(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_store_only, false.B)
  io.error <> DelayNWithValid(dcache.io.error, 2)
  when(!csrCtrl.cache_error_enable) {
    io.error.bits.report_to_beu := false.B
    io.error.valid := false.B
  }

  val loadUnits = Seq.fill(LduCnt)(Module(new LoadUnit))
  val storeUnits = Seq.fill(StaCnt)(Module(new StoreUnit))
  val stdExeUnits = Seq.fill(StdCnt)(Module(new MemExeUnit(backendParams.memSchdParams.get.issueBlockParams.find(_.StdCnt != 0).get.exuBlockParams.head)))
  val hybridUnits = Seq.fill(HyuCnt)(Module(new HybridUnit)) // Todo: replace it with HybridUnit
  val stData = stdExeUnits.map(_.io.out)
  val exeUnits = loadUnits ++ storeUnits

  // The number of vector load/store units is decoupled from the number of load/store units
  val vlSplit = Seq.fill(VlduCnt)(Module(new VLSplitImp))
  val vsSplit = Seq.fill(VstuCnt)(Module(new VSSplitImp))
  val vlMergeBuffer = Module(new VLMergeBufferImp)
  val vsMergeBuffer = Seq.fill(VstuCnt)(Module(new VSMergeBufferImp))
  val vSegmentUnit = Module(new VSegmentUnit)
  val vfofBuffer = Module(new VfofBuffer)

  // misalign Buffer
  val loadMisalignBuffer = Module(new LoadMisalignBuffer)
  val storeMisalignBuffer = Module(new StoreMisalignBuffer)

  val l1_pf_req = Wire(Decoupled(new L1PrefetchReq()))
  dcache.io.sms_agt_evict_req.ready := false.B
  val prefetcherOpt: Option[BasePrefecher] = coreParams.prefetcher.map {
    case _: SMSParams =>
      val sms = Module(new SMSPrefetcher())
      sms.io_agt_en := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_agt, 2, Some(false.B))
      sms.io_pht_en := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_pht, 2, Some(false.B))
      sms.io_act_threshold := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_active_threshold, 2, Some(12.U))
      sms.io_act_stride := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_active_stride, 2, Some(30.U))
      sms.io_stride_en := false.B
      sms.io_dcache_evict <> dcache.io.sms_agt_evict_req
      sms
  }
  prefetcherOpt.foreach{ pf => pf.io.l1_req.ready := false.B }
  val hartId = p(XSCoreParamsKey).HartId
  val l1PrefetcherOpt: Option[BasePrefecher] = coreParams.prefetcher.map {
    case _ =>
      val l1Prefetcher = Module(new L1Prefetcher())
      l1Prefetcher.io.enable := Constantin.createRecord(s"enableL1StreamPrefetcher$hartId", initValue = true)
      l1Prefetcher.pf_ctrl <> dcache.io.pf_ctrl
      l1Prefetcher.l2PfqBusy := io.l2PfqBusy

      // stride will train on miss or prefetch hit
      for (i <- 0 until LduCnt) {
        val source = loadUnits(i).io.prefetch_train_l1
        l1Prefetcher.stride_train(i).valid := source.valid && source.bits.isFirstIssue && (
          source.bits.miss || isFromStride(source.bits.meta_prefetch)
        )
        l1Prefetcher.stride_train(i).bits := source.bits
        val loadPc = RegNext(io.ooo_to_mem.issueLda(i).bits.uop.pc) // for s1
        l1Prefetcher.stride_train(i).bits.uop.pc := Mux(
          loadUnits(i).io.s2_ptr_chasing,
          RegEnable(loadPc, loadUnits(i).io.s2_prefetch_spec),
          RegEnable(RegEnable(loadPc, loadUnits(i).io.s1_prefetch_spec), loadUnits(i).io.s2_prefetch_spec)
        )
      }
      for (i <- 0 until HyuCnt) {
        val source = hybridUnits(i).io.prefetch_train_l1
        l1Prefetcher.stride_train.drop(LduCnt)(i).valid := source.valid && source.bits.isFirstIssue && (
          source.bits.miss || isFromStride(source.bits.meta_prefetch)
        )
        l1Prefetcher.stride_train.drop(LduCnt)(i).bits := source.bits
        l1Prefetcher.stride_train.drop(LduCnt)(i).bits.uop.pc := Mux(
          hybridUnits(i).io.ldu_io.s2_ptr_chasing,
          RegNext(io.ooo_to_mem.hybridPc(i)),
          RegNext(RegNext(io.ooo_to_mem.hybridPc(i)))
        )
      }
      l1Prefetcher
  }
  // load prefetch to l1 Dcache
  l1PrefetcherOpt match {
    case Some(pf) => l1_pf_req <> Pipeline(in = pf.io.l1_req, depth = 1, pipe = false, name = Some("pf_queue_to_ldu_reg"))
    case None =>
      l1_pf_req.valid := false.B
      l1_pf_req.bits := DontCare
  }
  val pf_train_on_hit = RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_train_on_hit, 2, Some(true.B))

  loadUnits.zipWithIndex.map(x => x._1.suggestName("LoadUnit_" + x._2))
  storeUnits.zipWithIndex.map(x => x._1.suggestName("StoreUnit_" + x._2))
  hybridUnits.zipWithIndex.map(x => x._1.suggestName("HybridUnit_" + x._2))
  val atomicsUnit = Module(new AtomicsUnit)

  val ldaExeWbReqs = Wire(Vec(LduCnt, Decoupled(new MemExuOutput)))
  // atomicsUnit will overwrite the source from ldu if it is about to writeback
  val atomicWritebackOverride = Mux(
    atomicsUnit.io.out.valid,
    atomicsUnit.io.out.bits,
    loadUnits(AtomicWBPort).io.ldout.bits
  )
  ldaExeWbReqs(AtomicWBPort).valid := atomicsUnit.io.out.valid || loadUnits(AtomicWBPort).io.ldout.valid
  ldaExeWbReqs(AtomicWBPort).bits := atomicWritebackOverride
  atomicsUnit.io.out.ready := ldaExeWbReqs(AtomicWBPort).ready
  loadUnits(AtomicWBPort).io.ldout.ready := ldaExeWbReqs(AtomicWBPort).ready

  val st_data_atomics = Seq.tabulate(StdCnt)(i =>
    stData(i).valid && FuType.storeIsAMO(stData(i).bits.uop.fuType)
  )
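
  // note on load writeback port sharing: AtomicWBPort(0) is shared with the atomics unit above,
  // MisalignWBPort(1) with the load misalign buffer below, and the uncache/LSQ load writeback
  // goes through loadUnits(UncacheWBPort)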

  // loadUnit will overwrite the source from misalignBuffer if it is about to writeback
  val misalignWritebackOverride = Mux(
    loadUnits(MisalignWBPort).io.ldout.valid,
    loadUnits(MisalignWBPort).io.ldout.bits,
    loadMisalignBuffer.io.writeBack.bits
  )
  ldaExeWbReqs(MisalignWBPort).valid := loadMisalignBuffer.io.writeBack.valid || loadUnits(MisalignWBPort).io.ldout.valid
  ldaExeWbReqs(MisalignWBPort).bits := misalignWritebackOverride
  loadMisalignBuffer.io.writeBack.ready := ldaExeWbReqs(MisalignWBPort).ready && !loadUnits(MisalignWBPort).io.ldout.valid
  loadMisalignBuffer.io.loadOutValid := loadUnits(MisalignWBPort).io.ldout.valid
  loadMisalignBuffer.io.loadVecOutValid := loadUnits(MisalignWBPort).io.vecldout.valid
  loadUnits(MisalignWBPort).io.ldout.ready := ldaExeWbReqs(MisalignWBPort).ready
  ldaExeWbReqs(MisalignWBPort).bits.isFromLoadUnit := loadUnits(MisalignWBPort).io.ldout.bits.isFromLoadUnit || loadMisalignBuffer.io.writeBack.valid

  // loadUnit will overwrite the source from uncache if it is about to writeback
  ldaExeWbReqs(UncacheWBPort) <> loadUnits(UncacheWBPort).io.ldout
  io.mem_to_ooo.writebackLda <> ldaExeWbReqs
  io.mem_to_ooo.writebackSta <> storeUnits.map(_.io.stout)
  io.mem_to_ooo.writebackStd.zip(stdExeUnits).foreach { x =>
    x._1.bits := x._2.io.out.bits
    // AMOs do not need to write back std now.
    x._1.valid := x._2.io.out.fire && !FuType.storeIsAMO(x._2.io.out.bits.uop.fuType)
  }
  io.mem_to_ooo.writebackHyuLda <> hybridUnits.map(_.io.ldout)
  io.mem_to_ooo.writebackHyuSta <> hybridUnits.map(_.io.stout)
  io.mem_to_ooo.otherFastWakeup := DontCare
  io.mem_to_ooo.otherFastWakeup.drop(HyuCnt).take(LduCnt).zip(loadUnits.map(_.io.fast_uop)).foreach { case (a, b) => a := b }
  io.mem_to_ooo.otherFastWakeup.take(HyuCnt).zip(hybridUnits.map(_.io.ldu_io.fast_uop)).foreach { case (a, b) => a := b }
  val stOut = io.mem_to_ooo.writebackSta ++ io.mem_to_ooo.writebackHyuSta

  // prefetch to l1 req
  // Stream's confidence is always 1
  // (LduCnt + HyuCnt) l1_pf_reqs ?
  loadUnits.foreach(load_unit => {
    load_unit.io.prefetch_req.valid <> l1_pf_req.valid
    load_unit.io.prefetch_req.bits <> l1_pf_req.bits
  })

  hybridUnits.foreach(hybrid_unit => {
    hybrid_unit.io.ldu_io.prefetch_req.valid <> l1_pf_req.valid
    hybrid_unit.io.ldu_io.prefetch_req.bits <> l1_pf_req.bits
  })

  // NOTE: loadUnits(0) has higher bank conflict and miss queue arb priority than loadUnits(1) and loadUnits(2)
  // when loadUnits(1)/loadUnits(2) stage 0 is busy, hw prefetch will never use that pipeline
  val LowConfPorts = if (LduCnt == 2) Seq(1) else if (LduCnt == 3) Seq(1, 2) else Seq(0)
  LowConfPorts.map{ case i => loadUnits(i).io.prefetch_req.bits.confidence := 0.U }
  hybridUnits.foreach(hybrid_unit => { hybrid_unit.io.ldu_io.prefetch_req.bits.confidence := 0.U })

  val canAcceptHighConfPrefetch = loadUnits.map(_.io.canAcceptHighConfPrefetch) ++
    hybridUnits.map(_.io.canAcceptLowConfPrefetch)
  val canAcceptLowConfPrefetch = loadUnits.map(_.io.canAcceptLowConfPrefetch) ++
    hybridUnits.map(_.io.canAcceptLowConfPrefetch)
  l1_pf_req.ready := (0 until LduCnt + HyuCnt).map{
    case i => {
      if (LowConfPorts.contains(i)) {
        loadUnits(i).io.canAcceptLowConfPrefetch
      } else {
        Mux(l1_pf_req.bits.confidence === 1.U, canAcceptHighConfPrefetch(i), canAcceptLowConfPrefetch(i))
      }
    }
  }.reduce(_ || _)

  // l1 pf fuzzer interface
  val DebugEnableL1PFFuzzer = false
  if (DebugEnableL1PFFuzzer) {
    // l1 pf req fuzzer
    val fuzzer = Module(new L1PrefetchFuzzer())
    fuzzer.io.vaddr := DontCare
    fuzzer.io.paddr := DontCare

    // override load_unit prefetch_req
    loadUnits.foreach(load_unit => {
      load_unit.io.prefetch_req.valid <> fuzzer.io.req.valid
      load_unit.io.prefetch_req.bits <> fuzzer.io.req.bits
    })

    // override hybrid_unit prefetch_req
    hybridUnits.foreach(hybrid_unit => {
      hybrid_unit.io.ldu_io.prefetch_req.valid <> fuzzer.io.req.valid
      hybrid_unit.io.ldu_io.prefetch_req.bits <> fuzzer.io.req.bits
    })

    fuzzer.io.req.ready := l1_pf_req.ready
  }

  // TODO: fast load wakeup
  val lsq = Module(new LsqWrapper)
  val sbuffer = Module(new Sbuffer)
  // if you want to stress test dcache store, use FakeSbuffer
  // val sbuffer = Module(new FakeSbuffer) // out of date now
  io.mem_to_ooo.stIssuePtr := lsq.io.issuePtrExt

  dcache.io.hartId := io.hartId
  lsq.io.hartId := io.hartId
  sbuffer.io.hartId := io.hartId
  atomicsUnit.io.hartId := io.hartId

  dcache.io.lqEmpty := lsq.io.lqEmpty

  // load/store prefetch to l2 cache
  prefetcherOpt.foreach(sms_pf => {
    l1PrefetcherOpt.foreach(l1_pf => {
      val sms_pf_to_l2 = DelayNWithValid(sms_pf.io.l2_req, 2)
      val l1_pf_to_l2 = DelayNWithValid(l1_pf.io.l2_req, 2)

      outer.l2_pf_sender_opt.get.out.head._1.addr_valid := sms_pf_to_l2.valid || l1_pf_to_l2.valid
      outer.l2_pf_sender_opt.get.out.head._1.addr := Mux(l1_pf_to_l2.valid, l1_pf_to_l2.bits.addr, sms_pf_to_l2.bits.addr)
      outer.l2_pf_sender_opt.get.out.head._1.pf_source := Mux(l1_pf_to_l2.valid, l1_pf_to_l2.bits.source, sms_pf_to_l2.bits.source)
      outer.l2_pf_sender_opt.get.out.head._1.l2_pf_en := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_enable, 2, Some(true.B))

      sms_pf.io.enable := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable, 2, Some(false.B))

      val l2_trace = Wire(new LoadPfDbBundle)
      l2_trace.paddr := outer.l2_pf_sender_opt.get.out.head._1.addr
      val table = ChiselDB.createTable(s"L2PrefetchTrace$hartId", new LoadPfDbBundle, basicDB = false)
      table.log(l2_trace, l1_pf_to_l2.valid, "StreamPrefetchTrace", clock, reset)
      table.log(l2_trace, !l1_pf_to_l2.valid && sms_pf_to_l2.valid, "L2PrefetchTrace", clock, reset)

      val l1_pf_to_l3 = ValidIODelay(l1_pf.io.l3_req, 4)
      outer.l3_pf_sender_opt.foreach(_.out.head._1.addr_valid := l1_pf_to_l3.valid)
      outer.l3_pf_sender_opt.foreach(_.out.head._1.addr := l1_pf_to_l3.bits)
      outer.l3_pf_sender_opt.foreach(_.out.head._1.l2_pf_en := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_enable, 4, Some(true.B)))

      val l3_trace = Wire(new LoadPfDbBundle)
      l3_trace.paddr := outer.l3_pf_sender_opt.map(_.out.head._1.addr).getOrElse(0.U)
      val l3_table = ChiselDB.createTable(s"L3PrefetchTrace$hartId", new LoadPfDbBundle, basicDB = false)
      l3_table.log(l3_trace, l1_pf_to_l3.valid, "StreamPrefetchTrace", clock, reset)

      XSPerfAccumulate("prefetch_fire_l2", outer.l2_pf_sender_opt.get.out.head._1.addr_valid)
      XSPerfAccumulate("prefetch_fire_l3", outer.l3_pf_sender_opt.map(_.out.head._1.addr_valid).getOrElse(false.B))
      XSPerfAccumulate("l1pf_fire_l2", l1_pf_to_l2.valid)
      XSPerfAccumulate("sms_fire_l2", !l1_pf_to_l2.valid && sms_pf_to_l2.valid)
      XSPerfAccumulate("sms_block_by_l1pf", l1_pf_to_l2.valid && sms_pf_to_l2.valid)
    })
  })

  // ptw
  val sfence = RegNext(RegNext(io.ooo_to_mem.sfence))
  val tlbcsr = RegNext(RegNext(io.ooo_to_mem.tlbCsr))
  private val ptw = outer.ptw.module
  private val ptw_to_l2_buffer = outer.ptw_to_l2_buffer.module
  private val l1d_to_l2_buffer = outer.l1d_to_l2_buffer.module
  ptw.io.hartId := io.hartId
  ptw.io.sfence <> sfence
  ptw.io.csr.tlb <> tlbcsr
  ptw.io.csr.distribute_csr <> csrCtrl.distribute_csr

  val perfEventsPTW = if (!coreParams.softPTW) {
    ptw.getPerfEvents
  } else {
    Seq()
  }

  // dtlb
  val dtlb_ld_tlb_ld = Module(new TLBNonBlock(LduCnt + HyuCnt + 1, 2, ldtlbParams))
  val dtlb_st_tlb_st = Module(new TLBNonBlock(StaCnt, 1, sttlbParams))
  val dtlb_prefetch_tlb_prefetch = Module(new TLBNonBlock(2, 2, pftlbParams))
  val dtlb_ld = Seq(dtlb_ld_tlb_ld.io)
  val dtlb_st = Seq(dtlb_st_tlb_st.io)
  val dtlb_prefetch = Seq(dtlb_prefetch_tlb_prefetch.io)
  /* tlb vec && constant variable */
  val dtlb = dtlb_ld ++ dtlb_st ++ dtlb_prefetch
  val (dtlb_ld_idx, dtlb_st_idx, dtlb_pf_idx) = (0, 1, 2)
  val TlbSubSizeVec = Seq(LduCnt + HyuCnt + 1, StaCnt, 2) // (load + hyu + stream pf, store, sms+l2bop)
  val DTlbSize = TlbSubSizeVec.sum
  val TlbStartVec = TlbSubSizeVec.scanLeft(0)(_ + _).dropRight(1)
  val TlbEndVec = TlbSubSizeVec.scanLeft(0)(_ + _).drop(1)

  val ptwio = Wire(new VectorTlbPtwIO(DTlbSize))
  val dtlb_reqs = dtlb.map(_.requestor).flatten
  val dtlb_pmps = dtlb.map(_.pmp).flatten
  dtlb.map(_.hartId := io.hartId)
  dtlb.map(_.sfence := sfence)
  dtlb.map(_.csr := tlbcsr)
  dtlb.map(_.flushPipe.map(a => a := false.B)) // non-block doesn't need
  dtlb.map(_.redirect := redirect)
  if (refillBothTlb) {
    require(ldtlbParams.outReplace == sttlbParams.outReplace)
    require(ldtlbParams.outReplace == hytlbParams.outReplace)
    require(ldtlbParams.outReplace == pftlbParams.outReplace)
    require(ldtlbParams.outReplace)

    val replace = Module(new TlbReplace(DTlbSize, ldtlbParams))
    replace.io.apply_sep(dtlb_ld.map(_.replace) ++ dtlb_st.map(_.replace) ++ dtlb_prefetch.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
  } else {
    // TODO: there will be bugs in TlbReplace when outReplace enable, since the order of Hyu is not right.
    if (ldtlbParams.outReplace) {
      val replace_ld = Module(new TlbReplace(LduCnt + 1, ldtlbParams))
      replace_ld.io.apply_sep(dtlb_ld.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
    }
    if (hytlbParams.outReplace) {
      val replace_hy = Module(new TlbReplace(HyuCnt, hytlbParams))
      replace_hy.io.apply_sep(dtlb_ld.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
    }
    if (sttlbParams.outReplace) {
      val replace_st = Module(new TlbReplace(StaCnt, sttlbParams))
      replace_st.io.apply_sep(dtlb_st.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
    }
    if (pftlbParams.outReplace) {
      val replace_pf = Module(new TlbReplace(2, pftlbParams))
      replace_pf.io.apply_sep(dtlb_prefetch.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
    }
  }

  val ptw_resp_next = RegEnable(ptwio.resp.bits, ptwio.resp.valid)
  val ptw_resp_v = RegNext(ptwio.resp.valid && !(sfence.valid && tlbcsr.satp.changed && tlbcsr.vsatp.changed && tlbcsr.hgatp.changed), init = false.B)
  ptwio.resp.ready := true.B

  val tlbreplay = WireInit(VecInit(Seq.fill(LdExuCnt)(false.B)))
  val tlbreplay_reg = GatedValidRegNext(tlbreplay)
  val dtlb_ld0_tlbreplay_reg = GatedValidRegNext(dtlb_ld(0).tlbreplay)

  if (backendParams.debugEn) { dontTouch(tlbreplay) }

  for (i <- 0 until LdExuCnt) {
    tlbreplay(i) := dtlb_ld(0).ptw.req(i).valid && ptw_resp_next.vector(0) && ptw_resp_v &&
      ptw_resp_next.data.hit(dtlb_ld(0).ptw.req(i).bits.vpn, tlbcsr.satp.asid, tlbcsr.vsatp.asid, tlbcsr.hgatp.vmid, allType = true, ignoreAsid = true)
  }

  dtlb.flatMap(a => a.ptw.req)
    .zipWithIndex
    .foreach{ case (tlb, i) =>
      tlb.ready := ptwio.req(i).ready
      ptwio.req(i).bits := tlb.bits
      val vector_hit = if (refillBothTlb) Cat(ptw_resp_next.vector).orR
        else if (i < TlbEndVec(dtlb_ld_idx)) Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_ld_idx), TlbEndVec(dtlb_ld_idx))).orR
        else if (i < TlbEndVec(dtlb_st_idx)) Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_st_idx), TlbEndVec(dtlb_st_idx))).orR
        else Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_pf_idx), TlbEndVec(dtlb_pf_idx))).orR
      ptwio.req(i).valid := tlb.valid && !(ptw_resp_v && vector_hit && ptw_resp_next.data.hit(tlb.bits.vpn, tlbcsr.satp.asid, tlbcsr.vsatp.asid, tlbcsr.hgatp.vmid, allType = true, ignoreAsid = true))
    }
  dtlb.foreach(_.ptw.resp.bits := ptw_resp_next.data)
  if (refillBothTlb) {
    dtlb.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector).orR)
  } else {
    dtlb_ld.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_ld_idx), TlbEndVec(dtlb_ld_idx))).orR)
    dtlb_st.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_st_idx), TlbEndVec(dtlb_st_idx))).orR)
    dtlb_prefetch.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_pf_idx), TlbEndVec(dtlb_pf_idx))).orR)
  }
  dtlb_ld.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.take(LduCnt + HyuCnt + 1)).orR)
  dtlb_st.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.slice(LduCnt + HyuCnt + 1, LduCnt + HyuCnt + 1 + StaCnt)).orR)
  dtlb_prefetch.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.drop(LduCnt + HyuCnt + 1 + StaCnt)).orR)
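
  // dtlb PTW requests are funneled through PTWNewFilter into ptw.io.tlb(1);
  // the frontend itlb reaches the PTW through its own repeater on ptw.io.tlb(0)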
  val dtlbRepeater = PTWNewFilter(ldtlbParams.fenceDelay, ptwio, ptw.io.tlb(1), sfence, tlbcsr, l2tlbParams.dfilterSize)
  val itlbRepeater3 = PTWRepeaterNB(passReady = false, itlbParams.fenceDelay, io.fetch_to_mem.itlb, ptw.io.tlb(0), sfence, tlbcsr)

  lsq.io.debugTopDown.robHeadMissInDTlb := dtlbRepeater.io.rob_head_miss_in_tlb

  // pmp
  val pmp = Module(new PMP())
  pmp.io.distribute_csr <> csrCtrl.distribute_csr

  val pmp_checkers = Seq.fill(DTlbSize)(Module(new PMPChecker(4, leaveHitMux = true)))
  val pmp_check = pmp_checkers.map(_.io)
  for ((p, d) <- pmp_check zip dtlb_pmps) {
    p.apply(tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, d)
    require(p.req.bits.size.getWidth == d.bits.size.getWidth)
  }

  for (i <- 0 until LduCnt) {
    io.debug_ls.debugLsInfo(i) := loadUnits(i).io.debug_ls
  }
  for (i <- 0 until HyuCnt) {
    io.debug_ls.debugLsInfo.drop(LduCnt)(i) := hybridUnits(i).io.ldu_io.debug_ls
  }
  for (i <- 0 until StaCnt) {
    io.debug_ls.debugLsInfo.drop(LduCnt + HyuCnt)(i) := storeUnits(i).io.debug_ls
  }
  for (i <- 0 until HyuCnt) {
    io.debug_ls.debugLsInfo.drop(LduCnt + HyuCnt + StaCnt)(i) := hybridUnits(i).io.stu_io.debug_ls
  }

  io.mem_to_ooo.lsTopdownInfo := loadUnits.map(_.io.lsTopdownInfo) ++ hybridUnits.map(_.io.ldu_io.lsTopdownInfo)

  // trigger
  val tdata = RegInit(VecInit(Seq.fill(TriggerNum)(0.U.asTypeOf(new MatchTriggerIO))))
  val tEnable = RegInit(VecInit(Seq.fill(TriggerNum)(false.B)))
  tEnable := csrCtrl.mem_trigger.tEnableVec
  when(csrCtrl.mem_trigger.tUpdate.valid) {
    tdata(csrCtrl.mem_trigger.tUpdate.bits.addr) := csrCtrl.mem_trigger.tUpdate.bits.tdata
  }
  val triggerCanRaiseBpExp = csrCtrl.mem_trigger.triggerCanRaiseBpExp
  val debugMode = csrCtrl.mem_trigger.debugMode

  val backendTriggerTimingVec = VecInit(tdata.map(_.timing))
  val backendTriggerChainVec = VecInit(tdata.map(_.chain))

  XSDebug(tEnable.asUInt.orR, "Debug Mode: At least one store trigger is enabled\n")
  for (j <- 0 until TriggerNum)
    PrintTriggerInfo(tEnable(j), tdata(j))

  // The segment instruction is executed atomically.
  // After the segment instruction starts executing, no other instructions should be executed.
  // While vSegmentFlag is set, DCache/DTLB load port 0 is handed over to vSegmentUnit (see below).
  val vSegmentFlag = RegInit(false.B)

  when(GatedValidRegNext(vSegmentUnit.io.in.fire)) {
    vSegmentFlag := true.B
  }.elsewhen(GatedValidRegNext(vSegmentUnit.io.uopwriteback.valid)) {
    vSegmentFlag := false.B
  }

  // LoadUnit
  val correctMissTrain = Constantin.createRecord(s"CorrectMissTrain$hartId", initValue = false)

  for (i <- 0 until LduCnt) {
    loadUnits(i).io.redirect <> redirect

    // get input from dispatch
    loadUnits(i).io.ldin <> io.ooo_to_mem.issueLda(i)
    loadUnits(i).io.feedback_slow <> io.mem_to_ooo.ldaIqFeedback(i).feedbackSlow
    io.mem_to_ooo.ldaIqFeedback(i).feedbackFast := DontCare
    loadUnits(i).io.correctMissTrain := correctMissTrain
    io.mem_to_ooo.ldCancel.drop(HyuCnt)(i) := loadUnits(i).io.ldCancel
    io.mem_to_ooo.wakeup.drop(HyuCnt)(i) := loadUnits(i).io.wakeup

    // vector
    if (i < VlduCnt) {
      loadUnits(i).io.vecldout.ready := false.B
    } else {
      loadUnits(i).io.vecldin.valid := false.B
      loadUnits(i).io.vecldin.bits := DontCare
      loadUnits(i).io.vecldout.ready := false.B
    }

    // fast replay
    loadUnits(i).io.fast_rep_in <> loadUnits(i).io.fast_rep_out

    // SoftPrefetch to frontend (prefetch.i)
    loadUnits(i).io.ifetchPrefetch <> io.ifetchPrefetch(i)

    // dcache access
    loadUnits(i).io.dcache <> dcache.io.lsu.load(i)
    if (i == 0) {
      vSegmentUnit.io.rdcache := DontCare
      dcache.io.lsu.load(i).req.valid := loadUnits(i).io.dcache.req.valid || vSegmentUnit.io.rdcache.req.valid
      dcache.io.lsu.load(i).req.bits := Mux1H(Seq(
        vSegmentUnit.io.rdcache.req.valid -> vSegmentUnit.io.rdcache.req.bits,
        loadUnits(i).io.dcache.req.valid -> loadUnits(i).io.dcache.req.bits
      ))
      vSegmentUnit.io.rdcache.req.ready := dcache.io.lsu.load(i).req.ready
    }

    // Dcache requests must also be preempted by the segment.
    when(vSegmentFlag) {
      loadUnits(i).io.dcache.req.ready := false.B // Dcache is preempted.

      dcache.io.lsu.load(0).pf_source := vSegmentUnit.io.rdcache.pf_source
      dcache.io.lsu.load(0).s1_paddr_dup_lsu := vSegmentUnit.io.rdcache.s1_paddr_dup_lsu
      dcache.io.lsu.load(0).s1_paddr_dup_dcache := vSegmentUnit.io.rdcache.s1_paddr_dup_dcache
      dcache.io.lsu.load(0).s1_kill := vSegmentUnit.io.rdcache.s1_kill
      dcache.io.lsu.load(0).s2_kill := vSegmentUnit.io.rdcache.s2_kill
      dcache.io.lsu.load(0).s0_pc := vSegmentUnit.io.rdcache.s0_pc
      dcache.io.lsu.load(0).s1_pc := vSegmentUnit.io.rdcache.s1_pc
      dcache.io.lsu.load(0).s2_pc := vSegmentUnit.io.rdcache.s2_pc
      dcache.io.lsu.load(0).is128Req := vSegmentUnit.io.rdcache.is128Req
    }.otherwise {
      loadUnits(i).io.dcache.req.ready := dcache.io.lsu.load(i).req.ready

      dcache.io.lsu.load(0).pf_source := loadUnits(0).io.dcache.pf_source
      dcache.io.lsu.load(0).s1_paddr_dup_lsu := loadUnits(0).io.dcache.s1_paddr_dup_lsu
      dcache.io.lsu.load(0).s1_paddr_dup_dcache := loadUnits(0).io.dcache.s1_paddr_dup_dcache
      dcache.io.lsu.load(0).s1_kill := loadUnits(0).io.dcache.s1_kill
      dcache.io.lsu.load(0).s2_kill := loadUnits(0).io.dcache.s2_kill
      dcache.io.lsu.load(0).s0_pc := loadUnits(0).io.dcache.s0_pc
      dcache.io.lsu.load(0).s1_pc := loadUnits(0).io.dcache.s1_pc
      dcache.io.lsu.load(0).s2_pc := loadUnits(0).io.dcache.s2_pc
      dcache.io.lsu.load(0).is128Req := loadUnits(0).io.dcache.is128Req
    }

    // forward
    loadUnits(i).io.lsq.forward <> lsq.io.forward(i)
    loadUnits(i).io.sbuffer <> sbuffer.io.forward(i)
    loadUnits(i).io.ubuffer <> uncache.io.forward(i)
    loadUnits(i).io.tl_d_channel := dcache.io.lsu.forward_D(i)
    loadUnits(i).io.forward_mshr <> dcache.io.lsu.forward_mshr(i)
    // ld-ld violation check
    loadUnits(i).io.lsq.ldld_nuke_query <> lsq.io.ldu.ldld_nuke_query(i)
    loadUnits(i).io.lsq.stld_nuke_query <> lsq.io.ldu.stld_nuke_query(i)
    loadUnits(i).io.csrCtrl <> csrCtrl
    // dcache refill req
    // loadUnits(i).io.refill <> delayedDcacheRefill
    // dtlb
    loadUnits(i).io.tlb <> dtlb_reqs.take(LduCnt)(i)
    if (i == 0) { // port 0 is assigned to vSegmentUnit
      val vsegmentDtlbReqValid = vSegmentUnit.io.dtlb.req.valid // segment tlb request needs to be delayed by 1 cycle
      dtlb_reqs.take(LduCnt)(i).req.valid := loadUnits(i).io.tlb.req.valid || RegNext(vsegmentDtlbReqValid)
      vSegmentUnit.io.dtlb.req.ready := dtlb_reqs.take(LduCnt)(i).req.ready
      dtlb_reqs.take(LduCnt)(i).req.bits := ParallelPriorityMux(Seq(
        RegNext(vsegmentDtlbReqValid) -> RegEnable(vSegmentUnit.io.dtlb.req.bits, vsegmentDtlbReqValid),
        loadUnits(i).io.tlb.req.valid -> loadUnits(i).io.tlb.req.bits
      ))
    }
    // pmp
    loadUnits(i).io.pmp <> pmp_check(i).resp
    // st-ld violation query
    val stld_nuke_query = storeUnits.map(_.io.stld_nuke_query) ++ hybridUnits.map(_.io.stu_io.stld_nuke_query)
    for (s <- 0 until StorePipelineWidth) {
      loadUnits(i).io.stld_nuke_query(s) := stld_nuke_query(s)
    }
    loadUnits(i).io.lq_rep_full <> lsq.io.lq_rep_full
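
    // the pc captured at issue (loadPc below) is delayed with RegEnable so it arrives at the
    // prefetcher together with the train payload from the load pipeline; the pointer-chasing
    // case needs one register stage less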
    // load prefetch train
    prefetcherOpt.foreach(pf => {
      // sms will train on all miss load sources
      val source = loadUnits(i).io.prefetch_train
      pf.io.ld_in(i).valid := Mux(pf_train_on_hit,
        source.valid,
        source.valid && source.bits.isFirstIssue && source.bits.miss
      )
      pf.io.ld_in(i).bits := source.bits
      val loadPc = RegNext(io.ooo_to_mem.issueLda(i).bits.uop.pc) // for s1
      pf.io.ld_in(i).bits.uop.pc := Mux(
        loadUnits(i).io.s2_ptr_chasing,
        RegEnable(loadPc, loadUnits(i).io.s2_prefetch_spec),
        RegEnable(RegEnable(loadPc, loadUnits(i).io.s1_prefetch_spec), loadUnits(i).io.s2_prefetch_spec)
      )
    })
    l1PrefetcherOpt.foreach(pf => {
      // stream will train on all load sources
      val source = loadUnits(i).io.prefetch_train_l1
      pf.io.ld_in(i).valid := source.valid && source.bits.isFirstIssue
      pf.io.ld_in(i).bits := source.bits
    })

    // load to load fast forward: load(i) prefers data(i)
    val l2l_fwd_out = loadUnits.map(_.io.l2l_fwd_out) ++ hybridUnits.map(_.io.ldu_io.l2l_fwd_out)
    val fastPriority = (i until LduCnt + HyuCnt) ++ (0 until i)
    val fastValidVec = fastPriority.map(j => l2l_fwd_out(j).valid)
    val fastDataVec = fastPriority.map(j => l2l_fwd_out(j).data)
    val fastErrorVec = fastPriority.map(j => l2l_fwd_out(j).dly_ld_err)
    val fastMatchVec = fastPriority.map(j => io.ooo_to_mem.loadFastMatch(i)(j))
    loadUnits(i).io.l2l_fwd_in.valid := VecInit(fastValidVec).asUInt.orR
    loadUnits(i).io.l2l_fwd_in.data := ParallelPriorityMux(fastValidVec, fastDataVec)
    loadUnits(i).io.l2l_fwd_in.dly_ld_err := ParallelPriorityMux(fastValidVec, fastErrorVec)
    val fastMatch = ParallelPriorityMux(fastValidVec, fastMatchVec)
    loadUnits(i).io.ld_fast_match := fastMatch
    loadUnits(i).io.ld_fast_imm := io.ooo_to_mem.loadFastImm(i)
    loadUnits(i).io.ld_fast_fuOpType := io.ooo_to_mem.loadFastFuOpType(i)
    loadUnits(i).io.replay <> lsq.io.replay(i)

    val l2_hint = RegNext(io.l2_hint)

    // L2 Hint for DCache
    dcache.io.l2_hint <> l2_hint

    loadUnits(i).io.l2_hint <> l2_hint
    loadUnits(i).io.tlb_hint.id := dtlbRepeater.io.hint.get.req(i).id
    loadUnits(i).io.tlb_hint.full := dtlbRepeater.io.hint.get.req(i).full ||
      tlbreplay_reg(i) || dtlb_ld0_tlbreplay_reg(i)

    // passdown to lsq (load s2)
    lsq.io.ldu.ldin(i) <> loadUnits(i).io.lsq.ldin
    if (i == UncacheWBPort) {
      lsq.io.ldout(i) <> loadUnits(i).io.lsq.uncache
    } else {
      lsq.io.ldout(i).ready := true.B
      loadUnits(i).io.lsq.uncache.valid := false.B
      loadUnits(i).io.lsq.uncache.bits := DontCare
    }
    lsq.io.ld_raw_data(i) <> loadUnits(i).io.lsq.ld_raw_data
    lsq.io.ncOut(i) <> loadUnits(i).io.lsq.nc_ldin
    lsq.io.l2_hint.valid := l2_hint.valid
    lsq.io.l2_hint.bits.sourceId := l2_hint.bits.sourceId
    lsq.io.l2_hint.bits.isKeyword := l2_hint.bits.isKeyword

    lsq.io.tlb_hint <> dtlbRepeater.io.hint.get

    // connect misalignBuffer
    loadMisalignBuffer.io.req(i) <> loadUnits(i).io.misalign_buf

    if (i == MisalignWBPort) {
      loadUnits(i).io.misalign_ldin <> loadMisalignBuffer.io.splitLoadReq
      loadUnits(i).io.misalign_ldout <> loadMisalignBuffer.io.splitLoadResp
    } else {
      loadUnits(i).io.misalign_ldin.valid := false.B
      loadUnits(i).io.misalign_ldin.bits := DontCare
    }

    // alter writeback exception info
    io.mem_to_ooo.s3_delayed_load_error(i) := loadUnits(i).io.s3_dly_ld_err

    // update mem dependency predictor
    // io.memPredUpdate(i) := DontCare

    // --------------------------------
    // Load Triggers
    // --------------------------------
    loadUnits(i).io.fromCsrTrigger.tdataVec := tdata
    loadUnits(i).io.fromCsrTrigger.tEnableVec := tEnable
    loadUnits(i).io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
    loadUnits(i).io.fromCsrTrigger.debugMode := debugMode
  }
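
  // hybrid units: each provides both a load pipe and a store-address pipe; the load side uses
  // dcache/lsq/forward ports (LduCnt + i), the store side uses sta/sbuffer ports (StaCnt + i)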
  for (i <- 0 until HyuCnt) {
    hybridUnits(i).io.redirect <> redirect

    // get input from dispatch
    hybridUnits(i).io.lsin <> io.ooo_to_mem.issueHya(i)
    hybridUnits(i).io.feedback_slow <> io.mem_to_ooo.hyuIqFeedback(i).feedbackSlow
    hybridUnits(i).io.feedback_fast <> io.mem_to_ooo.hyuIqFeedback(i).feedbackFast
    hybridUnits(i).io.correctMissTrain := correctMissTrain
    io.mem_to_ooo.ldCancel.take(HyuCnt)(i) := hybridUnits(i).io.ldu_io.ldCancel
    io.mem_to_ooo.wakeup.take(HyuCnt)(i) := hybridUnits(i).io.ldu_io.wakeup

    // ------------------------------------
    // Load Port
    // ------------------------------------
    // fast replay
    hybridUnits(i).io.ldu_io.fast_rep_in <> hybridUnits(i).io.ldu_io.fast_rep_out

    // get input from dispatch
    hybridUnits(i).io.ldu_io.dcache <> dcache.io.lsu.load(LduCnt + i)
    hybridUnits(i).io.stu_io.dcache <> dcache.io.lsu.sta(StaCnt + i)

    // dcache access
    hybridUnits(i).io.ldu_io.lsq.forward <> lsq.io.forward(LduCnt + i)
    // forward
    hybridUnits(i).io.ldu_io.sbuffer <> sbuffer.io.forward(LduCnt + i)
    hybridUnits(i).io.ldu_io.ubuffer <> uncache.io.forward(LduCnt + i)
    // hybridUnits(i).io.ldu_io.vec_forward <> vsFlowQueue.io.forward(LduCnt + i)
    hybridUnits(i).io.ldu_io.vec_forward := DontCare
    hybridUnits(i).io.ldu_io.tl_d_channel := dcache.io.lsu.forward_D(LduCnt + i)
    hybridUnits(i).io.ldu_io.forward_mshr <> dcache.io.lsu.forward_mshr(LduCnt + i)
    // ld-ld violation check
    hybridUnits(i).io.ldu_io.lsq.ldld_nuke_query <> lsq.io.ldu.ldld_nuke_query(LduCnt + i)
    hybridUnits(i).io.ldu_io.lsq.stld_nuke_query <> lsq.io.ldu.stld_nuke_query(LduCnt + i)
    hybridUnits(i).io.csrCtrl <> csrCtrl
    // dcache refill req
    hybridUnits(i).io.ldu_io.tlb_hint.id := dtlbRepeater.io.hint.get.req(LduCnt + i).id
    hybridUnits(i).io.ldu_io.tlb_hint.full := dtlbRepeater.io.hint.get.req(LduCnt + i).full ||
      tlbreplay_reg(LduCnt + i) || dtlb_ld0_tlbreplay_reg(LduCnt + i)

    // dtlb
    hybridUnits(i).io.tlb <> dtlb_ld.head.requestor(LduCnt + i)
    // pmp
    hybridUnits(i).io.pmp <> pmp_check.drop(LduCnt)(i).resp
    // st-ld violation query
    val stld_nuke_query = VecInit(storeUnits.map(_.io.stld_nuke_query) ++ hybridUnits.map(_.io.stu_io.stld_nuke_query))
    hybridUnits(i).io.ldu_io.stld_nuke_query := stld_nuke_query
    hybridUnits(i).io.ldu_io.lq_rep_full <> lsq.io.lq_rep_full
    // load prefetch train
    prefetcherOpt.foreach(pf => {
      val source = hybridUnits(i).io.prefetch_train
      pf.io.ld_in(LduCnt + i).valid := Mux(pf_train_on_hit,
        source.valid,
        source.valid && source.bits.isFirstIssue && source.bits.miss
      )
      pf.io.ld_in(LduCnt + i).bits := source.bits
      pf.io.ld_in(LduCnt + i).bits.uop.pc := Mux(hybridUnits(i).io.ldu_io.s2_ptr_chasing, io.ooo_to_mem.hybridPc(i), RegNext(io.ooo_to_mem.hybridPc(i)))
    })
    l1PrefetcherOpt.foreach(pf => {
      // stream will train on all load sources
      val source = hybridUnits(i).io.prefetch_train_l1
      pf.io.ld_in(LduCnt + i).valid := source.valid && source.bits.isFirstIssue &&
        FuType.isLoad(source.bits.uop.fuType)
      pf.io.ld_in(LduCnt + i).bits := source.bits
      pf.io.st_in(StaCnt + i).valid := false.B
      pf.io.st_in(StaCnt + i).bits := DontCare
    })
    prefetcherOpt.foreach(pf => {
      val source = hybridUnits(i).io.prefetch_train
      pf.io.st_in(StaCnt + i).valid := Mux(pf_train_on_hit,
        source.valid,
        source.valid && source.bits.isFirstIssue && source.bits.miss
      ) && FuType.isStore(source.bits.uop.fuType)
      pf.io.st_in(StaCnt + i).bits := source.bits
      pf.io.st_in(StaCnt + i).bits.uop.pc := RegNext(io.ooo_to_mem.hybridPc(i))
    })

    // load to load fast forward: load(i) prefers data(i)
    val l2l_fwd_out = loadUnits.map(_.io.l2l_fwd_out) ++ hybridUnits.map(_.io.ldu_io.l2l_fwd_out)
    val fastPriority = (LduCnt + i until LduCnt + HyuCnt) ++ (0 until LduCnt + i)
    val fastValidVec = fastPriority.map(j => l2l_fwd_out(j).valid)
    val fastDataVec = fastPriority.map(j => l2l_fwd_out(j).data)
    val fastErrorVec = fastPriority.map(j => l2l_fwd_out(j).dly_ld_err)
    val fastMatchVec = fastPriority.map(j => io.ooo_to_mem.loadFastMatch(LduCnt + i)(j))
    hybridUnits(i).io.ldu_io.l2l_fwd_in.valid := VecInit(fastValidVec).asUInt.orR
    hybridUnits(i).io.ldu_io.l2l_fwd_in.data := ParallelPriorityMux(fastValidVec, fastDataVec)
    hybridUnits(i).io.ldu_io.l2l_fwd_in.dly_ld_err := ParallelPriorityMux(fastValidVec, fastErrorVec)
    val fastMatch = ParallelPriorityMux(fastValidVec, fastMatchVec)
    hybridUnits(i).io.ldu_io.ld_fast_match := fastMatch
    hybridUnits(i).io.ldu_io.ld_fast_imm := io.ooo_to_mem.loadFastImm(LduCnt + i)
    hybridUnits(i).io.ldu_io.ld_fast_fuOpType := io.ooo_to_mem.loadFastFuOpType(LduCnt + i)
    hybridUnits(i).io.ldu_io.replay <> lsq.io.replay(LduCnt + i)
    hybridUnits(i).io.ldu_io.l2_hint <> io.l2_hint

    // uncache
    lsq.io.ldout.drop(LduCnt)(i) <> hybridUnits(i).io.ldu_io.lsq.uncache
    lsq.io.ld_raw_data.drop(LduCnt)(i) <> hybridUnits(i).io.ldu_io.lsq.ld_raw_data

    // passdown to lsq (load s2)
    hybridUnits(i).io.ldu_io.lsq.nc_ldin.valid := false.B
    hybridUnits(i).io.ldu_io.lsq.nc_ldin.bits := DontCare
    lsq.io.ldu.ldin(LduCnt + i) <> hybridUnits(i).io.ldu_io.lsq.ldin
    // Lsq to sta unit
    lsq.io.sta.storeMaskIn(StaCnt + i) <> hybridUnits(i).io.stu_io.st_mask_out

    // Lsq to std unit's rs
    lsq.io.std.storeDataIn(StaCnt + i) := stData(StaCnt + i)
    lsq.io.std.storeDataIn(StaCnt + i).valid := stData(StaCnt + i).valid && !st_data_atomics(StaCnt + i)
    // prefetch
    hybridUnits(i).io.stu_io.prefetch_req <> sbuffer.io.store_prefetch(StaCnt + i)

    io.mem_to_ooo.s3_delayed_load_error(LduCnt + i) := hybridUnits(i).io.ldu_io.s3_dly_ld_err

    // ------------------------------------
    // Store Port
    // ------------------------------------
    hybridUnits(i).io.stu_io.lsq <> lsq.io.sta.storeAddrIn.takeRight(HyuCnt)(i)
    hybridUnits(i).io.stu_io.lsq_replenish <> lsq.io.sta.storeAddrInRe.takeRight(HyuCnt)(i)

    lsq.io.sta.storeMaskIn.takeRight(HyuCnt)(i) <> hybridUnits(i).io.stu_io.st_mask_out
    io.mem_to_ooo.stIn.takeRight(HyuCnt)(i).valid := hybridUnits(i).io.stu_io.issue.valid
    io.mem_to_ooo.stIn.takeRight(HyuCnt)(i).bits := hybridUnits(i).io.stu_io.issue.bits

    // ------------------------------------
    // Vector Store Port
    // ------------------------------------
    hybridUnits(i).io.vec_stu_io.isFirstIssue := true.B

    // -------------------------
    // Store Triggers
    // -------------------------
    hybridUnits(i).io.fromCsrTrigger.tdataVec := tdata
    hybridUnits(i).io.fromCsrTrigger.tEnableVec := tEnable
    hybridUnits(i).io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
    hybridUnits(i).io.fromCsrTrigger.debugMode := debugMode
  }
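
  // the misalign buffers track ROB commit state and replay split accesses through
  // load port MisalignWBPort and store pipe 0 (see the splitLoadReq/splitStoreReq connections)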
  // misalignBuffer
  loadMisalignBuffer.io.redirect <> redirect
  loadMisalignBuffer.io.rob.lcommit := io.ooo_to_mem.lsqio.lcommit
  loadMisalignBuffer.io.rob.scommit := io.ooo_to_mem.lsqio.scommit
  loadMisalignBuffer.io.rob.pendingMMIOld := io.ooo_to_mem.lsqio.pendingMMIOld
  loadMisalignBuffer.io.rob.pendingld := io.ooo_to_mem.lsqio.pendingld
  loadMisalignBuffer.io.rob.pendingst := io.ooo_to_mem.lsqio.pendingst
  loadMisalignBuffer.io.rob.pendingVst := io.ooo_to_mem.lsqio.pendingVst
  loadMisalignBuffer.io.rob.commit := io.ooo_to_mem.lsqio.commit
  loadMisalignBuffer.io.rob.pendingPtr := io.ooo_to_mem.lsqio.pendingPtr
  loadMisalignBuffer.io.rob.pendingPtrNext := io.ooo_to_mem.lsqio.pendingPtrNext

  lsq.io.loadMisalignFull := loadMisalignBuffer.io.loadMisalignFull

  storeMisalignBuffer.io.redirect <> redirect
  storeMisalignBuffer.io.rob.lcommit := io.ooo_to_mem.lsqio.lcommit
  storeMisalignBuffer.io.rob.scommit := io.ooo_to_mem.lsqio.scommit
  storeMisalignBuffer.io.rob.pendingMMIOld := io.ooo_to_mem.lsqio.pendingMMIOld
  storeMisalignBuffer.io.rob.pendingld := io.ooo_to_mem.lsqio.pendingld
  storeMisalignBuffer.io.rob.pendingst := io.ooo_to_mem.lsqio.pendingst
  storeMisalignBuffer.io.rob.pendingVst := io.ooo_to_mem.lsqio.pendingVst
  storeMisalignBuffer.io.rob.commit := io.ooo_to_mem.lsqio.commit
  storeMisalignBuffer.io.rob.pendingPtr := io.ooo_to_mem.lsqio.pendingPtr
  storeMisalignBuffer.io.rob.pendingPtrNext := io.ooo_to_mem.lsqio.pendingPtrNext

  lsq.io.maControl <> storeMisalignBuffer.io.sqControl

  lsq.io.cmoOpReq <> dcache.io.cmoOpReq
  lsq.io.cmoOpResp <> dcache.io.cmoOpResp

  // Prefetcher
  val StreamDTLBPortIndex = TlbStartVec(dtlb_ld_idx) + LduCnt + HyuCnt
  val PrefetcherDTLBPortIndex = TlbStartVec(dtlb_pf_idx)
  val L2toL1DLBPortIndex = TlbStartVec(dtlb_pf_idx) + 1
  prefetcherOpt match {
    case Some(pf) =>
      dtlb_reqs(PrefetcherDTLBPortIndex) <> pf.io.tlb_req
      pf.io.pmp_resp := pmp_check(PrefetcherDTLBPortIndex).resp
    case None =>
      dtlb_reqs(PrefetcherDTLBPortIndex) := DontCare
      dtlb_reqs(PrefetcherDTLBPortIndex).req.valid := false.B
      dtlb_reqs(PrefetcherDTLBPortIndex).resp.ready := true.B
  }
  l1PrefetcherOpt match {
    case Some(pf) =>
      dtlb_reqs(StreamDTLBPortIndex) <> pf.io.tlb_req
      pf.io.pmp_resp := pmp_check(StreamDTLBPortIndex).resp
    case None =>
      dtlb_reqs(StreamDTLBPortIndex) := DontCare
      dtlb_reqs(StreamDTLBPortIndex).req.valid := false.B
      dtlb_reqs(StreamDTLBPortIndex).resp.ready := true.B
  }
  dtlb_reqs(L2toL1DLBPortIndex) <> io.l2_tlb_req
  dtlb_reqs(L2toL1DLBPortIndex).resp.ready := true.B
  io.l2_pmp_resp := pmp_check(L2toL1DLBPortIndex).resp

  // StoreUnit
  for (i <- 0 until StdCnt) {
    stdExeUnits(i).io.flush <> redirect
    stdExeUnits(i).io.in.valid := io.ooo_to_mem.issueStd(i).valid
    io.ooo_to_mem.issueStd(i).ready := stdExeUnits(i).io.in.ready
    stdExeUnits(i).io.in.bits := io.ooo_to_mem.issueStd(i).bits
  }

  for (i <- 0 until StaCnt) {
    val stu = storeUnits(i)

    stu.io.redirect <> redirect
    stu.io.csrCtrl <> csrCtrl
    stu.io.dcache <> dcache.io.lsu.sta(i)
    stu.io.feedback_slow <> io.mem_to_ooo.staIqFeedback(i).feedbackSlow
    stu.io.stin <> io.ooo_to_mem.issueSta(i)
    stu.io.lsq <> lsq.io.sta.storeAddrIn(i)
    stu.io.lsq_replenish <> lsq.io.sta.storeAddrInRe(i)
    // dtlb
    stu.io.tlb <> dtlb_st.head.requestor(i)
    // pmp_check ports follow the dtlb port layout: loads/hybrids plus the stream prefetcher
    // come first (LduCnt + HyuCnt + 1 ports), then the store pipes
    stu.io.pmp <> pmp_check(LduCnt + HyuCnt + 1 + i).resp

    // -------------------------
    // Store Triggers
    // -------------------------
    stu.io.fromCsrTrigger.tdataVec := tdata
    stu.io.fromCsrTrigger.tEnableVec := tEnable
    stu.io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
    stu.io.fromCsrTrigger.debugMode := debugMode

    // prefetch
    stu.io.prefetch_req <> sbuffer.io.store_prefetch(i)

    // store unit does not need fast feedback
    io.mem_to_ooo.staIqFeedback(i).feedbackFast := DontCare

    // Lsq to sta unit
    lsq.io.sta.storeMaskIn(i) <> stu.io.st_mask_out

    // connect misalignBuffer
    storeMisalignBuffer.io.req(i) <> stu.io.misalign_buf

    if (i == 0) {
      stu.io.misalign_stin <> storeMisalignBuffer.io.splitStoreReq
      stu.io.misalign_stout <> storeMisalignBuffer.io.splitStoreResp
    } else {
      stu.io.misalign_stin.valid := false.B
      stu.io.misalign_stin.bits := DontCare
    }

    // Lsq to std unit's rs
    if (i < VstuCnt) {
      when (vsSplit(i).io.vstd.get.valid) {
        lsq.io.std.storeDataIn(i).valid := true.B
        lsq.io.std.storeDataIn(i).bits := vsSplit(i).io.vstd.get.bits
        stData(i).ready := false.B
      }.otherwise {
        lsq.io.std.storeDataIn(i).valid := stData(i).valid && !st_data_atomics(i)
        lsq.io.std.storeDataIn(i).bits.uop := stData(i).bits.uop
        lsq.io.std.storeDataIn(i).bits.data := stData(i).bits.data
        lsq.io.std.storeDataIn(i).bits.mask.map(_ := 0.U)
        lsq.io.std.storeDataIn(i).bits.vdIdx.map(_ := 0.U)
        lsq.io.std.storeDataIn(i).bits.vdIdxInField.map(_ := 0.U)
        stData(i).ready := true.B
      }
    } else {
      lsq.io.std.storeDataIn(i).valid := stData(i).valid && !st_data_atomics(i)
      lsq.io.std.storeDataIn(i).bits.uop := stData(i).bits.uop
      lsq.io.std.storeDataIn(i).bits.data := stData(i).bits.data
      lsq.io.std.storeDataIn(i).bits.mask.map(_ := 0.U)
      lsq.io.std.storeDataIn(i).bits.vdIdx.map(_ := 0.U)
      lsq.io.std.storeDataIn(i).bits.vdIdxInField.map(_ := 0.U)
      stData(i).ready := true.B
    }
    lsq.io.std.storeDataIn.map(_.bits.debug := 0.U.asTypeOf(new DebugBundle))
    lsq.io.std.storeDataIn.foreach(_.bits.isFromLoadUnit := DontCare)

    // store prefetch train
    l1PrefetcherOpt.foreach(pf => {
      // stream will train on all load sources
      pf.io.st_in(i).valid := false.B
      pf.io.st_in(i).bits := DontCare
    })

    prefetcherOpt.foreach(pf => {
      pf.io.st_in(i).valid := Mux(pf_train_on_hit,
        stu.io.prefetch_train.valid,
        stu.io.prefetch_train.valid && stu.io.prefetch_train.bits.isFirstIssue && (
          stu.io.prefetch_train.bits.miss
        )
      )
      pf.io.st_in(i).bits := stu.io.prefetch_train.bits
      pf.io.st_in(i).bits.uop.pc := RegEnable(RegEnable(io.ooo_to_mem.storePc(i), stu.io.s1_prefetch_spec), stu.io.s2_prefetch_spec)
    })
when store issue, broadcast issued sqPtr to wake up the following insts 1275 // io.stIn(i).valid := io.issue(exuParameters.LduCnt + i).valid 1276 // io.stIn(i).bits := io.issue(exuParameters.LduCnt + i).bits 1277 io.mem_to_ooo.stIn(i).valid := stu.io.issue.valid 1278 io.mem_to_ooo.stIn(i).bits := stu.io.issue.bits 1279 1280 stu.io.stout.ready := true.B 1281 1282 // vector 1283 if (i < VstuCnt) { 1284 stu.io.vecstin <> vsSplit(i).io.out 1285 // vsFlowQueue.io.pipeFeedback(i) <> stu.io.vec_feedback_slow // need connect 1286 } else { 1287 stu.io.vecstin.valid := false.B 1288 stu.io.vecstin.bits := DontCare 1289 stu.io.vecstout.ready := false.B 1290 } 1291 stu.io.vec_isFirstIssue := true.B // TODO 1292 } 1293 1294 // mmio store writeback will use store writeback port 0 1295 val mmioStout = WireInit(0.U.asTypeOf(lsq.io.mmioStout)) 1296 NewPipelineConnect( 1297 lsq.io.mmioStout, mmioStout, mmioStout.fire, 1298 false.B, 1299 Option("mmioStOutConnect") 1300 ) 1301 mmioStout.ready := false.B 1302 when (mmioStout.valid && !storeUnits(0).io.stout.valid) { 1303 stOut(0).valid := true.B 1304 stOut(0).bits := mmioStout.bits 1305 mmioStout.ready := true.B 1306 } 1307 1308 // vec mmio writeback 1309 lsq.io.vecmmioStout.ready := false.B 1310 1311 // miss align buffer will overwrite stOut(0) 1312 val storeMisalignCanWriteBack = !mmioStout.valid && !storeUnits(0).io.stout.valid && !storeUnits(0).io.vecstout.valid 1313 storeMisalignBuffer.io.writeBack.ready := storeMisalignCanWriteBack 1314 storeMisalignBuffer.io.storeOutValid := storeUnits(0).io.stout.valid 1315 storeMisalignBuffer.io.storeVecOutValid := storeUnits(0).io.vecstout.valid 1316 when (storeMisalignBuffer.io.writeBack.valid && storeMisalignCanWriteBack) { 1317 stOut(0).valid := true.B 1318 stOut(0).bits := storeMisalignBuffer.io.writeBack.bits 1319 } 1320 1321 // Uncache 1322 uncache.io.enableOutstanding := io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable 1323 uncache.io.hartId := io.hartId 1324 lsq.io.uncacheOutstanding := io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable 1325 1326 // Lsq 1327 io.mem_to_ooo.lsqio.mmio := lsq.io.rob.mmio 1328 io.mem_to_ooo.lsqio.uop := lsq.io.rob.uop 1329 lsq.io.rob.lcommit := io.ooo_to_mem.lsqio.lcommit 1330 lsq.io.rob.scommit := io.ooo_to_mem.lsqio.scommit 1331 lsq.io.rob.pendingMMIOld := io.ooo_to_mem.lsqio.pendingMMIOld 1332 lsq.io.rob.pendingld := io.ooo_to_mem.lsqio.pendingld 1333 lsq.io.rob.pendingst := io.ooo_to_mem.lsqio.pendingst 1334 lsq.io.rob.pendingVst := io.ooo_to_mem.lsqio.pendingVst 1335 lsq.io.rob.commit := io.ooo_to_mem.lsqio.commit 1336 lsq.io.rob.pendingPtr := io.ooo_to_mem.lsqio.pendingPtr 1337 lsq.io.rob.pendingPtrNext := io.ooo_to_mem.lsqio.pendingPtrNext 1338 1339 // lsq.io.rob <> io.lsqio.rob 1340 lsq.io.enq <> io.ooo_to_mem.enqLsq 1341 lsq.io.brqRedirect <> redirect 1342 1343 // violation rollback 1344 def selectOldestRedirect(xs: Seq[Valid[Redirect]]): Vec[Bool] = { 1345 val compareVec = (0 until xs.length).map(i => (0 until i).map(j => isAfter(xs(j).bits.robIdx, xs(i).bits.robIdx))) 1346 val resultOnehot = VecInit((0 until xs.length).map(i => Cat((0 until xs.length).map(j => 1347 (if (j < i) !xs(j).valid || compareVec(i)(j) 1348 else if (j == i) xs(i).valid 1349 else !xs(j).valid || !compareVec(j)(i)) 1350 )).andR)) 1351 resultOnehot 1352 } 1353 val allRedirect = loadUnits.map(_.io.rollback) ++ hybridUnits.map(_.io.ldu_io.rollback) ++ lsq.io.nack_rollback ++ lsq.io.nuke_rollback 1354 val oldestOneHot = selectOldestRedirect(allRedirect) 1355 val oldestRedirect = 
WireDefault(Mux1H(oldestOneHot, allRedirect)) 1356 // memory replay would not cause IAF/IPF/IGPF 1357 oldestRedirect.bits.cfiUpdate.backendIAF := false.B 1358 oldestRedirect.bits.cfiUpdate.backendIPF := false.B 1359 oldestRedirect.bits.cfiUpdate.backendIGPF := false.B 1360 io.mem_to_ooo.memoryViolation := oldestRedirect 1361 io.mem_to_ooo.lsqio.lqCanAccept := lsq.io.lqCanAccept 1362 io.mem_to_ooo.lsqio.sqCanAccept := lsq.io.sqCanAccept 1363 1364 // lsq.io.uncache <> uncache.io.lsq 1365 val s_idle :: s_scalar_uncache :: s_vector_uncache :: Nil = Enum(3) 1366 val uncacheState = RegInit(s_idle) 1367 val uncacheReq = Wire(Decoupled(new UncacheWordReq)) 1368 val uncacheIdResp = uncache.io.lsq.idResp 1369 val uncacheResp = Wire(Decoupled(new UncacheWordResp)) 1370 1371 uncacheReq.bits := DontCare 1372 uncacheReq.valid := false.B 1373 uncacheReq.ready := false.B 1374 uncacheResp.bits := DontCare 1375 uncacheResp.valid := false.B 1376 uncacheResp.ready := false.B 1377 lsq.io.uncache.req.ready := false.B 1378 lsq.io.uncache.idResp.valid := false.B 1379 lsq.io.uncache.idResp.bits := DontCare 1380 lsq.io.uncache.resp.valid := false.B 1381 lsq.io.uncache.resp.bits := DontCare 1382 1383 switch (uncacheState) { 1384 is (s_idle) { 1385 when (uncacheReq.fire) { 1386 when (lsq.io.uncache.req.valid) { 1387 when (!lsq.io.uncache.req.bits.nc || !io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) { 1388 uncacheState := s_scalar_uncache 1389 } 1390 }.otherwise { 1391 // val isStore = vsFlowQueue.io.uncache.req.bits.cmd === MemoryOpConstants.M_XWR 1392 when (!io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) { 1393 uncacheState := s_vector_uncache 1394 } 1395 } 1396 } 1397 } 1398 1399 is (s_scalar_uncache) { 1400 when (uncacheResp.fire) { 1401 uncacheState := s_idle 1402 } 1403 } 1404 1405 is (s_vector_uncache) { 1406 when (uncacheResp.fire) { 1407 uncacheState := s_idle 1408 } 1409 } 1410 } 1411 1412 when (lsq.io.uncache.req.valid) { 1413 uncacheReq <> lsq.io.uncache.req 1414 } 1415 when (io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) { 1416 lsq.io.uncache.resp <> uncacheResp 1417 lsq.io.uncache.idResp <> uncacheIdResp 1418 }.otherwise { 1419 when (uncacheState === s_scalar_uncache) { 1420 lsq.io.uncache.resp <> uncacheResp 1421 lsq.io.uncache.idResp <> uncacheIdResp 1422 } 1423 } 1424 // delay dcache refill for 1 cycle for better timing 1425 AddPipelineReg(uncacheReq, uncache.io.lsq.req, false.B) 1426 AddPipelineReg(uncache.io.lsq.resp, uncacheResp, false.B) 1427 1428 //lsq.io.refill := delayedDcacheRefill 1429 lsq.io.release := dcache.io.lsu.release 1430 lsq.io.lqCancelCnt <> io.mem_to_ooo.lqCancelCnt 1431 lsq.io.sqCancelCnt <> io.mem_to_ooo.sqCancelCnt 1432 lsq.io.lqDeq <> io.mem_to_ooo.lqDeq 1433 lsq.io.sqDeq <> io.mem_to_ooo.sqDeq 1434 // Todo: assign these 1435 io.mem_to_ooo.sqDeqPtr := lsq.io.sqDeqPtr 1436 io.mem_to_ooo.lqDeqPtr := lsq.io.lqDeqPtr 1437 lsq.io.tl_d_channel <> dcache.io.lsu.tl_d_channel 1438 1439 // LSQ to store buffer 1440 lsq.io.sbuffer <> sbuffer.io.in 1441 sbuffer.io.in(0).valid := lsq.io.sbuffer(0).valid || vSegmentUnit.io.sbuffer.valid 1442 sbuffer.io.in(0).bits := Mux1H(Seq( 1443 vSegmentUnit.io.sbuffer.valid -> vSegmentUnit.io.sbuffer.bits, 1444 lsq.io.sbuffer(0).valid -> lsq.io.sbuffer(0).bits 1445 )) 1446 vSegmentUnit.io.sbuffer.ready := sbuffer.io.in(0).ready 1447 lsq.io.sqEmpty <> sbuffer.io.sqempty 1448 dcache.io.force_write := lsq.io.force_write 1449 1450 // Initialize when unenabled difftest. 
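  // (i.e. the difftest bookkeeping ports are tied off with DontCare by default;
  // when env.EnableDifftest is set, the block below drives the sbuffer difftest
  // ports from the LSQ, with vSegmentUnit taking priority on port 0.)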
  sbuffer.io.vecDifftestInfo := DontCare
  lsq.io.sbufferVecDifftestInfo := DontCare
  vSegmentUnit.io.vecDifftestInfo := DontCare
  if (env.EnableDifftest) {
    sbuffer.io.vecDifftestInfo.zipWithIndex.map{ case (sbufferPort, index) =>
      if (index == 0) {
        val vSegmentDifftestValid = vSegmentUnit.io.vecDifftestInfo.valid
        sbufferPort.valid := Mux(vSegmentDifftestValid, vSegmentUnit.io.vecDifftestInfo.valid, lsq.io.sbufferVecDifftestInfo(0).valid)
        sbufferPort.bits := Mux(vSegmentDifftestValid, vSegmentUnit.io.vecDifftestInfo.bits, lsq.io.sbufferVecDifftestInfo(0).bits)

        vSegmentUnit.io.vecDifftestInfo.ready := sbufferPort.ready
        lsq.io.sbufferVecDifftestInfo(0).ready := sbufferPort.ready
      } else {
        sbufferPort <> lsq.io.sbufferVecDifftestInfo(index)
      }
    }
  }

  // lsq.io.vecStoreRetire <> vsFlowQueue.io.sqRelease
  // lsq.io.vecWriteback.valid := vlWrapper.io.uopWriteback.fire &&
  //   vlWrapper.io.uopWriteback.bits.uop.vpu.lastUop
  // lsq.io.vecWriteback.bits := vlWrapper.io.uopWriteback.bits

  // vector
  val vLoadCanAccept = (0 until VlduCnt).map(i =>
    vlSplit(i).io.in.ready && VlduType.isVecLd(io.ooo_to_mem.issueVldu(i).bits.uop.fuOpType)
  )
  val vStoreCanAccept = (0 until VstuCnt).map(i =>
    vsSplit(i).io.in.ready && VstuType.isVecSt(io.ooo_to_mem.issueVldu(i).bits.uop.fuOpType)
  )
  val isSegment = io.ooo_to_mem.issueVldu.head.valid && isVsegls(io.ooo_to_mem.issueVldu.head.bits.uop.fuType)
  val isFixVlUop = io.ooo_to_mem.issueVldu.map{x =>
    x.bits.uop.vpu.isVleff && x.bits.uop.vpu.lastUop && x.valid
  }

  // init ports
  /**
   * TODO: the split vsMergeBuffers could perhaps be removed, if one RS could
   * accept two feedbacks or did not need to replay uops. For now:
   *   RS0 -> VsSplit0 -> stu0 -> vsMergebuffer0 -> feedback -> RS0
   *   RS1 -> VsSplit1 -> stu1 -> vsMergebuffer1 -> feedback -> RS1
   *
   * Vector loads do not need feedback:
   *   RS0 -> VlSplit0 -> ldu0 -> |
   *   RS1 -> VlSplit1 -> ldu1 -> | -> vlMergebuffer
   *         replayIO  -> ldu3 -> |
   */
  (0 until VstuCnt).foreach{i =>
    vsMergeBuffer(i).io.fromPipeline := DontCare
    vsMergeBuffer(i).io.fromSplit := DontCare

    vsMergeBuffer(i).io.fromMisalignBuffer.get.flush := storeMisalignBuffer.io.toVecStoreMergeBuffer(i).flush
    vsMergeBuffer(i).io.fromMisalignBuffer.get.mbIndex := storeMisalignBuffer.io.toVecStoreMergeBuffer(i).mbIndex
  }

  (0 until VstuCnt).foreach{i =>
    vsSplit(i).io.redirect <> redirect
    vsSplit(i).io.in <> io.ooo_to_mem.issueVldu(i)
    vsSplit(i).io.in.valid := io.ooo_to_mem.issueVldu(i).valid &&
      vStoreCanAccept(i) && !isSegment
    vsSplit(i).io.toMergeBuffer <> vsMergeBuffer(i).io.fromSplit.head
    NewPipelineConnect(
      vsSplit(i).io.out, storeUnits(i).io.vecstin, storeUnits(i).io.vecstin.fire,
      Mux(vsSplit(i).io.out.fire, vsSplit(i).io.out.bits.uop.robIdx.needFlush(io.redirect), storeUnits(i).io.vecstin.bits.uop.robIdx.needFlush(io.redirect)),
      Option("VsSplitConnectStu")
    )
    vsSplit(i).io.vstd.get := DontCare // Todo: Discuss how to pass vector store data

    vsSplit(i).io.vstdMisalign.get.storeMisalignBufferEmpty := !storeMisalignBuffer.io.full
    vsSplit(i).io.vstdMisalign.get.storePipeEmpty := !storeUnits(i).io.s0_s1_valid
  }
  (0 until VlduCnt).foreach{i =>
    vlSplit(i).io.redirect <> redirect
    vlSplit(i).io.in <> io.ooo_to_mem.issueVldu(i)
    vlSplit(i).io.in.valid := io.ooo_to_mem.issueVldu(i).valid &&
      vLoadCanAccept(i) && !isSegment && !isFixVlUop(i)
    vlSplit(i).io.toMergeBuffer <> vlMergeBuffer.io.fromSplit(i)
    vlSplit(i).io.threshold.get.valid := vlMergeBuffer.io.toSplit.get.threshold
    vlSplit(i).io.threshold.get.bits := lsq.io.lqDeqPtr
    NewPipelineConnect(
      vlSplit(i).io.out, loadUnits(i).io.vecldin, loadUnits(i).io.vecldin.fire,
      Mux(vlSplit(i).io.out.fire, vlSplit(i).io.out.bits.uop.robIdx.needFlush(io.redirect), loadUnits(i).io.vecldin.bits.uop.robIdx.needFlush(io.redirect)),
      Option("VlSplitConnectLdu")
    )

    // Subsequent instructions will be blocked
    vfofBuffer.io.in(i).valid := io.ooo_to_mem.issueVldu(i).valid
    vfofBuffer.io.in(i).bits := io.ooo_to_mem.issueVldu(i).bits
  }
  (0 until LduCnt).foreach{i =>
    loadUnits(i).io.vecldout.ready := vlMergeBuffer.io.fromPipeline(i).ready
    loadMisalignBuffer.io.vecWriteBack.ready := true.B

    if (i == MisalignWBPort) {
      when(loadUnits(i).io.vecldout.valid) {
        vlMergeBuffer.io.fromPipeline(i).valid := loadUnits(i).io.vecldout.valid
        vlMergeBuffer.io.fromPipeline(i).bits := loadUnits(i).io.vecldout.bits
      } .otherwise {
        vlMergeBuffer.io.fromPipeline(i).valid := loadMisalignBuffer.io.vecWriteBack.valid
        vlMergeBuffer.io.fromPipeline(i).bits := loadMisalignBuffer.io.vecWriteBack.bits
      }
    } else {
      vlMergeBuffer.io.fromPipeline(i).valid := loadUnits(i).io.vecldout.valid
      vlMergeBuffer.io.fromPipeline(i).bits := loadUnits(i).io.vecldout.bits
    }
  }

  (0 until StaCnt).foreach{i =>
    if (i < VstuCnt) {
      storeUnits(i).io.vecstout.ready := true.B
      storeMisalignBuffer.io.vecWriteBack(i).ready := vsMergeBuffer(i).io.fromPipeline.head.ready

      when(storeUnits(i).io.vecstout.valid) {
        vsMergeBuffer(i).io.fromPipeline.head.valid := storeUnits(i).io.vecstout.valid
        vsMergeBuffer(i).io.fromPipeline.head.bits := storeUnits(i).io.vecstout.bits
      } .otherwise {
        vsMergeBuffer(i).io.fromPipeline.head.valid := storeMisalignBuffer.io.vecWriteBack(i).valid
        vsMergeBuffer(i).io.fromPipeline.head.bits := storeMisalignBuffer.io.vecWriteBack(i).bits
      }
    }
  }

  (0 until VlduCnt).foreach{i =>
    io.ooo_to_mem.issueVldu(i).ready := vLoadCanAccept(i) || vStoreCanAccept(i)
  }

  vlMergeBuffer.io.redirect <> redirect
  vsMergeBuffer.map(_.io.redirect <> redirect)
  (0 until VlduCnt).foreach{i =>
    vlMergeBuffer.io.toLsq(i) <> lsq.io.ldvecFeedback(i)
  }
  (0 until VstuCnt).foreach{i =>
    vsMergeBuffer(i).io.toLsq.head <> lsq.io.stvecFeedback(i)
  }

  (0 until VlduCnt).foreach{i =>
    // send to RS
    vlMergeBuffer.io.feedback(i) <> io.mem_to_ooo.vlduIqFeedback(i).feedbackSlow
    io.mem_to_ooo.vlduIqFeedback(i).feedbackFast := DontCare
  }
  (0 until VstuCnt).foreach{i =>
    // send to RS
    if (i == 0) {
      io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow.valid := vsMergeBuffer(i).io.feedback.head.valid || vSegmentUnit.io.feedback.valid
      io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow.bits := Mux1H(Seq(
        vSegmentUnit.io.feedback.valid -> vSegmentUnit.io.feedback.bits,
        vsMergeBuffer(i).io.feedback.head.valid -> vsMergeBuffer(i).io.feedback.head.bits
      ))
      io.mem_to_ooo.vstuIqFeedback(i).feedbackFast := DontCare
    } else {
      vsMergeBuffer(i).io.feedback.head <> io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow
      io.mem_to_ooo.vstuIqFeedback(i).feedbackFast := DontCare
    }
  }
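  // NOTE on the vector writeback arbitration below: writebackVldu port 0 is
  // shared with vSegmentUnit (segment accesses win), port 1 is shared with
  // vfofBuffer (the vector fault-only-first buffer wins), and on every port
  // the load merge buffer is preferred over the store merge buffer. The ready
  // signals mirror that priority, so at most one producer fires into each
  // writeback port per cycle; e.g. for port 0 the effective priority is
  //   vSegmentUnit.uopwriteback > vlMergeBuffer.uopWriteback(0) > vsMergeBuffer(0).uopWriteback.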
  (0 until VlduCnt).foreach{i =>
    if (i == 0) { // the segment unit uses writeback port 0
      io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid || vSegmentUnit.io.uopwriteback.valid
      io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
        vSegmentUnit.io.uopwriteback.valid -> vSegmentUnit.io.uopwriteback.bits,
        vlMergeBuffer.io.uopWriteback(i).valid -> vlMergeBuffer.io.uopWriteback(i).bits,
        vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
      ))
      vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready && !vSegmentUnit.io.uopwriteback.valid
      vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid && !vSegmentUnit.io.uopwriteback.valid
      vSegmentUnit.io.uopwriteback.ready := io.mem_to_ooo.writebackVldu(i).ready
    } else if (i == 1) {
      io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid || vfofBuffer.io.uopWriteback.valid
      io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
        vfofBuffer.io.uopWriteback.valid -> vfofBuffer.io.uopWriteback.bits,
        vlMergeBuffer.io.uopWriteback(i).valid -> vlMergeBuffer.io.uopWriteback(i).bits,
        vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
      ))
      vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready && !vfofBuffer.io.uopWriteback.valid
      vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid && !vfofBuffer.io.uopWriteback.valid
      vfofBuffer.io.uopWriteback.ready := io.mem_to_ooo.writebackVldu(i).ready
    } else {
      io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid
      io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
        vlMergeBuffer.io.uopWriteback(i).valid -> vlMergeBuffer.io.uopWriteback(i).bits,
        vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
      ))
      vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready
      vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid
    }

    vfofBuffer.io.mergeUopWriteback(i).valid := vlMergeBuffer.io.uopWriteback(i).valid
    vfofBuffer.io.mergeUopWriteback(i).bits := vlMergeBuffer.io.uopWriteback(i).bits
  }

  vfofBuffer.io.redirect <> redirect

  // Sbuffer
  sbuffer.io.csrCtrl <> csrCtrl
  sbuffer.io.dcache <> dcache.io.lsu.store
  sbuffer.io.memSetPattenDetected := dcache.io.memSetPattenDetected
  sbuffer.io.force_write <> lsq.io.force_write
  // flush sbuffer
  val cmoFlush = lsq.io.flushSbuffer.valid
  val fenceFlush = io.ooo_to_mem.flushSb
  val atomicsFlush = atomicsUnit.io.flush_sbuffer.valid || vSegmentUnit.io.flush_sbuffer.valid
  val stIsEmpty = sbuffer.io.flush.empty && uncache.io.flush.empty
  io.mem_to_ooo.sbIsEmpty := RegNext(stIsEmpty)

  // if all of these sources try to flush sbuffer at the same time,
  // something must have gone wrong
  assert(!(fenceFlush && atomicsFlush && cmoFlush))
  sbuffer.io.flush.valid := RegNext(fenceFlush || atomicsFlush || cmoFlush)
  uncache.io.flush.valid := sbuffer.io.flush.valid

  // AtomicsUnit: AtomicsUnit will override other control signals,
  // as atomics insts (LR/SC/AMO) will block the pipeline
  val s_normal +: s_atomics = Enum(StaCnt + HyuCnt + 1)
  val state = RegInit(s_normal)

  val st_atomics = Seq.tabulate(StaCnt)(i =>
    io.ooo_to_mem.issueSta(i).valid && FuType.storeIsAMO((io.ooo_to_mem.issueSta(i).bits.uop.fuType))
  ) ++ Seq.tabulate(HyuCnt)(i =>
    io.ooo_to_mem.issueHya(i).valid && FuType.storeIsAMO((io.ooo_to_mem.issueHya(i).bits.uop.fuType))
  )

  for (i <- 0 until StaCnt) when(st_atomics(i)) {
    io.ooo_to_mem.issueSta(i).ready := atomicsUnit.io.in.ready
    storeUnits(i).io.stin.valid := false.B

    state := s_atomics(i)
  }
  for (i <- 0 until HyuCnt) when(st_atomics(StaCnt + i)) {
    io.ooo_to_mem.issueHya(i).ready := atomicsUnit.io.in.ready
    hybridUnits(i).io.lsin.valid := false.B

    state := s_atomics(StaCnt + i)
    assert(!st_atomics.zipWithIndex.filterNot(_._2 == StaCnt + i).unzip._1.reduce(_ || _))
  }
  when (atomicsUnit.io.out.valid) {
    state := s_normal
  }

  atomicsUnit.io.in.valid := st_atomics.reduce(_ || _)
  atomicsUnit.io.in.bits := Mux1H(Seq.tabulate(StaCnt)(i =>
    st_atomics(i) -> io.ooo_to_mem.issueSta(i).bits) ++
    Seq.tabulate(HyuCnt)(i => st_atomics(StaCnt+i) -> io.ooo_to_mem.issueHya(i).bits))
  atomicsUnit.io.storeDataIn.zipWithIndex.foreach { case (stdin, i) =>
    stdin.valid := st_data_atomics(i)
    stdin.bits := stData(i).bits
  }
  atomicsUnit.io.redirect <> redirect

  // TODO: complete amo's pmp support
  val amoTlb = dtlb_ld(0).requestor(0)
  atomicsUnit.io.dtlb.resp.valid := false.B
  atomicsUnit.io.dtlb.resp.bits := DontCare
  atomicsUnit.io.dtlb.req.ready := amoTlb.req.ready
  atomicsUnit.io.pmpResp := pmp_check(0).resp

  atomicsUnit.io.dcache <> dcache.io.lsu.atomics
  atomicsUnit.io.flush_sbuffer.empty := stIsEmpty

  atomicsUnit.io.csrCtrl := csrCtrl

  // for atomicsUnit, it uses loadUnit(0)'s TLB port

  when (state =/= s_normal) {
    // use store wb port instead of load
    loadUnits(0).io.ldout.ready := false.B
    // use load_0's TLB
    atomicsUnit.io.dtlb <> amoTlb

    // hw prefetch should be disabled while executing atomic insts
    loadUnits.map(i => i.io.prefetch_req.valid := false.B)

    // make sure there are no in-flight uops in load unit
    assert(!loadUnits(0).io.ldout.valid)
  }

  lsq.io.flushSbuffer.empty := sbuffer.io.sbempty

  for (i <- 0 until StaCnt) {
    when (state === s_atomics(i)) {
      io.mem_to_ooo.staIqFeedback(i).feedbackSlow := atomicsUnit.io.feedbackSlow
      assert(!storeUnits(i).io.feedback_slow.valid)
    }
  }
  for (i <- 0 until HyuCnt) {
    when (state === s_atomics(StaCnt + i)) {
      io.mem_to_ooo.hyuIqFeedback(i).feedbackSlow := atomicsUnit.io.feedbackSlow
      assert(!hybridUnits(i).io.feedback_slow.valid)
    }
  }

  lsq.io.exceptionAddr.isStore := io.ooo_to_mem.isStoreException
  // Exception address is used several cycles after flush.
  // We delay it by 10 cycles to ensure its flush safety.
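  // NOTE: the registers below implement a "sticky" exception flag: it is set
  // when the corresponding unit reports an exception and cleared only once the
  // redirect has propagated (DelayN by 10 cycles), so the latched exception
  // address stays selectable until the backend has consumed it. A minimal
  // sketch of the pattern (illustrative only, `unit` is a placeholder):
  //   val sticky = RegInit(false.B)
  //   when (DelayN(redirect.valid, 10) && sticky) { sticky := false.B }
  //     .elsewhen (unit.io.exceptionInfo.valid) { sticky := true.B }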
1747 val atomicsException = RegInit(false.B) 1748 when (DelayN(redirect.valid, 10) && atomicsException) { 1749 atomicsException := false.B 1750 }.elsewhen (atomicsUnit.io.exceptionInfo.valid) { 1751 atomicsException := true.B 1752 } 1753 1754 val misalignBufExceptionOverwrite = loadMisalignBuffer.io.overwriteExpBuf.valid || storeMisalignBuffer.io.overwriteExpBuf.valid 1755 val misalignBufExceptionVaddr = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid, 1756 loadMisalignBuffer.io.overwriteExpBuf.vaddr, 1757 storeMisalignBuffer.io.overwriteExpBuf.vaddr 1758 ) 1759 val misalignBufExceptionIsHyper = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid, 1760 loadMisalignBuffer.io.overwriteExpBuf.isHyper, 1761 storeMisalignBuffer.io.overwriteExpBuf.isHyper 1762 ) 1763 val misalignBufExceptionGpaddr = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid, 1764 loadMisalignBuffer.io.overwriteExpBuf.gpaddr, 1765 storeMisalignBuffer.io.overwriteExpBuf.gpaddr 1766 ) 1767 val misalignBufExceptionIsForVSnonLeafPTE = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid, 1768 loadMisalignBuffer.io.overwriteExpBuf.isForVSnonLeafPTE, 1769 storeMisalignBuffer.io.overwriteExpBuf.isForVSnonLeafPTE 1770 ) 1771 1772 val vSegmentException = RegInit(false.B) 1773 when (DelayN(redirect.valid, 10) && vSegmentException) { 1774 vSegmentException := false.B 1775 }.elsewhen (vSegmentUnit.io.exceptionInfo.valid) { 1776 vSegmentException := true.B 1777 } 1778 val atomicsExceptionAddress = RegEnable(atomicsUnit.io.exceptionInfo.bits.vaddr, atomicsUnit.io.exceptionInfo.valid) 1779 val vSegmentExceptionVstart = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vstart, vSegmentUnit.io.exceptionInfo.valid) 1780 val vSegmentExceptionVl = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vl, vSegmentUnit.io.exceptionInfo.valid) 1781 val vSegmentExceptionAddress = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vaddr, vSegmentUnit.io.exceptionInfo.valid) 1782 val atomicsExceptionGPAddress = RegEnable(atomicsUnit.io.exceptionInfo.bits.gpaddr, atomicsUnit.io.exceptionInfo.valid) 1783 val vSegmentExceptionGPAddress = RegEnable(vSegmentUnit.io.exceptionInfo.bits.gpaddr, vSegmentUnit.io.exceptionInfo.valid) 1784 val atomicsExceptionIsForVSnonLeafPTE = RegEnable(atomicsUnit.io.exceptionInfo.bits.isForVSnonLeafPTE, atomicsUnit.io.exceptionInfo.valid) 1785 val vSegmentExceptionIsForVSnonLeafPTE = RegEnable(vSegmentUnit.io.exceptionInfo.bits.isForVSnonLeafPTE, vSegmentUnit.io.exceptionInfo.valid) 1786 1787 val exceptionVaddr = Mux( 1788 atomicsException, 1789 atomicsExceptionAddress, 1790 Mux(misalignBufExceptionOverwrite, 1791 misalignBufExceptionVaddr, 1792 Mux(vSegmentException, 1793 vSegmentExceptionAddress, 1794 lsq.io.exceptionAddr.vaddr 1795 ) 1796 ) 1797 ) 1798 // whether vaddr need ext or is hyper inst: 1799 // VaNeedExt: atomicsException -> false; misalignBufExceptionOverwrite -> true; vSegmentException -> false 1800 // IsHyper: atomicsException -> false; vSegmentException -> false 1801 val exceptionVaNeedExt = !atomicsException && 1802 (misalignBufExceptionOverwrite || 1803 (!vSegmentException && lsq.io.exceptionAddr.vaNeedExt)) 1804 val exceptionIsHyper = !atomicsException && 1805 (misalignBufExceptionOverwrite && misalignBufExceptionIsHyper || 1806 (!vSegmentException && lsq.io.exceptionAddr.isHyper && !misalignBufExceptionOverwrite)) 1807 1808 def GenExceptionVa(mode: UInt, isVirt: Bool, vaNeedExt: Bool, 1809 satp: TlbSatpBundle, vsatp: TlbSatpBundle, hgatp: TlbHgatpBundle, 1810 vaddr: UInt) = { 1811 require(VAddrBits >= 50) 1812 1813 val Sv39 = 
satp.mode === 8.U 1814 val Sv48 = satp.mode === 9.U 1815 val Sv39x4 = vsatp.mode === 8.U || hgatp.mode === 8.U 1816 val Sv48x4 = vsatp.mode === 9.U || hgatp.mode === 9.U 1817 val vmEnable = !isVirt && (Sv39 || Sv48) && (mode < ModeM) 1818 val s2xlateEnable = isVirt && (Sv39x4 || Sv48x4) && (mode < ModeM) 1819 1820 val s2xlate = MuxCase(noS2xlate, Seq( 1821 !isVirt -> noS2xlate, 1822 (vsatp.mode =/= 0.U && hgatp.mode =/= 0.U) -> allStage, 1823 (vsatp.mode === 0.U) -> onlyStage2, 1824 (hgatp.mode === 0.U) -> onlyStage1 1825 )) 1826 val onlyS2 = s2xlate === onlyStage2 1827 1828 val bareAddr = ZeroExt(vaddr(PAddrBits - 1, 0), XLEN) 1829 val sv39Addr = SignExt(vaddr.take(39), XLEN) 1830 val sv39x4Addr = ZeroExt(vaddr.take(39 + 2), XLEN) 1831 val sv48Addr = SignExt(vaddr.take(48), XLEN) 1832 val sv48x4Addr = ZeroExt(vaddr.take(48 + 2), XLEN) 1833 1834 val ExceptionVa = Wire(UInt(XLEN.W)) 1835 when (vaNeedExt) { 1836 ExceptionVa := Mux1H(Seq( 1837 (!(vmEnable || s2xlateEnable)) -> bareAddr, 1838 (!onlyS2 && (Sv39 || Sv39x4)) -> sv39Addr, 1839 (!onlyS2 && (Sv48 || Sv48x4)) -> sv48Addr, 1840 ( onlyS2 && (Sv39 || Sv39x4)) -> sv39x4Addr, 1841 ( onlyS2 && (Sv48 || Sv48x4)) -> sv48x4Addr, 1842 )) 1843 } .otherwise { 1844 ExceptionVa := vaddr 1845 } 1846 1847 ExceptionVa 1848 } 1849 1850 io.mem_to_ooo.lsqio.vaddr := RegNext( 1851 GenExceptionVa(tlbcsr.priv.dmode, tlbcsr.priv.virt || exceptionIsHyper, exceptionVaNeedExt, 1852 tlbcsr.satp, tlbcsr.vsatp, tlbcsr.hgatp, exceptionVaddr) 1853 ) 1854 1855 // vsegment instruction is executed atomic, which mean atomicsException and vSegmentException should not raise at the same time. 1856 XSError(atomicsException && vSegmentException, "atomicsException and vSegmentException raise at the same time!") 1857 io.mem_to_ooo.lsqio.vstart := RegNext(Mux(vSegmentException, 1858 vSegmentExceptionVstart, 1859 lsq.io.exceptionAddr.vstart) 1860 ) 1861 io.mem_to_ooo.lsqio.vl := RegNext(Mux(vSegmentException, 1862 vSegmentExceptionVl, 1863 lsq.io.exceptionAddr.vl) 1864 ) 1865 1866 XSError(atomicsException && atomicsUnit.io.in.valid, "new instruction before exception triggers\n") 1867 io.mem_to_ooo.lsqio.gpaddr := RegNext(Mux( 1868 atomicsException, 1869 atomicsExceptionGPAddress, 1870 Mux(misalignBufExceptionOverwrite, 1871 misalignBufExceptionGpaddr, 1872 Mux(vSegmentException, 1873 vSegmentExceptionGPAddress, 1874 lsq.io.exceptionAddr.gpaddr 1875 ) 1876 ) 1877 )) 1878 io.mem_to_ooo.lsqio.isForVSnonLeafPTE := RegNext(Mux( 1879 atomicsException, 1880 atomicsExceptionIsForVSnonLeafPTE, 1881 Mux(misalignBufExceptionOverwrite, 1882 misalignBufExceptionIsForVSnonLeafPTE, 1883 Mux(vSegmentException, 1884 vSegmentExceptionIsForVSnonLeafPTE, 1885 lsq.io.exceptionAddr.isForVSnonLeafPTE 1886 ) 1887 ) 1888 )) 1889 io.mem_to_ooo.topToBackendBypass match { case x => 1890 x.hartId := io.hartId 1891 x.l2FlushDone := RegNext(io.l2_flush_done) 1892 x.externalInterrupt.msip := outer.clint_int_sink.in.head._1(0) 1893 x.externalInterrupt.mtip := outer.clint_int_sink.in.head._1(1) 1894 x.externalInterrupt.meip := outer.plic_int_sink.in.head._1(0) 1895 x.externalInterrupt.seip := outer.plic_int_sink.in.last._1(0) 1896 x.externalInterrupt.debug := outer.debug_int_sink.in.head._1(0) 1897 x.externalInterrupt.nmi.nmi_31 := outer.nmi_int_sink.in.head._1(0) 1898 x.externalInterrupt.nmi.nmi_43 := outer.nmi_int_sink.in.head._1(1) 1899 x.msiInfo := DelayNWithValid(io.fromTopToBackend.msiInfo, 1) 1900 x.clintTime := DelayNWithValid(io.fromTopToBackend.clintTime, 1) 1901 } 1902 1903 io.memInfo.sqFull := 
RegNext(lsq.io.sqFull) 1904 io.memInfo.lqFull := RegNext(lsq.io.lqFull) 1905 io.memInfo.dcacheMSHRFull := RegNext(dcache.io.mshrFull) 1906 1907 io.inner_hartId := io.hartId 1908 io.inner_reset_vector := RegNext(io.outer_reset_vector) 1909 io.outer_cpu_halt := io.ooo_to_mem.backendToTopBypass.cpuHalted 1910 io.outer_l2_flush_en := io.ooo_to_mem.csrCtrl.flush_l2_enable 1911 io.outer_power_down_en := io.ooo_to_mem.csrCtrl.power_down_enable 1912 io.outer_cpu_critical_error := io.ooo_to_mem.backendToTopBypass.cpuCriticalError 1913 io.outer_beu_errors_icache := RegNext(io.inner_beu_errors_icache) 1914 io.inner_hc_perfEvents <> RegNext(io.outer_hc_perfEvents) 1915 1916 // vector segmentUnit 1917 vSegmentUnit.io.in.bits <> io.ooo_to_mem.issueVldu.head.bits 1918 vSegmentUnit.io.in.valid := isSegment && io.ooo_to_mem.issueVldu.head.valid// is segment instruction 1919 vSegmentUnit.io.dtlb.resp.bits <> dtlb_reqs.take(LduCnt).head.resp.bits 1920 vSegmentUnit.io.dtlb.resp.valid <> dtlb_reqs.take(LduCnt).head.resp.valid 1921 vSegmentUnit.io.pmpResp <> pmp_check.head.resp 1922 vSegmentUnit.io.flush_sbuffer.empty := stIsEmpty 1923 vSegmentUnit.io.redirect <> redirect 1924 vSegmentUnit.io.rdcache.resp.bits := dcache.io.lsu.load(0).resp.bits 1925 vSegmentUnit.io.rdcache.resp.valid := dcache.io.lsu.load(0).resp.valid 1926 vSegmentUnit.io.rdcache.s2_bank_conflict := dcache.io.lsu.load(0).s2_bank_conflict 1927 // ------------------------- 1928 // Vector Segment Triggers 1929 // ------------------------- 1930 vSegmentUnit.io.fromCsrTrigger.tdataVec := tdata 1931 vSegmentUnit.io.fromCsrTrigger.tEnableVec := tEnable 1932 vSegmentUnit.io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp 1933 vSegmentUnit.io.fromCsrTrigger.debugMode := debugMode 1934 1935 // reset tree of MemBlock 1936 if (p(DebugOptionsKey).ResetGen) { 1937 val leftResetTree = ResetGenNode( 1938 Seq( 1939 ModuleNode(ptw), 1940 ModuleNode(ptw_to_l2_buffer), 1941 ModuleNode(lsq), 1942 ModuleNode(dtlb_st_tlb_st), 1943 ModuleNode(dtlb_prefetch_tlb_prefetch), 1944 ModuleNode(pmp) 1945 ) 1946 ++ pmp_checkers.map(ModuleNode(_)) 1947 ++ (if (prefetcherOpt.isDefined) Seq(ModuleNode(prefetcherOpt.get)) else Nil) 1948 ++ (if (l1PrefetcherOpt.isDefined) Seq(ModuleNode(l1PrefetcherOpt.get)) else Nil) 1949 ) 1950 val rightResetTree = ResetGenNode( 1951 Seq( 1952 ModuleNode(sbuffer), 1953 ModuleNode(dtlb_ld_tlb_ld), 1954 ModuleNode(dcache), 1955 ModuleNode(l1d_to_l2_buffer), 1956 CellNode(io.reset_backend) 1957 ) 1958 ) 1959 ResetGen(leftResetTree, reset, sim = false) 1960 ResetGen(rightResetTree, reset, sim = false) 1961 } else { 1962 io.reset_backend := DontCare 1963 } 1964 io.resetInFrontendBypass.toL2Top := io.resetInFrontendBypass.fromFrontend 1965 // trace interface 1966 val traceToL2Top = io.traceCoreInterfaceBypass.toL2Top 1967 val traceFromBackend = io.traceCoreInterfaceBypass.fromBackend 1968 traceFromBackend.fromEncoder := RegNext(traceToL2Top.fromEncoder) 1969 traceToL2Top.toEncoder.trap := RegEnable( 1970 traceFromBackend.toEncoder.trap, 1971 traceFromBackend.toEncoder.groups(0).valid && Itype.isTrap(traceFromBackend.toEncoder.groups(0).bits.itype) 1972 ) 1973 traceToL2Top.toEncoder.priv := RegEnable( 1974 traceFromBackend.toEncoder.priv, 1975 traceFromBackend.toEncoder.groups(0).valid 1976 ) 1977 (0 until TraceGroupNum).foreach { i => 1978 traceToL2Top.toEncoder.groups(i).valid := RegNext(traceFromBackend.toEncoder.groups(i).valid) 1979 traceToL2Top.toEncoder.groups(i).bits.iretire := 
RegNext(traceFromBackend.toEncoder.groups(i).bits.iretire) 1980 traceToL2Top.toEncoder.groups(i).bits.itype := RegNext(traceFromBackend.toEncoder.groups(i).bits.itype) 1981 traceToL2Top.toEncoder.groups(i).bits.ilastsize := RegEnable( 1982 traceFromBackend.toEncoder.groups(i).bits.ilastsize, 1983 traceFromBackend.toEncoder.groups(i).valid 1984 ) 1985 traceToL2Top.toEncoder.groups(i).bits.iaddr := RegEnable( 1986 traceFromBackend.toEncoder.groups(i).bits.iaddr, 1987 traceFromBackend.toEncoder.groups(i).valid 1988 ) + (RegEnable( 1989 traceFromBackend.toEncoder.groups(i).bits.ftqOffset.getOrElse(0.U), 1990 traceFromBackend.toEncoder.groups(i).valid 1991 ) << instOffsetBits) 1992 } 1993 1994 1995 io.mem_to_ooo.storeDebugInfo := DontCare 1996 // store event difftest information 1997 if (env.EnableDifftest) { 1998 (0 until EnsbufferWidth).foreach{i => 1999 io.mem_to_ooo.storeDebugInfo(i).robidx := sbuffer.io.vecDifftestInfo(i).bits.robIdx 2000 sbuffer.io.vecDifftestInfo(i).bits.pc := io.mem_to_ooo.storeDebugInfo(i).pc 2001 } 2002 } 2003 2004 // top-down info 2005 dcache.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr 2006 dtlbRepeater.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr 2007 lsq.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr 2008 io.debugTopDown.toCore.robHeadMissInDCache := dcache.io.debugTopDown.robHeadMissInDCache 2009 io.debugTopDown.toCore.robHeadTlbReplay := lsq.io.debugTopDown.robHeadTlbReplay 2010 io.debugTopDown.toCore.robHeadTlbMiss := lsq.io.debugTopDown.robHeadTlbMiss 2011 io.debugTopDown.toCore.robHeadLoadVio := lsq.io.debugTopDown.robHeadLoadVio 2012 io.debugTopDown.toCore.robHeadLoadMSHR := lsq.io.debugTopDown.robHeadLoadMSHR 2013 dcache.io.debugTopDown.robHeadOtherReplay := lsq.io.debugTopDown.robHeadOtherReplay 2014 dcache.io.debugRolling := io.debugRolling 2015 2016 lsq.io.noUopsIssued := io.topDownInfo.toBackend.noUopsIssued 2017 io.topDownInfo.toBackend.lqEmpty := lsq.io.lqEmpty 2018 io.topDownInfo.toBackend.sqEmpty := lsq.io.sqEmpty 2019 io.topDownInfo.toBackend.l1Miss := dcache.io.l1Miss 2020 io.topDownInfo.toBackend.l2TopMiss.l2Miss := RegNext(io.topDownInfo.fromL2Top.l2Miss) 2021 io.topDownInfo.toBackend.l2TopMiss.l3Miss := RegNext(io.topDownInfo.fromL2Top.l3Miss) 2022 2023 val hyLdDeqCount = PopCount(io.ooo_to_mem.issueHya.map(x => x.valid && FuType.isLoad(x.bits.uop.fuType))) 2024 val hyStDeqCount = PopCount(io.ooo_to_mem.issueHya.map(x => x.valid && FuType.isStore(x.bits.uop.fuType))) 2025 val ldDeqCount = PopCount(io.ooo_to_mem.issueLda.map(_.valid)) +& hyLdDeqCount 2026 val stDeqCount = PopCount(io.ooo_to_mem.issueSta.take(StaCnt).map(_.valid)) +& hyStDeqCount 2027 val iqDeqCount = ldDeqCount +& stDeqCount 2028 XSPerfAccumulate("load_iq_deq_count", ldDeqCount) 2029 XSPerfHistogram("load_iq_deq_count", ldDeqCount, true.B, 0, LdExuCnt + 1) 2030 XSPerfAccumulate("store_iq_deq_count", stDeqCount) 2031 XSPerfHistogram("store_iq_deq_count", stDeqCount, true.B, 0, StAddrCnt + 1) 2032 XSPerfAccumulate("ls_iq_deq_count", iqDeqCount) 2033 2034 val pfevent = Module(new PFEvent) 2035 pfevent.io.distribute_csr := csrCtrl.distribute_csr 2036 val csrevents = pfevent.io.hpmevent.slice(16,24) 2037 2038 val perfFromUnits = (loadUnits ++ Seq(sbuffer, lsq, dcache)).flatMap(_.getPerfEvents) 2039 val perfFromPTW = perfEventsPTW.map(x => ("PTW_" + x._1, x._2)) 2040 val perfBlock = Seq(("ldDeqCount", ldDeqCount), 2041 ("stDeqCount", stDeqCount)) 2042 // let index = 0 be no event 2043 val allPerfEvents = Seq(("noEvent", 0.U)) ++ 
perfFromUnits ++ perfFromPTW ++ perfBlock 2044 2045 if (printEventCoding) { 2046 for (((name, inc), i) <- allPerfEvents.zipWithIndex) { 2047 println("MemBlock perfEvents Set", name, inc, i) 2048 } 2049 } 2050 2051 val allPerfInc = allPerfEvents.map(_._2.asTypeOf(new PerfEvent)) 2052 val perfEvents = HPerfMonitor(csrevents, allPerfInc).getPerfEvents 2053 generatePerfEvent() 2054} 2055 2056class MemBlock()(implicit p: Parameters) extends LazyModule 2057 with HasXSParameter { 2058 override def shouldBeInlined: Boolean = false 2059 2060 val inner = LazyModule(new MemBlockInlined()) 2061 2062 lazy val module = new MemBlockImp(this) 2063} 2064 2065class MemBlockImp(wrapper: MemBlock) extends LazyModuleImp(wrapper) { 2066 val io = IO(wrapper.inner.module.io.cloneType) 2067 val io_perf = IO(wrapper.inner.module.io_perf.cloneType) 2068 io <> wrapper.inner.module.io 2069 io_perf <> wrapper.inner.module.io_perf 2070 2071 if (p(DebugOptionsKey).ResetGen) { 2072 ResetGen(ResetGenNode(Seq(ModuleNode(wrapper.inner.module))), reset, sim = false) 2073 } 2074} 2075
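// Usage sketch (illustrative only; `params` and the enclosing lazy-module scope
// are assumptions, not part of this file):
//   val memBlock = LazyModule(new MemBlock()(params))
//   // in the enclosing LazyModuleImp, memBlock.module.io and io_perf are then
//   // connected to the backend, frontend and L2 by the surrounding core/tile module.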