1/*************************************************************************************** 2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences 3* Copyright (c) 2020-2021 Peng Cheng Laboratory 4* 5* XiangShan is licensed under Mulan PSL v2. 6* You can use this software according to the terms and conditions of the Mulan PSL v2. 7* You may obtain a copy of Mulan PSL v2 at: 8* http://license.coscl.org.cn/MulanPSL2 9* 10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, 11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, 12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 13* 14* See the Mulan PSL v2 for more details. 15***************************************************************************************/ 16 17package xiangshan.mem 18 19import org.chipsalliance.cde.config.Parameters 20import chisel3._ 21import chisel3.util._ 22import freechips.rocketchip.diplomacy._ 23import freechips.rocketchip.diplomacy.{BundleBridgeSource, LazyModule, LazyModuleImp} 24import freechips.rocketchip.interrupts.{IntSinkNode, IntSinkPortSimple} 25import freechips.rocketchip.tile.HasFPUParameters 26import freechips.rocketchip.tilelink._ 27import utils._ 28import utility._ 29import system.SoCParamsKey 30import xiangshan._ 31import xiangshan.ExceptionNO._ 32import xiangshan.frontend.HasInstrMMIOConst 33import xiangshan.backend.Bundles.{DynInst, MemExuInput, MemExuOutput} 34import xiangshan.backend.ctrlblock.{DebugLSIO, LsTopdownInfo} 35import xiangshan.backend.exu.MemExeUnit 36import xiangshan.backend.fu._ 37import xiangshan.backend.fu.FuType._ 38import xiangshan.backend.fu.NewCSR.{CsrTriggerBundle, TriggerUtil, PFEvent} 39import xiangshan.backend.fu.util.{CSRConst, SdtrigExt} 40import xiangshan.backend.{BackendToTopBundle, TopToBackendBundle} 41import xiangshan.backend.rob.{RobDebugRollingIO, RobPtr, RobLsqIO} 42import xiangshan.backend.datapath.NewPipelineConnect 43import xiangshan.backend.trace.{Itype, TraceCoreInterface} 44import xiangshan.backend.Bundles._ 45import xiangshan.mem._ 46import xiangshan.mem.mdp._ 47import xiangshan.mem.Bundles._ 48import xiangshan.mem.prefetch.{BasePrefecher, L1Prefetcher, SMSParams, SMSPrefetcher} 49import xiangshan.cache._ 50import xiangshan.cache.mmu._ 51import coupledL2.PrefetchRecv 52import utility.mbist.{MbistInterface, MbistPipeline} 53import utility.sram.{SramBroadcastBundle, SramHelper} 54import system.HasSoCParameter 55trait HasMemBlockParameters extends HasXSParameter { 56 // number of memory units 57 val LduCnt = backendParams.LduCnt 58 val StaCnt = backendParams.StaCnt 59 val StdCnt = backendParams.StdCnt 60 val HyuCnt = backendParams.HyuCnt 61 val VlduCnt = backendParams.VlduCnt 62 val VstuCnt = backendParams.VstuCnt 63 64 val LdExuCnt = LduCnt + HyuCnt 65 val StAddrCnt = StaCnt + HyuCnt 66 val StDataCnt = StdCnt 67 val MemExuCnt = LduCnt + HyuCnt + StaCnt + StdCnt 68 val MemAddrExtCnt = LdExuCnt + StaCnt 69 val MemVExuCnt = VlduCnt + VstuCnt 70 71 val AtomicWBPort = 0 72 val MisalignWBPort = 1 73 val UncacheWBPort = 2 74 val NCWBPorts = Seq(1, 2) 75} 76 77abstract class MemBlockBundle(implicit val p: Parameters) extends Bundle with HasMemBlockParameters 78 79class Std(cfg: FuConfig)(implicit p: Parameters) extends FuncUnit(cfg) { 80 io.in.ready := io.out.ready 81 io.out.valid := io.in.valid 82 io.out.bits := 0.U.asTypeOf(io.out.bits) 83 io.out.bits.res.data := io.in.bits.data.src(0) 84 io.out.bits.ctrl.robIdx := io.in.bits.ctrl.robIdx 85} 86 87class ooo_to_mem(implicit p: Parameters) extends MemBlockBundle { 88 val backendToTopBypass = Flipped(new BackendToTopBundle) 89 90 val loadFastMatch = Vec(LdExuCnt, Input(UInt(LdExuCnt.W))) 91 val loadFastFuOpType = Vec(LdExuCnt, Input(FuOpType())) 92 val loadFastImm = Vec(LdExuCnt, Input(UInt(12.W))) 93 val sfence = Input(new SfenceBundle) 94 val tlbCsr = Input(new TlbCsrBundle) 95 val lsqio = new Bundle { 96 val lcommit = Input(UInt(log2Up(CommitWidth + 1).W)) 97 val scommit = Input(UInt(log2Up(CommitWidth + 1).W)) 98 val pendingMMIOld = Input(Bool()) 99 val pendingld = Input(Bool()) 100 val pendingst = Input(Bool()) 101 val pendingVst = Input(Bool()) 102 val commit = Input(Bool()) 103 val pendingPtr = Input(new RobPtr) 104 val pendingPtrNext = Input(new RobPtr) 105 } 106 107 val isStoreException = Input(Bool()) 108 val isVlsException = Input(Bool()) 109 val csrCtrl = Flipped(new CustomCSRCtrlIO) 110 val enqLsq = new LsqEnqIO 111 val flushSb = Input(Bool()) 112 113 val storePc = Vec(StaCnt, Input(UInt(VAddrBits.W))) // for hw prefetch 114 val hybridPc = Vec(HyuCnt, Input(UInt(VAddrBits.W))) // for hw prefetch 115 116 val issueLda = MixedVec(Seq.fill(LduCnt)(Flipped(DecoupledIO(new MemExuInput)))) 117 val issueSta = MixedVec(Seq.fill(StaCnt)(Flipped(DecoupledIO(new MemExuInput)))) 118 val issueStd = MixedVec(Seq.fill(StdCnt)(Flipped(DecoupledIO(new MemExuInput)))) 119 val issueHya = MixedVec(Seq.fill(HyuCnt)(Flipped(DecoupledIO(new MemExuInput)))) 120 val issueVldu = MixedVec(Seq.fill(VlduCnt)(Flipped(DecoupledIO(new MemExuInput(isVector=true))))) 121 122 def issueUops = issueLda ++ issueSta ++ issueStd ++ issueHya ++ issueVldu 123} 124 125class mem_to_ooo(implicit p: Parameters) extends MemBlockBundle { 126 val topToBackendBypass = new TopToBackendBundle 127 128 val otherFastWakeup = Vec(LdExuCnt, ValidIO(new DynInst)) 129 val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize + 1).W)) 130 val sqCancelCnt = Output(UInt(log2Up(StoreQueueSize + 1).W)) 131 val sqDeq = Output(UInt(log2Ceil(EnsbufferWidth + 1).W)) 132 val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W)) 133 // used by VLSU issue queue, the vector store would wait all store before it, and the vector load would wait all load 134 val sqDeqPtr = Output(new SqPtr) 135 val lqDeqPtr = Output(new LqPtr) 136 val stIn = Vec(StAddrCnt, ValidIO(new MemExuInput)) 137 val stIssuePtr = Output(new SqPtr()) 138 139 val memoryViolation = ValidIO(new Redirect) 140 val sbIsEmpty = Output(Bool()) 141 142 val lsTopdownInfo = Vec(LdExuCnt, Output(new LsTopdownInfo)) 143 144 val lsqio = new Bundle { 145 val vaddr = Output(UInt(XLEN.W)) 146 val vstart = Output(UInt((log2Up(VLEN) + 1).W)) 147 val vl = Output(UInt((log2Up(VLEN) + 1).W)) 148 val gpaddr = Output(UInt(XLEN.W)) 149 val isForVSnonLeafPTE = Output(Bool()) 150 val mmio = Output(Vec(LoadPipelineWidth, Bool())) 151 val uop = Output(Vec(LoadPipelineWidth, new DynInst)) 152 val lqCanAccept = Output(Bool()) 153 val sqCanAccept = Output(Bool()) 154 } 155 156 val storeDebugInfo = Vec(EnsbufferWidth, new Bundle { 157 val robidx = Output(new RobPtr) 158 val pc = Input(UInt(VAddrBits.W)) 159 }) 160 161 val writebackLda = Vec(LduCnt, DecoupledIO(new MemExuOutput)) 162 val writebackSta = Vec(StaCnt, DecoupledIO(new MemExuOutput)) 163 val writebackStd = Vec(StdCnt, DecoupledIO(new MemExuOutput)) 164 val writebackHyuLda = Vec(HyuCnt, DecoupledIO(new MemExuOutput)) 165 val writebackHyuSta = Vec(HyuCnt, DecoupledIO(new MemExuOutput)) 166 val writebackVldu = Vec(VlduCnt, DecoupledIO(new MemExuOutput(isVector = true))) 167 def writeBack: Seq[DecoupledIO[MemExuOutput]] = { 168 writebackSta ++ 169 writebackHyuLda ++ writebackHyuSta ++ 170 writebackLda ++ 171 writebackVldu ++ 172 writebackStd 173 } 174 175 val ldaIqFeedback = Vec(LduCnt, new MemRSFeedbackIO) 176 val staIqFeedback = Vec(StaCnt, new MemRSFeedbackIO) 177 val hyuIqFeedback = Vec(HyuCnt, new MemRSFeedbackIO) 178 val vstuIqFeedback= Vec(VstuCnt, new MemRSFeedbackIO(isVector = true)) 179 val vlduIqFeedback= Vec(VlduCnt, new MemRSFeedbackIO(isVector = true)) 180 val ldCancel = Vec(backendParams.LdExuCnt, new LoadCancelIO) 181 val wakeup = Vec(backendParams.LdExuCnt, Valid(new DynInst)) 182 183 val s3_delayed_load_error = Vec(LdExuCnt, Output(Bool())) 184} 185 186class MemCoreTopDownIO extends Bundle { 187 val robHeadMissInDCache = Output(Bool()) 188 val robHeadTlbReplay = Output(Bool()) 189 val robHeadTlbMiss = Output(Bool()) 190 val robHeadLoadVio = Output(Bool()) 191 val robHeadLoadMSHR = Output(Bool()) 192} 193 194class fetch_to_mem(implicit p: Parameters) extends XSBundle{ 195 val itlb = Flipped(new TlbPtwIO()) 196} 197 198// triple buffer applied in i-mmio path (two at MemBlock, one at L2Top) 199class InstrUncacheBuffer()(implicit p: Parameters) extends LazyModule with HasInstrMMIOConst { 200 val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default) 201 lazy val module = new InstrUncacheBufferImpl 202 203 class InstrUncacheBufferImpl extends LazyModuleImp(this) { 204 (node.in zip node.out) foreach { case ((in, edgeIn), (out, edgeOut)) => 205 out.a <> BufferParams.default(BufferParams.default(in.a)) 206 in.d <> BufferParams.default(BufferParams.default(out.d)) 207 208 // only a.valid, a.ready, a.address can change 209 // hoping that the rest would be optimized to keep MemBlock port unchanged after adding buffer 210 out.a.bits.data := 0.U 211 out.a.bits.mask := Fill(mmioBusBytes, 1.U(1.W)) 212 out.a.bits.opcode := 4.U // Get 213 out.a.bits.size := log2Ceil(mmioBusBytes).U 214 out.a.bits.source := 0.U 215 } 216 } 217} 218 219// triple buffer applied in L1I$-L2 path (two at MemBlock, one at L2Top) 220class ICacheBuffer()(implicit p: Parameters) extends LazyModule { 221 val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default) 222 lazy val module = new ICacheBufferImpl 223 224 class ICacheBufferImpl extends LazyModuleImp(this) { 225 (node.in zip node.out) foreach { case ((in, edgeIn), (out, edgeOut)) => 226 out.a <> BufferParams.default(BufferParams.default(in.a)) 227 in.d <> BufferParams.default(BufferParams.default(out.d)) 228 } 229 } 230} 231 232class ICacheCtrlBuffer()(implicit p: Parameters) extends LazyModule { 233 val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default) 234 lazy val module = new ICacheCtrlBufferImpl 235 236 class ICacheCtrlBufferImpl extends LazyModuleImp(this) { 237 (node.in zip node.out) foreach { case ((in, edgeIn), (out, edgeOut)) => 238 out.a <> BufferParams.default(BufferParams.default(in.a)) 239 in.d <> BufferParams.default(BufferParams.default(out.d)) 240 } 241 } 242} 243 244// Frontend bus goes through MemBlock 245class FrontendBridge()(implicit p: Parameters) extends LazyModule { 246 val icache_node = LazyModule(new ICacheBuffer()).suggestName("icache").node// to keep IO port name 247 val icachectrl_node = LazyModule(new ICacheCtrlBuffer()).suggestName("icachectrl").node 248 val instr_uncache_node = LazyModule(new InstrUncacheBuffer()).suggestName("instr_uncache").node 249 lazy val module = new LazyModuleImp(this) { 250 } 251} 252 253class MemBlockInlined()(implicit p: Parameters) extends LazyModule 254 with HasXSParameter { 255 override def shouldBeInlined: Boolean = true 256 257 val dcache = LazyModule(new DCacheWrapper()) 258 val uncache = LazyModule(new Uncache()) 259 val uncache_port = TLTempNode() 260 val uncache_xbar = TLXbar() 261 val ptw = LazyModule(new L2TLBWrapper()) 262 val ptw_to_l2_buffer = if (!coreParams.softPTW) LazyModule(new TLBuffer) else null 263 val l1d_to_l2_buffer = if (coreParams.dcacheParametersOpt.nonEmpty) LazyModule(new TLBuffer) else null 264 val dcache_port = TLNameNode("dcache_client") // to keep dcache-L2 port name 265 val l2_pf_sender_opt = coreParams.prefetcher.map(_ => 266 BundleBridgeSource(() => new PrefetchRecv) 267 ) 268 val l3_pf_sender_opt = if (p(SoCParamsKey).L3CacheParamsOpt.nonEmpty) coreParams.prefetcher.map(_ => 269 BundleBridgeSource(() => new huancun.PrefetchRecv) 270 ) else None 271 val frontendBridge = LazyModule(new FrontendBridge) 272 // interrupt sinks 273 val clint_int_sink = IntSinkNode(IntSinkPortSimple(1, 2)) 274 val debug_int_sink = IntSinkNode(IntSinkPortSimple(1, 1)) 275 val plic_int_sink = IntSinkNode(IntSinkPortSimple(2, 1)) 276 val nmi_int_sink = IntSinkNode(IntSinkPortSimple(1, (new NonmaskableInterruptIO).elements.size)) 277 val beu_local_int_sink = IntSinkNode(IntSinkPortSimple(1, 1)) 278 279 if (!coreParams.softPTW) { 280 ptw_to_l2_buffer.node := ptw.node 281 } 282 uncache_xbar := TLBuffer() := uncache.clientNode 283 if (dcache.uncacheNode.isDefined) { 284 dcache.uncacheNode.get := TLBuffer.chainNode(2) := uncache_xbar 285 } 286 uncache_port := TLBuffer.chainNode(2) := uncache_xbar 287 288 lazy val module = new MemBlockInlinedImp(this) 289} 290 291class MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer) 292 with HasXSParameter 293 with HasFPUParameters 294 with HasPerfEvents 295 with HasSoCParameter 296 with HasL1PrefetchSourceParameter 297 with HasCircularQueuePtrHelper 298 with HasMemBlockParameters 299 with HasTlbConst 300 with SdtrigExt 301{ 302 val io = IO(new Bundle { 303 val hartId = Input(UInt(hartIdLen.W)) 304 val redirect = Flipped(ValidIO(new Redirect)) 305 306 val ooo_to_mem = new ooo_to_mem 307 val mem_to_ooo = new mem_to_ooo 308 val fetch_to_mem = new fetch_to_mem 309 310 val ifetchPrefetch = Vec(LduCnt, ValidIO(new SoftIfetchPrefetchBundle)) 311 312 // misc 313 val error = ValidIO(new L1CacheErrorInfo) 314 val memInfo = new Bundle { 315 val sqFull = Output(Bool()) 316 val lqFull = Output(Bool()) 317 val dcacheMSHRFull = Output(Bool()) 318 } 319 val debug_ls = new DebugLSIO 320 val l2_hint = Input(Valid(new L2ToL1Hint())) 321 val l2PfqBusy = Input(Bool()) 322 val l2_tlb_req = Flipped(new TlbRequestIO(nRespDups = 2)) 323 val l2_pmp_resp = new PMPRespBundle 324 val l2_flush_done = Input(Bool()) 325 326 val debugTopDown = new Bundle { 327 val robHeadVaddr = Flipped(Valid(UInt(VAddrBits.W))) 328 val toCore = new MemCoreTopDownIO 329 } 330 val debugRolling = Flipped(new RobDebugRollingIO) 331 332 // All the signals from/to frontend/backend to/from bus will go through MemBlock 333 val fromTopToBackend = Input(new Bundle { 334 val msiInfo = ValidIO(UInt(soc.IMSICParams.MSI_INFO_WIDTH.W)) 335 val clintTime = ValidIO(UInt(64.W)) 336 }) 337 val inner_hartId = Output(UInt(hartIdLen.W)) 338 val inner_reset_vector = Output(UInt(PAddrBits.W)) 339 val outer_reset_vector = Input(UInt(PAddrBits.W)) 340 val outer_cpu_halt = Output(Bool()) 341 val outer_l2_flush_en = Output(Bool()) 342 val outer_power_down_en = Output(Bool()) 343 val outer_cpu_critical_error = Output(Bool()) 344 val outer_msi_ack = Output(Bool()) 345 val inner_beu_errors_icache = Input(new L1BusErrorUnitInfo) 346 val outer_beu_errors_icache = Output(new L1BusErrorUnitInfo) 347 val inner_hc_perfEvents = Output(Vec(numPCntHc * coreParams.L2NBanks + 1, new PerfEvent)) 348 val outer_hc_perfEvents = Input(Vec(numPCntHc * coreParams.L2NBanks + 1, new PerfEvent)) 349 350 // reset signals of frontend & backend are generated in memblock 351 val reset_backend = Output(Reset()) 352 // Reset singal from frontend. 353 val resetInFrontendBypass = new Bundle{ 354 val fromFrontend = Input(Bool()) 355 val toL2Top = Output(Bool()) 356 } 357 val traceCoreInterfaceBypass = new Bundle{ 358 val fromBackend = Flipped(new TraceCoreInterface(hasOffset = true)) 359 val toL2Top = new TraceCoreInterface 360 } 361 362 val topDownInfo = new Bundle { 363 val fromL2Top = Input(new TopDownFromL2Top) 364 val toBackend = Flipped(new TopDownInfo) 365 } 366 val dft = if (hasMbist) Some(Input(new SramBroadcastBundle)) else None 367 val dft_reset = if(hasMbist) Some(Input(new DFTResetSignals())) else None 368 val dft_frnt = if (hasMbist) Some(Output(new SramBroadcastBundle)) else None 369 val dft_reset_frnt = if(hasMbist) Some(Output(new DFTResetSignals())) else None 370 val dft_bcknd = if (hasMbist) Some(Output(new SramBroadcastBundle)) else None 371 val dft_reset_bcknd = if(hasMbist) Some(Output(new DFTResetSignals())) else None 372 }) 373 374 dontTouch(io.inner_hartId) 375 dontTouch(io.inner_reset_vector) 376 dontTouch(io.outer_reset_vector) 377 dontTouch(io.outer_cpu_halt) 378 dontTouch(io.outer_l2_flush_en) 379 dontTouch(io.outer_power_down_en) 380 dontTouch(io.outer_cpu_critical_error) 381 dontTouch(io.inner_beu_errors_icache) 382 dontTouch(io.outer_beu_errors_icache) 383 dontTouch(io.inner_hc_perfEvents) 384 dontTouch(io.outer_hc_perfEvents) 385 386 val redirect = RegNextWithEnable(io.redirect) 387 388 private val dcache = outer.dcache.module 389 val uncache = outer.uncache.module 390 391 //val delayedDcacheRefill = RegNext(dcache.io.lsu.lsq) 392 393 val csrCtrl = DelayN(io.ooo_to_mem.csrCtrl, 2) 394 dcache.io.l2_pf_store_only := RegNext(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_store_only, false.B) 395 io.error <> DelayNWithValid(dcache.io.error, 2) 396 when(!csrCtrl.cache_error_enable){ 397 io.error.bits.report_to_beu := false.B 398 io.error.valid := false.B 399 } 400 401 val loadUnits = Seq.fill(LduCnt)(Module(new LoadUnit)) 402 val storeUnits = Seq.fill(StaCnt)(Module(new StoreUnit)) 403 val stdExeUnits = Seq.fill(StdCnt)(Module(new MemExeUnit(backendParams.memSchdParams.get.issueBlockParams.find(_.StdCnt != 0).get.exuBlockParams.head))) 404 val hybridUnits = Seq.fill(HyuCnt)(Module(new HybridUnit)) // Todo: replace it with HybridUnit 405 val stData = stdExeUnits.map(_.io.out) 406 val exeUnits = loadUnits ++ storeUnits 407 408 // The number of vector load/store units is decoupled with the number of load/store units 409 val vlSplit = Seq.fill(VlduCnt)(Module(new VLSplitImp)) 410 val vsSplit = Seq.fill(VstuCnt)(Module(new VSSplitImp)) 411 val vlMergeBuffer = Module(new VLMergeBufferImp) 412 val vsMergeBuffer = Seq.fill(VstuCnt)(Module(new VSMergeBufferImp)) 413 val vSegmentUnit = Module(new VSegmentUnit) 414 val vfofBuffer = Module(new VfofBuffer) 415 416 // misalign Buffer 417 val loadMisalignBuffer = Module(new LoadMisalignBuffer) 418 val storeMisalignBuffer = Module(new StoreMisalignBuffer) 419 420 val l1_pf_req = Wire(Decoupled(new L1PrefetchReq())) 421 dcache.io.sms_agt_evict_req.ready := false.B 422 val prefetcherOpt: Option[BasePrefecher] = coreParams.prefetcher.map { 423 case _: SMSParams => 424 val sms = Module(new SMSPrefetcher()) 425 sms.io_agt_en := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_agt, 2, Some(false.B)) 426 sms.io_pht_en := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_pht, 2, Some(false.B)) 427 sms.io_act_threshold := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_active_threshold, 2, Some(12.U)) 428 sms.io_act_stride := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_active_stride, 2, Some(30.U)) 429 sms.io_stride_en := false.B 430 sms.io_dcache_evict <> dcache.io.sms_agt_evict_req 431 val mbistSmsPl = MbistPipeline.PlaceMbistPipeline(1, "MbistPipeSms", hasMbist) 432 sms 433 } 434 prefetcherOpt.foreach{ pf => pf.io.l1_req.ready := false.B } 435 val hartId = p(XSCoreParamsKey).HartId 436 val l1PrefetcherOpt: Option[BasePrefecher] = coreParams.prefetcher.map { 437 case _ => 438 val l1Prefetcher = Module(new L1Prefetcher()) 439 l1Prefetcher.io.enable := Constantin.createRecord(s"enableL1StreamPrefetcher$hartId", initValue = true) 440 l1Prefetcher.pf_ctrl <> dcache.io.pf_ctrl 441 l1Prefetcher.l2PfqBusy := io.l2PfqBusy 442 443 // stride will train on miss or prefetch hit 444 for (i <- 0 until LduCnt) { 445 val source = loadUnits(i).io.prefetch_train_l1 446 l1Prefetcher.stride_train(i).valid := source.valid && source.bits.isFirstIssue && ( 447 source.bits.miss || isFromStride(source.bits.meta_prefetch) 448 ) 449 l1Prefetcher.stride_train(i).bits := source.bits 450 val loadPc = RegNext(io.ooo_to_mem.issueLda(i).bits.uop.pc) // for s1 451 l1Prefetcher.stride_train(i).bits.uop.pc := Mux( 452 loadUnits(i).io.s2_ptr_chasing, 453 RegEnable(loadPc, loadUnits(i).io.s2_prefetch_spec), 454 RegEnable(RegEnable(loadPc, loadUnits(i).io.s1_prefetch_spec), loadUnits(i).io.s2_prefetch_spec) 455 ) 456 } 457 for (i <- 0 until HyuCnt) { 458 val source = hybridUnits(i).io.prefetch_train_l1 459 l1Prefetcher.stride_train.drop(LduCnt)(i).valid := source.valid && source.bits.isFirstIssue && ( 460 source.bits.miss || isFromStride(source.bits.meta_prefetch) 461 ) 462 l1Prefetcher.stride_train.drop(LduCnt)(i).bits := source.bits 463 l1Prefetcher.stride_train.drop(LduCnt)(i).bits.uop.pc := Mux( 464 hybridUnits(i).io.ldu_io.s2_ptr_chasing, 465 RegNext(io.ooo_to_mem.hybridPc(i)), 466 RegNext(RegNext(io.ooo_to_mem.hybridPc(i))) 467 ) 468 } 469 l1Prefetcher 470 } 471 // load prefetch to l1 Dcache 472 l1PrefetcherOpt match { 473 case Some(pf) => l1_pf_req <> Pipeline(in = pf.io.l1_req, depth = 1, pipe = false, name = Some("pf_queue_to_ldu_reg")) 474 case None => 475 l1_pf_req.valid := false.B 476 l1_pf_req.bits := DontCare 477 } 478 val pf_train_on_hit = RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_train_on_hit, 2, Some(true.B)) 479 480 loadUnits.zipWithIndex.map(x => x._1.suggestName("LoadUnit_"+x._2)) 481 storeUnits.zipWithIndex.map(x => x._1.suggestName("StoreUnit_"+x._2)) 482 hybridUnits.zipWithIndex.map(x => x._1.suggestName("HybridUnit_"+x._2)) 483 val atomicsUnit = Module(new AtomicsUnit) 484 485 486 val ldaExeWbReqs = Wire(Vec(LduCnt, Decoupled(new MemExuOutput))) 487 // atomicsUnit will overwrite the source from ldu if it is about to writeback 488 val atomicWritebackOverride = Mux( 489 atomicsUnit.io.out.valid, 490 atomicsUnit.io.out.bits, 491 loadUnits(AtomicWBPort).io.ldout.bits 492 ) 493 ldaExeWbReqs(AtomicWBPort).valid := atomicsUnit.io.out.valid || loadUnits(AtomicWBPort).io.ldout.valid 494 ldaExeWbReqs(AtomicWBPort).bits := atomicWritebackOverride 495 atomicsUnit.io.out.ready := ldaExeWbReqs(AtomicWBPort).ready 496 loadUnits(AtomicWBPort).io.ldout.ready := ldaExeWbReqs(AtomicWBPort).ready 497 498 val st_data_atomics = Seq.tabulate(StdCnt)(i => 499 stData(i).valid && FuType.storeIsAMO(stData(i).bits.uop.fuType) 500 ) 501 502 // misalignBuffer will overwrite the source from ldu if it is about to writeback 503 val misalignWritebackOverride = Mux( 504 loadUnits(MisalignWBPort).io.ldout.valid, 505 loadUnits(MisalignWBPort).io.ldout.bits, 506 loadMisalignBuffer.io.writeBack.bits 507 ) 508 ldaExeWbReqs(MisalignWBPort).valid := loadMisalignBuffer.io.writeBack.valid || loadUnits(MisalignWBPort).io.ldout.valid 509 ldaExeWbReqs(MisalignWBPort).bits := misalignWritebackOverride 510 loadMisalignBuffer.io.writeBack.ready := ldaExeWbReqs(MisalignWBPort).ready && !loadUnits(MisalignWBPort).io.ldout.valid 511 loadMisalignBuffer.io.loadOutValid := loadUnits(MisalignWBPort).io.ldout.valid 512 loadMisalignBuffer.io.loadVecOutValid := loadUnits(MisalignWBPort).io.vecldout.valid 513 loadUnits(MisalignWBPort).io.ldout.ready := ldaExeWbReqs(MisalignWBPort).ready 514 ldaExeWbReqs(MisalignWBPort).bits.isFromLoadUnit := loadUnits(MisalignWBPort).io.ldout.bits.isFromLoadUnit || loadMisalignBuffer.io.writeBack.valid 515 516 // loadUnit will overwrite the source from uncache if it is about to writeback 517 ldaExeWbReqs(UncacheWBPort) <> loadUnits(UncacheWBPort).io.ldout 518 io.mem_to_ooo.writebackLda <> ldaExeWbReqs 519 io.mem_to_ooo.writebackSta <> storeUnits.map(_.io.stout) 520 io.mem_to_ooo.writebackStd.zip(stdExeUnits).foreach {x => 521 x._1.bits := x._2.io.out.bits 522 // AMOs do not need to write back std now. 523 x._1.valid := x._2.io.out.fire && !FuType.storeIsAMO(x._2.io.out.bits.uop.fuType) 524 } 525 io.mem_to_ooo.writebackHyuLda <> hybridUnits.map(_.io.ldout) 526 io.mem_to_ooo.writebackHyuSta <> hybridUnits.map(_.io.stout) 527 io.mem_to_ooo.otherFastWakeup := DontCare 528 io.mem_to_ooo.otherFastWakeup.drop(HyuCnt).take(LduCnt).zip(loadUnits.map(_.io.fast_uop)).foreach{case(a,b)=> a := b} 529 io.mem_to_ooo.otherFastWakeup.take(HyuCnt).zip(hybridUnits.map(_.io.ldu_io.fast_uop)).foreach{case(a,b)=> a:=b} 530 val stOut = io.mem_to_ooo.writebackSta ++ io.mem_to_ooo.writebackHyuSta 531 532 // prefetch to l1 req 533 // Stream's confidence is always 1 534 // (LduCnt + HyuCnt) l1_pf_reqs ? 535 loadUnits.foreach(load_unit => { 536 load_unit.io.prefetch_req.valid <> l1_pf_req.valid 537 load_unit.io.prefetch_req.bits <> l1_pf_req.bits 538 }) 539 540 hybridUnits.foreach(hybrid_unit => { 541 hybrid_unit.io.ldu_io.prefetch_req.valid <> l1_pf_req.valid 542 hybrid_unit.io.ldu_io.prefetch_req.bits <> l1_pf_req.bits 543 }) 544 545 // NOTE: loadUnits(0) has higher bank conflict and miss queue arb priority than loadUnits(1) and loadUnits(2) 546 // when loadUnits(1)/loadUnits(2) stage 0 is busy, hw prefetch will never use that pipeline 547 val LowConfPorts = if (LduCnt == 2) Seq(1) else if (LduCnt == 3) Seq(1, 2) else Seq(0) 548 LowConfPorts.map{case i => loadUnits(i).io.prefetch_req.bits.confidence := 0.U} 549 hybridUnits.foreach(hybrid_unit => { hybrid_unit.io.ldu_io.prefetch_req.bits.confidence := 0.U }) 550 551 val canAcceptHighConfPrefetch = loadUnits.map(_.io.canAcceptHighConfPrefetch) ++ 552 hybridUnits.map(_.io.canAcceptLowConfPrefetch) 553 val canAcceptLowConfPrefetch = loadUnits.map(_.io.canAcceptLowConfPrefetch) ++ 554 hybridUnits.map(_.io.canAcceptLowConfPrefetch) 555 l1_pf_req.ready := (0 until LduCnt + HyuCnt).map{ 556 case i => { 557 if (LowConfPorts.contains(i)) { 558 loadUnits(i).io.canAcceptLowConfPrefetch 559 } else { 560 Mux(l1_pf_req.bits.confidence === 1.U, canAcceptHighConfPrefetch(i), canAcceptLowConfPrefetch(i)) 561 } 562 } 563 }.reduce(_ || _) 564 565 // l1 pf fuzzer interface 566 val DebugEnableL1PFFuzzer = false 567 if (DebugEnableL1PFFuzzer) { 568 // l1 pf req fuzzer 569 val fuzzer = Module(new L1PrefetchFuzzer()) 570 fuzzer.io.vaddr := DontCare 571 fuzzer.io.paddr := DontCare 572 573 // override load_unit prefetch_req 574 loadUnits.foreach(load_unit => { 575 load_unit.io.prefetch_req.valid <> fuzzer.io.req.valid 576 load_unit.io.prefetch_req.bits <> fuzzer.io.req.bits 577 }) 578 579 // override hybrid_unit prefetch_req 580 hybridUnits.foreach(hybrid_unit => { 581 hybrid_unit.io.ldu_io.prefetch_req.valid <> fuzzer.io.req.valid 582 hybrid_unit.io.ldu_io.prefetch_req.bits <> fuzzer.io.req.bits 583 }) 584 585 fuzzer.io.req.ready := l1_pf_req.ready 586 } 587 588 // TODO: fast load wakeup 589 val lsq = Module(new LsqWrapper) 590 val sbuffer = Module(new Sbuffer) 591 // if you wants to stress test dcache store, use FakeSbuffer 592 // val sbuffer = Module(new FakeSbuffer) // out of date now 593 io.mem_to_ooo.stIssuePtr := lsq.io.issuePtrExt 594 595 dcache.io.hartId := io.hartId 596 lsq.io.hartId := io.hartId 597 sbuffer.io.hartId := io.hartId 598 atomicsUnit.io.hartId := io.hartId 599 600 dcache.io.lqEmpty := lsq.io.lqEmpty 601 602 // load/store prefetch to l2 cache 603 prefetcherOpt.foreach(sms_pf => { 604 l1PrefetcherOpt.foreach(l1_pf => { 605 val sms_pf_to_l2 = DelayNWithValid(sms_pf.io.l2_req, 2) 606 val l1_pf_to_l2 = DelayNWithValid(l1_pf.io.l2_req, 2) 607 608 outer.l2_pf_sender_opt.get.out.head._1.addr_valid := sms_pf_to_l2.valid || l1_pf_to_l2.valid 609 outer.l2_pf_sender_opt.get.out.head._1.addr := Mux(l1_pf_to_l2.valid, l1_pf_to_l2.bits.addr, sms_pf_to_l2.bits.addr) 610 outer.l2_pf_sender_opt.get.out.head._1.pf_source := Mux(l1_pf_to_l2.valid, l1_pf_to_l2.bits.source, sms_pf_to_l2.bits.source) 611 outer.l2_pf_sender_opt.get.out.head._1.l2_pf_en := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_enable, 2, Some(true.B)) 612 613 sms_pf.io.enable := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable, 2, Some(false.B)) 614 615 val l2_trace = Wire(new LoadPfDbBundle) 616 l2_trace.paddr := outer.l2_pf_sender_opt.get.out.head._1.addr 617 val table = ChiselDB.createTable(s"L2PrefetchTrace$hartId", new LoadPfDbBundle, basicDB = false) 618 table.log(l2_trace, l1_pf_to_l2.valid, "StreamPrefetchTrace", clock, reset) 619 table.log(l2_trace, !l1_pf_to_l2.valid && sms_pf_to_l2.valid, "L2PrefetchTrace", clock, reset) 620 621 val l1_pf_to_l3 = ValidIODelay(l1_pf.io.l3_req, 4) 622 outer.l3_pf_sender_opt.foreach(_.out.head._1.addr_valid := l1_pf_to_l3.valid) 623 outer.l3_pf_sender_opt.foreach(_.out.head._1.addr := l1_pf_to_l3.bits) 624 outer.l3_pf_sender_opt.foreach(_.out.head._1.l2_pf_en := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_enable, 4, Some(true.B))) 625 626 val l3_trace = Wire(new LoadPfDbBundle) 627 l3_trace.paddr := outer.l3_pf_sender_opt.map(_.out.head._1.addr).getOrElse(0.U) 628 val l3_table = ChiselDB.createTable(s"L3PrefetchTrace$hartId", new LoadPfDbBundle, basicDB = false) 629 l3_table.log(l3_trace, l1_pf_to_l3.valid, "StreamPrefetchTrace", clock, reset) 630 631 XSPerfAccumulate("prefetch_fire_l2", outer.l2_pf_sender_opt.get.out.head._1.addr_valid) 632 XSPerfAccumulate("prefetch_fire_l3", outer.l3_pf_sender_opt.map(_.out.head._1.addr_valid).getOrElse(false.B)) 633 XSPerfAccumulate("l1pf_fire_l2", l1_pf_to_l2.valid) 634 XSPerfAccumulate("sms_fire_l2", !l1_pf_to_l2.valid && sms_pf_to_l2.valid) 635 XSPerfAccumulate("sms_block_by_l1pf", l1_pf_to_l2.valid && sms_pf_to_l2.valid) 636 }) 637 }) 638 639 // ptw 640 val sfence = RegNext(RegNext(io.ooo_to_mem.sfence)) 641 val tlbcsr = RegNext(RegNext(io.ooo_to_mem.tlbCsr)) 642 private val ptw = outer.ptw.module 643 private val ptw_to_l2_buffer = outer.ptw_to_l2_buffer.module 644 private val l1d_to_l2_buffer = outer.l1d_to_l2_buffer.module 645 ptw.io.hartId := io.hartId 646 ptw.io.sfence <> sfence 647 ptw.io.csr.tlb <> tlbcsr 648 ptw.io.csr.distribute_csr <> csrCtrl.distribute_csr 649 650 val perfEventsPTW = if (!coreParams.softPTW) { 651 ptw.getPerfEvents 652 } else { 653 Seq() 654 } 655 656 // dtlb 657 val dtlb_ld_tlb_ld = Module(new TLBNonBlock(LduCnt + HyuCnt + 1, 2, ldtlbParams)) 658 val dtlb_st_tlb_st = Module(new TLBNonBlock(StaCnt, 1, sttlbParams)) 659 val dtlb_prefetch_tlb_prefetch = Module(new TLBNonBlock(2, 2, pftlbParams)) 660 val dtlb_ld = Seq(dtlb_ld_tlb_ld.io) 661 val dtlb_st = Seq(dtlb_st_tlb_st.io) 662 val dtlb_prefetch = Seq(dtlb_prefetch_tlb_prefetch.io) 663 /* tlb vec && constant variable */ 664 val dtlb = dtlb_ld ++ dtlb_st ++ dtlb_prefetch 665 val (dtlb_ld_idx, dtlb_st_idx, dtlb_pf_idx) = (0, 1, 2) 666 val TlbSubSizeVec = Seq(LduCnt + HyuCnt + 1, StaCnt, 2) // (load + hyu + stream pf, store, sms+l2bop) 667 val DTlbSize = TlbSubSizeVec.sum 668 val TlbStartVec = TlbSubSizeVec.scanLeft(0)(_ + _).dropRight(1) 669 val TlbEndVec = TlbSubSizeVec.scanLeft(0)(_ + _).drop(1) 670 671 val ptwio = Wire(new VectorTlbPtwIO(DTlbSize)) 672 val dtlb_reqs = dtlb.map(_.requestor).flatten 673 val dtlb_pmps = dtlb.map(_.pmp).flatten 674 dtlb.map(_.hartId := io.hartId) 675 dtlb.map(_.sfence := sfence) 676 dtlb.map(_.csr := tlbcsr) 677 dtlb.map(_.flushPipe.map(a => a := false.B)) // non-block doesn't need 678 dtlb.map(_.redirect := redirect) 679 if (refillBothTlb) { 680 require(ldtlbParams.outReplace == sttlbParams.outReplace) 681 require(ldtlbParams.outReplace == hytlbParams.outReplace) 682 require(ldtlbParams.outReplace == pftlbParams.outReplace) 683 require(ldtlbParams.outReplace) 684 685 val replace = Module(new TlbReplace(DTlbSize, ldtlbParams)) 686 replace.io.apply_sep(dtlb_ld.map(_.replace) ++ dtlb_st.map(_.replace) ++ dtlb_prefetch.map(_.replace), ptwio.resp.bits.data.s1.entry.tag) 687 } else { 688 // TODO: there will be bugs in TlbReplace when outReplace enable, since the order of Hyu is not right. 689 if (ldtlbParams.outReplace) { 690 val replace_ld = Module(new TlbReplace(LduCnt + 1, ldtlbParams)) 691 replace_ld.io.apply_sep(dtlb_ld.map(_.replace), ptwio.resp.bits.data.s1.entry.tag) 692 } 693 if (hytlbParams.outReplace) { 694 val replace_hy = Module(new TlbReplace(HyuCnt, hytlbParams)) 695 replace_hy.io.apply_sep(dtlb_ld.map(_.replace), ptwio.resp.bits.data.s1.entry.tag) 696 } 697 if (sttlbParams.outReplace) { 698 val replace_st = Module(new TlbReplace(StaCnt, sttlbParams)) 699 replace_st.io.apply_sep(dtlb_st.map(_.replace), ptwio.resp.bits.data.s1.entry.tag) 700 } 701 if (pftlbParams.outReplace) { 702 val replace_pf = Module(new TlbReplace(2, pftlbParams)) 703 replace_pf.io.apply_sep(dtlb_prefetch.map(_.replace), ptwio.resp.bits.data.s1.entry.tag) 704 } 705 } 706 707 val ptw_resp_next = RegEnable(ptwio.resp.bits, ptwio.resp.valid) 708 val ptw_resp_v = RegNext(ptwio.resp.valid && !(sfence.valid && tlbcsr.satp.changed && tlbcsr.vsatp.changed && tlbcsr.hgatp.changed), init = false.B) 709 ptwio.resp.ready := true.B 710 711 val tlbreplay = WireInit(VecInit(Seq.fill(LdExuCnt)(false.B))) 712 val tlbreplay_reg = GatedValidRegNext(tlbreplay) 713 val dtlb_ld0_tlbreplay_reg = GatedValidRegNext(dtlb_ld(0).tlbreplay) 714 715 if (backendParams.debugEn){ dontTouch(tlbreplay) } 716 717 for (i <- 0 until LdExuCnt) { 718 tlbreplay(i) := dtlb_ld(0).ptw.req(i).valid && ptw_resp_next.vector(0) && ptw_resp_v && 719 ptw_resp_next.data.hit(dtlb_ld(0).ptw.req(i).bits.vpn, tlbcsr.satp.asid, tlbcsr.vsatp.asid, tlbcsr.hgatp.vmid, allType = true, ignoreAsid = true) 720 } 721 722 dtlb.flatMap(a => a.ptw.req) 723 .zipWithIndex 724 .foreach{ case (tlb, i) => 725 tlb.ready := ptwio.req(i).ready 726 ptwio.req(i).bits := tlb.bits 727 val vector_hit = if (refillBothTlb) Cat(ptw_resp_next.vector).orR 728 else if (i < TlbEndVec(dtlb_ld_idx)) Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_ld_idx), TlbEndVec(dtlb_ld_idx))).orR 729 else if (i < TlbEndVec(dtlb_st_idx)) Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_st_idx), TlbEndVec(dtlb_st_idx))).orR 730 else Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_pf_idx), TlbEndVec(dtlb_pf_idx))).orR 731 ptwio.req(i).valid := tlb.valid && !(ptw_resp_v && vector_hit && ptw_resp_next.data.hit(tlb.bits.vpn, tlbcsr.satp.asid, tlbcsr.vsatp.asid, tlbcsr.hgatp.vmid, allType = true, ignoreAsid = true)) 732 } 733 dtlb.foreach(_.ptw.resp.bits := ptw_resp_next.data) 734 if (refillBothTlb) { 735 dtlb.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector).orR) 736 } else { 737 dtlb_ld.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_ld_idx), TlbEndVec(dtlb_ld_idx))).orR) 738 dtlb_st.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_st_idx), TlbEndVec(dtlb_st_idx))).orR) 739 dtlb_prefetch.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_pf_idx), TlbEndVec(dtlb_pf_idx))).orR) 740 } 741 dtlb_ld.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.take(LduCnt + HyuCnt + 1)).orR) 742 dtlb_st.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.slice(LduCnt + HyuCnt + 1, LduCnt + HyuCnt + 1 + StaCnt)).orR) 743 dtlb_prefetch.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.drop(LduCnt + HyuCnt + 1 + StaCnt)).orR) 744 745 val dtlbRepeater = PTWNewFilter(ldtlbParams.fenceDelay, ptwio, ptw.io.tlb(1), sfence, tlbcsr, l2tlbParams.dfilterSize) 746 val itlbRepeater3 = PTWRepeaterNB(passReady = false, itlbParams.fenceDelay, io.fetch_to_mem.itlb, ptw.io.tlb(0), sfence, tlbcsr) 747 748 lsq.io.debugTopDown.robHeadMissInDTlb := dtlbRepeater.io.rob_head_miss_in_tlb 749 750 // pmp 751 val pmp = Module(new PMP()) 752 pmp.io.distribute_csr <> csrCtrl.distribute_csr 753 754 val pmp_checkers = Seq.fill(DTlbSize)(Module(new PMPChecker(4, leaveHitMux = true))) 755 val pmp_check = pmp_checkers.map(_.io) 756 for ((p,d) <- pmp_check zip dtlb_pmps) { 757 if (HasBitmapCheck) { 758 p.apply(tlbcsr.mbmc.CMODE.asBool, tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, d) 759 } else { 760 p.apply(tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, d) 761 } 762 require(p.req.bits.size.getWidth == d.bits.size.getWidth) 763 } 764 765 for (i <- 0 until LduCnt) { 766 io.debug_ls.debugLsInfo(i) := loadUnits(i).io.debug_ls 767 } 768 for (i <- 0 until HyuCnt) { 769 io.debug_ls.debugLsInfo.drop(LduCnt)(i) := hybridUnits(i).io.ldu_io.debug_ls 770 } 771 for (i <- 0 until StaCnt) { 772 io.debug_ls.debugLsInfo.drop(LduCnt + HyuCnt)(i) := storeUnits(i).io.debug_ls 773 } 774 for (i <- 0 until HyuCnt) { 775 io.debug_ls.debugLsInfo.drop(LduCnt + HyuCnt + StaCnt)(i) := hybridUnits(i).io.stu_io.debug_ls 776 } 777 778 io.mem_to_ooo.lsTopdownInfo := loadUnits.map(_.io.lsTopdownInfo) ++ hybridUnits.map(_.io.ldu_io.lsTopdownInfo) 779 780 // trigger 781 val tdata = RegInit(VecInit(Seq.fill(TriggerNum)(0.U.asTypeOf(new MatchTriggerIO)))) 782 val tEnable = RegInit(VecInit(Seq.fill(TriggerNum)(false.B))) 783 tEnable := csrCtrl.mem_trigger.tEnableVec 784 when(csrCtrl.mem_trigger.tUpdate.valid) { 785 tdata(csrCtrl.mem_trigger.tUpdate.bits.addr) := csrCtrl.mem_trigger.tUpdate.bits.tdata 786 } 787 val triggerCanRaiseBpExp = csrCtrl.mem_trigger.triggerCanRaiseBpExp 788 val debugMode = csrCtrl.mem_trigger.debugMode 789 790 val backendTriggerTimingVec = VecInit(tdata.map(_.timing)) 791 val backendTriggerChainVec = VecInit(tdata.map(_.chain)) 792 793 XSDebug(tEnable.asUInt.orR, "Debug Mode: At least one store trigger is enabled\n") 794 for (j <- 0 until TriggerNum) 795 PrintTriggerInfo(tEnable(j), tdata(j)) 796 797 // The segment instruction is executed atomically. 798 // After the segment instruction directive starts executing, no other instructions should be executed. 799 val vSegmentFlag = RegInit(false.B) 800 801 when(GatedValidRegNext(vSegmentUnit.io.in.fire)) { 802 vSegmentFlag := true.B 803 }.elsewhen(GatedValidRegNext(vSegmentUnit.io.uopwriteback.valid)) { 804 vSegmentFlag := false.B 805 } 806 807 val misalign_allow_spec = RegInit(true.B) 808 val ldu_rollback_with_misalign_nack = loadUnits.map(ldu => 809 ldu.io.lsq.ldin.bits.isFrmMisAlignBuf && ldu.io.lsq.ldin.bits.rep_info.rar_nack && ldu.io.rollback.valid 810 ).reduce(_ || _) 811 when (ldu_rollback_with_misalign_nack) { 812 misalign_allow_spec := false.B 813 } .elsewhen(lsq.io.rarValidCount < (LoadQueueRARSize - 4).U) { 814 misalign_allow_spec := true.B 815 } 816 817 // LoadUnit 818 val correctMissTrain = Constantin.createRecord(s"CorrectMissTrain$hartId", initValue = false) 819 820 for (i <- 0 until LduCnt) { 821 loadUnits(i).io.redirect <> redirect 822 loadUnits(i).io.misalign_allow_spec := misalign_allow_spec 823 824 // get input form dispatch 825 loadUnits(i).io.ldin <> io.ooo_to_mem.issueLda(i) 826 loadUnits(i).io.feedback_slow <> io.mem_to_ooo.ldaIqFeedback(i).feedbackSlow 827 io.mem_to_ooo.ldaIqFeedback(i).feedbackFast := DontCare 828 loadUnits(i).io.correctMissTrain := correctMissTrain 829 io.mem_to_ooo.ldCancel.drop(HyuCnt)(i) := loadUnits(i).io.ldCancel 830 io.mem_to_ooo.wakeup.drop(HyuCnt)(i) := loadUnits(i).io.wakeup 831 832 // vector 833 if (i < VlduCnt) { 834 loadUnits(i).io.vecldout.ready := false.B 835 } else { 836 loadUnits(i).io.vecldin.valid := false.B 837 loadUnits(i).io.vecldin.bits := DontCare 838 loadUnits(i).io.vecldout.ready := false.B 839 } 840 841 // fast replay 842 loadUnits(i).io.fast_rep_in <> loadUnits(i).io.fast_rep_out 843 844 // SoftPrefetch to frontend (prefetch.i) 845 loadUnits(i).io.ifetchPrefetch <> io.ifetchPrefetch(i) 846 847 // dcache access 848 loadUnits(i).io.dcache <> dcache.io.lsu.load(i) 849 if(i == 0){ 850 vSegmentUnit.io.rdcache := DontCare 851 dcache.io.lsu.load(i).req.valid := loadUnits(i).io.dcache.req.valid || vSegmentUnit.io.rdcache.req.valid 852 dcache.io.lsu.load(i).req.bits := Mux1H(Seq( 853 vSegmentUnit.io.rdcache.req.valid -> vSegmentUnit.io.rdcache.req.bits, 854 loadUnits(i).io.dcache.req.valid -> loadUnits(i).io.dcache.req.bits 855 )) 856 vSegmentUnit.io.rdcache.req.ready := dcache.io.lsu.load(i).req.ready 857 } 858 859 // Dcache requests must also be preempted by the segment. 860 when(vSegmentFlag){ 861 loadUnits(i).io.dcache.req.ready := false.B // Dcache is preempted. 862 863 dcache.io.lsu.load(0).pf_source := vSegmentUnit.io.rdcache.pf_source 864 dcache.io.lsu.load(0).s1_paddr_dup_lsu := vSegmentUnit.io.rdcache.s1_paddr_dup_lsu 865 dcache.io.lsu.load(0).s1_paddr_dup_dcache := vSegmentUnit.io.rdcache.s1_paddr_dup_dcache 866 dcache.io.lsu.load(0).s1_kill := vSegmentUnit.io.rdcache.s1_kill 867 dcache.io.lsu.load(0).s2_kill := vSegmentUnit.io.rdcache.s2_kill 868 dcache.io.lsu.load(0).s0_pc := vSegmentUnit.io.rdcache.s0_pc 869 dcache.io.lsu.load(0).s1_pc := vSegmentUnit.io.rdcache.s1_pc 870 dcache.io.lsu.load(0).s2_pc := vSegmentUnit.io.rdcache.s2_pc 871 dcache.io.lsu.load(0).is128Req := vSegmentUnit.io.rdcache.is128Req 872 }.otherwise { 873 loadUnits(i).io.dcache.req.ready := dcache.io.lsu.load(i).req.ready 874 875 dcache.io.lsu.load(0).pf_source := loadUnits(0).io.dcache.pf_source 876 dcache.io.lsu.load(0).s1_paddr_dup_lsu := loadUnits(0).io.dcache.s1_paddr_dup_lsu 877 dcache.io.lsu.load(0).s1_paddr_dup_dcache := loadUnits(0).io.dcache.s1_paddr_dup_dcache 878 dcache.io.lsu.load(0).s1_kill := loadUnits(0).io.dcache.s1_kill 879 dcache.io.lsu.load(0).s2_kill := loadUnits(0).io.dcache.s2_kill 880 dcache.io.lsu.load(0).s0_pc := loadUnits(0).io.dcache.s0_pc 881 dcache.io.lsu.load(0).s1_pc := loadUnits(0).io.dcache.s1_pc 882 dcache.io.lsu.load(0).s2_pc := loadUnits(0).io.dcache.s2_pc 883 dcache.io.lsu.load(0).is128Req := loadUnits(0).io.dcache.is128Req 884 } 885 886 // forward 887 loadUnits(i).io.lsq.forward <> lsq.io.forward(i) 888 loadUnits(i).io.sbuffer <> sbuffer.io.forward(i) 889 loadUnits(i).io.ubuffer <> uncache.io.forward(i) 890 loadUnits(i).io.tl_d_channel := dcache.io.lsu.forward_D(i) 891 loadUnits(i).io.forward_mshr <> dcache.io.lsu.forward_mshr(i) 892 // ld-ld violation check 893 loadUnits(i).io.lsq.ldld_nuke_query <> lsq.io.ldu.ldld_nuke_query(i) 894 loadUnits(i).io.lsq.stld_nuke_query <> lsq.io.ldu.stld_nuke_query(i) 895 // loadqueue old ptr 896 loadUnits(i).io.lsq.lqDeqPtr := lsq.io.lqDeqPtr 897 loadUnits(i).io.csrCtrl <> csrCtrl 898 // dcache refill req 899 // loadUnits(i).io.refill <> delayedDcacheRefill 900 // dtlb 901 loadUnits(i).io.tlb <> dtlb_reqs.take(LduCnt)(i) 902 if(i == 0 ){ // port 0 assign to vsegmentUnit 903 val vsegmentDtlbReqValid = vSegmentUnit.io.dtlb.req.valid // segment tlb resquest need to delay 1 cycle 904 dtlb_reqs.take(LduCnt)(i).req.valid := loadUnits(i).io.tlb.req.valid || RegNext(vsegmentDtlbReqValid) 905 vSegmentUnit.io.dtlb.req.ready := dtlb_reqs.take(LduCnt)(i).req.ready 906 dtlb_reqs.take(LduCnt)(i).req.bits := ParallelPriorityMux(Seq( 907 RegNext(vsegmentDtlbReqValid) -> RegEnable(vSegmentUnit.io.dtlb.req.bits, vsegmentDtlbReqValid), 908 loadUnits(i).io.tlb.req.valid -> loadUnits(i).io.tlb.req.bits 909 )) 910 } 911 // pmp 912 loadUnits(i).io.pmp <> pmp_check(i).resp 913 // st-ld violation query 914 val stld_nuke_query = storeUnits.map(_.io.stld_nuke_query) ++ hybridUnits.map(_.io.stu_io.stld_nuke_query) 915 for (s <- 0 until StorePipelineWidth) { 916 loadUnits(i).io.stld_nuke_query(s) := stld_nuke_query(s) 917 } 918 loadUnits(i).io.lq_rep_full <> lsq.io.lq_rep_full 919 // load prefetch train 920 prefetcherOpt.foreach(pf => { 921 // sms will train on all miss load sources 922 val source = loadUnits(i).io.prefetch_train 923 pf.io.ld_in(i).valid := Mux(pf_train_on_hit, 924 source.valid, 925 source.valid && source.bits.isFirstIssue && source.bits.miss 926 ) 927 pf.io.ld_in(i).bits := source.bits 928 val loadPc = RegNext(io.ooo_to_mem.issueLda(i).bits.uop.pc) // for s1 929 pf.io.ld_in(i).bits.uop.pc := Mux( 930 loadUnits(i).io.s2_ptr_chasing, 931 RegEnable(loadPc, loadUnits(i).io.s2_prefetch_spec), 932 RegEnable(RegEnable(loadPc, loadUnits(i).io.s1_prefetch_spec), loadUnits(i).io.s2_prefetch_spec) 933 ) 934 }) 935 l1PrefetcherOpt.foreach(pf => { 936 // stream will train on all load sources 937 val source = loadUnits(i).io.prefetch_train_l1 938 pf.io.ld_in(i).valid := source.valid && source.bits.isFirstIssue 939 pf.io.ld_in(i).bits := source.bits 940 }) 941 942 // load to load fast forward: load(i) prefers data(i) 943 val l2l_fwd_out = loadUnits.map(_.io.l2l_fwd_out) ++ hybridUnits.map(_.io.ldu_io.l2l_fwd_out) 944 val fastPriority = (i until LduCnt + HyuCnt) ++ (0 until i) 945 val fastValidVec = fastPriority.map(j => l2l_fwd_out(j).valid) 946 val fastDataVec = fastPriority.map(j => l2l_fwd_out(j).data) 947 val fastErrorVec = fastPriority.map(j => l2l_fwd_out(j).dly_ld_err) 948 val fastMatchVec = fastPriority.map(j => io.ooo_to_mem.loadFastMatch(i)(j)) 949 loadUnits(i).io.l2l_fwd_in.valid := VecInit(fastValidVec).asUInt.orR 950 loadUnits(i).io.l2l_fwd_in.data := ParallelPriorityMux(fastValidVec, fastDataVec) 951 loadUnits(i).io.l2l_fwd_in.dly_ld_err := ParallelPriorityMux(fastValidVec, fastErrorVec) 952 val fastMatch = ParallelPriorityMux(fastValidVec, fastMatchVec) 953 loadUnits(i).io.ld_fast_match := fastMatch 954 loadUnits(i).io.ld_fast_imm := io.ooo_to_mem.loadFastImm(i) 955 loadUnits(i).io.ld_fast_fuOpType := io.ooo_to_mem.loadFastFuOpType(i) 956 loadUnits(i).io.replay <> lsq.io.replay(i) 957 958 val l2_hint = RegNext(io.l2_hint) 959 960 // L2 Hint for DCache 961 dcache.io.l2_hint <> l2_hint 962 963 loadUnits(i).io.l2_hint <> l2_hint 964 loadUnits(i).io.tlb_hint.id := dtlbRepeater.io.hint.get.req(i).id 965 loadUnits(i).io.tlb_hint.full := dtlbRepeater.io.hint.get.req(i).full || 966 tlbreplay_reg(i) || dtlb_ld0_tlbreplay_reg(i) 967 968 // passdown to lsq (load s2) 969 lsq.io.ldu.ldin(i) <> loadUnits(i).io.lsq.ldin 970 if (i == UncacheWBPort) { 971 lsq.io.ldout(i) <> loadUnits(i).io.lsq.uncache 972 } else { 973 lsq.io.ldout(i).ready := true.B 974 loadUnits(i).io.lsq.uncache.valid := false.B 975 loadUnits(i).io.lsq.uncache.bits := DontCare 976 } 977 lsq.io.ld_raw_data(i) <> loadUnits(i).io.lsq.ld_raw_data 978 lsq.io.ncOut(i) <> loadUnits(i).io.lsq.nc_ldin 979 lsq.io.l2_hint.valid := l2_hint.valid 980 lsq.io.l2_hint.bits.sourceId := l2_hint.bits.sourceId 981 lsq.io.l2_hint.bits.isKeyword := l2_hint.bits.isKeyword 982 983 lsq.io.tlb_hint <> dtlbRepeater.io.hint.get 984 985 // connect misalignBuffer 986 loadMisalignBuffer.io.req(i) <> loadUnits(i).io.misalign_buf 987 988 if (i == MisalignWBPort) { 989 loadUnits(i).io.misalign_ldin <> loadMisalignBuffer.io.splitLoadReq 990 loadUnits(i).io.misalign_ldout <> loadMisalignBuffer.io.splitLoadResp 991 } else { 992 loadUnits(i).io.misalign_ldin.valid := false.B 993 loadUnits(i).io.misalign_ldin.bits := DontCare 994 } 995 996 // alter writeback exception info 997 io.mem_to_ooo.s3_delayed_load_error(i) := loadUnits(i).io.s3_dly_ld_err 998 999 // update mem dependency predictor 1000 // io.memPredUpdate(i) := DontCare 1001 1002 // -------------------------------- 1003 // Load Triggers 1004 // -------------------------------- 1005 loadUnits(i).io.fromCsrTrigger.tdataVec := tdata 1006 loadUnits(i).io.fromCsrTrigger.tEnableVec := tEnable 1007 loadUnits(i).io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp 1008 loadUnits(i).io.fromCsrTrigger.debugMode := debugMode 1009 } 1010 1011 for (i <- 0 until HyuCnt) { 1012 hybridUnits(i).io.redirect <> redirect 1013 1014 // get input from dispatch 1015 hybridUnits(i).io.lsin <> io.ooo_to_mem.issueHya(i) 1016 hybridUnits(i).io.feedback_slow <> io.mem_to_ooo.hyuIqFeedback(i).feedbackSlow 1017 hybridUnits(i).io.feedback_fast <> io.mem_to_ooo.hyuIqFeedback(i).feedbackFast 1018 hybridUnits(i).io.correctMissTrain := correctMissTrain 1019 io.mem_to_ooo.ldCancel.take(HyuCnt)(i) := hybridUnits(i).io.ldu_io.ldCancel 1020 io.mem_to_ooo.wakeup.take(HyuCnt)(i) := hybridUnits(i).io.ldu_io.wakeup 1021 1022 // ------------------------------------ 1023 // Load Port 1024 // ------------------------------------ 1025 // fast replay 1026 hybridUnits(i).io.ldu_io.fast_rep_in <> hybridUnits(i).io.ldu_io.fast_rep_out 1027 1028 // get input from dispatch 1029 hybridUnits(i).io.ldu_io.dcache <> dcache.io.lsu.load(LduCnt + i) 1030 hybridUnits(i).io.stu_io.dcache <> dcache.io.lsu.sta(StaCnt + i) 1031 1032 // dcache access 1033 hybridUnits(i).io.ldu_io.lsq.forward <> lsq.io.forward(LduCnt + i) 1034 // forward 1035 hybridUnits(i).io.ldu_io.sbuffer <> sbuffer.io.forward(LduCnt + i) 1036 hybridUnits(i).io.ldu_io.ubuffer <> uncache.io.forward(LduCnt + i) 1037 // hybridUnits(i).io.ldu_io.vec_forward <> vsFlowQueue.io.forward(LduCnt + i) 1038 hybridUnits(i).io.ldu_io.vec_forward := DontCare 1039 hybridUnits(i).io.ldu_io.tl_d_channel := dcache.io.lsu.forward_D(LduCnt + i) 1040 hybridUnits(i).io.ldu_io.forward_mshr <> dcache.io.lsu.forward_mshr(LduCnt + i) 1041 // ld-ld violation check 1042 hybridUnits(i).io.ldu_io.lsq.ldld_nuke_query <> lsq.io.ldu.ldld_nuke_query(LduCnt + i) 1043 hybridUnits(i).io.ldu_io.lsq.stld_nuke_query <> lsq.io.ldu.stld_nuke_query(LduCnt + i) 1044 hybridUnits(i).io.csrCtrl <> csrCtrl 1045 // dcache refill req 1046 hybridUnits(i).io.ldu_io.tlb_hint.id := dtlbRepeater.io.hint.get.req(LduCnt + i).id 1047 hybridUnits(i).io.ldu_io.tlb_hint.full := dtlbRepeater.io.hint.get.req(LduCnt + i).full || 1048 tlbreplay_reg(LduCnt + i) || dtlb_ld0_tlbreplay_reg(LduCnt + i) 1049 1050 // dtlb 1051 hybridUnits(i).io.tlb <> dtlb_ld.head.requestor(LduCnt + i) 1052 // pmp 1053 hybridUnits(i).io.pmp <> pmp_check.drop(LduCnt)(i).resp 1054 // st-ld violation query 1055 val stld_nuke_query = VecInit(storeUnits.map(_.io.stld_nuke_query) ++ hybridUnits.map(_.io.stu_io.stld_nuke_query)) 1056 hybridUnits(i).io.ldu_io.stld_nuke_query := stld_nuke_query 1057 hybridUnits(i).io.ldu_io.lq_rep_full <> lsq.io.lq_rep_full 1058 // load prefetch train 1059 prefetcherOpt.foreach(pf => { 1060 val source = hybridUnits(i).io.prefetch_train 1061 pf.io.ld_in(LduCnt + i).valid := Mux(pf_train_on_hit, 1062 source.valid, 1063 source.valid && source.bits.isFirstIssue && source.bits.miss 1064 ) 1065 pf.io.ld_in(LduCnt + i).bits := source.bits 1066 pf.io.ld_in(LduCnt + i).bits.uop.pc := Mux(hybridUnits(i).io.ldu_io.s2_ptr_chasing, io.ooo_to_mem.hybridPc(i), RegNext(io.ooo_to_mem.hybridPc(i))) 1067 }) 1068 l1PrefetcherOpt.foreach(pf => { 1069 // stream will train on all load sources 1070 val source = hybridUnits(i).io.prefetch_train_l1 1071 pf.io.ld_in(LduCnt + i).valid := source.valid && source.bits.isFirstIssue && 1072 FuType.isLoad(source.bits.uop.fuType) 1073 pf.io.ld_in(LduCnt + i).bits := source.bits 1074 pf.io.st_in(StaCnt + i).valid := false.B 1075 pf.io.st_in(StaCnt + i).bits := DontCare 1076 }) 1077 prefetcherOpt.foreach(pf => { 1078 val source = hybridUnits(i).io.prefetch_train 1079 pf.io.st_in(StaCnt + i).valid := Mux(pf_train_on_hit, 1080 source.valid, 1081 source.valid && source.bits.isFirstIssue && source.bits.miss 1082 ) && FuType.isStore(source.bits.uop.fuType) 1083 pf.io.st_in(StaCnt + i).bits := source.bits 1084 pf.io.st_in(StaCnt + i).bits.uop.pc := RegNext(io.ooo_to_mem.hybridPc(i)) 1085 }) 1086 1087 // load to load fast forward: load(i) prefers data(i) 1088 val l2l_fwd_out = loadUnits.map(_.io.l2l_fwd_out) ++ hybridUnits.map(_.io.ldu_io.l2l_fwd_out) 1089 val fastPriority = (LduCnt + i until LduCnt + HyuCnt) ++ (0 until LduCnt + i) 1090 val fastValidVec = fastPriority.map(j => l2l_fwd_out(j).valid) 1091 val fastDataVec = fastPriority.map(j => l2l_fwd_out(j).data) 1092 val fastErrorVec = fastPriority.map(j => l2l_fwd_out(j).dly_ld_err) 1093 val fastMatchVec = fastPriority.map(j => io.ooo_to_mem.loadFastMatch(LduCnt + i)(j)) 1094 hybridUnits(i).io.ldu_io.l2l_fwd_in.valid := VecInit(fastValidVec).asUInt.orR 1095 hybridUnits(i).io.ldu_io.l2l_fwd_in.data := ParallelPriorityMux(fastValidVec, fastDataVec) 1096 hybridUnits(i).io.ldu_io.l2l_fwd_in.dly_ld_err := ParallelPriorityMux(fastValidVec, fastErrorVec) 1097 val fastMatch = ParallelPriorityMux(fastValidVec, fastMatchVec) 1098 hybridUnits(i).io.ldu_io.ld_fast_match := fastMatch 1099 hybridUnits(i).io.ldu_io.ld_fast_imm := io.ooo_to_mem.loadFastImm(LduCnt + i) 1100 hybridUnits(i).io.ldu_io.ld_fast_fuOpType := io.ooo_to_mem.loadFastFuOpType(LduCnt + i) 1101 hybridUnits(i).io.ldu_io.replay <> lsq.io.replay(LduCnt + i) 1102 hybridUnits(i).io.ldu_io.l2_hint <> io.l2_hint 1103 1104 // uncache 1105 lsq.io.ldout.drop(LduCnt)(i) <> hybridUnits(i).io.ldu_io.lsq.uncache 1106 lsq.io.ld_raw_data.drop(LduCnt)(i) <> hybridUnits(i).io.ldu_io.lsq.ld_raw_data 1107 1108 1109 // passdown to lsq (load s2) 1110 hybridUnits(i).io.ldu_io.lsq.nc_ldin.valid := false.B 1111 hybridUnits(i).io.ldu_io.lsq.nc_ldin.bits := DontCare 1112 lsq.io.ldu.ldin(LduCnt + i) <> hybridUnits(i).io.ldu_io.lsq.ldin 1113 // Lsq to sta unit 1114 lsq.io.sta.storeMaskIn(StaCnt + i) <> hybridUnits(i).io.stu_io.st_mask_out 1115 1116 // Lsq to std unit's rs 1117 lsq.io.std.storeDataIn(StaCnt + i) := stData(StaCnt + i) 1118 lsq.io.std.storeDataIn(StaCnt + i).valid := stData(StaCnt + i).valid && !st_data_atomics(StaCnt + i) 1119 // prefetch 1120 hybridUnits(i).io.stu_io.prefetch_req <> sbuffer.io.store_prefetch(StaCnt + i) 1121 1122 io.mem_to_ooo.s3_delayed_load_error(LduCnt + i) := hybridUnits(i).io.ldu_io.s3_dly_ld_err 1123 1124 // ------------------------------------ 1125 // Store Port 1126 // ------------------------------------ 1127 hybridUnits(i).io.stu_io.lsq <> lsq.io.sta.storeAddrIn.takeRight(HyuCnt)(i) 1128 hybridUnits(i).io.stu_io.lsq_replenish <> lsq.io.sta.storeAddrInRe.takeRight(HyuCnt)(i) 1129 1130 lsq.io.sta.storeMaskIn.takeRight(HyuCnt)(i) <> hybridUnits(i).io.stu_io.st_mask_out 1131 io.mem_to_ooo.stIn.takeRight(HyuCnt)(i).valid := hybridUnits(i).io.stu_io.issue.valid 1132 io.mem_to_ooo.stIn.takeRight(HyuCnt)(i).bits := hybridUnits(i).io.stu_io.issue.bits 1133 1134 // ------------------------------------ 1135 // Vector Store Port 1136 // ------------------------------------ 1137 hybridUnits(i).io.vec_stu_io.isFirstIssue := true.B 1138 1139 // ------------------------- 1140 // Store Triggers 1141 // ------------------------- 1142 hybridUnits(i).io.fromCsrTrigger.tdataVec := tdata 1143 hybridUnits(i).io.fromCsrTrigger.tEnableVec := tEnable 1144 hybridUnits(i).io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp 1145 hybridUnits(i).io.fromCsrTrigger.debugMode := debugMode 1146 } 1147 1148 // misalignBuffer 1149 loadMisalignBuffer.io.redirect <> redirect 1150 loadMisalignBuffer.io.rob.lcommit := io.ooo_to_mem.lsqio.lcommit 1151 loadMisalignBuffer.io.rob.scommit := io.ooo_to_mem.lsqio.scommit 1152 loadMisalignBuffer.io.rob.pendingMMIOld := io.ooo_to_mem.lsqio.pendingMMIOld 1153 loadMisalignBuffer.io.rob.pendingld := io.ooo_to_mem.lsqio.pendingld 1154 loadMisalignBuffer.io.rob.pendingst := io.ooo_to_mem.lsqio.pendingst 1155 loadMisalignBuffer.io.rob.pendingVst := io.ooo_to_mem.lsqio.pendingVst 1156 loadMisalignBuffer.io.rob.commit := io.ooo_to_mem.lsqio.commit 1157 loadMisalignBuffer.io.rob.pendingPtr := io.ooo_to_mem.lsqio.pendingPtr 1158 loadMisalignBuffer.io.rob.pendingPtrNext := io.ooo_to_mem.lsqio.pendingPtrNext 1159 1160 lsq.io.loadMisalignFull := loadMisalignBuffer.io.loadMisalignFull 1161 lsq.io.misalignAllowSpec := misalign_allow_spec 1162 1163 storeMisalignBuffer.io.redirect <> redirect 1164 storeMisalignBuffer.io.rob.lcommit := io.ooo_to_mem.lsqio.lcommit 1165 storeMisalignBuffer.io.rob.scommit := io.ooo_to_mem.lsqio.scommit 1166 storeMisalignBuffer.io.rob.pendingMMIOld := io.ooo_to_mem.lsqio.pendingMMIOld 1167 storeMisalignBuffer.io.rob.pendingld := io.ooo_to_mem.lsqio.pendingld 1168 storeMisalignBuffer.io.rob.pendingst := io.ooo_to_mem.lsqio.pendingst 1169 storeMisalignBuffer.io.rob.pendingVst := io.ooo_to_mem.lsqio.pendingVst 1170 storeMisalignBuffer.io.rob.commit := io.ooo_to_mem.lsqio.commit 1171 storeMisalignBuffer.io.rob.pendingPtr := io.ooo_to_mem.lsqio.pendingPtr 1172 storeMisalignBuffer.io.rob.pendingPtrNext := io.ooo_to_mem.lsqio.pendingPtrNext 1173 1174 lsq.io.maControl <> storeMisalignBuffer.io.sqControl 1175 1176 lsq.io.cmoOpReq <> dcache.io.cmoOpReq 1177 lsq.io.cmoOpResp <> dcache.io.cmoOpResp 1178 1179 // Prefetcher 1180 val StreamDTLBPortIndex = TlbStartVec(dtlb_ld_idx) + LduCnt + HyuCnt 1181 val PrefetcherDTLBPortIndex = TlbStartVec(dtlb_pf_idx) 1182 val L2toL1DLBPortIndex = TlbStartVec(dtlb_pf_idx) + 1 1183 prefetcherOpt match { 1184 case Some(pf) => 1185 dtlb_reqs(PrefetcherDTLBPortIndex) <> pf.io.tlb_req 1186 pf.io.pmp_resp := pmp_check(PrefetcherDTLBPortIndex).resp 1187 case None => 1188 dtlb_reqs(PrefetcherDTLBPortIndex) := DontCare 1189 dtlb_reqs(PrefetcherDTLBPortIndex).req.valid := false.B 1190 dtlb_reqs(PrefetcherDTLBPortIndex).resp.ready := true.B 1191 } 1192 l1PrefetcherOpt match { 1193 case Some(pf) => 1194 dtlb_reqs(StreamDTLBPortIndex) <> pf.io.tlb_req 1195 pf.io.pmp_resp := pmp_check(StreamDTLBPortIndex).resp 1196 case None => 1197 dtlb_reqs(StreamDTLBPortIndex) := DontCare 1198 dtlb_reqs(StreamDTLBPortIndex).req.valid := false.B 1199 dtlb_reqs(StreamDTLBPortIndex).resp.ready := true.B 1200 } 1201 dtlb_reqs(L2toL1DLBPortIndex) <> io.l2_tlb_req 1202 dtlb_reqs(L2toL1DLBPortIndex).resp.ready := true.B 1203 io.l2_pmp_resp := pmp_check(L2toL1DLBPortIndex).resp 1204 1205 // StoreUnit 1206 for (i <- 0 until StdCnt) { 1207 stdExeUnits(i).io.flush <> redirect 1208 stdExeUnits(i).io.in.valid := io.ooo_to_mem.issueStd(i).valid 1209 io.ooo_to_mem.issueStd(i).ready := stdExeUnits(i).io.in.ready 1210 stdExeUnits(i).io.in.bits := io.ooo_to_mem.issueStd(i).bits 1211 } 1212 1213 for (i <- 0 until StaCnt) { 1214 val stu = storeUnits(i) 1215 1216 stu.io.redirect <> redirect 1217 stu.io.csrCtrl <> csrCtrl 1218 stu.io.dcache <> dcache.io.lsu.sta(i) 1219 stu.io.feedback_slow <> io.mem_to_ooo.staIqFeedback(i).feedbackSlow 1220 stu.io.stin <> io.ooo_to_mem.issueSta(i) 1221 stu.io.lsq <> lsq.io.sta.storeAddrIn(i) 1222 stu.io.lsq_replenish <> lsq.io.sta.storeAddrInRe(i) 1223 // dtlb 1224 stu.io.tlb <> dtlb_st.head.requestor(i) 1225 stu.io.pmp <> pmp_check(LduCnt + HyuCnt + 1 + i).resp 1226 1227 // ------------------------- 1228 // Store Triggers 1229 // ------------------------- 1230 stu.io.fromCsrTrigger.tdataVec := tdata 1231 stu.io.fromCsrTrigger.tEnableVec := tEnable 1232 stu.io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp 1233 stu.io.fromCsrTrigger.debugMode := debugMode 1234 1235 // prefetch 1236 stu.io.prefetch_req <> sbuffer.io.store_prefetch(i) 1237 1238 // store unit does not need fast feedback 1239 io.mem_to_ooo.staIqFeedback(i).feedbackFast := DontCare 1240 1241 // Lsq to sta unit 1242 lsq.io.sta.storeMaskIn(i) <> stu.io.st_mask_out 1243 1244 // connect misalignBuffer 1245 storeMisalignBuffer.io.req(i) <> stu.io.misalign_buf 1246 1247 if (i == 0) { 1248 stu.io.misalign_stin <> storeMisalignBuffer.io.splitStoreReq 1249 stu.io.misalign_stout <> storeMisalignBuffer.io.splitStoreResp 1250 } else { 1251 stu.io.misalign_stin.valid := false.B 1252 stu.io.misalign_stin.bits := DontCare 1253 } 1254 1255 // Lsq to std unit's rs 1256 if (i < VstuCnt){ 1257 when (vsSplit(i).io.vstd.get.valid) { 1258 lsq.io.std.storeDataIn(i).valid := true.B 1259 lsq.io.std.storeDataIn(i).bits := vsSplit(i).io.vstd.get.bits 1260 stData(i).ready := false.B 1261 }.otherwise { 1262 lsq.io.std.storeDataIn(i).valid := stData(i).valid && !st_data_atomics(i) 1263 lsq.io.std.storeDataIn(i).bits.uop := stData(i).bits.uop 1264 lsq.io.std.storeDataIn(i).bits.data := stData(i).bits.data 1265 lsq.io.std.storeDataIn(i).bits.mask.map(_ := 0.U) 1266 lsq.io.std.storeDataIn(i).bits.vdIdx.map(_ := 0.U) 1267 lsq.io.std.storeDataIn(i).bits.vdIdxInField.map(_ := 0.U) 1268 stData(i).ready := true.B 1269 } 1270 } else { 1271 lsq.io.std.storeDataIn(i).valid := stData(i).valid && !st_data_atomics(i) 1272 lsq.io.std.storeDataIn(i).bits.uop := stData(i).bits.uop 1273 lsq.io.std.storeDataIn(i).bits.data := stData(i).bits.data 1274 lsq.io.std.storeDataIn(i).bits.mask.map(_ := 0.U) 1275 lsq.io.std.storeDataIn(i).bits.vdIdx.map(_ := 0.U) 1276 lsq.io.std.storeDataIn(i).bits.vdIdxInField.map(_ := 0.U) 1277 stData(i).ready := true.B 1278 } 1279 lsq.io.std.storeDataIn.map(_.bits.debug := 0.U.asTypeOf(new DebugBundle)) 1280 lsq.io.std.storeDataIn.foreach(_.bits.isFromLoadUnit := DontCare) 1281 1282 1283 // store prefetch train 1284 l1PrefetcherOpt.foreach(pf => { 1285 // stream will train on all load sources 1286 pf.io.st_in(i).valid := false.B 1287 pf.io.st_in(i).bits := DontCare 1288 }) 1289 1290 prefetcherOpt.foreach(pf => { 1291 pf.io.st_in(i).valid := Mux(pf_train_on_hit, 1292 stu.io.prefetch_train.valid, 1293 stu.io.prefetch_train.valid && stu.io.prefetch_train.bits.isFirstIssue && ( 1294 stu.io.prefetch_train.bits.miss 1295 ) 1296 ) 1297 pf.io.st_in(i).bits := stu.io.prefetch_train.bits 1298 pf.io.st_in(i).bits.uop.pc := RegEnable(RegEnable(io.ooo_to_mem.storePc(i), stu.io.s1_prefetch_spec), stu.io.s2_prefetch_spec) 1299 }) 1300 1301 // 1. sync issue info to store set LFST 1302 // 2. when store issue, broadcast issued sqPtr to wake up the following insts 1303 // io.stIn(i).valid := io.issue(exuParameters.LduCnt + i).valid 1304 // io.stIn(i).bits := io.issue(exuParameters.LduCnt + i).bits 1305 io.mem_to_ooo.stIn(i).valid := stu.io.issue.valid 1306 io.mem_to_ooo.stIn(i).bits := stu.io.issue.bits 1307 1308 stu.io.stout.ready := true.B 1309 1310 // vector 1311 if (i < VstuCnt) { 1312 stu.io.vecstin <> vsSplit(i).io.out 1313 // vsFlowQueue.io.pipeFeedback(i) <> stu.io.vec_feedback_slow // need connect 1314 } else { 1315 stu.io.vecstin.valid := false.B 1316 stu.io.vecstin.bits := DontCare 1317 stu.io.vecstout.ready := false.B 1318 } 1319 stu.io.vec_isFirstIssue := true.B // TODO 1320 } 1321 1322 val sqOtherStout = WireInit(0.U.asTypeOf(DecoupledIO(new MemExuOutput))) 1323 sqOtherStout.valid := lsq.io.mmioStout.valid || lsq.io.cboZeroStout.valid 1324 sqOtherStout.bits := Mux(lsq.io.cboZeroStout.valid, lsq.io.cboZeroStout.bits, lsq.io.mmioStout.bits) 1325 assert(!(lsq.io.mmioStout.valid && lsq.io.cboZeroStout.valid), "Cannot writeback to mmio and cboZero at the same time.") 1326 1327 // Store writeback by StoreQueue: 1328 // 1. cbo Zero 1329 // 2. mmio 1330 // Currently, the two should not be present at the same time, so simply make cbo zero a higher priority. 1331 val otherStout = WireInit(0.U.asTypeOf(lsq.io.mmioStout)) 1332 NewPipelineConnect( 1333 sqOtherStout, otherStout, otherStout.fire, 1334 false.B, 1335 Option("otherStoutConnect") 1336 ) 1337 otherStout.ready := false.B 1338 when (otherStout.valid && !storeUnits(0).io.stout.valid) { 1339 stOut(0).valid := true.B 1340 stOut(0).bits := otherStout.bits 1341 otherStout.ready := true.B 1342 } 1343 lsq.io.mmioStout.ready := sqOtherStout.ready 1344 lsq.io.cboZeroStout.ready := sqOtherStout.ready 1345 1346 // vec mmio writeback 1347 lsq.io.vecmmioStout.ready := false.B 1348 1349 // miss align buffer will overwrite stOut(0) 1350 val storeMisalignCanWriteBack = !otherStout.valid && !storeUnits(0).io.stout.valid && !storeUnits(0).io.vecstout.valid 1351 storeMisalignBuffer.io.writeBack.ready := storeMisalignCanWriteBack 1352 storeMisalignBuffer.io.storeOutValid := storeUnits(0).io.stout.valid 1353 storeMisalignBuffer.io.storeVecOutValid := storeUnits(0).io.vecstout.valid 1354 when (storeMisalignBuffer.io.writeBack.valid && storeMisalignCanWriteBack) { 1355 stOut(0).valid := true.B 1356 stOut(0).bits := storeMisalignBuffer.io.writeBack.bits 1357 } 1358 1359 // Uncache 1360 uncache.io.enableOutstanding := io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable 1361 uncache.io.hartId := io.hartId 1362 lsq.io.uncacheOutstanding := io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable 1363 1364 // Lsq 1365 io.mem_to_ooo.lsqio.mmio := lsq.io.rob.mmio 1366 io.mem_to_ooo.lsqio.uop := lsq.io.rob.uop 1367 lsq.io.rob.lcommit := io.ooo_to_mem.lsqio.lcommit 1368 lsq.io.rob.scommit := io.ooo_to_mem.lsqio.scommit 1369 lsq.io.rob.pendingMMIOld := io.ooo_to_mem.lsqio.pendingMMIOld 1370 lsq.io.rob.pendingld := io.ooo_to_mem.lsqio.pendingld 1371 lsq.io.rob.pendingst := io.ooo_to_mem.lsqio.pendingst 1372 lsq.io.rob.pendingVst := io.ooo_to_mem.lsqio.pendingVst 1373 lsq.io.rob.commit := io.ooo_to_mem.lsqio.commit 1374 lsq.io.rob.pendingPtr := io.ooo_to_mem.lsqio.pendingPtr 1375 lsq.io.rob.pendingPtrNext := io.ooo_to_mem.lsqio.pendingPtrNext 1376 1377 // lsq.io.rob <> io.lsqio.rob 1378 lsq.io.enq <> io.ooo_to_mem.enqLsq 1379 lsq.io.brqRedirect <> redirect 1380 1381 // violation rollback 1382 def selectOldestRedirect(xs: Seq[Valid[Redirect]]): Vec[Bool] = { 1383 val compareVec = (0 until xs.length).map(i => (0 until i).map(j => isAfter(xs(j).bits.robIdx, xs(i).bits.robIdx))) 1384 val resultOnehot = VecInit((0 until xs.length).map(i => Cat((0 until xs.length).map(j => 1385 (if (j < i) !xs(j).valid || compareVec(i)(j) 1386 else if (j == i) xs(i).valid 1387 else !xs(j).valid || !compareVec(j)(i)) 1388 )).andR)) 1389 resultOnehot 1390 } 1391 val allRedirect = loadUnits.map(_.io.rollback) ++ hybridUnits.map(_.io.ldu_io.rollback) ++ lsq.io.nack_rollback ++ lsq.io.nuke_rollback 1392 val oldestOneHot = selectOldestRedirect(allRedirect) 1393 val oldestRedirect = WireDefault(Mux1H(oldestOneHot, allRedirect)) 1394 // memory replay would not cause IAF/IPF/IGPF 1395 oldestRedirect.bits.cfiUpdate.backendIAF := false.B 1396 oldestRedirect.bits.cfiUpdate.backendIPF := false.B 1397 oldestRedirect.bits.cfiUpdate.backendIGPF := false.B 1398 io.mem_to_ooo.memoryViolation := oldestRedirect 1399 io.mem_to_ooo.lsqio.lqCanAccept := lsq.io.lqCanAccept 1400 io.mem_to_ooo.lsqio.sqCanAccept := lsq.io.sqCanAccept 1401 1402 // lsq.io.uncache <> uncache.io.lsq 1403 val s_idle :: s_scalar_uncache :: s_vector_uncache :: Nil = Enum(3) 1404 val uncacheState = RegInit(s_idle) 1405 val uncacheReq = Wire(Decoupled(new UncacheWordReq)) 1406 val uncacheIdResp = uncache.io.lsq.idResp 1407 val uncacheResp = Wire(Decoupled(new UncacheWordResp)) 1408 1409 uncacheReq.bits := DontCare 1410 uncacheReq.valid := false.B 1411 uncacheReq.ready := false.B 1412 uncacheResp.bits := DontCare 1413 uncacheResp.valid := false.B 1414 uncacheResp.ready := false.B 1415 lsq.io.uncache.req.ready := false.B 1416 lsq.io.uncache.idResp.valid := false.B 1417 lsq.io.uncache.idResp.bits := DontCare 1418 lsq.io.uncache.resp.valid := false.B 1419 lsq.io.uncache.resp.bits := DontCare 1420 1421 switch (uncacheState) { 1422 is (s_idle) { 1423 when (uncacheReq.fire) { 1424 when (lsq.io.uncache.req.valid) { 1425 when (!lsq.io.uncache.req.bits.nc || !io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) { 1426 uncacheState := s_scalar_uncache 1427 } 1428 }.otherwise { 1429 // val isStore = vsFlowQueue.io.uncache.req.bits.cmd === MemoryOpConstants.M_XWR 1430 when (!io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) { 1431 uncacheState := s_vector_uncache 1432 } 1433 } 1434 } 1435 } 1436 1437 is (s_scalar_uncache) { 1438 when (uncacheResp.fire) { 1439 uncacheState := s_idle 1440 } 1441 } 1442 1443 is (s_vector_uncache) { 1444 when (uncacheResp.fire) { 1445 uncacheState := s_idle 1446 } 1447 } 1448 } 1449 1450 when (lsq.io.uncache.req.valid) { 1451 uncacheReq <> lsq.io.uncache.req 1452 } 1453 when (io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) { 1454 lsq.io.uncache.resp <> uncacheResp 1455 lsq.io.uncache.idResp <> uncacheIdResp 1456 }.otherwise { 1457 when (uncacheState === s_scalar_uncache) { 1458 lsq.io.uncache.resp <> uncacheResp 1459 lsq.io.uncache.idResp <> uncacheIdResp 1460 } 1461 } 1462 // delay dcache refill for 1 cycle for better timing 1463 AddPipelineReg(uncacheReq, uncache.io.lsq.req, false.B) 1464 AddPipelineReg(uncache.io.lsq.resp, uncacheResp, false.B) 1465 1466 //lsq.io.refill := delayedDcacheRefill 1467 lsq.io.release := dcache.io.lsu.release 1468 lsq.io.lqCancelCnt <> io.mem_to_ooo.lqCancelCnt 1469 lsq.io.sqCancelCnt <> io.mem_to_ooo.sqCancelCnt 1470 lsq.io.lqDeq <> io.mem_to_ooo.lqDeq 1471 lsq.io.sqDeq <> io.mem_to_ooo.sqDeq 1472 // Todo: assign these 1473 io.mem_to_ooo.sqDeqPtr := lsq.io.sqDeqPtr 1474 io.mem_to_ooo.lqDeqPtr := lsq.io.lqDeqPtr 1475 lsq.io.tl_d_channel <> dcache.io.lsu.tl_d_channel 1476 1477 // LSQ to store buffer 1478 lsq.io.sbuffer <> sbuffer.io.in 1479 sbuffer.io.in(0).valid := lsq.io.sbuffer(0).valid || vSegmentUnit.io.sbuffer.valid 1480 sbuffer.io.in(0).bits := Mux1H(Seq( 1481 vSegmentUnit.io.sbuffer.valid -> vSegmentUnit.io.sbuffer.bits, 1482 lsq.io.sbuffer(0).valid -> lsq.io.sbuffer(0).bits 1483 )) 1484 vSegmentUnit.io.sbuffer.ready := sbuffer.io.in(0).ready 1485 lsq.io.sqEmpty <> sbuffer.io.sqempty 1486 dcache.io.force_write := lsq.io.force_write 1487 1488 // Initialize when unenabled difftest. 1489 sbuffer.io.vecDifftestInfo := DontCare 1490 lsq.io.sbufferVecDifftestInfo := DontCare 1491 vSegmentUnit.io.vecDifftestInfo := DontCare 1492 if (env.EnableDifftest) { 1493 sbuffer.io.vecDifftestInfo .zipWithIndex.map{ case (sbufferPort, index) => 1494 if (index == 0) { 1495 val vSegmentDifftestValid = vSegmentUnit.io.vecDifftestInfo.valid 1496 sbufferPort.valid := Mux(vSegmentDifftestValid, vSegmentUnit.io.vecDifftestInfo.valid, lsq.io.sbufferVecDifftestInfo(0).valid) 1497 sbufferPort.bits := Mux(vSegmentDifftestValid, vSegmentUnit.io.vecDifftestInfo.bits, lsq.io.sbufferVecDifftestInfo(0).bits) 1498 1499 vSegmentUnit.io.vecDifftestInfo.ready := sbufferPort.ready 1500 lsq.io.sbufferVecDifftestInfo(0).ready := sbufferPort.ready 1501 } else { 1502 sbufferPort <> lsq.io.sbufferVecDifftestInfo(index) 1503 } 1504 } 1505 } 1506 1507 // lsq.io.vecStoreRetire <> vsFlowQueue.io.sqRelease 1508 // lsq.io.vecWriteback.valid := vlWrapper.io.uopWriteback.fire && 1509 // vlWrapper.io.uopWriteback.bits.uop.vpu.lastUop 1510 // lsq.io.vecWriteback.bits := vlWrapper.io.uopWriteback.bits 1511 1512 // vector 1513 val vLoadCanAccept = (0 until VlduCnt).map(i => 1514 vlSplit(i).io.in.ready && VlduType.isVecLd(io.ooo_to_mem.issueVldu(i).bits.uop.fuOpType) 1515 ) 1516 val vStoreCanAccept = (0 until VstuCnt).map(i => 1517 vsSplit(i).io.in.ready && VstuType.isVecSt(io.ooo_to_mem.issueVldu(i).bits.uop.fuOpType) 1518 ) 1519 val isSegment = io.ooo_to_mem.issueVldu.head.valid && isVsegls(io.ooo_to_mem.issueVldu.head.bits.uop.fuType) 1520 val isFixVlUop = io.ooo_to_mem.issueVldu.map{x => 1521 x.bits.uop.vpu.isVleff && x.bits.uop.vpu.lastUop && x.valid 1522 } 1523 1524 // init port 1525 /** 1526 * TODO: splited vsMergebuffer maybe remove, if one RS can accept two feedback, or don't need RS replay uop 1527 * for now: 1528 * RS0 -> VsSplit0 -> stu0 -> vsMergebuffer0 -> feedback -> RS0 1529 * RS1 -> VsSplit1 -> stu1 -> vsMergebuffer1 -> feedback -> RS1 1530 * 1531 * vector load don't need feedback 1532 * 1533 * RS0 -> VlSplit0 -> ldu0 -> | 1534 * RS1 -> VlSplit1 -> ldu1 -> | -> vlMergebuffer 1535 * replayIO -> ldu3 -> | 1536 * */ 1537 (0 until VstuCnt).foreach{i => 1538 vsMergeBuffer(i).io.fromPipeline := DontCare 1539 vsMergeBuffer(i).io.fromSplit := DontCare 1540 1541 vsMergeBuffer(i).io.fromMisalignBuffer.get.flush := storeMisalignBuffer.io.toVecStoreMergeBuffer(i).flush 1542 vsMergeBuffer(i).io.fromMisalignBuffer.get.mbIndex := storeMisalignBuffer.io.toVecStoreMergeBuffer(i).mbIndex 1543 } 1544 1545 (0 until VstuCnt).foreach{i => 1546 vsSplit(i).io.redirect <> redirect 1547 vsSplit(i).io.in <> io.ooo_to_mem.issueVldu(i) 1548 vsSplit(i).io.in.valid := io.ooo_to_mem.issueVldu(i).valid && 1549 vStoreCanAccept(i) && !isSegment 1550 vsSplit(i).io.toMergeBuffer <> vsMergeBuffer(i).io.fromSplit.head 1551 NewPipelineConnect( 1552 vsSplit(i).io.out, storeUnits(i).io.vecstin, storeUnits(i).io.vecstin.fire, 1553 Mux(vsSplit(i).io.out.fire, vsSplit(i).io.out.bits.uop.robIdx.needFlush(io.redirect), storeUnits(i).io.vecstin.bits.uop.robIdx.needFlush(io.redirect)), 1554 Option("VsSplitConnectStu") 1555 ) 1556 vsSplit(i).io.vstd.get := DontCare // Todo: Discuss how to pass vector store data 1557 1558 vsSplit(i).io.vstdMisalign.get.storeMisalignBufferEmpty := !storeMisalignBuffer.io.full 1559 vsSplit(i).io.vstdMisalign.get.storePipeEmpty := !storeUnits(i).io.s0_s1_valid 1560 1561 } 1562 (0 until VlduCnt).foreach{i => 1563 vlSplit(i).io.redirect <> redirect 1564 vlSplit(i).io.in <> io.ooo_to_mem.issueVldu(i) 1565 vlSplit(i).io.in.valid := io.ooo_to_mem.issueVldu(i).valid && 1566 vLoadCanAccept(i) && !isSegment && !isFixVlUop(i) 1567 vlSplit(i).io.toMergeBuffer <> vlMergeBuffer.io.fromSplit(i) 1568 vlSplit(i).io.threshold.get.valid := vlMergeBuffer.io.toSplit.get.threshold 1569 vlSplit(i).io.threshold.get.bits := lsq.io.lqDeqPtr 1570 NewPipelineConnect( 1571 vlSplit(i).io.out, loadUnits(i).io.vecldin, loadUnits(i).io.vecldin.fire, 1572 Mux(vlSplit(i).io.out.fire, vlSplit(i).io.out.bits.uop.robIdx.needFlush(io.redirect), loadUnits(i).io.vecldin.bits.uop.robIdx.needFlush(io.redirect)), 1573 Option("VlSplitConnectLdu") 1574 ) 1575 1576 //Subsequent instrction will be blocked 1577 vfofBuffer.io.in(i).valid := io.ooo_to_mem.issueVldu(i).valid 1578 vfofBuffer.io.in(i).bits := io.ooo_to_mem.issueVldu(i).bits 1579 } 1580 (0 until LduCnt).foreach{i=> 1581 loadUnits(i).io.vecldout.ready := vlMergeBuffer.io.fromPipeline(i).ready 1582 loadMisalignBuffer.io.vecWriteBack.ready := true.B 1583 1584 if (i == MisalignWBPort) { 1585 when(loadUnits(i).io.vecldout.valid) { 1586 vlMergeBuffer.io.fromPipeline(i).valid := loadUnits(i).io.vecldout.valid 1587 vlMergeBuffer.io.fromPipeline(i).bits := loadUnits(i).io.vecldout.bits 1588 } .otherwise { 1589 vlMergeBuffer.io.fromPipeline(i).valid := loadMisalignBuffer.io.vecWriteBack.valid 1590 vlMergeBuffer.io.fromPipeline(i).bits := loadMisalignBuffer.io.vecWriteBack.bits 1591 } 1592 } else { 1593 vlMergeBuffer.io.fromPipeline(i).valid := loadUnits(i).io.vecldout.valid 1594 vlMergeBuffer.io.fromPipeline(i).bits := loadUnits(i).io.vecldout.bits 1595 } 1596 } 1597 1598 (0 until StaCnt).foreach{i=> 1599 if(i < VstuCnt){ 1600 storeUnits(i).io.vecstout.ready := true.B 1601 storeMisalignBuffer.io.vecWriteBack(i).ready := vsMergeBuffer(i).io.fromPipeline.head.ready 1602 1603 when(storeUnits(i).io.vecstout.valid) { 1604 vsMergeBuffer(i).io.fromPipeline.head.valid := storeUnits(i).io.vecstout.valid 1605 vsMergeBuffer(i).io.fromPipeline.head.bits := storeUnits(i).io.vecstout.bits 1606 } .otherwise { 1607 vsMergeBuffer(i).io.fromPipeline.head.valid := storeMisalignBuffer.io.vecWriteBack(i).valid 1608 vsMergeBuffer(i).io.fromPipeline.head.bits := storeMisalignBuffer.io.vecWriteBack(i).bits 1609 } 1610 } 1611 } 1612 1613 (0 until VlduCnt).foreach{i=> 1614 io.ooo_to_mem.issueVldu(i).ready := vLoadCanAccept(i) || vStoreCanAccept(i) 1615 } 1616 1617 vlMergeBuffer.io.redirect <> redirect 1618 vsMergeBuffer.map(_.io.redirect <> redirect) 1619 (0 until VlduCnt).foreach{i=> 1620 vlMergeBuffer.io.toLsq(i) <> lsq.io.ldvecFeedback(i) 1621 } 1622 (0 until VstuCnt).foreach{i=> 1623 vsMergeBuffer(i).io.toLsq.head <> lsq.io.stvecFeedback(i) 1624 } 1625 1626 (0 until VlduCnt).foreach{i=> 1627 // send to RS 1628 vlMergeBuffer.io.feedback(i) <> io.mem_to_ooo.vlduIqFeedback(i).feedbackSlow 1629 io.mem_to_ooo.vlduIqFeedback(i).feedbackFast := DontCare 1630 } 1631 (0 until VstuCnt).foreach{i => 1632 // send to RS 1633 if (i == 0){ 1634 io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow.valid := vsMergeBuffer(i).io.feedback.head.valid || vSegmentUnit.io.feedback.valid 1635 io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow.bits := Mux1H(Seq( 1636 vSegmentUnit.io.feedback.valid -> vSegmentUnit.io.feedback.bits, 1637 vsMergeBuffer(i).io.feedback.head.valid -> vsMergeBuffer(i).io.feedback.head.bits 1638 )) 1639 io.mem_to_ooo.vstuIqFeedback(i).feedbackFast := DontCare 1640 } else { 1641 vsMergeBuffer(i).io.feedback.head <> io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow 1642 io.mem_to_ooo.vstuIqFeedback(i).feedbackFast := DontCare 1643 } 1644 } 1645 1646 (0 until VlduCnt).foreach{i=> 1647 if (i == 0){ // for segmentUnit, segmentUnit use port0 writeback 1648 io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid || vSegmentUnit.io.uopwriteback.valid 1649 io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq( 1650 vSegmentUnit.io.uopwriteback.valid -> vSegmentUnit.io.uopwriteback.bits, 1651 vlMergeBuffer.io.uopWriteback(i).valid -> vlMergeBuffer.io.uopWriteback(i).bits, 1652 vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits, 1653 )) 1654 vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready && !vSegmentUnit.io.uopwriteback.valid 1655 vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid && !vSegmentUnit.io.uopwriteback.valid 1656 vSegmentUnit.io.uopwriteback.ready := io.mem_to_ooo.writebackVldu(i).ready 1657 } else if (i == 1) { 1658 io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid || vfofBuffer.io.uopWriteback.valid 1659 io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq( 1660 vfofBuffer.io.uopWriteback.valid -> vfofBuffer.io.uopWriteback.bits, 1661 vlMergeBuffer.io.uopWriteback(i).valid -> vlMergeBuffer.io.uopWriteback(i).bits, 1662 vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits, 1663 )) 1664 vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready && !vfofBuffer.io.uopWriteback.valid 1665 vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid && !vfofBuffer.io.uopWriteback.valid 1666 vfofBuffer.io.uopWriteback.ready := io.mem_to_ooo.writebackVldu(i).ready 1667 } else { 1668 io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid 1669 io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq( 1670 vlMergeBuffer.io.uopWriteback(i).valid -> vlMergeBuffer.io.uopWriteback(i).bits, 1671 vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits, 1672 )) 1673 vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready 1674 vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid 1675 } 1676 1677 vfofBuffer.io.mergeUopWriteback(i).valid := vlMergeBuffer.io.uopWriteback(i).valid 1678 vfofBuffer.io.mergeUopWriteback(i).bits := vlMergeBuffer.io.uopWriteback(i).bits 1679 } 1680 1681 1682 vfofBuffer.io.redirect <> redirect 1683 1684 // Sbuffer 1685 sbuffer.io.csrCtrl <> csrCtrl 1686 sbuffer.io.dcache <> dcache.io.lsu.store 1687 sbuffer.io.memSetPattenDetected := dcache.io.memSetPattenDetected 1688 sbuffer.io.force_write <> lsq.io.force_write 1689 // flush sbuffer 1690 val cmoFlush = lsq.io.flushSbuffer.valid 1691 val fenceFlush = io.ooo_to_mem.flushSb 1692 val atomicsFlush = atomicsUnit.io.flush_sbuffer.valid || vSegmentUnit.io.flush_sbuffer.valid 1693 val stIsEmpty = sbuffer.io.flush.empty && uncache.io.flush.empty 1694 io.mem_to_ooo.sbIsEmpty := RegNext(stIsEmpty) 1695 1696 // if both of them tries to flush sbuffer at the same time 1697 // something must have gone wrong 1698 assert(!(fenceFlush && atomicsFlush && cmoFlush)) 1699 sbuffer.io.flush.valid := RegNext(fenceFlush || atomicsFlush || cmoFlush) 1700 uncache.io.flush.valid := sbuffer.io.flush.valid 1701 1702 // AtomicsUnit: AtomicsUnit will override other control signials, 1703 // as atomics insts (LR/SC/AMO) will block the pipeline 1704 val s_normal +: s_atomics = Enum(StaCnt + HyuCnt + 1) 1705 val state = RegInit(s_normal) 1706 1707 val st_atomics = Seq.tabulate(StaCnt)(i => 1708 io.ooo_to_mem.issueSta(i).valid && FuType.storeIsAMO((io.ooo_to_mem.issueSta(i).bits.uop.fuType)) 1709 ) ++ Seq.tabulate(HyuCnt)(i => 1710 io.ooo_to_mem.issueHya(i).valid && FuType.storeIsAMO((io.ooo_to_mem.issueHya(i).bits.uop.fuType)) 1711 ) 1712 1713 for (i <- 0 until StaCnt) when(st_atomics(i)) { 1714 io.ooo_to_mem.issueSta(i).ready := atomicsUnit.io.in.ready 1715 storeUnits(i).io.stin.valid := false.B 1716 1717 state := s_atomics(i) 1718 } 1719 for (i <- 0 until HyuCnt) when(st_atomics(StaCnt + i)) { 1720 io.ooo_to_mem.issueHya(i).ready := atomicsUnit.io.in.ready 1721 hybridUnits(i).io.lsin.valid := false.B 1722 1723 state := s_atomics(StaCnt + i) 1724 assert(!st_atomics.zipWithIndex.filterNot(_._2 == StaCnt + i).unzip._1.reduce(_ || _)) 1725 } 1726 when (atomicsUnit.io.out.valid) { 1727 state := s_normal 1728 } 1729 1730 atomicsUnit.io.in.valid := st_atomics.reduce(_ || _) 1731 atomicsUnit.io.in.bits := Mux1H(Seq.tabulate(StaCnt)(i => 1732 st_atomics(i) -> io.ooo_to_mem.issueSta(i).bits) ++ 1733 Seq.tabulate(HyuCnt)(i => st_atomics(StaCnt+i) -> io.ooo_to_mem.issueHya(i).bits)) 1734 atomicsUnit.io.storeDataIn.zipWithIndex.foreach { case (stdin, i) => 1735 stdin.valid := st_data_atomics(i) 1736 stdin.bits := stData(i).bits 1737 } 1738 atomicsUnit.io.redirect <> redirect 1739 1740 // TODO: complete amo's pmp support 1741 val amoTlb = dtlb_ld(0).requestor(0) 1742 atomicsUnit.io.dtlb.resp.valid := false.B 1743 atomicsUnit.io.dtlb.resp.bits := DontCare 1744 atomicsUnit.io.dtlb.req.ready := amoTlb.req.ready 1745 atomicsUnit.io.pmpResp := pmp_check(0).resp 1746 1747 atomicsUnit.io.dcache <> dcache.io.lsu.atomics 1748 atomicsUnit.io.flush_sbuffer.empty := stIsEmpty 1749 1750 atomicsUnit.io.csrCtrl := csrCtrl 1751 1752 // for atomicsUnit, it uses loadUnit(0)'s TLB port 1753 1754 when (state =/= s_normal) { 1755 // use store wb port instead of load 1756 loadUnits(0).io.ldout.ready := false.B 1757 // use load_0's TLB 1758 atomicsUnit.io.dtlb <> amoTlb 1759 1760 // hw prefetch should be disabled while executing atomic insts 1761 loadUnits.map(i => i.io.prefetch_req.valid := false.B) 1762 1763 // make sure there's no in-flight uops in load unit 1764 assert(!loadUnits(0).io.ldout.valid) 1765 } 1766 1767 lsq.io.flushSbuffer.empty := sbuffer.io.sbempty 1768 1769 for (i <- 0 until StaCnt) { 1770 when (state === s_atomics(i)) { 1771 io.mem_to_ooo.staIqFeedback(i).feedbackSlow := atomicsUnit.io.feedbackSlow 1772 assert(!storeUnits(i).io.feedback_slow.valid) 1773 } 1774 } 1775 for (i <- 0 until HyuCnt) { 1776 when (state === s_atomics(StaCnt + i)) { 1777 io.mem_to_ooo.hyuIqFeedback(i).feedbackSlow := atomicsUnit.io.feedbackSlow 1778 assert(!hybridUnits(i).io.feedback_slow.valid) 1779 } 1780 } 1781 1782 lsq.io.exceptionAddr.isStore := io.ooo_to_mem.isStoreException 1783 // Exception address is used several cycles after flush. 1784 // We delay it by 10 cycles to ensure its flush safety. 1785 val atomicsException = RegInit(false.B) 1786 when (DelayN(redirect.valid, 10) && atomicsException) { 1787 atomicsException := false.B 1788 }.elsewhen (atomicsUnit.io.exceptionInfo.valid) { 1789 atomicsException := true.B 1790 } 1791 1792 val misalignBufExceptionOverwrite = loadMisalignBuffer.io.overwriteExpBuf.valid || storeMisalignBuffer.io.overwriteExpBuf.valid 1793 val misalignBufExceptionVaddr = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid, 1794 loadMisalignBuffer.io.overwriteExpBuf.vaddr, 1795 storeMisalignBuffer.io.overwriteExpBuf.vaddr 1796 ) 1797 val misalignBufExceptionIsHyper = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid, 1798 loadMisalignBuffer.io.overwriteExpBuf.isHyper, 1799 storeMisalignBuffer.io.overwriteExpBuf.isHyper 1800 ) 1801 val misalignBufExceptionGpaddr = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid, 1802 loadMisalignBuffer.io.overwriteExpBuf.gpaddr, 1803 storeMisalignBuffer.io.overwriteExpBuf.gpaddr 1804 ) 1805 val misalignBufExceptionIsForVSnonLeafPTE = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid, 1806 loadMisalignBuffer.io.overwriteExpBuf.isForVSnonLeafPTE, 1807 storeMisalignBuffer.io.overwriteExpBuf.isForVSnonLeafPTE 1808 ) 1809 1810 val vSegmentException = RegInit(false.B) 1811 when (DelayN(redirect.valid, 10) && vSegmentException) { 1812 vSegmentException := false.B 1813 }.elsewhen (vSegmentUnit.io.exceptionInfo.valid) { 1814 vSegmentException := true.B 1815 } 1816 val atomicsExceptionAddress = RegEnable(atomicsUnit.io.exceptionInfo.bits.vaddr, atomicsUnit.io.exceptionInfo.valid) 1817 val vSegmentExceptionVstart = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vstart, vSegmentUnit.io.exceptionInfo.valid) 1818 val vSegmentExceptionVl = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vl, vSegmentUnit.io.exceptionInfo.valid) 1819 val vSegmentExceptionAddress = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vaddr, vSegmentUnit.io.exceptionInfo.valid) 1820 val atomicsExceptionGPAddress = RegEnable(atomicsUnit.io.exceptionInfo.bits.gpaddr, atomicsUnit.io.exceptionInfo.valid) 1821 val vSegmentExceptionGPAddress = RegEnable(vSegmentUnit.io.exceptionInfo.bits.gpaddr, vSegmentUnit.io.exceptionInfo.valid) 1822 val atomicsExceptionIsForVSnonLeafPTE = RegEnable(atomicsUnit.io.exceptionInfo.bits.isForVSnonLeafPTE, atomicsUnit.io.exceptionInfo.valid) 1823 val vSegmentExceptionIsForVSnonLeafPTE = RegEnable(vSegmentUnit.io.exceptionInfo.bits.isForVSnonLeafPTE, vSegmentUnit.io.exceptionInfo.valid) 1824 1825 val exceptionVaddr = Mux( 1826 atomicsException, 1827 atomicsExceptionAddress, 1828 Mux(misalignBufExceptionOverwrite, 1829 misalignBufExceptionVaddr, 1830 Mux(vSegmentException, 1831 vSegmentExceptionAddress, 1832 lsq.io.exceptionAddr.vaddr 1833 ) 1834 ) 1835 ) 1836 // whether vaddr need ext or is hyper inst: 1837 // VaNeedExt: atomicsException -> false; misalignBufExceptionOverwrite -> true; vSegmentException -> false 1838 // IsHyper: atomicsException -> false; vSegmentException -> false 1839 val exceptionVaNeedExt = !atomicsException && 1840 (misalignBufExceptionOverwrite || 1841 (!vSegmentException && lsq.io.exceptionAddr.vaNeedExt)) 1842 val exceptionIsHyper = !atomicsException && 1843 (misalignBufExceptionOverwrite && misalignBufExceptionIsHyper || 1844 (!vSegmentException && lsq.io.exceptionAddr.isHyper && !misalignBufExceptionOverwrite)) 1845 1846 def GenExceptionVa( 1847 mode: UInt, isVirt: Bool, vaNeedExt: Bool, 1848 satp: TlbSatpBundle, vsatp: TlbSatpBundle, hgatp: TlbHgatpBundle, 1849 vaddr: UInt 1850 ) = { 1851 require(VAddrBits >= 50) 1852 1853 val satpNone = satp.mode === 0.U 1854 val satpSv39 = satp.mode === 8.U 1855 val satpSv48 = satp.mode === 9.U 1856 1857 val vsatpNone = vsatp.mode === 0.U 1858 val vsatpSv39 = vsatp.mode === 8.U 1859 val vsatpSv48 = vsatp.mode === 9.U 1860 1861 val hgatpNone = hgatp.mode === 0.U 1862 val hgatpSv39x4 = hgatp.mode === 8.U 1863 val hgatpSv48x4 = hgatp.mode === 9.U 1864 1865 // For !isVirt, mode check is necessary, as we don't want virtual memory in M-mode. 1866 // For isVirt, mode check is unnecessary, as virt won't be 1 in M-mode. 1867 // Also, isVirt includes Hyper Insts, which don't care mode either. 1868 1869 val useBareAddr = 1870 (isVirt && vsatpNone && hgatpNone) || 1871 (!isVirt && (mode === CSRConst.ModeM)) || 1872 (!isVirt && (mode =/= CSRConst.ModeM) && satpNone) 1873 val useSv39Addr = 1874 (isVirt && vsatpSv39) || 1875 (!isVirt && (mode =/= CSRConst.ModeM) && satpSv39) 1876 val useSv48Addr = 1877 (isVirt && vsatpSv48) || 1878 (!isVirt && (mode =/= CSRConst.ModeM) && satpSv48) 1879 val useSv39x4Addr = isVirt && vsatpNone && hgatpSv39x4 1880 val useSv48x4Addr = isVirt && vsatpNone && hgatpSv48x4 1881 1882 val bareAddr = ZeroExt(vaddr(PAddrBits - 1, 0), XLEN) 1883 val sv39Addr = SignExt(vaddr.take(39), XLEN) 1884 val sv39x4Addr = ZeroExt(vaddr.take(39 + 2), XLEN) 1885 val sv48Addr = SignExt(vaddr.take(48), XLEN) 1886 val sv48x4Addr = ZeroExt(vaddr.take(48 + 2), XLEN) 1887 1888 val ExceptionVa = Wire(UInt(XLEN.W)) 1889 when (vaNeedExt) { 1890 ExceptionVa := Mux1H(Seq( 1891 (useBareAddr) -> bareAddr, 1892 (useSv39Addr) -> sv39Addr, 1893 (useSv48Addr) -> sv48Addr, 1894 (useSv39x4Addr) -> sv39x4Addr, 1895 (useSv48x4Addr) -> sv48x4Addr, 1896 )) 1897 } .otherwise { 1898 ExceptionVa := vaddr 1899 } 1900 1901 ExceptionVa 1902 } 1903 1904 io.mem_to_ooo.lsqio.vaddr := RegNext( 1905 GenExceptionVa(tlbcsr.priv.dmode, tlbcsr.priv.virt || exceptionIsHyper, exceptionVaNeedExt, 1906 tlbcsr.satp, tlbcsr.vsatp, tlbcsr.hgatp, exceptionVaddr) 1907 ) 1908 1909 // vsegment instruction is executed atomic, which mean atomicsException and vSegmentException should not raise at the same time. 1910 XSError(atomicsException && vSegmentException, "atomicsException and vSegmentException raise at the same time!") 1911 io.mem_to_ooo.lsqio.vstart := RegNext(Mux(vSegmentException, 1912 vSegmentExceptionVstart, 1913 lsq.io.exceptionAddr.vstart) 1914 ) 1915 io.mem_to_ooo.lsqio.vl := RegNext(Mux(vSegmentException, 1916 vSegmentExceptionVl, 1917 lsq.io.exceptionAddr.vl) 1918 ) 1919 1920 XSError(atomicsException && atomicsUnit.io.in.valid, "new instruction before exception triggers\n") 1921 io.mem_to_ooo.lsqio.gpaddr := RegNext(Mux( 1922 atomicsException, 1923 atomicsExceptionGPAddress, 1924 Mux(misalignBufExceptionOverwrite, 1925 misalignBufExceptionGpaddr, 1926 Mux(vSegmentException, 1927 vSegmentExceptionGPAddress, 1928 lsq.io.exceptionAddr.gpaddr 1929 ) 1930 ) 1931 )) 1932 io.mem_to_ooo.lsqio.isForVSnonLeafPTE := RegNext(Mux( 1933 atomicsException, 1934 atomicsExceptionIsForVSnonLeafPTE, 1935 Mux(misalignBufExceptionOverwrite, 1936 misalignBufExceptionIsForVSnonLeafPTE, 1937 Mux(vSegmentException, 1938 vSegmentExceptionIsForVSnonLeafPTE, 1939 lsq.io.exceptionAddr.isForVSnonLeafPTE 1940 ) 1941 ) 1942 )) 1943 io.mem_to_ooo.topToBackendBypass match { case x => 1944 x.hartId := io.hartId 1945 x.l2FlushDone := RegNext(io.l2_flush_done) 1946 x.externalInterrupt.msip := outer.clint_int_sink.in.head._1(0) 1947 x.externalInterrupt.mtip := outer.clint_int_sink.in.head._1(1) 1948 x.externalInterrupt.meip := outer.plic_int_sink.in.head._1(0) 1949 x.externalInterrupt.seip := outer.plic_int_sink.in.last._1(0) 1950 x.externalInterrupt.debug := outer.debug_int_sink.in.head._1(0) 1951 x.externalInterrupt.nmi.nmi_31 := outer.nmi_int_sink.in.head._1(0) | outer.beu_local_int_sink.in.head._1(0) 1952 x.externalInterrupt.nmi.nmi_43 := outer.nmi_int_sink.in.head._1(1) 1953 x.msiInfo := DelayNWithValid(io.fromTopToBackend.msiInfo, 1) 1954 x.clintTime := DelayNWithValid(io.fromTopToBackend.clintTime, 1) 1955 } 1956 1957 io.memInfo.sqFull := RegNext(lsq.io.sqFull) 1958 io.memInfo.lqFull := RegNext(lsq.io.lqFull) 1959 io.memInfo.dcacheMSHRFull := RegNext(dcache.io.mshrFull) 1960 1961 io.inner_hartId := io.hartId 1962 io.inner_reset_vector := RegNext(io.outer_reset_vector) 1963 io.outer_cpu_halt := io.ooo_to_mem.backendToTopBypass.cpuHalted 1964 io.outer_l2_flush_en := io.ooo_to_mem.csrCtrl.flush_l2_enable 1965 io.outer_power_down_en := io.ooo_to_mem.csrCtrl.power_down_enable 1966 io.outer_cpu_critical_error := io.ooo_to_mem.backendToTopBypass.cpuCriticalError 1967 io.outer_msi_ack := io.ooo_to_mem.backendToTopBypass.msiAck 1968 io.outer_beu_errors_icache := RegNext(io.inner_beu_errors_icache) 1969 io.inner_hc_perfEvents <> RegNext(io.outer_hc_perfEvents) 1970 1971 // vector segmentUnit 1972 vSegmentUnit.io.in.bits <> io.ooo_to_mem.issueVldu.head.bits 1973 vSegmentUnit.io.in.valid := isSegment && io.ooo_to_mem.issueVldu.head.valid// is segment instruction 1974 vSegmentUnit.io.dtlb.resp.bits <> dtlb_reqs.take(LduCnt).head.resp.bits 1975 vSegmentUnit.io.dtlb.resp.valid <> dtlb_reqs.take(LduCnt).head.resp.valid 1976 vSegmentUnit.io.pmpResp <> pmp_check.head.resp 1977 vSegmentUnit.io.flush_sbuffer.empty := stIsEmpty 1978 vSegmentUnit.io.redirect <> redirect 1979 vSegmentUnit.io.rdcache.resp.bits := dcache.io.lsu.load(0).resp.bits 1980 vSegmentUnit.io.rdcache.resp.valid := dcache.io.lsu.load(0).resp.valid 1981 vSegmentUnit.io.rdcache.s2_bank_conflict := dcache.io.lsu.load(0).s2_bank_conflict 1982 // ------------------------- 1983 // Vector Segment Triggers 1984 // ------------------------- 1985 vSegmentUnit.io.fromCsrTrigger.tdataVec := tdata 1986 vSegmentUnit.io.fromCsrTrigger.tEnableVec := tEnable 1987 vSegmentUnit.io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp 1988 vSegmentUnit.io.fromCsrTrigger.debugMode := debugMode 1989 1990 // reset tree of MemBlock 1991 if (p(DebugOptionsKey).ResetGen) { 1992 val leftResetTree = ResetGenNode( 1993 Seq( 1994 ModuleNode(ptw), 1995 ModuleNode(ptw_to_l2_buffer), 1996 ModuleNode(lsq), 1997 ModuleNode(dtlb_st_tlb_st), 1998 ModuleNode(dtlb_prefetch_tlb_prefetch), 1999 ModuleNode(pmp) 2000 ) 2001 ++ pmp_checkers.map(ModuleNode(_)) 2002 ++ (if (prefetcherOpt.isDefined) Seq(ModuleNode(prefetcherOpt.get)) else Nil) 2003 ++ (if (l1PrefetcherOpt.isDefined) Seq(ModuleNode(l1PrefetcherOpt.get)) else Nil) 2004 ) 2005 val rightResetTree = ResetGenNode( 2006 Seq( 2007 ModuleNode(sbuffer), 2008 ModuleNode(dtlb_ld_tlb_ld), 2009 ModuleNode(dcache), 2010 ModuleNode(l1d_to_l2_buffer), 2011 CellNode(io.reset_backend) 2012 ) 2013 ) 2014 ResetGen(leftResetTree, reset, sim = false, io.dft_reset) 2015 ResetGen(rightResetTree, reset, sim = false, io.dft_reset) 2016 } else { 2017 io.reset_backend := DontCare 2018 } 2019 io.resetInFrontendBypass.toL2Top := io.resetInFrontendBypass.fromFrontend 2020 // trace interface 2021 val traceToL2Top = io.traceCoreInterfaceBypass.toL2Top 2022 val traceFromBackend = io.traceCoreInterfaceBypass.fromBackend 2023 traceFromBackend.fromEncoder := RegNext(traceToL2Top.fromEncoder) 2024 traceToL2Top.toEncoder.trap := RegEnable( 2025 traceFromBackend.toEncoder.trap, 2026 traceFromBackend.toEncoder.groups(0).valid && Itype.isTrap(traceFromBackend.toEncoder.groups(0).bits.itype) 2027 ) 2028 traceToL2Top.toEncoder.priv := RegEnable( 2029 traceFromBackend.toEncoder.priv, 2030 traceFromBackend.toEncoder.groups(0).valid 2031 ) 2032 (0 until TraceGroupNum).foreach { i => 2033 traceToL2Top.toEncoder.groups(i).valid := RegNext(traceFromBackend.toEncoder.groups(i).valid) 2034 traceToL2Top.toEncoder.groups(i).bits.iretire := RegNext(traceFromBackend.toEncoder.groups(i).bits.iretire) 2035 traceToL2Top.toEncoder.groups(i).bits.itype := RegNext(traceFromBackend.toEncoder.groups(i).bits.itype) 2036 traceToL2Top.toEncoder.groups(i).bits.ilastsize := RegEnable( 2037 traceFromBackend.toEncoder.groups(i).bits.ilastsize, 2038 traceFromBackend.toEncoder.groups(i).valid 2039 ) 2040 traceToL2Top.toEncoder.groups(i).bits.iaddr := RegEnable( 2041 traceFromBackend.toEncoder.groups(i).bits.iaddr, 2042 traceFromBackend.toEncoder.groups(i).valid 2043 ) + (RegEnable( 2044 traceFromBackend.toEncoder.groups(i).bits.ftqOffset.getOrElse(0.U), 2045 traceFromBackend.toEncoder.groups(i).valid 2046 ) << instOffsetBits) 2047 } 2048 2049 2050 io.mem_to_ooo.storeDebugInfo := DontCare 2051 // store event difftest information 2052 if (env.EnableDifftest) { 2053 (0 until EnsbufferWidth).foreach{i => 2054 io.mem_to_ooo.storeDebugInfo(i).robidx := sbuffer.io.vecDifftestInfo(i).bits.robIdx 2055 sbuffer.io.vecDifftestInfo(i).bits.pc := io.mem_to_ooo.storeDebugInfo(i).pc 2056 } 2057 } 2058 2059 // top-down info 2060 dcache.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr 2061 dtlbRepeater.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr 2062 lsq.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr 2063 io.debugTopDown.toCore.robHeadMissInDCache := dcache.io.debugTopDown.robHeadMissInDCache 2064 io.debugTopDown.toCore.robHeadTlbReplay := lsq.io.debugTopDown.robHeadTlbReplay 2065 io.debugTopDown.toCore.robHeadTlbMiss := lsq.io.debugTopDown.robHeadTlbMiss 2066 io.debugTopDown.toCore.robHeadLoadVio := lsq.io.debugTopDown.robHeadLoadVio 2067 io.debugTopDown.toCore.robHeadLoadMSHR := lsq.io.debugTopDown.robHeadLoadMSHR 2068 dcache.io.debugTopDown.robHeadOtherReplay := lsq.io.debugTopDown.robHeadOtherReplay 2069 dcache.io.debugRolling := io.debugRolling 2070 2071 lsq.io.noUopsIssued := io.topDownInfo.toBackend.noUopsIssued 2072 io.topDownInfo.toBackend.lqEmpty := lsq.io.lqEmpty 2073 io.topDownInfo.toBackend.sqEmpty := lsq.io.sqEmpty 2074 io.topDownInfo.toBackend.l1Miss := dcache.io.l1Miss 2075 io.topDownInfo.toBackend.l2TopMiss.l2Miss := RegNext(io.topDownInfo.fromL2Top.l2Miss) 2076 io.topDownInfo.toBackend.l2TopMiss.l3Miss := RegNext(io.topDownInfo.fromL2Top.l3Miss) 2077 2078 val hyLdDeqCount = PopCount(io.ooo_to_mem.issueHya.map(x => x.valid && FuType.isLoad(x.bits.uop.fuType))) 2079 val hyStDeqCount = PopCount(io.ooo_to_mem.issueHya.map(x => x.valid && FuType.isStore(x.bits.uop.fuType))) 2080 val ldDeqCount = PopCount(io.ooo_to_mem.issueLda.map(_.valid)) +& hyLdDeqCount 2081 val stDeqCount = PopCount(io.ooo_to_mem.issueSta.take(StaCnt).map(_.valid)) +& hyStDeqCount 2082 val iqDeqCount = ldDeqCount +& stDeqCount 2083 XSPerfAccumulate("load_iq_deq_count", ldDeqCount) 2084 XSPerfHistogram("load_iq_deq_count", ldDeqCount, true.B, 0, LdExuCnt + 1) 2085 XSPerfAccumulate("store_iq_deq_count", stDeqCount) 2086 XSPerfHistogram("store_iq_deq_count", stDeqCount, true.B, 0, StAddrCnt + 1) 2087 XSPerfAccumulate("ls_iq_deq_count", iqDeqCount) 2088 2089 val pfevent = Module(new PFEvent) 2090 pfevent.io.distribute_csr := csrCtrl.distribute_csr 2091 val csrevents = pfevent.io.hpmevent.slice(16,24) 2092 2093 val perfFromUnits = (loadUnits ++ Seq(sbuffer, lsq, dcache)).flatMap(_.getPerfEvents) 2094 val perfFromPTW = perfEventsPTW.map(x => ("PTW_" + x._1, x._2)) 2095 val perfBlock = Seq(("ldDeqCount", ldDeqCount), 2096 ("stDeqCount", stDeqCount)) 2097 // let index = 0 be no event 2098 val allPerfEvents = Seq(("noEvent", 0.U)) ++ perfFromUnits ++ perfFromPTW ++ perfBlock 2099 2100 if (printEventCoding) { 2101 for (((name, inc), i) <- allPerfEvents.zipWithIndex) { 2102 println("MemBlock perfEvents Set", name, inc, i) 2103 } 2104 } 2105 2106 val allPerfInc = allPerfEvents.map(_._2.asTypeOf(new PerfEvent)) 2107 val perfEvents = HPerfMonitor(csrevents, allPerfInc).getPerfEvents 2108 generatePerfEvent() 2109 2110 private val mbistPl = MbistPipeline.PlaceMbistPipeline(Int.MaxValue, "MbistPipeMemBlk", hasMbist) 2111 private val mbistIntf = if(hasMbist) { 2112 val params = mbistPl.get.nodeParams 2113 val intf = Some(Module(new MbistInterface( 2114 params = Seq(params), 2115 ids = Seq(mbistPl.get.childrenIds), 2116 name = s"MbistIntfMemBlk", 2117 pipelineNum = 1 2118 ))) 2119 intf.get.toPipeline.head <> mbistPl.get.mbist 2120 mbistPl.get.registerCSV(intf.get.info, "MbistMemBlk") 2121 intf.get.mbist := DontCare 2122 dontTouch(intf.get.mbist) 2123 //TODO: add mbist controller connections here 2124 intf 2125 } else { 2126 None 2127 } 2128 private val sigFromSrams = if (hasMbist) Some(SramHelper.genBroadCastBundleTop()) else None 2129 private val cg = ClockGate.genTeSrc 2130 dontTouch(cg) 2131 if (hasMbist) { 2132 sigFromSrams.get := io.dft.get 2133 cg.cgen := io.dft.get.cgen 2134 io.dft_frnt.get := io.dft.get 2135 io.dft_reset_frnt.get := io.dft_reset.get 2136 io.dft_bcknd.get := io.dft.get 2137 io.dft_reset_bcknd.get := io.dft_reset.get 2138 } else { 2139 cg.cgen := false.B 2140 } 2141} 2142 2143class MemBlock()(implicit p: Parameters) extends LazyModule 2144 with HasXSParameter { 2145 override def shouldBeInlined: Boolean = false 2146 2147 val inner = LazyModule(new MemBlockInlined()) 2148 2149 lazy val module = new MemBlockImp(this) 2150} 2151 2152class MemBlockImp(wrapper: MemBlock) extends LazyModuleImp(wrapper) { 2153 val io = IO(wrapper.inner.module.io.cloneType) 2154 val io_perf = IO(wrapper.inner.module.io_perf.cloneType) 2155 io <> wrapper.inner.module.io 2156 io_perf <> wrapper.inner.module.io_perf 2157 2158 if (p(DebugOptionsKey).ResetGen) { 2159 ResetGen(ResetGenNode(Seq(ModuleNode(wrapper.inner.module))), reset, sim = false, io.dft_reset) 2160 } 2161} 2162