/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.diplomacy._
import freechips.rocketchip.diplomacy.{BundleBridgeSource, LazyModule, LazyModuleImp}
import freechips.rocketchip.interrupts.{IntSinkNode, IntSinkPortSimple}
import freechips.rocketchip.tile.HasFPUParameters
import freechips.rocketchip.tilelink._
import utils._
import utility._
import utility.mbist.{MbistInterface, MbistPipeline}
import utility.sram.{SramMbistBundle, SramBroadcastBundle, SramHelper}
import system.{HasSoCParameter, SoCParamsKey}
import xiangshan._
import xiangshan.ExceptionNO._
import xiangshan.frontend.HasInstrMMIOConst
import xiangshan.backend.Bundles.{DynInst, MemExuInput, MemExuOutput}
import xiangshan.backend.ctrlblock.{DebugLSIO, LsTopdownInfo}
import xiangshan.backend.exu.MemExeUnit
import xiangshan.backend.fu._
import xiangshan.backend.fu.FuType._
import xiangshan.backend.fu.NewCSR.{CsrTriggerBundle, TriggerUtil, PFEvent}
import xiangshan.backend.fu.util.{CSRConst, SdtrigExt}
import xiangshan.backend.{BackendToTopBundle, TopToBackendBundle}
import xiangshan.backend.rob.{RobDebugRollingIO, RobPtr, RobLsqIO}
import xiangshan.backend.datapath.NewPipelineConnect
import xiangshan.backend.trace.{Itype, TraceCoreInterface}
import xiangshan.backend.Bundles._
import xiangshan.mem._
import xiangshan.mem.mdp._
import xiangshan.mem.Bundles._
import xiangshan.mem.prefetch.{BasePrefecher, L1Prefetcher, SMSParams, SMSPrefetcher}
import xiangshan.cache._
import xiangshan.cache.mmu._
import coupledL2.PrefetchRecv

trait HasMemBlockParameters extends HasXSParameter {
  // number of memory units
  val LduCnt  = backendParams.LduCnt
  val StaCnt  = backendParams.StaCnt
  val StdCnt  = backendParams.StdCnt
  val HyuCnt  = backendParams.HyuCnt
  val VlduCnt = backendParams.VlduCnt
  val VstuCnt = backendParams.VstuCnt

  val LdExuCnt  = LduCnt + HyuCnt
  val StAddrCnt = StaCnt + HyuCnt
  val StDataCnt = StdCnt
  val MemExuCnt = LduCnt + HyuCnt + StaCnt + StdCnt
  val MemAddrExtCnt = LdExuCnt + StaCnt
  val MemVExuCnt = VlduCnt + VstuCnt

  val AtomicWBPort   = 0
  val MisalignWBPort = 1
  val UncacheWBPort  = 2
  val NCWBPorts = Seq(1, 2)
}

abstract class MemBlockBundle(implicit val p: Parameters) extends Bundle with HasMemBlockParameters

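// Std: store-data functional unit. It is a pure pass-through: ready/valid are wired
// straight through, the store data is taken from src(0), and only robIdx is kept from
// the incoming ctrl bundle; everything else in the output is zeroed.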
class Std(cfg: FuConfig)(implicit p: Parameters) extends FuncUnit(cfg) {
  io.in.ready := io.out.ready
  io.out.valid := io.in.valid
  io.out.bits := 0.U.asTypeOf(io.out.bits)
  io.out.bits.res.data := io.in.bits.data.src(0)
  io.out.bits.ctrl.robIdx := io.in.bits.ctrl.robIdx
}

class ooo_to_mem(implicit p: Parameters) extends MemBlockBundle {
  val backendToTopBypass = Flipped(new BackendToTopBundle)

  val loadFastMatch = Vec(LdExuCnt, Input(UInt(LdExuCnt.W)))
  val loadFastFuOpType = Vec(LdExuCnt, Input(FuOpType()))
  val loadFastImm = Vec(LdExuCnt, Input(UInt(12.W)))
  val sfence = Input(new SfenceBundle)
  val tlbCsr = Input(new TlbCsrBundle)
  val lsqio = new Bundle {
    val lcommit = Input(UInt(log2Up(CommitWidth + 1).W))
    val scommit = Input(UInt(log2Up(CommitWidth + 1).W))
    val pendingMMIOld = Input(Bool())
    val pendingld = Input(Bool())
    val pendingst = Input(Bool())
    val pendingVst = Input(Bool())
    val commit = Input(Bool())
    val pendingPtr = Input(new RobPtr)
    val pendingPtrNext = Input(new RobPtr)
  }

  val isStoreException = Input(Bool())
  val isVlsException = Input(Bool())
  val csrCtrl = Flipped(new CustomCSRCtrlIO)
  val enqLsq = new LsqEnqIO
  val flushSb = Input(Bool())

  val storePc = Vec(StaCnt, Input(UInt(VAddrBits.W))) // for hw prefetch
  val hybridPc = Vec(HyuCnt, Input(UInt(VAddrBits.W))) // for hw prefetch

  val issueLda = MixedVec(Seq.fill(LduCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueSta = MixedVec(Seq.fill(StaCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueStd = MixedVec(Seq.fill(StdCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueHya = MixedVec(Seq.fill(HyuCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueVldu = MixedVec(Seq.fill(VlduCnt)(Flipped(DecoupledIO(new MemExuInput(isVector = true)))))

  def issueUops = issueLda ++ issueSta ++ issueStd ++ issueHya ++ issueVldu
}

class mem_to_ooo(implicit p: Parameters) extends MemBlockBundle {
  val topToBackendBypass = new TopToBackendBundle

  val otherFastWakeup = Vec(LdExuCnt, ValidIO(new DynInst))
  val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize + 1).W))
  val sqCancelCnt = Output(UInt(log2Up(StoreQueueSize + 1).W))
  val sqDeq = Output(UInt(log2Ceil(EnsbufferWidth + 1).W))
  val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
  // used by the VLSU issue queue: a vector store must wait for all stores before it,
  // and a vector load must wait for all loads before it
  val sqDeqPtr = Output(new SqPtr)
  val lqDeqPtr = Output(new LqPtr)
  val stIn = Vec(StAddrCnt, ValidIO(new MemExuInput))
  val stIssuePtr = Output(new SqPtr())

  val memoryViolation = ValidIO(new Redirect)
  val sbIsEmpty = Output(Bool())

  val lsTopdownInfo = Vec(LdExuCnt, Output(new LsTopdownInfo))

  val lsqio = new Bundle {
    val vaddr = Output(UInt(XLEN.W))
    val vstart = Output(UInt((log2Up(VLEN) + 1).W))
    val vl = Output(UInt((log2Up(VLEN) + 1).W))
    val gpaddr = Output(UInt(XLEN.W))
    val isForVSnonLeafPTE = Output(Bool())
    val mmio = Output(Vec(LoadPipelineWidth, Bool()))
    val uop = Output(Vec(LoadPipelineWidth, new DynInst))
    val lqCanAccept = Output(Bool())
    val sqCanAccept = Output(Bool())
  }

  val storeDebugInfo = Vec(EnsbufferWidth, new Bundle {
    val robidx = Output(new RobPtr)
    val pc = Input(UInt(VAddrBits.W))
  })

  val writebackLda = Vec(LduCnt, DecoupledIO(new MemExuOutput))
  val writebackSta = Vec(StaCnt, DecoupledIO(new MemExuOutput))
  val writebackStd = Vec(StdCnt, DecoupledIO(new MemExuOutput))
  val writebackHyuLda = Vec(HyuCnt, DecoupledIO(new MemExuOutput))
  val writebackHyuSta = Vec(HyuCnt, DecoupledIO(new MemExuOutput))
  val writebackVldu = Vec(VlduCnt, DecoupledIO(new MemExuOutput(isVector = true)))
  def writeBack: Seq[DecoupledIO[MemExuOutput]] = {
    writebackSta ++
      writebackHyuLda ++ writebackHyuSta ++
      writebackLda ++
      writebackVldu ++
      writebackStd
  }

  val ldaIqFeedback = Vec(LduCnt, new MemRSFeedbackIO)
  val staIqFeedback = Vec(StaCnt, new MemRSFeedbackIO)
  val hyuIqFeedback = Vec(HyuCnt, new MemRSFeedbackIO)
  val vstuIqFeedback = Vec(VstuCnt, new MemRSFeedbackIO(isVector = true))
  val vlduIqFeedback = Vec(VlduCnt, new MemRSFeedbackIO(isVector = true))
  val ldCancel = Vec(backendParams.LdExuCnt, new LoadCancelIO)
  val wakeup = Vec(backendParams.LdExuCnt, Valid(new DynInst))

  val s3_delayed_load_error = Vec(LdExuCnt, Output(Bool()))
}

class MemCoreTopDownIO extends Bundle {
  val robHeadMissInDCache = Output(Bool())
  val robHeadTlbReplay = Output(Bool())
  val robHeadTlbMiss = Output(Bool())
  val robHeadLoadVio = Output(Bool())
  val robHeadLoadMSHR = Output(Bool())
}

class fetch_to_mem(implicit p: Parameters) extends XSBundle {
  val itlb = Flipped(new TlbPtwIO())
}

// triple buffer applied in the i-mmio path (two at MemBlock, one at L2Top)
class InstrUncacheBuffer()(implicit p: Parameters) extends LazyModule with HasInstrMMIOConst {
  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
  lazy val module = new InstrUncacheBufferImpl

  class InstrUncacheBufferImpl extends LazyModuleImp(this) {
    (node.in zip node.out) foreach { case ((in, edgeIn), (out, edgeOut)) =>
      out.a <> BufferParams.default(BufferParams.default(in.a))
      in.d <> BufferParams.default(BufferParams.default(out.d))

      // only a.valid, a.ready and a.address can change;
      // the rest are tied off, hoping they get optimized away so the MemBlock port stays unchanged after adding the buffer
      out.a.bits.data := 0.U
      out.a.bits.mask := Fill(mmioBusBytes, 1.U(1.W))
      out.a.bits.opcode := 4.U // Get
      out.a.bits.size := log2Ceil(mmioBusBytes).U
      out.a.bits.source := 0.U
    }
  }
}

// triple buffer applied in the L1I$-L2 path (two at MemBlock, one at L2Top)
class ICacheBuffer()(implicit p: Parameters) extends LazyModule {
  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
  lazy val module = new ICacheBufferImpl

  class ICacheBufferImpl extends LazyModuleImp(this) {
    (node.in zip node.out) foreach { case ((in, edgeIn), (out, edgeOut)) =>
      out.a <> BufferParams.default(BufferParams.default(in.a))
      in.d <> BufferParams.default(BufferParams.default(out.d))
    }
  }
}

class ICacheCtrlBuffer()(implicit p: Parameters) extends LazyModule {
  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
  lazy val module = new ICacheCtrlBufferImpl

  class ICacheCtrlBufferImpl extends LazyModuleImp(this) {
    (node.in zip node.out) foreach { case ((in, edgeIn), (out, edgeOut)) =>
      out.a <> BufferParams.default(BufferParams.default(in.a))
      in.d <> BufferParams.default(BufferParams.default(out.d))
    }
  }
}

// Frontend bus goes through MemBlock
class FrontendBridge()(implicit p: Parameters) extends LazyModule {
  val icache_node = LazyModule(new ICacheBuffer()).suggestName("icache").node // to keep IO port name
  val icachectrl_node = LazyModule(new ICacheCtrlBuffer()).suggestName("icachectrl").node
  val instr_uncache_node = LazyModule(new InstrUncacheBuffer()).suggestName("instr_uncache").node
  lazy val module = new LazyModuleImp(this) {
  }
}

class MemBlockInlined()(implicit p: Parameters) extends LazyModule
  with HasXSParameter {
  override def shouldBeInlined: Boolean = true

  val dcache = LazyModule(new DCacheWrapper())
  val uncache = LazyModule(new Uncache())
  val uncache_port = TLTempNode()
  val uncache_xbar = TLXbar()
  val ptw = LazyModule(new L2TLBWrapper())
  val ptw_to_l2_buffer = if (!coreParams.softPTW) LazyModule(new TLBuffer) else null
  val l1d_to_l2_buffer = if (coreParams.dcacheParametersOpt.nonEmpty) LazyModule(new TLBuffer) else null
  val dcache_port = TLNameNode("dcache_client") // to keep dcache-L2 port name
  val l2_pf_sender_opt = coreParams.prefetcher.map(_ =>
    BundleBridgeSource(() => new PrefetchRecv)
  )
  val l3_pf_sender_opt = if (p(SoCParamsKey).L3CacheParamsOpt.nonEmpty) coreParams.prefetcher.map(_ =>
    BundleBridgeSource(() => new huancun.PrefetchRecv)
  ) else None
  val frontendBridge = LazyModule(new FrontendBridge)
  // interrupt sinks
  val clint_int_sink = IntSinkNode(IntSinkPortSimple(1, 2))
  val debug_int_sink = IntSinkNode(IntSinkPortSimple(1, 1))
  val plic_int_sink = IntSinkNode(IntSinkPortSimple(2, 1))
  val nmi_int_sink = IntSinkNode(IntSinkPortSimple(1, (new NonmaskableInterruptIO).elements.size))
  val beu_local_int_sink = IntSinkNode(IntSinkPortSimple(1, 1))

  if (!coreParams.softPTW) {
    ptw_to_l2_buffer.node := ptw.node
  }
  uncache_xbar := TLBuffer() := uncache.clientNode
  if (dcache.uncacheNode.isDefined) {
    dcache.uncacheNode.get := TLBuffer.chainNode(2) := uncache_xbar
  }
  uncache_port := TLBuffer.chainNode(2) := uncache_xbar

  lazy val module = new MemBlockInlinedImp(this)
}

class MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer)
  with HasXSParameter
  with HasFPUParameters
  with HasPerfEvents
  with HasSoCParameter
  with HasL1PrefetchSourceParameter
  with HasCircularQueuePtrHelper
  with HasMemBlockParameters
  with HasTlbConst
  with SdtrigExt
{
  val io = IO(new Bundle {
    val hartId = Input(UInt(hartIdLen.W))
    val redirect = Flipped(ValidIO(new Redirect))

    val ooo_to_mem = new ooo_to_mem
    val mem_to_ooo = new mem_to_ooo
    val fetch_to_mem = new fetch_to_mem

    val ifetchPrefetch = Vec(LduCnt, ValidIO(new SoftIfetchPrefetchBundle))

    // misc
    val error = ValidIO(new L1CacheErrorInfo)
    val memInfo = new Bundle {
      val sqFull = Output(Bool())
      val lqFull = Output(Bool())
      val dcacheMSHRFull = Output(Bool())
    }
    val debug_ls = new DebugLSIO
    val l2_hint = Input(Valid(new L2ToL1Hint()))
    val l2PfqBusy = Input(Bool())
    val l2_tlb_req = Flipped(new TlbRequestIO(nRespDups = 2))
    val l2_pmp_resp = new PMPRespBundle
    val l2_flush_done = Input(Bool())

    val debugTopDown = new Bundle {
      val robHeadVaddr = Flipped(Valid(UInt(VAddrBits.W)))
      val toCore = new MemCoreTopDownIO
    }
    val debugRolling = Flipped(new RobDebugRollingIO)

    // All the signals from/to frontend/backend to/from the bus go through MemBlock
    val fromTopToBackend = Input(new Bundle {
      val msiInfo = ValidIO(UInt(soc.IMSICParams.MSI_INFO_WIDTH.W))
      val clintTime = ValidIO(UInt(64.W))
    })
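    // Naming convention used below (as observed from how these ports are wired):
    // inner_* signals face the core-internal blocks (frontend/backend), while
    // outer_* signals face L2Top / the SoC side.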
    val inner_hartId = Output(UInt(hartIdLen.W))
    val inner_reset_vector = Output(UInt(PAddrBits.W))
    val outer_reset_vector = Input(UInt(PAddrBits.W))
    val outer_cpu_halt = Output(Bool())
    val outer_l2_flush_en = Output(Bool())
    val outer_power_down_en = Output(Bool())
    val outer_cpu_critical_error = Output(Bool())
    val outer_msi_ack = Output(Bool())
    val inner_beu_errors_icache = Input(new L1BusErrorUnitInfo)
    val outer_beu_errors_icache = Output(new L1BusErrorUnitInfo)
    val inner_hc_perfEvents = Output(Vec(numPCntHc * coreParams.L2NBanks + 1, new PerfEvent))
    val outer_hc_perfEvents = Input(Vec(numPCntHc * coreParams.L2NBanks + 1, new PerfEvent))

    // reset signals of frontend & backend are generated in MemBlock
    val reset_backend = Output(Reset())
    // reset signal from frontend
    val resetInFrontendBypass = new Bundle {
      val fromFrontend = Input(Bool())
      val toL2Top = Output(Bool())
    }
    val traceCoreInterfaceBypass = new Bundle {
      val fromBackend = Flipped(new TraceCoreInterface(hasOffset = true))
      val toL2Top = new TraceCoreInterface
    }

    val topDownInfo = new Bundle {
      val fromL2Top = Input(new TopDownFromL2Top)
      val toBackend = Flipped(new TopDownInfo)
    }
    val sramTestBypass = new Bundle() {
      val fromL2Top = new Bundle() {
        val mbist = Option.when(hasMbist)(Input(new SramMbistBundle))
        val mbistReset = Option.when(hasMbist)(Input(new DFTResetSignals()))
        val sramCtl = Option.when(hasSramCtl)(Input(UInt(64.W)))
      }
      val toFrontend = new Bundle() {
        val mbist = Option.when(hasMbist)(Output(new SramMbistBundle))
        val mbistReset = Option.when(hasMbist)(Output(new DFTResetSignals()))
        val sramCtl = Option.when(hasSramCtl)(Output(UInt(64.W)))
      }
      val toBackend = new Bundle() {
        val mbist = Option.when(hasMbist)(Output(new SramMbistBundle))
        val mbistReset = Option.when(hasMbist)(Output(new DFTResetSignals()))
      }
    }
  })

  io.mem_to_ooo.writeBack.zipWithIndex.foreach { case (wb, i) =>
    PerfCCT.updateInstPos(wb.bits.uop.debug_seqNum, PerfCCT.InstPos.AtBypassVal.id.U, wb.valid, clock, reset)
  }

  dontTouch(io.inner_hartId)
  dontTouch(io.inner_reset_vector)
  dontTouch(io.outer_reset_vector)
  dontTouch(io.outer_cpu_halt)
  dontTouch(io.outer_l2_flush_en)
  dontTouch(io.outer_power_down_en)
  dontTouch(io.outer_cpu_critical_error)
  dontTouch(io.inner_beu_errors_icache)
  dontTouch(io.outer_beu_errors_icache)
  dontTouch(io.inner_hc_perfEvents)
  dontTouch(io.outer_hc_perfEvents)

  val redirect = RegNextWithEnable(io.redirect)

  private val dcache = outer.dcache.module
  val uncache = outer.uncache.module

  //val delayedDcacheRefill = RegNext(dcache.io.lsu.lsq)

  val csrCtrl = DelayN(io.ooo_to_mem.csrCtrl, 2)
  dcache.io.l2_pf_store_only := RegNext(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_store_only, false.B)
  io.error <> DelayNWithValid(dcache.io.error, 2)
  when(!csrCtrl.cache_error_enable) {
    io.error.bits.report_to_beu := false.B
    io.error.valid := false.B
  }

  val loadUnits = Seq.fill(LduCnt)(Module(new LoadUnit))
  val storeUnits = Seq.fill(StaCnt)(Module(new StoreUnit))
  val stdExeUnits = Seq.fill(StdCnt)(Module(new MemExeUnit(backendParams.memSchdParams.get.issueBlockParams.find(_.StdCnt != 0).get.exuBlockParams.head)))
  val hybridUnits = Seq.fill(HyuCnt)(Module(new HybridUnit)) // Todo: replace it with HybridUnit
  val stData = stdExeUnits.map(_.io.out)
  val exeUnits = loadUnits ++ storeUnits
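  // Vector memory accesses reuse the scalar pipelines: vlSplit/vsSplit break a vector
  // load/store uop into element/flow accesses that go down the LoadUnit/StoreUnit pipes,
  // and vlMergeBuffer/vsMergeBuffer collect the element results before writeback.
  // vSegmentUnit handles segment instructions and vfofBuffer handles fault-only-first loads.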
  // The number of vector load/store units is decoupled from the number of scalar load/store units
  val vlSplit = Seq.fill(VlduCnt)(Module(new VLSplitImp))
  val vsSplit = Seq.fill(VstuCnt)(Module(new VSSplitImp))
  val vlMergeBuffer = Module(new VLMergeBufferImp)
  val vsMergeBuffer = Seq.fill(VstuCnt)(Module(new VSMergeBufferImp))
  val vSegmentUnit = Module(new VSegmentUnit)
  val vfofBuffer = Module(new VfofBuffer)

  // misalign buffers
  val loadMisalignBuffer = Module(new LoadMisalignBuffer)
  val storeMisalignBuffer = Module(new StoreMisalignBuffer)

  val l1_pf_req = Wire(Decoupled(new L1PrefetchReq()))
  dcache.io.sms_agt_evict_req.ready := false.B
  val l1D_pf_enable = GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable, 2, Some(false.B))
  val prefetcherOpt: Option[BasePrefecher] = coreParams.prefetcher.map {
    case _: SMSParams =>
      val sms = Module(new SMSPrefetcher())
      val enableSMS = Constantin.createRecord(s"enableSMS$hartId", initValue = true)
      // constantinCtrl && master switch csrCtrl && single switch csrCtrl
      sms.io.enable := enableSMS && l1D_pf_enable &&
        GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_recv_enable, 2, Some(false.B))
      sms.io_agt_en := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_agt, 2, Some(false.B))
      sms.io_pht_en := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_pht, 2, Some(false.B))
      sms.io_act_threshold := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_active_threshold, 2, Some(12.U))
      sms.io_act_stride := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_active_stride, 2, Some(30.U))
      sms.io_stride_en := false.B
      sms.io_dcache_evict <> dcache.io.sms_agt_evict_req
      val mbistSmsPl = MbistPipeline.PlaceMbistPipeline(1, "MbistPipeSms", hasMbist)
      sms
  }
  prefetcherOpt.foreach { pf => pf.io.l1_req.ready := false.B }
  val hartId = p(XSCoreParamsKey).HartId
  val l1PrefetcherOpt: Option[BasePrefecher] = coreParams.prefetcher.map {
    case _ =>
      val l1Prefetcher = Module(new L1Prefetcher())
      val enableL1StreamPrefetcher = Constantin.createRecord(s"enableL1StreamPrefetcher$hartId", initValue = true)
      // constantinCtrl && master switch csrCtrl && single switch csrCtrl
      l1Prefetcher.io.enable := enableL1StreamPrefetcher && l1D_pf_enable &&
        GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_stride, 2, Some(false.B))
      l1Prefetcher.pf_ctrl <> dcache.io.pf_ctrl
      l1Prefetcher.l2PfqBusy := io.l2PfqBusy

      // the stride prefetcher trains on a miss or on a prefetch hit
      for (i <- 0 until LduCnt) {
        val source = loadUnits(i).io.prefetch_train_l1
        l1Prefetcher.stride_train(i).valid := source.valid && source.bits.isFirstIssue && (
          source.bits.miss || isFromStride(source.bits.meta_prefetch)
        )
        l1Prefetcher.stride_train(i).bits := source.bits
        val loadPc = RegNext(io.ooo_to_mem.issueLda(i).bits.uop.pc) // for s1
        l1Prefetcher.stride_train(i).bits.uop.pc := Mux(
          loadUnits(i).io.s2_ptr_chasing,
          RegEnable(loadPc, loadUnits(i).io.s2_prefetch_spec),
          RegEnable(RegEnable(loadPc, loadUnits(i).io.s1_prefetch_spec), loadUnits(i).io.s2_prefetch_spec)
        )
      }
      for (i <- 0 until HyuCnt) {
        val source = hybridUnits(i).io.prefetch_train_l1
        l1Prefetcher.stride_train.drop(LduCnt)(i).valid := source.valid && source.bits.isFirstIssue && (
          source.bits.miss || isFromStride(source.bits.meta_prefetch)
        )
        l1Prefetcher.stride_train.drop(LduCnt)(i).bits := source.bits
        l1Prefetcher.stride_train.drop(LduCnt)(i).bits.uop.pc := Mux(
          hybridUnits(i).io.ldu_io.s2_ptr_chasing,
          RegNext(io.ooo_to_mem.hybridPc(i)),
          RegNext(RegNext(io.ooo_to_mem.hybridPc(i)))
        )
      }
      l1Prefetcher
  }
  // load prefetch to L1 DCache
  l1PrefetcherOpt match {
    case Some(pf) => l1_pf_req <> Pipeline(in = pf.io.l1_req, depth = 1, pipe = false, name = Some("pf_queue_to_ldu_reg"))
    case None =>
      l1_pf_req.valid := false.B
      l1_pf_req.bits := DontCare
  }
  val pf_train_on_hit = RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_train_on_hit, 2, Some(true.B))

  loadUnits.zipWithIndex.map(x => x._1.suggestName("LoadUnit_" + x._2))
  storeUnits.zipWithIndex.map(x => x._1.suggestName("StoreUnit_" + x._2))
  hybridUnits.zipWithIndex.map(x => x._1.suggestName("HybridUnit_" + x._2))
  val atomicsUnit = Module(new AtomicsUnit)

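  // Load writeback port sharing (cf. AtomicWBPort/MisalignWBPort/UncacheWBPort in
  // HasMemBlockParameters): the AtomicsUnit writes back through load port 0, the
  // LoadMisalignBuffer through port 1, and uncache load data returns through port 2,
  // as wired below.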
  val ldaExeWbReqs = Wire(Vec(LduCnt, Decoupled(new MemExuOutput)))
  // atomicsUnit will overwrite the source from ldu if it is about to writeback
  val atomicWritebackOverride = Mux(
    atomicsUnit.io.out.valid,
    atomicsUnit.io.out.bits,
    loadUnits(AtomicWBPort).io.ldout.bits
  )
  ldaExeWbReqs(AtomicWBPort).valid := atomicsUnit.io.out.valid || loadUnits(AtomicWBPort).io.ldout.valid
  ldaExeWbReqs(AtomicWBPort).bits := atomicWritebackOverride
  atomicsUnit.io.out.ready := ldaExeWbReqs(AtomicWBPort).ready
  loadUnits(AtomicWBPort).io.ldout.ready := ldaExeWbReqs(AtomicWBPort).ready

  val st_data_atomics = Seq.tabulate(StdCnt)(i =>
    stData(i).valid && FuType.storeIsAMO(stData(i).bits.uop.fuType)
  )

  // misalignBuffer shares this port with the ldu; the ldu output takes priority, and the
  // misalignBuffer writes back only when the ldu is not writing back on this port
  val misalignWritebackOverride = Mux(
    loadUnits(MisalignWBPort).io.ldout.valid,
    loadUnits(MisalignWBPort).io.ldout.bits,
    loadMisalignBuffer.io.writeBack.bits
  )
  ldaExeWbReqs(MisalignWBPort).valid := loadMisalignBuffer.io.writeBack.valid || loadUnits(MisalignWBPort).io.ldout.valid
  ldaExeWbReqs(MisalignWBPort).bits := misalignWritebackOverride
  loadMisalignBuffer.io.writeBack.ready := ldaExeWbReqs(MisalignWBPort).ready && !loadUnits(MisalignWBPort).io.ldout.valid
  loadMisalignBuffer.io.loadOutValid := loadUnits(MisalignWBPort).io.ldout.valid
  loadMisalignBuffer.io.loadVecOutValid := loadUnits(MisalignWBPort).io.vecldout.valid
  loadUnits(MisalignWBPort).io.ldout.ready := ldaExeWbReqs(MisalignWBPort).ready
  ldaExeWbReqs(MisalignWBPort).bits.isFromLoadUnit := loadUnits(MisalignWBPort).io.ldout.bits.isFromLoadUnit || loadMisalignBuffer.io.writeBack.valid

  // loadUnit will overwrite the source from uncache if it is about to writeback
  ldaExeWbReqs(UncacheWBPort) <> loadUnits(UncacheWBPort).io.ldout
  io.mem_to_ooo.writebackLda <> ldaExeWbReqs
  io.mem_to_ooo.writebackSta <> storeUnits.map(_.io.stout)
  io.mem_to_ooo.writebackStd.zip(stdExeUnits).foreach { x =>
    x._1.bits := x._2.io.out.bits
    // AMOs do not need to write back std now.
    x._1.valid := x._2.io.out.fire && !FuType.storeIsAMO(x._2.io.out.bits.uop.fuType)
  }
  io.mem_to_ooo.writebackHyuLda <> hybridUnits.map(_.io.ldout)
  io.mem_to_ooo.writebackHyuSta <> hybridUnits.map(_.io.stout)
  io.mem_to_ooo.otherFastWakeup := DontCare
  io.mem_to_ooo.otherFastWakeup.drop(HyuCnt).take(LduCnt).zip(loadUnits.map(_.io.fast_uop)).foreach { case (a, b) => a := b }
  io.mem_to_ooo.otherFastWakeup.take(HyuCnt).zip(hybridUnits.map(_.io.ldu_io.fast_uop)).foreach { case (a, b) => a := b }
  val stOut = io.mem_to_ooo.writebackSta ++ io.mem_to_ooo.writebackHyuSta

  // prefetch to l1 req
  // Stream's confidence is always 1
  // (LduCnt + HyuCnt) l1_pf_reqs ?
  loadUnits.foreach(load_unit => {
    load_unit.io.prefetch_req.valid <> l1_pf_req.valid
    load_unit.io.prefetch_req.bits <> l1_pf_req.bits
  })

  hybridUnits.foreach(hybrid_unit => {
    hybrid_unit.io.ldu_io.prefetch_req.valid <> l1_pf_req.valid
    hybrid_unit.io.ldu_io.prefetch_req.bits <> l1_pf_req.bits
  })

  // NOTE: loadUnits(0) has higher bank conflict and miss queue arb priority than loadUnits(1) and loadUnits(2)
  // when loadUnits(1)/loadUnits(2) stage 0 is busy, hw prefetch will never use that pipeline
  val LowConfPorts = if (LduCnt == 2) Seq(1) else if (LduCnt == 3) Seq(1, 2) else Seq(0)
  LowConfPorts.map { case i => loadUnits(i).io.prefetch_req.bits.confidence := 0.U }
  hybridUnits.foreach(hybrid_unit => { hybrid_unit.io.ldu_io.prefetch_req.bits.confidence := 0.U })

  val canAcceptHighConfPrefetch = loadUnits.map(_.io.canAcceptHighConfPrefetch) ++
    hybridUnits.map(_.io.canAcceptLowConfPrefetch)
  val canAcceptLowConfPrefetch = loadUnits.map(_.io.canAcceptLowConfPrefetch) ++
    hybridUnits.map(_.io.canAcceptLowConfPrefetch)
  l1_pf_req.ready := (0 until LduCnt + HyuCnt).map {
    case i => {
      if (LowConfPorts.contains(i)) {
        loadUnits(i).io.canAcceptLowConfPrefetch
      } else {
        Mux(l1_pf_req.bits.confidence === 1.U, canAcceptHighConfPrefetch(i), canAcceptLowConfPrefetch(i))
      }
    }
  }.reduce(_ || _)

  // l1 pf fuzzer interface
  val DebugEnableL1PFFuzzer = false
  if (DebugEnableL1PFFuzzer) {
    // l1 pf req fuzzer
    val fuzzer = Module(new L1PrefetchFuzzer())
    fuzzer.io.vaddr := DontCare
    fuzzer.io.paddr := DontCare

    // override load_unit prefetch_req
    loadUnits.foreach(load_unit => {
      load_unit.io.prefetch_req.valid <> fuzzer.io.req.valid
      load_unit.io.prefetch_req.bits <> fuzzer.io.req.bits
    })

    // override hybrid_unit prefetch_req
    hybridUnits.foreach(hybrid_unit => {
      hybrid_unit.io.ldu_io.prefetch_req.valid <> fuzzer.io.req.valid
      hybrid_unit.io.ldu_io.prefetch_req.bits <> fuzzer.io.req.bits
    })

    fuzzer.io.req.ready := l1_pf_req.ready
  }

  // TODO: fast load wakeup
  val lsq = Module(new LsqWrapper)
  val sbuffer = Module(new Sbuffer)
  // if you want to stress test dcache store, use FakeSbuffer
  // val sbuffer = Module(new FakeSbuffer) // out of date now
  io.mem_to_ooo.stIssuePtr := lsq.io.issuePtrExt

  dcache.io.hartId := io.hartId
  lsq.io.hartId := io.hartId
  sbuffer.io.hartId := io.hartId
  atomicsUnit.io.hartId := io.hartId

  dcache.io.lqEmpty := lsq.io.lqEmpty
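  // Prefetch requests toward the lower cache levels: both the SMS prefetcher and the L1
  // stream prefetcher send L2 prefetch requests through the l2_pf_sender bundle bridge
  // (the stream request wins when both are valid), and only the stream prefetcher feeds
  // the optional L3 sender.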
  // load/store prefetch to l2 cache
  prefetcherOpt.foreach(sms_pf => {
    l1PrefetcherOpt.foreach(l1_pf => {
      val sms_pf_to_l2 = DelayNWithValid(sms_pf.io.l2_req, 2)
      val l1_pf_to_l2 = DelayNWithValid(l1_pf.io.l2_req, 2)

      outer.l2_pf_sender_opt.get.out.head._1.addr_valid := sms_pf_to_l2.valid || l1_pf_to_l2.valid
      outer.l2_pf_sender_opt.get.out.head._1.addr := Mux(l1_pf_to_l2.valid, l1_pf_to_l2.bits.addr, sms_pf_to_l2.bits.addr)
      outer.l2_pf_sender_opt.get.out.head._1.pf_source := Mux(l1_pf_to_l2.valid, l1_pf_to_l2.bits.source, sms_pf_to_l2.bits.source)
      outer.l2_pf_sender_opt.get.out.head._1.l2_pf_en := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_enable, 2, Some(true.B))

      val l2_trace = Wire(new LoadPfDbBundle)
      l2_trace.paddr := outer.l2_pf_sender_opt.get.out.head._1.addr
      val table = ChiselDB.createTable(s"L2PrefetchTrace$hartId", new LoadPfDbBundle, basicDB = false)
      table.log(l2_trace, l1_pf_to_l2.valid, "StreamPrefetchTrace", clock, reset)
      table.log(l2_trace, !l1_pf_to_l2.valid && sms_pf_to_l2.valid, "L2PrefetchTrace", clock, reset)

      val l1_pf_to_l3 = ValidIODelay(l1_pf.io.l3_req, 4)
      outer.l3_pf_sender_opt.foreach(_.out.head._1.addr_valid := l1_pf_to_l3.valid)
      outer.l3_pf_sender_opt.foreach(_.out.head._1.addr := l1_pf_to_l3.bits)
      outer.l3_pf_sender_opt.foreach(_.out.head._1.l2_pf_en := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_enable, 4, Some(true.B)))

      val l3_trace = Wire(new LoadPfDbBundle)
      l3_trace.paddr := outer.l3_pf_sender_opt.map(_.out.head._1.addr).getOrElse(0.U)
      val l3_table = ChiselDB.createTable(s"L3PrefetchTrace$hartId", new LoadPfDbBundle, basicDB = false)
      l3_table.log(l3_trace, l1_pf_to_l3.valid, "StreamPrefetchTrace", clock, reset)

      XSPerfAccumulate("prefetch_fire_l2", outer.l2_pf_sender_opt.get.out.head._1.addr_valid)
      XSPerfAccumulate("prefetch_fire_l3", outer.l3_pf_sender_opt.map(_.out.head._1.addr_valid).getOrElse(false.B))
      XSPerfAccumulate("l1pf_fire_l2", l1_pf_to_l2.valid)
      XSPerfAccumulate("sms_fire_l2", !l1_pf_to_l2.valid && sms_pf_to_l2.valid)
      XSPerfAccumulate("sms_block_by_l1pf", l1_pf_to_l2.valid && sms_pf_to_l2.valid)
    })
  })

  // ptw
  val sfence = RegNext(RegNext(io.ooo_to_mem.sfence))
  val tlbcsr = RegNext(RegNext(io.ooo_to_mem.tlbCsr))
  private val ptw = outer.ptw.module
  private val ptw_to_l2_buffer = outer.ptw_to_l2_buffer.module
  private val l1d_to_l2_buffer = outer.l1d_to_l2_buffer.module
  ptw.io.hartId := io.hartId
  ptw.io.sfence <> sfence
  ptw.io.csr.tlb <> tlbcsr
  ptw.io.csr.distribute_csr <> csrCtrl.distribute_csr

  val perfEventsPTW = if (!coreParams.softPTW) {
    ptw.getPerfEvents
  } else {
    Seq()
  }

  // dtlb
  val dtlb_ld_tlb_ld = Module(new TLBNonBlock(LduCnt + HyuCnt + 1, 2, ldtlbParams))
  val dtlb_st_tlb_st = Module(new TLBNonBlock(StaCnt, 1, sttlbParams))
  val dtlb_prefetch_tlb_prefetch = Module(new TLBNonBlock(2, 2, pftlbParams))
  val dtlb_ld = Seq(dtlb_ld_tlb_ld.io)
  val dtlb_st = Seq(dtlb_st_tlb_st.io)
  val dtlb_prefetch = Seq(dtlb_prefetch_tlb_prefetch.io)
  /* tlb vec && constant variable */
  val dtlb = dtlb_ld ++ dtlb_st ++ dtlb_prefetch
  val (dtlb_ld_idx, dtlb_st_idx, dtlb_pf_idx) = (0, 1, 2)
  val TlbSubSizeVec = Seq(LduCnt + HyuCnt + 1, StaCnt, 2) // (load + hyu + stream pf, store, sms + l2bop)
  val DTlbSize = TlbSubSizeVec.sum
  val TlbStartVec = TlbSubSizeVec.scanLeft(0)(_ + _).dropRight(1)
  val TlbEndVec = TlbSubSizeVec.scanLeft(0)(_ + _).drop(1)

  val ptwio = Wire(new VectorTlbPtwIO(DTlbSize))
  val dtlb_reqs = dtlb.map(_.requestor).flatten
  val dtlb_pmps = dtlb.map(_.pmp).flatten
  dtlb.map(_.hartId := io.hartId)
  dtlb.map(_.sfence := sfence)
  dtlb.map(_.csr := tlbcsr)
  dtlb.map(_.flushPipe.map(a => a := false.B)) // non-blocking TLBs do not need flushPipe
  dtlb.map(_.redirect := redirect)
  if (refillBothTlb) {
    require(ldtlbParams.outReplace == sttlbParams.outReplace)
    require(ldtlbParams.outReplace == hytlbParams.outReplace)
    require(ldtlbParams.outReplace == pftlbParams.outReplace)
    require(ldtlbParams.outReplace)

    val replace = Module(new TlbReplace(DTlbSize, ldtlbParams))
    replace.io.apply_sep(dtlb_ld.map(_.replace) ++ dtlb_st.map(_.replace) ++ dtlb_prefetch.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
  } else {
    // TODO: there will be bugs in TlbReplace when outReplace enable, since the order of Hyu is not right.
    if (ldtlbParams.outReplace) {
      val replace_ld = Module(new TlbReplace(LduCnt + 1, ldtlbParams))
      replace_ld.io.apply_sep(dtlb_ld.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
    }
    if (hytlbParams.outReplace) {
      val replace_hy = Module(new TlbReplace(HyuCnt, hytlbParams))
      replace_hy.io.apply_sep(dtlb_ld.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
    }
    if (sttlbParams.outReplace) {
      val replace_st = Module(new TlbReplace(StaCnt, sttlbParams))
      replace_st.io.apply_sep(dtlb_st.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
    }
    if (pftlbParams.outReplace) {
      val replace_pf = Module(new TlbReplace(2, pftlbParams))
      replace_pf.io.apply_sep(dtlb_prefetch.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
    }
  }

  val ptw_resp_next = RegEnable(ptwio.resp.bits, ptwio.resp.valid)
  val ptw_resp_v = RegNext(ptwio.resp.valid && !(sfence.valid && tlbcsr.satp.changed && tlbcsr.vsatp.changed && tlbcsr.hgatp.changed), init = false.B)
  ptwio.resp.ready := true.B

  val tlbreplay = WireInit(VecInit(Seq.fill(LdExuCnt)(false.B)))
  val tlbreplay_reg = GatedValidRegNext(tlbreplay)
  val dtlb_ld0_tlbreplay_reg = GatedValidRegNext(dtlb_ld(0).tlbreplay)

  if (backendParams.debugEn) { dontTouch(tlbreplay) }

  for (i <- 0 until LdExuCnt) {
    tlbreplay(i) := dtlb_ld(0).ptw.req(i).valid && ptw_resp_next.vector(0) && ptw_resp_v &&
      ptw_resp_next.data.hit(dtlb_ld(0).ptw.req(i).bits.vpn, tlbcsr.satp.asid, tlbcsr.vsatp.asid, tlbcsr.hgatp.vmid, allType = true, ignoreAsid = true)
  }

  dtlb.flatMap(a => a.ptw.req)
    .zipWithIndex
    .foreach { case (tlb, i) =>
      tlb.ready := ptwio.req(i).ready
      ptwio.req(i).bits := tlb.bits
      val vector_hit = if (refillBothTlb) Cat(ptw_resp_next.vector).orR
        else if (i < TlbEndVec(dtlb_ld_idx)) Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_ld_idx), TlbEndVec(dtlb_ld_idx))).orR
        else if (i < TlbEndVec(dtlb_st_idx)) Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_st_idx), TlbEndVec(dtlb_st_idx))).orR
        else Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_pf_idx), TlbEndVec(dtlb_pf_idx))).orR
      ptwio.req(i).valid := tlb.valid && !(ptw_resp_v && vector_hit && ptw_resp_next.data.hit(tlb.bits.vpn, tlbcsr.satp.asid, tlbcsr.vsatp.asid, tlbcsr.hgatp.vmid, allType = true, ignoreAsid = true))
    }
  dtlb.foreach(_.ptw.resp.bits := ptw_resp_next.data)
  if (refillBothTlb) {
    dtlb.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector).orR)
  } else {
    dtlb_ld.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_ld_idx), TlbEndVec(dtlb_ld_idx))).orR)
    dtlb_st.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_st_idx), TlbEndVec(dtlb_st_idx))).orR)
    dtlb_prefetch.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_pf_idx), TlbEndVec(dtlb_pf_idx))).orR)
  }
  dtlb_ld.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.take(LduCnt + HyuCnt + 1)).orR)
  dtlb_st.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.slice(LduCnt + HyuCnt + 1, LduCnt + HyuCnt + 1 + StaCnt)).orR)
  dtlb_prefetch.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.drop(LduCnt + HyuCnt + 1 + StaCnt)).orR)

  val dtlbRepeater = PTWNewFilter(ldtlbParams.fenceDelay, ptwio, ptw.io.tlb(1), sfence, tlbcsr, l2tlbParams.dfilterSize)
  val itlbRepeater3 = PTWRepeaterNB(passReady = false, itlbParams.fenceDelay, io.fetch_to_mem.itlb, ptw.io.tlb(0), sfence, tlbcsr)

  lsq.io.debugTopDown.robHeadMissInDTlb := dtlbRepeater.io.rob_head_miss_in_tlb

  // pmp
  val pmp = Module(new PMP())
  pmp.io.distribute_csr <> csrCtrl.distribute_csr

  val pmp_checkers = Seq.fill(DTlbSize)(Module(new PMPChecker(4, leaveHitMux = true)))
  val pmp_check = pmp_checkers.map(_.io)
  for ((p, d) <- pmp_check zip dtlb_pmps) {
    if (HasBitmapCheck) {
      p.apply(tlbcsr.mbmc.CMODE.asBool, tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, d)
    } else {
      p.apply(tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, d)
    }
    require(p.req.bits.size.getWidth == d.bits.size.getWidth)
  }

  for (i <- 0 until LduCnt) {
    io.debug_ls.debugLsInfo(i) := loadUnits(i).io.debug_ls
  }
  for (i <- 0 until HyuCnt) {
    io.debug_ls.debugLsInfo.drop(LduCnt)(i) := hybridUnits(i).io.ldu_io.debug_ls
  }
  for (i <- 0 until StaCnt) {
    io.debug_ls.debugLsInfo.drop(LduCnt + HyuCnt)(i) := storeUnits(i).io.debug_ls
  }
  for (i <- 0 until HyuCnt) {
    io.debug_ls.debugLsInfo.drop(LduCnt + HyuCnt + StaCnt)(i) := hybridUnits(i).io.stu_io.debug_ls
  }

  io.mem_to_ooo.lsTopdownInfo := loadUnits.map(_.io.lsTopdownInfo) ++ hybridUnits.map(_.io.ldu_io.lsTopdownInfo)

  // trigger
  val tdata = RegInit(VecInit(Seq.fill(TriggerNum)(0.U.asTypeOf(new MatchTriggerIO))))
  val tEnable = RegInit(VecInit(Seq.fill(TriggerNum)(false.B)))
  tEnable := csrCtrl.mem_trigger.tEnableVec
  when(csrCtrl.mem_trigger.tUpdate.valid) {
    tdata(csrCtrl.mem_trigger.tUpdate.bits.addr) := csrCtrl.mem_trigger.tUpdate.bits.tdata
  }
  val triggerCanRaiseBpExp = csrCtrl.mem_trigger.triggerCanRaiseBpExp
  val debugMode = csrCtrl.mem_trigger.debugMode

  val backendTriggerTimingVec = VecInit(tdata.map(_.timing))
  val backendTriggerChainVec = VecInit(tdata.map(_.chain))

  XSDebug(tEnable.asUInt.orR, "Debug Mode: At least one store trigger is enabled\n")
  for (j <- 0 until TriggerNum)
    PrintTriggerInfo(tEnable(j), tdata(j))

  // The segment instruction is executed atomically.
  // Once a segment instruction starts executing, no other instruction should be executed until it finishes.
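  // vSegmentFlag is raised while a segment uop is in flight; it is used below to block the
  // scalar load units' DCache requests and to hand DCache/DTLB load port 0 to the vSegmentUnit.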
  val vSegmentFlag = RegInit(false.B)

  when(GatedValidRegNext(vSegmentUnit.io.in.fire)) {
    vSegmentFlag := true.B
  }.elsewhen(GatedValidRegNext(vSegmentUnit.io.uopwriteback.valid)) {
    vSegmentFlag := false.B
  }

  val misalign_allow_spec = RegInit(true.B)
  val ldu_rollback_with_misalign_nack = loadUnits.map(ldu =>
    ldu.io.lsq.ldin.bits.isFrmMisAlignBuf && ldu.io.lsq.ldin.bits.rep_info.rar_nack && ldu.io.rollback.valid
  ).reduce(_ || _)
  when (ldu_rollback_with_misalign_nack) {
    misalign_allow_spec := false.B
  } .elsewhen(lsq.io.rarValidCount < (LoadQueueRARSize - 4).U) {
    misalign_allow_spec := true.B
  }

  // LoadUnit
  val correctMissTrain = Constantin.createRecord(s"CorrectMissTrain$hartId", initValue = false)

  for (i <- 0 until LduCnt) {
    loadUnits(i).io.redirect <> redirect
    loadUnits(i).io.misalign_allow_spec := misalign_allow_spec

    // get input from dispatch
    loadUnits(i).io.ldin <> io.ooo_to_mem.issueLda(i)
    loadUnits(i).io.feedback_slow <> io.mem_to_ooo.ldaIqFeedback(i).feedbackSlow
    io.mem_to_ooo.ldaIqFeedback(i).feedbackFast := DontCare
    loadUnits(i).io.correctMissTrain := correctMissTrain
    io.mem_to_ooo.ldCancel.drop(HyuCnt)(i) := loadUnits(i).io.ldCancel
    io.mem_to_ooo.wakeup.drop(HyuCnt)(i) := loadUnits(i).io.wakeup

    // vector
    if (i < VlduCnt) {
      loadUnits(i).io.vecldout.ready := false.B
    } else {
      loadUnits(i).io.vecldin.valid := false.B
      loadUnits(i).io.vecldin.bits := DontCare
      loadUnits(i).io.vecldout.ready := false.B
    }

    // fast replay
    loadUnits(i).io.fast_rep_in <> loadUnits(i).io.fast_rep_out

    // SoftPrefetch to frontend (prefetch.i)
    loadUnits(i).io.ifetchPrefetch <> io.ifetchPrefetch(i)

    // dcache access
    loadUnits(i).io.dcache <> dcache.io.lsu.load(i)
    if (i == 0) {
      vSegmentUnit.io.rdcache := DontCare
      dcache.io.lsu.load(i).req.valid := loadUnits(i).io.dcache.req.valid || vSegmentUnit.io.rdcache.req.valid
      dcache.io.lsu.load(i).req.bits := Mux1H(Seq(
        vSegmentUnit.io.rdcache.req.valid -> vSegmentUnit.io.rdcache.req.bits,
        loadUnits(i).io.dcache.req.valid -> loadUnits(i).io.dcache.req.bits
      ))
      vSegmentUnit.io.rdcache.req.ready := dcache.io.lsu.load(i).req.ready
    }

    // DCache requests must also be preempted by the segment unit.
    when(vSegmentFlag) {
      loadUnits(i).io.dcache.req.ready := false.B // DCache is preempted.
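      // While the segment unit owns the port, loadUnit(0)'s DCache stage signals are
      // driven from vSegmentUnit.io.rdcache instead of loadUnits(0).io.dcache.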
      dcache.io.lsu.load(0).pf_source := vSegmentUnit.io.rdcache.pf_source
      dcache.io.lsu.load(0).s1_paddr_dup_lsu := vSegmentUnit.io.rdcache.s1_paddr_dup_lsu
      dcache.io.lsu.load(0).s1_paddr_dup_dcache := vSegmentUnit.io.rdcache.s1_paddr_dup_dcache
      dcache.io.lsu.load(0).s1_kill := vSegmentUnit.io.rdcache.s1_kill
      dcache.io.lsu.load(0).s2_kill := vSegmentUnit.io.rdcache.s2_kill
      dcache.io.lsu.load(0).s0_pc := vSegmentUnit.io.rdcache.s0_pc
      dcache.io.lsu.load(0).s1_pc := vSegmentUnit.io.rdcache.s1_pc
      dcache.io.lsu.load(0).s2_pc := vSegmentUnit.io.rdcache.s2_pc
      dcache.io.lsu.load(0).is128Req := vSegmentUnit.io.rdcache.is128Req
    }.otherwise {
      loadUnits(i).io.dcache.req.ready := dcache.io.lsu.load(i).req.ready

      dcache.io.lsu.load(0).pf_source := loadUnits(0).io.dcache.pf_source
      dcache.io.lsu.load(0).s1_paddr_dup_lsu := loadUnits(0).io.dcache.s1_paddr_dup_lsu
      dcache.io.lsu.load(0).s1_paddr_dup_dcache := loadUnits(0).io.dcache.s1_paddr_dup_dcache
      dcache.io.lsu.load(0).s1_kill := loadUnits(0).io.dcache.s1_kill
      dcache.io.lsu.load(0).s2_kill := loadUnits(0).io.dcache.s2_kill
      dcache.io.lsu.load(0).s0_pc := loadUnits(0).io.dcache.s0_pc
      dcache.io.lsu.load(0).s1_pc := loadUnits(0).io.dcache.s1_pc
      dcache.io.lsu.load(0).s2_pc := loadUnits(0).io.dcache.s2_pc
      dcache.io.lsu.load(0).is128Req := loadUnits(0).io.dcache.is128Req
    }

    // forward
    loadUnits(i).io.lsq.forward <> lsq.io.forward(i)
    loadUnits(i).io.sbuffer <> sbuffer.io.forward(i)
    loadUnits(i).io.ubuffer <> uncache.io.forward(i)
    loadUnits(i).io.tl_d_channel := dcache.io.lsu.forward_D(i)
    loadUnits(i).io.forward_mshr <> dcache.io.lsu.forward_mshr(i)
    // ld-ld violation check
    loadUnits(i).io.lsq.ldld_nuke_query <> lsq.io.ldu.ldld_nuke_query(i)
    loadUnits(i).io.lsq.stld_nuke_query <> lsq.io.ldu.stld_nuke_query(i)
    // loadqueue old ptr
    loadUnits(i).io.lsq.lqDeqPtr := lsq.io.lqDeqPtr
    loadUnits(i).io.csrCtrl <> csrCtrl
    // dcache refill req
    // loadUnits(i).io.refill <> delayedDcacheRefill
    // dtlb
    loadUnits(i).io.tlb <> dtlb_reqs.take(LduCnt)(i)
    if (i == 0) { // port 0 is shared with the vSegmentUnit
      val vsegmentDtlbReqValid = vSegmentUnit.io.dtlb.req.valid // the segment unit's tlb request needs to be delayed by one cycle
      dtlb_reqs.take(LduCnt)(i).req.valid := loadUnits(i).io.tlb.req.valid || RegNext(vsegmentDtlbReqValid)
      vSegmentUnit.io.dtlb.req.ready := dtlb_reqs.take(LduCnt)(i).req.ready
      dtlb_reqs.take(LduCnt)(i).req.bits := ParallelPriorityMux(Seq(
        RegNext(vsegmentDtlbReqValid) -> RegEnable(vSegmentUnit.io.dtlb.req.bits, vsegmentDtlbReqValid),
        loadUnits(i).io.tlb.req.valid -> loadUnits(i).io.tlb.req.bits
      ))
    }
    // pmp
    loadUnits(i).io.pmp <> pmp_check(i).resp
    // st-ld violation query
    val stld_nuke_query = storeUnits.map(_.io.stld_nuke_query) ++ hybridUnits.map(_.io.stu_io.stld_nuke_query)
    for (s <- 0 until StorePipelineWidth) {
      loadUnits(i).io.stld_nuke_query(s) := stld_nuke_query(s)
    }
    loadUnits(i).io.lq_rep_full <> lsq.io.lq_rep_full
    // load prefetch train
    prefetcherOpt.foreach(pf => {
      // sms will train on all miss load sources
      val source = loadUnits(i).io.prefetch_train
      pf.io.ld_in(i).valid := Mux(pf_train_on_hit,
        source.valid,
        source.valid && source.bits.isFirstIssue && source.bits.miss
      )
      pf.io.ld_in(i).bits := source.bits
      val loadPc = RegNext(io.ooo_to_mem.issueLda(i).bits.uop.pc) // for s1
      pf.io.ld_in(i).bits.uop.pc := Mux(
        loadUnits(i).io.s2_ptr_chasing,
        RegEnable(loadPc, loadUnits(i).io.s2_prefetch_spec),
        RegEnable(RegEnable(loadPc, loadUnits(i).io.s1_prefetch_spec), loadUnits(i).io.s2_prefetch_spec)
      )
    })
    l1PrefetcherOpt.foreach(pf => {
      // stream will train on all load sources
      val source = loadUnits(i).io.prefetch_train_l1
      pf.io.ld_in(i).valid := source.valid && source.bits.isFirstIssue
      pf.io.ld_in(i).bits := source.bits
    })

    // load to load fast forward: load(i) prefers data(i)
    val l2l_fwd_out = loadUnits.map(_.io.l2l_fwd_out) ++ hybridUnits.map(_.io.ldu_io.l2l_fwd_out)
    val fastPriority = (i until LduCnt + HyuCnt) ++ (0 until i)
    val fastValidVec = fastPriority.map(j => l2l_fwd_out(j).valid)
    val fastDataVec = fastPriority.map(j => l2l_fwd_out(j).data)
    val fastErrorVec = fastPriority.map(j => l2l_fwd_out(j).dly_ld_err)
    val fastMatchVec = fastPriority.map(j => io.ooo_to_mem.loadFastMatch(i)(j))
    loadUnits(i).io.l2l_fwd_in.valid := VecInit(fastValidVec).asUInt.orR
    loadUnits(i).io.l2l_fwd_in.data := ParallelPriorityMux(fastValidVec, fastDataVec)
    loadUnits(i).io.l2l_fwd_in.dly_ld_err := ParallelPriorityMux(fastValidVec, fastErrorVec)
    val fastMatch = ParallelPriorityMux(fastValidVec, fastMatchVec)
    loadUnits(i).io.ld_fast_match := fastMatch
    loadUnits(i).io.ld_fast_imm := io.ooo_to_mem.loadFastImm(i)
    loadUnits(i).io.ld_fast_fuOpType := io.ooo_to_mem.loadFastFuOpType(i)
    loadUnits(i).io.replay <> lsq.io.replay(i)

    val l2_hint = RegNext(io.l2_hint)

    // L2 hint for DCache
    dcache.io.l2_hint <> l2_hint

    loadUnits(i).io.l2_hint <> l2_hint
    loadUnits(i).io.tlb_hint.id := dtlbRepeater.io.hint.get.req(i).id
    loadUnits(i).io.tlb_hint.full := dtlbRepeater.io.hint.get.req(i).full ||
      tlbreplay_reg(i) || dtlb_ld0_tlbreplay_reg(i)

    // passdown to lsq (load s2)
    lsq.io.ldu.ldin(i) <> loadUnits(i).io.lsq.ldin
    if (i == UncacheWBPort) {
      lsq.io.ldout(i) <> loadUnits(i).io.lsq.uncache
    } else {
      lsq.io.ldout(i).ready := true.B
      loadUnits(i).io.lsq.uncache.valid := false.B
      loadUnits(i).io.lsq.uncache.bits := DontCare
    }
    lsq.io.ld_raw_data(i) <> loadUnits(i).io.lsq.ld_raw_data
    lsq.io.ncOut(i) <> loadUnits(i).io.lsq.nc_ldin
    lsq.io.l2_hint.valid := l2_hint.valid
    lsq.io.l2_hint.bits.sourceId := l2_hint.bits.sourceId
    lsq.io.l2_hint.bits.isKeyword := l2_hint.bits.isKeyword

    lsq.io.tlb_hint <> dtlbRepeater.io.hint.get

    // connect misalignBuffer
    loadMisalignBuffer.io.enq(i) <> loadUnits(i).io.misalign_enq

    if (i == MisalignWBPort) {
      loadUnits(i).io.misalign_ldin <> loadMisalignBuffer.io.splitLoadReq
      loadUnits(i).io.misalign_ldout <> loadMisalignBuffer.io.splitLoadResp
    } else {
      loadUnits(i).io.misalign_ldin.valid := false.B
      loadUnits(i).io.misalign_ldin.bits := DontCare
    }

    // alter writeback exception info
    io.mem_to_ooo.s3_delayed_load_error(i) := loadUnits(i).io.s3_dly_ld_err

    // update mem dependency predictor
    // io.memPredUpdate(i) := DontCare

    // --------------------------------
    // Load Triggers
    // --------------------------------
    loadUnits(i).io.fromCsrTrigger.tdataVec := tdata
    loadUnits(i).io.fromCsrTrigger.tEnableVec := tEnable
    loadUnits(i).io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
    loadUnits(i).io.fromCsrTrigger.debugMode := debugMode
  }

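  // HybridUnits can execute either loads or stores. Their load side occupies DCache/LSQ
  // load ports LduCnt .. LduCnt+HyuCnt-1 and their store side occupies sta ports
  // StaCnt .. StaCnt+HyuCnt-1, as reflected by the index offsets used below.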
  for (i <- 0 until HyuCnt) {
    hybridUnits(i).io.redirect <> redirect

    // get input from dispatch
    hybridUnits(i).io.lsin <> io.ooo_to_mem.issueHya(i)
    hybridUnits(i).io.feedback_slow <> io.mem_to_ooo.hyuIqFeedback(i).feedbackSlow
    hybridUnits(i).io.feedback_fast <> io.mem_to_ooo.hyuIqFeedback(i).feedbackFast
    hybridUnits(i).io.correctMissTrain := correctMissTrain
    io.mem_to_ooo.ldCancel.take(HyuCnt)(i) := hybridUnits(i).io.ldu_io.ldCancel
    io.mem_to_ooo.wakeup.take(HyuCnt)(i) := hybridUnits(i).io.ldu_io.wakeup

    // ------------------------------------
    //  Load Port
    // ------------------------------------
    // fast replay
    hybridUnits(i).io.ldu_io.fast_rep_in <> hybridUnits(i).io.ldu_io.fast_rep_out

    // dcache access
    hybridUnits(i).io.ldu_io.dcache <> dcache.io.lsu.load(LduCnt + i)
    hybridUnits(i).io.stu_io.dcache <> dcache.io.lsu.sta(StaCnt + i)

    // forward
    hybridUnits(i).io.ldu_io.lsq.forward <> lsq.io.forward(LduCnt + i)
    hybridUnits(i).io.ldu_io.sbuffer <> sbuffer.io.forward(LduCnt + i)
    hybridUnits(i).io.ldu_io.ubuffer <> uncache.io.forward(LduCnt + i)
    // hybridUnits(i).io.ldu_io.vec_forward <> vsFlowQueue.io.forward(LduCnt + i)
    hybridUnits(i).io.ldu_io.vec_forward := DontCare
    hybridUnits(i).io.ldu_io.tl_d_channel := dcache.io.lsu.forward_D(LduCnt + i)
    hybridUnits(i).io.ldu_io.forward_mshr <> dcache.io.lsu.forward_mshr(LduCnt + i)
    // ld-ld violation check
    hybridUnits(i).io.ldu_io.lsq.ldld_nuke_query <> lsq.io.ldu.ldld_nuke_query(LduCnt + i)
    hybridUnits(i).io.ldu_io.lsq.stld_nuke_query <> lsq.io.ldu.stld_nuke_query(LduCnt + i)
    hybridUnits(i).io.csrCtrl <> csrCtrl
    // tlb replay hint
    hybridUnits(i).io.ldu_io.tlb_hint.id := dtlbRepeater.io.hint.get.req(LduCnt + i).id
    hybridUnits(i).io.ldu_io.tlb_hint.full := dtlbRepeater.io.hint.get.req(LduCnt + i).full ||
      tlbreplay_reg(LduCnt + i) || dtlb_ld0_tlbreplay_reg(LduCnt + i)

    // dtlb
    hybridUnits(i).io.tlb <> dtlb_ld.head.requestor(LduCnt + i)
    // pmp
    hybridUnits(i).io.pmp <> pmp_check.drop(LduCnt)(i).resp
    // st-ld violation query
    val stld_nuke_query = VecInit(storeUnits.map(_.io.stld_nuke_query) ++ hybridUnits.map(_.io.stu_io.stld_nuke_query))
    hybridUnits(i).io.ldu_io.stld_nuke_query := stld_nuke_query
    hybridUnits(i).io.ldu_io.lq_rep_full <> lsq.io.lq_rep_full
    // load prefetch train
    prefetcherOpt.foreach(pf => {
      val source = hybridUnits(i).io.prefetch_train
      pf.io.ld_in(LduCnt + i).valid := Mux(pf_train_on_hit,
        source.valid,
        source.valid && source.bits.isFirstIssue && source.bits.miss
      )
      pf.io.ld_in(LduCnt + i).bits := source.bits
      pf.io.ld_in(LduCnt + i).bits.uop.pc := Mux(hybridUnits(i).io.ldu_io.s2_ptr_chasing, io.ooo_to_mem.hybridPc(i), RegNext(io.ooo_to_mem.hybridPc(i)))
    })
    l1PrefetcherOpt.foreach(pf => {
      // stream will train on all load sources
      val source = hybridUnits(i).io.prefetch_train_l1
      pf.io.ld_in(LduCnt + i).valid := source.valid && source.bits.isFirstIssue &&
        FuType.isLoad(source.bits.uop.fuType)
      pf.io.ld_in(LduCnt + i).bits := source.bits
      pf.io.st_in(StaCnt + i).valid := false.B
      pf.io.st_in(StaCnt + i).bits := DontCare
    })
    prefetcherOpt.foreach(pf => {
      val source = hybridUnits(i).io.prefetch_train
      pf.io.st_in(StaCnt + i).valid := Mux(pf_train_on_hit,
        source.valid,
        source.valid && source.bits.isFirstIssue && source.bits.miss
      ) && FuType.isStore(source.bits.uop.fuType)
      pf.io.st_in(StaCnt + i).bits := source.bits
      pf.io.st_in(StaCnt + i).bits.uop.pc := RegNext(io.ooo_to_mem.hybridPc(i))
    })

    // load to load fast forward: load(i) prefers data(i)
    val l2l_fwd_out = loadUnits.map(_.io.l2l_fwd_out) ++ hybridUnits.map(_.io.ldu_io.l2l_fwd_out)
    val fastPriority = (LduCnt + i until LduCnt + HyuCnt) ++ (0 until LduCnt + i)
    val fastValidVec = fastPriority.map(j => l2l_fwd_out(j).valid)
    val fastDataVec = fastPriority.map(j => l2l_fwd_out(j).data)
    val fastErrorVec = fastPriority.map(j => l2l_fwd_out(j).dly_ld_err)
    val fastMatchVec = fastPriority.map(j => io.ooo_to_mem.loadFastMatch(LduCnt + i)(j))
    hybridUnits(i).io.ldu_io.l2l_fwd_in.valid := VecInit(fastValidVec).asUInt.orR
    hybridUnits(i).io.ldu_io.l2l_fwd_in.data := ParallelPriorityMux(fastValidVec, fastDataVec)
    hybridUnits(i).io.ldu_io.l2l_fwd_in.dly_ld_err := ParallelPriorityMux(fastValidVec, fastErrorVec)
    val fastMatch = ParallelPriorityMux(fastValidVec, fastMatchVec)
    hybridUnits(i).io.ldu_io.ld_fast_match := fastMatch
    hybridUnits(i).io.ldu_io.ld_fast_imm := io.ooo_to_mem.loadFastImm(LduCnt + i)
    hybridUnits(i).io.ldu_io.ld_fast_fuOpType := io.ooo_to_mem.loadFastFuOpType(LduCnt + i)
    hybridUnits(i).io.ldu_io.replay <> lsq.io.replay(LduCnt + i)
    hybridUnits(i).io.ldu_io.l2_hint <> io.l2_hint

    // uncache
    lsq.io.ldout.drop(LduCnt)(i) <> hybridUnits(i).io.ldu_io.lsq.uncache
    lsq.io.ld_raw_data.drop(LduCnt)(i) <> hybridUnits(i).io.ldu_io.lsq.ld_raw_data


    // passdown to lsq (load s2)
    hybridUnits(i).io.ldu_io.lsq.nc_ldin.valid := false.B
    hybridUnits(i).io.ldu_io.lsq.nc_ldin.bits := DontCare
    lsq.io.ldu.ldin(LduCnt + i) <> hybridUnits(i).io.ldu_io.lsq.ldin
    // Lsq to sta unit
    lsq.io.sta.storeMaskIn(StaCnt + i) <> hybridUnits(i).io.stu_io.st_mask_out

    // Lsq to std unit's rs
    lsq.io.std.storeDataIn(StaCnt + i) := stData(StaCnt + i)
    lsq.io.std.storeDataIn(StaCnt + i).valid := stData(StaCnt + i).valid && !st_data_atomics(StaCnt + i)
    // prefetch
    hybridUnits(i).io.stu_io.prefetch_req <> sbuffer.io.store_prefetch(StaCnt + i)

    io.mem_to_ooo.s3_delayed_load_error(LduCnt + i) := hybridUnits(i).io.ldu_io.s3_dly_ld_err

    // ------------------------------------
    //  Store Port
    // ------------------------------------
    hybridUnits(i).io.stu_io.lsq <> lsq.io.sta.storeAddrIn.takeRight(HyuCnt)(i)
    hybridUnits(i).io.stu_io.lsq_replenish <> lsq.io.sta.storeAddrInRe.takeRight(HyuCnt)(i)

    lsq.io.sta.storeMaskIn.takeRight(HyuCnt)(i) <> hybridUnits(i).io.stu_io.st_mask_out
    io.mem_to_ooo.stIn.takeRight(HyuCnt)(i).valid := hybridUnits(i).io.stu_io.issue.valid
    io.mem_to_ooo.stIn.takeRight(HyuCnt)(i).bits := hybridUnits(i).io.stu_io.issue.bits

    // ------------------------------------
    //  Vector Store Port
    // ------------------------------------
    hybridUnits(i).io.vec_stu_io.isFirstIssue := true.B

    // -------------------------
    // Store Triggers
    // -------------------------
    hybridUnits(i).io.fromCsrTrigger.tdataVec := tdata
    hybridUnits(i).io.fromCsrTrigger.tEnableVec := tEnable
    hybridUnits(i).io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
    hybridUnits(i).io.fromCsrTrigger.debugMode := debugMode
  }

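  // Misaligned accesses are bounced to the misalign buffers, which split them into aligned
  // accesses and replay them through loadUnit(MisalignWBPort) / storeUnit(0) via the
  // splitLoadReq/splitStoreReq ports. The ROB commit/pending state is forwarded below so
  // the buffers can track when their entry is committed.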
  // misalignBuffer
  loadMisalignBuffer.io.redirect <> redirect
  loadMisalignBuffer.io.rob.lcommit := io.ooo_to_mem.lsqio.lcommit
  loadMisalignBuffer.io.rob.scommit := io.ooo_to_mem.lsqio.scommit
  loadMisalignBuffer.io.rob.pendingMMIOld := io.ooo_to_mem.lsqio.pendingMMIOld
  loadMisalignBuffer.io.rob.pendingld := io.ooo_to_mem.lsqio.pendingld
  loadMisalignBuffer.io.rob.pendingst := io.ooo_to_mem.lsqio.pendingst
  loadMisalignBuffer.io.rob.pendingVst := io.ooo_to_mem.lsqio.pendingVst
  loadMisalignBuffer.io.rob.commit := io.ooo_to_mem.lsqio.commit
  loadMisalignBuffer.io.rob.pendingPtr := io.ooo_to_mem.lsqio.pendingPtr
  loadMisalignBuffer.io.rob.pendingPtrNext := io.ooo_to_mem.lsqio.pendingPtrNext

  lsq.io.loadMisalignFull := loadMisalignBuffer.io.loadMisalignFull
  lsq.io.misalignAllowSpec := misalign_allow_spec

  storeMisalignBuffer.io.redirect <> redirect
  storeMisalignBuffer.io.rob.lcommit := io.ooo_to_mem.lsqio.lcommit
  storeMisalignBuffer.io.rob.scommit := io.ooo_to_mem.lsqio.scommit
  storeMisalignBuffer.io.rob.pendingMMIOld := io.ooo_to_mem.lsqio.pendingMMIOld
  storeMisalignBuffer.io.rob.pendingld := io.ooo_to_mem.lsqio.pendingld
  storeMisalignBuffer.io.rob.pendingst := io.ooo_to_mem.lsqio.pendingst
  storeMisalignBuffer.io.rob.pendingVst := io.ooo_to_mem.lsqio.pendingVst
  storeMisalignBuffer.io.rob.commit := io.ooo_to_mem.lsqio.commit
  storeMisalignBuffer.io.rob.pendingPtr := io.ooo_to_mem.lsqio.pendingPtr
  storeMisalignBuffer.io.rob.pendingPtrNext := io.ooo_to_mem.lsqio.pendingPtrNext

  lsq.io.maControl <> storeMisalignBuffer.io.sqControl

  lsq.io.cmoOpReq <> dcache.io.cmoOpReq
  lsq.io.cmoOpResp <> dcache.io.cmoOpResp

  // Prefetcher
  val StreamDTLBPortIndex = TlbStartVec(dtlb_ld_idx) + LduCnt + HyuCnt
  val PrefetcherDTLBPortIndex = TlbStartVec(dtlb_pf_idx)
  val L2toL1DLBPortIndex = TlbStartVec(dtlb_pf_idx) + 1
  prefetcherOpt match {
    case Some(pf) =>
      dtlb_reqs(PrefetcherDTLBPortIndex) <> pf.io.tlb_req
      pf.io.pmp_resp := pmp_check(PrefetcherDTLBPortIndex).resp
    case None =>
      dtlb_reqs(PrefetcherDTLBPortIndex) := DontCare
      dtlb_reqs(PrefetcherDTLBPortIndex).req.valid := false.B
      dtlb_reqs(PrefetcherDTLBPortIndex).resp.ready := true.B
  }
  l1PrefetcherOpt match {
    case Some(pf) =>
      dtlb_reqs(StreamDTLBPortIndex) <> pf.io.tlb_req
      pf.io.pmp_resp := pmp_check(StreamDTLBPortIndex).resp
    case None =>
      dtlb_reqs(StreamDTLBPortIndex) := DontCare
      dtlb_reqs(StreamDTLBPortIndex).req.valid := false.B
      dtlb_reqs(StreamDTLBPortIndex).resp.ready := true.B
  }
  dtlb_reqs(L2toL1DLBPortIndex) <> io.l2_tlb_req
  dtlb_reqs(L2toL1DLBPortIndex).resp.ready := true.B
  io.l2_pmp_resp := pmp_check(L2toL1DLBPortIndex).resp

  // StoreUnit
  for (i <- 0 until StdCnt) {
    stdExeUnits(i).io.flush <> redirect
    stdExeUnits(i).io.in.valid := io.ooo_to_mem.issueStd(i).valid
    io.ooo_to_mem.issueStd(i).ready := stdExeUnits(i).io.in.ready
    stdExeUnits(i).io.in.bits := io.ooo_to_mem.issueStd(i).bits
  }

  for (i <- 0 until StaCnt) {
    val stu = storeUnits(i)

    stu.io.redirect <> redirect
    stu.io.csrCtrl <> csrCtrl
    stu.io.dcache <> dcache.io.lsu.sta(i)
    stu.io.feedback_slow <> io.mem_to_ooo.staIqFeedback(i).feedbackSlow
    stu.io.stin <> io.ooo_to_mem.issueSta(i)
    stu.io.lsq <> lsq.io.sta.storeAddrIn(i)
    stu.io.lsq_replenish <> lsq.io.sta.storeAddrInRe(i)
    // dtlb
    stu.io.tlb <> dtlb_st.head.requestor(i)
    stu.io.pmp <> pmp_check(LduCnt + HyuCnt + 1 + i).resp

    // -------------------------
    // Store Triggers
    // -------------------------
    stu.io.fromCsrTrigger.tdataVec := tdata
    stu.io.fromCsrTrigger.tEnableVec := tEnable
    stu.io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
    stu.io.fromCsrTrigger.debugMode := debugMode

    // prefetch
    stu.io.prefetch_req <> sbuffer.io.store_prefetch(i)

    // store unit does not need fast feedback
    io.mem_to_ooo.staIqFeedback(i).feedbackFast := DontCare

    // Lsq to sta unit
    lsq.io.sta.storeMaskIn(i) <> stu.io.st_mask_out

    // connect misalignBuffer
    storeMisalignBuffer.io.enq(i) <> stu.io.misalign_enq

    if (i == 0) {
      stu.io.misalign_stin <> storeMisalignBuffer.io.splitStoreReq
      stu.io.misalign_stout <> storeMisalignBuffer.io.splitStoreResp
    } else {
      stu.io.misalign_stin.valid := false.B
      stu.io.misalign_stin.bits := DontCare
    }

    // Lsq to std unit's rs
    if (i < VstuCnt) {
      when (vsSplit(i).io.vstd.get.valid) {
        lsq.io.std.storeDataIn(i).valid := true.B
        lsq.io.std.storeDataIn(i).bits := vsSplit(i).io.vstd.get.bits
        stData(i).ready := false.B
      }.otherwise {
        lsq.io.std.storeDataIn(i).valid := stData(i).valid && !st_data_atomics(i)
        lsq.io.std.storeDataIn(i).bits.uop := stData(i).bits.uop
        lsq.io.std.storeDataIn(i).bits.data := stData(i).bits.data
        lsq.io.std.storeDataIn(i).bits.mask.map(_ := 0.U)
        lsq.io.std.storeDataIn(i).bits.vdIdx.map(_ := 0.U)
        lsq.io.std.storeDataIn(i).bits.vdIdxInField.map(_ := 0.U)
        stData(i).ready := true.B
      }
    } else {
      lsq.io.std.storeDataIn(i).valid := stData(i).valid && !st_data_atomics(i)
      lsq.io.std.storeDataIn(i).bits.uop := stData(i).bits.uop
      lsq.io.std.storeDataIn(i).bits.data := stData(i).bits.data
      lsq.io.std.storeDataIn(i).bits.mask.map(_ := 0.U)
      lsq.io.std.storeDataIn(i).bits.vdIdx.map(_ := 0.U)
      lsq.io.std.storeDataIn(i).bits.vdIdxInField.map(_ := 0.U)
      stData(i).ready := true.B
    }
    lsq.io.std.storeDataIn.map(_.bits.debug := 0.U.asTypeOf(new DebugBundle))
    lsq.io.std.storeDataIn.foreach(_.bits.isFromLoadUnit := DontCare)


    // store prefetch train
    l1PrefetcherOpt.foreach(pf => {
      // stream will train on all load sources
      pf.io.st_in(i).valid := false.B
      pf.io.st_in(i).bits := DontCare
    })

    prefetcherOpt.foreach(pf => {
      pf.io.st_in(i).valid := Mux(pf_train_on_hit,
        stu.io.prefetch_train.valid,
        stu.io.prefetch_train.valid && stu.io.prefetch_train.bits.isFirstIssue && (
          stu.io.prefetch_train.bits.miss
        )
      )
      pf.io.st_in(i).bits := stu.io.prefetch_train.bits
      pf.io.st_in(i).bits.uop.pc := RegEnable(RegEnable(io.ooo_to_mem.storePc(i), stu.io.s1_prefetch_spec), stu.io.s2_prefetch_spec)
    })
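    // The trained store pc is captured through two RegEnable stages gated by
    // s1/s2_prefetch_spec, so it reaches the prefetcher in the same cycle as
    // prefetch_train, which is produced later in the store pipeline.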
    // 1. sync issue info to store set LFST
    // 2. when a store issues, broadcast the issued sqPtr to wake up the following insts
    // io.stIn(i).valid := io.issue(exuParameters.LduCnt + i).valid
    // io.stIn(i).bits := io.issue(exuParameters.LduCnt + i).bits
    io.mem_to_ooo.stIn(i).valid := stu.io.issue.valid
    io.mem_to_ooo.stIn(i).bits := stu.io.issue.bits

    stu.io.stout.ready := true.B

    // vector
    if (i < VstuCnt) {
      stu.io.vecstin <> vsSplit(i).io.out
      // vsFlowQueue.io.pipeFeedback(i) <> stu.io.vec_feedback_slow // need connect
    } else {
      stu.io.vecstin.valid := false.B
      stu.io.vecstin.bits := DontCare
      stu.io.vecstout.ready := false.B
    }
    stu.io.vec_isFirstIssue := true.B // TODO
  }

  val sqOtherStout = WireInit(0.U.asTypeOf(DecoupledIO(new MemExuOutput)))
  sqOtherStout.valid := lsq.io.mmioStout.valid || lsq.io.cboZeroStout.valid
  sqOtherStout.bits := Mux(lsq.io.cboZeroStout.valid, lsq.io.cboZeroStout.bits, lsq.io.mmioStout.bits)
  assert(!(lsq.io.mmioStout.valid && lsq.io.cboZeroStout.valid), "Cannot writeback to mmio and cboZero at the same time.")

  // Store writeback by StoreQueue:
  // 1. cbo Zero
  // 2. mmio
  // Currently, the two should not be present at the same time, so simply give cbo zero the higher priority.
  val otherStout = WireInit(0.U.asTypeOf(lsq.io.mmioStout))
  NewPipelineConnect(
    sqOtherStout, otherStout, otherStout.fire,
    false.B,
    Option("otherStoutConnect")
  )
  otherStout.ready := false.B
  when (otherStout.valid && !storeUnits(0).io.stout.valid) {
    stOut(0).valid := true.B
    stOut(0).bits := otherStout.bits
    otherStout.ready := true.B
  }
  lsq.io.mmioStout.ready := sqOtherStout.ready
  lsq.io.cboZeroStout.ready := sqOtherStout.ready

  // vec mmio writeback
  lsq.io.vecmmioStout.ready := false.B

  // misalign buffer will overwrite stOut(0)
  val storeMisalignCanWriteBack = !otherStout.valid && !storeUnits(0).io.stout.valid && !storeUnits(0).io.vecstout.valid
  storeMisalignBuffer.io.writeBack.ready := storeMisalignCanWriteBack
  storeMisalignBuffer.io.storeOutValid := storeUnits(0).io.stout.valid
  storeMisalignBuffer.io.storeVecOutValid := storeUnits(0).io.vecstout.valid
  when (storeMisalignBuffer.io.writeBack.valid && storeMisalignCanWriteBack) {
    stOut(0).valid := true.B
    stOut(0).bits := storeMisalignBuffer.io.writeBack.bits
  }

  // Uncache
  uncache.io.enableOutstanding := io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable
  uncache.io.hartId := io.hartId
  lsq.io.uncacheOutstanding := io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable

  // Lsq
  io.mem_to_ooo.lsqio.mmio := lsq.io.rob.mmio
  io.mem_to_ooo.lsqio.uop := lsq.io.rob.uop
  lsq.io.rob.lcommit := io.ooo_to_mem.lsqio.lcommit
  lsq.io.rob.scommit := io.ooo_to_mem.lsqio.scommit
  lsq.io.rob.pendingMMIOld := io.ooo_to_mem.lsqio.pendingMMIOld
  lsq.io.rob.pendingld := io.ooo_to_mem.lsqio.pendingld
  lsq.io.rob.pendingst := io.ooo_to_mem.lsqio.pendingst
  lsq.io.rob.pendingVst := io.ooo_to_mem.lsqio.pendingVst
  lsq.io.rob.commit := io.ooo_to_mem.lsqio.commit
  lsq.io.rob.pendingPtr := io.ooo_to_mem.lsqio.pendingPtr
  lsq.io.rob.pendingPtrNext := io.ooo_to_mem.lsqio.pendingPtrNext

  // lsq.io.rob <> io.lsqio.rob
  lsq.io.enq <> io.ooo_to_mem.enqLsq
  lsq.io.brqRedirect <> redirect
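  // selectOldestRedirect below returns a one-hot Vec[Bool]: bit i is set iff xs(i) is valid
  // and no other valid entry is older (by robIdx). Illustrative expansion for two sources
  // (a sketch, not generated code):
  //   result(0) = xs(0).valid && (!xs(1).valid || !isAfter(xs(0).bits.robIdx, xs(1).bits.robIdx))
  //   result(1) = xs(1).valid && (!xs(0).valid ||  isAfter(xs(0).bits.robIdx, xs(1).bits.robIdx))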
  // violation rollback
  def selectOldestRedirect(xs: Seq[Valid[Redirect]]): Vec[Bool] = {
    val compareVec = (0 until xs.length).map(i => (0 until i).map(j => isAfter(xs(j).bits.robIdx, xs(i).bits.robIdx)))
    val resultOnehot = VecInit((0 until xs.length).map(i => Cat((0 until xs.length).map(j =>
      (if (j < i) !xs(j).valid || compareVec(i)(j)
      else if (j == i) xs(i).valid
      else !xs(j).valid || !compareVec(j)(i))
    )).andR))
    resultOnehot
  }
  val allRedirect = loadUnits.map(_.io.rollback) ++ hybridUnits.map(_.io.ldu_io.rollback) ++ lsq.io.nack_rollback ++ lsq.io.nuke_rollback
  val oldestOneHot = selectOldestRedirect(allRedirect)
  val oldestRedirect = WireDefault(Mux1H(oldestOneHot, allRedirect))
  // memory replay would not cause IAF/IPF/IGPF
  oldestRedirect.bits.cfiUpdate.backendIAF := false.B
  oldestRedirect.bits.cfiUpdate.backendIPF := false.B
  oldestRedirect.bits.cfiUpdate.backendIGPF := false.B
  io.mem_to_ooo.memoryViolation := oldestRedirect
  io.mem_to_ooo.lsqio.lqCanAccept := lsq.io.lqCanAccept
  io.mem_to_ooo.lsqio.sqCanAccept := lsq.io.sqCanAccept

  // lsq.io.uncache <> uncache.io.lsq
  val s_idle :: s_scalar_uncache :: s_vector_uncache :: Nil = Enum(3)
  val uncacheState = RegInit(s_idle)
  val uncacheReq = Wire(Decoupled(new UncacheWordReq))
  val uncacheIdResp = uncache.io.lsq.idResp
  val uncacheResp = Wire(Decoupled(new UncacheWordResp))

  uncacheReq.bits := DontCare
  uncacheReq.valid := false.B
  uncacheReq.ready := false.B
  uncacheResp.bits := DontCare
  uncacheResp.valid := false.B
  uncacheResp.ready := false.B
  lsq.io.uncache.req.ready := false.B
  lsq.io.uncache.idResp.valid := false.B
  lsq.io.uncache.idResp.bits := DontCare
  lsq.io.uncache.resp.valid := false.B
  lsq.io.uncache.resp.bits := DontCare

  switch (uncacheState) {
    is (s_idle) {
      when (uncacheReq.fire) {
        when (lsq.io.uncache.req.valid) {
          when (!lsq.io.uncache.req.bits.nc || !io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
            uncacheState := s_scalar_uncache
          }
        }.otherwise {
          // val isStore = vsFlowQueue.io.uncache.req.bits.cmd === MemoryOpConstants.M_XWR
          when (!io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
            uncacheState := s_vector_uncache
          }
        }
      }
    }

    is (s_scalar_uncache) {
      when (uncacheResp.fire) {
        uncacheState := s_idle
      }
    }

    is (s_vector_uncache) {
      when (uncacheResp.fire) {
        uncacheState := s_idle
      }
    }
  }

  when (lsq.io.uncache.req.valid) {
    uncacheReq <> lsq.io.uncache.req
  }
  when (io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
    lsq.io.uncache.resp <> uncacheResp
    lsq.io.uncache.idResp <> uncacheIdResp
  }.otherwise {
    when (uncacheState === s_scalar_uncache) {
      lsq.io.uncache.resp <> uncacheResp
      lsq.io.uncache.idResp <> uncacheIdResp
    }
  }
  // delay dcache refill for 1 cycle for better timing
  AddPipelineReg(uncacheReq, uncache.io.lsq.req, false.B)
  AddPipelineReg(uncache.io.lsq.resp, uncacheResp, false.B)

  //lsq.io.refill := delayedDcacheRefill
  lsq.io.release := dcache.io.lsu.release
  lsq.io.lqCancelCnt <> io.mem_to_ooo.lqCancelCnt
  lsq.io.sqCancelCnt <> io.mem_to_ooo.sqCancelCnt
  lsq.io.lqDeq <> io.mem_to_ooo.lqDeq
  lsq.io.sqDeq <> io.mem_to_ooo.sqDeq
  // Todo: assign these
  io.mem_to_ooo.sqDeqPtr := lsq.io.sqDeqPtr
  io.mem_to_ooo.lqDeqPtr := lsq.io.lqDeqPtr
  lsq.io.tl_d_channel <> dcache.io.lsu.tl_d_channel
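  // sbuffer.io.in(0) is shared below: when the vector segment unit has data to write it
  // takes port 0, otherwise port 0 carries the store queue's entry. The two valids are
  // assumed to be mutually exclusive (hence Mux1H), since segment instructions execute alone.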
  // LSQ to store buffer
  lsq.io.sbuffer <> sbuffer.io.in
  sbuffer.io.in(0).valid := lsq.io.sbuffer(0).valid || vSegmentUnit.io.sbuffer.valid
  sbuffer.io.in(0).bits := Mux1H(Seq(
    vSegmentUnit.io.sbuffer.valid -> vSegmentUnit.io.sbuffer.bits,
    lsq.io.sbuffer(0).valid -> lsq.io.sbuffer(0).bits
  ))
  vSegmentUnit.io.sbuffer.ready := sbuffer.io.in(0).ready
  lsq.io.sqEmpty <> sbuffer.io.sqempty
  dcache.io.force_write := lsq.io.force_write

  // Initialize these signals when difftest is disabled.
  sbuffer.io.vecDifftestInfo := DontCare
  lsq.io.sbufferVecDifftestInfo := DontCare
  vSegmentUnit.io.vecDifftestInfo := DontCare
  if (env.EnableDifftest) {
    sbuffer.io.vecDifftestInfo.zipWithIndex.map{ case (sbufferPort, index) =>
      if (index == 0) {
        val vSegmentDifftestValid = vSegmentUnit.io.vecDifftestInfo.valid
        sbufferPort.valid := Mux(vSegmentDifftestValid, vSegmentUnit.io.vecDifftestInfo.valid, lsq.io.sbufferVecDifftestInfo(0).valid)
        sbufferPort.bits := Mux(vSegmentDifftestValid, vSegmentUnit.io.vecDifftestInfo.bits, lsq.io.sbufferVecDifftestInfo(0).bits)

        vSegmentUnit.io.vecDifftestInfo.ready := sbufferPort.ready
        lsq.io.sbufferVecDifftestInfo(0).ready := sbufferPort.ready
      } else {
        sbufferPort <> lsq.io.sbufferVecDifftestInfo(index)
      }
    }
  }

  // lsq.io.vecStoreRetire <> vsFlowQueue.io.sqRelease
  // lsq.io.vecWriteback.valid := vlWrapper.io.uopWriteback.fire &&
  //   vlWrapper.io.uopWriteback.bits.uop.vpu.lastUop
  // lsq.io.vecWriteback.bits := vlWrapper.io.uopWriteback.bits

  // vector
  val vLoadCanAccept = (0 until VlduCnt).map(i =>
    vlSplit(i).io.in.ready && VlduType.isVecLd(io.ooo_to_mem.issueVldu(i).bits.uop.fuOpType)
  )
  val vStoreCanAccept = (0 until VstuCnt).map(i =>
    vsSplit(i).io.in.ready && VstuType.isVecSt(io.ooo_to_mem.issueVldu(i).bits.uop.fuOpType)
  )
  val isSegment = io.ooo_to_mem.issueVldu.head.valid && isVsegls(io.ooo_to_mem.issueVldu.head.bits.uop.fuType)
  val isFixVlUop = io.ooo_to_mem.issueVldu.map{ x =>
    x.bits.uop.vpu.isVleff && x.bits.uop.vpu.lastUop && x.valid
  }

  // init port
  /**
   * TODO: the split vsMergeBuffers may be removed if one RS can accept two feedbacks,
   * or if the RS does not need to replay uops.
   * For now:
   *   RS0 -> VsSplit0 -> stu0 -> vsMergebuffer0 -> feedback -> RS0
   *   RS1 -> VsSplit1 -> stu1 -> vsMergebuffer1 -> feedback -> RS1
   *
   * Vector loads don't need feedback:
   *   RS0 -> VlSplit0 -> ldu0 -> |
   *   RS1 -> VlSplit1 -> ldu1 -> |  -> vlMergebuffer
   *       replayIO    -> ldu3 -> |
   */
  (0 until VstuCnt).foreach{ i =>
    vsMergeBuffer(i).io.fromPipeline := DontCare
    vsMergeBuffer(i).io.fromSplit := DontCare

    vsMergeBuffer(i).io.fromMisalignBuffer.get.flush := storeMisalignBuffer.io.toVecStoreMergeBuffer(i).flush
    vsMergeBuffer(i).io.fromMisalignBuffer.get.mbIndex := storeMisalignBuffer.io.toVecStoreMergeBuffer(i).mbIndex
  }

  (0 until VstuCnt).foreach{ i =>
    vsSplit(i).io.redirect <> redirect
    vsSplit(i).io.in <> io.ooo_to_mem.issueVldu(i)
    vsSplit(i).io.in.valid := io.ooo_to_mem.issueVldu(i).valid &&
      vStoreCanAccept(i) && !isSegment
    vsSplit(i).io.toMergeBuffer <> vsMergeBuffer(i).io.fromSplit.head
    NewPipelineConnect(
      vsSplit(i).io.out, storeUnits(i).io.vecstin, storeUnits(i).io.vecstin.fire,
      Mux(vsSplit(i).io.out.fire,
        vsSplit(i).io.out.bits.uop.robIdx.needFlush(io.redirect),
        storeUnits(i).io.vecstin.bits.uop.robIdx.needFlush(io.redirect)),
      Option("VsSplitConnectStu")
    )
    vsSplit(i).io.vstd.get := DontCare // Todo: Discuss how to pass vector store data

    vsSplit(i).io.vstdMisalign.get.storeMisalignBufferEmpty := !storeMisalignBuffer.io.full
    vsSplit(i).io.vstdMisalign.get.storePipeEmpty := !storeUnits(i).io.s0_s1_valid

  }
  (0 until VlduCnt).foreach{ i =>
    vlSplit(i).io.redirect <> redirect
    vlSplit(i).io.in <> io.ooo_to_mem.issueVldu(i)
    vlSplit(i).io.in.valid := io.ooo_to_mem.issueVldu(i).valid &&
      vLoadCanAccept(i) && !isSegment && !isFixVlUop(i)
    vlSplit(i).io.toMergeBuffer <> vlMergeBuffer.io.fromSplit(i)
    vlSplit(i).io.threshold.get.valid := vlMergeBuffer.io.toSplit.get.threshold
    vlSplit(i).io.threshold.get.bits := lsq.io.lqDeqPtr
    NewPipelineConnect(
      vlSplit(i).io.out, loadUnits(i).io.vecldin, loadUnits(i).io.vecldin.fire,
      Mux(vlSplit(i).io.out.fire,
        vlSplit(i).io.out.bits.uop.robIdx.needFlush(io.redirect),
        loadUnits(i).io.vecldin.bits.uop.robIdx.needFlush(io.redirect)),
      Option("VlSplitConnectLdu")
    )

    // Subsequent instructions will be blocked
    vfofBuffer.io.in(i).valid := io.ooo_to_mem.issueVldu(i).valid
    vfofBuffer.io.in(i).bits := io.ooo_to_mem.issueVldu(i).bits
  }
  (0 until LduCnt).foreach{ i =>
    loadUnits(i).io.vecldout.ready := vlMergeBuffer.io.fromPipeline(i).ready
    loadMisalignBuffer.io.vecWriteBack.ready := true.B

    if (i == MisalignWBPort) {
      when(loadUnits(i).io.vecldout.valid) {
        vlMergeBuffer.io.fromPipeline(i).valid := loadUnits(i).io.vecldout.valid
        vlMergeBuffer.io.fromPipeline(i).bits := loadUnits(i).io.vecldout.bits
      } .otherwise {
        vlMergeBuffer.io.fromPipeline(i).valid := loadMisalignBuffer.io.vecWriteBack.valid
        vlMergeBuffer.io.fromPipeline(i).bits := loadMisalignBuffer.io.vecWriteBack.bits
      }
    } else {
      vlMergeBuffer.io.fromPipeline(i).valid := loadUnits(i).io.vecldout.valid
      vlMergeBuffer.io.fromPipeline(i).bits := loadUnits(i).io.vecldout.bits
    }
  }

  (0 until StaCnt).foreach{ i =>
    if (i < VstuCnt) {
      storeUnits(i).io.vecstout.ready := true.B
      storeMisalignBuffer.io.vecWriteBack(i).ready := vsMergeBuffer(i).io.fromPipeline.head.ready

      when(storeUnits(i).io.vecstout.valid) {
        vsMergeBuffer(i).io.fromPipeline.head.valid := storeUnits(i).io.vecstout.valid
        vsMergeBuffer(i).io.fromPipeline.head.bits := storeUnits(i).io.vecstout.bits
      } .otherwise {
        vsMergeBuffer(i).io.fromPipeline.head.valid := storeMisalignBuffer.io.vecWriteBack(i).valid
        vsMergeBuffer(i).io.fromPipeline.head.bits := storeMisalignBuffer.io.vecWriteBack(i).bits
      }
    }
  }

  (0 until VlduCnt).foreach{ i =>
    io.ooo_to_mem.issueVldu(i).ready := vLoadCanAccept(i) || vStoreCanAccept(i)
  }

  vlMergeBuffer.io.redirect <> redirect
  vsMergeBuffer.map(_.io.redirect <> redirect)
  (0 until VlduCnt).foreach{ i =>
    vlMergeBuffer.io.toLsq(i) <> lsq.io.ldvecFeedback(i)
  }
  (0 until VstuCnt).foreach{ i =>
    vsMergeBuffer(i).io.toLsq.head <> lsq.io.stvecFeedback(i)
  }

  (0 until VlduCnt).foreach{ i =>
    // send to RS
    vlMergeBuffer.io.feedback(i) <> io.mem_to_ooo.vlduIqFeedback(i).feedbackSlow
    io.mem_to_ooo.vlduIqFeedback(i).feedbackFast := DontCare
  }
  (0 until VstuCnt).foreach{ i =>
    // send to RS
    if (i == 0) {
      io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow.valid := vsMergeBuffer(i).io.feedback.head.valid || vSegmentUnit.io.feedback.valid
      io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow.bits := Mux1H(Seq(
        vSegmentUnit.io.feedback.valid -> vSegmentUnit.io.feedback.bits,
        vsMergeBuffer(i).io.feedback.head.valid -> vsMergeBuffer(i).io.feedback.head.bits
      ))
      io.mem_to_ooo.vstuIqFeedback(i).feedbackFast := DontCare
    } else {
      vsMergeBuffer(i).io.feedback.head <> io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow
      io.mem_to_ooo.vstuIqFeedback(i).feedbackFast := DontCare
    }
  }

  (0 until VlduCnt).foreach{ i =>
    if (i == 0) { // the segment unit uses writeback port 0
      io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid || vSegmentUnit.io.uopwriteback.valid
      io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
        vSegmentUnit.io.uopwriteback.valid -> vSegmentUnit.io.uopwriteback.bits,
        vlMergeBuffer.io.uopWriteback(i).valid -> vlMergeBuffer.io.uopWriteback(i).bits,
        vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
      ))
      vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready && !vSegmentUnit.io.uopwriteback.valid
      vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid && !vSegmentUnit.io.uopwriteback.valid
      vSegmentUnit.io.uopwriteback.ready := io.mem_to_ooo.writebackVldu(i).ready
    } else if (i == 1) {
      io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid || vfofBuffer.io.uopWriteback.valid
      io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
        vfofBuffer.io.uopWriteback.valid -> vfofBuffer.io.uopWriteback.bits,
        vlMergeBuffer.io.uopWriteback(i).valid -> vlMergeBuffer.io.uopWriteback(i).bits,
        vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
      ))
      vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready && !vfofBuffer.io.uopWriteback.valid
      vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid && !vfofBuffer.io.uopWriteback.valid
      vfofBuffer.io.uopWriteback.ready := io.mem_to_ooo.writebackVldu(i).ready
    } else {
      io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid
      io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
        vlMergeBuffer.io.uopWriteback(i).valid -> vlMergeBuffer.io.uopWriteback(i).bits,
        vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
      ))
      vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready
      vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid
    }

    vfofBuffer.io.mergeUopWriteback(i).valid := vlMergeBuffer.io.uopWriteback(i).valid
    vfofBuffer.io.mergeUopWriteback(i).bits := vlMergeBuffer.io.uopWriteback(i).bits
  }


  vfofBuffer.io.redirect <> redirect

  // Sbuffer
  sbuffer.io.csrCtrl <> csrCtrl
  sbuffer.io.dcache <> dcache.io.lsu.store
  sbuffer.io.memSetPattenDetected := dcache.io.memSetPattenDetected
  sbuffer.io.force_write <> lsq.io.force_write
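  // The sbuffer (and the uncache buffer) can be flushed by three requesters: the backend's
  // flushSb request, the atomics unit or vector segment unit, and cbo/cmo handling in the LSQ.
  // The flush is only reported complete once both sbuffer and uncache are empty (stIsEmpty).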
  // flush sbuffer
  val cmoFlush = lsq.io.flushSbuffer.valid
  val fenceFlush = io.ooo_to_mem.flushSb
  val atomicsFlush = atomicsUnit.io.flush_sbuffer.valid || vSegmentUnit.io.flush_sbuffer.valid
  val stIsEmpty = sbuffer.io.flush.empty && uncache.io.flush.empty
  io.mem_to_ooo.sbIsEmpty := RegNext(stIsEmpty)

  // if several of them try to flush the sbuffer at the same time,
  // something must have gone wrong
  assert(!(fenceFlush && atomicsFlush && cmoFlush))
  sbuffer.io.flush.valid := RegNext(fenceFlush || atomicsFlush || cmoFlush)
  uncache.io.flush.valid := sbuffer.io.flush.valid

  // AtomicsUnit: AtomicsUnit will override other control signals,
  // as atomics insts (LR/SC/AMO) will block the pipeline
  val s_normal +: s_atomics = Enum(StaCnt + HyuCnt + 1)
  val state = RegInit(s_normal)

  val st_atomics = Seq.tabulate(StaCnt)(i =>
    io.ooo_to_mem.issueSta(i).valid && FuType.storeIsAMO((io.ooo_to_mem.issueSta(i).bits.uop.fuType))
  ) ++ Seq.tabulate(HyuCnt)(i =>
    io.ooo_to_mem.issueHya(i).valid && FuType.storeIsAMO((io.ooo_to_mem.issueHya(i).bits.uop.fuType))
  )

  for (i <- 0 until StaCnt) when(st_atomics(i)) {
    io.ooo_to_mem.issueSta(i).ready := atomicsUnit.io.in.ready
    storeUnits(i).io.stin.valid := false.B

    state := s_atomics(i)
  }
  for (i <- 0 until HyuCnt) when(st_atomics(StaCnt + i)) {
    io.ooo_to_mem.issueHya(i).ready := atomicsUnit.io.in.ready
    hybridUnits(i).io.lsin.valid := false.B

    state := s_atomics(StaCnt + i)
    assert(!st_atomics.zipWithIndex.filterNot(_._2 == StaCnt + i).unzip._1.reduce(_ || _))
  }
  when (atomicsUnit.io.out.valid) {
    state := s_normal
  }

  atomicsUnit.io.in.valid := st_atomics.reduce(_ || _)
  atomicsUnit.io.in.bits := Mux1H(Seq.tabulate(StaCnt)(i =>
    st_atomics(i) -> io.ooo_to_mem.issueSta(i).bits) ++
    Seq.tabulate(HyuCnt)(i => st_atomics(StaCnt + i) -> io.ooo_to_mem.issueHya(i).bits))
  atomicsUnit.io.storeDataIn.zipWithIndex.foreach { case (stdin, i) =>
    stdin.valid := st_data_atomics(i)
    stdin.bits := stData(i).bits
  }
  atomicsUnit.io.redirect <> redirect

  // TODO: complete amo's pmp support
  val amoTlb = dtlb_ld(0).requestor(0)
  atomicsUnit.io.dtlb.resp.valid := false.B
  atomicsUnit.io.dtlb.resp.bits := DontCare
  atomicsUnit.io.dtlb.req.ready := amoTlb.req.ready
  atomicsUnit.io.pmpResp := pmp_check(0).resp

  atomicsUnit.io.dcache <> dcache.io.lsu.atomics
  atomicsUnit.io.flush_sbuffer.empty := stIsEmpty

  atomicsUnit.io.csrCtrl := csrCtrl

  // for atomicsUnit, it uses loadUnit(0)'s TLB port

  when (state =/= s_normal) {
    // use store wb port instead of load
    loadUnits(0).io.ldout.ready := false.B
    // use load_0's TLB
    atomicsUnit.io.dtlb <> amoTlb

    // hw prefetch should be disabled while executing atomic insts
    loadUnits.map(i => i.io.prefetch_req.valid := false.B)

    // make sure there's no in-flight uops in load unit
    assert(!loadUnits(0).io.ldout.valid)
  }

  lsq.io.flushSbuffer.empty := sbuffer.io.sbempty

  for (i <- 0 until StaCnt) {
    when (state === s_atomics(i)) {
      io.mem_to_ooo.staIqFeedback(i).feedbackSlow := atomicsUnit.io.feedbackSlow
      assert(!storeUnits(i).io.feedback_slow.valid)
    }
  }
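  // While an atomic op occupies state s_atomics(i), the corresponding issue-queue feedback is
  // driven by the atomics unit instead of the store/hybrid unit that normally owns that port;
  // the assertions check that the displaced unit has no feedback of its own in that cycle.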
  for (i <- 0 until HyuCnt) {
    when (state === s_atomics(StaCnt + i)) {
      io.mem_to_ooo.hyuIqFeedback(i).feedbackSlow := atomicsUnit.io.feedbackSlow
      assert(!hybridUnits(i).io.feedback_slow.valid)
    }
  }

  lsq.io.exceptionAddr.isStore := io.ooo_to_mem.isStoreException
  // Exception address is used several cycles after flush.
  // We delay it by 10 cycles to ensure its flush safety.
  val atomicsException = RegInit(false.B)
  when (DelayN(redirect.valid, 10) && atomicsException) {
    atomicsException := false.B
  }.elsewhen (atomicsUnit.io.exceptionInfo.valid) {
    atomicsException := true.B
  }

  val misalignBufExceptionOverwrite = loadMisalignBuffer.io.overwriteExpBuf.valid || storeMisalignBuffer.io.overwriteExpBuf.valid
  val misalignBufExceptionVaddr = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
    loadMisalignBuffer.io.overwriteExpBuf.vaddr,
    storeMisalignBuffer.io.overwriteExpBuf.vaddr
  )
  val misalignBufExceptionIsHyper = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
    loadMisalignBuffer.io.overwriteExpBuf.isHyper,
    storeMisalignBuffer.io.overwriteExpBuf.isHyper
  )
  val misalignBufExceptionGpaddr = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
    loadMisalignBuffer.io.overwriteExpBuf.gpaddr,
    storeMisalignBuffer.io.overwriteExpBuf.gpaddr
  )
  val misalignBufExceptionIsForVSnonLeafPTE = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
    loadMisalignBuffer.io.overwriteExpBuf.isForVSnonLeafPTE,
    storeMisalignBuffer.io.overwriteExpBuf.isForVSnonLeafPTE
  )

  val vSegmentException = RegInit(false.B)
  when (DelayN(redirect.valid, 10) && vSegmentException) {
    vSegmentException := false.B
  }.elsewhen (vSegmentUnit.io.exceptionInfo.valid) {
    vSegmentException := true.B
  }
  val atomicsExceptionAddress = RegEnable(atomicsUnit.io.exceptionInfo.bits.vaddr, atomicsUnit.io.exceptionInfo.valid)
  val vSegmentExceptionVstart = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vstart, vSegmentUnit.io.exceptionInfo.valid)
  val vSegmentExceptionVl = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vl, vSegmentUnit.io.exceptionInfo.valid)
  val vSegmentExceptionAddress = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vaddr, vSegmentUnit.io.exceptionInfo.valid)
  val atomicsExceptionGPAddress = RegEnable(atomicsUnit.io.exceptionInfo.bits.gpaddr, atomicsUnit.io.exceptionInfo.valid)
  val vSegmentExceptionGPAddress = RegEnable(vSegmentUnit.io.exceptionInfo.bits.gpaddr, vSegmentUnit.io.exceptionInfo.valid)
  val atomicsExceptionIsForVSnonLeafPTE = RegEnable(atomicsUnit.io.exceptionInfo.bits.isForVSnonLeafPTE, atomicsUnit.io.exceptionInfo.valid)
  val vSegmentExceptionIsForVSnonLeafPTE = RegEnable(vSegmentUnit.io.exceptionInfo.bits.isForVSnonLeafPTE, vSegmentUnit.io.exceptionInfo.valid)

  val exceptionVaddr = Mux(
    atomicsException,
    atomicsExceptionAddress,
    Mux(misalignBufExceptionOverwrite,
      misalignBufExceptionVaddr,
      Mux(vSegmentException,
        vSegmentExceptionAddress,
        lsq.io.exceptionAddr.vaddr
      )
    )
  )
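  // Exception-info source priority (highest first): atomics unit, misalign buffers, vector
  // segment unit, then the LSQ's own exception address; the same order is used below for
  // vaddr, gpaddr and isForVSnonLeafPTE.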
  // whether vaddr needs extension, and whether this is a hyper inst:
  // VaNeedExt: atomicsException -> false; misalignBufExceptionOverwrite -> true; vSegmentException -> false
  // IsHyper:   atomicsException -> false; vSegmentException -> false
  val exceptionVaNeedExt = !atomicsException &&
    (misalignBufExceptionOverwrite ||
      (!vSegmentException && lsq.io.exceptionAddr.vaNeedExt))
  val exceptionIsHyper = !atomicsException &&
    (misalignBufExceptionOverwrite && misalignBufExceptionIsHyper ||
      (!vSegmentException && lsq.io.exceptionAddr.isHyper && !misalignBufExceptionOverwrite))

  def GenExceptionVa(
    mode: UInt, isVirt: Bool, vaNeedExt: Bool,
    satp: TlbSatpBundle, vsatp: TlbSatpBundle, hgatp: TlbHgatpBundle,
    vaddr: UInt
  ) = {
    require(VAddrBits >= 50)

    val satpNone = satp.mode === 0.U
    val satpSv39 = satp.mode === 8.U
    val satpSv48 = satp.mode === 9.U

    val vsatpNone = vsatp.mode === 0.U
    val vsatpSv39 = vsatp.mode === 8.U
    val vsatpSv48 = vsatp.mode === 9.U

    val hgatpNone = hgatp.mode === 0.U
    val hgatpSv39x4 = hgatp.mode === 8.U
    val hgatpSv48x4 = hgatp.mode === 9.U

    // For !isVirt, the mode check is necessary, as we don't want virtual memory in M-mode.
    // For isVirt, the mode check is unnecessary, as virt won't be 1 in M-mode.
    // Also, isVirt includes hyper insts, which don't care about the mode either.

    val useBareAddr =
      (isVirt && vsatpNone && hgatpNone) ||
      (!isVirt && (mode === CSRConst.ModeM)) ||
      (!isVirt && (mode =/= CSRConst.ModeM) && satpNone)
    val useSv39Addr =
      (isVirt && vsatpSv39) ||
      (!isVirt && (mode =/= CSRConst.ModeM) && satpSv39)
    val useSv48Addr =
      (isVirt && vsatpSv48) ||
      (!isVirt && (mode =/= CSRConst.ModeM) && satpSv48)
    val useSv39x4Addr = isVirt && vsatpNone && hgatpSv39x4
    val useSv48x4Addr = isVirt && vsatpNone && hgatpSv48x4

    val bareAddr = ZeroExt(vaddr(PAddrBits - 1, 0), XLEN)
    val sv39Addr = SignExt(vaddr.take(39), XLEN)
    val sv39x4Addr = ZeroExt(vaddr.take(39 + 2), XLEN)
    val sv48Addr = SignExt(vaddr.take(48), XLEN)
    val sv48x4Addr = ZeroExt(vaddr.take(48 + 2), XLEN)

    val ExceptionVa = Wire(UInt(XLEN.W))
    when (vaNeedExt) {
      ExceptionVa := Mux1H(Seq(
        (useBareAddr) -> bareAddr,
        (useSv39Addr) -> sv39Addr,
        (useSv48Addr) -> sv48Addr,
        (useSv39x4Addr) -> sv39x4Addr,
        (useSv48x4Addr) -> sv48x4Addr,
      ))
    } .otherwise {
      ExceptionVa := vaddr
    }

    ExceptionVa
  }

  io.mem_to_ooo.lsqio.vaddr := RegNext(
    GenExceptionVa(tlbcsr.priv.dmode, tlbcsr.priv.virt || exceptionIsHyper, exceptionVaNeedExt,
      tlbcsr.satp, tlbcsr.vsatp, tlbcsr.hgatp, exceptionVaddr)
  )

  // vsegment instructions are executed atomically, which means atomicsException and
  // vSegmentException should never be raised at the same time.
  XSError(atomicsException && vSegmentException, "atomicsException and vSegmentException raise at the same time!")
  io.mem_to_ooo.lsqio.vstart := RegNext(Mux(vSegmentException,
    vSegmentExceptionVstart,
    lsq.io.exceptionAddr.vstart)
  )
  io.mem_to_ooo.lsqio.vl := RegNext(Mux(vSegmentException,
    vSegmentExceptionVl,
    lsq.io.exceptionAddr.vl)
  )

  XSError(atomicsException && atomicsUnit.io.in.valid, "new instruction before exception triggers\n")
  io.mem_to_ooo.lsqio.gpaddr := RegNext(Mux(
    atomicsException,
    atomicsExceptionGPAddress,
    Mux(misalignBufExceptionOverwrite,
      misalignBufExceptionGpaddr,
      Mux(vSegmentException,
        vSegmentExceptionGPAddress,
        lsq.io.exceptionAddr.gpaddr
      )
    )
  ))
  io.mem_to_ooo.lsqio.isForVSnonLeafPTE := RegNext(Mux(
    atomicsException,
    atomicsExceptionIsForVSnonLeafPTE,
    Mux(misalignBufExceptionOverwrite,
      misalignBufExceptionIsForVSnonLeafPTE,
      Mux(vSegmentException,
        vSegmentExceptionIsForVSnonLeafPTE,
        lsq.io.exceptionAddr.isForVSnonLeafPTE
      )
    )
  ))
  io.mem_to_ooo.topToBackendBypass match { case x =>
    x.hartId := io.hartId
    x.l2FlushDone := RegNext(io.l2_flush_done)
    x.externalInterrupt.msip := outer.clint_int_sink.in.head._1(0)
    x.externalInterrupt.mtip := outer.clint_int_sink.in.head._1(1)
    x.externalInterrupt.meip := outer.plic_int_sink.in.head._1(0)
    x.externalInterrupt.seip := outer.plic_int_sink.in.last._1(0)
    x.externalInterrupt.debug := outer.debug_int_sink.in.head._1(0)
    x.externalInterrupt.nmi.nmi_31 := outer.nmi_int_sink.in.head._1(0) | outer.beu_local_int_sink.in.head._1(0)
    x.externalInterrupt.nmi.nmi_43 := outer.nmi_int_sink.in.head._1(1)
    x.msiInfo := DelayNWithValid(io.fromTopToBackend.msiInfo, 1)
    x.clintTime := DelayNWithValid(io.fromTopToBackend.clintTime, 1)
  }

  io.memInfo.sqFull := RegNext(lsq.io.sqFull)
  io.memInfo.lqFull := RegNext(lsq.io.lqFull)
  io.memInfo.dcacheMSHRFull := RegNext(dcache.io.mshrFull)

  io.inner_hartId := io.hartId
  io.inner_reset_vector := RegNext(io.outer_reset_vector)
  io.outer_cpu_halt := io.ooo_to_mem.backendToTopBypass.cpuHalted
  io.outer_l2_flush_en := io.ooo_to_mem.csrCtrl.flush_l2_enable
  io.outer_power_down_en := io.ooo_to_mem.csrCtrl.power_down_enable
  io.outer_cpu_critical_error := io.ooo_to_mem.backendToTopBypass.cpuCriticalError
  io.outer_msi_ack := io.ooo_to_mem.backendToTopBypass.msiAck
  io.outer_beu_errors_icache := RegNext(io.inner_beu_errors_icache)
  io.inner_hc_perfEvents <> RegNext(io.outer_hc_perfEvents)

  // vector segmentUnit
  vSegmentUnit.io.in.bits <> io.ooo_to_mem.issueVldu.head.bits
  vSegmentUnit.io.in.valid := isSegment && io.ooo_to_mem.issueVldu.head.valid // is segment instruction
  vSegmentUnit.io.dtlb.resp.bits <> dtlb_reqs.take(LduCnt).head.resp.bits
  vSegmentUnit.io.dtlb.resp.valid <> dtlb_reqs.take(LduCnt).head.resp.valid
  vSegmentUnit.io.pmpResp <> pmp_check.head.resp
  vSegmentUnit.io.flush_sbuffer.empty := stIsEmpty
  vSegmentUnit.io.redirect <> redirect
  vSegmentUnit.io.rdcache.resp.bits := dcache.io.lsu.load(0).resp.bits
  vSegmentUnit.io.rdcache.resp.valid := dcache.io.lsu.load(0).resp.valid
  vSegmentUnit.io.rdcache.s2_bank_conflict := dcache.io.lsu.load(0).s2_bank_conflict
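  // The segment unit has no dedicated memory ports: it observes load port 0's dcache response,
  // DTLB response and PMP result (connected above), which relies on segment instructions being
  // executed atomically (see the comment above) so the shared ports are otherwise idle.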
  // -------------------------
  // Vector Segment Triggers
  // -------------------------
  vSegmentUnit.io.fromCsrTrigger.tdataVec := tdata
  vSegmentUnit.io.fromCsrTrigger.tEnableVec := tEnable
  vSegmentUnit.io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
  vSegmentUnit.io.fromCsrTrigger.debugMode := debugMode

  // reset tree of MemBlock
  if (p(DebugOptionsKey).ResetGen) {
    val leftResetTree = ResetGenNode(
      Seq(
        ModuleNode(ptw),
        ModuleNode(ptw_to_l2_buffer),
        ModuleNode(lsq),
        ModuleNode(dtlb_st_tlb_st),
        ModuleNode(dtlb_prefetch_tlb_prefetch),
        ModuleNode(pmp)
      )
      ++ pmp_checkers.map(ModuleNode(_))
      ++ (if (prefetcherOpt.isDefined) Seq(ModuleNode(prefetcherOpt.get)) else Nil)
      ++ (if (l1PrefetcherOpt.isDefined) Seq(ModuleNode(l1PrefetcherOpt.get)) else Nil)
    )
    val rightResetTree = ResetGenNode(
      Seq(
        ModuleNode(sbuffer),
        ModuleNode(dtlb_ld_tlb_ld),
        ModuleNode(dcache),
        ModuleNode(l1d_to_l2_buffer),
        CellNode(io.reset_backend)
      )
    )
    ResetGen(leftResetTree, reset, sim = false, io.sramTestBypass.fromL2Top.mbistReset)
    ResetGen(rightResetTree, reset, sim = false, io.sramTestBypass.fromL2Top.mbistReset)
  } else {
    io.reset_backend := DontCare
  }
  io.resetInFrontendBypass.toL2Top := io.resetInFrontendBypass.fromFrontend
  // trace interface
  val traceToL2Top = io.traceCoreInterfaceBypass.toL2Top
  val traceFromBackend = io.traceCoreInterfaceBypass.fromBackend
  traceFromBackend.fromEncoder := RegNext(traceToL2Top.fromEncoder)
  traceToL2Top.toEncoder.trap := RegEnable(
    traceFromBackend.toEncoder.trap,
    traceFromBackend.toEncoder.groups(0).valid && Itype.isTrap(traceFromBackend.toEncoder.groups(0).bits.itype)
  )
  traceToL2Top.toEncoder.priv := RegEnable(
    traceFromBackend.toEncoder.priv,
    traceFromBackend.toEncoder.groups(0).valid
  )
  (0 until TraceGroupNum).foreach { i =>
    traceToL2Top.toEncoder.groups(i).valid := RegNext(traceFromBackend.toEncoder.groups(i).valid)
    traceToL2Top.toEncoder.groups(i).bits.iretire := RegNext(traceFromBackend.toEncoder.groups(i).bits.iretire)
    traceToL2Top.toEncoder.groups(i).bits.itype := RegNext(traceFromBackend.toEncoder.groups(i).bits.itype)
    traceToL2Top.toEncoder.groups(i).bits.ilastsize := RegEnable(
      traceFromBackend.toEncoder.groups(i).bits.ilastsize,
      traceFromBackend.toEncoder.groups(i).valid
    )
    traceToL2Top.toEncoder.groups(i).bits.iaddr := RegEnable(
      traceFromBackend.toEncoder.groups(i).bits.iaddr,
      traceFromBackend.toEncoder.groups(i).valid
    ) + (RegEnable(
      traceFromBackend.toEncoder.groups(i).bits.ftqOffset.getOrElse(0.U),
      traceFromBackend.toEncoder.groups(i).valid
    ) << instOffsetBits)
  }


  io.mem_to_ooo.storeDebugInfo := DontCare
  // store event difftest information
  if (env.EnableDifftest) {
    (0 until EnsbufferWidth).foreach{ i =>
      io.mem_to_ooo.storeDebugInfo(i).robidx := sbuffer.io.vecDifftestInfo(i).bits.robIdx
      sbuffer.io.vecDifftestInfo(i).bits.pc := io.mem_to_ooo.storeDebugInfo(i).pc
    }
  }

  // top-down info
  dcache.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
  dtlbRepeater.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
  lsq.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
  io.debugTopDown.toCore.robHeadMissInDCache := dcache.io.debugTopDown.robHeadMissInDCache
  io.debugTopDown.toCore.robHeadTlbReplay := lsq.io.debugTopDown.robHeadTlbReplay
  io.debugTopDown.toCore.robHeadTlbMiss := lsq.io.debugTopDown.robHeadTlbMiss
  io.debugTopDown.toCore.robHeadLoadVio := lsq.io.debugTopDown.robHeadLoadVio
  io.debugTopDown.toCore.robHeadLoadMSHR := lsq.io.debugTopDown.robHeadLoadMSHR
  dcache.io.debugTopDown.robHeadOtherReplay := lsq.io.debugTopDown.robHeadOtherReplay
  dcache.io.debugRolling := io.debugRolling

  lsq.io.noUopsIssued := io.topDownInfo.toBackend.noUopsIssued
  io.topDownInfo.toBackend.lqEmpty := lsq.io.lqEmpty
  io.topDownInfo.toBackend.sqEmpty := lsq.io.sqEmpty
  io.topDownInfo.toBackend.l1Miss := dcache.io.l1Miss
  io.topDownInfo.toBackend.l2TopMiss.l2Miss := RegNext(io.topDownInfo.fromL2Top.l2Miss)
  io.topDownInfo.toBackend.l2TopMiss.l3Miss := RegNext(io.topDownInfo.fromL2Top.l3Miss)

  val hyLdDeqCount = PopCount(io.ooo_to_mem.issueHya.map(x => x.valid && FuType.isLoad(x.bits.uop.fuType)))
  val hyStDeqCount = PopCount(io.ooo_to_mem.issueHya.map(x => x.valid && FuType.isStore(x.bits.uop.fuType)))
  val ldDeqCount = PopCount(io.ooo_to_mem.issueLda.map(_.valid)) +& hyLdDeqCount
  val stDeqCount = PopCount(io.ooo_to_mem.issueSta.take(StaCnt).map(_.valid)) +& hyStDeqCount
  val iqDeqCount = ldDeqCount +& stDeqCount
  XSPerfAccumulate("load_iq_deq_count", ldDeqCount)
  XSPerfHistogram("load_iq_deq_count", ldDeqCount, true.B, 0, LdExuCnt + 1)
  XSPerfAccumulate("store_iq_deq_count", stDeqCount)
  XSPerfHistogram("store_iq_deq_count", stDeqCount, true.B, 0, StAddrCnt + 1)
  XSPerfAccumulate("ls_iq_deq_count", iqDeqCount)

  val pfevent = Module(new PFEvent)
  pfevent.io.distribute_csr := csrCtrl.distribute_csr
  val csrevents = pfevent.io.hpmevent.slice(16, 24)

  val perfFromUnits = (loadUnits ++ Seq(sbuffer, lsq, dcache)).flatMap(_.getPerfEvents)
  val perfFromPTW = perfEventsPTW.map(x => ("PTW_" + x._1, x._2))
  val perfBlock = Seq(("ldDeqCount", ldDeqCount),
                      ("stDeqCount", stDeqCount))
  // let index = 0 be no event
  val allPerfEvents = Seq(("noEvent", 0.U)) ++ perfFromUnits ++ perfFromPTW ++ perfBlock

  if (printEventCoding) {
    for (((name, inc), i) <- allPerfEvents.zipWithIndex) {
      println("MemBlock perfEvents Set", name, inc, i)
    }
  }

  val allPerfInc = allPerfEvents.map(_._2.asTypeOf(new PerfEvent))
  val perfEvents = HPerfMonitor(csrevents, allPerfInc).getPerfEvents
  generatePerfEvent()

  private val mbistPl = MbistPipeline.PlaceMbistPipeline(Int.MaxValue, "MbistPipeMemBlk", hasMbist)
  private val mbistIntf = if (hasMbist) {
    val params = mbistPl.get.nodeParams
    val intf = Some(Module(new MbistInterface(
      params = Seq(params),
      ids = Seq(mbistPl.get.childrenIds),
      name = s"MbistIntfMemBlk",
      pipelineNum = 1
    )))
    intf.get.toPipeline.head <> mbistPl.get.mbist
    mbistPl.get.registerCSV(intf.get.info, "MbistMemBlk")
    intf.get.mbist := DontCare
    dontTouch(intf.get.mbist)
    //TODO: add mbist controller connections here
    intf
  } else {
    None
  }
  private val sigFromSrams = if (hasSramTest) Some(SramHelper.genBroadCastBundleTop()) else None
  private val cg = ClockGate.genTeSrc
  dontTouch(cg)

  sigFromSrams.foreach({ case sig => sig.mbist := DontCare })
  if (hasMbist) {
    sigFromSrams.get.mbist := io.sramTestBypass.fromL2Top.mbist.get
    io.sramTestBypass.toFrontend.mbist.get := io.sramTestBypass.fromL2Top.mbist.get
    io.sramTestBypass.toFrontend.mbistReset.get := io.sramTestBypass.fromL2Top.mbistReset.get
    io.sramTestBypass.toBackend.mbist.get := io.sramTestBypass.fromL2Top.mbist.get
    io.sramTestBypass.toBackend.mbistReset.get := io.sramTestBypass.fromL2Top.mbistReset.get
    cg.cgen := io.sramTestBypass.fromL2Top.mbist.get.cgen
  } else {
    cg.cgen := false.B
  }

  // sram debug
  val sramCtl = Option.when(hasSramCtl)(RegNext(io.sramTestBypass.fromL2Top.sramCtl.get))
  sigFromSrams.foreach({ case sig => sig.sramCtl := DontCare })
  sigFromSrams.zip(sramCtl).foreach {
    case (sig, ctl) =>
      sig.sramCtl.RTSEL := ctl(1, 0) // CFG[1 : 0]
      sig.sramCtl.WTSEL := ctl(3, 2) // CFG[3 : 2]
      sig.sramCtl.MCR   := ctl(5, 4) // CFG[5 : 4]
      sig.sramCtl.MCW   := ctl(7, 6) // CFG[7 : 6]
  }
  if (hasSramCtl) {
    io.sramTestBypass.toFrontend.sramCtl.get := sramCtl.get
  }
}

class MemBlock()(implicit p: Parameters) extends LazyModule
  with HasXSParameter {
  override def shouldBeInlined: Boolean = false

  val inner = LazyModule(new MemBlockInlined())

  lazy val module = new MemBlockImp(this)
}

class MemBlockImp(wrapper: MemBlock) extends LazyModuleImp(wrapper) {
  val io = IO(wrapper.inner.module.io.cloneType)
  val io_perf = IO(wrapper.inner.module.io_perf.cloneType)
  io <> wrapper.inner.module.io
  io_perf <> wrapper.inner.module.io_perf

  if (p(DebugOptionsKey).ResetGen) {
    ResetGen(
      ResetGenNode(Seq(ModuleNode(wrapper.inner.module))),
      reset, sim = false, io.sramTestBypass.fromL2Top.mbistReset
    )
  }
}