/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.diplomacy._
import freechips.rocketchip.diplomacy.{BundleBridgeSource, LazyModule, LazyModuleImp}
import freechips.rocketchip.interrupts.{IntSinkNode, IntSinkPortSimple}
import freechips.rocketchip.tile.HasFPUParameters
import freechips.rocketchip.tilelink._
import utils._
import utility._
import utility.mbist.{MbistInterface, MbistPipeline}
import utility.sram.{SramMbistBundle, SramBroadcastBundle, SramHelper}
import system.{HasSoCParameter, SoCParamsKey}
import xiangshan._
import xiangshan.ExceptionNO._
import xiangshan.frontend.HasInstrMMIOConst
import xiangshan.backend.Bundles.{DynInst, MemExuInput, MemExuOutput}
import xiangshan.backend.ctrlblock.{DebugLSIO, LsTopdownInfo}
import xiangshan.backend.exu.MemExeUnit
import xiangshan.backend.fu._
import xiangshan.backend.fu.FuType._
import xiangshan.backend.fu.NewCSR.{CsrTriggerBundle, TriggerUtil, PFEvent}
import xiangshan.backend.fu.util.{CSRConst, SdtrigExt}
import xiangshan.backend.{BackendToTopBundle, TopToBackendBundle}
import xiangshan.backend.rob.{RobDebugRollingIO, RobPtr, RobLsqIO}
import xiangshan.backend.datapath.NewPipelineConnect
import xiangshan.backend.trace.{Itype, TraceCoreInterface}
import xiangshan.backend.Bundles._
import xiangshan.mem._
import xiangshan.mem.mdp._
import xiangshan.mem.Bundles._
import xiangshan.mem.prefetch.{BasePrefecher, L1Prefetcher, SMSParams, SMSPrefetcher}
import xiangshan.cache._
import xiangshan.cache.mmu._
import coupledL2.PrefetchRecv
// NOTE(review): duplicated re-imports of utility.mbist / utility.sram removed here;
// the same names are already imported above.

/** Memory-unit counts and writeback-port indices shared by MemBlock components.
  * All counts are derived from `backendParams` (see HasXSParameter).
  */
trait HasMemBlockParameters extends HasXSParameter {
  // number of memory units
  val LduCnt = backendParams.LduCnt
  val StaCnt = backendParams.StaCnt
  val StdCnt = backendParams.StdCnt
  val HyuCnt = backendParams.HyuCnt
  val VlduCnt = backendParams.VlduCnt
  val VstuCnt = backendParams.VstuCnt

  // hybrid units serve both as extra load pipes and extra store-address pipes
  val LdExuCnt = LduCnt + HyuCnt
  val StAddrCnt = StaCnt + HyuCnt
  val StDataCnt = StdCnt
  val MemExuCnt = LduCnt + HyuCnt + StaCnt + StdCnt
  val MemAddrExtCnt = LdExuCnt + StaCnt
  val MemVExuCnt = VlduCnt + VstuCnt

  // fixed load-writeback port assignment
  val AtomicWBPort   = 0
  val MisalignWBPort = 1
  val UncacheWBPort  = 2
  val NCWBPorts = Seq(1, 2)
}

abstract class MemBlockBundle(implicit val p: Parameters) extends Bundle with HasMemBlockParameters

/** Store-data functional unit: a one-entry pass-through that forwards src(0)
  * and the robIdx from input to output; all other output fields are zeroed.
  */
class Std(cfg: FuConfig)(implicit p: Parameters) extends FuncUnit(cfg) {
  io.in.ready := io.out.ready
  io.out.valid := io.in.valid
  io.out.bits := 0.U.asTypeOf(io.out.bits)
  io.out.bits.res.data := io.in.bits.data.src(0)
  io.out.bits.ctrl.robIdx := io.in.bits.ctrl.robIdx
}

/** Signals flowing from the out-of-order backend into MemBlock
  * (issue ports, CSR/TLB control, LSQ commit bookkeeping, ...).
  */
class ooo_to_mem(implicit p: Parameters) extends MemBlockBundle {
  val backendToTopBypass = Flipped(new BackendToTopBundle)

  val loadFastMatch = Vec(LdExuCnt, Input(UInt(LdExuCnt.W)))
  val loadFastFuOpType = Vec(LdExuCnt, Input(FuOpType()))
  val loadFastImm = Vec(LdExuCnt, Input(UInt(12.W)))
  val sfence = Input(new SfenceBundle)
  val tlbCsr = Input(new TlbCsrBundle)
  val lsqio = new Bundle {
    val lcommit = Input(UInt(log2Up(CommitWidth + 1).W))
    val scommit = Input(UInt(log2Up(CommitWidth + 1).W))
    val pendingMMIOld = Input(Bool())
    val pendingld = Input(Bool())
    val pendingst = Input(Bool())
    val pendingVst = Input(Bool())
    val commit = Input(Bool())
    val pendingPtr = Input(new RobPtr)
    val pendingPtrNext = Input(new RobPtr)
  }

  val isStoreException = Input(Bool())
  val isVlsException = Input(Bool())
  val csrCtrl = Flipped(new CustomCSRCtrlIO)
  val enqLsq = new LsqEnqIO
  val flushSb = Input(Bool())

  val storePc = Vec(StaCnt, Input(UInt(VAddrBits.W))) // for hw prefetch
  val hybridPc = Vec(HyuCnt, Input(UInt(VAddrBits.W))) // for hw prefetch

  val issueLda = MixedVec(Seq.fill(LduCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueSta = MixedVec(Seq.fill(StaCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueStd = MixedVec(Seq.fill(StdCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueHya = MixedVec(Seq.fill(HyuCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueVldu = MixedVec(Seq.fill(VlduCnt)(Flipped(DecoupledIO(new MemExuInput(isVector=true)))))

  // all issue ports concatenated, in lda/sta/std/hya/vldu order
  def issueUops = issueLda ++ issueSta ++ issueStd ++ issueHya ++ issueVldu
}

/** Signals flowing from MemBlock back to the out-of-order backend
  * (writebacks, queue pointers/counters, feedback, wakeups, ...).
  */
class mem_to_ooo(implicit p: Parameters) extends MemBlockBundle {
  val topToBackendBypass = new TopToBackendBundle

  val otherFastWakeup = Vec(LdExuCnt, ValidIO(new DynInst))
  val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize + 1).W))
  val sqCancelCnt = Output(UInt(log2Up(StoreQueueSize + 1).W))
  val sqDeq = Output(UInt(log2Ceil(EnsbufferWidth + 1).W))
  val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
  // used by VLSU issue queue, the vector store would wait all store before it, and the vector load would wait all load
  val sqDeqPtr = Output(new SqPtr)
  val lqDeqPtr = Output(new LqPtr)
  val stIn = Vec(StAddrCnt, ValidIO(new MemExuInput))
  val stIssuePtr = Output(new SqPtr())

  val memoryViolation = ValidIO(new Redirect)
  val sbIsEmpty = Output(Bool())

  val lsTopdownInfo = Vec(LdExuCnt, Output(new LsTopdownInfo))

  val lsqio = new Bundle {
    val vaddr = Output(UInt(XLEN.W))
    val vstart = Output(UInt((log2Up(VLEN) + 1).W))
    val vl = Output(UInt((log2Up(VLEN) + 1).W))
    val gpaddr = Output(UInt(XLEN.W))
    val isForVSnonLeafPTE = Output(Bool())
    val mmio = Output(Vec(LoadPipelineWidth, Bool()))
    val uop = Output(Vec(LoadPipelineWidth, new DynInst))
    val lqCanAccept = Output(Bool())
    val sqCanAccept = Output(Bool())
  }

  val storeDebugInfo = Vec(EnsbufferWidth, new Bundle {
    val robidx = Output(new RobPtr)
    val pc = Input(UInt(VAddrBits.W))
  })

  val writebackLda = Vec(LduCnt, DecoupledIO(new MemExuOutput))
  val writebackSta = Vec(StaCnt, DecoupledIO(new MemExuOutput))
  val writebackStd = Vec(StdCnt, DecoupledIO(new MemExuOutput))
  val writebackHyuLda = Vec(HyuCnt, DecoupledIO(new MemExuOutput))
  val writebackHyuSta = Vec(HyuCnt, DecoupledIO(new MemExuOutput))
  val writebackVldu = Vec(VlduCnt, DecoupledIO(new MemExuOutput(isVector = true)))
  // all writeback ports concatenated, in sta/hyu-lda/hyu-sta/lda/vldu/std order
  def writeBack: Seq[DecoupledIO[MemExuOutput]] = {
    writebackSta ++
      writebackHyuLda ++ writebackHyuSta ++
      writebackLda ++
      writebackVldu ++
      writebackStd
  }

  val ldaIqFeedback = Vec(LduCnt, new MemRSFeedbackIO)
  val staIqFeedback = Vec(StaCnt, new MemRSFeedbackIO)
  val hyuIqFeedback = Vec(HyuCnt, new MemRSFeedbackIO)
  val vstuIqFeedback = Vec(VstuCnt, new MemRSFeedbackIO(isVector = true))
  val vlduIqFeedback = Vec(VlduCnt, new MemRSFeedbackIO(isVector = true))
  val ldCancel = Vec(backendParams.LdExuCnt, new LoadCancelIO)
  val wakeup = Vec(backendParams.LdExuCnt, Valid(new DynInst))

  val s3_delayed_load_error = Vec(LdExuCnt, Output(Bool()))
}

/** Top-down performance-analysis outputs sampled at the ROB head. */
class MemCoreTopDownIO extends Bundle {
  val robHeadMissInDCache = Output(Bool())
  val robHeadTlbReplay = Output(Bool())
  val robHeadTlbMiss = Output(Bool())
  val robHeadLoadVio = Output(Bool())
  val robHeadLoadMSHR = Output(Bool())
}

/** Frontend-to-MemBlock connection: the ITLB's page-table-walk channel. */
class fetch_to_mem(implicit p: Parameters) extends XSBundle{
  val itlb = Flipped(new TlbPtwIO())
}

// triple buffer applied in i-mmio path (two at MemBlock, one at L2Top)
class InstrUncacheBuffer()(implicit p: Parameters) extends LazyModule with HasInstrMMIOConst {
  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
  lazy val module = new InstrUncacheBufferImpl

  class InstrUncacheBufferImpl extends LazyModuleImp(this) {
    (node.in zip node.out) foreach { case ((in, edgeIn), (out, edgeOut)) =>
      out.a <> BufferParams.default(BufferParams.default(in.a))
      in.d <> BufferParams.default(BufferParams.default(out.d))

      // only a.valid, a.ready, a.address can change
      // hoping that the rest would be optimized to keep MemBlock port unchanged after adding buffer
      out.a.bits.data := 0.U
      out.a.bits.mask := Fill(mmioBusBytes, 1.U(1.W))
      out.a.bits.opcode := 4.U // Get
      out.a.bits.size := log2Ceil(mmioBusBytes).U
      out.a.bits.source := 0.U
    }
  }
}

// triple buffer applied in L1I$-L2 path (two at MemBlock, one at L2Top)
class ICacheBuffer()(implicit p: Parameters) extends LazyModule {
  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
  lazy val module = new ICacheBufferImpl

  class ICacheBufferImpl extends LazyModuleImp(this) {
    (node.in zip node.out) foreach { case ((in, edgeIn), (out, edgeOut)) =>
      out.a <> BufferParams.default(BufferParams.default(in.a))
      in.d <> BufferParams.default(BufferParams.default(out.d))
    }
  }
}

// double buffer for the I-cache control channel, same structure as ICacheBuffer
class ICacheCtrlBuffer()(implicit p: Parameters) extends LazyModule {
  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
  lazy val module = new ICacheCtrlBufferImpl

  class ICacheCtrlBufferImpl extends LazyModuleImp(this) {
    (node.in zip node.out) foreach { case ((in, edgeIn), (out, edgeOut)) =>
      out.a <> BufferParams.default(BufferParams.default(in.a))
      in.d <> BufferParams.default(BufferParams.default(out.d))
    }
  }
}

// Frontend bus goes through MemBlock
class FrontendBridge()(implicit p: Parameters) extends LazyModule {
  val icache_node = LazyModule(new ICacheBuffer()).suggestName("icache").node // to keep IO port name
  val icachectrl_node = LazyModule(new ICacheCtrlBuffer()).suggestName("icachectrl").node
  val instr_uncache_node = LazyModule(new InstrUncacheBuffer()).suggestName("instr_uncache").node
  lazy val module = new LazyModuleImp(this) {
  }
}

/** Diplomatic (LazyModule) shell of MemBlock: instantiates D-cache, uncache,
  * L2TLB, prefetch senders, the frontend bridge and interrupt sinks, and wires
  * the TileLink graph between them. The actual logic lives in MemBlockInlinedImp.
  */
class MemBlockInlined()(implicit p: Parameters) extends LazyModule
  with HasXSParameter {
  override def shouldBeInlined: Boolean = true

  val dcache = LazyModule(new DCacheWrapper())
  val uncache = LazyModule(new Uncache())
  val uncache_port = TLTempNode()
  val uncache_xbar = TLXbar()
  val ptw = LazyModule(new L2TLBWrapper())
  val ptw_to_l2_buffer = if (!coreParams.softPTW) LazyModule(new TLBuffer) else null
  val l1d_to_l2_buffer = if (coreParams.dcacheParametersOpt.nonEmpty) LazyModule(new TLBuffer) else null
  val dcache_port = TLNameNode("dcache_client") // to keep dcache-L2 port name
  val l2_pf_sender_opt = coreParams.prefetcher.map(_ =>
    BundleBridgeSource(() => new PrefetchRecv)
  )
  // l3 prefetch sender only exists when the SoC actually has an L3 cache
  val l3_pf_sender_opt = if (p(SoCParamsKey).L3CacheParamsOpt.nonEmpty) coreParams.prefetcher.map(_ =>
    BundleBridgeSource(() => new huancun.PrefetchRecv)
  ) else None
  val frontendBridge = LazyModule(new FrontendBridge)
  // interrupt sinks
  val clint_int_sink = IntSinkNode(IntSinkPortSimple(1, 2))
  val debug_int_sink = IntSinkNode(IntSinkPortSimple(1, 1))
  val plic_int_sink = IntSinkNode(IntSinkPortSimple(2, 1))
  val nmi_int_sink = IntSinkNode(IntSinkPortSimple(1, (new NonmaskableInterruptIO).elements.size))
  val beu_local_int_sink = IntSinkNode(IntSinkPortSimple(1, 1))

  if (!coreParams.softPTW) {
    ptw_to_l2_buffer.node := ptw.node
  }
  uncache_xbar := TLBuffer() := uncache.clientNode
  if (dcache.uncacheNode.isDefined) {
    dcache.uncacheNode.get := TLBuffer.chainNode(2) := uncache_xbar
  }
  uncache_port := TLBuffer.chainNode(2) := uncache_xbar

  lazy val module = new MemBlockInlinedImp(this)
}

class MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer)
  with HasXSParameter
  with HasFPUParameters
  with HasPerfEvents
  with HasSoCParameter
  with HasL1PrefetchSourceParameter
  with HasCircularQueuePtrHelper
  with HasMemBlockParameters
  with HasTlbConst
  with SdtrigExt
{
  val io = IO(new Bundle {
    val hartId = Input(UInt(hartIdLen.W))
    val redirect = Flipped(ValidIO(new Redirect))

    val ooo_to_mem = new ooo_to_mem
    val mem_to_ooo = new mem_to_ooo
    val fetch_to_mem = new fetch_to_mem

    val ifetchPrefetch = Vec(LduCnt, ValidIO(new SoftIfetchPrefetchBundle))

    // misc
    val error = ValidIO(new L1CacheErrorInfo)
    val memInfo = new Bundle {
      val sqFull = Output(Bool())
      val lqFull = Output(Bool())
      val dcacheMSHRFull = Output(Bool())
    }
    val debug_ls = new DebugLSIO
    val l2_hint = Input(Valid(new L2ToL1Hint()))
    val l2PfqBusy = Input(Bool())
    val l2_tlb_req = Flipped(new TlbRequestIO(nRespDups = 2))
    val l2_pmp_resp = new PMPRespBundle
    val l2_flush_done = Input(Bool())

    val debugTopDown = new Bundle {
      val robHeadVaddr = Flipped(Valid(UInt(VAddrBits.W)))
      val toCore = new MemCoreTopDownIO
    }
    val debugRolling = Flipped(new RobDebugRollingIO)

    // All the signals from/to frontend/backend to/from bus will go through MemBlock
    val fromTopToBackend = Input(new Bundle {
      val msiInfo = ValidIO(UInt(soc.IMSICParams.MSI_INFO_WIDTH.W))
      val clintTime = ValidIO(UInt(64.W))
    })
    val inner_hartId = Output(UInt(hartIdLen.W))
    val inner_reset_vector = Output(UInt(PAddrBits.W))
    val outer_reset_vector = Input(UInt(PAddrBits.W))
    val outer_cpu_halt = Output(Bool())
    val outer_l2_flush_en = Output(Bool())
    val outer_power_down_en = Output(Bool())
    val outer_cpu_critical_error = Output(Bool())
    val outer_msi_ack = Output(Bool())
    val inner_beu_errors_icache = Input(new L1BusErrorUnitInfo)
    val outer_beu_errors_icache = Output(new L1BusErrorUnitInfo)
    val inner_hc_perfEvents = Output(Vec(numPCntHc * coreParams.L2NBanks + 1, new PerfEvent))
    val outer_hc_perfEvents = Input(Vec(numPCntHc * coreParams.L2NBanks + 1, new PerfEvent))

    // reset signals of frontend & backend are generated in memblock
    val reset_backend = Output(Reset())
    // Reset signal from frontend.
    val resetInFrontendBypass = new Bundle{
      val fromFrontend = Input(Bool())
      val toL2Top = Output(Bool())
    }
    val traceCoreInterfaceBypass = new Bundle{
      val fromBackend = Flipped(new TraceCoreInterface(hasOffset = true))
      val toL2Top = new TraceCoreInterface
    }

    val topDownInfo = new Bundle {
      val fromL2Top = Input(new TopDownFromL2Top)
      val toBackend = Flipped(new TopDownInfo)
    }
    // MBIST / SRAM-control test signals, forwarded L2Top -> frontend/backend
    val sramTestBypass = new Bundle() {
      val fromL2Top = new Bundle() {
        val mbist = Option.when(hasMbist)(Input(new SramMbistBundle))
        val mbistReset = Option.when(hasMbist)(Input(new DFTResetSignals()))
        val sramCtl = Option.when(hasSramCtl)(Input(UInt(64.W)))
      }
      val toFrontend = new Bundle() {
        val mbist = Option.when(hasMbist)(Output(new SramMbistBundle))
        val mbistReset = Option.when(hasMbist)(Output(new DFTResetSignals()))
        val sramCtl = Option.when(hasSramCtl)(Output(UInt(64.W)))
      }
      val toBackend = new Bundle() {
        val mbist = Option.when(hasMbist)(Output(new SramMbistBundle))
        val mbistReset = Option.when(hasMbist)(Output(new DFTResetSignals()))
      }
    }
  })

  // performance-counter tracing: record each writeback's position for PerfCCT
  io.mem_to_ooo.writeBack.zipWithIndex.foreach{ case (wb, i) =>
    PerfCCT.updateInstPos(wb.bits.uop.debug_seqNum, PerfCCT.InstPos.AtBypassVal.id.U, wb.valid, clock, reset)
  }

  dontTouch(io.inner_hartId)
  dontTouch(io.inner_reset_vector)
  dontTouch(io.outer_reset_vector)
  dontTouch(io.outer_cpu_halt)
  dontTouch(io.outer_l2_flush_en)
  dontTouch(io.outer_power_down_en)
  dontTouch(io.outer_cpu_critical_error)
  dontTouch(io.inner_beu_errors_icache)
  dontTouch(io.outer_beu_errors_icache)
  dontTouch(io.inner_hc_perfEvents)
  dontTouch(io.outer_hc_perfEvents)

  val redirect = RegNextWithEnable(io.redirect)

  private val dcache = outer.dcache.module
  val uncache = outer.uncache.module

  //val delayedDcacheRefill = RegNext(dcache.io.lsu.lsq)

  val csrCtrl = DelayN(io.ooo_to_mem.csrCtrl, 2)
  dcache.io.l2_pf_store_only := RegNext(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_store_only, false.B)
  io.error <> DelayNWithValid(dcache.io.error, 2)
  // suppress cache-error reporting when disabled by CSR
  when(!csrCtrl.cache_error_enable){
    io.error.bits.report_to_beu := false.B
    io.error.valid := false.B
  }

  val loadUnits = Seq.fill(LduCnt)(Module(new LoadUnit))
  val storeUnits = Seq.fill(StaCnt)(Module(new StoreUnit))
  val stdExeUnits = Seq.fill(StdCnt)(Module(new MemExeUnit(backendParams.memSchdParams.get.issueBlockParams.find(_.StdCnt != 0).get.exuBlockParams.head)))
  val hybridUnits = Seq.fill(HyuCnt)(Module(new HybridUnit)) // Todo: replace it with HybridUnit
  val stData = stdExeUnits.map(_.io.out)
  val exeUnits = loadUnits ++ storeUnits

  // The number of vector load/store units is decoupled with the number of load/store units
  val vlSplit = Seq.fill(VlduCnt)(Module(new VLSplitImp))
  val vsSplit = Seq.fill(VstuCnt)(Module(new VSSplitImp))
  val vlMergeBuffer = Module(new VLMergeBufferImp)
  val vsMergeBuffer = Seq.fill(VstuCnt)(Module(new VSMergeBufferImp))
  val vSegmentUnit = Module(new VSegmentUnit)
  val vfofBuffer = Module(new VfofBuffer)

  // misalign Buffer
  val loadMisalignBuffer = Module(new LoadMisalignBuffer)
  val storeMisalignBuffer = Module(new StoreMisalignBuffer)

  val l1_pf_req = Wire(Decoupled(new L1PrefetchReq()))
  dcache.io.sms_agt_evict_req.ready := false.B
  // SMS (spatial memory streaming) prefetcher, configured via delayed CSR controls
  val prefetcherOpt: Option[BasePrefecher] = coreParams.prefetcher.map {
    case _: SMSParams =>
      val sms = Module(new SMSPrefetcher())
      sms.io_agt_en := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_agt, 2, Some(false.B))
      sms.io_pht_en := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_pht, 2, Some(false.B))
      sms.io_act_threshold := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_active_threshold, 2, Some(12.U))
      sms.io_act_stride := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_active_stride, 2, Some(30.U))
      sms.io_stride_en := false.B
      sms.io_dcache_evict <> dcache.io.sms_agt_evict_req
      val mbistSmsPl = MbistPipeline.PlaceMbistPipeline(1, "MbistPipeSms", hasMbist)
      sms
  }
  prefetcherOpt.foreach{ pf => pf.io.l1_req.ready := false.B }
  val hartId = p(XSCoreParamsKey).HartId
  // L1 stream prefetcher; its stride trainer is fed by load and hybrid units
  val l1PrefetcherOpt: Option[BasePrefecher] = coreParams.prefetcher.map {
    case _ =>
      val l1Prefetcher = Module(new L1Prefetcher())
      val enableL1StreamPrefetcher = Constantin.createRecord(s"enableL1StreamPrefetcher$hartId", initValue = true)
      l1Prefetcher.io.enable := enableL1StreamPrefetcher &&
        GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable, 2, Some(false.B))
      l1Prefetcher.pf_ctrl <> dcache.io.pf_ctrl
      l1Prefetcher.l2PfqBusy := io.l2PfqBusy

      // stride will train on miss or prefetch hit
      for (i <- 0 until LduCnt) {
        val source = loadUnits(i).io.prefetch_train_l1
        l1Prefetcher.stride_train(i).valid := source.valid && source.bits.isFirstIssue && (
          source.bits.miss || isFromStride(source.bits.meta_prefetch)
        )
        l1Prefetcher.stride_train(i).bits := source.bits
        val loadPc = RegNext(io.ooo_to_mem.issueLda(i).bits.uop.pc) // for s1
        l1Prefetcher.stride_train(i).bits.uop.pc := Mux(
          loadUnits(i).io.s2_ptr_chasing,
          RegEnable(loadPc, loadUnits(i).io.s2_prefetch_spec),
          RegEnable(RegEnable(loadPc, loadUnits(i).io.s1_prefetch_spec), loadUnits(i).io.s2_prefetch_spec)
        )
      }
      for (i <- 0 until HyuCnt) {
        val source = hybridUnits(i).io.prefetch_train_l1
        l1Prefetcher.stride_train.drop(LduCnt)(i).valid := source.valid && source.bits.isFirstIssue && (
          source.bits.miss || isFromStride(source.bits.meta_prefetch)
        )
        l1Prefetcher.stride_train.drop(LduCnt)(i).bits := source.bits
        l1Prefetcher.stride_train.drop(LduCnt)(i).bits.uop.pc := Mux(
          hybridUnits(i).io.ldu_io.s2_ptr_chasing,
          RegNext(io.ooo_to_mem.hybridPc(i)),
          RegNext(RegNext(io.ooo_to_mem.hybridPc(i)))
        )
      }
      l1Prefetcher
  }
  // load prefetch to l1 Dcache
  l1PrefetcherOpt match {
    case Some(pf) => l1_pf_req <> Pipeline(in = pf.io.l1_req, depth = 1, pipe = false, name = Some("pf_queue_to_ldu_reg"))
    case None =>
      l1_pf_req.valid := false.B
      l1_pf_req.bits := DontCare
  }
  val pf_train_on_hit = RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_train_on_hit, 2, Some(true.B))

  loadUnits.zipWithIndex.map(x => x._1.suggestName("LoadUnit_"+x._2))
  storeUnits.zipWithIndex.map(x => x._1.suggestName("StoreUnit_"+x._2))
  hybridUnits.zipWithIndex.map(x => x._1.suggestName("HybridUnit_"+x._2))
  val atomicsUnit = Module(new AtomicsUnit)


  val ldaExeWbReqs = Wire(Vec(LduCnt, Decoupled(new MemExuOutput)))
  // atomicsUnit will overwrite the source from ldu if it is about to writeback
  val atomicWritebackOverride = Mux(
    atomicsUnit.io.out.valid,
    atomicsUnit.io.out.bits,
    loadUnits(AtomicWBPort).io.ldout.bits
  )
  ldaExeWbReqs(AtomicWBPort).valid := atomicsUnit.io.out.valid || loadUnits(AtomicWBPort).io.ldout.valid
  ldaExeWbReqs(AtomicWBPort).bits := atomicWritebackOverride
  atomicsUnit.io.out.ready := ldaExeWbReqs(AtomicWBPort).ready
  loadUnits(AtomicWBPort).io.ldout.ready := ldaExeWbReqs(AtomicWBPort).ready

  val st_data_atomics = Seq.tabulate(StdCnt)(i =>
    stData(i).valid && FuType.storeIsAMO(stData(i).bits.uop.fuType)
  )

  // misalignBuffer will overwrite the source from ldu if it is about to writeback
  val misalignWritebackOverride = Mux(
    loadUnits(MisalignWBPort).io.ldout.valid,
    loadUnits(MisalignWBPort).io.ldout.bits,
    loadMisalignBuffer.io.writeBack.bits
  )
  ldaExeWbReqs(MisalignWBPort).valid := loadMisalignBuffer.io.writeBack.valid || loadUnits(MisalignWBPort).io.ldout.valid
  ldaExeWbReqs(MisalignWBPort).bits := misalignWritebackOverride
  loadMisalignBuffer.io.writeBack.ready := ldaExeWbReqs(MisalignWBPort).ready && !loadUnits(MisalignWBPort).io.ldout.valid
  loadMisalignBuffer.io.loadOutValid := loadUnits(MisalignWBPort).io.ldout.valid
  loadMisalignBuffer.io.loadVecOutValid := loadUnits(MisalignWBPort).io.vecldout.valid
  loadUnits(MisalignWBPort).io.ldout.ready := ldaExeWbReqs(MisalignWBPort).ready
  ldaExeWbReqs(MisalignWBPort).bits.isFromLoadUnit := loadUnits(MisalignWBPort).io.ldout.bits.isFromLoadUnit || loadMisalignBuffer.io.writeBack.valid

  // loadUnit will overwrite the source from uncache if it is about to writeback
  ldaExeWbReqs(UncacheWBPort) <> loadUnits(UncacheWBPort).io.ldout
  io.mem_to_ooo.writebackLda <> ldaExeWbReqs
  io.mem_to_ooo.writebackSta <> storeUnits.map(_.io.stout)
  io.mem_to_ooo.writebackStd.zip(stdExeUnits).foreach {x =>
    x._1.bits := x._2.io.out.bits
    // AMOs do not need to write back std now.
    x._1.valid := x._2.io.out.fire && !FuType.storeIsAMO(x._2.io.out.bits.uop.fuType)
  }
  io.mem_to_ooo.writebackHyuLda <> hybridUnits.map(_.io.ldout)
  io.mem_to_ooo.writebackHyuSta <> hybridUnits.map(_.io.stout)
  io.mem_to_ooo.otherFastWakeup := DontCare
  io.mem_to_ooo.otherFastWakeup.drop(HyuCnt).take(LduCnt).zip(loadUnits.map(_.io.fast_uop)).foreach{case(a,b)=> a := b}
  io.mem_to_ooo.otherFastWakeup.take(HyuCnt).zip(hybridUnits.map(_.io.ldu_io.fast_uop)).foreach{case(a,b)=> a:=b}
  val stOut = io.mem_to_ooo.writebackSta ++ io.mem_to_ooo.writebackHyuSta

  // prefetch to l1 req
  // Stream's confidence is always 1
  // (LduCnt + HyuCnt) l1_pf_reqs ?
  loadUnits.foreach(load_unit => {
    load_unit.io.prefetch_req.valid <> l1_pf_req.valid
    load_unit.io.prefetch_req.bits <> l1_pf_req.bits
  })

  hybridUnits.foreach(hybrid_unit => {
    hybrid_unit.io.ldu_io.prefetch_req.valid <> l1_pf_req.valid
    hybrid_unit.io.ldu_io.prefetch_req.bits <> l1_pf_req.bits
  })

  // NOTE: loadUnits(0) has higher bank conflict and miss queue arb priority than loadUnits(1) and loadUnits(2)
  // when loadUnits(1)/loadUnits(2) stage 0 is busy, hw prefetch will never use that pipeline
  val LowConfPorts = if (LduCnt == 2) Seq(1) else if (LduCnt == 3) Seq(1, 2) else Seq(0)
  LowConfPorts.map{case i => loadUnits(i).io.prefetch_req.bits.confidence := 0.U}
  hybridUnits.foreach(hybrid_unit => { hybrid_unit.io.ldu_io.prefetch_req.bits.confidence := 0.U })

  val canAcceptHighConfPrefetch = loadUnits.map(_.io.canAcceptHighConfPrefetch) ++
    hybridUnits.map(_.io.canAcceptLowConfPrefetch)
  val canAcceptLowConfPrefetch = loadUnits.map(_.io.canAcceptLowConfPrefetch) ++
    hybridUnits.map(_.io.canAcceptLowConfPrefetch)
  l1_pf_req.ready := (0 until LduCnt + HyuCnt).map{
    case i => {
      if (LowConfPorts.contains(i)) {
        loadUnits(i).io.canAcceptLowConfPrefetch
      } else {
        Mux(l1_pf_req.bits.confidence === 1.U, canAcceptHighConfPrefetch(i), canAcceptLowConfPrefetch(i))
      }
    }
  }.reduce(_ || _)

  // l1 pf fuzzer interface
  val DebugEnableL1PFFuzzer = false
  if (DebugEnableL1PFFuzzer) {
    // l1 pf req fuzzer
    val fuzzer = Module(new L1PrefetchFuzzer())
    fuzzer.io.vaddr := DontCare
    fuzzer.io.paddr := DontCare

    // override load_unit prefetch_req
    loadUnits.foreach(load_unit => {
      load_unit.io.prefetch_req.valid <> fuzzer.io.req.valid
      load_unit.io.prefetch_req.bits <> fuzzer.io.req.bits
    })

    // override hybrid_unit prefetch_req
    hybridUnits.foreach(hybrid_unit => {
      hybrid_unit.io.ldu_io.prefetch_req.valid <> fuzzer.io.req.valid
      hybrid_unit.io.ldu_io.prefetch_req.bits <> fuzzer.io.req.bits
    })

    fuzzer.io.req.ready := l1_pf_req.ready
  }

  // TODO: fast load wakeup
  val lsq = Module(new LsqWrapper)
  val sbuffer = Module(new Sbuffer)
  // if you wants to stress test dcache store, use FakeSbuffer
  // val sbuffer = Module(new FakeSbuffer) // out of date now
  io.mem_to_ooo.stIssuePtr := lsq.io.issuePtrExt

  dcache.io.hartId := io.hartId
  lsq.io.hartId := io.hartId
  sbuffer.io.hartId := io.hartId
  atomicsUnit.io.hartId := io.hartId

  dcache.io.lqEmpty := lsq.io.lqEmpty

  // load/store prefetch to l2 cache
  prefetcherOpt.foreach(sms_pf => {
    l1PrefetcherOpt.foreach(l1_pf => {
      val sms_pf_to_l2 = DelayNWithValid(sms_pf.io.l2_req, 2)
      val l1_pf_to_l2 = DelayNWithValid(l1_pf.io.l2_req, 2)

      // the L1 stream prefetcher has priority over SMS when both request L2
      outer.l2_pf_sender_opt.get.out.head._1.addr_valid := sms_pf_to_l2.valid || l1_pf_to_l2.valid
      outer.l2_pf_sender_opt.get.out.head._1.addr := Mux(l1_pf_to_l2.valid, l1_pf_to_l2.bits.addr, sms_pf_to_l2.bits.addr)
      outer.l2_pf_sender_opt.get.out.head._1.pf_source := Mux(l1_pf_to_l2.valid, l1_pf_to_l2.bits.source, sms_pf_to_l2.bits.source)
      outer.l2_pf_sender_opt.get.out.head._1.l2_pf_en := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_enable, 2, Some(true.B))

      sms_pf.io.enable := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable, 2, Some(false.B))

      val l2_trace = Wire(new LoadPfDbBundle)
      l2_trace.paddr := outer.l2_pf_sender_opt.get.out.head._1.addr
      val table = ChiselDB.createTable(s"L2PrefetchTrace$hartId", new LoadPfDbBundle, basicDB = false)
      table.log(l2_trace, l1_pf_to_l2.valid, "StreamPrefetchTrace", clock, reset)
      table.log(l2_trace, !l1_pf_to_l2.valid && sms_pf_to_l2.valid, "L2PrefetchTrace", clock, reset)

      val l1_pf_to_l3 = ValidIODelay(l1_pf.io.l3_req, 4)
      outer.l3_pf_sender_opt.foreach(_.out.head._1.addr_valid := l1_pf_to_l3.valid)
outer.l3_pf_sender_opt.foreach(_.out.head._1.addr := l1_pf_to_l3.bits) 642c590fb32Scz4e outer.l3_pf_sender_opt.foreach(_.out.head._1.l2_pf_en := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_enable, 4, Some(true.B))) 643c590fb32Scz4e 644c590fb32Scz4e val l3_trace = Wire(new LoadPfDbBundle) 645c590fb32Scz4e l3_trace.paddr := outer.l3_pf_sender_opt.map(_.out.head._1.addr).getOrElse(0.U) 646c590fb32Scz4e val l3_table = ChiselDB.createTable(s"L3PrefetchTrace$hartId", new LoadPfDbBundle, basicDB = false) 647c590fb32Scz4e l3_table.log(l3_trace, l1_pf_to_l3.valid, "StreamPrefetchTrace", clock, reset) 648c590fb32Scz4e 649c590fb32Scz4e XSPerfAccumulate("prefetch_fire_l2", outer.l2_pf_sender_opt.get.out.head._1.addr_valid) 650c590fb32Scz4e XSPerfAccumulate("prefetch_fire_l3", outer.l3_pf_sender_opt.map(_.out.head._1.addr_valid).getOrElse(false.B)) 651c590fb32Scz4e XSPerfAccumulate("l1pf_fire_l2", l1_pf_to_l2.valid) 652c590fb32Scz4e XSPerfAccumulate("sms_fire_l2", !l1_pf_to_l2.valid && sms_pf_to_l2.valid) 653c590fb32Scz4e XSPerfAccumulate("sms_block_by_l1pf", l1_pf_to_l2.valid && sms_pf_to_l2.valid) 654c590fb32Scz4e }) 655c590fb32Scz4e }) 656c590fb32Scz4e 657c590fb32Scz4e // ptw 658c590fb32Scz4e val sfence = RegNext(RegNext(io.ooo_to_mem.sfence)) 659c590fb32Scz4e val tlbcsr = RegNext(RegNext(io.ooo_to_mem.tlbCsr)) 660c590fb32Scz4e private val ptw = outer.ptw.module 661c590fb32Scz4e private val ptw_to_l2_buffer = outer.ptw_to_l2_buffer.module 662c590fb32Scz4e private val l1d_to_l2_buffer = outer.l1d_to_l2_buffer.module 663c590fb32Scz4e ptw.io.hartId := io.hartId 664c590fb32Scz4e ptw.io.sfence <> sfence 665c590fb32Scz4e ptw.io.csr.tlb <> tlbcsr 666c590fb32Scz4e ptw.io.csr.distribute_csr <> csrCtrl.distribute_csr 667c590fb32Scz4e 668c590fb32Scz4e val perfEventsPTW = if (!coreParams.softPTW) { 669c590fb32Scz4e ptw.getPerfEvents 670c590fb32Scz4e } else { 671c590fb32Scz4e Seq() 672c590fb32Scz4e } 673c590fb32Scz4e 674c590fb32Scz4e // dtlb 675c590fb32Scz4e val dtlb_ld_tlb_ld = 
Module(new TLBNonBlock(LduCnt + HyuCnt + 1, 2, ldtlbParams)) 676c590fb32Scz4e val dtlb_st_tlb_st = Module(new TLBNonBlock(StaCnt, 1, sttlbParams)) 677c590fb32Scz4e val dtlb_prefetch_tlb_prefetch = Module(new TLBNonBlock(2, 2, pftlbParams)) 678c590fb32Scz4e val dtlb_ld = Seq(dtlb_ld_tlb_ld.io) 679c590fb32Scz4e val dtlb_st = Seq(dtlb_st_tlb_st.io) 680c590fb32Scz4e val dtlb_prefetch = Seq(dtlb_prefetch_tlb_prefetch.io) 681c590fb32Scz4e /* tlb vec && constant variable */ 682c590fb32Scz4e val dtlb = dtlb_ld ++ dtlb_st ++ dtlb_prefetch 683c590fb32Scz4e val (dtlb_ld_idx, dtlb_st_idx, dtlb_pf_idx) = (0, 1, 2) 684c590fb32Scz4e val TlbSubSizeVec = Seq(LduCnt + HyuCnt + 1, StaCnt, 2) // (load + hyu + stream pf, store, sms+l2bop) 685c590fb32Scz4e val DTlbSize = TlbSubSizeVec.sum 686c590fb32Scz4e val TlbStartVec = TlbSubSizeVec.scanLeft(0)(_ + _).dropRight(1) 687c590fb32Scz4e val TlbEndVec = TlbSubSizeVec.scanLeft(0)(_ + _).drop(1) 688c590fb32Scz4e 689c590fb32Scz4e val ptwio = Wire(new VectorTlbPtwIO(DTlbSize)) 690c590fb32Scz4e val dtlb_reqs = dtlb.map(_.requestor).flatten 691c590fb32Scz4e val dtlb_pmps = dtlb.map(_.pmp).flatten 692c590fb32Scz4e dtlb.map(_.hartId := io.hartId) 693c590fb32Scz4e dtlb.map(_.sfence := sfence) 694c590fb32Scz4e dtlb.map(_.csr := tlbcsr) 695c590fb32Scz4e dtlb.map(_.flushPipe.map(a => a := false.B)) // non-block doesn't need 696c590fb32Scz4e dtlb.map(_.redirect := redirect) 697c590fb32Scz4e if (refillBothTlb) { 698c590fb32Scz4e require(ldtlbParams.outReplace == sttlbParams.outReplace) 699c590fb32Scz4e require(ldtlbParams.outReplace == hytlbParams.outReplace) 700c590fb32Scz4e require(ldtlbParams.outReplace == pftlbParams.outReplace) 701c590fb32Scz4e require(ldtlbParams.outReplace) 702c590fb32Scz4e 703c590fb32Scz4e val replace = Module(new TlbReplace(DTlbSize, ldtlbParams)) 704c590fb32Scz4e replace.io.apply_sep(dtlb_ld.map(_.replace) ++ dtlb_st.map(_.replace) ++ dtlb_prefetch.map(_.replace), ptwio.resp.bits.data.s1.entry.tag) 705c590fb32Scz4e } else { 
706c590fb32Scz4e // TODO: there will be bugs in TlbReplace when outReplace enable, since the order of Hyu is not right. 707c590fb32Scz4e if (ldtlbParams.outReplace) { 708c590fb32Scz4e val replace_ld = Module(new TlbReplace(LduCnt + 1, ldtlbParams)) 709c590fb32Scz4e replace_ld.io.apply_sep(dtlb_ld.map(_.replace), ptwio.resp.bits.data.s1.entry.tag) 710c590fb32Scz4e } 711c590fb32Scz4e if (hytlbParams.outReplace) { 712c590fb32Scz4e val replace_hy = Module(new TlbReplace(HyuCnt, hytlbParams)) 713c590fb32Scz4e replace_hy.io.apply_sep(dtlb_ld.map(_.replace), ptwio.resp.bits.data.s1.entry.tag) 714c590fb32Scz4e } 715c590fb32Scz4e if (sttlbParams.outReplace) { 716c590fb32Scz4e val replace_st = Module(new TlbReplace(StaCnt, sttlbParams)) 717c590fb32Scz4e replace_st.io.apply_sep(dtlb_st.map(_.replace), ptwio.resp.bits.data.s1.entry.tag) 718c590fb32Scz4e } 719c590fb32Scz4e if (pftlbParams.outReplace) { 720c590fb32Scz4e val replace_pf = Module(new TlbReplace(2, pftlbParams)) 721c590fb32Scz4e replace_pf.io.apply_sep(dtlb_prefetch.map(_.replace), ptwio.resp.bits.data.s1.entry.tag) 722c590fb32Scz4e } 723c590fb32Scz4e } 724c590fb32Scz4e 725c590fb32Scz4e val ptw_resp_next = RegEnable(ptwio.resp.bits, ptwio.resp.valid) 726c590fb32Scz4e val ptw_resp_v = RegNext(ptwio.resp.valid && !(sfence.valid && tlbcsr.satp.changed && tlbcsr.vsatp.changed && tlbcsr.hgatp.changed), init = false.B) 727c590fb32Scz4e ptwio.resp.ready := true.B 728c590fb32Scz4e 729c590fb32Scz4e val tlbreplay = WireInit(VecInit(Seq.fill(LdExuCnt)(false.B))) 730c590fb32Scz4e val tlbreplay_reg = GatedValidRegNext(tlbreplay) 731c590fb32Scz4e val dtlb_ld0_tlbreplay_reg = GatedValidRegNext(dtlb_ld(0).tlbreplay) 732c590fb32Scz4e 733c590fb32Scz4e if (backendParams.debugEn){ dontTouch(tlbreplay) } 734c590fb32Scz4e 735c590fb32Scz4e for (i <- 0 until LdExuCnt) { 736c590fb32Scz4e tlbreplay(i) := dtlb_ld(0).ptw.req(i).valid && ptw_resp_next.vector(0) && ptw_resp_v && 737c590fb32Scz4e 
ptw_resp_next.data.hit(dtlb_ld(0).ptw.req(i).bits.vpn, tlbcsr.satp.asid, tlbcsr.vsatp.asid, tlbcsr.hgatp.vmid, allType = true, ignoreAsid = true) 738c590fb32Scz4e } 739c590fb32Scz4e 740c590fb32Scz4e dtlb.flatMap(a => a.ptw.req) 741c590fb32Scz4e .zipWithIndex 742c590fb32Scz4e .foreach{ case (tlb, i) => 743c590fb32Scz4e tlb.ready := ptwio.req(i).ready 744c590fb32Scz4e ptwio.req(i).bits := tlb.bits 745c590fb32Scz4e val vector_hit = if (refillBothTlb) Cat(ptw_resp_next.vector).orR 746c590fb32Scz4e else if (i < TlbEndVec(dtlb_ld_idx)) Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_ld_idx), TlbEndVec(dtlb_ld_idx))).orR 747c590fb32Scz4e else if (i < TlbEndVec(dtlb_st_idx)) Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_st_idx), TlbEndVec(dtlb_st_idx))).orR 748c590fb32Scz4e else Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_pf_idx), TlbEndVec(dtlb_pf_idx))).orR 749c590fb32Scz4e ptwio.req(i).valid := tlb.valid && !(ptw_resp_v && vector_hit && ptw_resp_next.data.hit(tlb.bits.vpn, tlbcsr.satp.asid, tlbcsr.vsatp.asid, tlbcsr.hgatp.vmid, allType = true, ignoreAsid = true)) 750c590fb32Scz4e } 751c590fb32Scz4e dtlb.foreach(_.ptw.resp.bits := ptw_resp_next.data) 752c590fb32Scz4e if (refillBothTlb) { 753c590fb32Scz4e dtlb.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector).orR) 754c590fb32Scz4e } else { 755c590fb32Scz4e dtlb_ld.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_ld_idx), TlbEndVec(dtlb_ld_idx))).orR) 756c590fb32Scz4e dtlb_st.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_st_idx), TlbEndVec(dtlb_st_idx))).orR) 757c590fb32Scz4e dtlb_prefetch.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_pf_idx), TlbEndVec(dtlb_pf_idx))).orR) 758c590fb32Scz4e } 759c590fb32Scz4e dtlb_ld.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.take(LduCnt + HyuCnt + 1)).orR) 760c590fb32Scz4e dtlb_st.foreach(_.ptw.resp.bits.getGpa := 
Cat(ptw_resp_next.getGpa.slice(LduCnt + HyuCnt + 1, LduCnt + HyuCnt + 1 + StaCnt)).orR) 761c590fb32Scz4e dtlb_prefetch.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.drop(LduCnt + HyuCnt + 1 + StaCnt)).orR) 762c590fb32Scz4e 763c590fb32Scz4e val dtlbRepeater = PTWNewFilter(ldtlbParams.fenceDelay, ptwio, ptw.io.tlb(1), sfence, tlbcsr, l2tlbParams.dfilterSize) 764c590fb32Scz4e val itlbRepeater3 = PTWRepeaterNB(passReady = false, itlbParams.fenceDelay, io.fetch_to_mem.itlb, ptw.io.tlb(0), sfence, tlbcsr) 765c590fb32Scz4e 766c590fb32Scz4e lsq.io.debugTopDown.robHeadMissInDTlb := dtlbRepeater.io.rob_head_miss_in_tlb 767c590fb32Scz4e 768c590fb32Scz4e // pmp 769c590fb32Scz4e val pmp = Module(new PMP()) 770c590fb32Scz4e pmp.io.distribute_csr <> csrCtrl.distribute_csr 771c590fb32Scz4e 772c590fb32Scz4e val pmp_checkers = Seq.fill(DTlbSize)(Module(new PMPChecker(4, leaveHitMux = true))) 773c590fb32Scz4e val pmp_check = pmp_checkers.map(_.io) 774c590fb32Scz4e for ((p,d) <- pmp_check zip dtlb_pmps) { 7758882eb68SXin Tian if (HasBitmapCheck) { 7768882eb68SXin Tian p.apply(tlbcsr.mbmc.CMODE.asBool, tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, d) 7778882eb68SXin Tian } else { 778c590fb32Scz4e p.apply(tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, d) 7798882eb68SXin Tian } 780c590fb32Scz4e require(p.req.bits.size.getWidth == d.bits.size.getWidth) 781c590fb32Scz4e } 782c590fb32Scz4e 783c590fb32Scz4e for (i <- 0 until LduCnt) { 784c590fb32Scz4e io.debug_ls.debugLsInfo(i) := loadUnits(i).io.debug_ls 785c590fb32Scz4e } 786c590fb32Scz4e for (i <- 0 until HyuCnt) { 787c590fb32Scz4e io.debug_ls.debugLsInfo.drop(LduCnt)(i) := hybridUnits(i).io.ldu_io.debug_ls 788c590fb32Scz4e } 789c590fb32Scz4e for (i <- 0 until StaCnt) { 790c590fb32Scz4e io.debug_ls.debugLsInfo.drop(LduCnt + HyuCnt)(i) := storeUnits(i).io.debug_ls 791c590fb32Scz4e } 792c590fb32Scz4e for (i <- 0 until HyuCnt) { 793c590fb32Scz4e io.debug_ls.debugLsInfo.drop(LduCnt + HyuCnt + StaCnt)(i) := 
hybridUnits(i).io.stu_io.debug_ls 794c590fb32Scz4e } 795c590fb32Scz4e 796c590fb32Scz4e io.mem_to_ooo.lsTopdownInfo := loadUnits.map(_.io.lsTopdownInfo) ++ hybridUnits.map(_.io.ldu_io.lsTopdownInfo) 797c590fb32Scz4e 798c590fb32Scz4e // trigger 799c590fb32Scz4e val tdata = RegInit(VecInit(Seq.fill(TriggerNum)(0.U.asTypeOf(new MatchTriggerIO)))) 800c590fb32Scz4e val tEnable = RegInit(VecInit(Seq.fill(TriggerNum)(false.B))) 801c590fb32Scz4e tEnable := csrCtrl.mem_trigger.tEnableVec 802c590fb32Scz4e when(csrCtrl.mem_trigger.tUpdate.valid) { 803c590fb32Scz4e tdata(csrCtrl.mem_trigger.tUpdate.bits.addr) := csrCtrl.mem_trigger.tUpdate.bits.tdata 804c590fb32Scz4e } 805c590fb32Scz4e val triggerCanRaiseBpExp = csrCtrl.mem_trigger.triggerCanRaiseBpExp 806c590fb32Scz4e val debugMode = csrCtrl.mem_trigger.debugMode 807c590fb32Scz4e 808c590fb32Scz4e val backendTriggerTimingVec = VecInit(tdata.map(_.timing)) 809c590fb32Scz4e val backendTriggerChainVec = VecInit(tdata.map(_.chain)) 810c590fb32Scz4e 811c590fb32Scz4e XSDebug(tEnable.asUInt.orR, "Debug Mode: At least one store trigger is enabled\n") 812c590fb32Scz4e for (j <- 0 until TriggerNum) 813c590fb32Scz4e PrintTriggerInfo(tEnable(j), tdata(j)) 814c590fb32Scz4e 815c590fb32Scz4e // The segment instruction is executed atomically. 816c590fb32Scz4e // After the segment instruction directive starts executing, no other instructions should be executed. 
817c590fb32Scz4e val vSegmentFlag = RegInit(false.B) 818c590fb32Scz4e 819c590fb32Scz4e when(GatedValidRegNext(vSegmentUnit.io.in.fire)) { 820c590fb32Scz4e vSegmentFlag := true.B 821c590fb32Scz4e }.elsewhen(GatedValidRegNext(vSegmentUnit.io.uopwriteback.valid)) { 822c590fb32Scz4e vSegmentFlag := false.B 823c590fb32Scz4e } 824c590fb32Scz4e 825522c7f99SAnzo val misalign_allow_spec = RegInit(true.B) 826522c7f99SAnzo val ldu_rollback_with_misalign_nack = loadUnits.map(ldu => 827522c7f99SAnzo ldu.io.lsq.ldin.bits.isFrmMisAlignBuf && ldu.io.lsq.ldin.bits.rep_info.rar_nack && ldu.io.rollback.valid 828522c7f99SAnzo ).reduce(_ || _) 829522c7f99SAnzo when (ldu_rollback_with_misalign_nack) { 830522c7f99SAnzo misalign_allow_spec := false.B 831522c7f99SAnzo } .elsewhen(lsq.io.rarValidCount < (LoadQueueRARSize - 4).U) { 832522c7f99SAnzo misalign_allow_spec := true.B 833522c7f99SAnzo } 834522c7f99SAnzo 835c590fb32Scz4e // LoadUnit 836c590fb32Scz4e val correctMissTrain = Constantin.createRecord(s"CorrectMissTrain$hartId", initValue = false) 837c590fb32Scz4e 838c590fb32Scz4e for (i <- 0 until LduCnt) { 839c590fb32Scz4e loadUnits(i).io.redirect <> redirect 840522c7f99SAnzo loadUnits(i).io.misalign_allow_spec := misalign_allow_spec 841c590fb32Scz4e 842c590fb32Scz4e // get input form dispatch 843c590fb32Scz4e loadUnits(i).io.ldin <> io.ooo_to_mem.issueLda(i) 844c590fb32Scz4e loadUnits(i).io.feedback_slow <> io.mem_to_ooo.ldaIqFeedback(i).feedbackSlow 845c590fb32Scz4e io.mem_to_ooo.ldaIqFeedback(i).feedbackFast := DontCare 846c590fb32Scz4e loadUnits(i).io.correctMissTrain := correctMissTrain 847c590fb32Scz4e io.mem_to_ooo.ldCancel.drop(HyuCnt)(i) := loadUnits(i).io.ldCancel 848c590fb32Scz4e io.mem_to_ooo.wakeup.drop(HyuCnt)(i) := loadUnits(i).io.wakeup 849c590fb32Scz4e 850c590fb32Scz4e // vector 851c590fb32Scz4e if (i < VlduCnt) { 852c590fb32Scz4e loadUnits(i).io.vecldout.ready := false.B 853c590fb32Scz4e } else { 854c590fb32Scz4e loadUnits(i).io.vecldin.valid := false.B 
855c590fb32Scz4e loadUnits(i).io.vecldin.bits := DontCare 856c590fb32Scz4e loadUnits(i).io.vecldout.ready := false.B 857c590fb32Scz4e } 858c590fb32Scz4e 859c590fb32Scz4e // fast replay 860c590fb32Scz4e loadUnits(i).io.fast_rep_in <> loadUnits(i).io.fast_rep_out 861c590fb32Scz4e 862c590fb32Scz4e // SoftPrefetch to frontend (prefetch.i) 863c590fb32Scz4e loadUnits(i).io.ifetchPrefetch <> io.ifetchPrefetch(i) 864c590fb32Scz4e 865c590fb32Scz4e // dcache access 866c590fb32Scz4e loadUnits(i).io.dcache <> dcache.io.lsu.load(i) 867c590fb32Scz4e if(i == 0){ 868c590fb32Scz4e vSegmentUnit.io.rdcache := DontCare 869c590fb32Scz4e dcache.io.lsu.load(i).req.valid := loadUnits(i).io.dcache.req.valid || vSegmentUnit.io.rdcache.req.valid 870c590fb32Scz4e dcache.io.lsu.load(i).req.bits := Mux1H(Seq( 871c590fb32Scz4e vSegmentUnit.io.rdcache.req.valid -> vSegmentUnit.io.rdcache.req.bits, 872c590fb32Scz4e loadUnits(i).io.dcache.req.valid -> loadUnits(i).io.dcache.req.bits 873c590fb32Scz4e )) 874c590fb32Scz4e vSegmentUnit.io.rdcache.req.ready := dcache.io.lsu.load(i).req.ready 875c590fb32Scz4e } 876c590fb32Scz4e 877c590fb32Scz4e // Dcache requests must also be preempted by the segment. 878c590fb32Scz4e when(vSegmentFlag){ 879c590fb32Scz4e loadUnits(i).io.dcache.req.ready := false.B // Dcache is preempted. 
880c590fb32Scz4e 881c590fb32Scz4e dcache.io.lsu.load(0).pf_source := vSegmentUnit.io.rdcache.pf_source 882c590fb32Scz4e dcache.io.lsu.load(0).s1_paddr_dup_lsu := vSegmentUnit.io.rdcache.s1_paddr_dup_lsu 883c590fb32Scz4e dcache.io.lsu.load(0).s1_paddr_dup_dcache := vSegmentUnit.io.rdcache.s1_paddr_dup_dcache 884c590fb32Scz4e dcache.io.lsu.load(0).s1_kill := vSegmentUnit.io.rdcache.s1_kill 885c590fb32Scz4e dcache.io.lsu.load(0).s2_kill := vSegmentUnit.io.rdcache.s2_kill 886c590fb32Scz4e dcache.io.lsu.load(0).s0_pc := vSegmentUnit.io.rdcache.s0_pc 887c590fb32Scz4e dcache.io.lsu.load(0).s1_pc := vSegmentUnit.io.rdcache.s1_pc 888c590fb32Scz4e dcache.io.lsu.load(0).s2_pc := vSegmentUnit.io.rdcache.s2_pc 889c590fb32Scz4e dcache.io.lsu.load(0).is128Req := vSegmentUnit.io.rdcache.is128Req 890c590fb32Scz4e }.otherwise { 891c590fb32Scz4e loadUnits(i).io.dcache.req.ready := dcache.io.lsu.load(i).req.ready 892c590fb32Scz4e 893c590fb32Scz4e dcache.io.lsu.load(0).pf_source := loadUnits(0).io.dcache.pf_source 894c590fb32Scz4e dcache.io.lsu.load(0).s1_paddr_dup_lsu := loadUnits(0).io.dcache.s1_paddr_dup_lsu 895c590fb32Scz4e dcache.io.lsu.load(0).s1_paddr_dup_dcache := loadUnits(0).io.dcache.s1_paddr_dup_dcache 896c590fb32Scz4e dcache.io.lsu.load(0).s1_kill := loadUnits(0).io.dcache.s1_kill 897c590fb32Scz4e dcache.io.lsu.load(0).s2_kill := loadUnits(0).io.dcache.s2_kill 898c590fb32Scz4e dcache.io.lsu.load(0).s0_pc := loadUnits(0).io.dcache.s0_pc 899c590fb32Scz4e dcache.io.lsu.load(0).s1_pc := loadUnits(0).io.dcache.s1_pc 900c590fb32Scz4e dcache.io.lsu.load(0).s2_pc := loadUnits(0).io.dcache.s2_pc 901c590fb32Scz4e dcache.io.lsu.load(0).is128Req := loadUnits(0).io.dcache.is128Req 902c590fb32Scz4e } 903c590fb32Scz4e 904c590fb32Scz4e // forward 905c590fb32Scz4e loadUnits(i).io.lsq.forward <> lsq.io.forward(i) 906c590fb32Scz4e loadUnits(i).io.sbuffer <> sbuffer.io.forward(i) 907c590fb32Scz4e loadUnits(i).io.ubuffer <> uncache.io.forward(i) 908c590fb32Scz4e loadUnits(i).io.tl_d_channel := 
dcache.io.lsu.forward_D(i) 909c590fb32Scz4e loadUnits(i).io.forward_mshr <> dcache.io.lsu.forward_mshr(i) 910c590fb32Scz4e // ld-ld violation check 911c590fb32Scz4e loadUnits(i).io.lsq.ldld_nuke_query <> lsq.io.ldu.ldld_nuke_query(i) 912c590fb32Scz4e loadUnits(i).io.lsq.stld_nuke_query <> lsq.io.ldu.stld_nuke_query(i) 913522c7f99SAnzo // loadqueue old ptr 914522c7f99SAnzo loadUnits(i).io.lsq.lqDeqPtr := lsq.io.lqDeqPtr 915c590fb32Scz4e loadUnits(i).io.csrCtrl <> csrCtrl 916c590fb32Scz4e // dcache refill req 917c590fb32Scz4e // loadUnits(i).io.refill <> delayedDcacheRefill 918c590fb32Scz4e // dtlb 919c590fb32Scz4e loadUnits(i).io.tlb <> dtlb_reqs.take(LduCnt)(i) 920c590fb32Scz4e if(i == 0 ){ // port 0 assign to vsegmentUnit 921c590fb32Scz4e val vsegmentDtlbReqValid = vSegmentUnit.io.dtlb.req.valid // segment tlb resquest need to delay 1 cycle 922c590fb32Scz4e dtlb_reqs.take(LduCnt)(i).req.valid := loadUnits(i).io.tlb.req.valid || RegNext(vsegmentDtlbReqValid) 923c590fb32Scz4e vSegmentUnit.io.dtlb.req.ready := dtlb_reqs.take(LduCnt)(i).req.ready 924c590fb32Scz4e dtlb_reqs.take(LduCnt)(i).req.bits := ParallelPriorityMux(Seq( 925c590fb32Scz4e RegNext(vsegmentDtlbReqValid) -> RegEnable(vSegmentUnit.io.dtlb.req.bits, vsegmentDtlbReqValid), 926c590fb32Scz4e loadUnits(i).io.tlb.req.valid -> loadUnits(i).io.tlb.req.bits 927c590fb32Scz4e )) 928c590fb32Scz4e } 929c590fb32Scz4e // pmp 930c590fb32Scz4e loadUnits(i).io.pmp <> pmp_check(i).resp 931c590fb32Scz4e // st-ld violation query 932c590fb32Scz4e val stld_nuke_query = storeUnits.map(_.io.stld_nuke_query) ++ hybridUnits.map(_.io.stu_io.stld_nuke_query) 933c590fb32Scz4e for (s <- 0 until StorePipelineWidth) { 934c590fb32Scz4e loadUnits(i).io.stld_nuke_query(s) := stld_nuke_query(s) 935c590fb32Scz4e } 936c590fb32Scz4e loadUnits(i).io.lq_rep_full <> lsq.io.lq_rep_full 937c590fb32Scz4e // load prefetch train 938c590fb32Scz4e prefetcherOpt.foreach(pf => { 939c590fb32Scz4e // sms will train on all miss load sources 
940c590fb32Scz4e val source = loadUnits(i).io.prefetch_train 941c590fb32Scz4e pf.io.ld_in(i).valid := Mux(pf_train_on_hit, 942c590fb32Scz4e source.valid, 943c590fb32Scz4e source.valid && source.bits.isFirstIssue && source.bits.miss 944c590fb32Scz4e ) 945c590fb32Scz4e pf.io.ld_in(i).bits := source.bits 946c590fb32Scz4e val loadPc = RegNext(io.ooo_to_mem.issueLda(i).bits.uop.pc) // for s1 947c590fb32Scz4e pf.io.ld_in(i).bits.uop.pc := Mux( 948c590fb32Scz4e loadUnits(i).io.s2_ptr_chasing, 949c590fb32Scz4e RegEnable(loadPc, loadUnits(i).io.s2_prefetch_spec), 950c590fb32Scz4e RegEnable(RegEnable(loadPc, loadUnits(i).io.s1_prefetch_spec), loadUnits(i).io.s2_prefetch_spec) 951c590fb32Scz4e ) 952c590fb32Scz4e }) 953c590fb32Scz4e l1PrefetcherOpt.foreach(pf => { 954c590fb32Scz4e // stream will train on all load sources 955c590fb32Scz4e val source = loadUnits(i).io.prefetch_train_l1 956c590fb32Scz4e pf.io.ld_in(i).valid := source.valid && source.bits.isFirstIssue 957c590fb32Scz4e pf.io.ld_in(i).bits := source.bits 958c590fb32Scz4e }) 959c590fb32Scz4e 960c590fb32Scz4e // load to load fast forward: load(i) prefers data(i) 961c590fb32Scz4e val l2l_fwd_out = loadUnits.map(_.io.l2l_fwd_out) ++ hybridUnits.map(_.io.ldu_io.l2l_fwd_out) 962c590fb32Scz4e val fastPriority = (i until LduCnt + HyuCnt) ++ (0 until i) 963c590fb32Scz4e val fastValidVec = fastPriority.map(j => l2l_fwd_out(j).valid) 964c590fb32Scz4e val fastDataVec = fastPriority.map(j => l2l_fwd_out(j).data) 965c590fb32Scz4e val fastErrorVec = fastPriority.map(j => l2l_fwd_out(j).dly_ld_err) 966c590fb32Scz4e val fastMatchVec = fastPriority.map(j => io.ooo_to_mem.loadFastMatch(i)(j)) 967c590fb32Scz4e loadUnits(i).io.l2l_fwd_in.valid := VecInit(fastValidVec).asUInt.orR 968c590fb32Scz4e loadUnits(i).io.l2l_fwd_in.data := ParallelPriorityMux(fastValidVec, fastDataVec) 969c590fb32Scz4e loadUnits(i).io.l2l_fwd_in.dly_ld_err := ParallelPriorityMux(fastValidVec, fastErrorVec) 970c590fb32Scz4e val fastMatch = 
ParallelPriorityMux(fastValidVec, fastMatchVec) 971c590fb32Scz4e loadUnits(i).io.ld_fast_match := fastMatch 972c590fb32Scz4e loadUnits(i).io.ld_fast_imm := io.ooo_to_mem.loadFastImm(i) 973c590fb32Scz4e loadUnits(i).io.ld_fast_fuOpType := io.ooo_to_mem.loadFastFuOpType(i) 974c590fb32Scz4e loadUnits(i).io.replay <> lsq.io.replay(i) 975c590fb32Scz4e 976c590fb32Scz4e val l2_hint = RegNext(io.l2_hint) 977c590fb32Scz4e 978c590fb32Scz4e // L2 Hint for DCache 979c590fb32Scz4e dcache.io.l2_hint <> l2_hint 980c590fb32Scz4e 981c590fb32Scz4e loadUnits(i).io.l2_hint <> l2_hint 982c590fb32Scz4e loadUnits(i).io.tlb_hint.id := dtlbRepeater.io.hint.get.req(i).id 983c590fb32Scz4e loadUnits(i).io.tlb_hint.full := dtlbRepeater.io.hint.get.req(i).full || 984c590fb32Scz4e tlbreplay_reg(i) || dtlb_ld0_tlbreplay_reg(i) 985c590fb32Scz4e 986c590fb32Scz4e // passdown to lsq (load s2) 987c590fb32Scz4e lsq.io.ldu.ldin(i) <> loadUnits(i).io.lsq.ldin 988c590fb32Scz4e if (i == UncacheWBPort) { 989c590fb32Scz4e lsq.io.ldout(i) <> loadUnits(i).io.lsq.uncache 990c590fb32Scz4e } else { 991c590fb32Scz4e lsq.io.ldout(i).ready := true.B 992c590fb32Scz4e loadUnits(i).io.lsq.uncache.valid := false.B 993c590fb32Scz4e loadUnits(i).io.lsq.uncache.bits := DontCare 994c590fb32Scz4e } 995c590fb32Scz4e lsq.io.ld_raw_data(i) <> loadUnits(i).io.lsq.ld_raw_data 996c590fb32Scz4e lsq.io.ncOut(i) <> loadUnits(i).io.lsq.nc_ldin 997c590fb32Scz4e lsq.io.l2_hint.valid := l2_hint.valid 998c590fb32Scz4e lsq.io.l2_hint.bits.sourceId := l2_hint.bits.sourceId 999c590fb32Scz4e lsq.io.l2_hint.bits.isKeyword := l2_hint.bits.isKeyword 1000c590fb32Scz4e 1001c590fb32Scz4e lsq.io.tlb_hint <> dtlbRepeater.io.hint.get 1002c590fb32Scz4e 1003c590fb32Scz4e // connect misalignBuffer 1004c590fb32Scz4e loadMisalignBuffer.io.req(i) <> loadUnits(i).io.misalign_buf 1005c590fb32Scz4e 1006c590fb32Scz4e if (i == MisalignWBPort) { 1007c590fb32Scz4e loadUnits(i).io.misalign_ldin <> loadMisalignBuffer.io.splitLoadReq 1008c590fb32Scz4e 
loadUnits(i).io.misalign_ldout <> loadMisalignBuffer.io.splitLoadResp 1009c590fb32Scz4e } else { 1010c590fb32Scz4e loadUnits(i).io.misalign_ldin.valid := false.B 1011c590fb32Scz4e loadUnits(i).io.misalign_ldin.bits := DontCare 1012c590fb32Scz4e } 1013c590fb32Scz4e 1014c590fb32Scz4e // alter writeback exception info 1015c590fb32Scz4e io.mem_to_ooo.s3_delayed_load_error(i) := loadUnits(i).io.s3_dly_ld_err 1016c590fb32Scz4e 1017c590fb32Scz4e // update mem dependency predictor 1018c590fb32Scz4e // io.memPredUpdate(i) := DontCare 1019c590fb32Scz4e 1020c590fb32Scz4e // -------------------------------- 1021c590fb32Scz4e // Load Triggers 1022c590fb32Scz4e // -------------------------------- 1023c590fb32Scz4e loadUnits(i).io.fromCsrTrigger.tdataVec := tdata 1024c590fb32Scz4e loadUnits(i).io.fromCsrTrigger.tEnableVec := tEnable 1025c590fb32Scz4e loadUnits(i).io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp 1026c590fb32Scz4e loadUnits(i).io.fromCsrTrigger.debugMode := debugMode 1027c590fb32Scz4e } 1028c590fb32Scz4e 1029c590fb32Scz4e for (i <- 0 until HyuCnt) { 1030c590fb32Scz4e hybridUnits(i).io.redirect <> redirect 1031c590fb32Scz4e 1032c590fb32Scz4e // get input from dispatch 1033c590fb32Scz4e hybridUnits(i).io.lsin <> io.ooo_to_mem.issueHya(i) 1034c590fb32Scz4e hybridUnits(i).io.feedback_slow <> io.mem_to_ooo.hyuIqFeedback(i).feedbackSlow 1035c590fb32Scz4e hybridUnits(i).io.feedback_fast <> io.mem_to_ooo.hyuIqFeedback(i).feedbackFast 1036c590fb32Scz4e hybridUnits(i).io.correctMissTrain := correctMissTrain 1037c590fb32Scz4e io.mem_to_ooo.ldCancel.take(HyuCnt)(i) := hybridUnits(i).io.ldu_io.ldCancel 1038c590fb32Scz4e io.mem_to_ooo.wakeup.take(HyuCnt)(i) := hybridUnits(i).io.ldu_io.wakeup 1039c590fb32Scz4e 1040c590fb32Scz4e // ------------------------------------ 1041c590fb32Scz4e // Load Port 1042c590fb32Scz4e // ------------------------------------ 1043c590fb32Scz4e // fast replay 1044c590fb32Scz4e hybridUnits(i).io.ldu_io.fast_rep_in <> 
      hybridUnits(i).io.ldu_io.fast_rep_out

    // Dcache ports: the hybrid unit owns one load port and one store-address
    // port, placed after the dedicated load/store units' ports.
    hybridUnits(i).io.ldu_io.dcache <> dcache.io.lsu.load(LduCnt + i)
    hybridUnits(i).io.stu_io.dcache <> dcache.io.lsu.sta(StaCnt + i)

    // Store-to-load forwarding sources: store queue, committed-store buffer,
    // uncache buffer, TileLink D channel and dcache MSHRs.
    hybridUnits(i).io.ldu_io.lsq.forward <> lsq.io.forward(LduCnt + i)
    hybridUnits(i).io.ldu_io.sbuffer <> sbuffer.io.forward(LduCnt + i)
    hybridUnits(i).io.ldu_io.ubuffer <> uncache.io.forward(LduCnt + i)
    // hybridUnits(i).io.ldu_io.vec_forward <> vsFlowQueue.io.forward(LduCnt + i)
    hybridUnits(i).io.ldu_io.vec_forward := DontCare
    hybridUnits(i).io.ldu_io.tl_d_channel := dcache.io.lsu.forward_D(LduCnt + i)
    hybridUnits(i).io.ldu_io.forward_mshr <> dcache.io.lsu.forward_mshr(LduCnt + i)
    // ld-ld violation check
    hybridUnits(i).io.ldu_io.lsq.ldld_nuke_query <> lsq.io.ldu.ldld_nuke_query(LduCnt + i)
    hybridUnits(i).io.ldu_io.lsq.stld_nuke_query <> lsq.io.ldu.stld_nuke_query(LduCnt + i)
    hybridUnits(i).io.csrCtrl <> csrCtrl
    // TLB hint for replay: "full" also accounts for the registered tlbreplay
    // signals so a hinted wakeup is not missed by one cycle.
    hybridUnits(i).io.ldu_io.tlb_hint.id := dtlbRepeater.io.hint.get.req(LduCnt + i).id
    hybridUnits(i).io.ldu_io.tlb_hint.full := dtlbRepeater.io.hint.get.req(LduCnt + i).full ||
      tlbreplay_reg(LduCnt + i) || dtlb_ld0_tlbreplay_reg(LduCnt + i)

    // dtlb: hybrid units share the load DTLB, after the pure load units.
    hybridUnits(i).io.tlb <> dtlb_ld.head.requestor(LduCnt + i)
    // pmp
    hybridUnits(i).io.pmp <> pmp_check.drop(LduCnt)(i).resp
    // st-ld violation query: collect nuke queries from every store-address
    // pipe (dedicated store units plus hybrid store pipes).
    val stld_nuke_query = VecInit(storeUnits.map(_.io.stld_nuke_query) ++ hybridUnits.map(_.io.stu_io.stld_nuke_query))
    hybridUnits(i).io.ldu_io.stld_nuke_query := stld_nuke_query
    hybridUnits(i).io.ldu_io.lq_rep_full <> lsq.io.lq_rep_full
    // Load prefetch training. When pf_train_on_hit is clear, only
    // first-issue misses train the prefetcher.
    prefetcherOpt.foreach(pf => {
      val source = hybridUnits(i).io.prefetch_train
      pf.io.ld_in(LduCnt + i).valid := Mux(pf_train_on_hit,
        source.valid,
        source.valid && source.bits.isFirstIssue && source.bits.miss
      )
      pf.io.ld_in(LduCnt + i).bits := source.bits
      // Under pointer chasing the PC arrives one cycle earlier, hence the Mux
      // between the raw and the registered PC.
      pf.io.ld_in(LduCnt + i).bits.uop.pc := Mux(hybridUnits(i).io.ldu_io.s2_ptr_chasing, io.ooo_to_mem.hybridPc(i), RegNext(io.ooo_to_mem.hybridPc(i)))
    })
    l1PrefetcherOpt.foreach(pf => {
      // The L1 stream prefetcher trains on all first-issue loads; its store
      // port for this lane is tied off here (store training is below).
      val source = hybridUnits(i).io.prefetch_train_l1
      pf.io.ld_in(LduCnt + i).valid := source.valid && source.bits.isFirstIssue &&
        FuType.isLoad(source.bits.uop.fuType)
      pf.io.ld_in(LduCnt + i).bits := source.bits
      pf.io.st_in(StaCnt + i).valid := false.B
      pf.io.st_in(StaCnt + i).bits := DontCare
    })
    // Store prefetch training from the hybrid pipe (store uops only).
    prefetcherOpt.foreach(pf => {
      val source = hybridUnits(i).io.prefetch_train
      pf.io.st_in(StaCnt + i).valid := Mux(pf_train_on_hit,
        source.valid,
        source.valid && source.bits.isFirstIssue && source.bits.miss
      ) && FuType.isStore(source.bits.uop.fuType)
      pf.io.st_in(StaCnt + i).bits := source.bits
      pf.io.st_in(StaCnt + i).bits.uop.pc := RegNext(io.ooo_to_mem.hybridPc(i))
    })

    // Load-to-load fast forward: lane i prefers its own data producer first,
    // then the remaining producers in wrap-around priority order.
    val l2l_fwd_out = loadUnits.map(_.io.l2l_fwd_out) ++ hybridUnits.map(_.io.ldu_io.l2l_fwd_out)
    val fastPriority = (LduCnt + i until LduCnt + HyuCnt) ++ (0 until LduCnt + i)
    val fastValidVec = fastPriority.map(j => l2l_fwd_out(j).valid)
    val fastDataVec = fastPriority.map(j => l2l_fwd_out(j).data)
    val fastErrorVec = fastPriority.map(j => l2l_fwd_out(j).dly_ld_err)
    val fastMatchVec = fastPriority.map(j => io.ooo_to_mem.loadFastMatch(LduCnt + i)(j))
    hybridUnits(i).io.ldu_io.l2l_fwd_in.valid := VecInit(fastValidVec).asUInt.orR
    hybridUnits(i).io.ldu_io.l2l_fwd_in.data := ParallelPriorityMux(fastValidVec, fastDataVec)
    hybridUnits(i).io.ldu_io.l2l_fwd_in.dly_ld_err := ParallelPriorityMux(fastValidVec, fastErrorVec)
    val fastMatch = ParallelPriorityMux(fastValidVec, fastMatchVec)
    hybridUnits(i).io.ldu_io.ld_fast_match := fastMatch
    hybridUnits(i).io.ldu_io.ld_fast_imm := io.ooo_to_mem.loadFastImm(LduCnt + i)
    hybridUnits(i).io.ldu_io.ld_fast_fuOpType := io.ooo_to_mem.loadFastFuOpType(LduCnt + i)
    hybridUnits(i).io.ldu_io.replay <> lsq.io.replay(LduCnt + i)
    hybridUnits(i).io.ldu_io.l2_hint <> io.l2_hint

    // Uncache writeback / raw-data ports (hybrid lanes sit after load lanes).
    lsq.io.ldout.drop(LduCnt)(i) <> hybridUnits(i).io.ldu_io.lsq.uncache
    lsq.io.ld_raw_data.drop(LduCnt)(i) <> hybridUnits(i).io.ldu_io.lsq.ld_raw_data

    // Pass down to LSQ (load s2). Non-cacheable load-in is unused on hybrid
    // lanes and tied off.
    hybridUnits(i).io.ldu_io.lsq.nc_ldin.valid := false.B
    hybridUnits(i).io.ldu_io.lsq.nc_ldin.bits := DontCare
    lsq.io.ldu.ldin(LduCnt + i) <> hybridUnits(i).io.ldu_io.lsq.ldin
    // Lsq to sta unit
    lsq.io.sta.storeMaskIn(StaCnt + i) <> hybridUnits(i).io.stu_io.st_mask_out

    // Lsq to std unit's rs; atomics data is filtered out of storeDataIn.
    lsq.io.std.storeDataIn(StaCnt + i) := stData(StaCnt + i)
    lsq.io.std.storeDataIn(StaCnt + i).valid := stData(StaCnt + i).valid && !st_data_atomics(StaCnt + i)
    // prefetch
    hybridUnits(i).io.stu_io.prefetch_req <> sbuffer.io.store_prefetch(StaCnt + i)

    io.mem_to_ooo.s3_delayed_load_error(LduCnt + i) := hybridUnits(i).io.ldu_io.s3_dly_ld_err

    // ------------------------------------
    // Store Port
    // ------------------------------------
    // Hybrid store-address lanes occupy the last HyuCnt LSQ store ports.
    hybridUnits(i).io.stu_io.lsq <> lsq.io.sta.storeAddrIn.takeRight(HyuCnt)(i)
    hybridUnits(i).io.stu_io.lsq_replenish <> lsq.io.sta.storeAddrInRe.takeRight(HyuCnt)(i)

    lsq.io.sta.storeMaskIn.takeRight(HyuCnt)(i) <> hybridUnits(i).io.stu_io.st_mask_out
    io.mem_to_ooo.stIn.takeRight(HyuCnt)(i).valid := hybridUnits(i).io.stu_io.issue.valid
    io.mem_to_ooo.stIn.takeRight(HyuCnt)(i).bits := hybridUnits(i).io.stu_io.issue.bits

    // ------------------------------------
    // Vector Store Port
    // ------------------------------------
    hybridUnits(i).io.vec_stu_io.isFirstIssue := true.B

    // -------------------------
    // Store Triggers (debug-mode watchpoints from CSR)
    // -------------------------
    hybridUnits(i).io.fromCsrTrigger.tdataVec := tdata
    hybridUnits(i).io.fromCsrTrigger.tEnableVec := tEnable
    hybridUnits(i).io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
    hybridUnits(i).io.fromCsrTrigger.debugMode := debugMode
  }

  // misalignBuffer
  // loadMisalignBuffer holds misaligned loads that must be split; it needs
  // the backend's ROB commit/pending view, fanned out verbatim from
  // io.ooo_to_mem.lsqio.
  loadMisalignBuffer.io.redirect <> redirect
  loadMisalignBuffer.io.rob.lcommit := io.ooo_to_mem.lsqio.lcommit
  loadMisalignBuffer.io.rob.scommit := io.ooo_to_mem.lsqio.scommit
  loadMisalignBuffer.io.rob.pendingMMIOld := io.ooo_to_mem.lsqio.pendingMMIOld
  loadMisalignBuffer.io.rob.pendingld := io.ooo_to_mem.lsqio.pendingld
  loadMisalignBuffer.io.rob.pendingst := io.ooo_to_mem.lsqio.pendingst
  loadMisalignBuffer.io.rob.pendingVst := io.ooo_to_mem.lsqio.pendingVst
  loadMisalignBuffer.io.rob.commit := io.ooo_to_mem.lsqio.commit
  loadMisalignBuffer.io.rob.pendingPtr := io.ooo_to_mem.lsqio.pendingPtr
  loadMisalignBuffer.io.rob.pendingPtrNext := io.ooo_to_mem.lsqio.pendingPtrNext

  // LSQ back-pressure / speculation control from the misalign buffer.
  lsq.io.loadMisalignFull := loadMisalignBuffer.io.loadMisalignFull
  lsq.io.misalignAllowSpec := misalign_allow_spec

  // storeMisalignBuffer gets the same ROB status fan-out as the load side.
  storeMisalignBuffer.io.redirect <> redirect
  storeMisalignBuffer.io.rob.lcommit := io.ooo_to_mem.lsqio.lcommit
  storeMisalignBuffer.io.rob.scommit := io.ooo_to_mem.lsqio.scommit
  storeMisalignBuffer.io.rob.pendingMMIOld := io.ooo_to_mem.lsqio.pendingMMIOld
  storeMisalignBuffer.io.rob.pendingld := io.ooo_to_mem.lsqio.pendingld
  storeMisalignBuffer.io.rob.pendingst := io.ooo_to_mem.lsqio.pendingst
  storeMisalignBuffer.io.rob.pendingVst := io.ooo_to_mem.lsqio.pendingVst
  storeMisalignBuffer.io.rob.commit := io.ooo_to_mem.lsqio.commit
  storeMisalignBuffer.io.rob.pendingPtr := io.ooo_to_mem.lsqio.pendingPtr
  storeMisalignBuffer.io.rob.pendingPtrNext := io.ooo_to_mem.lsqio.pendingPtrNext

  // Store-queue control handshake for misaligned stores.
  lsq.io.maControl <> storeMisalignBuffer.io.sqControl
  // CMO (cache-maintenance operation) request/response between LSQ and dcache.
  lsq.io.cmoOpReq <> dcache.io.cmoOpReq
  lsq.io.cmoOpResp <> dcache.io.cmoOpResp

  // Prefetcher TLB/PMP port assignment. The L1 stream prefetcher takes the
  // load-DTLB port right after all load + hybrid lanes; the SMS prefetcher
  // and the L2->L1 request take the prefetch-DTLB ports.
  val StreamDTLBPortIndex = TlbStartVec(dtlb_ld_idx) + LduCnt + HyuCnt
  val PrefetcherDTLBPortIndex = TlbStartVec(dtlb_pf_idx)
  val L2toL1DLBPortIndex = TlbStartVec(dtlb_pf_idx) + 1
  prefetcherOpt match {
    case Some(pf) =>
      dtlb_reqs(PrefetcherDTLBPortIndex) <> pf.io.tlb_req
      pf.io.pmp_resp := pmp_check(PrefetcherDTLBPortIndex).resp
    case None =>
      // No prefetcher configured: tie the port off, never request,
      // always drain responses.
      dtlb_reqs(PrefetcherDTLBPortIndex) := DontCare
      dtlb_reqs(PrefetcherDTLBPortIndex).req.valid := false.B
      dtlb_reqs(PrefetcherDTLBPortIndex).resp.ready := true.B
  }
  l1PrefetcherOpt match {
    case Some(pf) =>
      dtlb_reqs(StreamDTLBPortIndex) <> pf.io.tlb_req
      pf.io.pmp_resp := pmp_check(StreamDTLBPortIndex).resp
    case None =>
      dtlb_reqs(StreamDTLBPortIndex) := DontCare
      dtlb_reqs(StreamDTLBPortIndex).req.valid := false.B
      dtlb_reqs(StreamDTLBPortIndex).resp.ready := true.B
  }
  dtlb_reqs(L2toL1DLBPortIndex) <> io.l2_tlb_req
  dtlb_reqs(L2toL1DLBPortIndex).resp.ready := true.B
  io.l2_pmp_resp := pmp_check(L2toL1DLBPortIndex).resp

  // Store-data pipes: thin issue/flush hookup only.
  for (i <- 0 until StdCnt) {
    stdExeUnits(i).io.flush <> redirect
    stdExeUnits(i).io.in.valid := io.ooo_to_mem.issueStd(i).valid
    io.ooo_to_mem.issueStd(i).ready := stdExeUnits(i).io.in.ready
    stdExeUnits(i).io.in.bits := io.ooo_to_mem.issueStd(i).bits
  }

  // Store-address pipes.
  for (i <- 0 until StaCnt) {
    val stu = storeUnits(i)

    stu.io.redirect <> redirect
    stu.io.csrCtrl <> csrCtrl
    stu.io.dcache <> dcache.io.lsu.sta(i)
    stu.io.feedback_slow <> io.mem_to_ooo.staIqFeedback(i).feedbackSlow
    stu.io.stin <> io.ooo_to_mem.issueSta(i)
    stu.io.lsq <> lsq.io.sta.storeAddrIn(i)
    stu.io.lsq_replenish <> lsq.io.sta.storeAddrInRe(i)
    // dtlb
    stu.io.tlb <> dtlb_st.head.requestor(i)
    // PMP ports: loads and hybrids come first; the "+ 1" skips the L1 stream
    // prefetcher's port at index LduCnt + HyuCnt (see StreamDTLBPortIndex).
    stu.io.pmp <> pmp_check(LduCnt + HyuCnt + 1 + i).resp

    // -------------------------
    // Store Triggers (debug-mode watchpoints from CSR)
    // -------------------------
    stu.io.fromCsrTrigger.tdataVec := tdata
    stu.io.fromCsrTrigger.tEnableVec := tEnable
    stu.io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
    stu.io.fromCsrTrigger.debugMode := debugMode

    // prefetch
    stu.io.prefetch_req <> sbuffer.io.store_prefetch(i)

    // store unit does not need fast feedback
    io.mem_to_ooo.staIqFeedback(i).feedbackFast := DontCare

    // Lsq to sta unit
    lsq.io.sta.storeMaskIn(i) <> stu.io.st_mask_out

    // connect misalignBuffer
    storeMisalignBuffer.io.req(i) <> stu.io.misalign_buf

    // Only store pipe 0 executes the split beats of a misaligned store.
    if (i == 0) {
      stu.io.misalign_stin <> storeMisalignBuffer.io.splitStoreReq
      stu.io.misalign_stout <> storeMisalignBuffer.io.splitStoreResp
    } else {
      stu.io.misalign_stin.valid := false.B
      stu.io.misalign_stin.bits := DontCare
    }

    // Lsq to std unit's rs: vector store data (from vsSplit) takes priority
    // over scalar store data on the shared storeDataIn port; atomics data is
    // never written through this port.
    if (i < VstuCnt) {
      when (vsSplit(i).io.vstd.get.valid) {
        lsq.io.std.storeDataIn(i).valid := true.B
        lsq.io.std.storeDataIn(i).bits := vsSplit(i).io.vstd.get.bits
        stData(i).ready := false.B
      }.otherwise {
        lsq.io.std.storeDataIn(i).valid := stData(i).valid && !st_data_atomics(i)
        lsq.io.std.storeDataIn(i).bits.uop := stData(i).bits.uop
        lsq.io.std.storeDataIn(i).bits.data := stData(i).bits.data
        lsq.io.std.storeDataIn(i).bits.mask.map(_ := 0.U)
        lsq.io.std.storeDataIn(i).bits.vdIdx.map(_ := 0.U)
        lsq.io.std.storeDataIn(i).bits.vdIdxInField.map(_ := 0.U)
        stData(i).ready := true.B
      }
    } else {
      // Lanes without a vector-store counterpart carry scalar data only.
      lsq.io.std.storeDataIn(i).valid := stData(i).valid && !st_data_atomics(i)
      lsq.io.std.storeDataIn(i).bits.uop := stData(i).bits.uop
      lsq.io.std.storeDataIn(i).bits.data := stData(i).bits.data
      lsq.io.std.storeDataIn(i).bits.mask.map(_ := 0.U)
      lsq.io.std.storeDataIn(i).bits.vdIdx.map(_ := 0.U)
      lsq.io.std.storeDataIn(i).bits.vdIdxInField.map(_ := 0.U)
      stData(i).ready := true.B
    }
    // NOTE(review): these two lines are re-executed on every loop iteration;
    // harmless (same assignment each time) but could live outside the loop.
    lsq.io.std.storeDataIn.map(_.bits.debug := 0.U.asTypeOf(new DebugBundle))
    lsq.io.std.storeDataIn.foreach(_.bits.isFromLoadUnit := DontCare)


    // Store prefetch train: the L1 stream prefetcher does not train on
    // stores, so its store port is tied off here.
    l1PrefetcherOpt.foreach(pf => {
      pf.io.st_in(i).valid := false.B
      pf.io.st_in(i).bits := DontCare
    })

    prefetcherOpt.foreach(pf => {
      pf.io.st_in(i).valid := Mux(pf_train_on_hit,
        stu.io.prefetch_train.valid,
        stu.io.prefetch_train.valid && stu.io.prefetch_train.bits.isFirstIssue && (
          stu.io.prefetch_train.bits.miss
        )
      )
      pf.io.st_in(i).bits := stu.io.prefetch_train.bits
      // PC is staged through s1/s2 with the speculative-prefetch enables so it
      // lines up with the training payload.
      pf.io.st_in(i).bits.uop.pc := RegEnable(RegEnable(io.ooo_to_mem.storePc(i), stu.io.s1_prefetch_spec), stu.io.s2_prefetch_spec)
    })

    // 1. sync issue info to store set LFST
    // 2. when store issue, broadcast issued sqPtr to wake up the following insts
    // io.stIn(i).valid := io.issue(exuParameters.LduCnt + i).valid
    // io.stIn(i).bits := io.issue(exuParameters.LduCnt + i).bits
    io.mem_to_ooo.stIn(i).valid := stu.io.issue.valid
    io.mem_to_ooo.stIn(i).bits := stu.io.issue.bits

    stu.io.stout.ready := true.B

    // vector: only the first VstuCnt store pipes accept vector store uops.
    if (i < VstuCnt) {
      stu.io.vecstin <> vsSplit(i).io.out
      // vsFlowQueue.io.pipeFeedback(i) <> stu.io.vec_feedback_slow // need connect
    } else {
      stu.io.vecstin.valid := false.B
      stu.io.vecstin.bits := DontCare
      stu.io.vecstout.ready := false.B
    }
    stu.io.vec_isFirstIssue := true.B // TODO
  }

  // Store-queue "other" writeback (MMIO and cbo.zero) share one channel.
  // The assert below guarantees the two sources are mutually exclusive;
  // cbo.zero is given priority in the bits mux anyway.
  val sqOtherStout = WireInit(0.U.asTypeOf(DecoupledIO(new MemExuOutput)))
  sqOtherStout.valid := lsq.io.mmioStout.valid || lsq.io.cboZeroStout.valid
  sqOtherStout.bits := Mux(lsq.io.cboZeroStout.valid, lsq.io.cboZeroStout.bits, lsq.io.mmioStout.bits)
  assert(!(lsq.io.mmioStout.valid && lsq.io.cboZeroStout.valid), "Cannot writeback to mmio and cboZero at the same time.")

  // Store writeback by StoreQueue:
  // 1. cbo Zero
  // 2. mmio
  // Currently, the two should not be present at the same time, so simply make cbo zero a higher priority.
  val otherStout = WireInit(0.U.asTypeOf(lsq.io.mmioStout))
  NewPipelineConnect(
    sqOtherStout, otherStout, otherStout.fire,
    false.B,
    Option("otherStoutConnect")
  )
  // otherStout steals stOut(0) only in cycles where store unit 0 has no
  // writeback of its own (last-connect override below).
  otherStout.ready := false.B
  when (otherStout.valid && !storeUnits(0).io.stout.valid) {
    stOut(0).valid := true.B
    stOut(0).bits := otherStout.bits
    otherStout.ready := true.B
  }
  lsq.io.mmioStout.ready := sqOtherStout.ready
  lsq.io.cboZeroStout.ready := sqOtherStout.ready

  // vec mmio writeback
  lsq.io.vecmmioStout.ready := false.B

  // The misalign buffer may also overwrite stOut(0); it only wins when the
  // pipe-0 scalar, vector and "other" writebacks are all idle.
  val storeMisalignCanWriteBack = !otherStout.valid && !storeUnits(0).io.stout.valid && !storeUnits(0).io.vecstout.valid
  storeMisalignBuffer.io.writeBack.ready := storeMisalignCanWriteBack
  storeMisalignBuffer.io.storeOutValid := storeUnits(0).io.stout.valid
  storeMisalignBuffer.io.storeVecOutValid := storeUnits(0).io.vecstout.valid
  when (storeMisalignBuffer.io.writeBack.valid && storeMisalignCanWriteBack) {
    stOut(0).valid := true.B
    stOut(0).bits := storeMisalignBuffer.io.writeBack.bits
  }

  // Uncache
  uncache.io.enableOutstanding := io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable
  uncache.io.hartId := io.hartId

  lsq.io.uncacheOutstanding := io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable

  // LSQ <-> ROB status fan-out (same source as the misalign buffers above).
  io.mem_to_ooo.lsqio.mmio := lsq.io.rob.mmio
  io.mem_to_ooo.lsqio.uop := lsq.io.rob.uop
  lsq.io.rob.lcommit := io.ooo_to_mem.lsqio.lcommit
  lsq.io.rob.scommit := io.ooo_to_mem.lsqio.scommit
  lsq.io.rob.pendingMMIOld := io.ooo_to_mem.lsqio.pendingMMIOld
  lsq.io.rob.pendingld := io.ooo_to_mem.lsqio.pendingld
  lsq.io.rob.pendingst := io.ooo_to_mem.lsqio.pendingst
  lsq.io.rob.pendingVst := io.ooo_to_mem.lsqio.pendingVst
  lsq.io.rob.commit := io.ooo_to_mem.lsqio.commit
  lsq.io.rob.pendingPtr := io.ooo_to_mem.lsqio.pendingPtr
  lsq.io.rob.pendingPtrNext := io.ooo_to_mem.lsqio.pendingPtrNext

  // lsq.io.rob <> io.lsqio.rob
  lsq.io.enq <> io.ooo_to_mem.enqLsq
  lsq.io.brqRedirect <> redirect

  // violation rollback
  /**
   * Select the oldest valid redirect among `xs`, as a one-hot Vec[Bool].
   * Entry i is set iff xs(i).valid and no other valid entry carries an
   * older robIdx (pairwise isAfter comparison); ties on robIdx resolve to
   * the lowest index, since equal entries are not "after" each other.
   */
  def selectOldestRedirect(xs: Seq[Valid[Redirect]]): Vec[Bool] = {
    // compareVec(i)(j) (j < i): xs(j) is younger than xs(i).
    val compareVec = (0 until xs.length).map(i => (0 until i).map(j => isAfter(xs(j).bits.robIdx, xs(i).bits.robIdx)))
    val resultOnehot = VecInit((0 until xs.length).map(i => Cat((0 until xs.length).map(j =>
      (if (j < i) !xs(j).valid || compareVec(i)(j)
       else if (j == i) xs(i).valid
       else !xs(j).valid || !compareVec(j)(i))
    )).andR))
    resultOnehot
  }
  val allRedirect = loadUnits.map(_.io.rollback) ++ hybridUnits.map(_.io.ldu_io.rollback) ++ lsq.io.nack_rollback ++ lsq.io.nuke_rollback
  val oldestOneHot = selectOldestRedirect(allRedirect)
  val oldestRedirect = WireDefault(Mux1H(oldestOneHot, allRedirect))
  // memory replay would not cause IAF/IPF/IGPF
  oldestRedirect.bits.cfiUpdate.backendIAF := false.B
  oldestRedirect.bits.cfiUpdate.backendIPF := false.B
  oldestRedirect.bits.cfiUpdate.backendIGPF := false.B
  io.mem_to_ooo.memoryViolation := oldestRedirect
  io.mem_to_ooo.lsqio.lqCanAccept := lsq.io.lqCanAccept
  io.mem_to_ooo.lsqio.sqCanAccept := lsq.io.sqCanAccept

  // Uncache access serialization FSM. Scalar requests (and non-outstanding NC
  // requests) block in s_scalar_uncache until their response returns; with
  // outstanding writes enabled, NC requests bypass the state machine.
  // lsq.io.uncache <> uncache.io.lsq
  val s_idle :: s_scalar_uncache :: s_vector_uncache :: Nil = Enum(3)
  val uncacheState = RegInit(s_idle)
  val uncacheReq = Wire(Decoupled(new UncacheWordReq))
  val uncacheIdResp = uncache.io.lsq.idResp
  val uncacheResp = Wire(Decoupled(new UncacheWordResp))

  // Defaults; the when-blocks below override them (last-connect semantics).
  uncacheReq.bits := DontCare
  uncacheReq.valid := false.B
  uncacheReq.ready := false.B
  uncacheResp.bits := DontCare
  uncacheResp.valid := false.B
  uncacheResp.ready := false.B
  lsq.io.uncache.req.ready := false.B
  lsq.io.uncache.idResp.valid := false.B
  lsq.io.uncache.idResp.bits := DontCare
  lsq.io.uncache.resp.valid := false.B
  lsq.io.uncache.resp.bits := DontCare

  switch (uncacheState) {
    is (s_idle) {
      when (uncacheReq.fire) {
        when (lsq.io.uncache.req.valid) {
          when (!lsq.io.uncache.req.bits.nc || !io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
            uncacheState := s_scalar_uncache
          }
        }.otherwise {
          // val isStore = vsFlowQueue.io.uncache.req.bits.cmd === MemoryOpConstants.M_XWR
          when (!io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
            uncacheState := s_vector_uncache
          }
        }
      }
    }

    is (s_scalar_uncache) {
      when (uncacheResp.fire) {
        uncacheState := s_idle
      }
    }

    is (s_vector_uncache) {
      when (uncacheResp.fire) {
        uncacheState := s_idle
      }
    }
  }

  when (lsq.io.uncache.req.valid) {
    uncacheReq <> lsq.io.uncache.req
  }
  // Route responses back to the LSQ: unconditionally when outstanding writes
  // are enabled, otherwise only while a scalar access is in flight.
  when (io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
    lsq.io.uncache.resp <> uncacheResp
    lsq.io.uncache.idResp <> uncacheIdResp
  }.otherwise {
    when (uncacheState === s_scalar_uncache) {
      lsq.io.uncache.resp <> uncacheResp
      lsq.io.uncache.idResp <> uncacheIdResp
    }
  }
  // delay uncache req/resp for 1 cycle for better timing
  AddPipelineReg(uncacheReq, uncache.io.lsq.req, false.B)
  AddPipelineReg(uncache.io.lsq.resp, uncacheResp, false.B)

  //lsq.io.refill := delayedDcacheRefill
  lsq.io.release := dcache.io.lsu.release
  lsq.io.lqCancelCnt <> io.mem_to_ooo.lqCancelCnt
  lsq.io.sqCancelCnt <> io.mem_to_ooo.sqCancelCnt
  lsq.io.lqDeq <> io.mem_to_ooo.lqDeq
  lsq.io.sqDeq <> io.mem_to_ooo.sqDeq
  // Todo: assign these
  io.mem_to_ooo.sqDeqPtr := lsq.io.sqDeqPtr
  io.mem_to_ooo.lqDeqPtr := lsq.io.lqDeqPtr
  lsq.io.tl_d_channel <> dcache.io.lsu.tl_d_channel

  // LSQ to store buffer; port 0 is shared with the vector segment unit,
  // which wins the mux when it has data (the assertion of mutual exclusion,
  // if any, is not visible here — segment valid simply takes port 0).
  lsq.io.sbuffer <> sbuffer.io.in
  sbuffer.io.in(0).valid := lsq.io.sbuffer(0).valid || vSegmentUnit.io.sbuffer.valid
  sbuffer.io.in(0).bits := Mux1H(Seq(
    vSegmentUnit.io.sbuffer.valid -> vSegmentUnit.io.sbuffer.bits,
    lsq.io.sbuffer(0).valid -> lsq.io.sbuffer(0).bits
  ))
  vSegmentUnit.io.sbuffer.ready := sbuffer.io.in(0).ready
  lsq.io.sqEmpty <> sbuffer.io.sqempty
  dcache.io.force_write := lsq.io.force_write

  // Default (tied-off) values when difftest is disabled.
  sbuffer.io.vecDifftestInfo := DontCare
  lsq.io.sbufferVecDifftestInfo := DontCare
  vSegmentUnit.io.vecDifftestInfo := DontCare
  if (env.EnableDifftest) {
    sbuffer.io.vecDifftestInfo .zipWithIndex.map{ case (sbufferPort, index) =>
      if (index == 0) {
        // Port 0 mirrors the segment-unit/LSQ mux above.
        val vSegmentDifftestValid = vSegmentUnit.io.vecDifftestInfo.valid
        sbufferPort.valid := Mux(vSegmentDifftestValid, vSegmentUnit.io.vecDifftestInfo.valid, lsq.io.sbufferVecDifftestInfo(0).valid)
        sbufferPort.bits := Mux(vSegmentDifftestValid, vSegmentUnit.io.vecDifftestInfo.bits, lsq.io.sbufferVecDifftestInfo(0).bits)

        vSegmentUnit.io.vecDifftestInfo.ready := sbufferPort.ready
        lsq.io.sbufferVecDifftestInfo(0).ready := sbufferPort.ready
      } else {
        sbufferPort <> lsq.io.sbufferVecDifftestInfo(index)
      }
    }
  }

  // lsq.io.vecStoreRetire <> vsFlowQueue.io.sqRelease
  // lsq.io.vecWriteback.valid := vlWrapper.io.uopWriteback.fire &&
  //  vlWrapper.io.uopWriteback.bits.uop.vpu.lastUop
  // lsq.io.vecWriteback.bits := vlWrapper.io.uopWriteback.bits

  // Vector issue acceptance: a vector uop is taken only if the matching
  // splitter is ready and the fuOpType matches; segment ops and vleff fix-up
  // uops are filtered out of the splitters (handled separately).
  val vLoadCanAccept = (0 until VlduCnt).map(i =>
    vlSplit(i).io.in.ready && VlduType.isVecLd(io.ooo_to_mem.issueVldu(i).bits.uop.fuOpType)
  )
  val vStoreCanAccept = (0 until VstuCnt).map(i =>
    vsSplit(i).io.in.ready && VstuType.isVecSt(io.ooo_to_mem.issueVldu(i).bits.uop.fuOpType)
  )
  val isSegment = io.ooo_to_mem.issueVldu.head.valid && isVsegls(io.ooo_to_mem.issueVldu.head.bits.uop.fuType)
  val isFixVlUop = io.ooo_to_mem.issueVldu.map{x =>
    x.bits.uop.vpu.isVleff && x.bits.uop.vpu.lastUop && x.valid
  }

  // init port
  /**
   * TODO: splited vsMergebuffer maybe remove, if one RS can accept two feedback, or don't need RS replay uop
   * for now:
   *  RS0 -> VsSplit0 -> stu0 -> vsMergebuffer0 -> feedback -> RS0
   *  RS1 -> VsSplit1 -> stu1 -> vsMergebuffer1 -> feedback -> RS1
   *
   *  vector load don't need feedback
   *
   *  RS0 -> VlSplit0  -> ldu0 -> |
   *  RS1 -> VlSplit1  -> ldu1 -> |  -> vlMergebuffer
   *        replayIO   -> ldu3 -> |
   * */
  (0 until VstuCnt).foreach{i =>
    vsMergeBuffer(i).io.fromPipeline := DontCare
    vsMergeBuffer(i).io.fromSplit := DontCare

    vsMergeBuffer(i).io.fromMisalignBuffer.get.flush := storeMisalignBuffer.io.toVecStoreMergeBuffer(i).flush
    vsMergeBuffer(i).io.fromMisalignBuffer.get.mbIndex := storeMisalignBuffer.io.toVecStoreMergeBuffer(i).mbIndex
  }

  // Vector store splitters: RS -> vsSplit(i) -> storeUnits(i).vecstin.
  (0 until VstuCnt).foreach{i =>
    vsSplit(i).io.redirect <> redirect
    vsSplit(i).io.in <> io.ooo_to_mem.issueVldu(i)
    // Override valid (last-connect): only accept genuine vector stores, and
    // never segment ops (those go to vSegmentUnit).
    vsSplit(i).io.in.valid := io.ooo_to_mem.issueVldu(i).valid &&
      vStoreCanAccept(i) && !isSegment
    vsSplit(i).io.toMergeBuffer <> vsMergeBuffer(i).io.fromSplit.head
    NewPipelineConnect(
      vsSplit(i).io.out, storeUnits(i).io.vecstin, storeUnits(i).io.vecstin.fire,
      Mux(vsSplit(i).io.out.fire, vsSplit(i).io.out.bits.uop.robIdx.needFlush(io.redirect), storeUnits(i).io.vecstin.bits.uop.robIdx.needFlush(io.redirect)),
      Option("VsSplitConnectStu")
    )
    vsSplit(i).io.vstd.get := DontCare // Todo: Discuss how to pass vector store data

    // NOTE(review): "empty" is derived as !full — assumes io.full asserts
    // whenever the buffer holds any entry; confirm against the buffer's impl.
    vsSplit(i).io.vstdMisalign.get.storeMisalignBufferEmpty := !storeMisalignBuffer.io.full
    vsSplit(i).io.vstdMisalign.get.storePipeEmpty := !storeUnits(i).io.s0_s1_valid

  }
  // Vector load splitters: RS -> vlSplit(i) -> loadUnits(i).vecldin.
  (0 until VlduCnt).foreach{i =>
    vlSplit(i).io.redirect <> redirect
    vlSplit(i).io.in <> io.ooo_to_mem.issueVldu(i)
    // vleff fix-up uops bypass the splitter as well as segment ops.
    vlSplit(i).io.in.valid := io.ooo_to_mem.issueVldu(i).valid &&
      vLoadCanAccept(i) && !isSegment && !isFixVlUop(i)
    vlSplit(i).io.toMergeBuffer <> vlMergeBuffer.io.fromSplit(i)
    vlSplit(i).io.threshold.get.valid := vlMergeBuffer.io.toSplit.get.threshold
    vlSplit(i).io.threshold.get.bits := lsq.io.lqDeqPtr
    NewPipelineConnect(
      vlSplit(i).io.out, loadUnits(i).io.vecldin, loadUnits(i).io.vecldin.fire,
      Mux(vlSplit(i).io.out.fire, vlSplit(i).io.out.bits.uop.robIdx.needFlush(io.redirect), loadUnits(i).io.vecldin.bits.uop.robIdx.needFlush(io.redirect)),
      Option("VlSplitConnectLdu")
    )

    // Subsequent instructions will be blocked (vfof tracks every issued uop).
    vfofBuffer.io.in(i).valid := io.ooo_to_mem.issueVldu(i).valid
    vfofBuffer.io.in(i).bits := io.ooo_to_mem.issueVldu(i).bits
  }
  // Vector load writeback into the merge buffer; on the designated port the
  // load misalign buffer may slip in whenever the load pipe is idle.
  (0 until LduCnt).foreach{i =>
    loadUnits(i).io.vecldout.ready := vlMergeBuffer.io.fromPipeline(i).ready
    // NOTE(review): ready is tied high even in cycles where vecldout wins the
    // mux below, so a misalign writeback beat could be consumed without being
    // captured — verify the buffer only asserts valid when the pipe is idle.
    loadMisalignBuffer.io.vecWriteBack.ready := true.B

    if (i == MisalignWBPort) {
      when(loadUnits(i).io.vecldout.valid) {
        vlMergeBuffer.io.fromPipeline(i).valid := loadUnits(i).io.vecldout.valid
        vlMergeBuffer.io.fromPipeline(i).bits := loadUnits(i).io.vecldout.bits
      } .otherwise {
        vlMergeBuffer.io.fromPipeline(i).valid := loadMisalignBuffer.io.vecWriteBack.valid
        vlMergeBuffer.io.fromPipeline(i).bits := loadMisalignBuffer.io.vecWriteBack.bits
      }
    } else {
      vlMergeBuffer.io.fromPipeline(i).valid := loadUnits(i).io.vecldout.valid
      vlMergeBuffer.io.fromPipeline(i).bits := loadUnits(i).io.vecldout.bits
    }
  }

  // Vector store writeback: pipe output takes priority over the store
  // misalign buffer on each lane's merge-buffer port.
  (0 until StaCnt).foreach{i =>
    if(i < VstuCnt){
      storeUnits(i).io.vecstout.ready := true.B
      storeMisalignBuffer.io.vecWriteBack(i).ready := vsMergeBuffer(i).io.fromPipeline.head.ready

      when(storeUnits(i).io.vecstout.valid) {
        vsMergeBuffer(i).io.fromPipeline.head.valid := storeUnits(i).io.vecstout.valid
        vsMergeBuffer(i).io.fromPipeline.head.bits := storeUnits(i).io.vecstout.bits
      } .otherwise {
        vsMergeBuffer(i).io.fromPipeline.head.valid := storeMisalignBuffer.io.vecWriteBack(i).valid
        vsMergeBuffer(i).io.fromPipeline.head.bits := storeMisalignBuffer.io.vecWriteBack(i).bits
      }
    }
  }

  // Issue-port ready: a vector uop is accepted if either the load or the
  // store splitter for this port can take it.
  (0 until VlduCnt).foreach{i =>
    io.ooo_to_mem.issueVldu(i).ready := vLoadCanAccept(i) || vStoreCanAccept(i)
  }

  vlMergeBuffer.io.redirect <> redirect
  vsMergeBuffer.map(_.io.redirect <> redirect)
  (0 until VlduCnt).foreach{i =>
    vlMergeBuffer.io.toLsq(i) <> lsq.io.ldvecFeedback(i)
  }
  (0 until VstuCnt).foreach{i =>
    vsMergeBuffer(i).io.toLsq.head <> lsq.io.stvecFeedback(i)
  }

  (0 until VlduCnt).foreach{i =>
    // send to RS
    vlMergeBuffer.io.feedback(i) <> io.mem_to_ooo.vlduIqFeedback(i).feedbackSlow
    io.mem_to_ooo.vlduIqFeedback(i).feedbackFast := DontCare
  }
  (0 until VstuCnt).foreach{i =>
    // send to RS; port 0 additionally multiplexes the segment unit's
    // feedback, which wins the Mux1H when valid.
    if (i == 0){
      io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow.valid := vsMergeBuffer(i).io.feedback.head.valid || vSegmentUnit.io.feedback.valid
      io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow.bits := Mux1H(Seq(
        vSegmentUnit.io.feedback.valid -> vSegmentUnit.io.feedback.bits,
        vsMergeBuffer(i).io.feedback.head.valid -> vsMergeBuffer(i).io.feedback.head.bits
      ))
      io.mem_to_ooo.vstuIqFeedback(i).feedbackFast := DontCare
    } else {
      vsMergeBuffer(i).io.feedback.head <> io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow
      io.mem_to_ooo.vstuIqFeedback(i).feedbackFast := DontCare
    }
  }

  // Vector writeback port arbitration (priority encoded, highest first):
  //   port 0: vSegmentUnit > vlMergeBuffer > vsMergeBuffer
  //   port 1: vfofBuffer   > vlMergeBuffer > vsMergeBuffer
  //   others: vlMergeBuffer > vsMergeBuffer
  // ready back-pressure mirrors the same priority chain.
  (0 until VlduCnt).foreach{i =>
    if (i == 0){ // for segmentUnit, segmentUnit use port0 writeback
      io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid || vSegmentUnit.io.uopwriteback.valid
      io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
        vSegmentUnit.io.uopwriteback.valid -> vSegmentUnit.io.uopwriteback.bits,
        vlMergeBuffer.io.uopWriteback(i).valid -> vlMergeBuffer.io.uopWriteback(i).bits,
        vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
      ))
      vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready && !vSegmentUnit.io.uopwriteback.valid
      vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid && !vSegmentUnit.io.uopwriteback.valid
      vSegmentUnit.io.uopwriteback.ready := io.mem_to_ooo.writebackVldu(i).ready
    } else if (i == 1) {
      io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid || vfofBuffer.io.uopWriteback.valid
      io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
        vfofBuffer.io.uopWriteback.valid -> vfofBuffer.io.uopWriteback.bits,
        vlMergeBuffer.io.uopWriteback(i).valid -> vlMergeBuffer.io.uopWriteback(i).bits,
        vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
      ))
      vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready && !vfofBuffer.io.uopWriteback.valid
      vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid && !vfofBuffer.io.uopWriteback.valid
      vfofBuffer.io.uopWriteback.ready := io.mem_to_ooo.writebackVldu(i).ready
    } else {
      io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid
      io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
        vlMergeBuffer.io.uopWriteback(i).valid -> vlMergeBuffer.io.uopWriteback(i).bits,
        vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
      ))
      vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready
      vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid
    }

    // vfof observes every merge-buffer writeback (to detect first-fault).
    vfofBuffer.io.mergeUopWriteback(i).valid := vlMergeBuffer.io.uopWriteback(i).valid
    vfofBuffer.io.mergeUopWriteback(i).bits := vlMergeBuffer.io.uopWriteback(i).bits
  }


  vfofBuffer.io.redirect <> redirect

  // Sbuffer
  sbuffer.io.csrCtrl <> csrCtrl
  sbuffer.io.dcache <> dcache.io.lsu.store
  sbuffer.io.memSetPattenDetected := dcache.io.memSetPattenDetected
  sbuffer.io.force_write <> lsq.io.force_write
  // Sbuffer flush requests: CMO, fence, and atomics/segment-unit flushes.
  // "empty" requires both the sbuffer and the uncache buffer to be drained.
  val cmoFlush = lsq.io.flushSbuffer.valid
  val fenceFlush = io.ooo_to_mem.flushSb
  val atomicsFlush = atomicsUnit.io.flush_sbuffer.valid || vSegmentUnit.io.flush_sbuffer.valid
  val stIsEmpty = sbuffer.io.flush.empty && uncache.io.flush.empty
  io.mem_to_ooo.sbIsEmpty := RegNext(stIsEmpty)

  // if both of them try to flush sbuffer at the same time
  // something must have
  // NOTE(review): three flush requesters exist (fence, atomics/vsegment, CMO)
  // but this assert only fires when ALL three are active in the same cycle;
  // pairwise overlap is not checked — confirm that is intended.
  assert(!(fenceFlush && atomicsFlush && cmoFlush))
  // Any requester flushes both the sbuffer and the uncache buffer.
  sbuffer.io.flush.valid := RegNext(fenceFlush || atomicsFlush || cmoFlush)
  uncache.io.flush.valid := sbuffer.io.flush.valid

  // AtomicsUnit: AtomicsUnit will override other control signals,
  // as atomics insts (LR/SC/AMO) will block the pipeline
  // One state per sta/hyu issue port plus s_normal; the state records which
  // port the in-flight atomic came from so feedback is routed back correctly.
  val s_normal +: s_atomics = Enum(StaCnt + HyuCnt + 1)
  val state = RegInit(s_normal)

  // Per-port "an AMO is being issued here this cycle" flags (sta ports first,
  // then hybrid ports).
  val st_atomics = Seq.tabulate(StaCnt)(i =>
    io.ooo_to_mem.issueSta(i).valid && FuType.storeIsAMO((io.ooo_to_mem.issueSta(i).bits.uop.fuType))
  ) ++ Seq.tabulate(HyuCnt)(i =>
    io.ooo_to_mem.issueHya(i).valid && FuType.storeIsAMO((io.ooo_to_mem.issueHya(i).bits.uop.fuType))
  )

  // When an AMO issues, steal the port: ready comes from the atomics unit and
  // the regular store/hybrid unit is suppressed (last-connect override).
  for (i <- 0 until StaCnt) when(st_atomics(i)) {
    io.ooo_to_mem.issueSta(i).ready := atomicsUnit.io.in.ready
    storeUnits(i).io.stin.valid := false.B

    state := s_atomics(i)
  }
  for (i <- 0 until HyuCnt) when(st_atomics(StaCnt + i)) {
    io.ooo_to_mem.issueHya(i).ready := atomicsUnit.io.in.ready
    hybridUnits(i).io.lsin.valid := false.B

    state := s_atomics(StaCnt + i)
    // Only one port may carry an atomic in any cycle.
    assert(!st_atomics.zipWithIndex.filterNot(_._2 == StaCnt + i).unzip._1.reduce(_ || _))
  }
  when (atomicsUnit.io.out.valid) {
    state := s_normal
  }

  // Mux1H is safe here: the assert above guarantees st_atomics is one-hot.
  atomicsUnit.io.in.valid := st_atomics.reduce(_ || _)
  atomicsUnit.io.in.bits := Mux1H(Seq.tabulate(StaCnt)(i =>
    st_atomics(i) -> io.ooo_to_mem.issueSta(i).bits) ++
    Seq.tabulate(HyuCnt)(i => st_atomics(StaCnt+i) -> io.ooo_to_mem.issueHya(i).bits))
  atomicsUnit.io.storeDataIn.zipWithIndex.foreach { case (stdin, i) =>
    stdin.valid := st_data_atomics(i)
    stdin.bits := stData(i).bits
  }
  atomicsUnit.io.redirect <> redirect

  // TODO: complete amo's pmp support
  // The atomics unit borrows load port 0's TLB requestor; its resp/req are
  // tied off here and only truly connected while state =/= s_normal (below).
  val amoTlb = dtlb_ld(0).requestor(0)
  atomicsUnit.io.dtlb.resp.valid := false.B
  atomicsUnit.io.dtlb.resp.bits := DontCare
  atomicsUnit.io.dtlb.req.ready := amoTlb.req.ready
  atomicsUnit.io.pmpResp := pmp_check(0).resp

  atomicsUnit.io.dcache <> dcache.io.lsu.atomics
  atomicsUnit.io.flush_sbuffer.empty := stIsEmpty

  atomicsUnit.io.csrCtrl := csrCtrl

  // for atomicsUnit, it uses loadUnit(0)'s TLB port

  when (state =/= s_normal) {
    // use store wb port instead of load
    loadUnits(0).io.ldout.ready := false.B
    // use load_0's TLB
    atomicsUnit.io.dtlb <> amoTlb

    // hw prefetch should be disabled while executing atomic insts
    loadUnits.map(i => i.io.prefetch_req.valid := false.B)

    // make sure there's no in-flight uops in load unit
    assert(!loadUnits(0).io.ldout.valid)
  }

  lsq.io.flushSbuffer.empty := sbuffer.io.sbempty

  // Route the atomics unit's slow feedback back to whichever issue port the
  // in-flight atomic originally came from.
  for (i <- 0 until StaCnt) {
    when (state === s_atomics(i)) {
      io.mem_to_ooo.staIqFeedback(i).feedbackSlow := atomicsUnit.io.feedbackSlow
      assert(!storeUnits(i).io.feedback_slow.valid)
    }
  }
  for (i <- 0 until HyuCnt) {
    when (state === s_atomics(StaCnt + i)) {
      io.mem_to_ooo.hyuIqFeedback(i).feedbackSlow := atomicsUnit.io.feedbackSlow
      assert(!hybridUnits(i).io.feedback_slow.valid)
    }
  }

  lsq.io.exceptionAddr.isStore := io.ooo_to_mem.isStoreException
  // Exception address is used several cycles after flush.
  // We delay it by 10 cycles to ensure its flush safety.
  // Sticky flag: set when the atomics unit reports an exception, cleared only
  // 10 cycles after the consequent redirect, so the captured address below
  // stays selected long enough for the backend to read it.
  val atomicsException = RegInit(false.B)
  when (DelayN(redirect.valid, 10) && atomicsException) {
    atomicsException := false.B
  }.elsewhen (atomicsUnit.io.exceptionInfo.valid) {
    atomicsException := true.B
  }

  // Misaligned-access exception info can overwrite the LSQ's exception buffer;
  // load-side wins over store-side in each of these muxes.
  val misalignBufExceptionOverwrite = loadMisalignBuffer.io.overwriteExpBuf.valid || storeMisalignBuffer.io.overwriteExpBuf.valid
  val misalignBufExceptionVaddr = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
    loadMisalignBuffer.io.overwriteExpBuf.vaddr,
    storeMisalignBuffer.io.overwriteExpBuf.vaddr
  )
  val misalignBufExceptionIsHyper = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
    loadMisalignBuffer.io.overwriteExpBuf.isHyper,
    storeMisalignBuffer.io.overwriteExpBuf.isHyper
  )
  val misalignBufExceptionGpaddr = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
    loadMisalignBuffer.io.overwriteExpBuf.gpaddr,
    storeMisalignBuffer.io.overwriteExpBuf.gpaddr
  )
  val misalignBufExceptionIsForVSnonLeafPTE = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
    loadMisalignBuffer.io.overwriteExpBuf.isForVSnonLeafPTE,
    storeMisalignBuffer.io.overwriteExpBuf.isForVSnonLeafPTE
  )

  // Same sticky-flag scheme as atomicsException, for vector segment faults.
  val vSegmentException = RegInit(false.B)
  when (DelayN(redirect.valid, 10) && vSegmentException) {
    vSegmentException := false.B
  }.elsewhen (vSegmentUnit.io.exceptionInfo.valid) {
    vSegmentException := true.B
  }
  // Capture exception details at the cycle they are reported; the RegEnable
  // holds them until the backend consumes them via the muxes below.
  val atomicsExceptionAddress = RegEnable(atomicsUnit.io.exceptionInfo.bits.vaddr, atomicsUnit.io.exceptionInfo.valid)
  val vSegmentExceptionVstart = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vstart, vSegmentUnit.io.exceptionInfo.valid)
  val vSegmentExceptionVl = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vl, vSegmentUnit.io.exceptionInfo.valid)
  val vSegmentExceptionAddress = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vaddr, vSegmentUnit.io.exceptionInfo.valid)
  val atomicsExceptionGPAddress = RegEnable(atomicsUnit.io.exceptionInfo.bits.gpaddr, atomicsUnit.io.exceptionInfo.valid)
  val vSegmentExceptionGPAddress = RegEnable(vSegmentUnit.io.exceptionInfo.bits.gpaddr, vSegmentUnit.io.exceptionInfo.valid)
  val atomicsExceptionIsForVSnonLeafPTE = RegEnable(atomicsUnit.io.exceptionInfo.bits.isForVSnonLeafPTE, atomicsUnit.io.exceptionInfo.valid)
  val vSegmentExceptionIsForVSnonLeafPTE = RegEnable(vSegmentUnit.io.exceptionInfo.bits.isForVSnonLeafPTE, vSegmentUnit.io.exceptionInfo.valid)

  // Exception vaddr source priority: atomics > misalign buffer > vsegment > LSQ.
  val exceptionVaddr = Mux(
    atomicsException,
    atomicsExceptionAddress,
    Mux(misalignBufExceptionOverwrite,
      misalignBufExceptionVaddr,
      Mux(vSegmentException,
        vSegmentExceptionAddress,
        lsq.io.exceptionAddr.vaddr
      )
    )
  )
  // whether vaddr need ext or is hyper inst:
  // VaNeedExt: atomicsException -> false; misalignBufExceptionOverwrite -> true; vSegmentException -> false
  // IsHyper: atomicsException -> false; vSegmentException -> false
  val exceptionVaNeedExt = !atomicsException &&
    (misalignBufExceptionOverwrite ||
      (!vSegmentException && lsq.io.exceptionAddr.vaNeedExt))
  val exceptionIsHyper = !atomicsException &&
    (misalignBufExceptionOverwrite && misalignBufExceptionIsHyper ||
      (!vSegmentException && lsq.io.exceptionAddr.isHyper && !misalignBufExceptionOverwrite))

  /**
   * Extend an exception virtual address to XLEN bits according to the active
   * translation mode, as the privileged spec requires for xtval reporting.
   *
   * Selection (one-hot by construction of the use* signals):
   *  - Bare (no translation): zero-extend the physical-address-sized value.
   *  - Sv39/Sv48: sign-extend from bit 38/47.
   *  - Sv39x4/Sv48x4 (G-stage only): zero-extend the 41/50-bit guest address.
   * When vaNeedExt is false the vaddr is passed through unchanged.
   *
   * @param mode      privilege mode used for the access (only compared
   *                  against M-mode here)
   * @param isVirt    access is virtualized (two-stage); also set for hypervisor
   *                  load/store instructions
   * @param vaNeedExt whether the source vaddr still needs mode-based extension
   * @param satp/vsatp/hgatp  translation CSRs; mode fields decoded below
   *                  (0 = Bare, 8 = Sv39/Sv39x4, 9 = Sv48/Sv48x4)
   * @param vaddr     the captured exception virtual address
   * @return XLEN-wide value suitable for writing to xtval
   */
  def GenExceptionVa(
    mode: UInt, isVirt: Bool, vaNeedExt: Bool,
    satp: TlbSatpBundle, vsatp: TlbSatpBundle, hgatp: TlbHgatpBundle,
    vaddr: UInt
  ) = {
    // Sv48x4 needs 50 bits of vaddr (48 + 2) for the widest take() below.
    require(VAddrBits >= 50)

    val satpNone = satp.mode === 0.U
    val satpSv39 = satp.mode === 8.U
    val satpSv48 = satp.mode === 9.U

    val vsatpNone = vsatp.mode === 0.U
    val vsatpSv39 = vsatp.mode === 8.U
    val vsatpSv48 = vsatp.mode === 9.U

    val hgatpNone = hgatp.mode === 0.U
    val hgatpSv39x4 = hgatp.mode === 8.U
    val hgatpSv48x4 = hgatp.mode === 9.U

    // For !isVirt, mode check is necessary, as we don't want virtual memory in M-mode.
    // For isVirt, mode check is unnecessary, as virt won't be 1 in M-mode.
    // Also, isVirt includes Hyper Insts, which don't care mode either.

    val useBareAddr =
      (isVirt && vsatpNone && hgatpNone) ||
      (!isVirt && (mode === CSRConst.ModeM)) ||
      (!isVirt && (mode =/= CSRConst.ModeM) && satpNone)
    val useSv39Addr =
      (isVirt && vsatpSv39) ||
      (!isVirt && (mode =/= CSRConst.ModeM) && satpSv39)
    val useSv48Addr =
      (isVirt && vsatpSv48) ||
      (!isVirt && (mode =/= CSRConst.ModeM) && satpSv48)
    // G-stage-only translation: guest-physical addresses are 2 bits wider.
    val useSv39x4Addr = isVirt && vsatpNone && hgatpSv39x4
    val useSv48x4Addr = isVirt && vsatpNone && hgatpSv48x4

    val bareAddr = ZeroExt(vaddr(PAddrBits - 1, 0), XLEN)
    val sv39Addr = SignExt(vaddr.take(39), XLEN)
    val sv39x4Addr = ZeroExt(vaddr.take(39 + 2), XLEN)
    val sv48Addr = SignExt(vaddr.take(48), XLEN)
    val sv48x4Addr = ZeroExt(vaddr.take(48 + 2), XLEN)

    val ExceptionVa = Wire(UInt(XLEN.W))
    when (vaNeedExt) {
      // NOTE(review): Mux1H relies on the use* conditions being mutually
      // exclusive; the satp/vsatp/hgatp mode decodes make them so for legal
      // CSR values (0/8/9).
      ExceptionVa := Mux1H(Seq(
        (useBareAddr) -> bareAddr,
        (useSv39Addr) -> sv39Addr,
        (useSv48Addr) -> sv48Addr,
        (useSv39x4Addr) -> sv39x4Addr,
        (useSv48x4Addr) -> sv48x4Addr,
      ))
    } .otherwise {
      ExceptionVa := vaddr
    }

    ExceptionVa
  }

  // Registered once before handing the extended vaddr to the backend.
  io.mem_to_ooo.lsqio.vaddr := RegNext(
    GenExceptionVa(tlbcsr.priv.dmode, tlbcsr.priv.virt || exceptionIsHyper, exceptionVaNeedExt,
    tlbcsr.satp, tlbcsr.vsatp, tlbcsr.hgatp, exceptionVaddr)
  )
  // vsegment instruction is executed atomically, which means atomicsException and vSegmentException should not be raised at the same time.
  XSError(atomicsException && vSegmentException, "atomicsException and vSegmentException raise at the same time!")
  // vstart/vl come from the segment unit when it faulted, otherwise from LSQ.
  io.mem_to_ooo.lsqio.vstart := RegNext(Mux(vSegmentException,
    vSegmentExceptionVstart,
    lsq.io.exceptionAddr.vstart)
  )
  io.mem_to_ooo.lsqio.vl := RegNext(Mux(vSegmentException,
    vSegmentExceptionVl,
    lsq.io.exceptionAddr.vl)
  )

  XSError(atomicsException && atomicsUnit.io.in.valid, "new instruction before exception triggers\n")
  // gpaddr / isForVSnonLeafPTE use the same source priority as exceptionVaddr:
  // atomics > misalign buffer > vsegment > LSQ.
  io.mem_to_ooo.lsqio.gpaddr := RegNext(Mux(
    atomicsException,
    atomicsExceptionGPAddress,
    Mux(misalignBufExceptionOverwrite,
      misalignBufExceptionGpaddr,
      Mux(vSegmentException,
        vSegmentExceptionGPAddress,
        lsq.io.exceptionAddr.gpaddr
      )
    )
  ))
  io.mem_to_ooo.lsqio.isForVSnonLeafPTE := RegNext(Mux(
    atomicsException,
    atomicsExceptionIsForVSnonLeafPTE,
    Mux(misalignBufExceptionOverwrite,
      misalignBufExceptionIsForVSnonLeafPTE,
      Mux(vSegmentException,
        vSegmentExceptionIsForVSnonLeafPTE,
        lsq.io.exceptionAddr.isForVSnonLeafPTE
      )
    )
  ))
  // External-interrupt and top-level signals bypassed through MemBlock to the
  // backend (MemBlock owns the diplomatic interrupt sinks).
  io.mem_to_ooo.topToBackendBypass match { case x =>
    x.hartId := io.hartId
    x.l2FlushDone := RegNext(io.l2_flush_done)
    x.externalInterrupt.msip := outer.clint_int_sink.in.head._1(0)
    x.externalInterrupt.mtip := outer.clint_int_sink.in.head._1(1)
    x.externalInterrupt.meip := outer.plic_int_sink.in.head._1(0)
    x.externalInterrupt.seip := outer.plic_int_sink.in.last._1(0)
    x.externalInterrupt.debug := outer.debug_int_sink.in.head._1(0)
    // nmi_31 also ORs in the local bus-error-unit interrupt.
    x.externalInterrupt.nmi.nmi_31 := outer.nmi_int_sink.in.head._1(0) | outer.beu_local_int_sink.in.head._1(0)
    x.externalInterrupt.nmi.nmi_43 := outer.nmi_int_sink.in.head._1(1)
    x.msiInfo := DelayNWithValid(io.fromTopToBackend.msiInfo, 1)
    x.clintTime := DelayNWithValid(io.fromTopToBackend.clintTime, 1)
  }

  io.memInfo.sqFull := RegNext(lsq.io.sqFull)
  io.memInfo.lqFull := RegNext(lsq.io.lqFull)
  io.memInfo.dcacheMSHRFull := RegNext(dcache.io.mshrFull)

  io.inner_hartId := io.hartId
  io.inner_reset_vector := RegNext(io.outer_reset_vector)
  io.outer_cpu_halt := io.ooo_to_mem.backendToTopBypass.cpuHalted
  io.outer_l2_flush_en := io.ooo_to_mem.csrCtrl.flush_l2_enable
  io.outer_power_down_en := io.ooo_to_mem.csrCtrl.power_down_enable
  io.outer_cpu_critical_error := io.ooo_to_mem.backendToTopBypass.cpuCriticalError
  io.outer_msi_ack := io.ooo_to_mem.backendToTopBypass.msiAck
  io.outer_beu_errors_icache := RegNext(io.inner_beu_errors_icache)
  io.inner_hc_perfEvents <> RegNext(io.outer_hc_perfEvents)

  // vector segmentUnit: issued on vldu port 0, shares load port 0's DTLB,
  // PMP and dcache read ports.
  vSegmentUnit.io.in.bits <> io.ooo_to_mem.issueVldu.head.bits
  vSegmentUnit.io.in.valid := isSegment && io.ooo_to_mem.issueVldu.head.valid // is segment instruction
  vSegmentUnit.io.dtlb.resp.bits <> dtlb_reqs.take(LduCnt).head.resp.bits
  vSegmentUnit.io.dtlb.resp.valid <> dtlb_reqs.take(LduCnt).head.resp.valid
  vSegmentUnit.io.pmpResp <> pmp_check.head.resp
  vSegmentUnit.io.flush_sbuffer.empty := stIsEmpty
  vSegmentUnit.io.redirect <> redirect
  vSegmentUnit.io.rdcache.resp.bits := dcache.io.lsu.load(0).resp.bits
  vSegmentUnit.io.rdcache.resp.valid := dcache.io.lsu.load(0).resp.valid
  vSegmentUnit.io.rdcache.s2_bank_conflict := dcache.io.lsu.load(0).s2_bank_conflict
  // -------------------------
  // Vector Segment Triggers
  // -------------------------
  vSegmentUnit.io.fromCsrTrigger.tdataVec := tdata
  vSegmentUnit.io.fromCsrTrigger.tEnableVec := tEnable
  vSegmentUnit.io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
  vSegmentUnit.io.fromCsrTrigger.debugMode := debugMode

  // reset tree of MemBlock: two physical subtrees (left: PTW/LSQ side,
  // right: sbuffer/dcache side, which also drives the backend reset cell).
  if (p(DebugOptionsKey).ResetGen) {
    val leftResetTree = ResetGenNode(
      Seq(
        ModuleNode(ptw),
        ModuleNode(ptw_to_l2_buffer),
        ModuleNode(lsq),
        ModuleNode(dtlb_st_tlb_st),
        ModuleNode(dtlb_prefetch_tlb_prefetch),
        ModuleNode(pmp)
      )
      ++ pmp_checkers.map(ModuleNode(_))
      ++ (if (prefetcherOpt.isDefined) Seq(ModuleNode(prefetcherOpt.get)) else Nil)
      ++ (if (l1PrefetcherOpt.isDefined) Seq(ModuleNode(l1PrefetcherOpt.get)) else Nil)
    )
    val rightResetTree = ResetGenNode(
      Seq(
        ModuleNode(sbuffer),
        ModuleNode(dtlb_ld_tlb_ld),
        ModuleNode(dcache),
        ModuleNode(l1d_to_l2_buffer),
        CellNode(io.reset_backend)
      )
    )
    ResetGen(leftResetTree, reset, sim = false, io.sramTestBypass.fromL2Top.mbistReset)
    ResetGen(rightResetTree, reset, sim = false, io.sramTestBypass.fromL2Top.mbistReset)
  } else {
    io.reset_backend := DontCare
  }
  io.resetInFrontendBypass.toL2Top := io.resetInFrontendBypass.fromFrontend
  // trace interface: register every field on its way to the encoder; fields
  // captured with RegEnable are only sampled when their group is valid.
  val traceToL2Top = io.traceCoreInterfaceBypass.toL2Top
  val traceFromBackend = io.traceCoreInterfaceBypass.fromBackend
  traceFromBackend.fromEncoder := RegNext(traceToL2Top.fromEncoder)
  traceToL2Top.toEncoder.trap := RegEnable(
    traceFromBackend.toEncoder.trap,
    traceFromBackend.toEncoder.groups(0).valid && Itype.isTrap(traceFromBackend.toEncoder.groups(0).bits.itype)
  )
  traceToL2Top.toEncoder.priv := RegEnable(
    traceFromBackend.toEncoder.priv,
    traceFromBackend.toEncoder.groups(0).valid
  )
  (0 until TraceGroupNum).foreach { i =>
    traceToL2Top.toEncoder.groups(i).valid := RegNext(traceFromBackend.toEncoder.groups(i).valid)
    traceToL2Top.toEncoder.groups(i).bits.iretire := RegNext(traceFromBackend.toEncoder.groups(i).bits.iretire)
    traceToL2Top.toEncoder.groups(i).bits.itype := RegNext(traceFromBackend.toEncoder.groups(i).bits.itype)
    traceToL2Top.toEncoder.groups(i).bits.ilastsize := RegEnable(
      traceFromBackend.toEncoder.groups(i).bits.ilastsize,
      traceFromBackend.toEncoder.groups(i).valid
    )
    // iaddr = registered base address + registered ftq offset (in bytes).
    traceToL2Top.toEncoder.groups(i).bits.iaddr := RegEnable(
      traceFromBackend.toEncoder.groups(i).bits.iaddr,
      traceFromBackend.toEncoder.groups(i).valid
    ) + (RegEnable(
      traceFromBackend.toEncoder.groups(i).bits.ftqOffset.getOrElse(0.U),
      traceFromBackend.toEncoder.groups(i).valid
    ) << instOffsetBits)
  }


  io.mem_to_ooo.storeDebugInfo := DontCare
  // store event difftest information
  if (env.EnableDifftest) {
    (0 until EnsbufferWidth).foreach{i =>
      io.mem_to_ooo.storeDebugInfo(i).robidx := sbuffer.io.vecDifftestInfo(i).bits.robIdx
      sbuffer.io.vecDifftestInfo(i).bits.pc := io.mem_to_ooo.storeDebugInfo(i).pc
    }
  }

  // top-down info
  dcache.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
  dtlbRepeater.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
  lsq.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
  io.debugTopDown.toCore.robHeadMissInDCache := dcache.io.debugTopDown.robHeadMissInDCache
  io.debugTopDown.toCore.robHeadTlbReplay := lsq.io.debugTopDown.robHeadTlbReplay
  io.debugTopDown.toCore.robHeadTlbMiss := lsq.io.debugTopDown.robHeadTlbMiss
  io.debugTopDown.toCore.robHeadLoadVio := lsq.io.debugTopDown.robHeadLoadVio
  io.debugTopDown.toCore.robHeadLoadMSHR := lsq.io.debugTopDown.robHeadLoadMSHR
  dcache.io.debugTopDown.robHeadOtherReplay := lsq.io.debugTopDown.robHeadOtherReplay
  dcache.io.debugRolling := io.debugRolling

  lsq.io.noUopsIssued := io.topDownInfo.toBackend.noUopsIssued
  io.topDownInfo.toBackend.lqEmpty := lsq.io.lqEmpty
  io.topDownInfo.toBackend.sqEmpty := lsq.io.sqEmpty
  io.topDownInfo.toBackend.l1Miss := dcache.io.l1Miss
  io.topDownInfo.toBackend.l2TopMiss.l2Miss := RegNext(io.topDownInfo.fromL2Top.l2Miss)
  io.topDownInfo.toBackend.l2TopMiss.l3Miss := RegNext(io.topDownInfo.fromL2Top.l3Miss)

  // Issue-queue dequeue statistics; +& keeps full width (no overflow wrap).
  val hyLdDeqCount = PopCount(io.ooo_to_mem.issueHya.map(x => x.valid && FuType.isLoad(x.bits.uop.fuType)))
  val hyStDeqCount = PopCount(io.ooo_to_mem.issueHya.map(x => x.valid && FuType.isStore(x.bits.uop.fuType)))
  val ldDeqCount = PopCount(io.ooo_to_mem.issueLda.map(_.valid)) +& hyLdDeqCount
  val stDeqCount = PopCount(io.ooo_to_mem.issueSta.take(StaCnt).map(_.valid)) +& hyStDeqCount
  val iqDeqCount = ldDeqCount +& stDeqCount
  XSPerfAccumulate("load_iq_deq_count", ldDeqCount)
  XSPerfHistogram("load_iq_deq_count", ldDeqCount, true.B, 0, LdExuCnt + 1)
  XSPerfAccumulate("store_iq_deq_count", stDeqCount)
  XSPerfHistogram("store_iq_deq_count", stDeqCount, true.B, 0, StAddrCnt + 1)
  XSPerfAccumulate("ls_iq_deq_count", iqDeqCount)

  // HPM event selection: MemBlock owns hpmevent slots 16..23.
  val pfevent = Module(new PFEvent)
  pfevent.io.distribute_csr := csrCtrl.distribute_csr
  val csrevents = pfevent.io.hpmevent.slice(16,24)

  val perfFromUnits = (loadUnits ++ Seq(sbuffer, lsq, dcache)).flatMap(_.getPerfEvents)
  val perfFromPTW = perfEventsPTW.map(x => ("PTW_" + x._1, x._2))
  val perfBlock = Seq(("ldDeqCount", ldDeqCount),
                      ("stDeqCount", stDeqCount))
  // let index = 0 be no event
  val allPerfEvents = Seq(("noEvent", 0.U)) ++ perfFromUnits ++ perfFromPTW ++ perfBlock

  if (printEventCoding) {
    for (((name, inc), i) <- allPerfEvents.zipWithIndex) {
      println("MemBlock perfEvents Set", name, inc, i)
    }
  }

  val allPerfInc = allPerfEvents.map(_._2.asTypeOf(new PerfEvent))
  val perfEvents = HPerfMonitor(csrevents, allPerfInc).getPerfEvents
  generatePerfEvent()

  // MBIST: collect all SRAM pipelines in MemBlock under one interface.
  private val mbistPl = MbistPipeline.PlaceMbistPipeline(Int.MaxValue, "MbistPipeMemBlk", hasMbist)
  private val mbistIntf = if(hasMbist) {
    val params = mbistPl.get.nodeParams
    val intf = Some(Module(new MbistInterface(
      params = Seq(params),
      ids = Seq(mbistPl.get.childrenIds),
      name = s"MbistIntfMemBlk",
      pipelineNum = 1
    )))
    intf.get.toPipeline.head <> mbistPl.get.mbist
    mbistPl.get.registerCSV(intf.get.info, "MbistMemBlk")
    intf.get.mbist := DontCare
    dontTouch(intf.get.mbist)
    //TODO: add mbist controller connections here
    intf
  } else {
    None
  }
  private val sigFromSrams = if (hasSramTest) Some(SramHelper.genBroadCastBundleTop()) else None
  private val cg = ClockGate.genTeSrc
  dontTouch(cg)

  sigFromSrams.foreach({ case sig => sig.mbist := DontCare })
  // Fan the L2Top MBIST control out to the local SRAMs, frontend and backend.
  if (hasMbist) {
    sigFromSrams.get.mbist := io.sramTestBypass.fromL2Top.mbist.get
    io.sramTestBypass.toFrontend.mbist.get := io.sramTestBypass.fromL2Top.mbist.get
    io.sramTestBypass.toFrontend.mbistReset.get := io.sramTestBypass.fromL2Top.mbistReset.get
    io.sramTestBypass.toBackend.mbist.get := io.sramTestBypass.fromL2Top.mbist.get
    io.sramTestBypass.toBackend.mbistReset.get := io.sramTestBypass.fromL2Top.mbistReset.get
    cg.cgen := io.sramTestBypass.fromL2Top.mbist.get.cgen
  } else {
    cg.cgen := false.B
  }

  // sram debug: SRAM margin/timing control bits broadcast from L2Top,
  // registered once, then unpacked into the per-SRAM control fields.
  val sramCtl = Option.when(hasSramCtl)(RegNext(io.sramTestBypass.fromL2Top.sramCtl.get))
  sigFromSrams.foreach({ case sig => sig.sramCtl := DontCare })
  sigFromSrams.zip(sramCtl).foreach {
    case (sig, ctl) =>
      sig.sramCtl.RTSEL := ctl(1, 0) // CFG[1 : 0]
      sig.sramCtl.WTSEL := ctl(3, 2) // CFG[3 : 2]
      sig.sramCtl.MCR := ctl(5, 4) // CFG[5 : 4]
      sig.sramCtl.MCW := ctl(7, 6) // CFG[7 : 6]
  }
  if (hasSramCtl) {
    io.sramTestBypass.toFrontend.sramCtl.get := sramCtl.get
  }
}

/**
 * Diplomacy wrapper for the memory block. The actual logic lives in
 * MemBlockInlined; this outer LazyModule exists as a non-inlined hierarchy
 * level (shouldBeInlined = false) whose module implementation is MemBlockImp.
 */
class MemBlock()(implicit p: Parameters) extends LazyModule
  with HasXSParameter {
  override def shouldBeInlined: Boolean = false

  val inner = LazyModule(new MemBlockInlined())

  lazy val module = new MemBlockImp(this)
}

/**
 * Module implementation of MemBlock: a thin pass-through shell that clones the
 * inner (inlined) module's IO and bulk-connects it, and optionally generates a
 * reset tree for the inner module when ResetGen is enabled.
 */
class MemBlockImp(wrapper: MemBlock) extends LazyModuleImp(wrapper) {
  val io = IO(wrapper.inner.module.io.cloneType)
  val io_perf = IO(wrapper.inner.module.io_perf.cloneType)
  io <> wrapper.inner.module.io
  io_perf <> wrapper.inner.module.io_perf

  if (p(DebugOptionsKey).ResetGen) {
    ResetGen(
      ResetGenNode(Seq(ModuleNode(wrapper.inner.module))),
      reset, sim = false, io.sramTestBypass.fromL2Top.mbistReset
    )
  }
}