/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.diplomacy._
import freechips.rocketchip.diplomacy.{BundleBridgeSource, LazyModule, LazyModuleImp}
import freechips.rocketchip.interrupts.{IntSinkNode, IntSinkPortSimple}
import freechips.rocketchip.tile.HasFPUParameters
import freechips.rocketchip.tilelink._
import device.MsiInfoBundle
import utils._
import utility._
import utility.mbist.{MbistInterface, MbistPipeline}
import utility.sram.{SramMbistBundle, SramBroadcastBundle, SramHelper}
import system.SoCParamsKey
import xiangshan._
import xiangshan.ExceptionNO._
import xiangshan.frontend.HasInstrMMIOConst
import xiangshan.backend.Bundles.{DynInst, MemExuInput, MemExuOutput}
import xiangshan.backend.ctrlblock.{DebugLSIO, LsTopdownInfo}
import xiangshan.backend.exu.MemExeUnit
import xiangshan.backend.fu._
import xiangshan.backend.fu.FuType._
import xiangshan.backend.fu.NewCSR.{CsrTriggerBundle, TriggerUtil, PFEvent}
import xiangshan.backend.fu.util.{CSRConst, SdtrigExt}
import xiangshan.backend.{BackendToTopBundle, TopToBackendBundle}
import xiangshan.backend.rob.{RobDebugRollingIO, RobPtr, RobLsqIO}
import xiangshan.backend.datapath.NewPipelineConnect
import xiangshan.backend.trace.{Itype, TraceCoreInterface}
import xiangshan.backend.Bundles._
import xiangshan.mem._
import xiangshan.mem.mdp._
import xiangshan.mem.Bundles._
import xiangshan.mem.prefetch.{BasePrefecher, L1Prefetcher, SMSParams, SMSPrefetcher}
import xiangshan.cache._
import xiangshan.cache.mmu._
import coupledL2.PrefetchRecv
import system.HasSoCParameter

/**
 * Unit counts and fixed port indices shared by the memory block.
 *
 * The per-kind counts are read straight from `backendParams`; the remaining
 * values are sums of those counts or literal port indices.
 */
trait HasMemBlockParameters extends HasXSParameter {
  // number of memory units
  val LduCnt  = backendParams.LduCnt
  val StaCnt  = backendParams.StaCnt
  val StdCnt  = backendParams.StdCnt
  val HyuCnt  = backendParams.HyuCnt
  val VlduCnt = backendParams.VlduCnt
  val VstuCnt = backendParams.VstuCnt

  // derived totals: a hybrid unit is counted on both the load side and the store-address side
  val LdExuCnt  = LduCnt + HyuCnt
  val StAddrCnt = StaCnt + HyuCnt
  val StDataCnt = StdCnt
  val MemExuCnt = LduCnt + HyuCnt + StaCnt + StdCnt
  val MemAddrExtCnt = LdExuCnt + StaCnt
  val MemVExuCnt = VlduCnt + VstuCnt

  // indices into the load write-back ports (used to index loadUnits / writebackLda)
  val AtomicWBPort   = 0
  val MisalignWBPort = 1
  val UncacheWBPort  = 2
  // NOTE(review): presumably the write-back ports usable by non-cacheable accesses; consumer is not in this file section
  val NCWBPorts = Seq(1, 2)
}

/** Base class for bundles that need the [[HasMemBlockParameters]] values; keeps `p` as a public member. */
abstract class MemBlockBundle(implicit val p: Parameters) extends Bundle with HasMemBlockParameters

/**
 * Function unit that forwards its first source operand as the result.
 *
 * The handshake is passed through combinationally (`in.ready` follows `out.ready`,
 * `out.valid` follows `in.valid`). All output fields are first driven to zero and
 * then `res.data` and `ctrl.robIdx` are overridden, relying on last-connect semantics.
 *
 * @param cfg function-unit configuration handed to `FuncUnit`
 */
class Std(cfg: FuConfig)(implicit p: Parameters) extends FuncUnit(cfg) {
  io.in.ready := io.out.ready
  io.out.valid := io.in.valid
  // zero everything first; the two assignments below must stay after this line
  io.out.bits := 0.U.asTypeOf(io.out.bits)
  io.out.bits.res.data := io.in.bits.data.src(0)
  io.out.bits.ctrl.robIdx := io.in.bits.ctrl.robIdx
}

/**
 * Signals entering the memory block from the out-of-order backend.
 *
 * Fields are declared from the memory block's point of view, i.e. as `Input`
 * or `Flipped` (the direction of `enqLsq` is defined by `LsqEnqIO` itself).
 */
class ooo_to_mem(implicit p: Parameters) extends MemBlockBundle {
  val backendToTopBypass = Flipped(new BackendToTopBundle)

  val loadFastMatch = Vec(LdExuCnt, Input(UInt(LdExuCnt.W)))
  val loadFastFuOpType = Vec(LdExuCnt, Input(FuOpType()))
  val loadFastImm = Vec(LdExuCnt, Input(UInt(12.W)))
  val sfence = Input(new SfenceBundle)
  val tlbCsr = Input(new TlbCsrBundle)
  val lsqio = new Bundle {
    // commit counters are wide enough to hold 0..CommitWidth inclusive
    val lcommit          = Input(UInt(log2Up(CommitWidth + 1).W))
    val scommit          = Input(UInt(log2Up(CommitWidth + 1).W))
    val pendingMMIOld    = Input(Bool())
    val pendingld        = Input(Bool())
    val pendingst        = Input(Bool())
    val pendingVst       = Input(Bool())
    val commit           = Input(Bool())
    val pendingPtr       = Input(new RobPtr)
    val pendingPtrNext   = Input(new RobPtr)
  }

  val isStoreException = Input(Bool())
  val isVlsException = Input(Bool())
  val csrCtrl = Flipped(new CustomCSRCtrlIO)
  val enqLsq = new LsqEnqIO
  val flushSb = Input(Bool())

  val storePc = Vec(StaCnt, Input(UInt(VAddrBits.W))) // for hw prefetch
  val hybridPc = Vec(HyuCnt, Input(UInt(VAddrBits.W))) // for hw prefetch

  val issueLda = MixedVec(Seq.fill(LduCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueSta = MixedVec(Seq.fill(StaCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueStd = MixedVec(Seq.fill(StdCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueHya = MixedVec(Seq.fill(HyuCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueVldu = MixedVec(Seq.fill(VlduCnt)(Flipped(DecoupledIO(new MemExuInput(isVector=true)))))

  /** All issue ports concatenated in the order Lda, Sta, Std, Hya, Vldu. */
  def issueUops = issueLda ++ issueSta ++ issueStd ++ issueHya ++ issueVldu
}

/**
 * Signals leaving the memory block towards the out-of-order backend.
 *
 * Most fields are outputs; `storeDebugInfo(i).pc` is the exception and is an
 * `Input` carried inside this bundle.
 */
class mem_to_ooo(implicit p: Parameters) extends MemBlockBundle {
  val topToBackendBypass = new TopToBackendBundle

  val otherFastWakeup = Vec(LdExuCnt, ValidIO(new DynInst))
  val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize + 1).W))
  val sqCancelCnt = Output(UInt(log2Up(StoreQueueSize + 1).W))
  val sqDeq = Output(UInt(log2Ceil(EnsbufferWidth + 1).W))
  val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
  // used by VLSU issue queue, the vector store would wait all store before it, and the vector load would wait all load
  val sqDeqPtr = Output(new SqPtr)
  val lqDeqPtr = Output(new LqPtr)
  val stIn = Vec(StAddrCnt, ValidIO(new MemExuInput))
  val stIssuePtr = Output(new SqPtr())

  val memoryViolation = ValidIO(new Redirect)
  val sbIsEmpty = Output(Bool())

  val lsTopdownInfo = Vec(LdExuCnt, Output(new LsTopdownInfo))

  val lsqio = new Bundle {
    val vaddr = Output(UInt(XLEN.W))
    val vstart = Output(UInt((log2Up(VLEN) + 1).W))
    val vl = Output(UInt((log2Up(VLEN) + 1).W))
    val gpaddr = Output(UInt(XLEN.W))
    val isForVSnonLeafPTE = Output(Bool())
    val mmio = Output(Vec(LoadPipelineWidth, Bool()))
    val uop = Output(Vec(LoadPipelineWidth, new DynInst))
    val lqCanAccept = Output(Bool())
    val sqCanAccept = Output(Bool())
  }

  val storeDebugInfo = Vec(EnsbufferWidth, new Bundle {
    val robidx = Output(new RobPtr)
    // opposite direction to the rest of this bundle: supplied to the memory block
    val pc     = Input(UInt(VAddrBits.W))
  })

  val writebackLda = Vec(LduCnt, DecoupledIO(new MemExuOutput))
  val writebackSta = Vec(StaCnt, DecoupledIO(new MemExuOutput))
  val writebackStd = Vec(StdCnt, DecoupledIO(new MemExuOutput))
  val writebackHyuLda = Vec(HyuCnt, DecoupledIO(new MemExuOutput))
  val writebackHyuSta = Vec(HyuCnt, DecoupledIO(new MemExuOutput))
  val writebackVldu = Vec(VlduCnt, DecoupledIO(new MemExuOutput(isVector = true)))

  /**
   * All write-back ports as one sequence.
   *
   * The order is Sta, HyuLda, HyuSta, Lda, Vldu, Std; it differs from the
   * declaration order above, so positional consumers depend on it.
   */
  def writeBack: Seq[DecoupledIO[MemExuOutput]] = {
    writebackSta ++
      writebackHyuLda ++ writebackHyuSta ++
      writebackLda ++
      writebackVldu ++
      writebackStd
  }

  val ldaIqFeedback = Vec(LduCnt, new MemRSFeedbackIO)
  val staIqFeedback = Vec(StaCnt, new MemRSFeedbackIO)
  val hyuIqFeedback = Vec(HyuCnt, new MemRSFeedbackIO)
  val vstuIqFeedback= Vec(VstuCnt, new MemRSFeedbackIO(isVector = true))
  val vlduIqFeedback= Vec(VlduCnt, new MemRSFeedbackIO(isVector = true))
  // NOTE(review): sized with backendParams.LdExuCnt rather than the local LdExuCnt (LduCnt + HyuCnt); confirm they agree
  val ldCancel = Vec(backendParams.LdExuCnt, new LoadCancelIO)
  val wakeup = Vec(backendParams.LdExuCnt, Valid(new DynInst))

  val s3_delayed_load_error = Vec(LdExuCnt, Output(Bool()))
}

/** Five single-bit outputs describing the ROB-head load for top-down analysis (meanings follow the field names). */
class MemCoreTopDownIO extends Bundle {
  val robHeadMissInDCache = Output(Bool())
  val robHeadTlbReplay = Output(Bool())
  val robHeadTlbMiss = Output(Bool())
  val robHeadLoadVio = Output(Bool())
  val robHeadLoadMSHR = Output(Bool())
}

/** Frontend-to-memory-block bundle: a single flipped `TlbPtwIO` named `itlb`. */
class fetch_to_mem(implicit p: Parameters) extends XSBundle{
  val itlb = Flipped(new TlbPtwIO())
}

// triple buffer applied in i-mmio path (two at MemBlock, one at L2Top)
/**
 * TileLink buffer for the instruction-uncache (MMIO fetch) path.
 *
 * The module body inserts two default buffers on channel A and two on channel D
 * for every in/out edge pair, then drives several channel-A fields of the
 * outgoing request with constants.
 */
class InstrUncacheBuffer()(implicit p: Parameters) extends LazyModule with HasInstrMMIOConst {
  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
  lazy val module = new InstrUncacheBufferImpl

  class InstrUncacheBufferImpl extends LazyModuleImp(this) {
    // edge parameters are not needed here, so they are not bound
    (node.in zip node.out) foreach { case ((in, _), (out, _)) =>
      out.a <> BufferParams.default(BufferParams.default(in.a))
      in.d <> BufferParams.default(BufferParams.default(out.d))

      // only a.valid, a.ready, a.address can change
      // hoping that the rest would be optimized to keep MemBlock port unchanged after adding buffer
      // these overrides must stay after the bulk connect of out.a above (last connect wins)
      out.a.bits.data := 0.U
      out.a.bits.mask := Fill(mmioBusBytes, 1.U(1.W))
      out.a.bits.opcode := 4.U // Get
      out.a.bits.size := log2Ceil(mmioBusBytes).U
      out.a.bits.source := 0.U
    }
  }
}

// triple buffer applied in L1I$-L2 path (two at MemBlock, one at L2Top)
/** TileLink buffer for the L1 ICache to L2 path: two default buffers on channel A and two on channel D per edge pair. */
class ICacheBuffer()(implicit p: Parameters) extends LazyModule {
  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
  lazy val module = new ICacheBufferImpl

  class ICacheBufferImpl extends LazyModuleImp(this) {
    (node.in zip node.out) foreach { case ((in, _), (out, _)) =>
      out.a <> BufferParams.default(BufferParams.default(in.a))
      in.d <> BufferParams.default(BufferParams.default(out.d))
    }
  }
}

/** TileLink buffer for the ICache control path; same structure as [[ICacheBuffer]] (two buffers each on A and D). */
class ICacheCtrlBuffer()(implicit p: Parameters) extends LazyModule {
  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
  lazy val module = new ICacheCtrlBufferImpl

  class ICacheCtrlBufferImpl extends LazyModuleImp(this) {
    (node.in zip node.out) foreach { case ((in, _), (out, _)) =>
      out.a <> BufferParams.default(BufferParams.default(in.a))
      in.d <> BufferParams.default(BufferParams.default(out.d))
    }
  }
}

// Frontend bus goes through MemBlock
/**
 * Groups the three frontend buffers and exposes only their diplomacy nodes.
 *
 * Each buffer LazyModule is given a suggested name before its node is taken;
 * the module implementation itself is empty.
 */
class FrontendBridge()(implicit p: Parameters) extends LazyModule {
  val icache_node = LazyModule(new ICacheBuffer()).suggestName("icache").node// to keep IO port name
  val icachectrl_node = LazyModule(new ICacheCtrlBuffer()).suggestName("icachectrl").node
  val instr_uncache_node = LazyModule(new InstrUncacheBuffer()).suggestName("instr_uncache").node
  lazy val module = new LazyModuleImp(this) {
  }
}

/**
 * Diplomacy (LazyModule) half of the memory block.
 *
 * Instantiates the dcache, uncache and L2 TLB wrappers, the optional buffers and
 * prefetch senders, the frontend bridge and the interrupt sink nodes, and wires
 * the uncache crossbar. `shouldBeInlined` is overridden to `true`.
 *
 * `ptw_to_l2_buffer` and `l1d_to_l2_buffer` are `null` (not `Option`) when the
 * corresponding feature is disabled, so users must guard on the same condition.
 */
class MemBlockInlined()(implicit p: Parameters) extends LazyModule
  with HasXSParameter {
  override def shouldBeInlined: Boolean = true

  val dcache = LazyModule(new DCacheWrapper())
  val uncache = LazyModule(new Uncache())
  val uncache_port = TLTempNode()
  val uncache_xbar = TLXbar()
  val ptw = LazyModule(new L2TLBWrapper())
  val ptw_to_l2_buffer = if (!coreParams.softPTW) LazyModule(new TLBuffer) else null
  val l1d_to_l2_buffer = if (coreParams.dcacheParametersOpt.nonEmpty) LazyModule(new TLBuffer) else null
  val dcache_port = TLNameNode("dcache_client") // to keep dcache-L2 port name
  val l2_pf_sender_opt = coreParams.prefetcher.map(_ =>
    BundleBridgeSource(() => new PrefetchRecv)
  )
  // only created when an L3 is configured AND a prefetcher is configured
  val l3_pf_sender_opt = if (p(SoCParamsKey).L3CacheParamsOpt.nonEmpty) coreParams.prefetcher.map(_ =>
    BundleBridgeSource(() => new huancun.PrefetchRecv)
  ) else None
  val frontendBridge = LazyModule(new FrontendBridge)
  // interrupt sinks
  val clint_int_sink = IntSinkNode(IntSinkPortSimple(1, 2))
  val debug_int_sink = IntSinkNode(IntSinkPortSimple(1, 1))
  val plic_int_sink = IntSinkNode(IntSinkPortSimple(2, 1))
  val nmi_int_sink = IntSinkNode(IntSinkPortSimple(1, (new NonmaskableInterruptIO).elements.size))
  val beu_local_int_sink = IntSinkNode(IntSinkPortSimple(1, 1))

  // same guard as the construction of ptw_to_l2_buffer, which is null otherwise
  if (!coreParams.softPTW) {
    ptw_to_l2_buffer.node := ptw.node
  }
  // uncache client -> one buffer -> xbar; the xbar then fans out through 2-deep buffer chains
  uncache_xbar := TLBuffer() := uncache.clientNode
  if (dcache.uncacheNode.isDefined) {
    dcache.uncacheNode.get := TLBuffer.chainNode(2) := uncache_xbar
  }
  uncache_port := TLBuffer.chainNode(2) := uncache_xbar

  lazy val module = new MemBlockInlinedImp(this)
}

class MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer)
  with HasXSParameter
  with HasFPUParameters
  with HasPerfEvents
  with HasL1PrefetchSourceParameter
  with HasCircularQueuePtrHelper
  with HasMemBlockParameters
  with HasTlbConst
  with SdtrigExt
{
  val io = IO(new Bundle {
    val hartId = Input(UInt(hartIdLen.W))
    val redirect = Flipped(ValidIO(new Redirect))

    val ooo_to_mem = new ooo_to_mem
    val mem_to_ooo = new mem_to_ooo
    val fetch_to_mem = new fetch_to_mem

    val ifetchPrefetch = Vec(LduCnt, ValidIO(new SoftIfetchPrefetchBundle))

    // misc
    val error = ValidIO(new L1CacheErrorInfo)
    val memInfo = new Bundle {
      val sqFull = Output(Bool())
      val lqFull = Output(Bool())
      val dcacheMSHRFull = Output(Bool())
    }
    val debug_ls = new DebugLSIO
    val l2_hint = Input(Valid(new L2ToL1Hint()))
    val l2PfqBusy = Input(Bool())
    val l2_tlb_req = Flipped(new TlbRequestIO(nRespDups = 2))
    val l2_pmp_resp = new PMPRespBundle
    val l2_flush_done =
Input(Bool()) 326c590fb32Scz4e 327c590fb32Scz4e val debugTopDown = new Bundle { 328c590fb32Scz4e val robHeadVaddr = Flipped(Valid(UInt(VAddrBits.W))) 329c590fb32Scz4e val toCore = new MemCoreTopDownIO 330c590fb32Scz4e } 331c590fb32Scz4e val debugRolling = Flipped(new RobDebugRollingIO) 332c590fb32Scz4e 333c590fb32Scz4e // All the signals from/to frontend/backend to/from bus will go through MemBlock 334c590fb32Scz4e val fromTopToBackend = Input(new Bundle { 335529b1cfdSTang Haojin val msiInfo = ValidIO(new MsiInfoBundle) 336c590fb32Scz4e val clintTime = ValidIO(UInt(64.W)) 337c590fb32Scz4e }) 338c590fb32Scz4e val inner_hartId = Output(UInt(hartIdLen.W)) 339c590fb32Scz4e val inner_reset_vector = Output(UInt(PAddrBits.W)) 340c590fb32Scz4e val outer_reset_vector = Input(UInt(PAddrBits.W)) 341c590fb32Scz4e val outer_cpu_halt = Output(Bool()) 342c590fb32Scz4e val outer_l2_flush_en = Output(Bool()) 343c590fb32Scz4e val outer_power_down_en = Output(Bool()) 344c590fb32Scz4e val outer_cpu_critical_error = Output(Bool()) 345c590fb32Scz4e val inner_beu_errors_icache = Input(new L1BusErrorUnitInfo) 346c590fb32Scz4e val outer_beu_errors_icache = Output(new L1BusErrorUnitInfo) 347c590fb32Scz4e val inner_hc_perfEvents = Output(Vec(numPCntHc * coreParams.L2NBanks + 1, new PerfEvent)) 348c590fb32Scz4e val outer_hc_perfEvents = Input(Vec(numPCntHc * coreParams.L2NBanks + 1, new PerfEvent)) 349c590fb32Scz4e 350c590fb32Scz4e // reset signals of frontend & backend are generated in memblock 351c590fb32Scz4e val reset_backend = Output(Reset()) 352c590fb32Scz4e // Reset singal from frontend. 
353c590fb32Scz4e val resetInFrontendBypass = new Bundle{ 354c590fb32Scz4e val fromFrontend = Input(Bool()) 355c590fb32Scz4e val toL2Top = Output(Bool()) 356c590fb32Scz4e } 357c590fb32Scz4e val traceCoreInterfaceBypass = new Bundle{ 358c590fb32Scz4e val fromBackend = Flipped(new TraceCoreInterface(hasOffset = true)) 359c590fb32Scz4e val toL2Top = new TraceCoreInterface 360c590fb32Scz4e } 361c590fb32Scz4e 362c590fb32Scz4e val topDownInfo = new Bundle { 363c590fb32Scz4e val fromL2Top = Input(new TopDownFromL2Top) 364c590fb32Scz4e val toBackend = Flipped(new TopDownInfo) 365c590fb32Scz4e } 366*602aa9f1Scz4e val sramTestBypass = new Bundle() { 367*602aa9f1Scz4e val fromL2Top = new Bundle() { 368*602aa9f1Scz4e val mbist = Option.when(hasMbist)(Input(new SramMbistBundle)) 369*602aa9f1Scz4e val mbistReset = Option.when(hasMbist)(Input(new DFTResetSignals())) 370*602aa9f1Scz4e val sramCtl = Option.when(hasSramCtl)(Input(UInt(64.W))) 371*602aa9f1Scz4e } 372*602aa9f1Scz4e val toFrontend = new Bundle() { 373*602aa9f1Scz4e val mbist = Option.when(hasMbist)(Output(new SramMbistBundle)) 374*602aa9f1Scz4e val mbistReset = Option.when(hasMbist)(Output(new DFTResetSignals())) 375*602aa9f1Scz4e val sramCtl = Option.when(hasSramCtl)(Output(UInt(64.W))) 376*602aa9f1Scz4e } 377*602aa9f1Scz4e val toBackend = new Bundle() { 378*602aa9f1Scz4e val mbist = Option.when(hasMbist)(Output(new SramMbistBundle)) 379*602aa9f1Scz4e val mbistReset = Option.when(hasMbist)(Output(new DFTResetSignals())) 380*602aa9f1Scz4e } 381*602aa9f1Scz4e } 382c590fb32Scz4e }) 383c590fb32Scz4e 384c590fb32Scz4e dontTouch(io.inner_hartId) 385c590fb32Scz4e dontTouch(io.inner_reset_vector) 386c590fb32Scz4e dontTouch(io.outer_reset_vector) 387c590fb32Scz4e dontTouch(io.outer_cpu_halt) 388c590fb32Scz4e dontTouch(io.outer_l2_flush_en) 389c590fb32Scz4e dontTouch(io.outer_power_down_en) 390c590fb32Scz4e dontTouch(io.outer_cpu_critical_error) 391c590fb32Scz4e dontTouch(io.inner_beu_errors_icache) 392c590fb32Scz4e 
dontTouch(io.outer_beu_errors_icache) 393c590fb32Scz4e dontTouch(io.inner_hc_perfEvents) 394c590fb32Scz4e dontTouch(io.outer_hc_perfEvents) 395c590fb32Scz4e 396c590fb32Scz4e val redirect = RegNextWithEnable(io.redirect) 397c590fb32Scz4e 398c590fb32Scz4e private val dcache = outer.dcache.module 399c590fb32Scz4e val uncache = outer.uncache.module 400c590fb32Scz4e 401c590fb32Scz4e //val delayedDcacheRefill = RegNext(dcache.io.lsu.lsq) 402c590fb32Scz4e 403c590fb32Scz4e val csrCtrl = DelayN(io.ooo_to_mem.csrCtrl, 2) 404c590fb32Scz4e dcache.io.l2_pf_store_only := RegNext(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_store_only, false.B) 405c590fb32Scz4e io.error <> DelayNWithValid(dcache.io.error, 2) 406c590fb32Scz4e when(!csrCtrl.cache_error_enable){ 407c590fb32Scz4e io.error.bits.report_to_beu := false.B 408c590fb32Scz4e io.error.valid := false.B 409c590fb32Scz4e } 410c590fb32Scz4e 411c590fb32Scz4e val loadUnits = Seq.fill(LduCnt)(Module(new LoadUnit)) 412c590fb32Scz4e val storeUnits = Seq.fill(StaCnt)(Module(new StoreUnit)) 413c590fb32Scz4e val stdExeUnits = Seq.fill(StdCnt)(Module(new MemExeUnit(backendParams.memSchdParams.get.issueBlockParams.find(_.StdCnt != 0).get.exuBlockParams.head))) 414c590fb32Scz4e val hybridUnits = Seq.fill(HyuCnt)(Module(new HybridUnit)) // Todo: replace it with HybridUnit 415c590fb32Scz4e val stData = stdExeUnits.map(_.io.out) 416c590fb32Scz4e val exeUnits = loadUnits ++ storeUnits 417c590fb32Scz4e 418c590fb32Scz4e // The number of vector load/store units is decoupled with the number of load/store units 419c590fb32Scz4e val vlSplit = Seq.fill(VlduCnt)(Module(new VLSplitImp)) 420c590fb32Scz4e val vsSplit = Seq.fill(VstuCnt)(Module(new VSSplitImp)) 421c590fb32Scz4e val vlMergeBuffer = Module(new VLMergeBufferImp) 422c590fb32Scz4e val vsMergeBuffer = Seq.fill(VstuCnt)(Module(new VSMergeBufferImp)) 423c590fb32Scz4e val vSegmentUnit = Module(new VSegmentUnit) 424c590fb32Scz4e val vfofBuffer = Module(new VfofBuffer) 425c590fb32Scz4e 426c590fb32Scz4e // 
misalign Buffer 427c590fb32Scz4e val loadMisalignBuffer = Module(new LoadMisalignBuffer) 428c590fb32Scz4e val storeMisalignBuffer = Module(new StoreMisalignBuffer) 429c590fb32Scz4e 430c590fb32Scz4e val l1_pf_req = Wire(Decoupled(new L1PrefetchReq())) 431c590fb32Scz4e dcache.io.sms_agt_evict_req.ready := false.B 432c590fb32Scz4e val prefetcherOpt: Option[BasePrefecher] = coreParams.prefetcher.map { 433c590fb32Scz4e case _: SMSParams => 434c590fb32Scz4e val sms = Module(new SMSPrefetcher()) 435c590fb32Scz4e sms.io_agt_en := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_agt, 2, Some(false.B)) 436c590fb32Scz4e sms.io_pht_en := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_pht, 2, Some(false.B)) 437c590fb32Scz4e sms.io_act_threshold := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_active_threshold, 2, Some(12.U)) 438c590fb32Scz4e sms.io_act_stride := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_active_stride, 2, Some(30.U)) 439c590fb32Scz4e sms.io_stride_en := false.B 440c590fb32Scz4e sms.io_dcache_evict <> dcache.io.sms_agt_evict_req 4414b2c87baS梁森 Liang Sen val mbistSmsPl = MbistPipeline.PlaceMbistPipeline(1, "MbistPipeSms", hasMbist) 442c590fb32Scz4e sms 443c590fb32Scz4e } 444c590fb32Scz4e prefetcherOpt.foreach{ pf => pf.io.l1_req.ready := false.B } 445c590fb32Scz4e val hartId = p(XSCoreParamsKey).HartId 446c590fb32Scz4e val l1PrefetcherOpt: Option[BasePrefecher] = coreParams.prefetcher.map { 447c590fb32Scz4e case _ => 448c590fb32Scz4e val l1Prefetcher = Module(new L1Prefetcher()) 4499db05eaeScz4e val enableL1StreamPrefetcher = Constantin.createRecord(s"enableL1StreamPrefetcher$hartId", initValue = true) 4509db05eaeScz4e l1Prefetcher.io.enable := enableL1StreamPrefetcher && 4519db05eaeScz4e GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable, 2, Some(false.B)) 452c590fb32Scz4e l1Prefetcher.pf_ctrl <> dcache.io.pf_ctrl 453c590fb32Scz4e l1Prefetcher.l2PfqBusy := io.l2PfqBusy 454c590fb32Scz4e 455c590fb32Scz4e // stride will train 
on miss or prefetch hit 456c590fb32Scz4e for (i <- 0 until LduCnt) { 457c590fb32Scz4e val source = loadUnits(i).io.prefetch_train_l1 458c590fb32Scz4e l1Prefetcher.stride_train(i).valid := source.valid && source.bits.isFirstIssue && ( 459c590fb32Scz4e source.bits.miss || isFromStride(source.bits.meta_prefetch) 460c590fb32Scz4e ) 461c590fb32Scz4e l1Prefetcher.stride_train(i).bits := source.bits 462c590fb32Scz4e val loadPc = RegNext(io.ooo_to_mem.issueLda(i).bits.uop.pc) // for s1 463c590fb32Scz4e l1Prefetcher.stride_train(i).bits.uop.pc := Mux( 464c590fb32Scz4e loadUnits(i).io.s2_ptr_chasing, 465c590fb32Scz4e RegEnable(loadPc, loadUnits(i).io.s2_prefetch_spec), 466c590fb32Scz4e RegEnable(RegEnable(loadPc, loadUnits(i).io.s1_prefetch_spec), loadUnits(i).io.s2_prefetch_spec) 467c590fb32Scz4e ) 468c590fb32Scz4e } 469c590fb32Scz4e for (i <- 0 until HyuCnt) { 470c590fb32Scz4e val source = hybridUnits(i).io.prefetch_train_l1 471c590fb32Scz4e l1Prefetcher.stride_train.drop(LduCnt)(i).valid := source.valid && source.bits.isFirstIssue && ( 472c590fb32Scz4e source.bits.miss || isFromStride(source.bits.meta_prefetch) 473c590fb32Scz4e ) 474c590fb32Scz4e l1Prefetcher.stride_train.drop(LduCnt)(i).bits := source.bits 475c590fb32Scz4e l1Prefetcher.stride_train.drop(LduCnt)(i).bits.uop.pc := Mux( 476c590fb32Scz4e hybridUnits(i).io.ldu_io.s2_ptr_chasing, 477c590fb32Scz4e RegNext(io.ooo_to_mem.hybridPc(i)), 478c590fb32Scz4e RegNext(RegNext(io.ooo_to_mem.hybridPc(i))) 479c590fb32Scz4e ) 480c590fb32Scz4e } 481c590fb32Scz4e l1Prefetcher 482c590fb32Scz4e } 483c590fb32Scz4e // load prefetch to l1 Dcache 484c590fb32Scz4e l1PrefetcherOpt match { 485c590fb32Scz4e case Some(pf) => l1_pf_req <> Pipeline(in = pf.io.l1_req, depth = 1, pipe = false, name = Some("pf_queue_to_ldu_reg")) 486c590fb32Scz4e case None => 487c590fb32Scz4e l1_pf_req.valid := false.B 488c590fb32Scz4e l1_pf_req.bits := DontCare 489c590fb32Scz4e } 490c590fb32Scz4e val pf_train_on_hit = 
RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_train_on_hit, 2, Some(true.B)) 491c590fb32Scz4e 492c590fb32Scz4e loadUnits.zipWithIndex.map(x => x._1.suggestName("LoadUnit_"+x._2)) 493c590fb32Scz4e storeUnits.zipWithIndex.map(x => x._1.suggestName("StoreUnit_"+x._2)) 494c590fb32Scz4e hybridUnits.zipWithIndex.map(x => x._1.suggestName("HybridUnit_"+x._2)) 495c590fb32Scz4e val atomicsUnit = Module(new AtomicsUnit) 496c590fb32Scz4e 497c590fb32Scz4e 498c590fb32Scz4e val ldaExeWbReqs = Wire(Vec(LduCnt, Decoupled(new MemExuOutput))) 499c590fb32Scz4e // atomicsUnit will overwrite the source from ldu if it is about to writeback 500c590fb32Scz4e val atomicWritebackOverride = Mux( 501c590fb32Scz4e atomicsUnit.io.out.valid, 502c590fb32Scz4e atomicsUnit.io.out.bits, 503c590fb32Scz4e loadUnits(AtomicWBPort).io.ldout.bits 504c590fb32Scz4e ) 505c590fb32Scz4e ldaExeWbReqs(AtomicWBPort).valid := atomicsUnit.io.out.valid || loadUnits(AtomicWBPort).io.ldout.valid 506c590fb32Scz4e ldaExeWbReqs(AtomicWBPort).bits := atomicWritebackOverride 507c590fb32Scz4e atomicsUnit.io.out.ready := ldaExeWbReqs(AtomicWBPort).ready 508c590fb32Scz4e loadUnits(AtomicWBPort).io.ldout.ready := ldaExeWbReqs(AtomicWBPort).ready 509c590fb32Scz4e 510c590fb32Scz4e val st_data_atomics = Seq.tabulate(StdCnt)(i => 511c590fb32Scz4e stData(i).valid && FuType.storeIsAMO(stData(i).bits.uop.fuType) 512c590fb32Scz4e ) 513c590fb32Scz4e 514c590fb32Scz4e // misalignBuffer will overwrite the source from ldu if it is about to writeback 515c590fb32Scz4e val misalignWritebackOverride = Mux( 516c590fb32Scz4e loadUnits(MisalignWBPort).io.ldout.valid, 517c590fb32Scz4e loadUnits(MisalignWBPort).io.ldout.bits, 518c590fb32Scz4e loadMisalignBuffer.io.writeBack.bits 519c590fb32Scz4e ) 520c590fb32Scz4e ldaExeWbReqs(MisalignWBPort).valid := loadMisalignBuffer.io.writeBack.valid || loadUnits(MisalignWBPort).io.ldout.valid 521c590fb32Scz4e ldaExeWbReqs(MisalignWBPort).bits := misalignWritebackOverride 522c590fb32Scz4e 
loadMisalignBuffer.io.writeBack.ready := ldaExeWbReqs(MisalignWBPort).ready && !loadUnits(MisalignWBPort).io.ldout.valid 523c590fb32Scz4e loadMisalignBuffer.io.loadOutValid := loadUnits(MisalignWBPort).io.ldout.valid 524c590fb32Scz4e loadMisalignBuffer.io.loadVecOutValid := loadUnits(MisalignWBPort).io.vecldout.valid 525c590fb32Scz4e loadUnits(MisalignWBPort).io.ldout.ready := ldaExeWbReqs(MisalignWBPort).ready 526c590fb32Scz4e ldaExeWbReqs(MisalignWBPort).bits.isFromLoadUnit := loadUnits(MisalignWBPort).io.ldout.bits.isFromLoadUnit || loadMisalignBuffer.io.writeBack.valid 527c590fb32Scz4e 528c590fb32Scz4e // loadUnit will overwrite the source from uncache if it is about to writeback 529c590fb32Scz4e ldaExeWbReqs(UncacheWBPort) <> loadUnits(UncacheWBPort).io.ldout 530c590fb32Scz4e io.mem_to_ooo.writebackLda <> ldaExeWbReqs 531c590fb32Scz4e io.mem_to_ooo.writebackSta <> storeUnits.map(_.io.stout) 532c590fb32Scz4e io.mem_to_ooo.writebackStd.zip(stdExeUnits).foreach {x => 533c590fb32Scz4e x._1.bits := x._2.io.out.bits 534c590fb32Scz4e // AMOs do not need to write back std now. 535c590fb32Scz4e x._1.valid := x._2.io.out.fire && !FuType.storeIsAMO(x._2.io.out.bits.uop.fuType) 536c590fb32Scz4e } 537c590fb32Scz4e io.mem_to_ooo.writebackHyuLda <> hybridUnits.map(_.io.ldout) 538c590fb32Scz4e io.mem_to_ooo.writebackHyuSta <> hybridUnits.map(_.io.stout) 539c590fb32Scz4e io.mem_to_ooo.otherFastWakeup := DontCare 540c590fb32Scz4e io.mem_to_ooo.otherFastWakeup.drop(HyuCnt).take(LduCnt).zip(loadUnits.map(_.io.fast_uop)).foreach{case(a,b)=> a := b} 541c590fb32Scz4e io.mem_to_ooo.otherFastWakeup.take(HyuCnt).zip(hybridUnits.map(_.io.ldu_io.fast_uop)).foreach{case(a,b)=> a:=b} 542c590fb32Scz4e val stOut = io.mem_to_ooo.writebackSta ++ io.mem_to_ooo.writebackHyuSta 543c590fb32Scz4e 544c590fb32Scz4e // prefetch to l1 req 545c590fb32Scz4e // Stream's confidence is always 1 546c590fb32Scz4e // (LduCnt + HyuCnt) l1_pf_reqs ? 
// Broadcast the single L1 prefetch request to every load and hybrid pipeline.
for (load_unit <- loadUnits) {
  load_unit.io.prefetch_req.valid <> l1_pf_req.valid
  load_unit.io.prefetch_req.bits <> l1_pf_req.bits
}

for (hybrid_unit <- hybridUnits) {
  hybrid_unit.io.ldu_io.prefetch_req.valid <> l1_pf_req.valid
  hybrid_unit.io.ldu_io.prefetch_req.bits <> l1_pf_req.bits
}

// NOTE: loadUnits(0) has higher bank conflict and miss queue arb priority than loadUnits(1) and loadUnits(2)
// when loadUnits(1)/loadUnits(2) stage 0 is busy, hw prefetch will never use that pipeline
val LowConfPorts = if (LduCnt == 2) Seq(1) else if (LduCnt == 3) Seq(1, 2) else Seq(0)
// These overrides must stay after the bulk `bits` connections above (last connect wins).
LowConfPorts.foreach { i => loadUnits(i).io.prefetch_req.bits.confidence := 0.U }
hybridUnits.foreach { hybrid_unit => hybrid_unit.io.ldu_io.prefetch_req.bits.confidence := 0.U }

// Per-pipeline acceptance flags, load units first, hybrid units after them.
// Hybrid units contribute their low-confidence flag to both lists.
val canAcceptHighConfPrefetch = loadUnits.map(_.io.canAcceptHighConfPrefetch) ++
                                hybridUnits.map(_.io.canAcceptLowConfPrefetch)
val canAcceptLowConfPrefetch = loadUnits.map(_.io.canAcceptLowConfPrefetch) ++
                               hybridUnits.map(_.io.canAcceptLowConfPrefetch)
// The request is ready as soon as any pipeline can take it.
l1_pf_req.ready := (0 until LduCnt + HyuCnt).map { i =>
  if (LowConfPorts.contains(i)) {
    loadUnits(i).io.canAcceptLowConfPrefetch
  } else {
    Mux(l1_pf_req.bits.confidence === 1.U, canAcceptHighConfPrefetch(i), canAcceptLowConfPrefetch(i))
  }
}.reduce(_ || _)

// l1 pf fuzzer interface
val DebugEnableL1PFFuzzer = false
if (DebugEnableL1PFFuzzer) {
  // l1 pf req fuzzer
  val fuzzer = Module(new L1PrefetchFuzzer())
  fuzzer.io.vaddr := DontCare
  fuzzer.io.paddr := DontCare

  // override load_unit prefetch_req
  for (load_unit <- loadUnits) {
    load_unit.io.prefetch_req.valid <> fuzzer.io.req.valid
    load_unit.io.prefetch_req.bits <> fuzzer.io.req.bits
  }

  // override hybrid_unit prefetch_req
  for (hybrid_unit <- hybridUnits) {
    hybrid_unit.io.ldu_io.prefetch_req.valid <> fuzzer.io.req.valid
    hybrid_unit.io.ldu_io.prefetch_req.bits <> fuzzer.io.req.bits
  }

  fuzzer.io.req.ready := l1_pf_req.ready
}

// TODO: fast load wakeup
val lsq     = Module(new LsqWrapper)
val sbuffer = Module(new Sbuffer)
// if you want to stress test dcache store, use FakeSbuffer
// val sbuffer = Module(new FakeSbuffer) // out of date now
io.mem_to_ooo.stIssuePtr := lsq.io.issuePtrExt

dcache.io.hartId      := io.hartId
lsq.io.hartId         := io.hartId
sbuffer.io.hartId     := io.hartId
atomicsUnit.io.hartId := io.hartId

dcache.io.lqEmpty := lsq.io.lqEmpty

// load/store prefetch to l2 cache
// Only elaborated when both the SMS prefetcher and the L1 prefetcher exist.
for (sms_pf <- prefetcherOpt; l1_pf <- l1PrefetcherOpt) {
  // Shorthand for the L2 prefetch sender port (a def, so no new named hardware val).
  def l2PfPort = outer.l2_pf_sender_opt.get.out.head._1

  val sms_pf_to_l2 = DelayNWithValid(sms_pf.io.l2_req, 2)
  val l1_pf_to_l2 = DelayNWithValid(l1_pf.io.l2_req, 2)

  // When both requests are valid, the L1 prefetcher's address/source is selected.
  l2PfPort.addr_valid := sms_pf_to_l2.valid || l1_pf_to_l2.valid
  l2PfPort.addr := Mux(l1_pf_to_l2.valid, l1_pf_to_l2.bits.addr, sms_pf_to_l2.bits.addr)
  l2PfPort.pf_source := Mux(l1_pf_to_l2.valid, l1_pf_to_l2.bits.source, sms_pf_to_l2.bits.source)
  l2PfPort.l2_pf_en := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_enable, 2, Some(true.B))

  sms_pf.io.enable := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable, 2, Some(false.B))

  val l2_trace = Wire(new LoadPfDbBundle)
  l2_trace.paddr := l2PfPort.addr
  val table = ChiselDB.createTable(s"L2PrefetchTrace$hartId", new LoadPfDbBundle, basicDB = false)
  table.log(l2_trace, l1_pf_to_l2.valid, "StreamPrefetchTrace", clock, reset)
  table.log(l2_trace, !l1_pf_to_l2.valid && sms_pf_to_l2.valid, "L2PrefetchTrace", clock, reset)

  // The L3 sender is optional; every access goes through the Option.
  val l1_pf_to_l3 = ValidIODelay(l1_pf.io.l3_req, 4)
  outer.l3_pf_sender_opt.foreach(_.out.head._1.addr_valid := l1_pf_to_l3.valid)
  outer.l3_pf_sender_opt.foreach(_.out.head._1.addr := l1_pf_to_l3.bits)
  outer.l3_pf_sender_opt.foreach(_.out.head._1.l2_pf_en := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_enable, 4, Some(true.B)))

  val l3_trace = Wire(new LoadPfDbBundle)
  l3_trace.paddr := outer.l3_pf_sender_opt.map(_.out.head._1.addr).getOrElse(0.U)
  val l3_table = ChiselDB.createTable(s"L3PrefetchTrace$hartId", new LoadPfDbBundle, basicDB = false)
  l3_table.log(l3_trace, l1_pf_to_l3.valid, "StreamPrefetchTrace", clock, reset)

  XSPerfAccumulate("prefetch_fire_l2", l2PfPort.addr_valid)
  XSPerfAccumulate("prefetch_fire_l3", outer.l3_pf_sender_opt.map(_.out.head._1.addr_valid).getOrElse(false.B))
  XSPerfAccumulate("l1pf_fire_l2", l1_pf_to_l2.valid)
  XSPerfAccumulate("sms_fire_l2", !l1_pf_to_l2.valid && sms_pf_to_l2.valid)
  XSPerfAccumulate("sms_block_by_l1pf", l1_pf_to_l2.valid && sms_pf_to_l2.valid)
}

// ptw
// sfence and the TLB CSRs are delayed by two register stages before use.
val sfence = RegNext(RegNext(io.ooo_to_mem.sfence))
val tlbcsr = RegNext(RegNext(io.ooo_to_mem.tlbCsr))
private val ptw = outer.ptw.module
private val ptw_to_l2_buffer = outer.ptw_to_l2_buffer.module
private val l1d_to_l2_buffer = outer.l1d_to_l2_buffer.module
ptw.io.hartId := io.hartId
ptw.io.sfence <> sfence
ptw.io.csr.tlb <> tlbcsr
ptw.io.csr.distribute_csr <> csrCtrl.distribute_csr

// No PTW performance events are collected when the soft PTW is configured.
val perfEventsPTW = if (coreParams.softPTW) Seq() else ptw.getPerfEvents

// dtlb
val dtlb_ld_tlb_ld = Module(new TLBNonBlock(LduCnt + HyuCnt + 1, 2, ldtlbParams))
val dtlb_st_tlb_st = Module(new TLBNonBlock(StaCnt, 1, sttlbParams))
val dtlb_prefetch_tlb_prefetch = Module(new TLBNonBlock(2, 2, pftlbParams))
val dtlb_ld = Seq(dtlb_ld_tlb_ld.io)
val dtlb_st = Seq(dtlb_st_tlb_st.io)
val dtlb_prefetch = Seq(dtlb_prefetch_tlb_prefetch.io)
/* tlb vec && constant variable */
val dtlb = dtlb_ld ++ dtlb_st ++ dtlb_prefetch
val (dtlb_ld_idx, dtlb_st_idx, dtlb_pf_idx) = (0, 1, 2)
val TlbSubSizeVec = Seq(LduCnt + HyuCnt + 1, StaCnt, 2) // (load + hyu + stream pf, store, sms+l2bop)
val DTlbSize = TlbSubSizeVec.sum
// Start offset of each TLB group inside the flattened requestor vector.
val TlbStartVec = TlbSubSizeVec.scanLeft(0)(_ + _).init
// Exclusive end offset of each TLB group inside the flattened requestor vector.
val TlbEndVec = TlbSubSizeVec.scanLeft(0)(_ + _).tail

val ptwio = Wire(new VectorTlbPtwIO(DTlbSize))
val dtlb_reqs = dtlb.flatMap(_.requestor)
val dtlb_pmps = dtlb.flatMap(_.pmp)
// Control inputs shared by every DTLB.
dtlb.foreach { tlb =>
  tlb.hartId := io.hartId
  tlb.sfence := sfence
  tlb.csr := tlbcsr
  tlb.flushPipe.foreach(_ := false.B) // non-block doesn't need
  tlb.redirect := redirect
}
if (refillBothTlb) {
  // A single replacer spans all TLBs, so all of them must use outer replacement.
  require(ldtlbParams.outReplace == sttlbParams.outReplace)
  require(ldtlbParams.outReplace == hytlbParams.outReplace)
  require(ldtlbParams.outReplace == pftlbParams.outReplace)
  require(ldtlbParams.outReplace)

  val replace = Module(new TlbReplace(DTlbSize, ldtlbParams))
  replace.io.apply_sep(
    dtlb_ld.map(_.replace) ++ dtlb_st.map(_.replace) ++ dtlb_prefetch.map(_.replace),
    ptwio.resp.bits.data.s1.entry.tag
  )
} else {
  // TODO: there will be bugs in TlbReplace when outReplace enable, since the order of Hyu is not right.
  if (ldtlbParams.outReplace) {
    val replace_ld = Module(new TlbReplace(LduCnt + 1, ldtlbParams))
    replace_ld.io.apply_sep(dtlb_ld.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
  }
  if (hytlbParams.outReplace) {
    val replace_hy = Module(new TlbReplace(HyuCnt, hytlbParams))
    replace_hy.io.apply_sep(dtlb_ld.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
  }
  if (sttlbParams.outReplace) {
    val replace_st = Module(new TlbReplace(StaCnt, sttlbParams))
    replace_st.io.apply_sep(dtlb_st.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
  }
  if (pftlbParams.outReplace) {
    val replace_pf = Module(new TlbReplace(2, pftlbParams))
    replace_pf.io.apply_sep(dtlb_prefetch.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
  }
}

// Register the PTW response for one cycle. The registered valid is suppressed
// when sfence is valid together with all three of satp/vsatp/hgatp `changed`.
val ptw_resp_next = RegEnable(ptwio.resp.bits, ptwio.resp.valid)
val ptw_resp_v = RegNext(
  ptwio.resp.valid &&
    !(sfence.valid && tlbcsr.satp.changed && tlbcsr.vsatp.changed && tlbcsr.hgatp.changed),
  init = false.B
)
ptwio.resp.ready := true.B

val tlbreplay = WireInit(VecInit(Seq.fill(LdExuCnt)(false.B)))
val tlbreplay_reg = GatedValidRegNext(tlbreplay)
val dtlb_ld0_tlbreplay_reg = GatedValidRegNext(dtlb_ld(0).tlbreplay)

if (backendParams.debugEn) { dontTouch(tlbreplay) }

// tlbreplay(port): the load DTLB's pending PTW request on this port is hit by
// the registered PTW response.
for (port <- 0 until LdExuCnt) {
  tlbreplay(port) := dtlb_ld(0).ptw.req(port).valid && ptw_resp_next.vector(0) && ptw_resp_v &&
    ptw_resp_next.data.hit(dtlb_ld(0).ptw.req(port).bits.vpn, tlbcsr.satp.asid, tlbcsr.vsatp.asid, tlbcsr.hgatp.vmid, allType = true, ignoreAsid = true)
}
// Forward every DTLB miss request to the PTW, except requests that the
// registered PTW response already answers for the requester's TLB group.
dtlb.flatMap(_.ptw.req).zipWithIndex.foreach { case (req, i) =>
  req.ready := ptwio.req(i).ready
  ptwio.req(i).bits := req.bits
  // Index of the TLB group (load / store / prefetch) that requestor i belongs to.
  val group =
    if (i < TlbEndVec(dtlb_ld_idx)) dtlb_ld_idx
    else if (i < TlbEndVec(dtlb_st_idx)) dtlb_st_idx
    else dtlb_pf_idx
  val vector_hit =
    if (refillBothTlb) Cat(ptw_resp_next.vector).orR
    else Cat(ptw_resp_next.vector.slice(TlbStartVec(group), TlbEndVec(group))).orR
  ptwio.req(i).valid := req.valid && !(ptw_resp_v && vector_hit && ptw_resp_next.data.hit(req.bits.vpn, tlbcsr.satp.asid, tlbcsr.vsatp.asid, tlbcsr.hgatp.vmid, allType = true, ignoreAsid = true))
}

// Refill: every TLB sees the response data; valid is either broadcast to all
// TLBs or restricted to the group whose requestor bits are set.
dtlb.foreach(_.ptw.resp.bits := ptw_resp_next.data)
if (refillBothTlb) {
  dtlb.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector).orR)
} else {
  Seq(dtlb_ld -> dtlb_ld_idx, dtlb_st -> dtlb_st_idx, dtlb_prefetch -> dtlb_pf_idx).foreach { case (tlbs, grp) =>
    tlbs.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(grp), TlbEndVec(grp))).orR)
  }
}
// getGpa overrides must stay after the bulk `resp.bits` assignment above.
dtlb_ld.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.take(LduCnt + HyuCnt + 1)).orR)
dtlb_st.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.slice(LduCnt + HyuCnt + 1, LduCnt + HyuCnt + 1 + StaCnt)).orR)
dtlb_prefetch.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.drop(LduCnt + HyuCnt + 1 + StaCnt)).orR)

// ptw.io.tlb(1) serves the data side, ptw.io.tlb(0) the instruction side.
val dtlbRepeater  = PTWNewFilter(ldtlbParams.fenceDelay, ptwio, ptw.io.tlb(1), sfence, tlbcsr, l2tlbParams.dfilterSize)
val itlbRepeater3 = PTWRepeaterNB(passReady = false, itlbParams.fenceDelay, io.fetch_to_mem.itlb, ptw.io.tlb(0), sfence, tlbcsr)

lsq.io.debugTopDown.robHeadMissInDTlb := dtlbRepeater.io.rob_head_miss_in_tlb

// pmp
val pmp = Module(new PMP())
pmp.io.distribute_csr <> csrCtrl.distribute_csr

// One PMP checker per DTLB requestor port.
val pmp_checkers = Seq.fill(DTlbSize)(Module(new PMPChecker(4, leaveHitMux = true)))
val pmp_check = pmp_checkers.map(_.io)
pmp_check.zip(dtlb_pmps).foreach { case (p, d) =>
  if (HasBitmapCheck) {
    p.apply(tlbcsr.mbmc.CMODE.asBool, tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, d)
  } else {
    p.apply(tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, d)
  }
  require(p.req.bits.size.getWidth == d.bits.size.getWidth)
}

// debugLsInfo layout: load units, hybrid-unit load side, store units, hybrid-unit store side.
for (i <- 0 until LduCnt) {
  io.debug_ls.debugLsInfo(i) := loadUnits(i).io.debug_ls
}
for (i <- 0 until HyuCnt) {
  io.debug_ls.debugLsInfo(LduCnt + i) := hybridUnits(i).io.ldu_io.debug_ls
}
for (i <- 0 until StaCnt) {
  io.debug_ls.debugLsInfo(LduCnt + HyuCnt + i) := storeUnits(i).io.debug_ls
}
for (i <- 0 until HyuCnt) {
  io.debug_ls.debugLsInfo(LduCnt + HyuCnt + StaCnt + i) := hybridUnits(i).io.stu_io.debug_ls
}

io.mem_to_ooo.lsTopdownInfo := loadUnits.map(_.io.lsTopdownInfo) ++ hybridUnits.map(_.io.ldu_io.lsTopdownInfo)

// trigger
// Local copy of the trigger CSR state; one tdata entry is rewritten per update.
val tdata = RegInit(VecInit(Seq.fill(TriggerNum)(0.U.asTypeOf(new MatchTriggerIO))))
val tEnable = RegInit(VecInit(Seq.fill(TriggerNum)(false.B)))
tEnable := csrCtrl.mem_trigger.tEnableVec
when(csrCtrl.mem_trigger.tUpdate.valid) {
  tdata(csrCtrl.mem_trigger.tUpdate.bits.addr) := csrCtrl.mem_trigger.tUpdate.bits.tdata
}
val triggerCanRaiseBpExp = csrCtrl.mem_trigger.triggerCanRaiseBpExp
val debugMode = csrCtrl.mem_trigger.debugMode

val backendTriggerTimingVec = VecInit(tdata.map(_.timing))
val backendTriggerChainVec = VecInit(tdata.map(_.chain))

XSDebug(tEnable.asUInt.orR, "Debug Mode: At least one store trigger is enabled\n")
tEnable.zip(tdata).foreach { case (en, td) => PrintTriggerInfo(en, td) }

// The segment instruction is executed atomically.
// After the segment instruction directive starts executing, no other instructions should be executed.
// Set one cycle after the vector segment unit accepts an instruction and cleared
// one cycle after its uop writeback is valid; while set, load port 0's dcache
// interface is handed to the segment unit (see the LoadUnit loop below).
val vSegmentFlag = RegInit(false.B)

when(GatedValidRegNext(vSegmentUnit.io.in.fire)) {
  vSegmentFlag := true.B
}.elsewhen(GatedValidRegNext(vSegmentUnit.io.uopwriteback.valid)) {
  vSegmentFlag := false.B
}

// Cleared when any load unit rolls back a misalign-buffer load that was RAR-nacked;
// set again once the RAR queue occupancy drops below LoadQueueRARSize - 4.
val misalign_allow_spec = RegInit(true.B)
val ldu_rollback_with_misalign_nack = loadUnits.map { ldu =>
  ldu.io.lsq.ldin.bits.isFrmMisAlignBuf && ldu.io.lsq.ldin.bits.rep_info.rar_nack && ldu.io.rollback.valid
}.reduce(_ || _)
when(ldu_rollback_with_misalign_nack) {
  misalign_allow_spec := false.B
}.elsewhen(lsq.io.rarValidCount < (LoadQueueRARSize - 4).U) {
  misalign_allow_spec := true.B
}

// LoadUnit
val correctMissTrain = Constantin.createRecord(s"CorrectMissTrain$hartId", initValue = false)

// NOTE: several sinks in this loop are connected more than once (bulk `<>`
// followed by per-field overrides); the statement order is significant.
for (i <- 0 until LduCnt) {
  loadUnits(i).io.redirect <> redirect
  loadUnits(i).io.misalign_allow_spec := misalign_allow_spec

  // get input from dispatch
  loadUnits(i).io.ldin <> io.ooo_to_mem.issueLda(i)
  loadUnits(i).io.feedback_slow <> io.mem_to_ooo.ldaIqFeedback(i).feedbackSlow
  io.mem_to_ooo.ldaIqFeedback(i).feedbackFast := DontCare
  loadUnits(i).io.correctMissTrain := correctMissTrain
  // ldCancel / wakeup: hybrid units occupy the first HyuCnt entries.
  io.mem_to_ooo.ldCancel(HyuCnt + i) := loadUnits(i).io.ldCancel
  io.mem_to_ooo.wakeup(HyuCnt + i) := loadUnits(i).io.wakeup

  // vector
  if (i < VlduCnt) {
    loadUnits(i).io.vecldout.ready := false.B
  } else {
    loadUnits(i).io.vecldin.valid := false.B
    loadUnits(i).io.vecldin.bits := DontCare
    loadUnits(i).io.vecldout.ready := false.B
  }

  // fast replay
  loadUnits(i).io.fast_rep_in <> loadUnits(i).io.fast_rep_out

  // SoftPrefetch to frontend (prefetch.i)
  loadUnits(i).io.ifetchPrefetch <> io.ifetchPrefetch(i)

  // dcache access
  loadUnits(i).io.dcache <> dcache.io.lsu.load(i)
  if (i == 0) {
    // Port 0 is shared with the vector segment unit.
    vSegmentUnit.io.rdcache := DontCare
    dcache.io.lsu.load(i).req.valid := loadUnits(i).io.dcache.req.valid || vSegmentUnit.io.rdcache.req.valid
    dcache.io.lsu.load(i).req.bits := Mux1H(Seq(
      vSegmentUnit.io.rdcache.req.valid -> vSegmentUnit.io.rdcache.req.bits,
      loadUnits(i).io.dcache.req.valid -> loadUnits(i).io.dcache.req.bits
    ))
    vSegmentUnit.io.rdcache.req.ready := dcache.io.lsu.load(i).req.ready
  }

  // Dcache requests must also be preempted by the segment.
  when(vSegmentFlag) {
    loadUnits(i).io.dcache.req.ready := false.B // Dcache is preempted.

    dcache.io.lsu.load(0).pf_source := vSegmentUnit.io.rdcache.pf_source
    dcache.io.lsu.load(0).s1_paddr_dup_lsu := vSegmentUnit.io.rdcache.s1_paddr_dup_lsu
    dcache.io.lsu.load(0).s1_paddr_dup_dcache := vSegmentUnit.io.rdcache.s1_paddr_dup_dcache
    dcache.io.lsu.load(0).s1_kill := vSegmentUnit.io.rdcache.s1_kill
    dcache.io.lsu.load(0).s2_kill := vSegmentUnit.io.rdcache.s2_kill
    dcache.io.lsu.load(0).s0_pc := vSegmentUnit.io.rdcache.s0_pc
    dcache.io.lsu.load(0).s1_pc := vSegmentUnit.io.rdcache.s1_pc
    dcache.io.lsu.load(0).s2_pc := vSegmentUnit.io.rdcache.s2_pc
    dcache.io.lsu.load(0).is128Req := vSegmentUnit.io.rdcache.is128Req
  }.otherwise {
    loadUnits(i).io.dcache.req.ready := dcache.io.lsu.load(i).req.ready

    dcache.io.lsu.load(0).pf_source := loadUnits(0).io.dcache.pf_source
    dcache.io.lsu.load(0).s1_paddr_dup_lsu := loadUnits(0).io.dcache.s1_paddr_dup_lsu
    dcache.io.lsu.load(0).s1_paddr_dup_dcache := loadUnits(0).io.dcache.s1_paddr_dup_dcache
    dcache.io.lsu.load(0).s1_kill := loadUnits(0).io.dcache.s1_kill
    dcache.io.lsu.load(0).s2_kill := loadUnits(0).io.dcache.s2_kill
    dcache.io.lsu.load(0).s0_pc := loadUnits(0).io.dcache.s0_pc
    dcache.io.lsu.load(0).s1_pc := loadUnits(0).io.dcache.s1_pc
    dcache.io.lsu.load(0).s2_pc := loadUnits(0).io.dcache.s2_pc
    dcache.io.lsu.load(0).is128Req := loadUnits(0).io.dcache.is128Req
  }

  // forward
  loadUnits(i).io.lsq.forward <> lsq.io.forward(i)
  loadUnits(i).io.sbuffer <> sbuffer.io.forward(i)
  loadUnits(i).io.ubuffer <> uncache.io.forward(i)
  loadUnits(i).io.tl_d_channel := dcache.io.lsu.forward_D(i)
  loadUnits(i).io.forward_mshr <> dcache.io.lsu.forward_mshr(i)
  // ld-ld violation check
  loadUnits(i).io.lsq.ldld_nuke_query <> lsq.io.ldu.ldld_nuke_query(i)
  loadUnits(i).io.lsq.stld_nuke_query <> lsq.io.ldu.stld_nuke_query(i)
  // loadqueue old ptr
  loadUnits(i).io.lsq.lqDeqPtr := lsq.io.lqDeqPtr
  loadUnits(i).io.csrCtrl <> csrCtrl
  // dcache refill req
  // loadUnits(i).io.refill <> delayedDcacheRefill
  // dtlb
  loadUnits(i).io.tlb <> dtlb_reqs(i)
  if (i == 0) { // port 0 assign to vsegmentUnit
    val vsegmentDtlbReqValid = vSegmentUnit.io.dtlb.req.valid // segment tlb request need to delay 1 cycle
    dtlb_reqs(i).req.valid := loadUnits(i).io.tlb.req.valid || RegNext(vsegmentDtlbReqValid)
    vSegmentUnit.io.dtlb.req.ready := dtlb_reqs(i).req.ready
    dtlb_reqs(i).req.bits := ParallelPriorityMux(Seq(
      RegNext(vsegmentDtlbReqValid) -> RegEnable(vSegmentUnit.io.dtlb.req.bits, vsegmentDtlbReqValid),
      loadUnits(i).io.tlb.req.valid -> loadUnits(i).io.tlb.req.bits
    ))
  }
  // pmp
  loadUnits(i).io.pmp <> pmp_check(i).resp
  // st-ld violation query
  val stld_nuke_query = storeUnits.map(_.io.stld_nuke_query) ++ hybridUnits.map(_.io.stu_io.stld_nuke_query)
  for (s <- 0 until StorePipelineWidth) {
    loadUnits(i).io.stld_nuke_query(s) := stld_nuke_query(s)
  }
  loadUnits(i).io.lq_rep_full <> lsq.io.lq_rep_full
  // load prefetch train
  prefetcherOpt.foreach { pf =>
    // sms will train on all miss load sources
    val source = loadUnits(i).io.prefetch_train
    pf.io.ld_in(i).valid := Mux(pf_train_on_hit,
      source.valid,
      source.valid && source.bits.isFirstIssue && source.bits.miss
    )
    pf.io.ld_in(i).bits := source.bits
    val loadPc = RegNext(io.ooo_to_mem.issueLda(i).bits.uop.pc) // for s1
    // The pc override must stay after the bulk `bits` assignment above.
    pf.io.ld_in(i).bits.uop.pc := Mux(
      loadUnits(i).io.s2_ptr_chasing,
      RegEnable(loadPc, loadUnits(i).io.s2_prefetch_spec),
      RegEnable(RegEnable(loadPc, loadUnits(i).io.s1_prefetch_spec), loadUnits(i).io.s2_prefetch_spec)
    )
  }
  l1PrefetcherOpt.foreach { pf =>
    // stream will train on all load sources
    val source = loadUnits(i).io.prefetch_train_l1
    pf.io.ld_in(i).valid := source.valid && source.bits.isFirstIssue
    pf.io.ld_in(i).bits := source.bits
  }

  // load to load fast forward: load(i) prefers data(i)
  // fastPriority is a rotation of all load pipelines that starts at i.
  val l2l_fwd_out = loadUnits.map(_.io.l2l_fwd_out) ++ hybridUnits.map(_.io.ldu_io.l2l_fwd_out)
  val fastPriority = (i until LduCnt + HyuCnt) ++ (0 until i)
  val fastValidVec = fastPriority.map(j => l2l_fwd_out(j).valid)
  val fastDataVec = fastPriority.map(j => l2l_fwd_out(j).data)
  val fastErrorVec = fastPriority.map(j => l2l_fwd_out(j).dly_ld_err)
  val fastMatchVec = fastPriority.map(j => io.ooo_to_mem.loadFastMatch(i)(j))
  loadUnits(i).io.l2l_fwd_in.valid := VecInit(fastValidVec).asUInt.orR
  loadUnits(i).io.l2l_fwd_in.data := ParallelPriorityMux(fastValidVec, fastDataVec)
  loadUnits(i).io.l2l_fwd_in.dly_ld_err := ParallelPriorityMux(fastValidVec, fastErrorVec)
  val fastMatch = ParallelPriorityMux(fastValidVec, fastMatchVec)
  loadUnits(i).io.ld_fast_match := fastMatch
  loadUnits(i).io.ld_fast_imm := io.ooo_to_mem.loadFastImm(i)
  loadUnits(i).io.ld_fast_fuOpType := io.ooo_to_mem.loadFastFuOpType(i)
  loadUnits(i).io.replay <> lsq.io.replay(i)

  val l2_hint = RegNext(io.l2_hint)

  // L2 Hint for DCache
  dcache.io.l2_hint <> l2_hint

  loadUnits(i).io.l2_hint <> l2_hint
  loadUnits(i).io.tlb_hint.id := dtlbRepeater.io.hint.get.req(i).id
  loadUnits(i).io.tlb_hint.full := dtlbRepeater.io.hint.get.req(i).full ||
    tlbreplay_reg(i) || dtlb_ld0_tlbreplay_reg(i)

  // passdown to lsq (load s2)
  lsq.io.ldu.ldin(i) <> loadUnits(i).io.lsq.ldin
  if (i == UncacheWBPort) {
    lsq.io.ldout(i) <> loadUnits(i).io.lsq.uncache
  } else {
    lsq.io.ldout(i).ready := true.B
    loadUnits(i).io.lsq.uncache.valid := false.B
    loadUnits(i).io.lsq.uncache.bits := DontCare
  }
  lsq.io.ld_raw_data(i) <> loadUnits(i).io.lsq.ld_raw_data
  lsq.io.ncOut(i) <> loadUnits(i).io.lsq.nc_ldin
  lsq.io.l2_hint.valid := l2_hint.valid
  lsq.io.l2_hint.bits.sourceId := l2_hint.bits.sourceId
  lsq.io.l2_hint.bits.isKeyword := l2_hint.bits.isKeyword

  lsq.io.tlb_hint <> dtlbRepeater.io.hint.get

  // connect misalignBuffer
  loadMisalignBuffer.io.req(i) <> loadUnits(i).io.misalign_buf

  if (i == MisalignWBPort) {
    loadUnits(i).io.misalign_ldin <> loadMisalignBuffer.io.splitLoadReq
    loadUnits(i).io.misalign_ldout <> loadMisalignBuffer.io.splitLoadResp
  } else {
    loadUnits(i).io.misalign_ldin.valid := false.B
    loadUnits(i).io.misalign_ldin.bits := DontCare
  }

  // alter writeback exception info
  io.mem_to_ooo.s3_delayed_load_error(i) := loadUnits(i).io.s3_dly_ld_err

  // update mem dependency predictor
  // io.memPredUpdate(i) := DontCare

  // --------------------------------
  // Load Triggers
  // --------------------------------
  loadUnits(i).io.fromCsrTrigger.tdataVec := tdata
  loadUnits(i).io.fromCsrTrigger.tEnableVec := tEnable
  loadUnits(i).io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
  loadUnits(i).io.fromCsrTrigger.debugMode := debugMode
}

// HybridUnit: its load side uses load-indexed resources at offset LduCnt and its
// store side uses store-indexed resources at offset StaCnt.
for (i <- 0 until HyuCnt) {
  hybridUnits(i).io.redirect <> redirect

  // get input from dispatch
  hybridUnits(i).io.lsin <> io.ooo_to_mem.issueHya(i)
  hybridUnits(i).io.feedback_slow <> io.mem_to_ooo.hyuIqFeedback(i).feedbackSlow
  hybridUnits(i).io.feedback_fast <> io.mem_to_ooo.hyuIqFeedback(i).feedbackFast
  hybridUnits(i).io.correctMissTrain := correctMissTrain
  io.mem_to_ooo.ldCancel(i) := hybridUnits(i).io.ldu_io.ldCancel
  io.mem_to_ooo.wakeup(i) := hybridUnits(i).io.ldu_io.wakeup

  // ------------------------------------
  //  Load Port
  // ------------------------------------
  // fast replay
  hybridUnits(i).io.ldu_io.fast_rep_in <> hybridUnits(i).io.ldu_io.fast_rep_out

  // dcache access
  hybridUnits(i).io.ldu_io.dcache <> dcache.io.lsu.load(LduCnt + i)
  hybridUnits(i).io.stu_io.dcache <> dcache.io.lsu.sta(StaCnt + i)

  // forward
  hybridUnits(i).io.ldu_io.lsq.forward <> lsq.io.forward(LduCnt + i)
  hybridUnits(i).io.ldu_io.sbuffer <> sbuffer.io.forward(LduCnt + i)
  hybridUnits(i).io.ldu_io.ubuffer <> uncache.io.forward(LduCnt + i)
  // hybridUnits(i).io.ldu_io.vec_forward <> vsFlowQueue.io.forward(LduCnt + i)
  hybridUnits(i).io.ldu_io.vec_forward := DontCare
  hybridUnits(i).io.ldu_io.tl_d_channel := dcache.io.lsu.forward_D(LduCnt + i)
  hybridUnits(i).io.ldu_io.forward_mshr <> dcache.io.lsu.forward_mshr(LduCnt + i)
  // ld-ld violation check
  hybridUnits(i).io.ldu_io.lsq.ldld_nuke_query <> lsq.io.ldu.ldld_nuke_query(LduCnt + i)
  hybridUnits(i).io.ldu_io.lsq.stld_nuke_query <> lsq.io.ldu.stld_nuke_query(LduCnt + i)
  hybridUnits(i).io.csrCtrl <> csrCtrl
  // tlb hint
  hybridUnits(i).io.ldu_io.tlb_hint.id := dtlbRepeater.io.hint.get.req(LduCnt + i).id
  hybridUnits(i).io.ldu_io.tlb_hint.full := dtlbRepeater.io.hint.get.req(LduCnt + i).full ||
    tlbreplay_reg(LduCnt + i) || dtlb_ld0_tlbreplay_reg(LduCnt + i)

  // dtlb
  hybridUnits(i).io.tlb <> dtlb_ld.head.requestor(LduCnt + i)
  // pmp
  hybridUnits(i).io.pmp <> pmp_check(LduCnt + i).resp
  // st-ld violation query
  val stld_nuke_query = VecInit(storeUnits.map(_.io.stld_nuke_query) ++ hybridUnits.map(_.io.stu_io.stld_nuke_query))
  hybridUnits(i).io.ldu_io.stld_nuke_query := stld_nuke_query
  hybridUnits(i).io.ldu_io.lq_rep_full <> lsq.io.lq_rep_full
  // load prefetch train
  prefetcherOpt.foreach { pf =>
    val source = hybridUnits(i).io.prefetch_train
    pf.io.ld_in(LduCnt + i).valid := Mux(pf_train_on_hit,
      source.valid,
      source.valid && source.bits.isFirstIssue && source.bits.miss
    )
    pf.io.ld_in(LduCnt + i).bits := source.bits
    pf.io.ld_in(LduCnt + i).bits.uop.pc := Mux(hybridUnits(i).io.ldu_io.s2_ptr_chasing, io.ooo_to_mem.hybridPc(i), RegNext(io.ooo_to_mem.hybridPc(i)))
  }
  l1PrefetcherOpt.foreach { pf =>
    // stream will train on all load sources
    val source = hybridUnits(i).io.prefetch_train_l1
    pf.io.ld_in(LduCnt + i).valid := source.valid && source.bits.isFirstIssue &&
      FuType.isLoad(source.bits.uop.fuType)
    pf.io.ld_in(LduCnt + i).bits := source.bits
    pf.io.st_in(StaCnt + i).valid := false.B
    pf.io.st_in(StaCnt + i).bits := DontCare
  }
  // store-side training of the sms prefetcher from the same hybrid pipeline
  prefetcherOpt.foreach { pf =>
    val source = hybridUnits(i).io.prefetch_train
    pf.io.st_in(StaCnt + i).valid := Mux(pf_train_on_hit,
      source.valid,
      source.valid && source.bits.isFirstIssue && source.bits.miss
    ) && FuType.isStore(source.bits.uop.fuType)
    pf.io.st_in(StaCnt + i).bits := source.bits
    pf.io.st_in(StaCnt + i).bits.uop.pc := RegNext(io.ooo_to_mem.hybridPc(i))
  }

  // load to load fast forward: load(i) prefers data(i)
  // fastPriority is a rotation of all load pipelines that starts at LduCnt + i.
  val l2l_fwd_out = loadUnits.map(_.io.l2l_fwd_out) ++ hybridUnits.map(_.io.ldu_io.l2l_fwd_out)
  val fastPriority = (LduCnt + i until LduCnt + HyuCnt) ++ (0 until LduCnt + i)
  val fastValidVec = fastPriority.map(j => l2l_fwd_out(j).valid)
  val fastDataVec = fastPriority.map(j => l2l_fwd_out(j).data)
  val fastErrorVec = fastPriority.map(j => l2l_fwd_out(j).dly_ld_err)
  val fastMatchVec = fastPriority.map(j => io.ooo_to_mem.loadFastMatch(LduCnt + i)(j))
  hybridUnits(i).io.ldu_io.l2l_fwd_in.valid := VecInit(fastValidVec).asUInt.orR
  hybridUnits(i).io.ldu_io.l2l_fwd_in.data := ParallelPriorityMux(fastValidVec, fastDataVec)
  hybridUnits(i).io.ldu_io.l2l_fwd_in.dly_ld_err := ParallelPriorityMux(fastValidVec, fastErrorVec)
  val fastMatch = ParallelPriorityMux(fastValidVec, fastMatchVec)
  hybridUnits(i).io.ldu_io.ld_fast_match := fastMatch
  hybridUnits(i).io.ldu_io.ld_fast_imm := io.ooo_to_mem.loadFastImm(LduCnt + i)
  hybridUnits(i).io.ldu_io.ld_fast_fuOpType := io.ooo_to_mem.loadFastFuOpType(LduCnt + i)
  hybridUnits(i).io.ldu_io.replay <> lsq.io.replay(LduCnt + i)
  hybridUnits(i).io.ldu_io.l2_hint <> io.l2_hint

  // uncache
  lsq.io.ldout(LduCnt + i) <> hybridUnits(i).io.ldu_io.lsq.uncache
  lsq.io.ld_raw_data(LduCnt + i) <> hybridUnits(i).io.ldu_io.lsq.ld_raw_data


  // passdown to lsq (load s2)
  hybridUnits(i).io.ldu_io.lsq.nc_ldin.valid := false.B
  hybridUnits(i).io.ldu_io.lsq.nc_ldin.bits := DontCare
  lsq.io.ldu.ldin(LduCnt + i) <> hybridUnits(i).io.ldu_io.lsq.ldin
  // Lsq to sta unit
  lsq.io.sta.storeMaskIn(StaCnt + i) <> hybridUnits(i).io.stu_io.st_mask_out

  // Lsq to std unit's rs
  lsq.io.std.storeDataIn(StaCnt + i) := stData(StaCnt + i)
  // valid override: must stay after the bulk assignment on the previous line
  lsq.io.std.storeDataIn(StaCnt + i).valid := stData(StaCnt + i).valid && !st_data_atomics(StaCnt + i)
  // prefetch
  hybridUnits(i).io.stu_io.prefetch_req <> sbuffer.io.store_prefetch(StaCnt + i)

  io.mem_to_ooo.s3_delayed_load_error(LduCnt + i) := hybridUnits(i).io.ldu_io.s3_dly_ld_err

  // ------------------------------------
  //  Store Port
  // ------------------------------------
  hybridUnits(i).io.stu_io.lsq <> lsq.io.sta.storeAddrIn.takeRight(HyuCnt)(i)
  hybridUnits(i).io.stu_io.lsq_replenish <> lsq.io.sta.storeAddrInRe.takeRight(HyuCnt)(i)

  lsq.io.sta.storeMaskIn.takeRight(HyuCnt)(i) <> hybridUnits(i).io.stu_io.st_mask_out
  io.mem_to_ooo.stIn.takeRight(HyuCnt)(i).valid := hybridUnits(i).io.stu_io.issue.valid
  io.mem_to_ooo.stIn.takeRight(HyuCnt)(i).bits := hybridUnits(i).io.stu_io.issue.bits

  // ------------------------------------
  //  Vector Store Port
  // ------------------------------------
  hybridUnits(i).io.vec_stu_io.isFirstIssue := true.B

  // -------------------------
  // Store Triggers
  // -------------------------
  hybridUnits(i).io.fromCsrTrigger.tdataVec := tdata
  hybridUnits(i).io.fromCsrTrigger.tEnableVec := tEnable
  hybridUnits(i).io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
  hybridUnits(i).io.fromCsrTrigger.debugMode := debugMode
}

// misalignBuffer
// Misalign buffers: both the load and the store misalign buffer receive the
// pipeline redirect and the same ROB status signals that arrive on
// io.ooo_to_mem.lsqio.
loadMisalignBuffer.io.redirect <> redirect
loadMisalignBuffer.io.rob.lcommit := io.ooo_to_mem.lsqio.lcommit
loadMisalignBuffer.io.rob.scommit := io.ooo_to_mem.lsqio.scommit
loadMisalignBuffer.io.rob.pendingMMIOld := io.ooo_to_mem.lsqio.pendingMMIOld
loadMisalignBuffer.io.rob.pendingld := io.ooo_to_mem.lsqio.pendingld
loadMisalignBuffer.io.rob.pendingst := io.ooo_to_mem.lsqio.pendingst
loadMisalignBuffer.io.rob.pendingVst := io.ooo_to_mem.lsqio.pendingVst
loadMisalignBuffer.io.rob.commit := io.ooo_to_mem.lsqio.commit
loadMisalignBuffer.io.rob.pendingPtr := io.ooo_to_mem.lsqio.pendingPtr
loadMisalignBuffer.io.rob.pendingPtrNext := io.ooo_to_mem.lsqio.pendingPtrNext

// The LSQ observes the load misalign buffer's full flag and the
// misalign_allow_spec setting.
lsq.io.loadMisalignFull := loadMisalignBuffer.io.loadMisalignFull
lsq.io.misalignAllowSpec := misalign_allow_spec

storeMisalignBuffer.io.redirect <> redirect
storeMisalignBuffer.io.rob.lcommit := io.ooo_to_mem.lsqio.lcommit
storeMisalignBuffer.io.rob.scommit := io.ooo_to_mem.lsqio.scommit
storeMisalignBuffer.io.rob.pendingMMIOld := io.ooo_to_mem.lsqio.pendingMMIOld
storeMisalignBuffer.io.rob.pendingld := io.ooo_to_mem.lsqio.pendingld
storeMisalignBuffer.io.rob.pendingst := io.ooo_to_mem.lsqio.pendingst
storeMisalignBuffer.io.rob.pendingVst := io.ooo_to_mem.lsqio.pendingVst
storeMisalignBuffer.io.rob.commit := io.ooo_to_mem.lsqio.commit
storeMisalignBuffer.io.rob.pendingPtr := io.ooo_to_mem.lsqio.pendingPtr
storeMisalignBuffer.io.rob.pendingPtrNext := io.ooo_to_mem.lsqio.pendingPtrNext

// Store-queue control channel between the LSQ and the store misalign buffer.
lsq.io.maControl <> storeMisalignBuffer.io.sqControl
// Cache-maintenance operation request/response between the LSQ and the dcache.
lsq.io.cmoOpReq <> dcache.io.cmoOpReq
lsq.io.cmoOpResp <> dcache.io.cmoOpResp

// Prefetcher
// DTLB / PMP port indices used by the prefetch requestors:
//   - StreamDTLBPortIndex:     the port after the LduCnt + HyuCnt ports of the dtlb_ld group
//   - PrefetcherDTLBPortIndex: first port of the dtlb_pf group
//   - L2toL1DLBPortIndex:      second port of the dtlb_pf group
val StreamDTLBPortIndex = TlbStartVec(dtlb_ld_idx) + LduCnt + HyuCnt
val PrefetcherDTLBPortIndex = TlbStartVec(dtlb_pf_idx)
val L2toL1DLBPortIndex = TlbStartVec(dtlb_pf_idx) + 1
prefetcherOpt match {
  case Some(pf) =>
    dtlb_reqs(PrefetcherDTLBPortIndex) <> pf.io.tlb_req
    pf.io.pmp_resp := pmp_check(PrefetcherDTLBPortIndex).resp
  case None =>
    // No prefetcher instantiated: tie the port off (never requests, always
    // accepts responses).
    dtlb_reqs(PrefetcherDTLBPortIndex) := DontCare
    dtlb_reqs(PrefetcherDTLBPortIndex).req.valid := false.B
    dtlb_reqs(PrefetcherDTLBPortIndex).resp.ready := true.B
}
l1PrefetcherOpt match {
  case Some(pf) =>
    dtlb_reqs(StreamDTLBPortIndex) <> pf.io.tlb_req
    pf.io.pmp_resp := pmp_check(StreamDTLBPortIndex).resp
  case None =>
    // No L1 prefetcher instantiated: tie the port off the same way.
    dtlb_reqs(StreamDTLBPortIndex) := DontCare
    dtlb_reqs(StreamDTLBPortIndex).req.valid := false.B
    dtlb_reqs(StreamDTLBPortIndex).resp.ready := true.B
}
dtlb_reqs(L2toL1DLBPortIndex) <> io.l2_tlb_req
// Overrides the resp.ready driven by the bulk connection above.
dtlb_reqs(L2toL1DLBPortIndex).resp.ready := true.B
io.l2_pmp_resp := pmp_check(L2toL1DLBPortIndex).resp

// StoreUnit
// Store-data execution units: flush plus a plain valid/ready/bits handshake
// with the issueStd ports.
for (i <- 0 until StdCnt) {
  stdExeUnits(i).io.flush <> redirect
  stdExeUnits(i).io.in.valid := io.ooo_to_mem.issueStd(i).valid
  io.ooo_to_mem.issueStd(i).ready := stdExeUnits(i).io.in.ready
  stdExeUnits(i).io.in.bits := io.ooo_to_mem.issueStd(i).bits
}

// Store-address units.
for (i <- 0 until StaCnt) {
  val stu = storeUnits(i)

  stu.io.redirect <> redirect
  stu.io.csrCtrl <> csrCtrl
  stu.io.dcache <> dcache.io.lsu.sta(i)
  stu.io.feedback_slow <> io.mem_to_ooo.staIqFeedback(i).feedbackSlow
  stu.io.stin <> io.ooo_to_mem.issueSta(i)
  stu.io.lsq <> lsq.io.sta.storeAddrIn(i)
  stu.io.lsq_replenish <> lsq.io.sta.storeAddrInRe(i)
  // dtlb
  stu.io.tlb <> dtlb_st.head.requestor(i)
  // NOTE(review): the "+ 1" presumably skips the stream-prefetcher port that
  // follows the load ports (see StreamDTLBPortIndex) -- confirm.
  stu.io.pmp <> pmp_check(LduCnt + HyuCnt + 1 + i).resp

  // -------------------------
  // Store Triggers
  // -------------------------
  stu.io.fromCsrTrigger.tdataVec := tdata
  stu.io.fromCsrTrigger.tEnableVec := tEnable
  stu.io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
  stu.io.fromCsrTrigger.debugMode := debugMode

  // prefetch
  stu.io.prefetch_req <> sbuffer.io.store_prefetch(i)

  // store unit does not need fast feedback
  io.mem_to_ooo.staIqFeedback(i).feedbackFast := DontCare

  // Lsq to sta unit
  lsq.io.sta.storeMaskIn(i) <> stu.io.st_mask_out

  // connect misalignBuffer
  storeMisalignBuffer.io.req(i) <> stu.io.misalign_buf

  // Only store unit 0 is connected to the misalign buffer's split-store
  // request/response channel; the other units get an idle misalign_stin.
  if (i == 0) {
    stu.io.misalign_stin <> storeMisalignBuffer.io.splitStoreReq
    stu.io.misalign_stout <> storeMisalignBuffer.io.splitStoreResp
  } else {
    stu.io.misalign_stin.valid := false.B
    stu.io.misalign_stin.bits := DontCare
  }

  // Scalar store-data path into the LSQ: forwards stData(i) (suppressed when
  // st_data_atomics(i) is set), zeroes mask / vdIdx / vdIdxInField, and always
  // accepts stData(i). The connections are emitted in whatever `when` scope is
  // active at the call site.
  def driveScalarStoreData(): Unit = {
    lsq.io.std.storeDataIn(i).valid := stData(i).valid && !st_data_atomics(i)
    lsq.io.std.storeDataIn(i).bits.uop := stData(i).bits.uop
    lsq.io.std.storeDataIn(i).bits.data := stData(i).bits.data
    lsq.io.std.storeDataIn(i).bits.mask.foreach(_ := 0.U)
    lsq.io.std.storeDataIn(i).bits.vdIdx.foreach(_ := 0.U)
    lsq.io.std.storeDataIn(i).bits.vdIdxInField.foreach(_ := 0.U)
    stData(i).ready := true.B
  }

  // Lsq to std unit's rs
  if (i < VstuCnt) {
    when (vsSplit(i).io.vstd.get.valid) {
      // Vector store data from the split unit takes the port; the scalar
      // store-data source is back-pressured for this cycle.
      lsq.io.std.storeDataIn(i).valid := true.B
      lsq.io.std.storeDataIn(i).bits := vsSplit(i).io.vstd.get.bits
      stData(i).ready := false.B
    }.otherwise {
      driveScalarStoreData()
    }
  } else {
    driveScalarStoreData()
  }
  // These two statements cover every storeDataIn port (not just port i) and,
  // being later connections, override the corresponding fields driven above.
  lsq.io.std.storeDataIn.foreach(_.bits.debug := 0.U.asTypeOf(new DebugBundle))
  lsq.io.std.storeDataIn.foreach(_.bits.isFromLoadUnit := DontCare)


  // store prefetch train
  l1PrefetcherOpt.foreach(pf => {
    // stream will train on all load sources
    pf.io.st_in(i).valid := false.B
    pf.io.st_in(i).bits := DontCare
  })

  prefetcherOpt.foreach(pf => {
    // With pf_train_on_hit every valid training record is used; otherwise only
    // first-issue records flagged as misses.
    pf.io.st_in(i).valid := Mux(pf_train_on_hit,
      stu.io.prefetch_train.valid,
      stu.io.prefetch_train.valid && stu.io.prefetch_train.bits.isFirstIssue && (
        stu.io.prefetch_train.bits.miss
      )
    )
    pf.io.st_in(i).bits := stu.io.prefetch_train.bits
    // The PC is carried through two enable-gated registers
    // (s1_prefetch_spec, then s2_prefetch_spec) and overrides bits.uop.pc.
    pf.io.st_in(i).bits.uop.pc := RegEnable(RegEnable(io.ooo_to_mem.storePc(i), stu.io.s1_prefetch_spec), stu.io.s2_prefetch_spec)
  })

  // 1. sync issue info to store set LFST
  // 2. when store issue, broadcast issued sqPtr to wake up the following insts
  // io.stIn(i).valid := io.issue(exuParameters.LduCnt + i).valid
  // io.stIn(i).bits := io.issue(exuParameters.LduCnt + i).bits
  io.mem_to_ooo.stIn(i).valid := stu.io.issue.valid
  io.mem_to_ooo.stIn(i).bits := stu.io.issue.bits

  stu.io.stout.ready := true.B

  // vector
  if (i < VstuCnt) {
    stu.io.vecstin <> vsSplit(i).io.out
    // vsFlowQueue.io.pipeFeedback(i) <> stu.io.vec_feedback_slow // need connect
  } else {
    stu.io.vecstin.valid := false.B
    stu.io.vecstin.bits := DontCare
    stu.io.vecstout.ready := false.B
  }
  stu.io.vec_isFirstIssue := true.B // TODO
}

// Merge the two store-queue writeback sources (mmio and cbo.zero) into one
// decoupled stream. They must never be valid together (asserted below); the
// Mux picks cbo.zero when it is valid.
val sqOtherStout = WireInit(0.U.asTypeOf(DecoupledIO(new MemExuOutput)))
sqOtherStout.valid := lsq.io.mmioStout.valid || lsq.io.cboZeroStout.valid
sqOtherStout.bits := Mux(lsq.io.cboZeroStout.valid, lsq.io.cboZeroStout.bits, lsq.io.mmioStout.bits)
assert(!(lsq.io.mmioStout.valid && lsq.io.cboZeroStout.valid), "Cannot writeback to mmio and cboZero at the same time.")

// Store writeback by StoreQueue:
//   1. cbo Zero
//   2. mmio
// Currently, the two should not be present at the same time, so simply make cbo zero a higher priority.
val otherStout = WireInit(0.U.asTypeOf(lsq.io.mmioStout))
NewPipelineConnect(
  sqOtherStout, otherStout, otherStout.fire,
  false.B,
  Option("otherStoutConnect")
)
// The pipelined store-queue writeback takes stOut(0) only when store unit 0
// is not writing back in the same cycle.
otherStout.ready := false.B
when (otherStout.valid && !storeUnits(0).io.stout.valid) {
  stOut(0).valid := true.B
  stOut(0).bits := otherStout.bits
  otherStout.ready := true.B
}
lsq.io.mmioStout.ready := sqOtherStout.ready
lsq.io.cboZeroStout.ready := sqOtherStout.ready

// vec mmio writeback
lsq.io.vecmmioStout.ready := false.B

// The store misalign buffer also writes back through stOut(0), but only when
// none of otherStout, store unit 0's stout, or store unit 0's vecstout is valid.
val storeMisalignCanWriteBack = !otherStout.valid && !storeUnits(0).io.stout.valid && !storeUnits(0).io.vecstout.valid
storeMisalignBuffer.io.writeBack.ready := storeMisalignCanWriteBack
storeMisalignBuffer.io.storeOutValid := storeUnits(0).io.stout.valid
storeMisalignBuffer.io.storeVecOutValid := storeUnits(0).io.vecstout.valid
when (storeMisalignBuffer.io.writeBack.valid && storeMisalignCanWriteBack) {
  stOut(0).valid := true.B
  stOut(0).bits := storeMisalignBuffer.io.writeBack.bits
}

// Uncache
uncache.io.enableOutstanding := io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable
uncache.io.hartId := io.hartId
// The LSQ sees the same outstanding-uncache-write enable as the uncache module.
lsq.io.uncacheOutstanding := io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable

// Lsq
// ROB <-> LSQ status signals.
io.mem_to_ooo.lsqio.mmio := lsq.io.rob.mmio
io.mem_to_ooo.lsqio.uop := lsq.io.rob.uop
lsq.io.rob.lcommit := io.ooo_to_mem.lsqio.lcommit
lsq.io.rob.scommit := io.ooo_to_mem.lsqio.scommit
lsq.io.rob.pendingMMIOld := io.ooo_to_mem.lsqio.pendingMMIOld
lsq.io.rob.pendingld := io.ooo_to_mem.lsqio.pendingld
lsq.io.rob.pendingst := io.ooo_to_mem.lsqio.pendingst
lsq.io.rob.pendingVst := io.ooo_to_mem.lsqio.pendingVst
lsq.io.rob.commit := io.ooo_to_mem.lsqio.commit
lsq.io.rob.pendingPtr := io.ooo_to_mem.lsqio.pendingPtr
lsq.io.rob.pendingPtrNext := io.ooo_to_mem.lsqio.pendingPtrNext

// lsq.io.rob <> io.lsqio.rob
lsq.io.enq <> io.ooo_to_mem.enqLsq
lsq.io.brqRedirect <> redirect

// violation rollback
/**
  * Builds a select vector over `xs` that marks the oldest valid redirect.
  *
  * Entry `i` of the result is set when all of the following hold:
  *  - `xs(i)` is valid;
  *  - every valid entry `j < i` has a robIdx that `isAfter` reports as after `xs(i)`;
  *  - `xs(i)` is not reported as after any valid entry `j > i`.
  *
  * @param xs candidate redirects
  * @return one Bool per element of `xs`
  */
def selectOldestRedirect(xs: Seq[Valid[Redirect]]): Vec[Bool] = {
  // compareVec(i)(j), defined for j < i only: xs(j) is after xs(i).
  val compareVec = (0 until xs.length).map(i => (0 until i).map(j => isAfter(xs(j).bits.robIdx, xs(i).bits.robIdx)))
  val resultOnehot = VecInit((0 until xs.length).map(i => Cat((0 until xs.length).map(j =>
    (if (j < i) !xs(j).valid || compareVec(i)(j)
    else if (j == i) xs(i).valid
    else !xs(j).valid || !compareVec(j)(i))
  )).andR))
  resultOnehot
}
// Rollback sources: every load unit, every hybrid unit, and the LSQ's
// nack / nuke rollback ports.
val allRedirect = loadUnits.map(_.io.rollback) ++ hybridUnits.map(_.io.ldu_io.rollback) ++ lsq.io.nack_rollback ++ lsq.io.nuke_rollback
val oldestOneHot = selectOldestRedirect(allRedirect)
val oldestRedirect = WireDefault(Mux1H(oldestOneHot, allRedirect))
// memory replay would not cause IAF/IPF/IGPF
oldestRedirect.bits.cfiUpdate.backendIAF := false.B
oldestRedirect.bits.cfiUpdate.backendIPF := false.B
oldestRedirect.bits.cfiUpdate.backendIGPF := false.B
io.mem_to_ooo.memoryViolation := oldestRedirect
io.mem_to_ooo.lsqio.lqCanAccept := lsq.io.lqCanAccept
io.mem_to_ooo.lsqio.sqCanAccept := lsq.io.sqCanAccept

// lsq.io.uncache <> uncache.io.lsq
// Uncache arbitration state machine between the LSQ and the uncache module.
val s_idle :: s_scalar_uncache :: s_vector_uncache :: Nil = Enum(3)
val uncacheState = RegInit(s_idle)
val uncacheReq = Wire(Decoupled(new UncacheWordReq))
val uncacheIdResp = uncache.io.lsq.idResp
val uncacheResp = Wire(Decoupled(new UncacheWordResp))

// Idle defaults; the conditional connections further down override them.
uncacheReq.bits := DontCare
uncacheReq.valid := false.B
uncacheReq.ready := false.B
uncacheResp.bits := DontCare
uncacheResp.valid := false.B
uncacheResp.ready := false.B
lsq.io.uncache.req.ready := false.B
lsq.io.uncache.idResp.valid := false.B
lsq.io.uncache.idResp.bits := DontCare
lsq.io.uncache.resp.valid := false.B
lsq.io.uncache.resp.bits := DontCare

switch (uncacheState) {
  is (s_idle) {
    when (uncacheReq.fire) {
      when (lsq.io.uncache.req.valid) {
        // An LSQ request leaves idle unless it is an `nc` request while
        // outstanding uncache writes are enabled.
        when (!lsq.io.uncache.req.bits.nc || !io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
          uncacheState := s_scalar_uncache
        }
      }.otherwise {
        // val isStore = vsFlowQueue.io.uncache.req.bits.cmd === MemoryOpConstants.M_XWR
        when (!io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
          uncacheState := s_vector_uncache
        }
      }
    }
  }

  is (s_scalar_uncache) {
    when (uncacheResp.fire) {
      uncacheState := s_idle
    }
  }

  is (s_vector_uncache) {
    when (uncacheResp.fire) {
      uncacheState := s_idle
    }
  }
}

when (lsq.io.uncache.req.valid) {
  uncacheReq <> lsq.io.uncache.req
}
// Responses go to the LSQ whenever outstanding mode is enabled; otherwise only
// while a scalar uncache access is in flight.
when (io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
  lsq.io.uncache.resp <> uncacheResp
  lsq.io.uncache.idResp <> uncacheIdResp
}.otherwise {
  when (uncacheState === s_scalar_uncache) {
    lsq.io.uncache.resp <> uncacheResp
    lsq.io.uncache.idResp <> uncacheIdResp
  }
}
// delay dcache refill for 1 cycle for better timing
AddPipelineReg(uncacheReq, uncache.io.lsq.req, false.B)
AddPipelineReg(uncache.io.lsq.resp, uncacheResp, false.B)

//lsq.io.refill := delayedDcacheRefill
lsq.io.release := dcache.io.lsu.release
lsq.io.lqCancelCnt <> io.mem_to_ooo.lqCancelCnt
lsq.io.sqCancelCnt <> io.mem_to_ooo.sqCancelCnt
lsq.io.lqDeq <> io.mem_to_ooo.lqDeq
lsq.io.sqDeq <> io.mem_to_ooo.sqDeq
// Todo: assign these
io.mem_to_ooo.sqDeqPtr := lsq.io.sqDeqPtr
io.mem_to_ooo.lqDeqPtr := lsq.io.lqDeqPtr
lsq.io.tl_d_channel <> dcache.io.lsu.tl_d_channel

// LSQ to store buffer
lsq.io.sbuffer <> sbuffer.io.in
// Port 0 of the store buffer is shared with the vector segment unit; the
// following connections override what the bulk connection above drove.
sbuffer.io.in(0).valid := lsq.io.sbuffer(0).valid || vSegmentUnit.io.sbuffer.valid
sbuffer.io.in(0).bits := Mux1H(Seq(
  vSegmentUnit.io.sbuffer.valid -> vSegmentUnit.io.sbuffer.bits,
  lsq.io.sbuffer(0).valid -> lsq.io.sbuffer(0).bits
))
vSegmentUnit.io.sbuffer.ready := sbuffer.io.in(0).ready
lsq.io.sqEmpty <> sbuffer.io.sqempty
dcache.io.force_write := lsq.io.force_write

// Default the difftest channels; they are only wired when difftest is enabled.
sbuffer.io.vecDifftestInfo := DontCare
lsq.io.sbufferVecDifftestInfo := DontCare
vSegmentUnit.io.vecDifftestInfo := DontCare
if (env.EnableDifftest) {
  sbuffer.io.vecDifftestInfo.zipWithIndex.foreach { case (sbufferPort, index) =>
    if (index == 0) {
      // Port 0: the segment unit's difftest info is selected when it is valid,
      // otherwise the LSQ's.
      val vSegmentDifftestValid = vSegmentUnit.io.vecDifftestInfo.valid
      sbufferPort.valid := Mux(vSegmentDifftestValid, vSegmentUnit.io.vecDifftestInfo.valid, lsq.io.sbufferVecDifftestInfo(0).valid)
      sbufferPort.bits := Mux(vSegmentDifftestValid, vSegmentUnit.io.vecDifftestInfo.bits, lsq.io.sbufferVecDifftestInfo(0).bits)

      vSegmentUnit.io.vecDifftestInfo.ready := sbufferPort.ready
      lsq.io.sbufferVecDifftestInfo(0).ready := sbufferPort.ready
    } else {
      sbufferPort <> lsq.io.sbufferVecDifftestInfo(index)
    }
  }
}

// lsq.io.vecStoreRetire <> vsFlowQueue.io.sqRelease
// lsq.io.vecWriteback.valid := vlWrapper.io.uopWriteback.fire &&
//   vlWrapper.io.uopWriteback.bits.uop.vpu.lastUop
// lsq.io.vecWriteback.bits := vlWrapper.io.uopWriteback.bits

// vector
// Per-port accept conditions: the split unit is ready and the issued uop's
// fuOpType is a vector load / vector store respectively.
val vLoadCanAccept = (0 until VlduCnt).map(i =>
  vlSplit(i).io.in.ready && VlduType.isVecLd(io.ooo_to_mem.issueVldu(i).bits.uop.fuOpType)
)
// NOTE(review): indexes issueVldu with i < VstuCnt, so this presumably relies
// on VstuCnt <= VlduCnt -- confirm.
val vStoreCanAccept = (0 until VstuCnt).map(i =>
  vsSplit(i).io.in.ready && VstuType.isVecSt(io.ooo_to_mem.issueVldu(i).bits.uop.fuOpType)
)
// Only the first vector issue port is checked for segment load/store uops.
val isSegment = io.ooo_to_mem.issueVldu.head.valid && isVsegls(io.ooo_to_mem.issueVldu.head.bits.uop.fuType)
val isFixVlUop = io.ooo_to_mem.issueVldu.map{x =>
  x.bits.uop.vpu.isVleff && x.bits.uop.vpu.lastUop && x.valid
}

// init port
/**
  * TODO: the split vsMergeBuffer may be removed if one RS can accept two feedbacks,
  *       or if the RS does not need to replay uops.
  * for now:
  *  RS0 -> VsSplit0 -> stu0 -> vsMergebuffer0 -> feedback -> RS0
  *  RS1 -> VsSplit1 -> stu1 -> vsMergebuffer1 -> feedback -> RS1
  *
  * vector loads don't need feedback
  *
  *  RS0 -> VlSplit0  -> ldu0 -> |
  *  RS1 -> VlSplit1  -> ldu1 -> |  -> vlMergebuffer
  *        replayIO   -> ldu3 -> |
  * */
(0 until VstuCnt).foreach{i =>
  // Defaults; individual fields are connected by later statements.
  vsMergeBuffer(i).io.fromPipeline := DontCare
  vsMergeBuffer(i).io.fromSplit := DontCare

  vsMergeBuffer(i).io.fromMisalignBuffer.get.flush := storeMisalignBuffer.io.toVecStoreMergeBuffer(i).flush
  vsMergeBuffer(i).io.fromMisalignBuffer.get.mbIndex := storeMisalignBuffer.io.toVecStoreMergeBuffer(i).mbIndex
}
// Vector store split units: take uops from the shared vector issue ports and
// feed the store pipelines through a pipeline register.
(0 until VstuCnt).foreach{i =>
  vsSplit(i).io.redirect <> redirect
  vsSplit(i).io.in <> io.ooo_to_mem.issueVldu(i)
  // Overrides the valid driven by the bulk connection above: only vector
  // stores that are not segment accesses enter the store split unit.
  vsSplit(i).io.in.valid := io.ooo_to_mem.issueVldu(i).valid &&
                            vStoreCanAccept(i) && !isSegment
  vsSplit(i).io.toMergeBuffer <> vsMergeBuffer(i).io.fromSplit.head
  // Flush condition: checks the uop entering the stage when the split output
  // fires, otherwise the uop currently presented on vecstin.
  NewPipelineConnect(
    vsSplit(i).io.out, storeUnits(i).io.vecstin, storeUnits(i).io.vecstin.fire,
    Mux(vsSplit(i).io.out.fire, vsSplit(i).io.out.bits.uop.robIdx.needFlush(io.redirect), storeUnits(i).io.vecstin.bits.uop.robIdx.needFlush(io.redirect)),
    Option("VsSplitConnectStu")
  )
  vsSplit(i).io.vstd.get := DontCare // Todo: Discuss how to pass vector store data

  vsSplit(i).io.vstdMisalign.get.storeMisalignBufferEmpty := !storeMisalignBuffer.io.full
  vsSplit(i).io.vstdMisalign.get.storePipeEmpty := !storeUnits(i).io.s0_s1_valid

}
// Vector load split units.
(0 until VlduCnt).foreach{i =>
  vlSplit(i).io.redirect <> redirect
  vlSplit(i).io.in <> io.ooo_to_mem.issueVldu(i)
  // Overrides the valid driven by the bulk connection above: excludes segment
  // accesses and the uops flagged by isFixVlUop.
  vlSplit(i).io.in.valid := io.ooo_to_mem.issueVldu(i).valid &&
                            vLoadCanAccept(i) && !isSegment && !isFixVlUop(i)
  vlSplit(i).io.toMergeBuffer <> vlMergeBuffer.io.fromSplit(i)
  vlSplit(i).io.threshold.get.valid := vlMergeBuffer.io.toSplit.get.threshold
  vlSplit(i).io.threshold.get.bits := lsq.io.lqDeqPtr
  NewPipelineConnect(
    vlSplit(i).io.out, loadUnits(i).io.vecldin, loadUnits(i).io.vecldin.fire,
    Mux(vlSplit(i).io.out.fire, vlSplit(i).io.out.bits.uop.robIdx.needFlush(io.redirect), loadUnits(i).io.vecldin.bits.uop.robIdx.needFlush(io.redirect)),
    Option("VlSplitConnectLdu")
  )

  // Subsequent instructions will be blocked
  vfofBuffer.io.in(i).valid := io.ooo_to_mem.issueVldu(i).valid
  vfofBuffer.io.in(i).bits := io.ooo_to_mem.issueVldu(i).bits
}
// Load pipelines -> vector load merge buffer.
(0 until LduCnt).foreach{i=>
  loadUnits(i).io.vecldout.ready := vlMergeBuffer.io.fromPipeline(i).ready
  loadMisalignBuffer.io.vecWriteBack.ready := true.B

  if (i == MisalignWBPort) {
    // On this port the load unit's own vector output wins; the load misalign
    // buffer's vector writeback is forwarded only when the load unit is idle.
    when(loadUnits(i).io.vecldout.valid) {
      vlMergeBuffer.io.fromPipeline(i).valid := loadUnits(i).io.vecldout.valid
      vlMergeBuffer.io.fromPipeline(i).bits := loadUnits(i).io.vecldout.bits
    } .otherwise {
      vlMergeBuffer.io.fromPipeline(i).valid := loadMisalignBuffer.io.vecWriteBack.valid
      vlMergeBuffer.io.fromPipeline(i).bits := loadMisalignBuffer.io.vecWriteBack.bits
    }
  } else {
    vlMergeBuffer.io.fromPipeline(i).valid := loadUnits(i).io.vecldout.valid
    vlMergeBuffer.io.fromPipeline(i).bits := loadUnits(i).io.vecldout.bits
  }
}

// Store pipelines -> vector store merge buffers (same priority scheme: the
// store unit's output first, then the store misalign buffer's writeback).
(0 until StaCnt).foreach{i=>
  if(i < VstuCnt){
    storeUnits(i).io.vecstout.ready := true.B
    storeMisalignBuffer.io.vecWriteBack(i).ready := vsMergeBuffer(i).io.fromPipeline.head.ready

    when(storeUnits(i).io.vecstout.valid) {
      vsMergeBuffer(i).io.fromPipeline.head.valid := storeUnits(i).io.vecstout.valid
      vsMergeBuffer(i).io.fromPipeline.head.bits := storeUnits(i).io.vecstout.bits
    } .otherwise {
      vsMergeBuffer(i).io.fromPipeline.head.valid := storeMisalignBuffer.io.vecWriteBack(i).valid
      vsMergeBuffer(i).io.fromPipeline.head.bits := storeMisalignBuffer.io.vecWriteBack(i).bits
    }
  }
}

// A shared vector issue port is ready when either split unit can take the uop.
(0 until VlduCnt).foreach{i=>
  io.ooo_to_mem.issueVldu(i).ready := vLoadCanAccept(i) || vStoreCanAccept(i)
}

vlMergeBuffer.io.redirect <> redirect
vsMergeBuffer.foreach(_.io.redirect <> redirect)
(0 until VlduCnt).foreach{i=>
  vlMergeBuffer.io.toLsq(i) <> lsq.io.ldvecFeedback(i)
}
(0 until VstuCnt).foreach{i=>
  vsMergeBuffer(i).io.toLsq.head <> lsq.io.stvecFeedback(i)
}

(0 until VlduCnt).foreach{i=>
  // send to RS
  vlMergeBuffer.io.feedback(i) <> io.mem_to_ooo.vlduIqFeedback(i).feedbackSlow
  io.mem_to_ooo.vlduIqFeedback(i).feedbackFast := DontCare
}
(0 until VstuCnt).foreach{i =>
  // send to RS
  if (i == 0){
    // Feedback port 0 is shared between store merge buffer 0 and the segment unit.
    io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow.valid := vsMergeBuffer(i).io.feedback.head.valid || vSegmentUnit.io.feedback.valid
    io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow.bits := Mux1H(Seq(
      vSegmentUnit.io.feedback.valid -> vSegmentUnit.io.feedback.bits,
      vsMergeBuffer(i).io.feedback.head.valid -> vsMergeBuffer(i).io.feedback.head.bits
    ))
    io.mem_to_ooo.vstuIqFeedback(i).feedbackFast := DontCare
  } else {
    vsMergeBuffer(i).io.feedback.head <> io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow
    io.mem_to_ooo.vstuIqFeedback(i).feedbackFast := DontCare
  }
}

// Vector writeback arbitration. In each branch the sources are listed in
// priority order in the PriorityMux, and a lower-priority source's ready is
// masked by the valid of every higher-priority source.
// NOTE(review): indexes vsMergeBuffer with i < VlduCnt, so this presumably
// relies on VlduCnt <= VstuCnt -- confirm.
(0 until VlduCnt).foreach{i=>
  if (i == 0){ // for segmentUnit, segmentUnit use port0 writeback
    io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid || vSegmentUnit.io.uopwriteback.valid
    io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
      vSegmentUnit.io.uopwriteback.valid          -> vSegmentUnit.io.uopwriteback.bits,
      vlMergeBuffer.io.uopWriteback(i).valid      -> vlMergeBuffer.io.uopWriteback(i).bits,
      vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
    ))
    vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready && !vSegmentUnit.io.uopwriteback.valid
    vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid && !vSegmentUnit.io.uopwriteback.valid
    vSegmentUnit.io.uopwriteback.ready := io.mem_to_ooo.writebackVldu(i).ready
  } else if (i == 1) {
    // Port 1 additionally carries the vfofBuffer writeback, at highest priority.
    io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid || vfofBuffer.io.uopWriteback.valid
    io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
      vfofBuffer.io.uopWriteback.valid            -> vfofBuffer.io.uopWriteback.bits,
      vlMergeBuffer.io.uopWriteback(i).valid      -> vlMergeBuffer.io.uopWriteback(i).bits,
      vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
    ))
    vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready && !vfofBuffer.io.uopWriteback.valid
    vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid && !vfofBuffer.io.uopWriteback.valid
    vfofBuffer.io.uopWriteback.ready := io.mem_to_ooo.writebackVldu(i).ready
  } else {
    io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid
    io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
      vlMergeBuffer.io.uopWriteback(i).valid      -> vlMergeBuffer.io.uopWriteback(i).bits,
      vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
    ))
    vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready
    vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid
  }

  // The vfofBuffer observes every load merge-buffer writeback.
  vfofBuffer.io.mergeUopWriteback(i).valid := vlMergeBuffer.io.uopWriteback(i).valid
  vfofBuffer.io.mergeUopWriteback(i).bits := vlMergeBuffer.io.uopWriteback(i).bits
}


vfofBuffer.io.redirect <> redirect

// Sbuffer
sbuffer.io.csrCtrl <> csrCtrl
sbuffer.io.dcache <> dcache.io.lsu.store
sbuffer.io.memSetPattenDetected := dcache.io.memSetPattenDetected
sbuffer.io.force_write <> lsq.io.force_write
// flush sbuffer
// Three flush sources: the LSQ (cmoFlush), io.ooo_to_mem.flushSb (fenceFlush),
// and the atomics unit or the vector segment unit (atomicsFlush).
val cmoFlush = lsq.io.flushSbuffer.valid
val fenceFlush = io.ooo_to_mem.flushSb
val atomicsFlush = atomicsUnit.io.flush_sbuffer.valid || vSegmentUnit.io.flush_sbuffer.valid
// "Empty" requires both the store buffer and the uncache module to report
// empty; the flag is registered before being sent out.
val stIsEmpty = sbuffer.io.flush.empty && uncache.io.flush.empty
io.mem_to_ooo.sbIsEmpty := RegNext(stIsEmpty)

// if both of them tries to flush sbuffer at the same time
// something must have
gone wrong 1710c590fb32Scz4e assert(!(fenceFlush && atomicsFlush && cmoFlush)) 1711c590fb32Scz4e sbuffer.io.flush.valid := RegNext(fenceFlush || atomicsFlush || cmoFlush) 1712c590fb32Scz4e uncache.io.flush.valid := sbuffer.io.flush.valid 1713c590fb32Scz4e 1714c590fb32Scz4e // AtomicsUnit: AtomicsUnit will override other control signials, 1715c590fb32Scz4e // as atomics insts (LR/SC/AMO) will block the pipeline 1716c590fb32Scz4e val s_normal +: s_atomics = Enum(StaCnt + HyuCnt + 1) 1717c590fb32Scz4e val state = RegInit(s_normal) 1718c590fb32Scz4e 1719c590fb32Scz4e val st_atomics = Seq.tabulate(StaCnt)(i => 1720c590fb32Scz4e io.ooo_to_mem.issueSta(i).valid && FuType.storeIsAMO((io.ooo_to_mem.issueSta(i).bits.uop.fuType)) 1721c590fb32Scz4e ) ++ Seq.tabulate(HyuCnt)(i => 1722c590fb32Scz4e io.ooo_to_mem.issueHya(i).valid && FuType.storeIsAMO((io.ooo_to_mem.issueHya(i).bits.uop.fuType)) 1723c590fb32Scz4e ) 1724c590fb32Scz4e 1725c590fb32Scz4e for (i <- 0 until StaCnt) when(st_atomics(i)) { 1726c590fb32Scz4e io.ooo_to_mem.issueSta(i).ready := atomicsUnit.io.in.ready 1727c590fb32Scz4e storeUnits(i).io.stin.valid := false.B 1728c590fb32Scz4e 1729c590fb32Scz4e state := s_atomics(i) 1730c590fb32Scz4e } 1731c590fb32Scz4e for (i <- 0 until HyuCnt) when(st_atomics(StaCnt + i)) { 1732c590fb32Scz4e io.ooo_to_mem.issueHya(i).ready := atomicsUnit.io.in.ready 1733c590fb32Scz4e hybridUnits(i).io.lsin.valid := false.B 1734c590fb32Scz4e 1735c590fb32Scz4e state := s_atomics(StaCnt + i) 1736c590fb32Scz4e assert(!st_atomics.zipWithIndex.filterNot(_._2 == StaCnt + i).unzip._1.reduce(_ || _)) 1737c590fb32Scz4e } 1738c590fb32Scz4e when (atomicsUnit.io.out.valid) { 1739c590fb32Scz4e state := s_normal 1740c590fb32Scz4e } 1741c590fb32Scz4e 1742c590fb32Scz4e atomicsUnit.io.in.valid := st_atomics.reduce(_ || _) 1743c590fb32Scz4e atomicsUnit.io.in.bits := Mux1H(Seq.tabulate(StaCnt)(i => 1744c590fb32Scz4e st_atomics(i) -> io.ooo_to_mem.issueSta(i).bits) ++ 1745c590fb32Scz4e Seq.tabulate(HyuCnt)(i => 
st_atomics(StaCnt+i) -> io.ooo_to_mem.issueHya(i).bits)) 1746c590fb32Scz4e atomicsUnit.io.storeDataIn.zipWithIndex.foreach { case (stdin, i) => 1747c590fb32Scz4e stdin.valid := st_data_atomics(i) 1748c590fb32Scz4e stdin.bits := stData(i).bits 1749c590fb32Scz4e } 1750c590fb32Scz4e atomicsUnit.io.redirect <> redirect 1751c590fb32Scz4e 1752c590fb32Scz4e // TODO: complete amo's pmp support 1753c590fb32Scz4e val amoTlb = dtlb_ld(0).requestor(0) 1754c590fb32Scz4e atomicsUnit.io.dtlb.resp.valid := false.B 1755c590fb32Scz4e atomicsUnit.io.dtlb.resp.bits := DontCare 1756c590fb32Scz4e atomicsUnit.io.dtlb.req.ready := amoTlb.req.ready 1757c590fb32Scz4e atomicsUnit.io.pmpResp := pmp_check(0).resp 1758c590fb32Scz4e 1759c590fb32Scz4e atomicsUnit.io.dcache <> dcache.io.lsu.atomics 1760c590fb32Scz4e atomicsUnit.io.flush_sbuffer.empty := stIsEmpty 1761c590fb32Scz4e 1762c590fb32Scz4e atomicsUnit.io.csrCtrl := csrCtrl 1763c590fb32Scz4e 1764c590fb32Scz4e // for atomicsUnit, it uses loadUnit(0)'s TLB port 1765c590fb32Scz4e 1766c590fb32Scz4e when (state =/= s_normal) { 1767c590fb32Scz4e // use store wb port instead of load 1768c590fb32Scz4e loadUnits(0).io.ldout.ready := false.B 1769c590fb32Scz4e // use load_0's TLB 1770c590fb32Scz4e atomicsUnit.io.dtlb <> amoTlb 1771c590fb32Scz4e 1772c590fb32Scz4e // hw prefetch should be disabled while executing atomic insts 1773c590fb32Scz4e loadUnits.map(i => i.io.prefetch_req.valid := false.B) 1774c590fb32Scz4e 1775c590fb32Scz4e // make sure there's no in-flight uops in load unit 1776c590fb32Scz4e assert(!loadUnits(0).io.ldout.valid) 1777c590fb32Scz4e } 1778c590fb32Scz4e 1779c590fb32Scz4e lsq.io.flushSbuffer.empty := sbuffer.io.sbempty 1780c590fb32Scz4e 1781c590fb32Scz4e for (i <- 0 until StaCnt) { 1782c590fb32Scz4e when (state === s_atomics(i)) { 1783c590fb32Scz4e io.mem_to_ooo.staIqFeedback(i).feedbackSlow := atomicsUnit.io.feedbackSlow 1784c590fb32Scz4e assert(!storeUnits(i).io.feedback_slow.valid) 1785c590fb32Scz4e } 1786c590fb32Scz4e } 
// ---- Atomics feedback for hybrid ports, exception bookkeeping and exception-address generation ----
  // While an atomic issued from hybrid port i is in flight, that port's slow
  // feedback comes from the AtomicsUnit; the hybrid unit must stay silent.
  for (i <- 0 until HyuCnt) {
    when (state === s_atomics(StaCnt + i)) {
      io.mem_to_ooo.hyuIqFeedback(i).feedbackSlow := atomicsUnit.io.feedbackSlow
      assert(!hybridUnits(i).io.feedback_slow.valid)
    }
  }

  lsq.io.exceptionAddr.isStore := io.ooo_to_mem.isStoreException
  // Exception address is used several cycles after flush.
  // We delay it by 10 cycles to ensure its flush safety.
  // The flag is set when the AtomicsUnit reports an exception and cleared
  // 10 cycles after a redirect (clearing has priority over setting).
  val atomicsException = RegInit(false.B)
  when (DelayN(redirect.valid, 10) && atomicsException) {
    atomicsException := false.B
  }.elsewhen (atomicsUnit.io.exceptionInfo.valid) {
    atomicsException := true.B
  }

  // Misalign buffers may overwrite the exception info; the load buffer is
  // preferred over the store buffer whenever its overwrite request is valid.
  val misalignBufExceptionOverwrite = loadMisalignBuffer.io.overwriteExpBuf.valid || storeMisalignBuffer.io.overwriteExpBuf.valid
  val misalignBufExceptionVaddr = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
    loadMisalignBuffer.io.overwriteExpBuf.vaddr,
    storeMisalignBuffer.io.overwriteExpBuf.vaddr
  )
  val misalignBufExceptionIsHyper = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
    loadMisalignBuffer.io.overwriteExpBuf.isHyper,
    storeMisalignBuffer.io.overwriteExpBuf.isHyper
  )
  val misalignBufExceptionGpaddr = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
    loadMisalignBuffer.io.overwriteExpBuf.gpaddr,
    storeMisalignBuffer.io.overwriteExpBuf.gpaddr
  )
  val misalignBufExceptionIsForVSnonLeafPTE = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
    loadMisalignBuffer.io.overwriteExpBuf.isForVSnonLeafPTE,
    storeMisalignBuffer.io.overwriteExpBuf.isForVSnonLeafPTE
  )

  // Same set/clear scheme as atomicsException, driven by the vector segment unit.
  val vSegmentException = RegInit(false.B)
  when (DelayN(redirect.valid, 10) && vSegmentException) {
    vSegmentException := false.B
  }.elsewhen (vSegmentUnit.io.exceptionInfo.valid) {
    vSegmentException := true.B
  }
  // Exception payloads are captured in the cycle the owning unit reports the exception.
  val atomicsExceptionAddress = RegEnable(atomicsUnit.io.exceptionInfo.bits.vaddr, atomicsUnit.io.exceptionInfo.valid)
  val vSegmentExceptionVstart = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vstart, vSegmentUnit.io.exceptionInfo.valid)
  val vSegmentExceptionVl = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vl, vSegmentUnit.io.exceptionInfo.valid)
  val vSegmentExceptionAddress = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vaddr, vSegmentUnit.io.exceptionInfo.valid)
  val atomicsExceptionGPAddress = RegEnable(atomicsUnit.io.exceptionInfo.bits.gpaddr, atomicsUnit.io.exceptionInfo.valid)
  val vSegmentExceptionGPAddress = RegEnable(vSegmentUnit.io.exceptionInfo.bits.gpaddr, vSegmentUnit.io.exceptionInfo.valid)
  val atomicsExceptionIsForVSnonLeafPTE = RegEnable(atomicsUnit.io.exceptionInfo.bits.isForVSnonLeafPTE, atomicsUnit.io.exceptionInfo.valid)
  val vSegmentExceptionIsForVSnonLeafPTE = RegEnable(vSegmentUnit.io.exceptionInfo.bits.isForVSnonLeafPTE, vSegmentUnit.io.exceptionInfo.valid)

  // Source priority: atomics > misalign buffer overwrite > vector segment > LSQ.
  val exceptionVaddr = Mux(
    atomicsException,
    atomicsExceptionAddress,
    Mux(misalignBufExceptionOverwrite,
      misalignBufExceptionVaddr,
      Mux(vSegmentException,
        vSegmentExceptionAddress,
        lsq.io.exceptionAddr.vaddr
      )
    )
  )
  // whether vaddr need ext or is hyper inst:
  // VaNeedExt: atomicsException -> false; misalignBufExceptionOverwrite -> true; vSegmentException -> false
  // IsHyper: atomicsException -> false; vSegmentException -> false
  val exceptionVaNeedExt = !atomicsException &&
    (misalignBufExceptionOverwrite ||
      (!vSegmentException && lsq.io.exceptionAddr.vaNeedExt))
  val exceptionIsHyper = !atomicsException &&
    (misalignBufExceptionOverwrite && misalignBufExceptionIsHyper ||
      (!vSegmentException && lsq.io.exceptionAddr.isHyper && !misalignBufExceptionOverwrite))

  /** Builds the XLEN-wide exception virtual address reported to the backend.
    *
    * When `vaNeedExt` is low, `vaddr` is forwarded unchanged. Otherwise the address is
    * truncated and extended according to the translation scheme selected from `mode`,
    * `isVirt` and the `satp` / `vsatp` / `hgatp` mode fields: zero-extended for the bare
    * and x4 (guest-physical) schemes, sign-extended for Sv39 / Sv48.
    *
    * @param mode      privilege mode, compared against `CSRConst.ModeM`
    * @param isVirt    virtualization is in effect (callers also assert it for hyper instructions)
    * @param vaNeedExt whether `vaddr` must be re-extended at all
    * @param satp      supervisor address-translation CSR view (only `mode` is read)
    * @param vsatp     virtual-supervisor address-translation CSR view (only `mode` is read)
    * @param hgatp     guest address-translation CSR view (only `mode` is read)
    * @param vaddr     raw exception address
    * @return a `UInt(XLEN.W)` wire holding the extended address
    */
  def GenExceptionVa(
    mode: UInt, isVirt: Bool, vaNeedExt: Bool,
    satp: TlbSatpBundle, vsatp: TlbSatpBundle, hgatp: TlbHgatpBundle,
    vaddr: UInt
  ) = {
    // The widest slice taken below is 48 + 2 bits (Sv48x4).
    require(VAddrBits >= 50)

    val satpNone = satp.mode === 0.U
    val satpSv39 = satp.mode === 8.U
    val satpSv48 = satp.mode === 9.U

    val vsatpNone = vsatp.mode === 0.U
    val vsatpSv39 = vsatp.mode === 8.U
    val vsatpSv48 = vsatp.mode === 9.U

    val hgatpNone = hgatp.mode === 0.U
    val hgatpSv39x4 = hgatp.mode === 8.U
    val hgatpSv48x4 = hgatp.mode === 9.U

    // For !isVirt, mode check is necessary, as we don't want virtual memory in M-mode.
    // For isVirt, mode check is unnecessary, as virt won't be 1 in M-mode.
    // Also, isVirt includes Hyper Insts, which don't care mode either.

    val useBareAddr =
      (isVirt && vsatpNone && hgatpNone) ||
      (!isVirt && (mode === CSRConst.ModeM)) ||
      (!isVirt && (mode =/= CSRConst.ModeM) && satpNone)
    val useSv39Addr =
      (isVirt && vsatpSv39) ||
      (!isVirt && (mode =/= CSRConst.ModeM) && satpSv39)
    val useSv48Addr =
      (isVirt && vsatpSv48) ||
      (!isVirt && (mode =/= CSRConst.ModeM) && satpSv48)
    val useSv39x4Addr = isVirt && vsatpNone && hgatpSv39x4
    val useSv48x4Addr = isVirt && vsatpNone && hgatpSv48x4

    // NOTE(review): `take(n)` is presumably the project helper selecting the low n bits -- confirm.
    val bareAddr = ZeroExt(vaddr(PAddrBits - 1, 0), XLEN)
    val sv39Addr = SignExt(vaddr.take(39), XLEN)
    val sv39x4Addr = ZeroExt(vaddr.take(39 + 2), XLEN)
    val sv48Addr = SignExt(vaddr.take(48), XLEN)
    val sv48x4Addr = ZeroExt(vaddr.take(48 + 2), XLEN)

    val ExceptionVa = Wire(UInt(XLEN.W))
    when (vaNeedExt) {
      // NOTE(review): Mux1H expects exactly one select to be high; nothing here covers
      // the case where none of the five conditions holds -- confirm it cannot occur.
      ExceptionVa := Mux1H(Seq(
        (useBareAddr) -> bareAddr,
        (useSv39Addr) -> sv39Addr,
        (useSv48Addr) -> sv48Addr,
        (useSv39x4Addr) -> sv39x4Addr,
        (useSv48x4Addr) -> sv48x4Addr,
      ))
    } .otherwise {
      ExceptionVa := vaddr
    }

    ExceptionVa
  }

  // The generated address is registered once before leaving the block.
  io.mem_to_ooo.lsqio.vaddr := RegNext(
    GenExceptionVa(tlbcsr.priv.dmode, tlbcsr.priv.virt || exceptionIsHyper, exceptionVaNeedExt,
      tlbcsr.satp, tlbcsr.vsatp, tlbcsr.hgatp, exceptionVaddr)
  )

// ---- Remaining exception outputs, top-level bypass signals and vector segment unit inputs ----
  // A vector segment instruction executes atomically, so atomicsException and
  // vSegmentException should never be raised at the same time.
  XSError(atomicsException && vSegmentException, "atomicsException and vSegmentException raise at the same time!")
  // vstart / vl come from the segment unit while its exception is pending, otherwise from the LSQ.
  io.mem_to_ooo.lsqio.vstart := RegNext(Mux(vSegmentException,
    vSegmentExceptionVstart,
    lsq.io.exceptionAddr.vstart)
  )
  io.mem_to_ooo.lsqio.vl := RegNext(Mux(vSegmentException,
    vSegmentExceptionVl,
    lsq.io.exceptionAddr.vl)
  )

  XSError(atomicsException && atomicsUnit.io.in.valid, "new instruction before exception triggers\n")
  // Source priority: atomics > misalign buffer overwrite > vector segment > LSQ.
  io.mem_to_ooo.lsqio.gpaddr := RegNext(Mux(
    atomicsException,
    atomicsExceptionGPAddress,
    Mux(misalignBufExceptionOverwrite,
      misalignBufExceptionGpaddr,
      Mux(vSegmentException,
        vSegmentExceptionGPAddress,
        lsq.io.exceptionAddr.gpaddr
      )
    )
  ))
  io.mem_to_ooo.lsqio.isForVSnonLeafPTE := RegNext(Mux(
    atomicsException,
    atomicsExceptionIsForVSnonLeafPTE,
    Mux(misalignBufExceptionOverwrite,
      misalignBufExceptionIsForVSnonLeafPTE,
      Mux(vSegmentException,
        vSegmentExceptionIsForVSnonLeafPTE,
        lsq.io.exceptionAddr.isForVSnonLeafPTE
      )
    )
  ))
  // Signals forwarded from the top level to the backend through this block.
  io.mem_to_ooo.topToBackendBypass match { case x =>
    x.hartId := io.hartId
    x.l2FlushDone := RegNext(io.l2_flush_done)
    x.externalInterrupt.msip := outer.clint_int_sink.in.head._1(0)
    x.externalInterrupt.mtip := outer.clint_int_sink.in.head._1(1)
    x.externalInterrupt.meip := outer.plic_int_sink.in.head._1(0)
    x.externalInterrupt.seip := outer.plic_int_sink.in.last._1(0)
    x.externalInterrupt.debug := outer.debug_int_sink.in.head._1(0)
    // nmi_31 is raised by either the external NMI line 0 or the local bus-error-unit interrupt.
    x.externalInterrupt.nmi.nmi_31 := outer.nmi_int_sink.in.head._1(0) | outer.beu_local_int_sink.in.head._1(0)
    x.externalInterrupt.nmi.nmi_43 := outer.nmi_int_sink.in.head._1(1)
    x.msiInfo := DelayNWithValid(io.fromTopToBackend.msiInfo, 1)
    x.clintTime := DelayNWithValid(io.fromTopToBackend.clintTime, 1)
  }

  io.memInfo.sqFull := RegNext(lsq.io.sqFull)
  io.memInfo.lqFull := RegNext(lsq.io.lqFull)
  io.memInfo.dcacheMSHRFull := RegNext(dcache.io.mshrFull)

  // Pass-through signals between the inner core and the outer hierarchy.
  io.inner_hartId := io.hartId
  io.inner_reset_vector := RegNext(io.outer_reset_vector)
  io.outer_cpu_halt := io.ooo_to_mem.backendToTopBypass.cpuHalted
  io.outer_l2_flush_en := io.ooo_to_mem.csrCtrl.flush_l2_enable
  io.outer_power_down_en := io.ooo_to_mem.csrCtrl.power_down_enable
  io.outer_cpu_critical_error := io.ooo_to_mem.backendToTopBypass.cpuCriticalError
  io.outer_beu_errors_icache := RegNext(io.inner_beu_errors_icache)
  io.inner_hc_perfEvents <> RegNext(io.outer_hc_perfEvents)

  // vector segmentUnit
  vSegmentUnit.io.in.bits <> io.ooo_to_mem.issueVldu.head.bits
  vSegmentUnit.io.in.valid := isSegment && io.ooo_to_mem.issueVldu.head.valid // is segment instruction
  vSegmentUnit.io.dtlb.resp.bits <> dtlb_reqs.take(LduCnt).head.resp.bits
  vSegmentUnit.io.dtlb.resp.valid <> dtlb_reqs.take(LduCnt).head.resp.valid
// ---- Vector segment unit (continued), reset tree, trace interface, difftest and top-down wiring ----
  vSegmentUnit.io.pmpResp <> pmp_check.head.resp
  vSegmentUnit.io.flush_sbuffer.empty := stIsEmpty
  vSegmentUnit.io.redirect <> redirect
  // The segment unit observes the responses of dcache load port 0.
  vSegmentUnit.io.rdcache.resp.bits := dcache.io.lsu.load(0).resp.bits
  vSegmentUnit.io.rdcache.resp.valid := dcache.io.lsu.load(0).resp.valid
  vSegmentUnit.io.rdcache.s2_bank_conflict := dcache.io.lsu.load(0).s2_bank_conflict
  // -------------------------
  // Vector Segment Triggers
  // -------------------------
  vSegmentUnit.io.fromCsrTrigger.tdataVec := tdata
  vSegmentUnit.io.fromCsrTrigger.tEnableVec := tEnable
  vSegmentUnit.io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
  vSegmentUnit.io.fromCsrTrigger.debugMode := debugMode

  // reset tree of MemBlock
  // Two ResetGen trees are built when enabled; the backend reset cell hangs off the right tree.
  if (p(DebugOptionsKey).ResetGen) {
    val leftResetTree = ResetGenNode(
      Seq(
        ModuleNode(ptw),
        ModuleNode(ptw_to_l2_buffer),
        ModuleNode(lsq),
        ModuleNode(dtlb_st_tlb_st),
        ModuleNode(dtlb_prefetch_tlb_prefetch),
        ModuleNode(pmp)
      )
      ++ pmp_checkers.map(ModuleNode(_))
      ++ (if (prefetcherOpt.isDefined) Seq(ModuleNode(prefetcherOpt.get)) else Nil)
      ++ (if (l1PrefetcherOpt.isDefined) Seq(ModuleNode(l1PrefetcherOpt.get)) else Nil)
    )
    val rightResetTree = ResetGenNode(
      Seq(
        ModuleNode(sbuffer),
        ModuleNode(dtlb_ld_tlb_ld),
        ModuleNode(dcache),
        ModuleNode(l1d_to_l2_buffer),
        CellNode(io.reset_backend)
      )
    )
    ResetGen(leftResetTree, reset, sim = false, io.sramTestBypass.fromL2Top.mbistReset)
    ResetGen(rightResetTree, reset, sim = false, io.sramTestBypass.fromL2Top.mbistReset)
  } else {
    io.reset_backend := DontCare
  }
  io.resetInFrontendBypass.toL2Top := io.resetInFrontendBypass.fromFrontend
  // trace interface
  val traceToL2Top = io.traceCoreInterfaceBypass.toL2Top
  val traceFromBackend = io.traceCoreInterfaceBypass.fromBackend
  traceFromBackend.fromEncoder := RegNext(traceToL2Top.fromEncoder)
  // trap info is only captured when group 0 retires a trap-type entry
  traceToL2Top.toEncoder.trap := RegEnable(
    traceFromBackend.toEncoder.trap,
    traceFromBackend.toEncoder.groups(0).valid && Itype.isTrap(traceFromBackend.toEncoder.groups(0).bits.itype)
  )
  traceToL2Top.toEncoder.priv := RegEnable(
    traceFromBackend.toEncoder.priv,
    traceFromBackend.toEncoder.groups(0).valid
  )
  (0 until TraceGroupNum).foreach { i =>
    // valid / iretire / itype are registered every cycle; the wider payload
    // fields are only updated when the group is valid.
    traceToL2Top.toEncoder.groups(i).valid := RegNext(traceFromBackend.toEncoder.groups(i).valid)
    traceToL2Top.toEncoder.groups(i).bits.iretire := RegNext(traceFromBackend.toEncoder.groups(i).bits.iretire)
    traceToL2Top.toEncoder.groups(i).bits.itype := RegNext(traceFromBackend.toEncoder.groups(i).bits.itype)
    traceToL2Top.toEncoder.groups(i).bits.ilastsize := RegEnable(
      traceFromBackend.toEncoder.groups(i).bits.ilastsize,
      traceFromBackend.toEncoder.groups(i).valid
    )
    // iaddr = registered base address + (registered ftqOffset << instOffsetBits);
    // a missing ftqOffset contributes zero.
    traceToL2Top.toEncoder.groups(i).bits.iaddr := RegEnable(
      traceFromBackend.toEncoder.groups(i).bits.iaddr,
      traceFromBackend.toEncoder.groups(i).valid
    ) + (RegEnable(
      traceFromBackend.toEncoder.groups(i).bits.ftqOffset.getOrElse(0.U),
      traceFromBackend.toEncoder.groups(i).valid
    ) << instOffsetBits)
  }


  io.mem_to_ooo.storeDebugInfo := DontCare
  // store event difftest information
  if (env.EnableDifftest) {
    (0 until EnsbufferWidth).foreach { i =>
      io.mem_to_ooo.storeDebugInfo(i).robidx := sbuffer.io.vecDifftestInfo(i).bits.robIdx
      sbuffer.io.vecDifftestInfo(i).bits.pc := io.mem_to_ooo.storeDebugInfo(i).pc
    }
  }

  // top-down info
  dcache.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
  dtlbRepeater.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
  lsq.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
  io.debugTopDown.toCore.robHeadMissInDCache := dcache.io.debugTopDown.robHeadMissInDCache
  io.debugTopDown.toCore.robHeadTlbReplay := lsq.io.debugTopDown.robHeadTlbReplay
  io.debugTopDown.toCore.robHeadTlbMiss := lsq.io.debugTopDown.robHeadTlbMiss
  io.debugTopDown.toCore.robHeadLoadVio := lsq.io.debugTopDown.robHeadLoadVio
  io.debugTopDown.toCore.robHeadLoadMSHR := lsq.io.debugTopDown.robHeadLoadMSHR
  dcache.io.debugTopDown.robHeadOtherReplay := lsq.io.debugTopDown.robHeadOtherReplay
  dcache.io.debugRolling := io.debugRolling

  lsq.io.noUopsIssued := io.topDownInfo.toBackend.noUopsIssued
  io.topDownInfo.toBackend.lqEmpty := lsq.io.lqEmpty
  io.topDownInfo.toBackend.sqEmpty := lsq.io.sqEmpty
  io.topDownInfo.toBackend.l1Miss := dcache.io.l1Miss
// ---- Top-down L2/L3 miss info, performance events, MBIST / SRAM test plumbing, wrapper classes ----
  io.topDownInfo.toBackend.l2TopMiss.l2Miss := RegNext(io.topDownInfo.fromL2Top.l2Miss)
  io.topDownInfo.toBackend.l2TopMiss.l3Miss := RegNext(io.topDownInfo.fromL2Top.l3Miss)

  // Per-cycle issue counts; hybrid ports are split by whether the uop is a load or a store.
  // `+&` is the width-expanding add, so the sums cannot wrap.
  val hyLdDeqCount = PopCount(io.ooo_to_mem.issueHya.map(x => x.valid && FuType.isLoad(x.bits.uop.fuType)))
  val hyStDeqCount = PopCount(io.ooo_to_mem.issueHya.map(x => x.valid && FuType.isStore(x.bits.uop.fuType)))
  val ldDeqCount = PopCount(io.ooo_to_mem.issueLda.map(_.valid)) +& hyLdDeqCount
  val stDeqCount = PopCount(io.ooo_to_mem.issueSta.take(StaCnt).map(_.valid)) +& hyStDeqCount
  val iqDeqCount = ldDeqCount +& stDeqCount
  XSPerfAccumulate("load_iq_deq_count", ldDeqCount)
  XSPerfHistogram("load_iq_deq_count", ldDeqCount, true.B, 0, LdExuCnt + 1)
  XSPerfAccumulate("store_iq_deq_count", stDeqCount)
  XSPerfHistogram("store_iq_deq_count", stDeqCount, true.B, 0, StAddrCnt + 1)
  XSPerfAccumulate("ls_iq_deq_count", iqDeqCount)

  val pfevent = Module(new PFEvent)
  pfevent.io.distribute_csr := csrCtrl.distribute_csr
  // This block uses hpmevent entries 16 to 23.
  val csrevents = pfevent.io.hpmevent.slice(16, 24)

  val perfFromUnits = (loadUnits ++ Seq(sbuffer, lsq, dcache)).flatMap(_.getPerfEvents)
  val perfFromPTW = perfEventsPTW.map(x => ("PTW_" + x._1, x._2))
  val perfBlock = Seq(("ldDeqCount", ldDeqCount),
                      ("stDeqCount", stDeqCount))
  // let index = 0 be no event
  val allPerfEvents = Seq(("noEvent", 0.U)) ++ perfFromUnits ++ perfFromPTW ++ perfBlock

  // Elaboration-time dump of the event index assignment.
  if (printEventCoding) {
    for (((name, inc), i) <- allPerfEvents.zipWithIndex) {
      println("MemBlock perfEvents Set", name, inc, i)
    }
  }

  val allPerfInc = allPerfEvents.map(_._2.asTypeOf(new PerfEvent))
  val perfEvents = HPerfMonitor(csrevents, allPerfInc).getPerfEvents
  generatePerfEvent()

  // MBIST pipeline and interface are only instantiated when hasMbist is set.
  private val mbistPl = MbistPipeline.PlaceMbistPipeline(Int.MaxValue, "MbistPipeMemBlk", hasMbist)
  private val mbistIntf = if (hasMbist) {
    val params = mbistPl.get.nodeParams
    val intf = Some(Module(new MbistInterface(
      params = Seq(params),
      ids = Seq(mbistPl.get.childrenIds),
      name = "MbistIntfMemBlk",
      pipelineNum = 1
    )))
    intf.get.toPipeline.head <> mbistPl.get.mbist
    mbistPl.get.registerCSV(intf.get.info, "MbistMemBlk")
    intf.get.mbist := DontCare
    dontTouch(intf.get.mbist)
    //TODO: add mbist controller connections here
    intf
  } else {
    None
  }
  private val sigFromSrams = if (hasSramTest) Some(SramHelper.genBroadCastBundleTop()) else None
  private val cg = ClockGate.genTeSrc
  dontTouch(cg)

  // Default first; the hasMbist branch below overrides it (last connect wins).
  sigFromSrams.foreach { sig => sig.mbist := DontCare }
  if (hasMbist) {
    // NOTE(review): `sigFromSrams.get` assumes hasSramTest holds whenever hasMbist does -- confirm.
    sigFromSrams.get.mbist := io.sramTestBypass.fromL2Top.mbist.get
    // Forward the MBIST bundle and reset from L2Top to both frontend and backend.
    io.sramTestBypass.toFrontend.mbist.get := io.sramTestBypass.fromL2Top.mbist.get
    io.sramTestBypass.toFrontend.mbistReset.get := io.sramTestBypass.fromL2Top.mbistReset.get
    io.sramTestBypass.toBackend.mbist.get := io.sramTestBypass.fromL2Top.mbist.get
    io.sramTestBypass.toBackend.mbistReset.get := io.sramTestBypass.fromL2Top.mbistReset.get
    cg.cgen := io.sramTestBypass.fromL2Top.mbist.get.cgen
  } else {
    cg.cgen := false.B
  }

  // sram debug
  // The control word from L2Top is registered once before being decoded and forwarded.
  val sramCtl = Option.when(hasSramCtl)(RegNext(io.sramTestBypass.fromL2Top.sramCtl.get))
  sigFromSrams.foreach { sig => sig.sramCtl := DontCare }
  sigFromSrams.zip(sramCtl).foreach {
    case (sig, ctl) =>
      sig.sramCtl.RTSEL := ctl(1, 0) // CFG[1 : 0]
      sig.sramCtl.WTSEL := ctl(3, 2) // CFG[3 : 2]
      sig.sramCtl.MCR := ctl(5, 4) // CFG[5 : 4]
      sig.sramCtl.MCW := ctl(7, 6) // CFG[7 : 6]
  }
  if (hasSramCtl) {
    io.sramTestBypass.toFrontend.sramCtl.get := sramCtl.get
  }
}

/** Diplomatic wrapper around [[MemBlockInlined]].
  *
  * `shouldBeInlined` is overridden to `false`; the actual logic lives in `inner`.
  */
class MemBlock()(implicit p: Parameters) extends LazyModule
  with HasXSParameter {
  override def shouldBeInlined: Boolean = false

  val inner = LazyModule(new MemBlockInlined())

  lazy val module = new MemBlockImp(this)
}

/** Module implementation of [[MemBlock]].
  *
  * Re-exports the inner module's `io` and `io_perf` bundles unchanged and, when
  * `DebugOptionsKey.ResetGen` is enabled, places the inner module under a ResetGen node.
  */
class MemBlockImp(wrapper: MemBlock) extends LazyModuleImp(wrapper) {
  val io = IO(wrapper.inner.module.io.cloneType)
  val io_perf = IO(wrapper.inner.module.io_perf.cloneType)
  io <> wrapper.inner.module.io
  io_perf <> wrapper.inner.module.io_perf

  if (p(DebugOptionsKey).ResetGen) {
    ResetGen(
      ResetGenNode(Seq(ModuleNode(wrapper.inner.module))),
      reset, sim = false, io.sramTestBypass.fromL2Top.mbistReset
    )
  }
}