1c590fb32Scz4e/*************************************************************************************** 2c590fb32Scz4e* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences 3c590fb32Scz4e* Copyright (c) 2020-2021 Peng Cheng Laboratory 4c590fb32Scz4e* 5c590fb32Scz4e* XiangShan is licensed under Mulan PSL v2. 6c590fb32Scz4e* You can use this software according to the terms and conditions of the Mulan PSL v2. 7c590fb32Scz4e* You may obtain a copy of Mulan PSL v2 at: 8c590fb32Scz4e* http://license.coscl.org.cn/MulanPSL2 9c590fb32Scz4e* 10c590fb32Scz4e* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, 11c590fb32Scz4e* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, 12c590fb32Scz4e* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 13c590fb32Scz4e* 14c590fb32Scz4e* See the Mulan PSL v2 for more details. 15c590fb32Scz4e***************************************************************************************/ 16c590fb32Scz4e 17c590fb32Scz4epackage xiangshan.mem 18c590fb32Scz4e 19c590fb32Scz4eimport org.chipsalliance.cde.config.Parameters 20c590fb32Scz4eimport chisel3._ 21c590fb32Scz4eimport chisel3.util._ 22c590fb32Scz4eimport freechips.rocketchip.diplomacy._ 23c590fb32Scz4eimport freechips.rocketchip.diplomacy.{BundleBridgeSource, LazyModule, LazyModuleImp} 24c590fb32Scz4eimport freechips.rocketchip.interrupts.{IntSinkNode, IntSinkPortSimple} 25c590fb32Scz4eimport freechips.rocketchip.tile.HasFPUParameters 26c590fb32Scz4eimport freechips.rocketchip.tilelink._ 27c590fb32Scz4eimport utils._ 28c590fb32Scz4eimport utility._ 29602aa9f1Scz4eimport utility.mbist.{MbistInterface, MbistPipeline} 30602aa9f1Scz4eimport utility.sram.{SramMbistBundle, SramBroadcastBundle, SramHelper} 318cfc24b2STang Haojinimport system.{HasSoCParameter, SoCParamsKey} 32c590fb32Scz4eimport xiangshan._ 33c590fb32Scz4eimport xiangshan.ExceptionNO._ 34c590fb32Scz4eimport xiangshan.frontend.HasInstrMMIOConst 
import xiangshan.backend.Bundles.{DynInst, MemExuInput, MemExuOutput}
import xiangshan.backend.ctrlblock.{DebugLSIO, LsTopdownInfo}
import xiangshan.backend.exu.MemExeUnit
import xiangshan.backend.fu._
import xiangshan.backend.fu.FuType._
import xiangshan.backend.fu.NewCSR.{CsrTriggerBundle, TriggerUtil, PFEvent}
import xiangshan.backend.fu.util.{CSRConst, SdtrigExt}
import xiangshan.backend.{BackendToTopBundle, TopToBackendBundle}
import xiangshan.backend.rob.{RobDebugRollingIO, RobPtr, RobLsqIO}
import xiangshan.backend.datapath.NewPipelineConnect
import xiangshan.backend.trace.{Itype, TraceCoreInterface}
import xiangshan.backend.Bundles._
import xiangshan.mem._
import xiangshan.mem.mdp._
import xiangshan.mem.Bundles._
import xiangshan.mem.prefetch.{BasePrefecher, L1Prefetcher, SMSParams, SMSPrefetcher}
import xiangshan.cache._
import xiangshan.cache.mmu._
import coupledL2.PrefetchRecv
// NOTE(review): the two imports below duplicate the utility.mbist / utility.sram
// imports already present near the top of the file; the duplicates could be removed.
import utility.mbist.{MbistInterface, MbistPipeline}
import utility.sram.{SramBroadcastBundle, SramHelper}

/** Unit counts and fixed writeback-port assignments shared across MemBlock.
  *
  * Base counts (load/store address/store data/hybrid/vector units) come from
  * backendParams; the derived counts fold the hybrid units into the load-side
  * and store-side totals.
  */
trait HasMemBlockParameters extends HasXSParameter {
  // number of memory units
  val LduCnt = backendParams.LduCnt
  val StaCnt = backendParams.StaCnt
  val StdCnt = backendParams.StdCnt
  val HyuCnt = backendParams.HyuCnt
  val VlduCnt = backendParams.VlduCnt
  val VstuCnt = backendParams.VstuCnt

  // hybrid units serve both as extra load pipes and extra store-address pipes
  val LdExuCnt = LduCnt + HyuCnt
  val StAddrCnt = StaCnt + HyuCnt
  val StDataCnt = StdCnt
  val MemExuCnt = LduCnt + HyuCnt + StaCnt + StdCnt
  val MemAddrExtCnt = LdExuCnt + StaCnt
  val MemVExuCnt = VlduCnt + VstuCnt

  // load writeback port indices: atomics share port 0, the misalign buffer
  // shares port 1, uncache results use port 2 (see the ldaExeWbReqs wiring
  // in MemBlockInlinedImp). NCWBPorts presumably lists the non-cacheable-capable
  // ports — TODO confirm.
  val AtomicWBPort = 0
  val MisalignWBPort = 1
  val UncacheWBPort = 2
  val NCWBPorts = Seq(1, 2)
}

/** Base class for MemBlock IO bundles; mixes in the shared unit counts. */
abstract class MemBlockBundle(implicit val p: Parameters) extends Bundle with HasMemBlockParameters

/** Store-data functional unit: a combinational pass-through that forwards
  * src(0) as the store data together with the instruction's robIdx.
  * ready/valid are wired straight through, so it holds no state.
  */
class Std(cfg: FuConfig)(implicit p: Parameters) extends FuncUnit(cfg) {
  io.in.ready := io.out.ready
  io.out.valid := io.in.valid
  io.out.bits := 0.U.asTypeOf(io.out.bits)
  io.out.bits.res.data := io.in.bits.data.src(0)
  io.out.bits.ctrl.robIdx := io.in.bits.ctrl.robIdx
}

/** Signals driven by the out-of-order backend into MemBlock:
  * issue ports, CSR/TLB control, and ROB/LSQ commit bookkeeping.
  */
class ooo_to_mem(implicit p: Parameters) extends MemBlockBundle {
  val backendToTopBypass = Flipped(new BackendToTopBundle)

  // fast load-to-load forwarding hints, one per load-capable pipe
  val loadFastMatch = Vec(LdExuCnt, Input(UInt(LdExuCnt.W)))
  val loadFastFuOpType = Vec(LdExuCnt, Input(FuOpType()))
  val loadFastImm = Vec(LdExuCnt, Input(UInt(12.W)))
  val sfence = Input(new SfenceBundle)
  val tlbCsr = Input(new TlbCsrBundle)
  // commit/pending state reported by the ROB for the load/store queues
  val lsqio = new Bundle {
    val lcommit = Input(UInt(log2Up(CommitWidth + 1).W))
    val scommit = Input(UInt(log2Up(CommitWidth + 1).W))
    val pendingMMIOld = Input(Bool())
    val pendingld = Input(Bool())
    val pendingst = Input(Bool())
    val pendingVst = Input(Bool())
    val commit = Input(Bool())
    val pendingPtr = Input(new RobPtr)
    val pendingPtrNext = Input(new RobPtr)
  }

  val isStoreException = Input(Bool())
  val isVlsException = Input(Bool())
  val csrCtrl = Flipped(new CustomCSRCtrlIO)
  val enqLsq = new LsqEnqIO
  val flushSb = Input(Bool())

  val storePc = Vec(StaCnt, Input(UInt(VAddrBits.W))) // for hw prefetch
  val hybridPc = Vec(HyuCnt, Input(UInt(VAddrBits.W))) // for hw prefetch

  // issue ports from the backend, one group per unit kind
  val issueLda = MixedVec(Seq.fill(LduCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueSta = MixedVec(Seq.fill(StaCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueStd = MixedVec(Seq.fill(StdCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueHya = MixedVec(Seq.fill(HyuCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueVldu = MixedVec(Seq.fill(VlduCnt)(Flipped(DecoupledIO(new MemExuInput(isVector=true)))))

  // flattened view of all issue ports
  def issueUops = issueLda ++ issueSta ++ issueStd ++ issueHya ++ issueVldu
}

/** Signals from MemBlock back to the out-of-order backend:
  * writebacks, queue bookkeeping, and replay/feedback channels.
  */
class mem_to_ooo(implicit p: Parameters) extends MemBlockBundle {
  val topToBackendBypass = new TopToBackendBundle

  val otherFastWakeup = Vec(LdExuCnt, ValidIO(new DynInst))
  val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize + 1).W))
  val sqCancelCnt = Output(UInt(log2Up(StoreQueueSize + 1).W))
  val sqDeq = Output(UInt(log2Ceil(EnsbufferWidth + 1).W))
  val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
  // used by VLSU issue queue, the vector store would wait all store before it, and the vector load would wait all load
  val sqDeqPtr = Output(new SqPtr)
  val lqDeqPtr = Output(new LqPtr)
  val stIn = Vec(StAddrCnt, ValidIO(new MemExuInput))
  val stIssuePtr = Output(new SqPtr())

  // redirect raised on a detected memory-ordering violation
  val memoryViolation = ValidIO(new
Redirect)
  val sbIsEmpty = Output(Bool())

  val lsTopdownInfo = Vec(LdExuCnt, Output(new LsTopdownInfo))

  // LSQ state exposed to the backend (exception vaddr/gpaddr, vector state,
  // per-pipe MMIO flags and uops, enqueue acceptance)
  val lsqio = new Bundle {
    val vaddr = Output(UInt(XLEN.W))
    val vstart = Output(UInt((log2Up(VLEN) + 1).W))
    val vl = Output(UInt((log2Up(VLEN) + 1).W))
    val gpaddr = Output(UInt(XLEN.W))
    val isForVSnonLeafPTE = Output(Bool())
    val mmio = Output(Vec(LoadPipelineWidth, Bool()))
    val uop = Output(Vec(LoadPipelineWidth, new DynInst))
    val lqCanAccept = Output(Bool())
    val sqCanAccept = Output(Bool())
  }

  // debug channel: MemBlock reports the robIdx of each store entering the
  // sbuffer and receives the matching pc back
  val storeDebugInfo = Vec(EnsbufferWidth, new Bundle {
    val robidx = Output(new RobPtr)
    val pc = Input(UInt(VAddrBits.W))
  })

  val writebackLda = Vec(LduCnt, DecoupledIO(new MemExuOutput))
  val writebackSta = Vec(StaCnt, DecoupledIO(new MemExuOutput))
  val writebackStd = Vec(StdCnt, DecoupledIO(new MemExuOutput))
  val writebackHyuLda = Vec(HyuCnt, DecoupledIO(new MemExuOutput))
  val writebackHyuSta = Vec(HyuCnt, DecoupledIO(new MemExuOutput))
  val writebackVldu = Vec(VlduCnt, DecoupledIO(new MemExuOutput(isVector = true)))
  // flattened writeback view; the order (sta, hyuLda, hyuSta, lda, vldu, std)
  // presumably matches the backend's writeback port order — TODO confirm
  def writeBack: Seq[DecoupledIO[MemExuOutput]] = {
    writebackSta ++
      writebackHyuLda ++ writebackHyuSta ++
      writebackLda ++
      writebackVldu ++
      writebackStd
  }

  // reservation-station feedback (replay/hints) per unit kind
  val ldaIqFeedback = Vec(LduCnt, new MemRSFeedbackIO)
  val staIqFeedback = Vec(StaCnt, new MemRSFeedbackIO)
  val hyuIqFeedback = Vec(HyuCnt, new MemRSFeedbackIO)
  val vstuIqFeedback= Vec(VstuCnt, new MemRSFeedbackIO(isVector = true))
  val vlduIqFeedback= Vec(VlduCnt, new MemRSFeedbackIO(isVector = true))
  val ldCancel = Vec(backendParams.LdExuCnt, new LoadCancelIO)
  val wakeup = Vec(backendParams.LdExuCnt, Valid(new DynInst))

  val s3_delayed_load_error = Vec(LdExuCnt, Output(Bool()))
}

/** Top-down performance-analysis flags about the ROB-head instruction,
  * reported toward the core.
  */
class MemCoreTopDownIO extends Bundle {
  val robHeadMissInDCache = Output(Bool())
  val robHeadTlbReplay = Output(Bool())
  val robHeadTlbMiss = Output(Bool())
  val robHeadLoadVio = Output(Bool())
  val robHeadLoadMSHR = Output(Bool())
}

/** Frontend-to-MemBlock channel: the instruction TLB's page-table-walk port. */
class fetch_to_mem(implicit p: Parameters) extends XSBundle{
  val itlb = Flipped(new TlbPtwIO())
}

// triple buffer applied in i-mmio path (two at MemBlock, one at L2Top)
class InstrUncacheBuffer()(implicit p: Parameters) extends LazyModule with HasInstrMMIOConst {
  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
  lazy val module = new InstrUncacheBufferImpl

  class InstrUncacheBufferImpl extends LazyModuleImp(this) {
    (node.in zip node.out) foreach { case ((in, edgeIn), (out, edgeOut)) =>
      // two back-to-back buffer stages on both the A and D channels
      out.a <> BufferParams.default(BufferParams.default(in.a))
      in.d <> BufferParams.default(BufferParams.default(out.d))

      // only a.valid, a.ready, a.address can change
      // hoping that the rest would be optimized to keep MemBlock port unchanged after adding buffer
      out.a.bits.data := 0.U
      out.a.bits.mask :=
Fill(mmioBusBytes, 1.U(1.W))
      out.a.bits.opcode := 4.U // Get
      out.a.bits.size := log2Ceil(mmioBusBytes).U
      out.a.bits.source := 0.U
    }
  }
}

// triple buffer applied in L1I$-L2 path (two at MemBlock, one at L2Top)
class ICacheBuffer()(implicit p: Parameters) extends LazyModule {
  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
  lazy val module = new ICacheBufferImpl

  class ICacheBufferImpl extends LazyModuleImp(this) {
    (node.in zip node.out) foreach { case ((in, edgeIn), (out, edgeOut)) =>
      // two back-to-back buffer stages on both the A and D channels
      out.a <> BufferParams.default(BufferParams.default(in.a))
      in.d <> BufferParams.default(BufferParams.default(out.d))
    }
  }
}

// double-buffered TileLink path for the I-cache control node (same structure
// as ICacheBuffer above)
class ICacheCtrlBuffer()(implicit p: Parameters) extends LazyModule {
  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
  lazy val module = new ICacheCtrlBufferImpl

  class ICacheCtrlBufferImpl extends LazyModuleImp(this) {
    (node.in zip node.out) foreach { case ((in, edgeIn), (out, edgeOut)) =>
      out.a <> BufferParams.default(BufferParams.default(in.a))
      in.d <> BufferParams.default(BufferParams.default(out.d))
    }
  }
}

// Frontend bus goes through MemBlock
// Aggregates the buffered frontend TileLink nodes (I-cache data/ctrl and
// instruction uncache). suggestName keeps the generated IO port names stable.
class FrontendBridge()(implicit p: Parameters) extends LazyModule {
  val icache_node = LazyModule(new ICacheBuffer()).suggestName("icache").node// to keep IO port name
  val icachectrl_node = LazyModule(new ICacheCtrlBuffer()).suggestName("icachectrl").node
  val instr_uncache_node = LazyModule(new InstrUncacheBuffer()).suggestName("instr_uncache").node
  lazy val module = new LazyModuleImp(this) {
  }
}

/** Diplomacy (LazyModule) shell of MemBlock: instantiates the D-cache,
  * uncache, L2 TLB (ptw) and the TileLink buffers/nodes toward L2, the
  * prefetch sender bridges toward L2/L3, the frontend bridge, and the
  * interrupt sink nodes.
  */
class MemBlockInlined()(implicit p: Parameters) extends LazyModule
  with HasXSParameter {
  override def shouldBeInlined: Boolean = true

  val dcache = LazyModule(new DCacheWrapper())
  val uncache = LazyModule(new Uncache())
  val uncache_port = TLTempNode()
  val uncache_xbar = TLXbar()
  val ptw = LazyModule(new L2TLBWrapper())
  // buffers are only built when the corresponding structure exists
  val ptw_to_l2_buffer = if (!coreParams.softPTW) LazyModule(new TLBuffer) else null
  val l1d_to_l2_buffer = if (coreParams.dcacheParametersOpt.nonEmpty) LazyModule(new TLBuffer) else null
  val dcache_port = TLNameNode("dcache_client") // to keep dcache-L2 port name
  // prefetch request senders toward L2 (and L3 when an L3 is configured)
  val l2_pf_sender_opt = coreParams.prefetcher.map(_ =>
    BundleBridgeSource(() => new PrefetchRecv)
  )
  val l3_pf_sender_opt = if (p(SoCParamsKey).L3CacheParamsOpt.nonEmpty) coreParams.prefetcher.map(_ =>
    BundleBridgeSource(() => new huancun.PrefetchRecv)
  ) else None
  val frontendBridge = LazyModule(new FrontendBridge)
  // interrupt sinks
  val clint_int_sink = IntSinkNode(IntSinkPortSimple(1, 2))
  val debug_int_sink = IntSinkNode(IntSinkPortSimple(1, 1))
  val plic_int_sink = IntSinkNode(IntSinkPortSimple(2, 1))
  val nmi_int_sink = IntSinkNode(IntSinkPortSimple(1, (new NonmaskableInterruptIO).elements.size))
  val beu_local_int_sink =
IntSinkNode(IntSinkPortSimple(1, 1)) 280c590fb32Scz4e 281c590fb32Scz4e if (!coreParams.softPTW) { 282c590fb32Scz4e ptw_to_l2_buffer.node := ptw.node 283c590fb32Scz4e } 284c590fb32Scz4e uncache_xbar := TLBuffer() := uncache.clientNode 285c590fb32Scz4e if (dcache.uncacheNode.isDefined) { 286c590fb32Scz4e dcache.uncacheNode.get := TLBuffer.chainNode(2) := uncache_xbar 287c590fb32Scz4e } 288c590fb32Scz4e uncache_port := TLBuffer.chainNode(2) := uncache_xbar 289c590fb32Scz4e 290c590fb32Scz4e lazy val module = new MemBlockInlinedImp(this) 291c590fb32Scz4e} 292c590fb32Scz4e 293c590fb32Scz4eclass MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer) 294c590fb32Scz4e with HasXSParameter 295c590fb32Scz4e with HasFPUParameters 296c590fb32Scz4e with HasPerfEvents 2978cfc24b2STang Haojin with HasSoCParameter 298c590fb32Scz4e with HasL1PrefetchSourceParameter 299c590fb32Scz4e with HasCircularQueuePtrHelper 300c590fb32Scz4e with HasMemBlockParameters 301c590fb32Scz4e with HasTlbConst 302c590fb32Scz4e with SdtrigExt 303c590fb32Scz4e{ 304c590fb32Scz4e val io = IO(new Bundle { 305c590fb32Scz4e val hartId = Input(UInt(hartIdLen.W)) 306c590fb32Scz4e val redirect = Flipped(ValidIO(new Redirect)) 307c590fb32Scz4e 308c590fb32Scz4e val ooo_to_mem = new ooo_to_mem 309c590fb32Scz4e val mem_to_ooo = new mem_to_ooo 310c590fb32Scz4e val fetch_to_mem = new fetch_to_mem 311c590fb32Scz4e 312c590fb32Scz4e val ifetchPrefetch = Vec(LduCnt, ValidIO(new SoftIfetchPrefetchBundle)) 313c590fb32Scz4e 314c590fb32Scz4e // misc 315c590fb32Scz4e val error = ValidIO(new L1CacheErrorInfo) 316c590fb32Scz4e val memInfo = new Bundle { 317c590fb32Scz4e val sqFull = Output(Bool()) 318c590fb32Scz4e val lqFull = Output(Bool()) 319c590fb32Scz4e val dcacheMSHRFull = Output(Bool()) 320c590fb32Scz4e } 321c590fb32Scz4e val debug_ls = new DebugLSIO 322c590fb32Scz4e val l2_hint = Input(Valid(new L2ToL1Hint())) 323c590fb32Scz4e val l2PfqBusy = Input(Bool()) 324c590fb32Scz4e val l2_tlb_req = Flipped(new 
TlbRequestIO(nRespDups = 2)) 325c590fb32Scz4e val l2_pmp_resp = new PMPRespBundle 326c590fb32Scz4e val l2_flush_done = Input(Bool()) 327c590fb32Scz4e 328c590fb32Scz4e val debugTopDown = new Bundle { 329c590fb32Scz4e val robHeadVaddr = Flipped(Valid(UInt(VAddrBits.W))) 330c590fb32Scz4e val toCore = new MemCoreTopDownIO 331c590fb32Scz4e } 332c590fb32Scz4e val debugRolling = Flipped(new RobDebugRollingIO) 333c590fb32Scz4e 334c590fb32Scz4e // All the signals from/to frontend/backend to/from bus will go through MemBlock 335c590fb32Scz4e val fromTopToBackend = Input(new Bundle { 3368cfc24b2STang Haojin val msiInfo = ValidIO(UInt(soc.IMSICParams.MSI_INFO_WIDTH.W)) 337c590fb32Scz4e val clintTime = ValidIO(UInt(64.W)) 338c590fb32Scz4e }) 339c590fb32Scz4e val inner_hartId = Output(UInt(hartIdLen.W)) 340c590fb32Scz4e val inner_reset_vector = Output(UInt(PAddrBits.W)) 341c590fb32Scz4e val outer_reset_vector = Input(UInt(PAddrBits.W)) 342c590fb32Scz4e val outer_cpu_halt = Output(Bool()) 343c590fb32Scz4e val outer_l2_flush_en = Output(Bool()) 344c590fb32Scz4e val outer_power_down_en = Output(Bool()) 345c590fb32Scz4e val outer_cpu_critical_error = Output(Bool()) 3468cfc24b2STang Haojin val outer_msi_ack = Output(Bool()) 347c590fb32Scz4e val inner_beu_errors_icache = Input(new L1BusErrorUnitInfo) 348c590fb32Scz4e val outer_beu_errors_icache = Output(new L1BusErrorUnitInfo) 349c590fb32Scz4e val inner_hc_perfEvents = Output(Vec(numPCntHc * coreParams.L2NBanks + 1, new PerfEvent)) 350c590fb32Scz4e val outer_hc_perfEvents = Input(Vec(numPCntHc * coreParams.L2NBanks + 1, new PerfEvent)) 351c590fb32Scz4e 352c590fb32Scz4e // reset signals of frontend & backend are generated in memblock 353c590fb32Scz4e val reset_backend = Output(Reset()) 354c590fb32Scz4e // Reset singal from frontend. 
355c590fb32Scz4e val resetInFrontendBypass = new Bundle{ 356c590fb32Scz4e val fromFrontend = Input(Bool()) 357c590fb32Scz4e val toL2Top = Output(Bool()) 358c590fb32Scz4e } 359c590fb32Scz4e val traceCoreInterfaceBypass = new Bundle{ 360c590fb32Scz4e val fromBackend = Flipped(new TraceCoreInterface(hasOffset = true)) 361c590fb32Scz4e val toL2Top = new TraceCoreInterface 362c590fb32Scz4e } 363c590fb32Scz4e 364c590fb32Scz4e val topDownInfo = new Bundle { 365c590fb32Scz4e val fromL2Top = Input(new TopDownFromL2Top) 366c590fb32Scz4e val toBackend = Flipped(new TopDownInfo) 367c590fb32Scz4e } 368602aa9f1Scz4e val sramTestBypass = new Bundle() { 369602aa9f1Scz4e val fromL2Top = new Bundle() { 370602aa9f1Scz4e val mbist = Option.when(hasMbist)(Input(new SramMbistBundle)) 371602aa9f1Scz4e val mbistReset = Option.when(hasMbist)(Input(new DFTResetSignals())) 372602aa9f1Scz4e val sramCtl = Option.when(hasSramCtl)(Input(UInt(64.W))) 373602aa9f1Scz4e } 374602aa9f1Scz4e val toFrontend = new Bundle() { 375602aa9f1Scz4e val mbist = Option.when(hasMbist)(Output(new SramMbistBundle)) 376602aa9f1Scz4e val mbistReset = Option.when(hasMbist)(Output(new DFTResetSignals())) 377602aa9f1Scz4e val sramCtl = Option.when(hasSramCtl)(Output(UInt(64.W))) 378602aa9f1Scz4e } 379602aa9f1Scz4e val toBackend = new Bundle() { 380602aa9f1Scz4e val mbist = Option.when(hasMbist)(Output(new SramMbistBundle)) 381602aa9f1Scz4e val mbistReset = Option.when(hasMbist)(Output(new DFTResetSignals())) 382602aa9f1Scz4e } 383602aa9f1Scz4e } 384c590fb32Scz4e }) 385c590fb32Scz4e 3861592abd1SYan Xu io.mem_to_ooo.writeBack.zipWithIndex.foreach{ case (wb, i) => 3871592abd1SYan Xu PerfCCT.updateInstPos(wb.bits.uop.debug_seqNum, PerfCCT.InstPos.AtBypassVal.id.U, wb.valid, clock, reset) 3881592abd1SYan Xu } 3891592abd1SYan Xu 390c590fb32Scz4e dontTouch(io.inner_hartId) 391c590fb32Scz4e dontTouch(io.inner_reset_vector) 392c590fb32Scz4e dontTouch(io.outer_reset_vector) 393c590fb32Scz4e dontTouch(io.outer_cpu_halt) 
394c590fb32Scz4e dontTouch(io.outer_l2_flush_en) 395c590fb32Scz4e dontTouch(io.outer_power_down_en) 396c590fb32Scz4e dontTouch(io.outer_cpu_critical_error) 397c590fb32Scz4e dontTouch(io.inner_beu_errors_icache) 398c590fb32Scz4e dontTouch(io.outer_beu_errors_icache) 399c590fb32Scz4e dontTouch(io.inner_hc_perfEvents) 400c590fb32Scz4e dontTouch(io.outer_hc_perfEvents) 401c590fb32Scz4e 402c590fb32Scz4e val redirect = RegNextWithEnable(io.redirect) 403c590fb32Scz4e 404c590fb32Scz4e private val dcache = outer.dcache.module 405c590fb32Scz4e val uncache = outer.uncache.module 406c590fb32Scz4e 407c590fb32Scz4e //val delayedDcacheRefill = RegNext(dcache.io.lsu.lsq) 408c590fb32Scz4e 409c590fb32Scz4e val csrCtrl = DelayN(io.ooo_to_mem.csrCtrl, 2) 410c590fb32Scz4e dcache.io.l2_pf_store_only := RegNext(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_store_only, false.B) 411c590fb32Scz4e io.error <> DelayNWithValid(dcache.io.error, 2) 412c590fb32Scz4e when(!csrCtrl.cache_error_enable){ 413c590fb32Scz4e io.error.bits.report_to_beu := false.B 414c590fb32Scz4e io.error.valid := false.B 415c590fb32Scz4e } 416c590fb32Scz4e 417c590fb32Scz4e val loadUnits = Seq.fill(LduCnt)(Module(new LoadUnit)) 418c590fb32Scz4e val storeUnits = Seq.fill(StaCnt)(Module(new StoreUnit)) 419c590fb32Scz4e val stdExeUnits = Seq.fill(StdCnt)(Module(new MemExeUnit(backendParams.memSchdParams.get.issueBlockParams.find(_.StdCnt != 0).get.exuBlockParams.head))) 420c590fb32Scz4e val hybridUnits = Seq.fill(HyuCnt)(Module(new HybridUnit)) // Todo: replace it with HybridUnit 421c590fb32Scz4e val stData = stdExeUnits.map(_.io.out) 422c590fb32Scz4e val exeUnits = loadUnits ++ storeUnits 423c590fb32Scz4e 424c590fb32Scz4e // The number of vector load/store units is decoupled with the number of load/store units 425c590fb32Scz4e val vlSplit = Seq.fill(VlduCnt)(Module(new VLSplitImp)) 426c590fb32Scz4e val vsSplit = Seq.fill(VstuCnt)(Module(new VSSplitImp)) 427c590fb32Scz4e val vlMergeBuffer = Module(new VLMergeBufferImp) 
428c590fb32Scz4e val vsMergeBuffer = Seq.fill(VstuCnt)(Module(new VSMergeBufferImp)) 429c590fb32Scz4e val vSegmentUnit = Module(new VSegmentUnit) 430c590fb32Scz4e val vfofBuffer = Module(new VfofBuffer) 431c590fb32Scz4e 432c590fb32Scz4e // misalign Buffer 433c590fb32Scz4e val loadMisalignBuffer = Module(new LoadMisalignBuffer) 434c590fb32Scz4e val storeMisalignBuffer = Module(new StoreMisalignBuffer) 435c590fb32Scz4e 436c590fb32Scz4e val l1_pf_req = Wire(Decoupled(new L1PrefetchReq())) 437c590fb32Scz4e dcache.io.sms_agt_evict_req.ready := false.B 438*05cc6da9SYanqin Li val l1D_pf_enable = GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable, 2, Some(false.B)) 439c590fb32Scz4e val prefetcherOpt: Option[BasePrefecher] = coreParams.prefetcher.map { 440c590fb32Scz4e case _: SMSParams => 441c590fb32Scz4e val sms = Module(new SMSPrefetcher()) 442*05cc6da9SYanqin Li val enableSMS = Constantin.createRecord(s"enableSMS$hartId", initValue = true) 443*05cc6da9SYanqin Li // constantinCtrl && master switch csrCtrl && single switch csrCtrl 444*05cc6da9SYanqin Li sms.io.enable := enableSMS && l1D_pf_enable && 445*05cc6da9SYanqin Li GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_recv_enable, 2, Some(false.B)) 446c590fb32Scz4e sms.io_agt_en := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_agt, 2, Some(false.B)) 447c590fb32Scz4e sms.io_pht_en := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_pht, 2, Some(false.B)) 448c590fb32Scz4e sms.io_act_threshold := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_active_threshold, 2, Some(12.U)) 449c590fb32Scz4e sms.io_act_stride := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_active_stride, 2, Some(30.U)) 450c590fb32Scz4e sms.io_stride_en := false.B 451c590fb32Scz4e sms.io_dcache_evict <> dcache.io.sms_agt_evict_req 4524b2c87baS梁森 Liang Sen val mbistSmsPl = MbistPipeline.PlaceMbistPipeline(1, "MbistPipeSms", hasMbist) 453c590fb32Scz4e sms 454c590fb32Scz4e } 455c590fb32Scz4e prefetcherOpt.foreach{ pf => 
pf.io.l1_req.ready := false.B } 456c590fb32Scz4e val hartId = p(XSCoreParamsKey).HartId 457c590fb32Scz4e val l1PrefetcherOpt: Option[BasePrefecher] = coreParams.prefetcher.map { 458c590fb32Scz4e case _ => 459c590fb32Scz4e val l1Prefetcher = Module(new L1Prefetcher()) 4609db05eaeScz4e val enableL1StreamPrefetcher = Constantin.createRecord(s"enableL1StreamPrefetcher$hartId", initValue = true) 461*05cc6da9SYanqin Li // constantinCtrl && master switch csrCtrl && single switch csrCtrl 462*05cc6da9SYanqin Li l1Prefetcher.io.enable := enableL1StreamPrefetcher && l1D_pf_enable && 463*05cc6da9SYanqin Li GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_stride, 2, Some(false.B)) 464c590fb32Scz4e l1Prefetcher.pf_ctrl <> dcache.io.pf_ctrl 465c590fb32Scz4e l1Prefetcher.l2PfqBusy := io.l2PfqBusy 466c590fb32Scz4e 467c590fb32Scz4e // stride will train on miss or prefetch hit 468c590fb32Scz4e for (i <- 0 until LduCnt) { 469c590fb32Scz4e val source = loadUnits(i).io.prefetch_train_l1 470c590fb32Scz4e l1Prefetcher.stride_train(i).valid := source.valid && source.bits.isFirstIssue && ( 471c590fb32Scz4e source.bits.miss || isFromStride(source.bits.meta_prefetch) 472c590fb32Scz4e ) 473c590fb32Scz4e l1Prefetcher.stride_train(i).bits := source.bits 474c590fb32Scz4e val loadPc = RegNext(io.ooo_to_mem.issueLda(i).bits.uop.pc) // for s1 475c590fb32Scz4e l1Prefetcher.stride_train(i).bits.uop.pc := Mux( 476c590fb32Scz4e loadUnits(i).io.s2_ptr_chasing, 477c590fb32Scz4e RegEnable(loadPc, loadUnits(i).io.s2_prefetch_spec), 478c590fb32Scz4e RegEnable(RegEnable(loadPc, loadUnits(i).io.s1_prefetch_spec), loadUnits(i).io.s2_prefetch_spec) 479c590fb32Scz4e ) 480c590fb32Scz4e } 481c590fb32Scz4e for (i <- 0 until HyuCnt) { 482c590fb32Scz4e val source = hybridUnits(i).io.prefetch_train_l1 483c590fb32Scz4e l1Prefetcher.stride_train.drop(LduCnt)(i).valid := source.valid && source.bits.isFirstIssue && ( 484c590fb32Scz4e source.bits.miss || isFromStride(source.bits.meta_prefetch) 485c590fb32Scz4e ) 
486c590fb32Scz4e l1Prefetcher.stride_train.drop(LduCnt)(i).bits := source.bits 487c590fb32Scz4e l1Prefetcher.stride_train.drop(LduCnt)(i).bits.uop.pc := Mux( 488c590fb32Scz4e hybridUnits(i).io.ldu_io.s2_ptr_chasing, 489c590fb32Scz4e RegNext(io.ooo_to_mem.hybridPc(i)), 490c590fb32Scz4e RegNext(RegNext(io.ooo_to_mem.hybridPc(i))) 491c590fb32Scz4e ) 492c590fb32Scz4e } 493c590fb32Scz4e l1Prefetcher 494c590fb32Scz4e } 495c590fb32Scz4e // load prefetch to l1 Dcache 496c590fb32Scz4e l1PrefetcherOpt match { 497c590fb32Scz4e case Some(pf) => l1_pf_req <> Pipeline(in = pf.io.l1_req, depth = 1, pipe = false, name = Some("pf_queue_to_ldu_reg")) 498c590fb32Scz4e case None => 499c590fb32Scz4e l1_pf_req.valid := false.B 500c590fb32Scz4e l1_pf_req.bits := DontCare 501c590fb32Scz4e } 502c590fb32Scz4e val pf_train_on_hit = RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_train_on_hit, 2, Some(true.B)) 503c590fb32Scz4e 504c590fb32Scz4e loadUnits.zipWithIndex.map(x => x._1.suggestName("LoadUnit_"+x._2)) 505c590fb32Scz4e storeUnits.zipWithIndex.map(x => x._1.suggestName("StoreUnit_"+x._2)) 506c590fb32Scz4e hybridUnits.zipWithIndex.map(x => x._1.suggestName("HybridUnit_"+x._2)) 507c590fb32Scz4e val atomicsUnit = Module(new AtomicsUnit) 508c590fb32Scz4e 509c590fb32Scz4e 510c590fb32Scz4e val ldaExeWbReqs = Wire(Vec(LduCnt, Decoupled(new MemExuOutput))) 511c590fb32Scz4e // atomicsUnit will overwrite the source from ldu if it is about to writeback 512c590fb32Scz4e val atomicWritebackOverride = Mux( 513c590fb32Scz4e atomicsUnit.io.out.valid, 514c590fb32Scz4e atomicsUnit.io.out.bits, 515c590fb32Scz4e loadUnits(AtomicWBPort).io.ldout.bits 516c590fb32Scz4e ) 517c590fb32Scz4e ldaExeWbReqs(AtomicWBPort).valid := atomicsUnit.io.out.valid || loadUnits(AtomicWBPort).io.ldout.valid 518c590fb32Scz4e ldaExeWbReqs(AtomicWBPort).bits := atomicWritebackOverride 519c590fb32Scz4e atomicsUnit.io.out.ready := ldaExeWbReqs(AtomicWBPort).ready 520c590fb32Scz4e loadUnits(AtomicWBPort).io.ldout.ready := 
ldaExeWbReqs(AtomicWBPort).ready

// A store-data slot carries an AMO if its uop's fuType says so; such slots are
// diverted away from the normal std writeback / store queue below.
val st_data_atomics = Seq.tabulate(StdCnt)(i =>
  stData(i).valid && FuType.storeIsAMO(stData(i).bits.uop.fuType)
)

// misalignBuffer will overwrite the source from ldu if it is about to writeback
// (i.e. the load unit's own writeback has priority on the shared MisalignWBPort).
val misalignWritebackOverride = Mux(
  loadUnits(MisalignWBPort).io.ldout.valid,
  loadUnits(MisalignWBPort).io.ldout.bits,
  loadMisalignBuffer.io.writeBack.bits
)
ldaExeWbReqs(MisalignWBPort).valid := loadMisalignBuffer.io.writeBack.valid || loadUnits(MisalignWBPort).io.ldout.valid
ldaExeWbReqs(MisalignWBPort).bits := misalignWritebackOverride
// The misalign buffer may only claim the port when the load unit is not using it.
loadMisalignBuffer.io.writeBack.ready := ldaExeWbReqs(MisalignWBPort).ready && !loadUnits(MisalignWBPort).io.ldout.valid
loadMisalignBuffer.io.loadOutValid := loadUnits(MisalignWBPort).io.ldout.valid
loadMisalignBuffer.io.loadVecOutValid := loadUnits(MisalignWBPort).io.vecldout.valid
loadUnits(MisalignWBPort).io.ldout.ready := ldaExeWbReqs(MisalignWBPort).ready
ldaExeWbReqs(MisalignWBPort).bits.isFromLoadUnit := loadUnits(MisalignWBPort).io.ldout.bits.isFromLoadUnit || loadMisalignBuffer.io.writeBack.valid

// loadUnit will overwrite the source from uncache if it is about to writeback
ldaExeWbReqs(UncacheWBPort) <> loadUnits(UncacheWBPort).io.ldout
// Writeback buses to the out-of-order backend: load-addr, store-addr, store-data.
io.mem_to_ooo.writebackLda <> ldaExeWbReqs
io.mem_to_ooo.writebackSta <> storeUnits.map(_.io.stout)
io.mem_to_ooo.writebackStd.zip(stdExeUnits).foreach {x =>
  x._1.bits := x._2.io.out.bits
  // AMOs do not need to write back std now.
  x._1.valid := x._2.io.out.fire && !FuType.storeIsAMO(x._2.io.out.bits.uop.fuType)
}
io.mem_to_ooo.writebackHyuLda <> hybridUnits.map(_.io.ldout)
io.mem_to_ooo.writebackHyuSta <> hybridUnits.map(_.io.stout)
// Fast wakeup layout: hybrid units occupy the first HyuCnt slots, then the load units.
io.mem_to_ooo.otherFastWakeup := DontCare
io.mem_to_ooo.otherFastWakeup.drop(HyuCnt).take(LduCnt).zip(loadUnits.map(_.io.fast_uop)).foreach{case(a,b)=> a := b}
io.mem_to_ooo.otherFastWakeup.take(HyuCnt).zip(hybridUnits.map(_.io.ldu_io.fast_uop)).foreach{case(a,b)=> a:=b}
val stOut = io.mem_to_ooo.writebackSta ++ io.mem_to_ooo.writebackHyuSta

// prefetch to l1 req
// Stream's confidence is always 1
// (LduCnt + HyuCnt) l1_pf_reqs ?
// The single l1_pf_req is broadcast to every load/hybrid pipeline; per-port
// acceptance is resolved through the ready logic below.
loadUnits.foreach(load_unit => {
  load_unit.io.prefetch_req.valid <> l1_pf_req.valid
  load_unit.io.prefetch_req.bits <> l1_pf_req.bits
})

hybridUnits.foreach(hybrid_unit => {
  hybrid_unit.io.ldu_io.prefetch_req.valid <> l1_pf_req.valid
  hybrid_unit.io.ldu_io.prefetch_req.bits <> l1_pf_req.bits
})

// NOTE: loadUnits(0) has higher bank conflict and miss queue arb priority than loadUnits(1) and loadUnits(2)
// when loadUnits(1)/loadUnits(2) stage 0 is busy, hw prefetch will never use that pipeline
val LowConfPorts = if (LduCnt == 2) Seq(1) else if (LduCnt == 3) Seq(1, 2) else Seq(0)
LowConfPorts.map{case i => loadUnits(i).io.prefetch_req.bits.confidence := 0.U}
hybridUnits.foreach(hybrid_unit => { hybrid_unit.io.ldu_io.prefetch_req.bits.confidence := 0.U })

// NOTE(review): the high-confidence acceptance vector samples the hybrid units'
// canAcceptLowConfPrefetch (not ...HighConf...) — confirm whether hybrid units
// deliberately never accept high-confidence prefetches or this is a copy-paste slip.
val canAcceptHighConfPrefetch = loadUnits.map(_.io.canAcceptHighConfPrefetch) ++
                                hybridUnits.map(_.io.canAcceptLowConfPrefetch)
val canAcceptLowConfPrefetch = loadUnits.map(_.io.canAcceptLowConfPrefetch) ++
                               hybridUnits.map(_.io.canAcceptLowConfPrefetch)
// The request is consumed as long as at least one pipeline can take it; ports in
// LowConfPorts are always checked against their low-confidence capacity.
l1_pf_req.ready := (0 until LduCnt + HyuCnt).map{
  case i => {
    if (LowConfPorts.contains(i)) {
      loadUnits(i).io.canAcceptLowConfPrefetch
    } else {
      Mux(l1_pf_req.bits.confidence === 1.U, canAcceptHighConfPrefetch(i), canAcceptLowConfPrefetch(i))
    }
  }
}.reduce(_ || _)

// l1 pf fuzzer interface: debug-only random prefetch injector; when enabled its
// later connections override the l1_pf_req wiring above (Chisel last-connect).
val DebugEnableL1PFFuzzer = false
if (DebugEnableL1PFFuzzer) {
  // l1 pf req fuzzer
  val fuzzer = Module(new L1PrefetchFuzzer())
  fuzzer.io.vaddr := DontCare
  fuzzer.io.paddr := DontCare

  // override load_unit prefetch_req
  loadUnits.foreach(load_unit => {
    load_unit.io.prefetch_req.valid <> fuzzer.io.req.valid
    load_unit.io.prefetch_req.bits <> fuzzer.io.req.bits
  })

  // override hybrid_unit prefetch_req
  hybridUnits.foreach(hybrid_unit => {
    hybrid_unit.io.ldu_io.prefetch_req.valid <> fuzzer.io.req.valid
    hybrid_unit.io.ldu_io.prefetch_req.bits <> fuzzer.io.req.bits
  })

  fuzzer.io.req.ready := l1_pf_req.ready
}

// TODO: fast load wakeup
val lsq = Module(new LsqWrapper)
val sbuffer = Module(new Sbuffer)
// if you wants to stress test dcache store, use FakeSbuffer
// val sbuffer = Module(new FakeSbuffer) // out of date now

io.mem_to_ooo.stIssuePtr := lsq.io.issuePtrExt

dcache.io.hartId := io.hartId
lsq.io.hartId := io.hartId
sbuffer.io.hartId := io.hartId
atomicsUnit.io.hartId := io.hartId

dcache.io.lqEmpty := lsq.io.lqEmpty

// load/store prefetch to l2 cache
// SMS and L1-stream prefetchers share the L2 sender; the (delayed-by-2) L1
// stream request wins when both are valid in the same cycle.
prefetcherOpt.foreach(sms_pf => {
  l1PrefetcherOpt.foreach(l1_pf => {
    val sms_pf_to_l2 = DelayNWithValid(sms_pf.io.l2_req, 2)
    val l1_pf_to_l2 = DelayNWithValid(l1_pf.io.l2_req, 2)

    outer.l2_pf_sender_opt.get.out.head._1.addr_valid := sms_pf_to_l2.valid || l1_pf_to_l2.valid
    outer.l2_pf_sender_opt.get.out.head._1.addr := Mux(l1_pf_to_l2.valid, l1_pf_to_l2.bits.addr, sms_pf_to_l2.bits.addr)
    outer.l2_pf_sender_opt.get.out.head._1.pf_source := Mux(l1_pf_to_l2.valid, l1_pf_to_l2.bits.source, sms_pf_to_l2.bits.source)
    outer.l2_pf_sender_opt.get.out.head._1.l2_pf_en := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_enable, 2, Some(true.B))

    // ChiselDB traces of what was actually sent towards L2 (debug only).
    val l2_trace = Wire(new LoadPfDbBundle)
    l2_trace.paddr := outer.l2_pf_sender_opt.get.out.head._1.addr
    val table = ChiselDB.createTable(s"L2PrefetchTrace$hartId", new LoadPfDbBundle, basicDB = false)
    table.log(l2_trace, l1_pf_to_l2.valid, "StreamPrefetchTrace", clock, reset)
    table.log(l2_trace, !l1_pf_to_l2.valid && sms_pf_to_l2.valid, "L2PrefetchTrace", clock, reset)

    // L3 prefetch path comes only from the L1 stream prefetcher, delayed 4 cycles.
    val l1_pf_to_l3 = ValidIODelay(l1_pf.io.l3_req, 4)
    outer.l3_pf_sender_opt.foreach(_.out.head._1.addr_valid := l1_pf_to_l3.valid)
    outer.l3_pf_sender_opt.foreach(_.out.head._1.addr := l1_pf_to_l3.bits)
    outer.l3_pf_sender_opt.foreach(_.out.head._1.l2_pf_en := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_enable, 4, Some(true.B)))

    val l3_trace = Wire(new LoadPfDbBundle)
    l3_trace.paddr := outer.l3_pf_sender_opt.map(_.out.head._1.addr).getOrElse(0.U)
    val l3_table = ChiselDB.createTable(s"L3PrefetchTrace$hartId", new LoadPfDbBundle, basicDB = false)
    l3_table.log(l3_trace, l1_pf_to_l3.valid, "StreamPrefetchTrace", clock, reset)

    XSPerfAccumulate("prefetch_fire_l2", outer.l2_pf_sender_opt.get.out.head._1.addr_valid)
    XSPerfAccumulate("prefetch_fire_l3", outer.l3_pf_sender_opt.map(_.out.head._1.addr_valid).getOrElse(false.B))
    XSPerfAccumulate("l1pf_fire_l2", l1_pf_to_l2.valid)
    XSPerfAccumulate("sms_fire_l2", !l1_pf_to_l2.valid && sms_pf_to_l2.valid)
    XSPerfAccumulate("sms_block_by_l1pf", l1_pf_to_l2.valid && sms_pf_to_l2.valid)
  })
})

// ptw
// sfence/tlbCsr are registered twice to relax timing before fanning out to the
// PTW and all dTLBs.
val sfence = RegNext(RegNext(io.ooo_to_mem.sfence))
val tlbcsr = RegNext(RegNext(io.ooo_to_mem.tlbCsr))
private val ptw = outer.ptw.module
private val ptw_to_l2_buffer = outer.ptw_to_l2_buffer.module
private val l1d_to_l2_buffer = outer.l1d_to_l2_buffer.module
ptw.io.hartId := io.hartId
ptw.io.sfence <> sfence
ptw.io.csr.tlb <> tlbcsr
ptw.io.csr.distribute_csr <> csrCtrl.distribute_csr

val perfEventsPTW = if (!coreParams.softPTW) {
  ptw.getPerfEvents
} else {
  Seq()
}

// dtlb
// Load-side dTLB serves LduCnt load pipes + HyuCnt hybrid pipes + 1 stream-prefetch port.
val dtlb_ld_tlb_ld = Module(new TLBNonBlock(LduCnt + HyuCnt + 1, 2, ldtlbParams))
val dtlb_st_tlb_st = Module(new TLBNonBlock(StaCnt, 1, sttlbParams))
// Prefetch dTLB: 2 requestors (SMS + L2 BOP), see TlbSubSizeVec below.
val dtlb_prefetch_tlb_prefetch = Module(new TLBNonBlock(2, 2, pftlbParams))
val dtlb_ld = Seq(dtlb_ld_tlb_ld.io)
val dtlb_st = Seq(dtlb_st_tlb_st.io)
val dtlb_prefetch = Seq(dtlb_prefetch_tlb_prefetch.io)
/* tlb vec && constant variable */
val dtlb = dtlb_ld ++ dtlb_st ++ dtlb_prefetch
val (dtlb_ld_idx, dtlb_st_idx, dtlb_pf_idx) = (0, 1, 2)
val TlbSubSizeVec = Seq(LduCnt + HyuCnt + 1, StaCnt, 2) // (load + hyu + stream pf, store, sms+l2bop)
val DTlbSize = TlbSubSizeVec.sum
// Start/end offsets of each sub-TLB's requestors within the flattened DTlbSize vector.
val TlbStartVec = TlbSubSizeVec.scanLeft(0)(_ + _).dropRight(1)
val TlbEndVec = TlbSubSizeVec.scanLeft(0)(_ + _).drop(1)

val ptwio = Wire(new VectorTlbPtwIO(DTlbSize))
val dtlb_reqs = dtlb.map(_.requestor).flatten
val dtlb_pmps = dtlb.map(_.pmp).flatten
// Common control fan-out to all three dTLBs.
dtlb.map(_.hartId := io.hartId)
dtlb.map(_.sfence := sfence)
dtlb.map(_.csr := tlbcsr)
dtlb.map(_.flushPipe.map(a => a := false.B)) // non-block doesn't need
dtlb.map(_.redirect := redirect)
if (refillBothTlb) {
  // A single shared replacement unit requires all sub-TLBs to agree on outReplace.
  require(ldtlbParams.outReplace == sttlbParams.outReplace)
  require(ldtlbParams.outReplace == hytlbParams.outReplace)
  require(ldtlbParams.outReplace == pftlbParams.outReplace)
  require(ldtlbParams.outReplace)

  val replace = Module(new TlbReplace(DTlbSize, ldtlbParams))
  replace.io.apply_sep(dtlb_ld.map(_.replace) ++ dtlb_st.map(_.replace) ++ dtlb_prefetch.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
} else {
  // TODO: there will be bugs in TlbReplace when outReplace enable, since the order of Hyu is not right.
  if (ldtlbParams.outReplace) {
    val replace_ld = Module(new TlbReplace(LduCnt + 1, ldtlbParams))
    replace_ld.io.apply_sep(dtlb_ld.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
  }
  if (hytlbParams.outReplace) {
    val replace_hy = Module(new TlbReplace(HyuCnt, hytlbParams))
    replace_hy.io.apply_sep(dtlb_ld.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
  }
  if (sttlbParams.outReplace) {
    val replace_st = Module(new TlbReplace(StaCnt, sttlbParams))
    replace_st.io.apply_sep(dtlb_st.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
  }
  if (pftlbParams.outReplace) {
    val replace_pf = Module(new TlbReplace(2, pftlbParams))
    replace_pf.io.apply_sep(dtlb_prefetch.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
  }
}

// Stage the PTW response one cycle before broadcasting it to the dTLBs.
val ptw_resp_next = RegEnable(ptwio.resp.bits, ptwio.resp.valid)
// NOTE(review): the kill condition AND-chains satp.changed && vsatp.changed &&
// hgatp.changed, so the staged response survives unless ALL three changed in the
// same sfence cycle — confirm an OR was not intended here.
val ptw_resp_v = RegNext(ptwio.resp.valid && !(sfence.valid && tlbcsr.satp.changed && tlbcsr.vsatp.changed && tlbcsr.hgatp.changed), init = false.B)
ptwio.resp.ready := true.B

// tlbreplay: per load pipe, flags that an in-flight PTW request would be
// satisfied by the response being broadcast this cycle, so the access should replay.
val tlbreplay = WireInit(VecInit(Seq.fill(LdExuCnt)(false.B)))
val tlbreplay_reg = GatedValidRegNext(tlbreplay)
val dtlb_ld0_tlbreplay_reg = GatedValidRegNext(dtlb_ld(0).tlbreplay)

if (backendParams.debugEn){ dontTouch(tlbreplay) }

for (i <- 0 until LdExuCnt) {
  tlbreplay(i) := dtlb_ld(0).ptw.req(i).valid && ptw_resp_next.vector(0) && ptw_resp_v &&
    ptw_resp_next.data.hit(dtlb_ld(0).ptw.req(i).bits.vpn, tlbcsr.satp.asid, tlbcsr.vsatp.asid, tlbcsr.hgatp.vmid, allType = true, ignoreAsid = true)
}
// Fan all dTLB PTW request ports into the flattened ptwio vector. A request is
// suppressed when the response being broadcast this cycle already hits its VPN
// for the requester's own sub-TLB group (filter duplicate walks).
dtlb.flatMap(a => a.ptw.req)
  .zipWithIndex
  .foreach{ case (tlb, i) =>
    tlb.ready := ptwio.req(i).ready
    ptwio.req(i).bits := tlb.bits
    // vector_hit: did this requester's sub-TLB group originate the staged response?
    val vector_hit = if (refillBothTlb) Cat(ptw_resp_next.vector).orR
      else if (i < TlbEndVec(dtlb_ld_idx)) Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_ld_idx), TlbEndVec(dtlb_ld_idx))).orR
      else if (i < TlbEndVec(dtlb_st_idx)) Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_st_idx), TlbEndVec(dtlb_st_idx))).orR
      else Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_pf_idx), TlbEndVec(dtlb_pf_idx))).orR
    ptwio.req(i).valid := tlb.valid && !(ptw_resp_v && vector_hit && ptw_resp_next.data.hit(tlb.bits.vpn, tlbcsr.satp.asid, tlbcsr.vsatp.asid, tlbcsr.hgatp.vmid, allType = true, ignoreAsid = true))
  }
// Broadcast the staged PTW response; each sub-TLB only asserts valid when the
// response vector intersects its own requestor range (unless refillBothTlb).
dtlb.foreach(_.ptw.resp.bits := ptw_resp_next.data)
if (refillBothTlb) {
  dtlb.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector).orR)
} else {
  dtlb_ld.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_ld_idx), TlbEndVec(dtlb_ld_idx))).orR)
  dtlb_st.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_st_idx), TlbEndVec(dtlb_st_idx))).orR)
  dtlb_prefetch.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_pf_idx), TlbEndVec(dtlb_pf_idx))).orR)
}
// getGpa is reduced per sub-TLB group using the same index layout as TlbSubSizeVec.
dtlb_ld.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.take(LduCnt + HyuCnt + 1)).orR)
dtlb_st.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.slice(LduCnt + HyuCnt + 1, LduCnt + HyuCnt + 1 + StaCnt)).orR)
dtlb_prefetch.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.drop(LduCnt + HyuCnt + 1 + StaCnt)).orR)

// Repeaters between the TLB clients and the shared PTW: data side goes through
// a filter (dedup/merge), instruction side through a simple repeater.
val dtlbRepeater = PTWNewFilter(ldtlbParams.fenceDelay, ptwio, ptw.io.tlb(1), sfence, tlbcsr, l2tlbParams.dfilterSize)
val itlbRepeater3 = PTWRepeaterNB(passReady = false, itlbParams.fenceDelay, io.fetch_to_mem.itlb, ptw.io.tlb(0), sfence, tlbcsr)

lsq.io.debugTopDown.robHeadMissInDTlb := dtlbRepeater.io.rob_head_miss_in_tlb

// pmp
val pmp = Module(new PMP())
pmp.io.distribute_csr <> csrCtrl.distribute_csr

// One PMP checker per dTLB requestor, paired positionally with dtlb_pmps.
val pmp_checkers = Seq.fill(DTlbSize)(Module(new PMPChecker(4, leaveHitMux = true)))
val pmp_check = pmp_checkers.map(_.io)
for ((p,d) <- pmp_check zip dtlb_pmps) {
  if (HasBitmapCheck) {
    p.apply(tlbcsr.mbmc.CMODE.asBool, tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, d)
  } else {
    p.apply(tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, d)
  }
  require(p.req.bits.size.getWidth == d.bits.size.getWidth)
}

// debug_ls layout: [loads | hybrid-load side | stores | hybrid-store side].
for (i <- 0 until LduCnt) {
  io.debug_ls.debugLsInfo(i) := loadUnits(i).io.debug_ls
}
for (i <- 0 until HyuCnt) {
  io.debug_ls.debugLsInfo.drop(LduCnt)(i) := hybridUnits(i).io.ldu_io.debug_ls
}
for (i <- 0 until StaCnt) {
  io.debug_ls.debugLsInfo.drop(LduCnt + HyuCnt)(i) := storeUnits(i).io.debug_ls
}
for (i <- 0 until HyuCnt) {
  io.debug_ls.debugLsInfo.drop(LduCnt + HyuCnt + StaCnt)(i) := hybridUnits(i).io.stu_io.debug_ls
}

io.mem_to_ooo.lsTopdownInfo := loadUnits.map(_.io.lsTopdownInfo) ++ hybridUnits.map(_.io.ldu_io.lsTopdownInfo)

// trigger
// Memory breakpoint trigger state, written by the CSR block via mem_trigger.
val tdata = RegInit(VecInit(Seq.fill(TriggerNum)(0.U.asTypeOf(new MatchTriggerIO))))
val tEnable = RegInit(VecInit(Seq.fill(TriggerNum)(false.B)))
tEnable := csrCtrl.mem_trigger.tEnableVec
when(csrCtrl.mem_trigger.tUpdate.valid) {
  tdata(csrCtrl.mem_trigger.tUpdate.bits.addr) := csrCtrl.mem_trigger.tUpdate.bits.tdata
}
val triggerCanRaiseBpExp = csrCtrl.mem_trigger.triggerCanRaiseBpExp
val debugMode = csrCtrl.mem_trigger.debugMode

val backendTriggerTimingVec = VecInit(tdata.map(_.timing))
val backendTriggerChainVec = VecInit(tdata.map(_.chain))

XSDebug(tEnable.asUInt.orR, "Debug Mode: At least one store trigger is enabled\n")
for (j <- 0 until TriggerNum)
  PrintTriggerInfo(tEnable(j), tdata(j))

// The segment instruction is executed atomically.
// After the segment instruction directive starts executing, no other instructions should be executed.
// vSegmentFlag is set one cycle after a vector-segment op enters the unit and
// cleared one cycle after its writeback: while set, the segment unit owns the
// dcache load port 0 and dTLB port 0 (see the per-load-unit wiring below).
val vSegmentFlag = RegInit(false.B)

when(GatedValidRegNext(vSegmentUnit.io.in.fire)) {
  vSegmentFlag := true.B
}.elsewhen(GatedValidRegNext(vSegmentUnit.io.uopwriteback.valid)) {
  vSegmentFlag := false.B
}

// Throttle for speculative misaligned loads: disabled once a misalign-buffer
// load rolls back with a RAR-queue nack, re-enabled when the RAR queue drains
// below LoadQueueRARSize - 4 entries.
val misalign_allow_spec = RegInit(true.B)
val ldu_rollback_with_misalign_nack = loadUnits.map(ldu =>
  ldu.io.lsq.ldin.bits.isFrmMisAlignBuf && ldu.io.lsq.ldin.bits.rep_info.rar_nack && ldu.io.rollback.valid
).reduce(_ || _)
when (ldu_rollback_with_misalign_nack) {
  misalign_allow_spec := false.B
} .elsewhen(lsq.io.rarValidCount < (LoadQueueRARSize - 4).U) {
  misalign_allow_spec := true.B
}

// LoadUnit
val correctMissTrain = Constantin.createRecord(s"CorrectMissTrain$hartId", initValue = false)

// Per-load-pipeline wiring: issue/feedback, dcache, forwarding, nuke queries,
// prefetch training, load-to-load forwarding, LSQ, misalign buffer, triggers.
for (i <- 0 until LduCnt) {
  loadUnits(i).io.redirect <> redirect
  loadUnits(i).io.misalign_allow_spec := misalign_allow_spec

  // get input form dispatch
  loadUnits(i).io.ldin <> io.ooo_to_mem.issueLda(i)
  loadUnits(i).io.feedback_slow <> io.mem_to_ooo.ldaIqFeedback(i).feedbackSlow
  io.mem_to_ooo.ldaIqFeedback(i).feedbackFast := DontCare
  loadUnits(i).io.correctMissTrain := correctMissTrain
  // Backend cancel/wakeup buses place hybrid units first, load units after.
  io.mem_to_ooo.ldCancel.drop(HyuCnt)(i) := loadUnits(i).io.ldCancel
  io.mem_to_ooo.wakeup.drop(HyuCnt)(i) := loadUnits(i).io.wakeup

  // vector
  if (i < VlduCnt) {
    loadUnits(i).io.vecldout.ready := false.B
  } else {
    loadUnits(i).io.vecldin.valid := false.B
    loadUnits(i).io.vecldin.bits := DontCare
    loadUnits(i).io.vecldout.ready := false.B
  }

  // fast replay: each load unit replays into itself.
  loadUnits(i).io.fast_rep_in <> loadUnits(i).io.fast_rep_out

  // SoftPrefetch to frontend (prefetch.i)
  loadUnits(i).io.ifetchPrefetch <> io.ifetchPrefetch(i)

  // dcache access
  loadUnits(i).io.dcache <> dcache.io.lsu.load(i)
  if(i == 0){
    // Port 0 is shared with the vector segment unit; Mux1H relies on the two
    // request valids being mutually exclusive (segment mode blocks the load unit).
    vSegmentUnit.io.rdcache := DontCare
    dcache.io.lsu.load(i).req.valid := loadUnits(i).io.dcache.req.valid || vSegmentUnit.io.rdcache.req.valid
    dcache.io.lsu.load(i).req.bits := Mux1H(Seq(
      vSegmentUnit.io.rdcache.req.valid -> vSegmentUnit.io.rdcache.req.bits,
      loadUnits(i).io.dcache.req.valid -> loadUnits(i).io.dcache.req.bits
    ))
    vSegmentUnit.io.rdcache.req.ready := dcache.io.lsu.load(i).req.ready
  }

  // Dcache requests must also be preempted by the segment.
  // NOTE(review): inside this per-i loop the dcache.io.lsu.load(0) assignments
  // below are loop-invariant and rely on Chisel last-connect semantics; they
  // repeat identically every iteration.
  when(vSegmentFlag){
    loadUnits(i).io.dcache.req.ready := false.B // Dcache is preempted.

    dcache.io.lsu.load(0).pf_source := vSegmentUnit.io.rdcache.pf_source
    dcache.io.lsu.load(0).s1_paddr_dup_lsu := vSegmentUnit.io.rdcache.s1_paddr_dup_lsu
    dcache.io.lsu.load(0).s1_paddr_dup_dcache := vSegmentUnit.io.rdcache.s1_paddr_dup_dcache
    dcache.io.lsu.load(0).s1_kill := vSegmentUnit.io.rdcache.s1_kill
    dcache.io.lsu.load(0).s2_kill := vSegmentUnit.io.rdcache.s2_kill
    dcache.io.lsu.load(0).s0_pc := vSegmentUnit.io.rdcache.s0_pc
    dcache.io.lsu.load(0).s1_pc := vSegmentUnit.io.rdcache.s1_pc
    dcache.io.lsu.load(0).s2_pc := vSegmentUnit.io.rdcache.s2_pc
    dcache.io.lsu.load(0).is128Req := vSegmentUnit.io.rdcache.is128Req
  }.otherwise {
    loadUnits(i).io.dcache.req.ready := dcache.io.lsu.load(i).req.ready

    dcache.io.lsu.load(0).pf_source := loadUnits(0).io.dcache.pf_source
    dcache.io.lsu.load(0).s1_paddr_dup_lsu := loadUnits(0).io.dcache.s1_paddr_dup_lsu
    dcache.io.lsu.load(0).s1_paddr_dup_dcache := loadUnits(0).io.dcache.s1_paddr_dup_dcache
    dcache.io.lsu.load(0).s1_kill := loadUnits(0).io.dcache.s1_kill
    dcache.io.lsu.load(0).s2_kill := loadUnits(0).io.dcache.s2_kill
    dcache.io.lsu.load(0).s0_pc := loadUnits(0).io.dcache.s0_pc
    dcache.io.lsu.load(0).s1_pc := loadUnits(0).io.dcache.s1_pc
    dcache.io.lsu.load(0).s2_pc := loadUnits(0).io.dcache.s2_pc
    dcache.io.lsu.load(0).is128Req := loadUnits(0).io.dcache.is128Req
  }

  // forward: store queue, store buffer, uncache buffer, TL-D refill, MSHR.
  loadUnits(i).io.lsq.forward <> lsq.io.forward(i)
  loadUnits(i).io.sbuffer <> sbuffer.io.forward(i)
  loadUnits(i).io.ubuffer <> uncache.io.forward(i)
  loadUnits(i).io.tl_d_channel := dcache.io.lsu.forward_D(i)
  loadUnits(i).io.forward_mshr <> dcache.io.lsu.forward_mshr(i)
  // ld-ld violation check
  loadUnits(i).io.lsq.ldld_nuke_query <> lsq.io.ldu.ldld_nuke_query(i)
  loadUnits(i).io.lsq.stld_nuke_query <> lsq.io.ldu.stld_nuke_query(i)
  // loadqueue old ptr
  loadUnits(i).io.lsq.lqDeqPtr := lsq.io.lqDeqPtr
  loadUnits(i).io.csrCtrl <> csrCtrl
  // dcache refill req
  // loadUnits(i).io.refill <> delayedDcacheRefill
  // dtlb
  loadUnits(i).io.tlb <> dtlb_reqs.take(LduCnt)(i)
  if(i == 0 ){ // port 0 assign to vsegmentUnit
    val vsegmentDtlbReqValid = vSegmentUnit.io.dtlb.req.valid // segment tlb resquest need to delay 1 cycle
    dtlb_reqs.take(LduCnt)(i).req.valid := loadUnits(i).io.tlb.req.valid || RegNext(vsegmentDtlbReqValid)
    vSegmentUnit.io.dtlb.req.ready := dtlb_reqs.take(LduCnt)(i).req.ready
    // Priority mux: the (delayed) segment-unit request beats the load unit's.
    dtlb_reqs.take(LduCnt)(i).req.bits := ParallelPriorityMux(Seq(
      RegNext(vsegmentDtlbReqValid) -> RegEnable(vSegmentUnit.io.dtlb.req.bits, vsegmentDtlbReqValid),
      loadUnits(i).io.tlb.req.valid -> loadUnits(i).io.tlb.req.bits
    ))
  }
  // pmp
  loadUnits(i).io.pmp <> pmp_check(i).resp
  // st-ld violation query
  val stld_nuke_query = storeUnits.map(_.io.stld_nuke_query) ++ hybridUnits.map(_.io.stu_io.stld_nuke_query)
  for (s <- 0 until StorePipelineWidth) {
    loadUnits(i).io.stld_nuke_query(s) := stld_nuke_query(s)
  }
  loadUnits(i).io.lq_rep_full <> lsq.io.lq_rep_full
  // load prefetch train
  prefetcherOpt.foreach(pf => {
    // sms will train on all miss load sources
    val source = loadUnits(i).io.prefetch_train
    pf.io.ld_in(i).valid := Mux(pf_train_on_hit,
      source.valid,
      source.valid && source.bits.isFirstIssue && source.bits.miss
    )
    pf.io.ld_in(i).bits := source.bits
    // Align the training PC with the train payload's pipeline stage; when
    // pointer-chasing skips s1, one fewer register stage is needed.
    val loadPc = RegNext(io.ooo_to_mem.issueLda(i).bits.uop.pc) // for s1
    pf.io.ld_in(i).bits.uop.pc := Mux(
      loadUnits(i).io.s2_ptr_chasing,
      RegEnable(loadPc, loadUnits(i).io.s2_prefetch_spec),
      RegEnable(RegEnable(loadPc, loadUnits(i).io.s1_prefetch_spec), loadUnits(i).io.s2_prefetch_spec)
    )
  })
  l1PrefetcherOpt.foreach(pf => {
    // stream will train on all load sources
    val source = loadUnits(i).io.prefetch_train_l1
    pf.io.ld_in(i).valid := source.valid && source.bits.isFirstIssue
    pf.io.ld_in(i).bits := source.bits
  })

  // load to load fast forward: load(i) prefers data(i)
  // fastPriority rotates the source list so that each consumer prefers its own
  // pipeline's forwarded data, then the others in round-robin order.
  val l2l_fwd_out = loadUnits.map(_.io.l2l_fwd_out) ++ hybridUnits.map(_.io.ldu_io.l2l_fwd_out)
  val fastPriority = (i until LduCnt + HyuCnt) ++ (0 until i)
  val fastValidVec = fastPriority.map(j => l2l_fwd_out(j).valid)
  val fastDataVec = fastPriority.map(j => l2l_fwd_out(j).data)
  val fastErrorVec = fastPriority.map(j => l2l_fwd_out(j).dly_ld_err)
  val fastMatchVec = fastPriority.map(j => io.ooo_to_mem.loadFastMatch(i)(j))
  loadUnits(i).io.l2l_fwd_in.valid := VecInit(fastValidVec).asUInt.orR
  loadUnits(i).io.l2l_fwd_in.data := ParallelPriorityMux(fastValidVec, fastDataVec)
  loadUnits(i).io.l2l_fwd_in.dly_ld_err := ParallelPriorityMux(fastValidVec, fastErrorVec)
  val fastMatch = ParallelPriorityMux(fastValidVec, fastMatchVec)
  loadUnits(i).io.ld_fast_match := fastMatch
  loadUnits(i).io.ld_fast_imm := io.ooo_to_mem.loadFastImm(i)
  loadUnits(i).io.ld_fast_fuOpType := io.ooo_to_mem.loadFastFuOpType(i)
  loadUnits(i).io.replay <> lsq.io.replay(i)

  val l2_hint = RegNext(io.l2_hint)

  // L2 Hint for DCache
  // NOTE(review): dcache/lsq l2_hint connections here are loop-invariant;
  // they are re-made identically each iteration (last-connect).
  dcache.io.l2_hint <> l2_hint

  loadUnits(i).io.l2_hint <> l2_hint
  loadUnits(i).io.tlb_hint.id := dtlbRepeater.io.hint.get.req(i).id
  loadUnits(i).io.tlb_hint.full := dtlbRepeater.io.hint.get.req(i).full ||
    tlbreplay_reg(i) || dtlb_ld0_tlbreplay_reg(i)

  // passdown to lsq (load s2)
  lsq.io.ldu.ldin(i) <> loadUnits(i).io.lsq.ldin
  // Only the designated uncache port talks to the LSQ uncache channel.
  if (i == UncacheWBPort) {
    lsq.io.ldout(i) <> loadUnits(i).io.lsq.uncache
  } else {
    lsq.io.ldout(i).ready := true.B
    loadUnits(i).io.lsq.uncache.valid := false.B
    loadUnits(i).io.lsq.uncache.bits := DontCare
  }
  lsq.io.ld_raw_data(i) <> loadUnits(i).io.lsq.ld_raw_data
  lsq.io.ncOut(i) <> loadUnits(i).io.lsq.nc_ldin
  lsq.io.l2_hint.valid := l2_hint.valid
  lsq.io.l2_hint.bits.sourceId := l2_hint.bits.sourceId
  lsq.io.l2_hint.bits.isKeyword := l2_hint.bits.isKeyword

  lsq.io.tlb_hint <> dtlbRepeater.io.hint.get

  // connect misalignBuffer
  loadMisalignBuffer.io.enq(i) <> loadUnits(i).io.misalign_enq

  // Only the designated misalign port services split load requests/responses.
  if (i == MisalignWBPort) {
    loadUnits(i).io.misalign_ldin <> loadMisalignBuffer.io.splitLoadReq
    loadUnits(i).io.misalign_ldout <> loadMisalignBuffer.io.splitLoadResp
  } else {
    loadUnits(i).io.misalign_ldin.valid := false.B
    loadUnits(i).io.misalign_ldin.bits := DontCare
  }

  // alter writeback exception info
  io.mem_to_ooo.s3_delayed_load_error(i) := loadUnits(i).io.s3_dly_ld_err

  // update mem dependency predictor
  // io.memPredUpdate(i) := DontCare

  // --------------------------------
  // Load Triggers
  // --------------------------------
  loadUnits(i).io.fromCsrTrigger.tdataVec := tdata
  loadUnits(i).io.fromCsrTrigger.tEnableVec := tEnable
  loadUnits(i).io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
  loadUnits(i).io.fromCsrTrigger.debugMode := debugMode
}

// Per-hybrid-unit wiring: mirrors the load-unit wiring above for the load side
// (indices offset by LduCnt) and adds the store-side connections (offset by StaCnt).
for (i <- 0 until HyuCnt) {
  hybridUnits(i).io.redirect <> redirect

  // get input from dispatch
  hybridUnits(i).io.lsin <> io.ooo_to_mem.issueHya(i)
  hybridUnits(i).io.feedback_slow <> io.mem_to_ooo.hyuIqFeedback(i).feedbackSlow
  hybridUnits(i).io.feedback_fast <> io.mem_to_ooo.hyuIqFeedback(i).feedbackFast
  hybridUnits(i).io.correctMissTrain := correctMissTrain
  io.mem_to_ooo.ldCancel.take(HyuCnt)(i) := hybridUnits(i).io.ldu_io.ldCancel
  io.mem_to_ooo.wakeup.take(HyuCnt)(i) := hybridUnits(i).io.ldu_io.wakeup

  // ------------------------------------
  // Load Port
  // ------------------------------------
  // fast replay
  hybridUnits(i).io.ldu_io.fast_rep_in <> hybridUnits(i).io.ldu_io.fast_rep_out

  // get input from dispatch
  hybridUnits(i).io.ldu_io.dcache <> dcache.io.lsu.load(LduCnt + i)
  hybridUnits(i).io.stu_io.dcache <> dcache.io.lsu.sta(StaCnt + i)

  // dcache access
  hybridUnits(i).io.ldu_io.lsq.forward <> lsq.io.forward(LduCnt + i)
  // forward
  hybridUnits(i).io.ldu_io.sbuffer <> sbuffer.io.forward(LduCnt + i)
  hybridUnits(i).io.ldu_io.ubuffer <> uncache.io.forward(LduCnt + i)
  // hybridUnits(i).io.ldu_io.vec_forward <> vsFlowQueue.io.forward(LduCnt + i)
  hybridUnits(i).io.ldu_io.vec_forward := DontCare
  hybridUnits(i).io.ldu_io.tl_d_channel := dcache.io.lsu.forward_D(LduCnt + i)
  hybridUnits(i).io.ldu_io.forward_mshr <> dcache.io.lsu.forward_mshr(LduCnt + i)
  // ld-ld violation check
  hybridUnits(i).io.ldu_io.lsq.ldld_nuke_query <> lsq.io.ldu.ldld_nuke_query(LduCnt + i)
  hybridUnits(i).io.ldu_io.lsq.stld_nuke_query <> lsq.io.ldu.stld_nuke_query(LduCnt + i)
  hybridUnits(i).io.csrCtrl <> csrCtrl
  // dcache refill req
  hybridUnits(i).io.ldu_io.tlb_hint.id := dtlbRepeater.io.hint.get.req(LduCnt + i).id
  hybridUnits(i).io.ldu_io.tlb_hint.full := dtlbRepeater.io.hint.get.req(LduCnt + i).full ||
    tlbreplay_reg(LduCnt + i) || dtlb_ld0_tlbreplay_reg(LduCnt + i)

  // dtlb: hybrid units use the load-side dTLB's requestor ports after the loads.
  hybridUnits(i).io.tlb <> dtlb_ld.head.requestor(LduCnt + i)
  // pmp
  hybridUnits(i).io.pmp <> pmp_check.drop(LduCnt)(i).resp
  // st-ld violation query
  val stld_nuke_query = VecInit(storeUnits.map(_.io.stld_nuke_query) ++ hybridUnits.map(_.io.stu_io.stld_nuke_query))
  hybridUnits(i).io.ldu_io.stld_nuke_query := stld_nuke_query
  hybridUnits(i).io.ldu_io.lq_rep_full <> lsq.io.lq_rep_full
  // load prefetch train
  prefetcherOpt.foreach(pf => {
    val source = hybridUnits(i).io.prefetch_train
    pf.io.ld_in(LduCnt + i).valid := Mux(pf_train_on_hit,
      source.valid,
      source.valid && source.bits.isFirstIssue && source.bits.miss
    )
    pf.io.ld_in(LduCnt + i).bits := source.bits
    pf.io.ld_in(LduCnt + i).bits.uop.pc := Mux(hybridUnits(i).io.ldu_io.s2_ptr_chasing, io.ooo_to_mem.hybridPc(i), RegNext(io.ooo_to_mem.hybridPc(i)))
  })
  l1PrefetcherOpt.foreach(pf => {
    // stream will train on all load sources
    val source = hybridUnits(i).io.prefetch_train_l1
    pf.io.ld_in(LduCnt + i).valid := source.valid && source.bits.isFirstIssue &&
      FuType.isLoad(source.bits.uop.fuType)
    pf.io.ld_in(LduCnt + i).bits := source.bits
    pf.io.st_in(StaCnt + i).valid := false.B
    pf.io.st_in(StaCnt + i).bits := DontCare
  })
  // SMS store-side training from the hybrid unit's store ops.
  prefetcherOpt.foreach(pf => {
    val source = hybridUnits(i).io.prefetch_train
    pf.io.st_in(StaCnt + i).valid := Mux(pf_train_on_hit,
      source.valid,
      source.valid && source.bits.isFirstIssue && source.bits.miss
    ) && FuType.isStore(source.bits.uop.fuType)
    pf.io.st_in(StaCnt + i).bits := source.bits
    pf.io.st_in(StaCnt + i).bits.uop.pc := RegNext(io.ooo_to_mem.hybridPc(i))
  })

  // load to load fast forward: load(i) prefers data(i)
  val l2l_fwd_out = loadUnits.map(_.io.l2l_fwd_out) ++ hybridUnits.map(_.io.ldu_io.l2l_fwd_out)
  val fastPriority = (LduCnt + i until LduCnt + HyuCnt) ++ (0 until LduCnt + i)
  val fastValidVec = fastPriority.map(j => l2l_fwd_out(j).valid)
  val fastDataVec = fastPriority.map(j => l2l_fwd_out(j).data)
  val fastErrorVec = fastPriority.map(j => l2l_fwd_out(j).dly_ld_err)
  val fastMatchVec = fastPriority.map(j => io.ooo_to_mem.loadFastMatch(LduCnt + i)(j))
  hybridUnits(i).io.ldu_io.l2l_fwd_in.valid := VecInit(fastValidVec).asUInt.orR
  hybridUnits(i).io.ldu_io.l2l_fwd_in.data := ParallelPriorityMux(fastValidVec, fastDataVec)
  hybridUnits(i).io.ldu_io.l2l_fwd_in.dly_ld_err := ParallelPriorityMux(fastValidVec, fastErrorVec)
  val fastMatch = ParallelPriorityMux(fastValidVec, fastMatchVec)
  hybridUnits(i).io.ldu_io.ld_fast_match := fastMatch
  hybridUnits(i).io.ldu_io.ld_fast_imm := io.ooo_to_mem.loadFastImm(LduCnt + i)
  hybridUnits(i).io.ldu_io.ld_fast_fuOpType := io.ooo_to_mem.loadFastFuOpType(LduCnt + i)
  hybridUnits(i).io.ldu_io.replay <> lsq.io.replay(LduCnt + i)
  hybridUnits(i).io.ldu_io.l2_hint <> io.l2_hint

  // uncache
  lsq.io.ldout.drop(LduCnt)(i) <> hybridUnits(i).io.ldu_io.lsq.uncache
  lsq.io.ld_raw_data.drop(LduCnt)(i) <> hybridUnits(i).io.ldu_io.lsq.ld_raw_data


  // passdown to lsq (load s2)
  hybridUnits(i).io.ldu_io.lsq.nc_ldin.valid := false.B
  hybridUnits(i).io.ldu_io.lsq.nc_ldin.bits := DontCare
  lsq.io.ldu.ldin(LduCnt + i) <> hybridUnits(i).io.ldu_io.lsq.ldin
  // Lsq to sta unit
  lsq.io.sta.storeMaskIn(StaCnt + i) <> hybridUnits(i).io.stu_io.st_mask_out

  // Lsq to std unit's rs
  // AMO store-data is filtered out here; it is handled by the atomics path.
  lsq.io.std.storeDataIn(StaCnt + i) := stData(StaCnt + i)
  lsq.io.std.storeDataIn(StaCnt + i).valid := stData(StaCnt + i).valid && !st_data_atomics(StaCnt + i)
  // prefetch
  hybridUnits(i).io.stu_io.prefetch_req <> sbuffer.io.store_prefetch(StaCnt + i)

  io.mem_to_ooo.s3_delayed_load_error(LduCnt + i) := hybridUnits(i).io.ldu_io.s3_dly_ld_err

  // ------------------------------------
  // Store Port
  // ------------------------------------
  hybridUnits(i).io.stu_io.lsq <> lsq.io.sta.storeAddrIn.takeRight(HyuCnt)(i)
  hybridUnits(i).io.stu_io.lsq_replenish <> lsq.io.sta.storeAddrInRe.takeRight(HyuCnt)(i)

  lsq.io.sta.storeMaskIn.takeRight(HyuCnt)(i) <> hybridUnits(i).io.stu_io.st_mask_out
  io.mem_to_ooo.stIn.takeRight(HyuCnt)(i).valid := hybridUnits(i).io.stu_io.issue.valid
  io.mem_to_ooo.stIn.takeRight(HyuCnt)(i).bits := hybridUnits(i).io.stu_io.issue.bits

  // ------------------------------------
  // Vector Store Port
  // ------------------------------------
  hybridUnits(i).io.vec_stu_io.isFirstIssue := true.B

  // -------------------------
  // Store Triggers
  // -------------------------
  hybridUnits(i).io.fromCsrTrigger.tdataVec := tdata
  hybridUnits(i).io.fromCsrTrigger.tEnableVec := tEnable
  hybridUnits(i).io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
  hybridUnits(i).io.fromCsrTrigger.debugMode := debugMode
}

// misalignBuffer
loadMisalignBuffer.io.redirect <> redirect
  // Both misalign buffers mirror the same ROB-side commit/pending state so they
  // can decide when a split (misaligned) access is allowed to proceed/retire.
  loadMisalignBuffer.io.rob.lcommit := io.ooo_to_mem.lsqio.lcommit
  loadMisalignBuffer.io.rob.scommit := io.ooo_to_mem.lsqio.scommit
  loadMisalignBuffer.io.rob.pendingMMIOld := io.ooo_to_mem.lsqio.pendingMMIOld
  loadMisalignBuffer.io.rob.pendingld := io.ooo_to_mem.lsqio.pendingld
  loadMisalignBuffer.io.rob.pendingst := io.ooo_to_mem.lsqio.pendingst
  loadMisalignBuffer.io.rob.pendingVst := io.ooo_to_mem.lsqio.pendingVst
  loadMisalignBuffer.io.rob.commit := io.ooo_to_mem.lsqio.commit
  loadMisalignBuffer.io.rob.pendingPtr := io.ooo_to_mem.lsqio.pendingPtr
  loadMisalignBuffer.io.rob.pendingPtrNext := io.ooo_to_mem.lsqio.pendingPtrNext

  lsq.io.loadMisalignFull := loadMisalignBuffer.io.loadMisalignFull
  lsq.io.misalignAllowSpec := misalign_allow_spec

  storeMisalignBuffer.io.redirect <> redirect
  storeMisalignBuffer.io.rob.lcommit := io.ooo_to_mem.lsqio.lcommit
  storeMisalignBuffer.io.rob.scommit := io.ooo_to_mem.lsqio.scommit
  storeMisalignBuffer.io.rob.pendingMMIOld := io.ooo_to_mem.lsqio.pendingMMIOld
  storeMisalignBuffer.io.rob.pendingld := io.ooo_to_mem.lsqio.pendingld
  storeMisalignBuffer.io.rob.pendingst := io.ooo_to_mem.lsqio.pendingst
  storeMisalignBuffer.io.rob.pendingVst := io.ooo_to_mem.lsqio.pendingVst
  storeMisalignBuffer.io.rob.commit := io.ooo_to_mem.lsqio.commit
  storeMisalignBuffer.io.rob.pendingPtr := io.ooo_to_mem.lsqio.pendingPtr
  storeMisalignBuffer.io.rob.pendingPtrNext := io.ooo_to_mem.lsqio.pendingPtrNext

  // store queue control handshake for misaligned (split) stores
  lsq.io.maControl <> storeMisalignBuffer.io.sqControl
lsq.io.cmoOpReq <> dcache.io.cmoOpReq
  lsq.io.cmoOpResp <> dcache.io.cmoOpResp

  // Prefetcher: DTLB port layout — the L1 stream prefetcher uses the port after
  // all load/hybrid requestors; the SMS prefetcher and the L2-to-L1 TLB request
  // use the dedicated "pf" port range.
  val StreamDTLBPortIndex = TlbStartVec(dtlb_ld_idx) + LduCnt + HyuCnt
  val PrefetcherDTLBPortIndex = TlbStartVec(dtlb_pf_idx)
  val L2toL1DLBPortIndex = TlbStartVec(dtlb_pf_idx) + 1
  prefetcherOpt match {
    case Some(pf) =>
      dtlb_reqs(PrefetcherDTLBPortIndex) <> pf.io.tlb_req
      pf.io.pmp_resp := pmp_check(PrefetcherDTLBPortIndex).resp
    case None =>
      // tie the unused port off: never request, always sink responses
      dtlb_reqs(PrefetcherDTLBPortIndex) := DontCare
      dtlb_reqs(PrefetcherDTLBPortIndex).req.valid := false.B
      dtlb_reqs(PrefetcherDTLBPortIndex).resp.ready := true.B
  }
  l1PrefetcherOpt match {
    case Some(pf) =>
      dtlb_reqs(StreamDTLBPortIndex) <> pf.io.tlb_req
      pf.io.pmp_resp := pmp_check(StreamDTLBPortIndex).resp
    case None =>
      dtlb_reqs(StreamDTLBPortIndex) := DontCare
      dtlb_reqs(StreamDTLBPortIndex).req.valid := false.B
      dtlb_reqs(StreamDTLBPortIndex).resp.ready := true.B
  }
  dtlb_reqs(L2toL1DLBPortIndex) <> io.l2_tlb_req
  dtlb_reqs(L2toL1DLBPortIndex).resp.ready := true.B
  io.l2_pmp_resp := pmp_check(L2toL1DLBPortIndex).resp

  // StoreUnit: store-data pipes are plain pass-through exu stages
  for (i <- 0 until StdCnt) {
    stdExeUnits(i).io.flush <> redirect
    stdExeUnits(i).io.in.valid := io.ooo_to_mem.issueStd(i).valid
    io.ooo_to_mem.issueStd(i).ready := stdExeUnits(i).io.in.ready
    stdExeUnits(i).io.in.bits := io.ooo_to_mem.issueStd(i).bits
  }

  // Store-address pipes
  for (i <- 0 until StaCnt) {
    val stu = storeUnits(i)

    stu.io.redirect <> redirect
    stu.io.csrCtrl <> csrCtrl
    stu.io.dcache <> dcache.io.lsu.sta(i)
    stu.io.feedback_slow <> io.mem_to_ooo.staIqFeedback(i).feedbackSlow
    stu.io.stin <> io.ooo_to_mem.issueSta(i)
    stu.io.lsq <> lsq.io.sta.storeAddrIn(i)
    stu.io.lsq_replenish <> lsq.io.sta.storeAddrInRe(i)
    // dtlb
    stu.io.tlb <> dtlb_st.head.requestor(i)
    // pmp: store ports sit after the LduCnt + HyuCnt load-side ports plus one
    // extra port — presumably the atomics unit; TODO confirm against the PMP port map
    stu.io.pmp <> pmp_check(LduCnt + HyuCnt + 1 + i).resp

    // -------------------------
    // Store Triggers
    // -------------------------
    stu.io.fromCsrTrigger.tdataVec := tdata
    stu.io.fromCsrTrigger.tEnableVec := tEnable
    stu.io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
    stu.io.fromCsrTrigger.debugMode := debugMode

    // prefetch
    stu.io.prefetch_req <> sbuffer.io.store_prefetch(i)

    // store unit does not need fast feedback
    io.mem_to_ooo.staIqFeedback(i).feedbackFast := DontCare

    // Lsq to sta unit
    lsq.io.sta.storeMaskIn(i) <> stu.io.st_mask_out

    // connect misalignBuffer
    storeMisalignBuffer.io.enq(i) <> stu.io.misalign_enq

    // only pipe 0 services the misalign buffer's split-store replays
    if (i == 0) {
      stu.io.misalign_stin <> storeMisalignBuffer.io.splitStoreReq
      stu.io.misalign_stout <> storeMisalignBuffer.io.splitStoreResp
    } else {
      stu.io.misalign_stin.valid := false.B
      stu.io.misalign_stin.bits := DontCare
    }

    // Lsq to std unit's rs: vector store data (from vsSplit) has priority over
    // scalar store data on the pipes that also carry vector stores.
    if (i < VstuCnt) {
      when (vsSplit(i).io.vstd.get.valid) {
        lsq.io.std.storeDataIn(i).valid := true.B
        lsq.io.std.storeDataIn(i).bits := vsSplit(i).io.vstd.get.bits
        stData(i).ready := false.B
      }.otherwise {
        lsq.io.std.storeDataIn(i).valid := stData(i).valid && !st_data_atomics(i)
        lsq.io.std.storeDataIn(i).bits.uop := stData(i).bits.uop
        lsq.io.std.storeDataIn(i).bits.data := stData(i).bits.data
        lsq.io.std.storeDataIn(i).bits.mask.map(_ := 0.U)
        lsq.io.std.storeDataIn(i).bits.vdIdx.map(_ := 0.U)
        lsq.io.std.storeDataIn(i).bits.vdIdxInField.map(_ := 0.U)
        stData(i).ready := true.B
      }
    } else {
      // scalar-only pipe
      lsq.io.std.storeDataIn(i).valid := stData(i).valid && !st_data_atomics(i)
      lsq.io.std.storeDataIn(i).bits.uop := stData(i).bits.uop
      lsq.io.std.storeDataIn(i).bits.data := stData(i).bits.data
      lsq.io.std.storeDataIn(i).bits.mask.map(_ := 0.U)
      lsq.io.std.storeDataIn(i).bits.vdIdx.map(_ := 0.U)
      lsq.io.std.storeDataIn(i).bits.vdIdxInField.map(_ := 0.U)
      stData(i).ready := true.B
    }
    // NOTE(review): these two bulk assignments cover ALL storeDataIn ports and
    // are re-emitted on every loop iteration — redundant but harmless under
    // Chisel last-connect semantics.
    lsq.io.std.storeDataIn.map(_.bits.debug := 0.U.asTypeOf(new DebugBundle))
    lsq.io.std.storeDataIn.foreach(_.bits.isFromLoadUnit := DontCare)

    // store prefetch train
    l1PrefetcherOpt.foreach(pf => {
      // stream will train on all load sources
      pf.io.st_in(i).valid := false.B
      pf.io.st_in(i).bits := DontCare
    })

    prefetcherOpt.foreach(pf => {
      // train on every issue when pf_train_on_hit, else only first-issue misses
      pf.io.st_in(i).valid := Mux(pf_train_on_hit,
        stu.io.prefetch_train.valid,
        stu.io.prefetch_train.valid && stu.io.prefetch_train.bits.isFirstIssue && (
          stu.io.prefetch_train.bits.miss
        )
      )
      pf.io.st_in(i).bits := stu.io.prefetch_train.bits
      // two-stage enable chain aligns the pc with the s2 train payload
      pf.io.st_in(i).bits.uop.pc := RegEnable(RegEnable(io.ooo_to_mem.storePc(i), stu.io.s1_prefetch_spec), stu.io.s2_prefetch_spec)
    })

    // 1. sync issue info to store set LFST
    // 2. when store issue, broadcast issued sqPtr to wake up the following insts
    // io.stIn(i).valid := io.issue(exuParameters.LduCnt + i).valid
    // io.stIn(i).bits := io.issue(exuParameters.LduCnt + i).bits
    io.mem_to_ooo.stIn(i).valid := stu.io.issue.valid
    io.mem_to_ooo.stIn(i).bits := stu.io.issue.bits

    stu.io.stout.ready := true.B

    // vector
    if (i < VstuCnt) {
      stu.io.vecstin <> vsSplit(i).io.out
      // vsFlowQueue.io.pipeFeedback(i) <> stu.io.vec_feedback_slow // need connect
    } else {
      stu.io.vecstin.valid := false.B
      stu.io.vecstin.bits := DontCare
      stu.io.vecstout.ready := false.B
    }
    stu.io.vec_isFirstIssue := true.B // TODO
  }

  // Store writeback by StoreQueue:
  // 1. cbo Zero
  // 2. mmio
  // Currently, the two should not be present at the same time, so simply make
  // cbo zero a higher priority (it wins the bits mux below).
  val sqOtherStout = WireInit(0.U.asTypeOf(DecoupledIO(new MemExuOutput)))
  sqOtherStout.valid := lsq.io.mmioStout.valid || lsq.io.cboZeroStout.valid
  sqOtherStout.bits := Mux(lsq.io.cboZeroStout.valid, lsq.io.cboZeroStout.bits, lsq.io.mmioStout.bits)
  assert(!(lsq.io.mmioStout.valid && lsq.io.cboZeroStout.valid), "Cannot writeback to mmio and cboZero at the same time.")

  val otherStout = WireInit(0.U.asTypeOf(lsq.io.mmioStout))
  NewPipelineConnect(
    sqOtherStout, otherStout, otherStout.fire,
    false.B,
    Option("otherStoutConnect")
  )
  // StoreQueue writeback only steals stOut(0) when store pipe 0 has nothing to write back
  otherStout.ready := false.B
  when (otherStout.valid && !storeUnits(0).io.stout.valid) {
    stOut(0).valid := true.B
    stOut(0).bits := otherStout.bits
    otherStout.ready := true.B
  }
  lsq.io.mmioStout.ready := sqOtherStout.ready
  lsq.io.cboZeroStout.ready := sqOtherStout.ready

  // vec mmio writeback
  lsq.io.vecmmioStout.ready := false.B

  // miss align buffer will overwrite stOut(0); it has the lowest priority
  // (only when neither otherStout nor pipe 0's scalar/vector writeback is valid)
  val storeMisalignCanWriteBack = !otherStout.valid && !storeUnits(0).io.stout.valid && !storeUnits(0).io.vecstout.valid
  storeMisalignBuffer.io.writeBack.ready := storeMisalignCanWriteBack
  storeMisalignBuffer.io.storeOutValid := storeUnits(0).io.stout.valid
  storeMisalignBuffer.io.storeVecOutValid := storeUnits(0).io.vecstout.valid
  when (storeMisalignBuffer.io.writeBack.valid && storeMisalignCanWriteBack) {
    stOut(0).valid := true.B
    stOut(0).bits := storeMisalignBuffer.io.writeBack.bits
  }

  // Uncache
  uncache.io.enableOutstanding := io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable
  uncache.io.hartId := io.hartId
lsq.io.uncacheOutstanding := io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable

  // Lsq <-> ROB state mirroring
  io.mem_to_ooo.lsqio.mmio := lsq.io.rob.mmio
  io.mem_to_ooo.lsqio.uop := lsq.io.rob.uop
  lsq.io.rob.lcommit := io.ooo_to_mem.lsqio.lcommit
  lsq.io.rob.scommit := io.ooo_to_mem.lsqio.scommit
  lsq.io.rob.pendingMMIOld := io.ooo_to_mem.lsqio.pendingMMIOld
  lsq.io.rob.pendingld := io.ooo_to_mem.lsqio.pendingld
  lsq.io.rob.pendingst := io.ooo_to_mem.lsqio.pendingst
  lsq.io.rob.pendingVst := io.ooo_to_mem.lsqio.pendingVst
  lsq.io.rob.commit := io.ooo_to_mem.lsqio.commit
  lsq.io.rob.pendingPtr := io.ooo_to_mem.lsqio.pendingPtr
  lsq.io.rob.pendingPtrNext := io.ooo_to_mem.lsqio.pendingPtrNext

  // lsq.io.rob <> io.lsqio.rob
  lsq.io.enq <> io.ooo_to_mem.enqLsq
  lsq.io.brqRedirect <> redirect

  // violation rollback
  /**
   * Produce a one-hot vector marking the oldest valid redirect among `xs`,
   * ordered by ROB index age (`isAfter`). Entry i is set iff xs(i) is valid
   * and, for every other valid xs(j), xs(i) is not younger than xs(j); ties
   * are broken in favor of the lower index (strict comparison is only applied
   * on one side of the diagonal).
   */
  def selectOldestRedirect(xs: Seq[Valid[Redirect]]): Vec[Bool] = {
    val compareVec = (0 until xs.length).map(i => (0 until i).map(j => isAfter(xs(j).bits.robIdx, xs(i).bits.robIdx)))
    val resultOnehot = VecInit((0 until xs.length).map(i => Cat((0 until xs.length).map(j =>
      (if (j < i) !xs(j).valid || compareVec(i)(j)
      else if (j == i) xs(i).valid
      else !xs(j).valid || !compareVec(j)(i))
    )).andR))
    resultOnehot
  }
  // gather every rollback source: load pipes, hybrid load sides, LSQ nack/nuke
  val allRedirect = loadUnits.map(_.io.rollback) ++ hybridUnits.map(_.io.ldu_io.rollback) ++ lsq.io.nack_rollback ++ lsq.io.nuke_rollback
  val oldestOneHot = selectOldestRedirect(allRedirect)
  val oldestRedirect = WireDefault(Mux1H(oldestOneHot, allRedirect))
  // memory replay would not cause IAF/IPF/IGPF
  oldestRedirect.bits.cfiUpdate.backendIAF := false.B
  oldestRedirect.bits.cfiUpdate.backendIPF := false.B
  oldestRedirect.bits.cfiUpdate.backendIGPF := false.B
  io.mem_to_ooo.memoryViolation := oldestRedirect
  io.mem_to_ooo.lsqio.lqCanAccept := lsq.io.lqCanAccept
  io.mem_to_ooo.lsqio.sqCanAccept := lsq.io.sqCanAccept

  // lsq.io.uncache <> uncache.io.lsq
  // Small FSM serializing scalar/vector uncache transactions; with write
  // outstanding enabled, non-cacheable requests bypass the busy states.
  val s_idle :: s_scalar_uncache :: s_vector_uncache :: Nil = Enum(3)
  val uncacheState = RegInit(s_idle)
  val uncacheReq = Wire(Decoupled(new UncacheWordReq))
  val uncacheIdResp = uncache.io.lsq.idResp
  val uncacheResp = Wire(Decoupled(new UncacheWordResp))

  // defaults; overridden below by the conditional connections (last-connect wins)
  uncacheReq.bits := DontCare
  uncacheReq.valid := false.B
  uncacheReq.ready := false.B
  uncacheResp.bits := DontCare
  uncacheResp.valid := false.B
  uncacheResp.ready := false.B
  lsq.io.uncache.req.ready := false.B
  lsq.io.uncache.idResp.valid := false.B
  lsq.io.uncache.idResp.bits := DontCare
  lsq.io.uncache.resp.valid := false.B
  lsq.io.uncache.resp.bits := DontCare

  switch (uncacheState) {
    is (s_idle) {
      when (uncacheReq.fire) {
        when (lsq.io.uncache.req.valid) {
          // NC requests stay in idle when outstanding writes are allowed
          when (!lsq.io.uncache.req.bits.nc || !io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
            uncacheState := s_scalar_uncache
          }
        }.otherwise {
          // val isStore = vsFlowQueue.io.uncache.req.bits.cmd === MemoryOpConstants.M_XWR
          when (!io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
            uncacheState := s_vector_uncache
          }
        }
      }
    }

    is (s_scalar_uncache) {
      when (uncacheResp.fire) {
        uncacheState := s_idle
      }
    }

    is (s_vector_uncache) {
      when (uncacheResp.fire) {
        uncacheState := s_idle
      }
    }
  }

  when (lsq.io.uncache.req.valid) {
    uncacheReq <> lsq.io.uncache.req
  }
  // route responses back to the LSQ: unconditionally when outstanding writes
  // are enabled, otherwise only while a scalar transaction is in flight
  when (io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
    lsq.io.uncache.resp <> uncacheResp
    lsq.io.uncache.idResp <> uncacheIdResp
  }.otherwise {
    when (uncacheState === s_scalar_uncache) {
      lsq.io.uncache.resp <> uncacheResp
      lsq.io.uncache.idResp <> uncacheIdResp
    }
  }
  // delay dcache refill for 1 cycle for better timing
  AddPipelineReg(uncacheReq, uncache.io.lsq.req, false.B)
  AddPipelineReg(uncache.io.lsq.resp, uncacheResp, false.B)

  //lsq.io.refill := delayedDcacheRefill
  lsq.io.release := dcache.io.lsu.release
  lsq.io.lqCancelCnt <> io.mem_to_ooo.lqCancelCnt
  lsq.io.sqCancelCnt <> io.mem_to_ooo.sqCancelCnt
  lsq.io.lqDeq <> io.mem_to_ooo.lqDeq
  lsq.io.sqDeq <> io.mem_to_ooo.sqDeq
  // Todo: assign these
  io.mem_to_ooo.sqDeqPtr := lsq.io.sqDeqPtr
  io.mem_to_ooo.lqDeqPtr := lsq.io.lqDeqPtr
  lsq.io.tl_d_channel <>
dcache.io.lsu.tl_d_channel

  // LSQ to store buffer
  lsq.io.sbuffer <> sbuffer.io.in
  // port 0 is shared with the vector segment unit; segment traffic wins the mux
  // (the two valids are assumed mutually exclusive for Mux1H — TODO confirm)
  sbuffer.io.in(0).valid := lsq.io.sbuffer(0).valid || vSegmentUnit.io.sbuffer.valid
  sbuffer.io.in(0).bits := Mux1H(Seq(
    vSegmentUnit.io.sbuffer.valid -> vSegmentUnit.io.sbuffer.bits,
    lsq.io.sbuffer(0).valid -> lsq.io.sbuffer(0).bits
  ))
  vSegmentUnit.io.sbuffer.ready := sbuffer.io.in(0).ready
  lsq.io.sqEmpty <> sbuffer.io.sqempty
  dcache.io.force_write := lsq.io.force_write

  // Initialize when unenabled difftest.
  sbuffer.io.vecDifftestInfo := DontCare
  lsq.io.sbufferVecDifftestInfo := DontCare
  vSegmentUnit.io.vecDifftestInfo := DontCare
  if (env.EnableDifftest) {
    // mirror the sbuffer input muxing for the difftest side-channel
    sbuffer.io.vecDifftestInfo.zipWithIndex.map{ case (sbufferPort, index) =>
      if (index == 0) {
        val vSegmentDifftestValid = vSegmentUnit.io.vecDifftestInfo.valid
        sbufferPort.valid := Mux(vSegmentDifftestValid, vSegmentUnit.io.vecDifftestInfo.valid, lsq.io.sbufferVecDifftestInfo(0).valid)
        sbufferPort.bits := Mux(vSegmentDifftestValid, vSegmentUnit.io.vecDifftestInfo.bits, lsq.io.sbufferVecDifftestInfo(0).bits)

        vSegmentUnit.io.vecDifftestInfo.ready := sbufferPort.ready
        lsq.io.sbufferVecDifftestInfo(0).ready := sbufferPort.ready
      } else {
        sbufferPort <> lsq.io.sbufferVecDifftestInfo(index)
      }
    }
  }

  // lsq.io.vecStoreRetire <> vsFlowQueue.io.sqRelease
  // lsq.io.vecWriteback.valid := vlWrapper.io.uopWriteback.fire &&
  //   vlWrapper.io.uopWriteback.bits.uop.vpu.lastUop
  // lsq.io.vecWriteback.bits := vlWrapper.io.uopWriteback.bits

  // vector: issue-acceptance predicates — an issueVldu slot is taken by the
  // load or store split pipe depending on its fuOpType
  val vLoadCanAccept = (0 until VlduCnt).map(i =>
    vlSplit(i).io.in.ready && VlduType.isVecLd(io.ooo_to_mem.issueVldu(i).bits.uop.fuOpType)
  )
  val vStoreCanAccept = (0 until VstuCnt).map(i =>
    vsSplit(i).io.in.ready && VstuType.isVecSt(io.ooo_to_mem.issueVldu(i).bits.uop.fuOpType)
  )
  // segment ops are only ever issued on port 0 (head)
  val isSegment = io.ooo_to_mem.issueVldu.head.valid && isVsegls(io.ooo_to_mem.issueVldu.head.bits.uop.fuType)
  // vleff's trailing vl-fix uop bypasses the split pipes
  val isFixVlUop = io.ooo_to_mem.issueVldu.map{x =>
    x.bits.uop.vpu.isVleff && x.bits.uop.vpu.lastUop && x.valid
  }

  // init port
  /**
   * TODO: splited vsMergebuffer maybe remove, if one RS can accept two feedback, or don't need RS replay uop
   * for now:
   *  RS0 -> VsSplit0 -> stu0 -> vsMergebuffer0 -> feedback -> RS0
   *  RS1 -> VsSplit1 -> stu1 -> vsMergebuffer1 -> feedback -> RS1
   *
   *  vector load don't need feedback
   *
   *  RS0 -> VlSplit0  -> ldu0 -> |
   *  RS1 -> VlSplit1  -> ldu1 -> |  -> vlMergebuffer
   *        replayIO   -> ldu3 -> |
   * */
  (0 until VstuCnt).foreach{ i =>
    // defaults; real connections are made in the loops below (last-connect wins)
    vsMergeBuffer(i).io.fromPipeline := DontCare
    vsMergeBuffer(i).io.fromSplit := DontCare

    vsMergeBuffer(i).io.fromMisalignBuffer.get.flush := storeMisalignBuffer.io.toVecStoreMergeBuffer(i).flush
    vsMergeBuffer(i).io.fromMisalignBuffer.get.mbIndex := storeMisalignBuffer.io.toVecStoreMergeBuffer(i).mbIndex
  }
(0 until VstuCnt).foreach{ i =>
    // vector store split: breaks a vector store uop into flows for store unit i
    vsSplit(i).io.redirect <> redirect
    vsSplit(i).io.in <> io.ooo_to_mem.issueVldu(i)
    // only accept when this slot carries a vector store and it is not a segment op
    vsSplit(i).io.in.valid := io.ooo_to_mem.issueVldu(i).valid &&
      vStoreCanAccept(i) && !isSegment
    vsSplit(i).io.toMergeBuffer <> vsMergeBuffer(i).io.fromSplit.head
    NewPipelineConnect(
      vsSplit(i).io.out, storeUnits(i).io.vecstin, storeUnits(i).io.vecstin.fire,
      Mux(vsSplit(i).io.out.fire, vsSplit(i).io.out.bits.uop.robIdx.needFlush(io.redirect), storeUnits(i).io.vecstin.bits.uop.robIdx.needFlush(io.redirect)),
      Option("VsSplitConnectStu")
    )
    vsSplit(i).io.vstd.get := DontCare // Todo: Discuss how to pass vector store data

    vsSplit(i).io.vstdMisalign.get.storeMisalignBufferEmpty := !storeMisalignBuffer.io.full
    vsSplit(i).io.vstdMisalign.get.storePipeEmpty := !storeUnits(i).io.s0_s1_valid
  }
  (0 until VlduCnt).foreach{ i =>
    // vector load split: mirrors the store split above, feeding load unit i
    vlSplit(i).io.redirect <> redirect
    vlSplit(i).io.in <> io.ooo_to_mem.issueVldu(i)
    vlSplit(i).io.in.valid := io.ooo_to_mem.issueVldu(i).valid &&
      vLoadCanAccept(i) && !isSegment && !isFixVlUop(i)
    vlSplit(i).io.toMergeBuffer <> vlMergeBuffer.io.fromSplit(i)
    vlSplit(i).io.threshold.get.valid := vlMergeBuffer.io.toSplit.get.threshold
    vlSplit(i).io.threshold.get.bits := lsq.io.lqDeqPtr
    NewPipelineConnect(
      vlSplit(i).io.out, loadUnits(i).io.vecldin, loadUnits(i).io.vecldin.fire,
      Mux(vlSplit(i).io.out.fire, vlSplit(i).io.out.bits.uop.robIdx.needFlush(io.redirect), loadUnits(i).io.vecldin.bits.uop.robIdx.needFlush(io.redirect)),
      Option("VlSplitConnectLdu")
    )

    //Subsequent instrction will be blocked
    vfofBuffer.io.in(i).valid := io.ooo_to_mem.issueVldu(i).valid
    vfofBuffer.io.in(i).bits := io.ooo_to_mem.issueVldu(i).bits
  }
  (0 until LduCnt).foreach{ i =>
    // vector load writeback into the merge buffer; the load pipe's own result
    // has priority over the misalign buffer on the designated MisalignWBPort
    loadUnits(i).io.vecldout.ready := vlMergeBuffer.io.fromPipeline(i).ready
    loadMisalignBuffer.io.vecWriteBack.ready := true.B

    if (i == MisalignWBPort) {
      when(loadUnits(i).io.vecldout.valid) {
        vlMergeBuffer.io.fromPipeline(i).valid := loadUnits(i).io.vecldout.valid
        vlMergeBuffer.io.fromPipeline(i).bits := loadUnits(i).io.vecldout.bits
      } .otherwise {
        vlMergeBuffer.io.fromPipeline(i).valid := loadMisalignBuffer.io.vecWriteBack.valid
        vlMergeBuffer.io.fromPipeline(i).bits := loadMisalignBuffer.io.vecWriteBack.bits
      }
    } else {
      vlMergeBuffer.io.fromPipeline(i).valid := loadUnits(i).io.vecldout.valid
      vlMergeBuffer.io.fromPipeline(i).bits := loadUnits(i).io.vecldout.bits
    }
  }

  (0 until StaCnt).foreach{ i =>
    // vector store writeback: store pipe result preferred over misalign buffer
    if (i < VstuCnt) {
      storeUnits(i).io.vecstout.ready := true.B
      storeMisalignBuffer.io.vecWriteBack(i).ready := vsMergeBuffer(i).io.fromPipeline.head.ready

      when(storeUnits(i).io.vecstout.valid) {
        vsMergeBuffer(i).io.fromPipeline.head.valid := storeUnits(i).io.vecstout.valid
        vsMergeBuffer(i).io.fromPipeline.head.bits := storeUnits(i).io.vecstout.bits
      } .otherwise {
        vsMergeBuffer(i).io.fromPipeline.head.valid := storeMisalignBuffer.io.vecWriteBack(i).valid
        vsMergeBuffer(i).io.fromPipeline.head.bits := storeMisalignBuffer.io.vecWriteBack(i).bits
      }
    }
  }

  (0 until VlduCnt).foreach{ i =>
    io.ooo_to_mem.issueVldu(i).ready := vLoadCanAccept(i) || vStoreCanAccept(i)
  }

  vlMergeBuffer.io.redirect <> redirect
  vsMergeBuffer.map(_.io.redirect <> redirect)
  (0 until VlduCnt).foreach{ i =>
    vlMergeBuffer.io.toLsq(i) <> lsq.io.ldvecFeedback(i)
  }
  (0 until VstuCnt).foreach{ i =>
    vsMergeBuffer(i).io.toLsq.head <> lsq.io.stvecFeedback(i)
  }

  (0 until VlduCnt).foreach{ i =>
    // send to RS
    vlMergeBuffer.io.feedback(i) <> io.mem_to_ooo.vlduIqFeedback(i).feedbackSlow
    io.mem_to_ooo.vlduIqFeedback(i).feedbackFast := DontCare
  }
  (0 until VstuCnt).foreach{ i =>
    // send to RS; port 0 feedback is shared with the vector segment unit
    // (valids assumed mutually exclusive for Mux1H — TODO confirm)
    if (i == 0) {
      io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow.valid := vsMergeBuffer(i).io.feedback.head.valid || vSegmentUnit.io.feedback.valid
      io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow.bits := Mux1H(Seq(
        vSegmentUnit.io.feedback.valid -> vSegmentUnit.io.feedback.bits,
        vsMergeBuffer(i).io.feedback.head.valid -> vsMergeBuffer(i).io.feedback.head.bits
      ))
      io.mem_to_ooo.vstuIqFeedback(i).feedbackFast := DontCare
    } else {
      vsMergeBuffer(i).io.feedback.head <> io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow
      io.mem_to_ooo.vstuIqFeedback(i).feedbackFast := DontCare
    }
  }

  // Vector writeback arbitration per port:
  //   port 0: vSegmentUnit > vlMergeBuffer > vsMergeBuffer
  //   port 1: vfofBuffer   > vlMergeBuffer > vsMergeBuffer
  //   others: vlMergeBuffer > vsMergeBuffer
  // ready signals mirror the PriorityMux order so exactly one producer fires.
  (0 until VlduCnt).foreach{ i =>
    if (i == 0) { // for segmentUnit, segmentUnit use port0 writeback
      io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid || vSegmentUnit.io.uopwriteback.valid
      io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
        vSegmentUnit.io.uopwriteback.valid -> vSegmentUnit.io.uopwriteback.bits,
        vlMergeBuffer.io.uopWriteback(i).valid -> vlMergeBuffer.io.uopWriteback(i).bits,
        vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
      ))
      vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready && !vSegmentUnit.io.uopwriteback.valid
      vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid && !vSegmentUnit.io.uopwriteback.valid
      vSegmentUnit.io.uopwriteback.ready := io.mem_to_ooo.writebackVldu(i).ready
    } else if (i == 1) {
      io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid || vfofBuffer.io.uopWriteback.valid
      io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
        vfofBuffer.io.uopWriteback.valid -> vfofBuffer.io.uopWriteback.bits,
        vlMergeBuffer.io.uopWriteback(i).valid -> vlMergeBuffer.io.uopWriteback(i).bits,
        vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
      ))
      vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready && !vfofBuffer.io.uopWriteback.valid
      vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid && !vfofBuffer.io.uopWriteback.valid
      vfofBuffer.io.uopWriteback.ready := io.mem_to_ooo.writebackVldu(i).ready
    } else {
      io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid
      io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
        vlMergeBuffer.io.uopWriteback(i).valid -> vlMergeBuffer.io.uopWriteback(i).bits,
        vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
      ))
      vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready
      vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid
    }

    // vfof buffer observes every merge-buffer writeback to track last-uop state
    vfofBuffer.io.mergeUopWriteback(i).valid := vlMergeBuffer.io.uopWriteback(i).valid
    vfofBuffer.io.mergeUopWriteback(i).bits := vlMergeBuffer.io.uopWriteback(i).bits
  }

  vfofBuffer.io.redirect <> redirect

  // Sbuffer
  sbuffer.io.csrCtrl <> csrCtrl
  sbuffer.io.dcache <> dcache.io.lsu.store
  sbuffer.io.memSetPattenDetected := dcache.io.memSetPattenDetected
  sbuffer.io.force_write <> lsq.io.force_write
  // flush sbuffer: requesters are CMO ops, fences, and atomics/segment ops
  val cmoFlush = lsq.io.flushSbuffer.valid
  val fenceFlush = io.ooo_to_mem.flushSb
  val atomicsFlush = atomicsUnit.io.flush_sbuffer.valid || vSegmentUnit.io.flush_sbuffer.valid
  val stIsEmpty = sbuffer.io.flush.empty && uncache.io.flush.empty
  io.mem_to_ooo.sbIsEmpty := RegNext(stIsEmpty)

  // if both of them tries to flush sbuffer at the same time
  // something must have
gone wrong 1720c590fb32Scz4e assert(!(fenceFlush && atomicsFlush && cmoFlush)) 1721c590fb32Scz4e sbuffer.io.flush.valid := RegNext(fenceFlush || atomicsFlush || cmoFlush) 1722c590fb32Scz4e uncache.io.flush.valid := sbuffer.io.flush.valid 1723c590fb32Scz4e 1724c590fb32Scz4e // AtomicsUnit: AtomicsUnit will override other control signials, 1725c590fb32Scz4e // as atomics insts (LR/SC/AMO) will block the pipeline 1726c590fb32Scz4e val s_normal +: s_atomics = Enum(StaCnt + HyuCnt + 1) 1727c590fb32Scz4e val state = RegInit(s_normal) 1728c590fb32Scz4e 1729c590fb32Scz4e val st_atomics = Seq.tabulate(StaCnt)(i => 1730c590fb32Scz4e io.ooo_to_mem.issueSta(i).valid && FuType.storeIsAMO((io.ooo_to_mem.issueSta(i).bits.uop.fuType)) 1731c590fb32Scz4e ) ++ Seq.tabulate(HyuCnt)(i => 1732c590fb32Scz4e io.ooo_to_mem.issueHya(i).valid && FuType.storeIsAMO((io.ooo_to_mem.issueHya(i).bits.uop.fuType)) 1733c590fb32Scz4e ) 1734c590fb32Scz4e 1735c590fb32Scz4e for (i <- 0 until StaCnt) when(st_atomics(i)) { 1736c590fb32Scz4e io.ooo_to_mem.issueSta(i).ready := atomicsUnit.io.in.ready 1737c590fb32Scz4e storeUnits(i).io.stin.valid := false.B 1738c590fb32Scz4e 1739c590fb32Scz4e state := s_atomics(i) 1740c590fb32Scz4e } 1741c590fb32Scz4e for (i <- 0 until HyuCnt) when(st_atomics(StaCnt + i)) { 1742c590fb32Scz4e io.ooo_to_mem.issueHya(i).ready := atomicsUnit.io.in.ready 1743c590fb32Scz4e hybridUnits(i).io.lsin.valid := false.B 1744c590fb32Scz4e 1745c590fb32Scz4e state := s_atomics(StaCnt + i) 1746c590fb32Scz4e assert(!st_atomics.zipWithIndex.filterNot(_._2 == StaCnt + i).unzip._1.reduce(_ || _)) 1747c590fb32Scz4e } 1748c590fb32Scz4e when (atomicsUnit.io.out.valid) { 1749c590fb32Scz4e state := s_normal 1750c590fb32Scz4e } 1751c590fb32Scz4e 1752c590fb32Scz4e atomicsUnit.io.in.valid := st_atomics.reduce(_ || _) 1753c590fb32Scz4e atomicsUnit.io.in.bits := Mux1H(Seq.tabulate(StaCnt)(i => 1754c590fb32Scz4e st_atomics(i) -> io.ooo_to_mem.issueSta(i).bits) ++ 1755c590fb32Scz4e Seq.tabulate(HyuCnt)(i => 
st_atomics(StaCnt+i) -> io.ooo_to_mem.issueHya(i).bits)) 1756c590fb32Scz4e atomicsUnit.io.storeDataIn.zipWithIndex.foreach { case (stdin, i) => 1757c590fb32Scz4e stdin.valid := st_data_atomics(i) 1758c590fb32Scz4e stdin.bits := stData(i).bits 1759c590fb32Scz4e } 1760c590fb32Scz4e atomicsUnit.io.redirect <> redirect 1761c590fb32Scz4e 1762c590fb32Scz4e // TODO: complete amo's pmp support 1763c590fb32Scz4e val amoTlb = dtlb_ld(0).requestor(0) 1764c590fb32Scz4e atomicsUnit.io.dtlb.resp.valid := false.B 1765c590fb32Scz4e atomicsUnit.io.dtlb.resp.bits := DontCare 1766c590fb32Scz4e atomicsUnit.io.dtlb.req.ready := amoTlb.req.ready 1767c590fb32Scz4e atomicsUnit.io.pmpResp := pmp_check(0).resp 1768c590fb32Scz4e 1769c590fb32Scz4e atomicsUnit.io.dcache <> dcache.io.lsu.atomics 1770c590fb32Scz4e atomicsUnit.io.flush_sbuffer.empty := stIsEmpty 1771c590fb32Scz4e 1772c590fb32Scz4e atomicsUnit.io.csrCtrl := csrCtrl 1773c590fb32Scz4e 1774c590fb32Scz4e // for atomicsUnit, it uses loadUnit(0)'s TLB port 1775c590fb32Scz4e 1776c590fb32Scz4e when (state =/= s_normal) { 1777c590fb32Scz4e // use store wb port instead of load 1778c590fb32Scz4e loadUnits(0).io.ldout.ready := false.B 1779c590fb32Scz4e // use load_0's TLB 1780c590fb32Scz4e atomicsUnit.io.dtlb <> amoTlb 1781c590fb32Scz4e 1782c590fb32Scz4e // hw prefetch should be disabled while executing atomic insts 1783c590fb32Scz4e loadUnits.map(i => i.io.prefetch_req.valid := false.B) 1784c590fb32Scz4e 1785c590fb32Scz4e // make sure there's no in-flight uops in load unit 1786c590fb32Scz4e assert(!loadUnits(0).io.ldout.valid) 1787c590fb32Scz4e } 1788c590fb32Scz4e 1789c590fb32Scz4e lsq.io.flushSbuffer.empty := sbuffer.io.sbempty 1790c590fb32Scz4e 1791c590fb32Scz4e for (i <- 0 until StaCnt) { 1792c590fb32Scz4e when (state === s_atomics(i)) { 1793c590fb32Scz4e io.mem_to_ooo.staIqFeedback(i).feedbackSlow := atomicsUnit.io.feedbackSlow 1794c590fb32Scz4e assert(!storeUnits(i).io.feedback_slow.valid) 1795c590fb32Scz4e } 1796c590fb32Scz4e } 
  // Same atomic-override rule for hybrid ports.
  for (i <- 0 until HyuCnt) {
    when (state === s_atomics(StaCnt + i)) {
      io.mem_to_ooo.hyuIqFeedback(i).feedbackSlow := atomicsUnit.io.feedbackSlow
      assert(!hybridUnits(i).io.feedback_slow.valid)
    }
  }

  lsq.io.exceptionAddr.isStore := io.ooo_to_mem.isStoreException
  // Exception address is used several cycles after flush.
  // We delay it by 10 cycles to ensure its flush safety.
  // Sticky flag: set on an AtomicsUnit exception, cleared 10 cycles after a redirect.
  val atomicsException = RegInit(false.B)
  when (DelayN(redirect.valid, 10) && atomicsException) {
    atomicsException := false.B
  }.elsewhen (atomicsUnit.io.exceptionInfo.valid) {
    atomicsException := true.B
  }

  // Misalign-buffer exception overwrite: load buffer takes priority over store
  // buffer in every Mux below (selector is the load buffer's valid).
  val misalignBufExceptionOverwrite = loadMisalignBuffer.io.overwriteExpBuf.valid || storeMisalignBuffer.io.overwriteExpBuf.valid
  val misalignBufExceptionVaddr = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
    loadMisalignBuffer.io.overwriteExpBuf.vaddr,
    storeMisalignBuffer.io.overwriteExpBuf.vaddr
  )
  val misalignBufExceptionIsHyper = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
    loadMisalignBuffer.io.overwriteExpBuf.isHyper,
    storeMisalignBuffer.io.overwriteExpBuf.isHyper
  )
  val misalignBufExceptionGpaddr = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
    loadMisalignBuffer.io.overwriteExpBuf.gpaddr,
    storeMisalignBuffer.io.overwriteExpBuf.gpaddr
  )
  val misalignBufExceptionIsForVSnonLeafPTE = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
    loadMisalignBuffer.io.overwriteExpBuf.isForVSnonLeafPTE,
    storeMisalignBuffer.io.overwriteExpBuf.isForVSnonLeafPTE
  )

  // Sticky flag for vector-segment exceptions, same set/clear scheme as above.
  val vSegmentException = RegInit(false.B)
  when (DelayN(redirect.valid, 10) && vSegmentException) {
    vSegmentException := false.B
  }.elsewhen (vSegmentUnit.io.exceptionInfo.valid) {
    vSegmentException := true.B
  }
  // Capture exception metadata at the moment the exception is reported.
  val atomicsExceptionAddress = RegEnable(atomicsUnit.io.exceptionInfo.bits.vaddr, atomicsUnit.io.exceptionInfo.valid)
  val vSegmentExceptionVstart = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vstart, vSegmentUnit.io.exceptionInfo.valid)
  val vSegmentExceptionVl = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vl, vSegmentUnit.io.exceptionInfo.valid)
  val vSegmentExceptionAddress = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vaddr, vSegmentUnit.io.exceptionInfo.valid)
  val atomicsExceptionGPAddress = RegEnable(atomicsUnit.io.exceptionInfo.bits.gpaddr, atomicsUnit.io.exceptionInfo.valid)
  val vSegmentExceptionGPAddress = RegEnable(vSegmentUnit.io.exceptionInfo.bits.gpaddr, vSegmentUnit.io.exceptionInfo.valid)
  val atomicsExceptionIsForVSnonLeafPTE = RegEnable(atomicsUnit.io.exceptionInfo.bits.isForVSnonLeafPTE, atomicsUnit.io.exceptionInfo.valid)
  val vSegmentExceptionIsForVSnonLeafPTE = RegEnable(vSegmentUnit.io.exceptionInfo.bits.isForVSnonLeafPTE, vSegmentUnit.io.exceptionInfo.valid)

  // Exception vaddr source priority: atomics > misalign buffers > vsegment > LSQ.
  val exceptionVaddr = Mux(
    atomicsException,
    atomicsExceptionAddress,
    Mux(misalignBufExceptionOverwrite,
      misalignBufExceptionVaddr,
      Mux(vSegmentException,
        vSegmentExceptionAddress,
        lsq.io.exceptionAddr.vaddr
      )
    )
  )
  //
  // whether vaddr need ext or is hyper inst:
  // VaNeedExt: atomicsException -> false; misalignBufExceptionOverwrite -> true; vSegmentException -> false
  // IsHyper: atomicsException -> false; vSegmentException -> false
  val exceptionVaNeedExt = !atomicsException &&
    (misalignBufExceptionOverwrite ||
      (!vSegmentException && lsq.io.exceptionAddr.vaNeedExt))
  val exceptionIsHyper = !atomicsException &&
    (misalignBufExceptionOverwrite && misalignBufExceptionIsHyper ||
      (!vSegmentException && lsq.io.exceptionAddr.isHyper && !misalignBufExceptionOverwrite))

  /**
   * Canonicalize an exception virtual address for the xtval CSRs.
   *
   * When `vaNeedExt` is set, the raw `vaddr` is extended to XLEN according to
   * the active translation mode (per the local `satpNone/Sv39/Sv48` naming,
   * mode values 0/8/9 select Bare/Sv39/Sv48, and for hgatp Sv39x4/Sv48x4):
   *   - Bare:   zero-extend the physical-address-width slice
   *   - Sv39/48: sign-extend bit 38/47 upward
   *   - Sv39x4/Sv48x4 (G-stage): zero-extend the 41-/50-bit slice
   * Otherwise `vaddr` is passed through unchanged.
   *
   * @param mode      current data privilege mode (compared against CSRConst.ModeM)
   * @param isVirt    virtualization active (or a hypervisor load/store inst)
   * @param vaNeedExt whether the address must be canonicalized at all
   * @param satp/vsatp/hgatp  translation CSR views used to pick the format
   * @param vaddr     raw virtual address to canonicalize
   * @return XLEN-wide canonicalized address
   */
  def GenExceptionVa(
    mode: UInt, isVirt: Bool, vaNeedExt: Bool,
    satp: TlbSatpBundle, vsatp: TlbSatpBundle, hgatp: TlbHgatpBundle,
    vaddr: UInt
  ) = {
    require(VAddrBits >= 50)

    val satpNone = satp.mode === 0.U
    val satpSv39 = satp.mode === 8.U
    val satpSv48 = satp.mode === 9.U

    val vsatpNone = vsatp.mode === 0.U
    val vsatpSv39 = vsatp.mode === 8.U
    val vsatpSv48 = vsatp.mode === 9.U

    val hgatpNone = hgatp.mode === 0.U
    val hgatpSv39x4 = hgatp.mode === 8.U
    val hgatpSv48x4 = hgatp.mode === 9.U

    // For !isVirt, mode check is necessary, as we don't want virtual memory in M-mode.
    // For isVirt, mode check is unnecessary, as virt won't be 1 in M-mode.
    // Also, isVirt includes Hyper Insts, which don't care mode either.

    val useBareAddr =
      (isVirt && vsatpNone && hgatpNone) ||
      (!isVirt && (mode === CSRConst.ModeM)) ||
      (!isVirt && (mode =/= CSRConst.ModeM) && satpNone)
    val useSv39Addr =
      (isVirt && vsatpSv39) ||
      (!isVirt && (mode =/= CSRConst.ModeM) && satpSv39)
    val useSv48Addr =
      (isVirt && vsatpSv48) ||
      (!isVirt && (mode =/= CSRConst.ModeM) && satpSv48)
    // G-stage-only translation: VS-stage off, hgatp selects the x4 formats.
    val useSv39x4Addr = isVirt && vsatpNone && hgatpSv39x4
    val useSv48x4Addr = isVirt && vsatpNone && hgatpSv48x4

    val bareAddr   = ZeroExt(vaddr(PAddrBits - 1, 0), XLEN)
    val sv39Addr   = SignExt(vaddr.take(39), XLEN)
    val sv39x4Addr = ZeroExt(vaddr.take(39 + 2), XLEN)
    val sv48Addr   = SignExt(vaddr.take(48), XLEN)
    val sv48x4Addr = ZeroExt(vaddr.take(48 + 2), XLEN)

    val ExceptionVa = Wire(UInt(XLEN.W))
    when (vaNeedExt) {
      // NOTE(review): Mux1H assumes exactly one selector is hot; reserved
      // satp/vsatp/hgatp mode encodings would leave none hot — presumably
      // unreachable because mode writes are legalized in the CSR — confirm.
      ExceptionVa := Mux1H(Seq(
        (useBareAddr)   -> bareAddr,
        (useSv39Addr)   -> sv39Addr,
        (useSv48Addr)   -> sv48Addr,
        (useSv39x4Addr) -> sv39x4Addr,
        (useSv48x4Addr) -> sv48x4Addr,
      ))
    } .otherwise {
      ExceptionVa := vaddr
    }

    ExceptionVa
  }

  // Registered canonicalized exception vaddr reported to the backend.
  io.mem_to_ooo.lsqio.vaddr := RegNext(
    GenExceptionVa(tlbcsr.priv.dmode, tlbcsr.priv.virt || exceptionIsHyper, exceptionVaNeedExt,
      tlbcsr.satp, tlbcsr.vsatp, tlbcsr.hgatp, exceptionVaddr)
  )
  // vsegment instruction is executed atomic, which mean atomicsException and vSegmentException should not raise at the same time.
  XSError(atomicsException && vSegmentException, "atomicsException and vSegmentException raise at the same time!")
  // vstart/vl for the backend: segment-unit capture wins over the LSQ value.
  io.mem_to_ooo.lsqio.vstart := RegNext(Mux(vSegmentException,
    vSegmentExceptionVstart,
    lsq.io.exceptionAddr.vstart)
  )
  io.mem_to_ooo.lsqio.vl := RegNext(Mux(vSegmentException,
    vSegmentExceptionVl,
    lsq.io.exceptionAddr.vl)
  )

  XSError(atomicsException && atomicsUnit.io.in.valid, "new instruction before exception triggers\n")
  // Guest-physical address / VS-non-leaf-PTE flags: same source priority as the
  // vaddr mux (atomics > misalign buffers > vsegment > LSQ).
  io.mem_to_ooo.lsqio.gpaddr := RegNext(Mux(
    atomicsException,
    atomicsExceptionGPAddress,
    Mux(misalignBufExceptionOverwrite,
      misalignBufExceptionGpaddr,
      Mux(vSegmentException,
        vSegmentExceptionGPAddress,
        lsq.io.exceptionAddr.gpaddr
      )
    )
  ))
  io.mem_to_ooo.lsqio.isForVSnonLeafPTE := RegNext(Mux(
    atomicsException,
    atomicsExceptionIsForVSnonLeafPTE,
    Mux(misalignBufExceptionOverwrite,
      misalignBufExceptionIsForVSnonLeafPTE,
      Mux(vSegmentException,
        vSegmentExceptionIsForVSnonLeafPTE,
        lsq.io.exceptionAddr.isForVSnonLeafPTE
      )
    )
  ))
  // Signals forwarded from the SoC top, through MemBlock, to the backend:
  // hart id, L2-flush status, external interrupt lines, MSI info and CLINT time.
  io.mem_to_ooo.topToBackendBypass match { case x =>
    x.hartId := io.hartId
    x.l2FlushDone := RegNext(io.l2_flush_done)
    x.externalInterrupt.msip  := outer.clint_int_sink.in.head._1(0)
    x.externalInterrupt.mtip  := outer.clint_int_sink.in.head._1(1)
    x.externalInterrupt.meip  := outer.plic_int_sink.in.head._1(0)
    x.externalInterrupt.seip  := outer.plic_int_sink.in.last._1(0)
    x.externalInterrupt.debug := outer.debug_int_sink.in.head._1(0)
    // nmi_31 can be raised either by the dedicated NMI line or the local BEU.
    x.externalInterrupt.nmi.nmi_31 := outer.nmi_int_sink.in.head._1(0) | outer.beu_local_int_sink.in.head._1(0)
    x.externalInterrupt.nmi.nmi_43 := outer.nmi_int_sink.in.head._1(1)
    x.msiInfo   := DelayNWithValid(io.fromTopToBackend.msiInfo, 1)
    x.clintTime := DelayNWithValid(io.fromTopToBackend.clintTime, 1)
  }

  // Occupancy status reported to the backend (registered).
  io.memInfo.sqFull := RegNext(lsq.io.sqFull)
  io.memInfo.lqFull := RegNext(lsq.io.lqFull)
  io.memInfo.dcacheMSHRFull := RegNext(dcache.io.mshrFull)

  // Plumb-through signals between the core top and the inner blocks.
  io.inner_hartId := io.hartId
  io.inner_reset_vector := RegNext(io.outer_reset_vector)
  io.outer_cpu_halt := io.ooo_to_mem.backendToTopBypass.cpuHalted
  io.outer_l2_flush_en := io.ooo_to_mem.csrCtrl.flush_l2_enable
  io.outer_power_down_en := io.ooo_to_mem.csrCtrl.power_down_enable
  io.outer_cpu_critical_error := io.ooo_to_mem.backendToTopBypass.cpuCriticalError
  io.outer_msi_ack := io.ooo_to_mem.backendToTopBypass.msiAck
  io.outer_beu_errors_icache := RegNext(io.inner_beu_errors_icache)
  io.inner_hc_perfEvents <> RegNext(io.outer_hc_perfEvents)

  // vector segmentUnit: fed from vldu issue port 0, qualified by isSegment.
  vSegmentUnit.io.in.bits <> io.ooo_to_mem.issueVldu.head.bits
  vSegmentUnit.io.in.valid := isSegment && io.ooo_to_mem.issueVldu.head.valid// is segment instruction
  vSegmentUnit.io.dtlb.resp.bits <> dtlb_reqs.take(LduCnt).head.resp.bits
  // vSegmentUnit shares load port 0's TLB response, PMP checker and dcache
  // read port (valid/bits snooped; arbitration handled elsewhere).
  vSegmentUnit.io.dtlb.resp.valid <> dtlb_reqs.take(LduCnt).head.resp.valid
  vSegmentUnit.io.pmpResp <> pmp_check.head.resp
  vSegmentUnit.io.flush_sbuffer.empty := stIsEmpty
  vSegmentUnit.io.redirect <> redirect
  vSegmentUnit.io.rdcache.resp.bits := dcache.io.lsu.load(0).resp.bits
  vSegmentUnit.io.rdcache.resp.valid := dcache.io.lsu.load(0).resp.valid
  vSegmentUnit.io.rdcache.s2_bank_conflict := dcache.io.lsu.load(0).s2_bank_conflict
  // -------------------------
  // Vector Segment Triggers
  // -------------------------
  vSegmentUnit.io.fromCsrTrigger.tdataVec := tdata
  vSegmentUnit.io.fromCsrTrigger.tEnableVec := tEnable
  vSegmentUnit.io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
  vSegmentUnit.io.fromCsrTrigger.debugMode := debugMode

  // reset tree of MemBlock: two balanced subtrees; the right one also drives
  // the backend reset cell. Both honor the MBIST reset bypass from L2 top.
  if (p(DebugOptionsKey).ResetGen) {
    val leftResetTree = ResetGenNode(
      Seq(
        ModuleNode(ptw),
        ModuleNode(ptw_to_l2_buffer),
        ModuleNode(lsq),
        ModuleNode(dtlb_st_tlb_st),
        ModuleNode(dtlb_prefetch_tlb_prefetch),
        ModuleNode(pmp)
      )
      ++ pmp_checkers.map(ModuleNode(_))
      ++ (if (prefetcherOpt.isDefined) Seq(ModuleNode(prefetcherOpt.get)) else Nil)
      ++ (if (l1PrefetcherOpt.isDefined) Seq(ModuleNode(l1PrefetcherOpt.get)) else Nil)
    )
    val rightResetTree = ResetGenNode(
      Seq(
        ModuleNode(sbuffer),
        ModuleNode(dtlb_ld_tlb_ld),
        ModuleNode(dcache),
        ModuleNode(l1d_to_l2_buffer),
        CellNode(io.reset_backend)
      )
    )
    ResetGen(leftResetTree, reset, sim = false, io.sramTestBypass.fromL2Top.mbistReset)
    ResetGen(rightResetTree, reset, sim = false, io.sramTestBypass.fromL2Top.mbistReset)
  } else {
    io.reset_backend := DontCare
  }
  io.resetInFrontendBypass.toL2Top := io.resetInFrontendBypass.fromFrontend
  // trace interface: one-cycle retiming stage between backend and L2 top.
  // trap/priv/ilastsize/iaddr use RegEnable (held until the next valid group);
  // valid/iretire/itype use plain RegNext.
  val traceToL2Top = io.traceCoreInterfaceBypass.toL2Top
  val traceFromBackend = io.traceCoreInterfaceBypass.fromBackend
  traceFromBackend.fromEncoder := RegNext(traceToL2Top.fromEncoder)
  traceToL2Top.toEncoder.trap := RegEnable(
    traceFromBackend.toEncoder.trap,
    traceFromBackend.toEncoder.groups(0).valid && Itype.isTrap(traceFromBackend.toEncoder.groups(0).bits.itype)
  )
  traceToL2Top.toEncoder.priv := RegEnable(
    traceFromBackend.toEncoder.priv,
    traceFromBackend.toEncoder.groups(0).valid
  )
  (0 until TraceGroupNum).foreach { i =>
    traceToL2Top.toEncoder.groups(i).valid := RegNext(traceFromBackend.toEncoder.groups(i).valid)
    traceToL2Top.toEncoder.groups(i).bits.iretire := RegNext(traceFromBackend.toEncoder.groups(i).bits.iretire)
    traceToL2Top.toEncoder.groups(i).bits.itype := RegNext(traceFromBackend.toEncoder.groups(i).bits.itype)
    traceToL2Top.toEncoder.groups(i).bits.ilastsize := RegEnable(
      traceFromBackend.toEncoder.groups(i).bits.ilastsize,
      traceFromBackend.toEncoder.groups(i).valid
    )
    // iaddr = group pc + (ftqOffset << instOffsetBits), both captured on valid.
    traceToL2Top.toEncoder.groups(i).bits.iaddr := RegEnable(
      traceFromBackend.toEncoder.groups(i).bits.iaddr,
      traceFromBackend.toEncoder.groups(i).valid
    ) + (RegEnable(
      traceFromBackend.toEncoder.groups(i).bits.ftqOffset.getOrElse(0.U),
      traceFromBackend.toEncoder.groups(i).valid
    ) << instOffsetBits)
  }


  io.mem_to_ooo.storeDebugInfo := DontCare
  // store event difftest information
  if (env.EnableDifftest) {
    (0 until EnsbufferWidth).foreach{i =>
      io.mem_to_ooo.storeDebugInfo(i).robidx := sbuffer.io.vecDifftestInfo(i).bits.robIdx
      sbuffer.io.vecDifftestInfo(i).bits.pc := io.mem_to_ooo.storeDebugInfo(i).pc
    }
  }

  // top-down info: broadcast ROB-head vaddr, collect per-unit diagnoses.
  dcache.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
  dtlbRepeater.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
  lsq.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
  io.debugTopDown.toCore.robHeadMissInDCache := dcache.io.debugTopDown.robHeadMissInDCache
  io.debugTopDown.toCore.robHeadTlbReplay := lsq.io.debugTopDown.robHeadTlbReplay
  io.debugTopDown.toCore.robHeadTlbMiss := lsq.io.debugTopDown.robHeadTlbMiss
  io.debugTopDown.toCore.robHeadLoadVio := lsq.io.debugTopDown.robHeadLoadVio
  io.debugTopDown.toCore.robHeadLoadMSHR := lsq.io.debugTopDown.robHeadLoadMSHR
  dcache.io.debugTopDown.robHeadOtherReplay := lsq.io.debugTopDown.robHeadOtherReplay
  dcache.io.debugRolling := io.debugRolling

  lsq.io.noUopsIssued := io.topDownInfo.toBackend.noUopsIssued
  io.topDownInfo.toBackend.lqEmpty := lsq.io.lqEmpty
  io.topDownInfo.toBackend.sqEmpty := lsq.io.sqEmpty
  io.topDownInfo.toBackend.l1Miss := dcache.io.l1Miss
  io.topDownInfo.toBackend.l2TopMiss.l2Miss := RegNext(io.topDownInfo.fromL2Top.l2Miss)
  io.topDownInfo.toBackend.l2TopMiss.l3Miss := RegNext(io.topDownInfo.fromL2Top.l3Miss)

  // Issue-queue dequeue statistics. Hybrid ports are classified per-uop by
  // fuType; +& keeps the full-width (non-truncating) sum.
  val hyLdDeqCount = PopCount(io.ooo_to_mem.issueHya.map(x => x.valid && FuType.isLoad(x.bits.uop.fuType)))
  val hyStDeqCount = PopCount(io.ooo_to_mem.issueHya.map(x => x.valid && FuType.isStore(x.bits.uop.fuType)))
  val ldDeqCount = PopCount(io.ooo_to_mem.issueLda.map(_.valid)) +& hyLdDeqCount
  val stDeqCount = PopCount(io.ooo_to_mem.issueSta.take(StaCnt).map(_.valid)) +& hyStDeqCount
  val iqDeqCount = ldDeqCount +& stDeqCount
  XSPerfAccumulate("load_iq_deq_count", ldDeqCount)
  XSPerfHistogram("load_iq_deq_count", ldDeqCount, true.B, 0, LdExuCnt + 1)
  XSPerfAccumulate("store_iq_deq_count", stDeqCount)
  XSPerfHistogram("store_iq_deq_count", stDeqCount, true.B, 0, StAddrCnt + 1)
  XSPerfAccumulate("ls_iq_deq_count", iqDeqCount)

  // HPM event selection: MemBlock consumes hpmevent slots [16, 24).
  val pfevent = Module(new PFEvent)
  pfevent.io.distribute_csr := csrCtrl.distribute_csr
  val csrevents = pfevent.io.hpmevent.slice(16,24)

  val perfFromUnits = (loadUnits ++ Seq(sbuffer, lsq, dcache)).flatMap(_.getPerfEvents)
  val perfFromPTW = perfEventsPTW.map(x => ("PTW_" + x._1, x._2))
  val perfBlock = Seq(("ldDeqCount", ldDeqCount),
                      ("stDeqCount", stDeqCount))
  // let index = 0 be no event
  val allPerfEvents = Seq(("noEvent", 0.U)) ++ perfFromUnits ++ perfFromPTW ++ perfBlock

  // Elaboration-time dump of the event-index encoding (printed once, at build).
  if (printEventCoding) {
    for (((name, inc), i) <- allPerfEvents.zipWithIndex) {
      println("MemBlock perfEvents Set", name, inc, i)
    }
  }

  val allPerfInc = allPerfEvents.map(_._2.asTypeOf(new PerfEvent))
  val perfEvents = HPerfMonitor(csrevents, allPerfInc).getPerfEvents
  generatePerfEvent()

  // MBIST: collect every SRAM in MemBlock into one pipeline + one interface.
  private val mbistPl = MbistPipeline.PlaceMbistPipeline(Int.MaxValue, "MbistPipeMemBlk", hasMbist)
  private val mbistIntf = if(hasMbist) {
    val params = mbistPl.get.nodeParams
    val intf = Some(Module(new MbistInterface(
      params = Seq(params),
      ids = Seq(mbistPl.get.childrenIds),
      name = s"MbistIntfMemBlk",
      pipelineNum = 1
    )))
    intf.get.toPipeline.head <> mbistPl.get.mbist
    mbistPl.get.registerCSV(intf.get.info, "MbistMemBlk")
    intf.get.mbist := DontCare
    dontTouch(intf.get.mbist)
    //TODO: add mbist controller connections here
    intf
  } else {
    None
  }
  private val sigFromSrams = if (hasSramTest) Some(SramHelper.genBroadCastBundleTop()) else None
  private val cg = ClockGate.genTeSrc
  dontTouch(cg)

  sigFromSrams.foreach({ case sig => sig.mbist := DontCare })
  // Fan the L2-top MBIST control/reset out to frontend, backend and local SRAMs.
  if (hasMbist) {
    sigFromSrams.get.mbist := io.sramTestBypass.fromL2Top.mbist.get
    io.sramTestBypass.toFrontend.mbist.get := io.sramTestBypass.fromL2Top.mbist.get
    io.sramTestBypass.toFrontend.mbistReset.get := io.sramTestBypass.fromL2Top.mbistReset.get
    io.sramTestBypass.toBackend.mbist.get := io.sramTestBypass.fromL2Top.mbist.get
    io.sramTestBypass.toBackend.mbistReset.get := io.sramTestBypass.fromL2Top.mbistReset.get
    cg.cgen := io.sramTestBypass.fromL2Top.mbist.get.cgen
  } else {
    cg.cgen := false.B
  }

  // sram debug: broadcast the (registered) SRAM control word, unpacking the
  // vendor timing fields from fixed bit slices of the CFG word.
  val sramCtl = Option.when(hasSramCtl)(RegNext(io.sramTestBypass.fromL2Top.sramCtl.get))
  sigFromSrams.foreach({ case sig => sig.sramCtl := DontCare })
  sigFromSrams.zip(sramCtl).foreach {
    case (sig, ctl) =>
      sig.sramCtl.RTSEL := ctl(1, 0) // CFG[1 : 0]
      sig.sramCtl.WTSEL := ctl(3, 2) // CFG[3 : 2]
      sig.sramCtl.MCR   := ctl(5, 4) // CFG[5 : 4]
      sig.sramCtl.MCW   := ctl(7, 6) // CFG[7 : 6]
  }
  if (hasSramCtl) {
    io.sramTestBypass.toFrontend.sramCtl.get := sramCtl.get
  }
}

/**
 * Diplomacy wrapper around MemBlockInlined. Kept non-inlined so the inner
 * module gets its own hierarchy level in the generated design.
 */
class MemBlock()(implicit p: Parameters) extends LazyModule
  with HasXSParameter {
  override def shouldBeInlined: Boolean = false

  val inner = LazyModule(new MemBlockInlined())

  lazy val module = new MemBlockImp(this)
}

/**
 * Implementation of the MemBlock wrapper: mirrors the inner module's IO and
 * perf bundles one level up, and (when reset generation is enabled) gives the
 * inner module its own reset tree honoring the MBIST reset bypass.
 */
class MemBlockImp(wrapper: MemBlock) extends LazyModuleImp(wrapper) {
  val io = IO(wrapper.inner.module.io.cloneType)
  val io_perf = IO(wrapper.inner.module.io_perf.cloneType)
  io <> wrapper.inner.module.io
  io_perf <> wrapper.inner.module.io_perf

  if (p(DebugOptionsKey).ResetGen) {
    ResetGen(
      ResetGenNode(Seq(ModuleNode(wrapper.inner.module))),
      reset, sim = false, io.sramTestBypass.fromL2Top.mbistReset
    )
  }
}