xref: /XiangShan/src/main/scala/xiangshan/mem/MemBlock.scala (revision 602aa9f1a8fb63310bea30e8b3e247e5aca5f123)
1c590fb32Scz4e/***************************************************************************************
2c590fb32Scz4e* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3c590fb32Scz4e* Copyright (c) 2020-2021 Peng Cheng Laboratory
4c590fb32Scz4e*
5c590fb32Scz4e* XiangShan is licensed under Mulan PSL v2.
6c590fb32Scz4e* You can use this software according to the terms and conditions of the Mulan PSL v2.
7c590fb32Scz4e* You may obtain a copy of Mulan PSL v2 at:
8c590fb32Scz4e*          http://license.coscl.org.cn/MulanPSL2
9c590fb32Scz4e*
10c590fb32Scz4e* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11c590fb32Scz4e* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12c590fb32Scz4e* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13c590fb32Scz4e*
14c590fb32Scz4e* See the Mulan PSL v2 for more details.
15c590fb32Scz4e***************************************************************************************/
16c590fb32Scz4e
17c590fb32Scz4epackage xiangshan.mem
18c590fb32Scz4e
19c590fb32Scz4eimport org.chipsalliance.cde.config.Parameters
20c590fb32Scz4eimport chisel3._
21c590fb32Scz4eimport chisel3.util._
22c590fb32Scz4eimport freechips.rocketchip.diplomacy._
23c590fb32Scz4eimport freechips.rocketchip.diplomacy.{BundleBridgeSource, LazyModule, LazyModuleImp}
24c590fb32Scz4eimport freechips.rocketchip.interrupts.{IntSinkNode, IntSinkPortSimple}
25c590fb32Scz4eimport freechips.rocketchip.tile.HasFPUParameters
26c590fb32Scz4eimport freechips.rocketchip.tilelink._
27529b1cfdSTang Haojinimport device.MsiInfoBundle
28c590fb32Scz4eimport utils._
29c590fb32Scz4eimport utility._
30*602aa9f1Scz4eimport utility.mbist.{MbistInterface, MbistPipeline}
31*602aa9f1Scz4eimport utility.sram.{SramMbistBundle, SramBroadcastBundle, SramHelper}
32c590fb32Scz4eimport system.SoCParamsKey
33c590fb32Scz4eimport xiangshan._
34c590fb32Scz4eimport xiangshan.ExceptionNO._
35c590fb32Scz4eimport xiangshan.frontend.HasInstrMMIOConst
36c590fb32Scz4eimport xiangshan.backend.Bundles.{DynInst, MemExuInput, MemExuOutput}
37c590fb32Scz4eimport xiangshan.backend.ctrlblock.{DebugLSIO, LsTopdownInfo}
38c590fb32Scz4eimport xiangshan.backend.exu.MemExeUnit
39c590fb32Scz4eimport xiangshan.backend.fu._
40c590fb32Scz4eimport xiangshan.backend.fu.FuType._
41a67fd0f5SGuanghui Chengimport xiangshan.backend.fu.NewCSR.{CsrTriggerBundle, TriggerUtil, PFEvent}
42075d4937Sjunxiong-jiimport xiangshan.backend.fu.util.{CSRConst, SdtrigExt}
43c590fb32Scz4eimport xiangshan.backend.{BackendToTopBundle, TopToBackendBundle}
44c590fb32Scz4eimport xiangshan.backend.rob.{RobDebugRollingIO, RobPtr, RobLsqIO}
45c590fb32Scz4eimport xiangshan.backend.datapath.NewPipelineConnect
46c590fb32Scz4eimport xiangshan.backend.trace.{Itype, TraceCoreInterface}
47c590fb32Scz4eimport xiangshan.backend.Bundles._
48c590fb32Scz4eimport xiangshan.mem._
49c590fb32Scz4eimport xiangshan.mem.mdp._
509e12e8edScz4eimport xiangshan.mem.Bundles._
51c590fb32Scz4eimport xiangshan.mem.prefetch.{BasePrefecher, L1Prefetcher, SMSParams, SMSPrefetcher}
52c590fb32Scz4eimport xiangshan.cache._
53c590fb32Scz4eimport xiangshan.cache.mmu._
544b2c87baS梁森 Liang Senimport coupledL2.PrefetchRecv
55*602aa9f1Scz4eimport system.HasSoCParameter
56*602aa9f1Scz4e
// Unit counts and write-back port indices shared by everything in the memory block.
trait HasMemBlockParameters extends HasXSParameter {
  // Per-kind unit counts, taken directly from the backend parameters.
  val LduCnt  = backendParams.LduCnt
  val StaCnt  = backendParams.StaCnt
  val StdCnt  = backendParams.StdCnt
  val HyuCnt  = backendParams.HyuCnt
  val VlduCnt = backendParams.VlduCnt
  val VstuCnt = backendParams.VstuCnt

  // Derived totals. A hybrid unit is counted on both the load side and the store-address side.
  val LdExuCnt  = LduCnt + HyuCnt
  val StAddrCnt = StaCnt + HyuCnt
  val StDataCnt = StdCnt
  val MemExuCnt = LdExuCnt + StaCnt + StdCnt
  val MemAddrExtCnt = LdExuCnt + StaCnt
  val MemVExuCnt = VlduCnt + VstuCnt

  // Indices into the load-unit / load write-back port sequences.
  val AtomicWBPort   = 0
  val MisalignWBPort = 1
  val UncacheWBPort  = 2
  // NOTE(review): presumably the ports that may write back non-cacheable loads; not used in the visible part of this file.
  val NCWBPorts = Seq(1, 2)
}
78c590fb32Scz4e
// Base class for memory-block bundles: a Bundle that carries the implicit Parameters
// and mixes in the memory-block unit counts.
abstract class MemBlockBundle(implicit val p: Parameters)
  extends Bundle
  with HasMemBlockParameters
80c590fb32Scz4e
// Store-data function unit: forwards source operand 0 and the ROB index to its output.
class Std(cfg: FuConfig)(implicit p: Parameters) extends FuncUnit(cfg) {
  // Handshake is passed straight through in both directions.
  io.out.valid := io.in.valid
  io.in.ready  := io.out.ready

  // Clear the whole payload first; the two connects below then override their fields
  // (last connect wins).
  io.out.bits := 0.U.asTypeOf(io.out.bits)
  io.out.bits.ctrl.robIdx := io.in.bits.ctrl.robIdx
  io.out.bits.res.data    := io.in.bits.data.src(0)
}
88c590fb32Scz4e
// Signals entering the memory block from the out-of-order backend.
// Field names and declaration order define the port layout; keep both stable.
class ooo_to_mem(implicit p: Parameters) extends MemBlockBundle {
  val backendToTopBypass = Flipped(new BackendToTopBundle)

  // One entry per load-capable pipeline (LdExuCnt = LduCnt + HyuCnt).
  val loadFastMatch    = Vec(LdExuCnt, Input(UInt(LdExuCnt.W)))
  val loadFastFuOpType = Vec(LdExuCnt, Input(FuOpType()))
  val loadFastImm      = Vec(LdExuCnt, Input(UInt(12.W)))

  val sfence = Input(new SfenceBundle)
  val tlbCsr = Input(new TlbCsrBundle)

  val lsqio = new Bundle {
    // Both counters are wide enough to hold any value in 0..CommitWidth.
    val lcommit        = Input(UInt(log2Up(CommitWidth + 1).W))
    val scommit        = Input(UInt(log2Up(CommitWidth + 1).W))
    val pendingMMIOld  = Input(Bool())
    val pendingld      = Input(Bool())
    val pendingst      = Input(Bool())
    val pendingVst     = Input(Bool())
    val commit         = Input(Bool())
    val pendingPtr     = Input(new RobPtr)
    val pendingPtrNext = Input(new RobPtr)
  }

  val isStoreException = Input(Bool())
  val isVlsException   = Input(Bool())
  val csrCtrl          = Flipped(new CustomCSRCtrlIO)
  val enqLsq           = new LsqEnqIO
  val flushSb          = Input(Bool())

  // PCs supplied for hardware prefetch training.
  val storePc  = Vec(StaCnt, Input(UInt(VAddrBits.W)))
  val hybridPc = Vec(HyuCnt, Input(UInt(VAddrBits.W)))

  // Issue ports, one MixedVec per unit kind; the vector-load port uses the vector payload.
  val issueLda  = MixedVec(Seq.fill(LduCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueSta  = MixedVec(Seq.fill(StaCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueStd  = MixedVec(Seq.fill(StdCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueHya  = MixedVec(Seq.fill(HyuCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueVldu = MixedVec(Seq.fill(VlduCnt)(Flipped(DecoupledIO(new MemExuInput(isVector = true)))))

  // All issue ports, concatenated in the order lda, sta, std, hya, vldu.
  def issueUops = issueLda ++ issueSta ++ issueStd ++ issueHya ++ issueVldu
}
126c590fb32Scz4e
// Signals leaving the memory block towards the out-of-order backend.
// Field names and declaration order define the port layout; keep both stable.
class mem_to_ooo(implicit p: Parameters) extends MemBlockBundle {
  val topToBackendBypass = new TopToBackendBundle

  val otherFastWakeup = Vec(LdExuCnt, ValidIO(new DynInst))

  // Each counter is sized to hold any value from 0 up to the bound named in its width expression.
  val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize + 1).W))
  val sqCancelCnt = Output(UInt(log2Up(StoreQueueSize + 1).W))
  val sqDeq       = Output(UInt(log2Ceil(EnsbufferWidth + 1).W))
  val lqDeq       = Output(UInt(log2Up(CommitWidth + 1).W))

  // Consumed by the VLSU issue queue: a vector store waits for all older stores,
  // and a vector load waits for all older loads.
  val sqDeqPtr   = Output(new SqPtr)
  val lqDeqPtr   = Output(new LqPtr)
  val stIn       = Vec(StAddrCnt, ValidIO(new MemExuInput))
  val stIssuePtr = Output(new SqPtr())

  val memoryViolation = ValidIO(new Redirect)
  val sbIsEmpty       = Output(Bool())

  val lsTopdownInfo = Vec(LdExuCnt, Output(new LsTopdownInfo))

  val lsqio = new Bundle {
    val vaddr             = Output(UInt(XLEN.W))
    val vstart            = Output(UInt((log2Up(VLEN) + 1).W))
    val vl                = Output(UInt((log2Up(VLEN) + 1).W))
    val gpaddr            = Output(UInt(XLEN.W))
    val isForVSnonLeafPTE = Output(Bool())
    val mmio              = Output(Vec(LoadPipelineWidth, Bool()))
    val uop               = Output(Vec(LoadPipelineWidth, new DynInst))
    val lqCanAccept       = Output(Bool())
    val sqCanAccept       = Output(Bool())
  }

  // Per-entry debug channel: robidx is driven out of this bundle, pc runs the other way (Input).
  val storeDebugInfo = Vec(EnsbufferWidth, new Bundle {
    val robidx = Output(new RobPtr)
    val pc     = Input(UInt(VAddrBits.W))
  })

  // Write-back ports, one Vec per unit kind; a hybrid unit has both a load and a store port.
  val writebackLda    = Vec(LduCnt, DecoupledIO(new MemExuOutput))
  val writebackSta    = Vec(StaCnt, DecoupledIO(new MemExuOutput))
  val writebackStd    = Vec(StdCnt, DecoupledIO(new MemExuOutput))
  val writebackHyuLda = Vec(HyuCnt, DecoupledIO(new MemExuOutput))
  val writebackHyuSta = Vec(HyuCnt, DecoupledIO(new MemExuOutput))
  val writebackVldu   = Vec(VlduCnt, DecoupledIO(new MemExuOutput(isVector = true)))

  // All write-back ports in the fixed order: sta, hyu-load, hyu-store, lda, vldu, std.
  def writeBack: Seq[DecoupledIO[MemExuOutput]] =
    Seq(
      writebackSta,
      writebackHyuLda,
      writebackHyuSta,
      writebackLda,
      writebackVldu,
      writebackStd
    ).flatten

  // Issue-queue feedback, one Vec per unit kind.
  val ldaIqFeedback  = Vec(LduCnt, new MemRSFeedbackIO)
  val staIqFeedback  = Vec(StaCnt, new MemRSFeedbackIO)
  val hyuIqFeedback  = Vec(HyuCnt, new MemRSFeedbackIO)
  val vstuIqFeedback = Vec(VstuCnt, new MemRSFeedbackIO(isVector = true))
  val vlduIqFeedback = Vec(VlduCnt, new MemRSFeedbackIO(isVector = true))
  val ldCancel       = Vec(backendParams.LdExuCnt, new LoadCancelIO)
  val wakeup         = Vec(backendParams.LdExuCnt, Valid(new DynInst))

  val s3_delayed_load_error = Vec(LdExuCnt, Output(Bool()))
}
187c590fb32Scz4e
// Top-down debug flags sent from the memory block to the core; every field is a single output bit.
class MemCoreTopDownIO extends Bundle {
  val robHeadMissInDCache = Output(Bool())
  val robHeadTlbReplay    = Output(Bool())
  val robHeadTlbMiss      = Output(Bool())
  val robHeadLoadVio      = Output(Bool())
  val robHeadLoadMSHR     = Output(Bool())
}
195c590fb32Scz4e
// Frontend-to-memory-block interface: the flipped ITLB page-table-walk port.
class fetch_to_mem(implicit p: Parameters) extends XSBundle {
  val itlb = Flipped(new TlbPtwIO())
}
199c590fb32Scz4e
// Triple buffer on the instruction-MMIO path: the two stages built here sit in MemBlock,
// the third one is at L2Top.
class InstrUncacheBuffer()(implicit p: Parameters) extends LazyModule with HasInstrMMIOConst {
  val node = new TLBufferNode(
    BufferParams.default,
    BufferParams.default,
    BufferParams.default,
    BufferParams.default,
    BufferParams.default
  )
  lazy val module = new InstrUncacheBufferImpl

  class InstrUncacheBufferImpl extends LazyModuleImp(this) {
    // The edge parameters are not needed; only the A and D channel bundles are touched.
    node.in.zip(node.out).foreach { case ((in, _), (out, _)) =>
      // Two back-to-back default buffers on the request (A) and response (D) channels.
      out.a <> BufferParams.default(BufferParams.default(in.a))
      in.d <> BufferParams.default(BufferParams.default(out.d))

      // Only a.valid, a.ready and a.address may change. The remaining A-channel fields are
      // overridden with constants (last connect wins), hoping they get optimized away so
      // that the MemBlock port stays unchanged after adding the buffer.
      out.a.bits.opcode := TLMessages.Get // named constant for the TileLink Get opcode (4)
      out.a.bits.size   := log2Ceil(mmioBusBytes).U
      out.a.bits.source := 0.U
      out.a.bits.mask   := Fill(mmioBusBytes, 1.U(1.W)) // all byte lanes enabled
      out.a.bits.data   := 0.U
    }
  }
}
220c590fb32Scz4e
// Triple buffer on the L1I$-to-L2 path: the two stages built here sit in MemBlock,
// the third one is at L2Top.
class ICacheBuffer()(implicit p: Parameters) extends LazyModule {
  val node = new TLBufferNode(
    BufferParams.default,
    BufferParams.default,
    BufferParams.default,
    BufferParams.default,
    BufferParams.default
  )
  lazy val module = new ICacheBufferImpl

  class ICacheBufferImpl extends LazyModuleImp(this) {
    // Pair every inward port with its outward port; the edge parameters are not used.
    node.in.zip(node.out).foreach { case ((in, _), (out, _)) =>
      // Two back-to-back default buffers on the request (A) and response (D) channels.
      out.a <> BufferParams.default(BufferParams.default(in.a))
      in.d <> BufferParams.default(BufferParams.default(out.d))
    }
  }
}
233c590fb32Scz4e
// Double default buffer on the A and D channels of the ICache control port.
class ICacheCtrlBuffer()(implicit p: Parameters) extends LazyModule {
  val node = new TLBufferNode(
    BufferParams.default,
    BufferParams.default,
    BufferParams.default,
    BufferParams.default,
    BufferParams.default
  )
  lazy val module = new ICacheCtrlBufferImpl

  class ICacheCtrlBufferImpl extends LazyModuleImp(this) {
    // Pair every inward port with its outward port; the edge parameters are not used.
    node.in.zip(node.out).foreach { case ((in, _), (out, _)) =>
      out.a <> BufferParams.default(BufferParams.default(in.a))
      in.d <> BufferParams.default(BufferParams.default(out.d))
    }
  }
}
245c590fb32Scz4e
// The frontend's bus connections are routed through MemBlock via these three buffer nodes.
class FrontendBridge()(implicit p: Parameters) extends LazyModule {
  // suggestName is applied to keep the IO port names.
  val icache_node        = LazyModule(new ICacheBuffer()).suggestName("icache").node
  val icachectrl_node    = LazyModule(new ICacheCtrlBuffer()).suggestName("icachectrl").node
  val instr_uncache_node = LazyModule(new InstrUncacheBuffer()).suggestName("instr_uncache").node

  // No logic of its own: the implementation body is empty.
  lazy val module = new LazyModuleImp(this) {}
}
254c590fb32Scz4e
// Diplomacy-level half of the memory block: instantiates the dcache, uncache and PTW lazy
// modules, declares the bus / prefetch / interrupt nodes, and wires the uncache crossbar.
class MemBlockInlined()(implicit p: Parameters) extends LazyModule
  with HasXSParameter {
  override def shouldBeInlined: Boolean = true

  val dcache = LazyModule(new DCacheWrapper())
  val uncache = LazyModule(new Uncache())
  val uncache_port = TLTempNode()
  val uncache_xbar = TLXbar()
  val ptw = LazyModule(new L2TLBWrapper())
  // Both buffers are null (not None) when the corresponding feature is configured out.
  val ptw_to_l2_buffer = if (coreParams.softPTW) null else LazyModule(new TLBuffer)
  val l1d_to_l2_buffer = if (coreParams.dcacheParametersOpt.isEmpty) null else LazyModule(new TLBuffer)
  val dcache_port = TLNameNode("dcache_client") // to keep dcache-L2 port name

  // Prefetch senders exist only when a prefetcher is configured; the L3 one additionally
  // requires L3 cache parameters to be present.
  val l2_pf_sender_opt = coreParams.prefetcher.map { _ =>
    BundleBridgeSource(() => new PrefetchRecv)
  }
  val l3_pf_sender_opt =
    if (p(SoCParamsKey).L3CacheParamsOpt.nonEmpty) {
      coreParams.prefetcher.map { _ =>
        BundleBridgeSource(() => new huancun.PrefetchRecv)
      }
    } else {
      None
    }
  val frontendBridge = LazyModule(new FrontendBridge)

  // interrupt sinks
  val clint_int_sink = IntSinkNode(IntSinkPortSimple(1, 2))
  val debug_int_sink = IntSinkNode(IntSinkPortSimple(1, 1))
  val plic_int_sink = IntSinkNode(IntSinkPortSimple(2, 1))
  val nmi_int_sink = IntSinkNode(IntSinkPortSimple(1, (new NonmaskableInterruptIO).elements.size))
  val beu_local_int_sink = IntSinkNode(IntSinkPortSimple(1, 1))

  // The PTW-to-L2 buffer only exists (is non-null) for a hardware PTW.
  if (!coreParams.softPTW) {
    ptw_to_l2_buffer.node := ptw.node
  }

  // The uncache client feeds the crossbar through one buffer; the crossbar then fans out,
  // through a chain of two buffers each, to the dcache's uncache node (when there is one)
  // and to the external uncache port.
  uncache_xbar := TLBuffer() := uncache.clientNode
  dcache.uncacheNode.foreach { dcacheUncacheNode =>
    dcacheUncacheNode := TLBuffer.chainNode(2) := uncache_xbar
  }
  uncache_port := TLBuffer.chainNode(2) := uncache_xbar

  lazy val module = new MemBlockInlinedImp(this)
}
292c590fb32Scz4e
293c590fb32Scz4eclass MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer)
294c590fb32Scz4e  with HasXSParameter
295c590fb32Scz4e  with HasFPUParameters
296c590fb32Scz4e  with HasPerfEvents
297c590fb32Scz4e  with HasL1PrefetchSourceParameter
298c590fb32Scz4e  with HasCircularQueuePtrHelper
299c590fb32Scz4e  with HasMemBlockParameters
300c590fb32Scz4e  with HasTlbConst
301c590fb32Scz4e  with SdtrigExt
302c590fb32Scz4e{
303c590fb32Scz4e  val io = IO(new Bundle {
304c590fb32Scz4e    val hartId = Input(UInt(hartIdLen.W))
305c590fb32Scz4e    val redirect = Flipped(ValidIO(new Redirect))
306c590fb32Scz4e
307c590fb32Scz4e    val ooo_to_mem = new ooo_to_mem
308c590fb32Scz4e    val mem_to_ooo = new mem_to_ooo
309c590fb32Scz4e    val fetch_to_mem = new fetch_to_mem
310c590fb32Scz4e
311c590fb32Scz4e    val ifetchPrefetch = Vec(LduCnt, ValidIO(new SoftIfetchPrefetchBundle))
312c590fb32Scz4e
313c590fb32Scz4e    // misc
314c590fb32Scz4e    val error = ValidIO(new L1CacheErrorInfo)
315c590fb32Scz4e    val memInfo = new Bundle {
316c590fb32Scz4e      val sqFull = Output(Bool())
317c590fb32Scz4e      val lqFull = Output(Bool())
318c590fb32Scz4e      val dcacheMSHRFull = Output(Bool())
319c590fb32Scz4e    }
320c590fb32Scz4e    val debug_ls = new DebugLSIO
321c590fb32Scz4e    val l2_hint = Input(Valid(new L2ToL1Hint()))
322c590fb32Scz4e    val l2PfqBusy = Input(Bool())
323c590fb32Scz4e    val l2_tlb_req = Flipped(new TlbRequestIO(nRespDups = 2))
324c590fb32Scz4e    val l2_pmp_resp = new PMPRespBundle
325c590fb32Scz4e    val l2_flush_done = Input(Bool())
326c590fb32Scz4e
327c590fb32Scz4e    val debugTopDown = new Bundle {
328c590fb32Scz4e      val robHeadVaddr = Flipped(Valid(UInt(VAddrBits.W)))
329c590fb32Scz4e      val toCore = new MemCoreTopDownIO
330c590fb32Scz4e    }
331c590fb32Scz4e    val debugRolling = Flipped(new RobDebugRollingIO)
332c590fb32Scz4e
333c590fb32Scz4e    // All the signals from/to frontend/backend to/from bus will go through MemBlock
334c590fb32Scz4e    val fromTopToBackend = Input(new Bundle {
335529b1cfdSTang Haojin      val msiInfo   = ValidIO(new MsiInfoBundle)
336c590fb32Scz4e      val clintTime = ValidIO(UInt(64.W))
337c590fb32Scz4e    })
338c590fb32Scz4e    val inner_hartId = Output(UInt(hartIdLen.W))
339c590fb32Scz4e    val inner_reset_vector = Output(UInt(PAddrBits.W))
340c590fb32Scz4e    val outer_reset_vector = Input(UInt(PAddrBits.W))
341c590fb32Scz4e    val outer_cpu_halt = Output(Bool())
342c590fb32Scz4e    val outer_l2_flush_en = Output(Bool())
343c590fb32Scz4e    val outer_power_down_en = Output(Bool())
344c590fb32Scz4e    val outer_cpu_critical_error = Output(Bool())
345c590fb32Scz4e    val inner_beu_errors_icache = Input(new L1BusErrorUnitInfo)
346c590fb32Scz4e    val outer_beu_errors_icache = Output(new L1BusErrorUnitInfo)
347c590fb32Scz4e    val inner_hc_perfEvents = Output(Vec(numPCntHc * coreParams.L2NBanks + 1, new PerfEvent))
348c590fb32Scz4e    val outer_hc_perfEvents = Input(Vec(numPCntHc * coreParams.L2NBanks + 1, new PerfEvent))
349c590fb32Scz4e
350c590fb32Scz4e    // reset signals of frontend & backend are generated in memblock
351c590fb32Scz4e    val reset_backend = Output(Reset())
    // Reset signal from frontend.
353c590fb32Scz4e    val resetInFrontendBypass = new Bundle{
354c590fb32Scz4e      val fromFrontend = Input(Bool())
355c590fb32Scz4e      val toL2Top      = Output(Bool())
356c590fb32Scz4e    }
357c590fb32Scz4e    val traceCoreInterfaceBypass = new Bundle{
358c590fb32Scz4e      val fromBackend = Flipped(new TraceCoreInterface(hasOffset = true))
359c590fb32Scz4e      val toL2Top     = new TraceCoreInterface
360c590fb32Scz4e    }
361c590fb32Scz4e
362c590fb32Scz4e    val topDownInfo = new Bundle {
363c590fb32Scz4e      val fromL2Top = Input(new TopDownFromL2Top)
364c590fb32Scz4e      val toBackend = Flipped(new TopDownInfo)
365c590fb32Scz4e    }
366*602aa9f1Scz4e    val sramTestBypass = new Bundle() {
367*602aa9f1Scz4e      val fromL2Top = new Bundle() {
368*602aa9f1Scz4e        val mbist      = Option.when(hasMbist)(Input(new SramMbistBundle))
369*602aa9f1Scz4e        val mbistReset = Option.when(hasMbist)(Input(new DFTResetSignals()))
370*602aa9f1Scz4e        val sramCtl    = Option.when(hasSramCtl)(Input(UInt(64.W)))
371*602aa9f1Scz4e      }
372*602aa9f1Scz4e      val toFrontend = new Bundle() {
373*602aa9f1Scz4e        val mbist      = Option.when(hasMbist)(Output(new SramMbistBundle))
374*602aa9f1Scz4e        val mbistReset = Option.when(hasMbist)(Output(new DFTResetSignals()))
375*602aa9f1Scz4e        val sramCtl    = Option.when(hasSramCtl)(Output(UInt(64.W)))
376*602aa9f1Scz4e      }
377*602aa9f1Scz4e      val toBackend = new Bundle() {
378*602aa9f1Scz4e        val mbist      = Option.when(hasMbist)(Output(new SramMbistBundle))
379*602aa9f1Scz4e        val mbistReset = Option.when(hasMbist)(Output(new DFTResetSignals()))
380*602aa9f1Scz4e      }
381*602aa9f1Scz4e    }
382c590fb32Scz4e  })
383c590fb32Scz4e
384c590fb32Scz4e  dontTouch(io.inner_hartId)
385c590fb32Scz4e  dontTouch(io.inner_reset_vector)
386c590fb32Scz4e  dontTouch(io.outer_reset_vector)
387c590fb32Scz4e  dontTouch(io.outer_cpu_halt)
388c590fb32Scz4e  dontTouch(io.outer_l2_flush_en)
389c590fb32Scz4e  dontTouch(io.outer_power_down_en)
390c590fb32Scz4e  dontTouch(io.outer_cpu_critical_error)
391c590fb32Scz4e  dontTouch(io.inner_beu_errors_icache)
392c590fb32Scz4e  dontTouch(io.outer_beu_errors_icache)
393c590fb32Scz4e  dontTouch(io.inner_hc_perfEvents)
394c590fb32Scz4e  dontTouch(io.outer_hc_perfEvents)
395c590fb32Scz4e
396c590fb32Scz4e  val redirect = RegNextWithEnable(io.redirect)
397c590fb32Scz4e
398c590fb32Scz4e  private val dcache = outer.dcache.module
399c590fb32Scz4e  val uncache = outer.uncache.module
400c590fb32Scz4e
401c590fb32Scz4e  //val delayedDcacheRefill = RegNext(dcache.io.lsu.lsq)
402c590fb32Scz4e
403c590fb32Scz4e  val csrCtrl = DelayN(io.ooo_to_mem.csrCtrl, 2)
404c590fb32Scz4e  dcache.io.l2_pf_store_only := RegNext(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_store_only, false.B)
405c590fb32Scz4e  io.error <> DelayNWithValid(dcache.io.error, 2)
406c590fb32Scz4e  when(!csrCtrl.cache_error_enable){
407c590fb32Scz4e    io.error.bits.report_to_beu := false.B
408c590fb32Scz4e    io.error.valid := false.B
409c590fb32Scz4e  }
410c590fb32Scz4e
411c590fb32Scz4e  val loadUnits = Seq.fill(LduCnt)(Module(new LoadUnit))
412c590fb32Scz4e  val storeUnits = Seq.fill(StaCnt)(Module(new StoreUnit))
413c590fb32Scz4e  val stdExeUnits = Seq.fill(StdCnt)(Module(new MemExeUnit(backendParams.memSchdParams.get.issueBlockParams.find(_.StdCnt != 0).get.exuBlockParams.head)))
414c590fb32Scz4e  val hybridUnits = Seq.fill(HyuCnt)(Module(new HybridUnit)) // Todo: replace it with HybridUnit
415c590fb32Scz4e  val stData = stdExeUnits.map(_.io.out)
416c590fb32Scz4e  val exeUnits = loadUnits ++ storeUnits
417c590fb32Scz4e
418c590fb32Scz4e  // The number of vector load/store units is decoupled with the number of load/store units
419c590fb32Scz4e  val vlSplit = Seq.fill(VlduCnt)(Module(new VLSplitImp))
420c590fb32Scz4e  val vsSplit = Seq.fill(VstuCnt)(Module(new VSSplitImp))
421c590fb32Scz4e  val vlMergeBuffer = Module(new VLMergeBufferImp)
422c590fb32Scz4e  val vsMergeBuffer = Seq.fill(VstuCnt)(Module(new VSMergeBufferImp))
423c590fb32Scz4e  val vSegmentUnit  = Module(new VSegmentUnit)
424c590fb32Scz4e  val vfofBuffer    = Module(new VfofBuffer)
425c590fb32Scz4e
426c590fb32Scz4e  // misalign Buffer
427c590fb32Scz4e  val loadMisalignBuffer = Module(new LoadMisalignBuffer)
428c590fb32Scz4e  val storeMisalignBuffer = Module(new StoreMisalignBuffer)
429c590fb32Scz4e
430c590fb32Scz4e  val l1_pf_req = Wire(Decoupled(new L1PrefetchReq()))
431c590fb32Scz4e  dcache.io.sms_agt_evict_req.ready := false.B
432c590fb32Scz4e  val prefetcherOpt: Option[BasePrefecher] = coreParams.prefetcher.map {
433c590fb32Scz4e    case _: SMSParams =>
434c590fb32Scz4e      val sms = Module(new SMSPrefetcher())
435c590fb32Scz4e      sms.io_agt_en := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_agt, 2, Some(false.B))
436c590fb32Scz4e      sms.io_pht_en := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_pht, 2, Some(false.B))
437c590fb32Scz4e      sms.io_act_threshold := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_active_threshold, 2, Some(12.U))
438c590fb32Scz4e      sms.io_act_stride := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_active_stride, 2, Some(30.U))
439c590fb32Scz4e      sms.io_stride_en := false.B
440c590fb32Scz4e      sms.io_dcache_evict <> dcache.io.sms_agt_evict_req
4414b2c87baS梁森 Liang Sen      val mbistSmsPl = MbistPipeline.PlaceMbistPipeline(1, "MbistPipeSms", hasMbist)
442c590fb32Scz4e      sms
443c590fb32Scz4e  }
444c590fb32Scz4e  prefetcherOpt.foreach{ pf => pf.io.l1_req.ready := false.B }
445c590fb32Scz4e  val hartId = p(XSCoreParamsKey).HartId
446c590fb32Scz4e  val l1PrefetcherOpt: Option[BasePrefecher] = coreParams.prefetcher.map {
447c590fb32Scz4e    case _ =>
448c590fb32Scz4e      val l1Prefetcher = Module(new L1Prefetcher())
4499db05eaeScz4e      val enableL1StreamPrefetcher = Constantin.createRecord(s"enableL1StreamPrefetcher$hartId", initValue = true)
4509db05eaeScz4e      l1Prefetcher.io.enable := enableL1StreamPrefetcher &&
4519db05eaeScz4e        GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable, 2, Some(false.B))
452c590fb32Scz4e      l1Prefetcher.pf_ctrl <> dcache.io.pf_ctrl
453c590fb32Scz4e      l1Prefetcher.l2PfqBusy := io.l2PfqBusy
454c590fb32Scz4e
455c590fb32Scz4e      // stride will train on miss or prefetch hit
456c590fb32Scz4e      for (i <- 0 until LduCnt) {
457c590fb32Scz4e        val source = loadUnits(i).io.prefetch_train_l1
458c590fb32Scz4e        l1Prefetcher.stride_train(i).valid := source.valid && source.bits.isFirstIssue && (
459c590fb32Scz4e          source.bits.miss || isFromStride(source.bits.meta_prefetch)
460c590fb32Scz4e        )
461c590fb32Scz4e        l1Prefetcher.stride_train(i).bits := source.bits
462c590fb32Scz4e        val loadPc = RegNext(io.ooo_to_mem.issueLda(i).bits.uop.pc) // for s1
463c590fb32Scz4e        l1Prefetcher.stride_train(i).bits.uop.pc := Mux(
464c590fb32Scz4e          loadUnits(i).io.s2_ptr_chasing,
465c590fb32Scz4e          RegEnable(loadPc, loadUnits(i).io.s2_prefetch_spec),
466c590fb32Scz4e          RegEnable(RegEnable(loadPc, loadUnits(i).io.s1_prefetch_spec), loadUnits(i).io.s2_prefetch_spec)
467c590fb32Scz4e        )
468c590fb32Scz4e      }
469c590fb32Scz4e      for (i <- 0 until HyuCnt) {
470c590fb32Scz4e        val source = hybridUnits(i).io.prefetch_train_l1
471c590fb32Scz4e        l1Prefetcher.stride_train.drop(LduCnt)(i).valid := source.valid && source.bits.isFirstIssue && (
472c590fb32Scz4e          source.bits.miss || isFromStride(source.bits.meta_prefetch)
473c590fb32Scz4e        )
474c590fb32Scz4e        l1Prefetcher.stride_train.drop(LduCnt)(i).bits := source.bits
475c590fb32Scz4e        l1Prefetcher.stride_train.drop(LduCnt)(i).bits.uop.pc := Mux(
476c590fb32Scz4e          hybridUnits(i).io.ldu_io.s2_ptr_chasing,
477c590fb32Scz4e          RegNext(io.ooo_to_mem.hybridPc(i)),
478c590fb32Scz4e          RegNext(RegNext(io.ooo_to_mem.hybridPc(i)))
479c590fb32Scz4e        )
480c590fb32Scz4e      }
481c590fb32Scz4e      l1Prefetcher
482c590fb32Scz4e  }
483c590fb32Scz4e  // load prefetch to l1 Dcache
484c590fb32Scz4e  l1PrefetcherOpt match {
485c590fb32Scz4e    case Some(pf) => l1_pf_req <> Pipeline(in = pf.io.l1_req, depth = 1, pipe = false, name = Some("pf_queue_to_ldu_reg"))
486c590fb32Scz4e    case None =>
487c590fb32Scz4e      l1_pf_req.valid := false.B
488c590fb32Scz4e      l1_pf_req.bits := DontCare
489c590fb32Scz4e  }
490c590fb32Scz4e  val pf_train_on_hit = RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_train_on_hit, 2, Some(true.B))
491c590fb32Scz4e
492c590fb32Scz4e  loadUnits.zipWithIndex.map(x => x._1.suggestName("LoadUnit_"+x._2))
493c590fb32Scz4e  storeUnits.zipWithIndex.map(x => x._1.suggestName("StoreUnit_"+x._2))
494c590fb32Scz4e  hybridUnits.zipWithIndex.map(x => x._1.suggestName("HybridUnit_"+x._2))
495c590fb32Scz4e  val atomicsUnit = Module(new AtomicsUnit)
496c590fb32Scz4e
497c590fb32Scz4e
498c590fb32Scz4e  val ldaExeWbReqs = Wire(Vec(LduCnt, Decoupled(new MemExuOutput)))
499c590fb32Scz4e  // atomicsUnit will overwrite the source from ldu if it is about to writeback
500c590fb32Scz4e  val atomicWritebackOverride = Mux(
501c590fb32Scz4e    atomicsUnit.io.out.valid,
502c590fb32Scz4e    atomicsUnit.io.out.bits,
503c590fb32Scz4e    loadUnits(AtomicWBPort).io.ldout.bits
504c590fb32Scz4e  )
505c590fb32Scz4e  ldaExeWbReqs(AtomicWBPort).valid := atomicsUnit.io.out.valid || loadUnits(AtomicWBPort).io.ldout.valid
506c590fb32Scz4e  ldaExeWbReqs(AtomicWBPort).bits  := atomicWritebackOverride
507c590fb32Scz4e  atomicsUnit.io.out.ready := ldaExeWbReqs(AtomicWBPort).ready
508c590fb32Scz4e  loadUnits(AtomicWBPort).io.ldout.ready := ldaExeWbReqs(AtomicWBPort).ready
509c590fb32Scz4e
510c590fb32Scz4e  val st_data_atomics = Seq.tabulate(StdCnt)(i =>
511c590fb32Scz4e    stData(i).valid && FuType.storeIsAMO(stData(i).bits.uop.fuType)
512c590fb32Scz4e  )
513c590fb32Scz4e
514c590fb32Scz4e  // misalignBuffer will overwrite the source from ldu if it is about to writeback
515c590fb32Scz4e  val misalignWritebackOverride = Mux(
516c590fb32Scz4e    loadUnits(MisalignWBPort).io.ldout.valid,
517c590fb32Scz4e    loadUnits(MisalignWBPort).io.ldout.bits,
518c590fb32Scz4e    loadMisalignBuffer.io.writeBack.bits
519c590fb32Scz4e  )
520c590fb32Scz4e  ldaExeWbReqs(MisalignWBPort).valid    := loadMisalignBuffer.io.writeBack.valid || loadUnits(MisalignWBPort).io.ldout.valid
521c590fb32Scz4e  ldaExeWbReqs(MisalignWBPort).bits     := misalignWritebackOverride
522c590fb32Scz4e  loadMisalignBuffer.io.writeBack.ready := ldaExeWbReqs(MisalignWBPort).ready && !loadUnits(MisalignWBPort).io.ldout.valid
523c590fb32Scz4e  loadMisalignBuffer.io.loadOutValid    := loadUnits(MisalignWBPort).io.ldout.valid
524c590fb32Scz4e  loadMisalignBuffer.io.loadVecOutValid := loadUnits(MisalignWBPort).io.vecldout.valid
525c590fb32Scz4e  loadUnits(MisalignWBPort).io.ldout.ready := ldaExeWbReqs(MisalignWBPort).ready
526c590fb32Scz4e  ldaExeWbReqs(MisalignWBPort).bits.isFromLoadUnit := loadUnits(MisalignWBPort).io.ldout.bits.isFromLoadUnit || loadMisalignBuffer.io.writeBack.valid
527c590fb32Scz4e
528c590fb32Scz4e  // loadUnit will overwrite the source from uncache if it is about to writeback
  // NOTE(review): no arbitration is visible here; the uncache port is bulk-connected straight to the
  // load unit's ldout, so any load-vs-uncache selection presumably happens inside the load unit — confirm.
529c590fb32Scz4e  ldaExeWbReqs(UncacheWBPort) <> loadUnits(UncacheWBPort).io.ldout
  // Export the load, store-address and store-data writeback ports to the out-of-order backend.
530c590fb32Scz4e  io.mem_to_ooo.writebackLda <> ldaExeWbReqs
531c590fb32Scz4e  io.mem_to_ooo.writebackSta <> storeUnits.map(_.io.stout)
532c590fb32Scz4e  io.mem_to_ooo.writebackStd.zip(stdExeUnits).foreach {x =>
533c590fb32Scz4e    x._1.bits  := x._2.io.out.bits
534c590fb32Scz4e    // AMOs do not need to write back std now.
535c590fb32Scz4e    x._1.valid := x._2.io.out.fire && !FuType.storeIsAMO(x._2.io.out.bits.uop.fuType)
536c590fb32Scz4e  }
537c590fb32Scz4e  io.mem_to_ooo.writebackHyuLda <> hybridUnits.map(_.io.ldout)
538c590fb32Scz4e  io.mem_to_ooo.writebackHyuSta <> hybridUnits.map(_.io.stout)
  // Fast wakeup layout: the first HyuCnt entries come from the hybrid units, the next LduCnt entries
  // from the load units; any remaining entries stay DontCare.
539c590fb32Scz4e  io.mem_to_ooo.otherFastWakeup := DontCare
540c590fb32Scz4e  io.mem_to_ooo.otherFastWakeup.drop(HyuCnt).take(LduCnt).zip(loadUnits.map(_.io.fast_uop)).foreach{case(a,b)=> a := b}
541c590fb32Scz4e  io.mem_to_ooo.otherFastWakeup.take(HyuCnt).zip(hybridUnits.map(_.io.ldu_io.fast_uop)).foreach{case(a,b)=> a:=b}
  // All store-address writeback ports (store units first, then hybrid units).
542c590fb32Scz4e  val stOut = io.mem_to_ooo.writebackSta ++ io.mem_to_ooo.writebackHyuSta
543c590fb32Scz4e
544c590fb32Scz4e  // prefetch to l1 req
  // The single l1_pf_req (valid and bits) is broadcast to every load unit and to the load side of
  // every hybrid unit; per-port confidence overrides and the combined ready are computed below.
545c590fb32Scz4e  // Stream's confidence is always 1
546c590fb32Scz4e  // (LduCnt + HyuCnt) l1_pf_reqs ?
547c590fb32Scz4e  loadUnits.foreach(load_unit => {
548c590fb32Scz4e    load_unit.io.prefetch_req.valid <> l1_pf_req.valid
549c590fb32Scz4e    load_unit.io.prefetch_req.bits <> l1_pf_req.bits
550c590fb32Scz4e  })
551c590fb32Scz4e
552c590fb32Scz4e  hybridUnits.foreach(hybrid_unit => {
553c590fb32Scz4e    hybrid_unit.io.ldu_io.prefetch_req.valid <> l1_pf_req.valid
554c590fb32Scz4e    hybrid_unit.io.ldu_io.prefetch_req.bits <> l1_pf_req.bits
555c590fb32Scz4e  })
556c590fb32Scz4e
557c590fb32Scz4e  // NOTE: loadUnits(0) has higher bank conflict and miss queue arb priority than loadUnits(1) and loadUnits(2)
558c590fb32Scz4e  // when loadUnits(1)/loadUnits(2) stage 0 is busy, hw prefetch will never use that pipeline
  // Load-unit ports whose prefetch confidence is forced to 0 (overriding the broadcast bits above).
  // NOTE(review): for any LduCnt other than 2 or 3 this falls back to port 0 — confirm that is intended.
559c590fb32Scz4e  val LowConfPorts = if (LduCnt == 2) Seq(1) else if (LduCnt == 3) Seq(1, 2) else Seq(0)
  // foreach (not map): the assignments are pure side effects and no result collection is needed.
560c590fb32Scz4e  LowConfPorts.foreach{case i => loadUnits(i).io.prefetch_req.bits.confidence := 0.U}
  // Hybrid units always receive confidence 0 as well.
561c590fb32Scz4e  hybridUnits.foreach(hybrid_unit => { hybrid_unit.io.ldu_io.prefetch_req.bits.confidence := 0.U })
562c590fb32Scz4e
  // Per-port accept signals, indexed load units first and hybrid units after. Hybrid units contribute
  // their low-confidence accept signal to both vectors, consistent with their confidence being forced to 0.
563c590fb32Scz4e  val canAcceptHighConfPrefetch = loadUnits.map(_.io.canAcceptHighConfPrefetch) ++
564c590fb32Scz4e                                  hybridUnits.map(_.io.canAcceptLowConfPrefetch)
565c590fb32Scz4e  val canAcceptLowConfPrefetch = loadUnits.map(_.io.canAcceptLowConfPrefetch) ++
566c590fb32Scz4e                                 hybridUnits.map(_.io.canAcceptLowConfPrefetch)
  // The request is ready when at least one port can take it. Forced-low-confidence ports are judged by
  // their low-confidence accept signal; the others by the confidence carried in the request itself.
567c590fb32Scz4e  l1_pf_req.ready := (0 until LduCnt + HyuCnt).map{
568c590fb32Scz4e    case i => {
569c590fb32Scz4e      if (LowConfPorts.contains(i)) {
        // Same signal as loadUnits(i).io.canAcceptLowConfPrefetch for i < LduCnt; indexed through the
        // shared Seq so both branches read from the same per-port vectors.
570c590fb32Scz4e        canAcceptLowConfPrefetch(i)
571c590fb32Scz4e      } else {
572c590fb32Scz4e        Mux(l1_pf_req.bits.confidence === 1.U, canAcceptHighConfPrefetch(i), canAcceptLowConfPrefetch(i))
573c590fb32Scz4e      }
574c590fb32Scz4e    }
575c590fb32Scz4e  }.reduce(_ || _)
576c590fb32Scz4e
577c590fb32Scz4e  // l1 pf fuzzer interface
  // Elaboration-time switch: when set to true, an L1PrefetchFuzzer is instantiated and its request
  // replaces l1_pf_req as the prefetch source for all load and hybrid units (later connections win).
578c590fb32Scz4e  val DebugEnableL1PFFuzzer = false
579c590fb32Scz4e  if (DebugEnableL1PFFuzzer) {
580c590fb32Scz4e    // l1 pf req fuzzer
581c590fb32Scz4e    val fuzzer = Module(new L1PrefetchFuzzer())
582c590fb32Scz4e    fuzzer.io.vaddr := DontCare
583c590fb32Scz4e    fuzzer.io.paddr := DontCare
584c590fb32Scz4e
585c590fb32Scz4e    // override load_unit prefetch_req
    // NOTE(review): this reconnects the whole bits bundle, so it also replaces the confidence := 0.U
    // overrides made earlier for the low-confidence ports — confirm that is acceptable when fuzzing.
586c590fb32Scz4e    loadUnits.foreach(load_unit => {
587c590fb32Scz4e      load_unit.io.prefetch_req.valid <> fuzzer.io.req.valid
588c590fb32Scz4e      load_unit.io.prefetch_req.bits <> fuzzer.io.req.bits
589c590fb32Scz4e    })
590c590fb32Scz4e
591c590fb32Scz4e    // override hybrid_unit prefetch_req
592c590fb32Scz4e    hybridUnits.foreach(hybrid_unit => {
593c590fb32Scz4e      hybrid_unit.io.ldu_io.prefetch_req.valid <> fuzzer.io.req.valid
594c590fb32Scz4e      hybrid_unit.io.ldu_io.prefetch_req.bits <> fuzzer.io.req.bits
595c590fb32Scz4e    })
596c590fb32Scz4e
    // The fuzzer observes the same ready that is computed for l1_pf_req.
597c590fb32Scz4e    fuzzer.io.req.ready := l1_pf_req.ready
598c590fb32Scz4e  }
599c590fb32Scz4e
600c590fb32Scz4e  // TODO: fast load wakeup
  // Instantiate the load/store queue wrapper and the store buffer.
601c590fb32Scz4e  val lsq     = Module(new LsqWrapper)
602c590fb32Scz4e  val sbuffer = Module(new Sbuffer)
603c590fb32Scz4e  // if you want to stress test dcache store, use FakeSbuffer
604c590fb32Scz4e  // val sbuffer = Module(new FakeSbuffer) // out of date now
605c590fb32Scz4e  io.mem_to_ooo.stIssuePtr := lsq.io.issuePtrExt
606c590fb32Scz4e
  // Distribute the hart id to the sub-modules that take one.
607c590fb32Scz4e  dcache.io.hartId := io.hartId
608c590fb32Scz4e  lsq.io.hartId := io.hartId
609c590fb32Scz4e  sbuffer.io.hartId := io.hartId
610c590fb32Scz4e  atomicsUnit.io.hartId := io.hartId
611c590fb32Scz4e
612c590fb32Scz4e  dcache.io.lqEmpty := lsq.io.lqEmpty
613c590fb32Scz4e
614c590fb32Scz4e  // load/store prefetch to l2 cache
  // Only elaborated when both the SMS prefetcher (prefetcherOpt) and the L1 prefetcher (l1PrefetcherOpt)
  // are present. NOTE(review): outer.l2_pf_sender_opt is dereferenced with .get, so it is assumed to be
  // defined whenever both prefetchers are — confirm against the outer LazyModule.
615c590fb32Scz4e  prefetcherOpt.foreach(sms_pf => {
616c590fb32Scz4e    l1PrefetcherOpt.foreach(l1_pf => {
      // Both L2 request streams go through DelayNWithValid with an argument of 2
      // (presumably a two-cycle delay — confirm against the utility's definition).
617c590fb32Scz4e      val sms_pf_to_l2 = DelayNWithValid(sms_pf.io.l2_req, 2)
618c590fb32Scz4e      val l1_pf_to_l2 = DelayNWithValid(l1_pf.io.l2_req, 2)
619c590fb32Scz4e
      // Single L2 prefetch channel shared by both sources; the L1 prefetcher's request wins when both are valid.
620c590fb32Scz4e      outer.l2_pf_sender_opt.get.out.head._1.addr_valid := sms_pf_to_l2.valid || l1_pf_to_l2.valid
621c590fb32Scz4e      outer.l2_pf_sender_opt.get.out.head._1.addr := Mux(l1_pf_to_l2.valid, l1_pf_to_l2.bits.addr, sms_pf_to_l2.bits.addr)
622c590fb32Scz4e      outer.l2_pf_sender_opt.get.out.head._1.pf_source := Mux(l1_pf_to_l2.valid, l1_pf_to_l2.bits.source, sms_pf_to_l2.bits.source)
623c590fb32Scz4e      outer.l2_pf_sender_opt.get.out.head._1.l2_pf_en := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_enable, 2, Some(true.B))
624c590fb32Scz4e
625c590fb32Scz4e      sms_pf.io.enable := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable, 2, Some(false.B))
626c590fb32Scz4e
      // Log the address sent to L2 into a per-hart ChiselDB table, tagged by which prefetcher produced it.
627c590fb32Scz4e      val l2_trace = Wire(new LoadPfDbBundle)
628c590fb32Scz4e      l2_trace.paddr := outer.l2_pf_sender_opt.get.out.head._1.addr
629c590fb32Scz4e      val table = ChiselDB.createTable(s"L2PrefetchTrace$hartId", new LoadPfDbBundle, basicDB = false)
630c590fb32Scz4e      table.log(l2_trace, l1_pf_to_l2.valid, "StreamPrefetchTrace", clock, reset)
631c590fb32Scz4e      table.log(l2_trace, !l1_pf_to_l2.valid && sms_pf_to_l2.valid, "L2PrefetchTrace", clock, reset)
632c590fb32Scz4e
      // L3 prefetch requests come from the L1 prefetcher only and are wired only if an L3 sender exists.
633c590fb32Scz4e      val l1_pf_to_l3 = ValidIODelay(l1_pf.io.l3_req, 4)
634c590fb32Scz4e      outer.l3_pf_sender_opt.foreach(_.out.head._1.addr_valid := l1_pf_to_l3.valid)
635c590fb32Scz4e      outer.l3_pf_sender_opt.foreach(_.out.head._1.addr := l1_pf_to_l3.bits)
636c590fb32Scz4e      outer.l3_pf_sender_opt.foreach(_.out.head._1.l2_pf_en := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_enable, 4, Some(true.B)))
637c590fb32Scz4e
      // The L3 trace address falls back to 0 when there is no L3 sender.
638c590fb32Scz4e      val l3_trace = Wire(new LoadPfDbBundle)
639c590fb32Scz4e      l3_trace.paddr := outer.l3_pf_sender_opt.map(_.out.head._1.addr).getOrElse(0.U)
640c590fb32Scz4e      val l3_table = ChiselDB.createTable(s"L3PrefetchTrace$hartId", new LoadPfDbBundle, basicDB = false)
641c590fb32Scz4e      l3_table.log(l3_trace, l1_pf_to_l3.valid, "StreamPrefetchTrace", clock, reset)
642c590fb32Scz4e
      // Performance counters: total fires per level, per-source fires, and SMS requests shadowed by L1.
643c590fb32Scz4e      XSPerfAccumulate("prefetch_fire_l2", outer.l2_pf_sender_opt.get.out.head._1.addr_valid)
644c590fb32Scz4e      XSPerfAccumulate("prefetch_fire_l3", outer.l3_pf_sender_opt.map(_.out.head._1.addr_valid).getOrElse(false.B))
645c590fb32Scz4e      XSPerfAccumulate("l1pf_fire_l2", l1_pf_to_l2.valid)
646c590fb32Scz4e      XSPerfAccumulate("sms_fire_l2", !l1_pf_to_l2.valid && sms_pf_to_l2.valid)
647c590fb32Scz4e      XSPerfAccumulate("sms_block_by_l1pf", l1_pf_to_l2.valid && sms_pf_to_l2.valid)
648c590fb32Scz4e    })
649c590fb32Scz4e  })
650c590fb32Scz4e
651c590fb32Scz4e  // ptw
  // sfence and the TLB CSR bundle are each passed through two RegNext stages before use in this block.
652c590fb32Scz4e  val sfence = RegNext(RegNext(io.ooo_to_mem.sfence))
653c590fb32Scz4e  val tlbcsr = RegNext(RegNext(io.ooo_to_mem.tlbCsr))
  // Module implementations of the LazyModules declared on the outer class.
654c590fb32Scz4e  private val ptw = outer.ptw.module
655c590fb32Scz4e  private val ptw_to_l2_buffer = outer.ptw_to_l2_buffer.module
656c590fb32Scz4e  private val l1d_to_l2_buffer = outer.l1d_to_l2_buffer.module
657c590fb32Scz4e  ptw.io.hartId := io.hartId
658c590fb32Scz4e  ptw.io.sfence <> sfence
659c590fb32Scz4e  ptw.io.csr.tlb <> tlbcsr
660c590fb32Scz4e  ptw.io.csr.distribute_csr <> csrCtrl.distribute_csr
661c590fb32Scz4e
  // PTW performance events are collected only when the hardware PTW is used (softPTW disabled).
662c590fb32Scz4e  val perfEventsPTW = if (!coreParams.softPTW) {
663c590fb32Scz4e    ptw.getPerfEvents
664c590fb32Scz4e  } else {
665c590fb32Scz4e    Seq()
666c590fb32Scz4e  }
667c590fb32Scz4e
668c590fb32Scz4e  // dtlb
  // Three non-blocking TLBs: one shared by all load-side requestors (load units, hybrid units and one
  // extra port), one for store-address units, and one two-port TLB for prefetchers.
669c590fb32Scz4e  val dtlb_ld_tlb_ld = Module(new TLBNonBlock(LduCnt + HyuCnt + 1, 2, ldtlbParams))
670c590fb32Scz4e  val dtlb_st_tlb_st = Module(new TLBNonBlock(StaCnt, 1, sttlbParams))
671c590fb32Scz4e  val dtlb_prefetch_tlb_prefetch = Module(new TLBNonBlock(2, 2, pftlbParams))
672c590fb32Scz4e  val dtlb_ld = Seq(dtlb_ld_tlb_ld.io)
673c590fb32Scz4e  val dtlb_st = Seq(dtlb_st_tlb_st.io)
674c590fb32Scz4e  val dtlb_prefetch = Seq(dtlb_prefetch_tlb_prefetch.io)
675c590fb32Scz4e  /* tlb vec && constant variable */
676c590fb32Scz4e  val dtlb = dtlb_ld ++ dtlb_st ++ dtlb_prefetch
677c590fb32Scz4e  val (dtlb_ld_idx, dtlb_st_idx, dtlb_pf_idx) = (0, 1, 2)
678c590fb32Scz4e  val TlbSubSizeVec = Seq(LduCnt + HyuCnt + 1, StaCnt, 2) // (load + hyu + stream pf, store, sms+l2bop)
679c590fb32Scz4e  val DTlbSize = TlbSubSizeVec.sum
  // Half-open [start, end) index range of each TLB's ports within the flattened port list.
680c590fb32Scz4e  val TlbStartVec = TlbSubSizeVec.scanLeft(0)(_ + _).dropRight(1)
681c590fb32Scz4e  val TlbEndVec = TlbSubSizeVec.scanLeft(0)(_ + _).drop(1)
682c590fb32Scz4e
683c590fb32Scz4e  val ptwio = Wire(new VectorTlbPtwIO(DTlbSize))
  // Flattened requestor and PMP ports, in load / store / prefetch order.
684c590fb32Scz4e  val dtlb_reqs = dtlb.flatMap(_.requestor)
685c590fb32Scz4e  val dtlb_pmps = dtlb.flatMap(_.pmp)
  // Common control inputs for every TLB (foreach: these are connections, no result is collected).
686c590fb32Scz4e  dtlb.foreach(_.hartId := io.hartId)
687c590fb32Scz4e  dtlb.foreach(_.sfence := sfence)
688c590fb32Scz4e  dtlb.foreach(_.csr := tlbcsr)
689c590fb32Scz4e  dtlb.foreach(_.flushPipe.foreach(a => a := false.B)) // non-block doesn't need
690c590fb32Scz4e  dtlb.foreach(_.redirect := redirect)
691c590fb32Scz4e  if (refillBothTlb) {
    // A single shared replacement module across all TLBs requires every TLB to use outer replacement.
692c590fb32Scz4e    require(ldtlbParams.outReplace == sttlbParams.outReplace)
693c590fb32Scz4e    require(ldtlbParams.outReplace == hytlbParams.outReplace)
694c590fb32Scz4e    require(ldtlbParams.outReplace == pftlbParams.outReplace)
695c590fb32Scz4e    require(ldtlbParams.outReplace)
696c590fb32Scz4e
697c590fb32Scz4e    val replace = Module(new TlbReplace(DTlbSize, ldtlbParams))
698c590fb32Scz4e    replace.io.apply_sep(dtlb_ld.map(_.replace) ++ dtlb_st.map(_.replace) ++ dtlb_prefetch.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
699c590fb32Scz4e  } else {
700c590fb32Scz4e    // TODO: there will be bugs in TlbReplace when outReplace enable, since the order of Hyu is not right.
    // NOTE(review): replace_ld is sized LduCnt + 1 while the load TLB has LduCnt + HyuCnt + 1 ports, and
    // replace_hy below is attached to the same dtlb_ld replace ports as replace_ld — both look related
    // to the TODO above; confirm before enabling outReplace without refillBothTlb.
701c590fb32Scz4e    if (ldtlbParams.outReplace) {
702c590fb32Scz4e      val replace_ld = Module(new TlbReplace(LduCnt + 1, ldtlbParams))
703c590fb32Scz4e      replace_ld.io.apply_sep(dtlb_ld.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
704c590fb32Scz4e    }
705c590fb32Scz4e    if (hytlbParams.outReplace) {
706c590fb32Scz4e      val replace_hy = Module(new TlbReplace(HyuCnt, hytlbParams))
707c590fb32Scz4e      replace_hy.io.apply_sep(dtlb_ld.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
708c590fb32Scz4e    }
709c590fb32Scz4e    if (sttlbParams.outReplace) {
710c590fb32Scz4e      val replace_st = Module(new TlbReplace(StaCnt, sttlbParams))
711c590fb32Scz4e      replace_st.io.apply_sep(dtlb_st.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
712c590fb32Scz4e    }
713c590fb32Scz4e    if (pftlbParams.outReplace) {
714c590fb32Scz4e      val replace_pf = Module(new TlbReplace(2, pftlbParams))
715c590fb32Scz4e      replace_pf.io.apply_sep(dtlb_prefetch.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
716c590fb32Scz4e    }
717c590fb32Scz4e  }
718c590fb32Scz4e
719c590fb32Scz4e  // Latch the PTW response for one cycle: bits are captured when the response is valid.
  val ptw_resp_next = RegEnable(ptwio.resp.bits, ptwio.resp.valid)
  // Registered valid for the latched response; the response is dropped when the flush condition holds.
  // NOTE(review): the flush condition is a conjunction — sfence.valid AND satp/vsatp/hgatp all changed
  // in the same cycle. Confirm that requiring all four at once (rather than any of them) is intended.
720c590fb32Scz4e  val ptw_resp_v = RegNext(ptwio.resp.valid && !(sfence.valid && tlbcsr.satp.changed && tlbcsr.vsatp.changed && tlbcsr.hgatp.changed), init = false.B)
  // PTW responses are always accepted.
721c590fb32Scz4e  ptwio.resp.ready := true.B
722c590fb32Scz4e
  // Per-load-port replay hint, raised below when a pending PTW request matches the latched response.
723c590fb32Scz4e  val tlbreplay = WireInit(VecInit(Seq.fill(LdExuCnt)(false.B)))
724c590fb32Scz4e  val tlbreplay_reg = GatedValidRegNext(tlbreplay)
725c590fb32Scz4e  val dtlb_ld0_tlbreplay_reg = GatedValidRegNext(dtlb_ld(0).tlbreplay)
726c590fb32Scz4e
727c590fb32Scz4e  if (backendParams.debugEn){ dontTouch(tlbreplay) }
728c590fb32Scz4e
729c590fb32Scz4e  for (i <- 0 until LdExuCnt) {
    // Port i replays when its PTW request is valid and the latched, still-valid response (vector(0) set)
    // hits the requested vpn.
730c590fb32Scz4e    tlbreplay(i) := dtlb_ld(0).ptw.req(i).valid && ptw_resp_next.vector(0) && ptw_resp_v &&
731c590fb32Scz4e      ptw_resp_next.data.hit(dtlb_ld(0).ptw.req(i).bits.vpn, tlbcsr.satp.asid, tlbcsr.vsatp.asid, tlbcsr.hgatp.vmid, allType = true, ignoreAsid = true)
732c590fb32Scz4e  }
733c590fb32Scz4e
  // Forward every TLB's PTW request to ptwio, in flattened load / store / prefetch order.
  // A request is suppressed when the latched response already answers it for that TLB group.
734c590fb32Scz4e  dtlb.flatMap(a => a.ptw.req)
735c590fb32Scz4e    .zipWithIndex
736c590fb32Scz4e    .foreach{ case (tlb, i) =>
737c590fb32Scz4e      tlb.ready := ptwio.req(i).ready
738c590fb32Scz4e      ptwio.req(i).bits := tlb.bits
    // vector_hit: does the latched response target the TLB group this port belongs to
    // (any group when refillBothTlb is set)?
739c590fb32Scz4e    val vector_hit = if (refillBothTlb) Cat(ptw_resp_next.vector).orR
740c590fb32Scz4e      else if (i < TlbEndVec(dtlb_ld_idx)) Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_ld_idx), TlbEndVec(dtlb_ld_idx))).orR
741c590fb32Scz4e      else if (i < TlbEndVec(dtlb_st_idx)) Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_st_idx), TlbEndVec(dtlb_st_idx))).orR
742c590fb32Scz4e      else                                 Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_pf_idx), TlbEndVec(dtlb_pf_idx))).orR
743c590fb32Scz4e    ptwio.req(i).valid := tlb.valid && !(ptw_resp_v && vector_hit && ptw_resp_next.data.hit(tlb.bits.vpn, tlbcsr.satp.asid, tlbcsr.vsatp.asid, tlbcsr.hgatp.vmid, allType = true, ignoreAsid = true))
744c590fb32Scz4e  }
  // Every TLB receives the latched response data; the valid is either shared (refillBothTlb) or
  // restricted to the TLB group whose slice of the response vector is set.
745c590fb32Scz4e  dtlb.foreach(_.ptw.resp.bits := ptw_resp_next.data)
746c590fb32Scz4e  if (refillBothTlb) {
747c590fb32Scz4e    dtlb.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector).orR)
748c590fb32Scz4e  } else {
749c590fb32Scz4e    dtlb_ld.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_ld_idx), TlbEndVec(dtlb_ld_idx))).orR)
750c590fb32Scz4e    dtlb_st.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_st_idx), TlbEndVec(dtlb_st_idx))).orR)
751c590fb32Scz4e    dtlb_prefetch.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_pf_idx), TlbEndVec(dtlb_pf_idx))).orR)
752c590fb32Scz4e  }
  // getGpa is reduced per TLB group over that group's slice of the response (same port layout as above).
753c590fb32Scz4e  dtlb_ld.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.take(LduCnt + HyuCnt + 1)).orR)
754c590fb32Scz4e  dtlb_st.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.slice(LduCnt + HyuCnt + 1, LduCnt + HyuCnt + 1 + StaCnt)).orR)
755c590fb32Scz4e  dtlb_prefetch.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.drop(LduCnt + HyuCnt + 1 + StaCnt)).orR)
756c590fb32Scz4e
757c590fb32Scz4e  // Data-side TLB requests reach the PTW through a filter on ptw.io.tlb(1); instruction-side requests
  // from the frontend go through a repeater on ptw.io.tlb(0).
  val dtlbRepeater  = PTWNewFilter(ldtlbParams.fenceDelay, ptwio, ptw.io.tlb(1), sfence, tlbcsr, l2tlbParams.dfilterSize)
758c590fb32Scz4e  val itlbRepeater3 = PTWRepeaterNB(passReady = false, itlbParams.fenceDelay, io.fetch_to_mem.itlb, ptw.io.tlb(0), sfence, tlbcsr)
759c590fb32Scz4e
  // Top-down debug: report whether the ROB head is missing in the data TLB.
760c590fb32Scz4e  lsq.io.debugTopDown.robHeadMissInDTlb := dtlbRepeater.io.rob_head_miss_in_tlb
761c590fb32Scz4e
762c590fb32Scz4e  // pmp
  // One PMP register module, configured through the distributed CSR interface.
763c590fb32Scz4e  val pmp = Module(new PMP())
764c590fb32Scz4e  pmp.io.distribute_csr <> csrCtrl.distribute_csr
765c590fb32Scz4e
  // One PMP checker per flattened DTLB port, paired in order with that port's PMP request.
766c590fb32Scz4e  val pmp_checkers = Seq.fill(DTlbSize)(Module(new PMPChecker(4, leaveHitMux = true)))
767c590fb32Scz4e  val pmp_check = pmp_checkers.map(_.io)
768c590fb32Scz4e  for ((p,d) <- pmp_check zip dtlb_pmps) {
    // With bitmap checking enabled the checker additionally receives mbmc.CMODE.
7698882eb68SXin Tian    if (HasBitmapCheck) {
7708882eb68SXin Tian      p.apply(tlbcsr.mbmc.CMODE.asBool, tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, d)
7718882eb68SXin Tian    } else {
772c590fb32Scz4e      p.apply(tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, d)
7738882eb68SXin Tian    }
    // Elaboration-time sanity check: the checker and TLB must agree on the width of the size field.
774c590fb32Scz4e    require(p.req.bits.size.getWidth == d.bits.size.getWidth)
775c590fb32Scz4e  }
776c590fb32Scz4e
777c590fb32Scz4e  // debugLsInfo layout: LduCnt load units, then HyuCnt hybrid load sides, then StaCnt store units,
  // then HyuCnt hybrid store sides.
  for (i <- 0 until LduCnt) {
778c590fb32Scz4e    io.debug_ls.debugLsInfo(i) := loadUnits(i).io.debug_ls
779c590fb32Scz4e  }
780c590fb32Scz4e  for (i <- 0 until HyuCnt) {
781c590fb32Scz4e    io.debug_ls.debugLsInfo.drop(LduCnt)(i) := hybridUnits(i).io.ldu_io.debug_ls
782c590fb32Scz4e  }
783c590fb32Scz4e  for (i <- 0 until StaCnt) {
784c590fb32Scz4e    io.debug_ls.debugLsInfo.drop(LduCnt + HyuCnt)(i) := storeUnits(i).io.debug_ls
785c590fb32Scz4e  }
786c590fb32Scz4e  for (i <- 0 until HyuCnt) {
787c590fb32Scz4e    io.debug_ls.debugLsInfo.drop(LduCnt + HyuCnt + StaCnt)(i) := hybridUnits(i).io.stu_io.debug_ls
788c590fb32Scz4e  }
789c590fb32Scz4e
  // Top-down info: load units first, then the load side of the hybrid units.
790c590fb32Scz4e  io.mem_to_ooo.lsTopdownInfo := loadUnits.map(_.io.lsTopdownInfo) ++ hybridUnits.map(_.io.ldu_io.lsTopdownInfo)
791c590fb32Scz4e
792c590fb32Scz4e  // trigger
  // Local registered copies of the memory trigger configuration: tdata entries are updated one at a
  // time through tUpdate, while the enable vector is re-registered from the CSR every cycle.
793c590fb32Scz4e  val tdata = RegInit(VecInit(Seq.fill(TriggerNum)(0.U.asTypeOf(new MatchTriggerIO))))
794c590fb32Scz4e  val tEnable = RegInit(VecInit(Seq.fill(TriggerNum)(false.B)))
795c590fb32Scz4e  tEnable := csrCtrl.mem_trigger.tEnableVec
796c590fb32Scz4e  when(csrCtrl.mem_trigger.tUpdate.valid) {
797c590fb32Scz4e    tdata(csrCtrl.mem_trigger.tUpdate.bits.addr) := csrCtrl.mem_trigger.tUpdate.bits.tdata
798c590fb32Scz4e  }
  // These two are passed through from the CSR without registering.
799c590fb32Scz4e  val triggerCanRaiseBpExp = csrCtrl.mem_trigger.triggerCanRaiseBpExp
800c590fb32Scz4e  val debugMode = csrCtrl.mem_trigger.debugMode
801c590fb32Scz4e
  // Per-trigger timing and chain fields extracted from tdata.
802c590fb32Scz4e  val backendTriggerTimingVec = VecInit(tdata.map(_.timing))
803c590fb32Scz4e  val backendTriggerChainVec = VecInit(tdata.map(_.chain))
804c590fb32Scz4e
805c590fb32Scz4e  XSDebug(tEnable.asUInt.orR, "Debug Mode: At least one store trigger is enabled\n")
806c590fb32Scz4e  for (j <- 0 until TriggerNum)
807c590fb32Scz4e    PrintTriggerInfo(tEnable(j), tdata(j))
808c590fb32Scz4e
809c590fb32Scz4e  // The segment instruction is executed atomically.
810c590fb32Scz4e  // After the segment instruction directive starts executing, no other instructions should be executed.
  // vSegmentFlag is set from a registered copy of vSegmentUnit.io.in.fire and cleared from a registered
  // copy of its uop writeback valid; if both are seen together, the set takes precedence (when/elsewhen).
811c590fb32Scz4e  val vSegmentFlag = RegInit(false.B)
812c590fb32Scz4e
813c590fb32Scz4e  when(GatedValidRegNext(vSegmentUnit.io.in.fire)) {
814c590fb32Scz4e    vSegmentFlag := true.B
815c590fb32Scz4e  }.elsewhen(GatedValidRegNext(vSegmentUnit.io.uopwriteback.valid)) {
816c590fb32Scz4e    vSegmentFlag := false.B
817c590fb32Scz4e  }
818c590fb32Scz4e
819522c7f99SAnzo  val misalign_allow_spec = RegInit(true.B)
  // True when any load unit rolls back on a request that came from the misalign buffer and was
  // nacked by the RAR queue.
820522c7f99SAnzo  val ldu_rollback_with_misalign_nack = loadUnits.map(ldu =>
821522c7f99SAnzo    ldu.io.lsq.ldin.bits.isFrmMisAlignBuf && ldu.io.lsq.ldin.bits.rep_info.rar_nack && ldu.io.rollback.valid
822522c7f99SAnzo  ).reduce(_ || _)
  // Disable speculation on such a rollback; re-enable it once the RAR queue occupancy drops below
  // LoadQueueRARSize - 4. The disable condition has priority.
823522c7f99SAnzo  when (ldu_rollback_with_misalign_nack) {
824522c7f99SAnzo    misalign_allow_spec := false.B
825522c7f99SAnzo  } .elsewhen(lsq.io.rarValidCount < (LoadQueueRARSize - 4).U) {
826522c7f99SAnzo    misalign_allow_spec := true.B
827522c7f99SAnzo  }
828522c7f99SAnzo
829c590fb32Scz4e  // LoadUnit
  // Runtime-tunable (Constantin) flag, handed to every load unit below.
830c590fb32Scz4e  val correctMissTrain = Constantin.createRecord(s"CorrectMissTrain$hartId", initValue = false)
831c590fb32Scz4e
  // Per-load-unit wiring. Port 0 is additionally shared with the vector segment unit for dcache and
  // DTLB access. NOTE(review): several connections in this loop do not depend on i (the
  // dcache.io.lsu.load(0) side-band signals, the l2_hint register and its consumers, lsq.io.tlb_hint);
  // they are re-issued on every iteration with identical sources.
832c590fb32Scz4e  for (i <- 0 until LduCnt) {
833c590fb32Scz4e    loadUnits(i).io.redirect <> redirect
834522c7f99SAnzo    loadUnits(i).io.misalign_allow_spec := misalign_allow_spec
835c590fb32Scz4e
836c590fb32Scz4e    // get input from dispatch
837c590fb32Scz4e    loadUnits(i).io.ldin <> io.ooo_to_mem.issueLda(i)
838c590fb32Scz4e    loadUnits(i).io.feedback_slow <> io.mem_to_ooo.ldaIqFeedback(i).feedbackSlow
839c590fb32Scz4e    io.mem_to_ooo.ldaIqFeedback(i).feedbackFast := DontCare
840c590fb32Scz4e    loadUnits(i).io.correctMissTrain := correctMissTrain
    // ldCancel / wakeup: hybrid units occupy the first HyuCnt slots, load units follow.
841c590fb32Scz4e    io.mem_to_ooo.ldCancel.drop(HyuCnt)(i) := loadUnits(i).io.ldCancel
842c590fb32Scz4e    io.mem_to_ooo.wakeup.drop(HyuCnt)(i) := loadUnits(i).io.wakeup
843c590fb32Scz4e
844c590fb32Scz4e    // vector
    // vecldout.ready is tied low for every load unit here; load units outside the first VlduCnt also
    // get an idle vector input. NOTE(review): vecldin for i < VlduCnt is not driven in this block —
    // presumably connected elsewhere in the file.
845c590fb32Scz4e    if (i < VlduCnt) {
846c590fb32Scz4e      loadUnits(i).io.vecldout.ready := false.B
847c590fb32Scz4e    } else {
848c590fb32Scz4e      loadUnits(i).io.vecldin.valid := false.B
849c590fb32Scz4e      loadUnits(i).io.vecldin.bits := DontCare
850c590fb32Scz4e      loadUnits(i).io.vecldout.ready := false.B
851c590fb32Scz4e    }
852c590fb32Scz4e
853c590fb32Scz4e    // fast replay
    // Each load unit's fast-replay output loops straight back into its own fast-replay input.
854c590fb32Scz4e    loadUnits(i).io.fast_rep_in <> loadUnits(i).io.fast_rep_out
855c590fb32Scz4e
856c590fb32Scz4e    // SoftPrefetch to frontend (prefetch.i)
857c590fb32Scz4e    loadUnits(i).io.ifetchPrefetch <> io.ifetchPrefetch(i)
858c590fb32Scz4e
859c590fb32Scz4e    // dcache access
860c590fb32Scz4e    loadUnits(i).io.dcache <> dcache.io.lsu.load(i)
861c590fb32Scz4e    if(i == 0){
      // Port 0 of the dcache is shared with the vector segment unit: the request is valid when either
      // source is valid and the bits are selected with a one-hot mux.
      // NOTE(review): Mux1H assumes the two valids are never asserted together — confirm.
862c590fb32Scz4e      vSegmentUnit.io.rdcache := DontCare
863c590fb32Scz4e      dcache.io.lsu.load(i).req.valid := loadUnits(i).io.dcache.req.valid || vSegmentUnit.io.rdcache.req.valid
864c590fb32Scz4e      dcache.io.lsu.load(i).req.bits  := Mux1H(Seq(
865c590fb32Scz4e        vSegmentUnit.io.rdcache.req.valid -> vSegmentUnit.io.rdcache.req.bits,
866c590fb32Scz4e        loadUnits(i).io.dcache.req.valid -> loadUnits(i).io.dcache.req.bits
867c590fb32Scz4e      ))
868c590fb32Scz4e      vSegmentUnit.io.rdcache.req.ready := dcache.io.lsu.load(i).req.ready
869c590fb32Scz4e    }
870c590fb32Scz4e
871c590fb32Scz4e    // Dcache requests must also be preempted by the segment.
    // While a segment instruction is in flight, every load unit sees its dcache request as not ready,
    // and the side-band signals of dcache load port 0 are driven by the segment unit instead of loadUnits(0).
872c590fb32Scz4e    when(vSegmentFlag){
873c590fb32Scz4e      loadUnits(i).io.dcache.req.ready             := false.B // Dcache is preempted.
874c590fb32Scz4e
875c590fb32Scz4e      dcache.io.lsu.load(0).pf_source              := vSegmentUnit.io.rdcache.pf_source
876c590fb32Scz4e      dcache.io.lsu.load(0).s1_paddr_dup_lsu       := vSegmentUnit.io.rdcache.s1_paddr_dup_lsu
877c590fb32Scz4e      dcache.io.lsu.load(0).s1_paddr_dup_dcache    := vSegmentUnit.io.rdcache.s1_paddr_dup_dcache
878c590fb32Scz4e      dcache.io.lsu.load(0).s1_kill                := vSegmentUnit.io.rdcache.s1_kill
879c590fb32Scz4e      dcache.io.lsu.load(0).s2_kill                := vSegmentUnit.io.rdcache.s2_kill
880c590fb32Scz4e      dcache.io.lsu.load(0).s0_pc                  := vSegmentUnit.io.rdcache.s0_pc
881c590fb32Scz4e      dcache.io.lsu.load(0).s1_pc                  := vSegmentUnit.io.rdcache.s1_pc
882c590fb32Scz4e      dcache.io.lsu.load(0).s2_pc                  := vSegmentUnit.io.rdcache.s2_pc
883c590fb32Scz4e      dcache.io.lsu.load(0).is128Req               := vSegmentUnit.io.rdcache.is128Req
884c590fb32Scz4e    }.otherwise {
885c590fb32Scz4e      loadUnits(i).io.dcache.req.ready             := dcache.io.lsu.load(i).req.ready
886c590fb32Scz4e
887c590fb32Scz4e      dcache.io.lsu.load(0).pf_source              := loadUnits(0).io.dcache.pf_source
888c590fb32Scz4e      dcache.io.lsu.load(0).s1_paddr_dup_lsu       := loadUnits(0).io.dcache.s1_paddr_dup_lsu
889c590fb32Scz4e      dcache.io.lsu.load(0).s1_paddr_dup_dcache    := loadUnits(0).io.dcache.s1_paddr_dup_dcache
890c590fb32Scz4e      dcache.io.lsu.load(0).s1_kill                := loadUnits(0).io.dcache.s1_kill
891c590fb32Scz4e      dcache.io.lsu.load(0).s2_kill                := loadUnits(0).io.dcache.s2_kill
892c590fb32Scz4e      dcache.io.lsu.load(0).s0_pc                  := loadUnits(0).io.dcache.s0_pc
893c590fb32Scz4e      dcache.io.lsu.load(0).s1_pc                  := loadUnits(0).io.dcache.s1_pc
894c590fb32Scz4e      dcache.io.lsu.load(0).s2_pc                  := loadUnits(0).io.dcache.s2_pc
895c590fb32Scz4e      dcache.io.lsu.load(0).is128Req               := loadUnits(0).io.dcache.is128Req
896c590fb32Scz4e    }
897c590fb32Scz4e
898c590fb32Scz4e    // forward
    // Store-to-load forwarding sources: store queue, store buffer, uncache buffer, TileLink D channel
    // and MSHRs.
899c590fb32Scz4e    loadUnits(i).io.lsq.forward <> lsq.io.forward(i)
900c590fb32Scz4e    loadUnits(i).io.sbuffer <> sbuffer.io.forward(i)
901c590fb32Scz4e    loadUnits(i).io.ubuffer <> uncache.io.forward(i)
902c590fb32Scz4e    loadUnits(i).io.tl_d_channel := dcache.io.lsu.forward_D(i)
903c590fb32Scz4e    loadUnits(i).io.forward_mshr <> dcache.io.lsu.forward_mshr(i)
904c590fb32Scz4e    // ld-ld violation check
905c590fb32Scz4e    loadUnits(i).io.lsq.ldld_nuke_query <> lsq.io.ldu.ldld_nuke_query(i)
906c590fb32Scz4e    loadUnits(i).io.lsq.stld_nuke_query <> lsq.io.ldu.stld_nuke_query(i)
907522c7f99SAnzo    // loadqueue old ptr
908522c7f99SAnzo    loadUnits(i).io.lsq.lqDeqPtr := lsq.io.lqDeqPtr
909c590fb32Scz4e    loadUnits(i).io.csrCtrl       <> csrCtrl
910c590fb32Scz4e    // dcache refill req
911c590fb32Scz4e  // loadUnits(i).io.refill           <> delayedDcacheRefill
912c590fb32Scz4e    // dtlb
913c590fb32Scz4e    loadUnits(i).io.tlb <> dtlb_reqs.take(LduCnt)(i)
914c590fb32Scz4e    if(i == 0 ){ // port 0 assign to vsegmentUnit
      // DTLB port 0 is shared with the segment unit. The segment request (valid and bits) is registered
      // once before being presented, and takes priority over the load unit's request in the mux below.
915c590fb32Scz4e      val vsegmentDtlbReqValid = vSegmentUnit.io.dtlb.req.valid // segment tlb request need to delay 1 cycle
916c590fb32Scz4e      dtlb_reqs.take(LduCnt)(i).req.valid := loadUnits(i).io.tlb.req.valid || RegNext(vsegmentDtlbReqValid)
917c590fb32Scz4e      vSegmentUnit.io.dtlb.req.ready      := dtlb_reqs.take(LduCnt)(i).req.ready
918c590fb32Scz4e      dtlb_reqs.take(LduCnt)(i).req.bits  := ParallelPriorityMux(Seq(
919c590fb32Scz4e        RegNext(vsegmentDtlbReqValid)     -> RegEnable(vSegmentUnit.io.dtlb.req.bits, vsegmentDtlbReqValid),
920c590fb32Scz4e        loadUnits(i).io.tlb.req.valid     -> loadUnits(i).io.tlb.req.bits
921c590fb32Scz4e      ))
922c590fb32Scz4e    }
923c590fb32Scz4e    // pmp
924c590fb32Scz4e    loadUnits(i).io.pmp <> pmp_check(i).resp
925c590fb32Scz4e    // st-ld violation query
    // Every load unit observes the queries of all store pipelines (store units, then hybrid store sides).
926c590fb32Scz4e    val stld_nuke_query = storeUnits.map(_.io.stld_nuke_query) ++ hybridUnits.map(_.io.stu_io.stld_nuke_query)
927c590fb32Scz4e    for (s <- 0 until StorePipelineWidth) {
928c590fb32Scz4e      loadUnits(i).io.stld_nuke_query(s) := stld_nuke_query(s)
929c590fb32Scz4e    }
930c590fb32Scz4e    loadUnits(i).io.lq_rep_full <> lsq.io.lq_rep_full
931c590fb32Scz4e    // load prefetch train
932c590fb32Scz4e    prefetcherOpt.foreach(pf => {
933c590fb32Scz4e      // sms will train on all miss load sources
      // With pf_train_on_hit set, every valid training event is forwarded; otherwise only
      // first-issue misses are.
934c590fb32Scz4e      val source = loadUnits(i).io.prefetch_train
935c590fb32Scz4e      pf.io.ld_in(i).valid := Mux(pf_train_on_hit,
936c590fb32Scz4e        source.valid,
937c590fb32Scz4e        source.valid && source.bits.isFirstIssue && source.bits.miss
938c590fb32Scz4e      )
939c590fb32Scz4e      pf.io.ld_in(i).bits := source.bits
      // The training PC is rebuilt from the issue-port PC and overrides the pc field of the bits above:
      // it is registered once after issue, then staged by the s1/s2 prefetch_spec enables; when
      // s2_ptr_chasing is set the version that skips the s1-enabled stage is selected.
940c590fb32Scz4e      val loadPc = RegNext(io.ooo_to_mem.issueLda(i).bits.uop.pc) // for s1
941c590fb32Scz4e      pf.io.ld_in(i).bits.uop.pc := Mux(
942c590fb32Scz4e        loadUnits(i).io.s2_ptr_chasing,
943c590fb32Scz4e        RegEnable(loadPc, loadUnits(i).io.s2_prefetch_spec),
944c590fb32Scz4e        RegEnable(RegEnable(loadPc, loadUnits(i).io.s1_prefetch_spec), loadUnits(i).io.s2_prefetch_spec)
945c590fb32Scz4e      )
946c590fb32Scz4e    })
947c590fb32Scz4e    l1PrefetcherOpt.foreach(pf => {
948c590fb32Scz4e      // stream will train on all load sources
949c590fb32Scz4e      val source = loadUnits(i).io.prefetch_train_l1
950c590fb32Scz4e      pf.io.ld_in(i).valid := source.valid && source.bits.isFirstIssue
951c590fb32Scz4e      pf.io.ld_in(i).bits := source.bits
952c590fb32Scz4e    })
953c590fb32Scz4e
954c590fb32Scz4e    // load to load fast forward: load(i) prefers data(i)
    // Sources are examined in rotated order starting at i (i, i+1, ..., then wrapping to 0 .. i-1), so
    // the first valid source in that order supplies data, error flag and fast-match vector.
955c590fb32Scz4e    val l2l_fwd_out = loadUnits.map(_.io.l2l_fwd_out) ++ hybridUnits.map(_.io.ldu_io.l2l_fwd_out)
956c590fb32Scz4e    val fastPriority = (i until LduCnt + HyuCnt) ++ (0 until i)
957c590fb32Scz4e    val fastValidVec = fastPriority.map(j => l2l_fwd_out(j).valid)
958c590fb32Scz4e    val fastDataVec = fastPriority.map(j => l2l_fwd_out(j).data)
959c590fb32Scz4e    val fastErrorVec = fastPriority.map(j => l2l_fwd_out(j).dly_ld_err)
960c590fb32Scz4e    val fastMatchVec = fastPriority.map(j => io.ooo_to_mem.loadFastMatch(i)(j))
961c590fb32Scz4e    loadUnits(i).io.l2l_fwd_in.valid := VecInit(fastValidVec).asUInt.orR
962c590fb32Scz4e    loadUnits(i).io.l2l_fwd_in.data := ParallelPriorityMux(fastValidVec, fastDataVec)
963c590fb32Scz4e    loadUnits(i).io.l2l_fwd_in.dly_ld_err := ParallelPriorityMux(fastValidVec, fastErrorVec)
964c590fb32Scz4e    val fastMatch = ParallelPriorityMux(fastValidVec, fastMatchVec)
965c590fb32Scz4e    loadUnits(i).io.ld_fast_match := fastMatch
966c590fb32Scz4e    loadUnits(i).io.ld_fast_imm := io.ooo_to_mem.loadFastImm(i)
967c590fb32Scz4e    loadUnits(i).io.ld_fast_fuOpType := io.ooo_to_mem.loadFastFuOpType(i)
968c590fb32Scz4e    loadUnits(i).io.replay <> lsq.io.replay(i)
969c590fb32Scz4e
    // Registered copy of the L2 hint. NOTE(review): declared inside the loop, so this RegNext is
    // evaluated once per load unit, and dcache.io.l2_hint is reconnected on each iteration.
970c590fb32Scz4e    val l2_hint = RegNext(io.l2_hint)
971c590fb32Scz4e
972c590fb32Scz4e    // L2 Hint for DCache
973c590fb32Scz4e    dcache.io.l2_hint <> l2_hint
974c590fb32Scz4e
975c590fb32Scz4e    loadUnits(i).io.l2_hint <> l2_hint
    // TLB hint: the "full" indication is also raised by either registered tlbreplay signal for this port.
976c590fb32Scz4e    loadUnits(i).io.tlb_hint.id := dtlbRepeater.io.hint.get.req(i).id
977c590fb32Scz4e    loadUnits(i).io.tlb_hint.full := dtlbRepeater.io.hint.get.req(i).full ||
978c590fb32Scz4e      tlbreplay_reg(i) || dtlb_ld0_tlbreplay_reg(i)
979c590fb32Scz4e
980c590fb32Scz4e    // passdown to lsq (load s2)
981c590fb32Scz4e    lsq.io.ldu.ldin(i) <> loadUnits(i).io.lsq.ldin
    // Only the load unit on UncacheWBPort receives uncache results from the LSQ; on every other port
    // the LSQ output is always accepted and the load unit's uncache input is tied off.
982c590fb32Scz4e    if (i == UncacheWBPort) {
983c590fb32Scz4e      lsq.io.ldout(i) <> loadUnits(i).io.lsq.uncache
984c590fb32Scz4e    } else {
985c590fb32Scz4e      lsq.io.ldout(i).ready := true.B
986c590fb32Scz4e      loadUnits(i).io.lsq.uncache.valid := false.B
987c590fb32Scz4e      loadUnits(i).io.lsq.uncache.bits := DontCare
988c590fb32Scz4e    }
989c590fb32Scz4e    lsq.io.ld_raw_data(i) <> loadUnits(i).io.lsq.ld_raw_data
990c590fb32Scz4e    lsq.io.ncOut(i) <> loadUnits(i).io.lsq.nc_ldin
991c590fb32Scz4e    lsq.io.l2_hint.valid := l2_hint.valid
992c590fb32Scz4e    lsq.io.l2_hint.bits.sourceId := l2_hint.bits.sourceId
993c590fb32Scz4e    lsq.io.l2_hint.bits.isKeyword := l2_hint.bits.isKeyword
994c590fb32Scz4e
995c590fb32Scz4e    lsq.io.tlb_hint <> dtlbRepeater.io.hint.get
996c590fb32Scz4e
997c590fb32Scz4e    // connect misalignBuffer
998c590fb32Scz4e    loadMisalignBuffer.io.req(i) <> loadUnits(i).io.misalign_buf
999c590fb32Scz4e
    // Split (misaligned) load requests are replayed only through the load unit on MisalignWBPort;
    // the other load units get an idle misalign input.
1000c590fb32Scz4e    if (i == MisalignWBPort) {
1001c590fb32Scz4e      loadUnits(i).io.misalign_ldin  <> loadMisalignBuffer.io.splitLoadReq
1002c590fb32Scz4e      loadUnits(i).io.misalign_ldout <> loadMisalignBuffer.io.splitLoadResp
1003c590fb32Scz4e    } else {
1004c590fb32Scz4e      loadUnits(i).io.misalign_ldin.valid := false.B
1005c590fb32Scz4e      loadUnits(i).io.misalign_ldin.bits := DontCare
1006c590fb32Scz4e    }
1007c590fb32Scz4e
1008c590fb32Scz4e    // alter writeback exception info
1009c590fb32Scz4e    io.mem_to_ooo.s3_delayed_load_error(i) := loadUnits(i).io.s3_dly_ld_err
1010c590fb32Scz4e
1011c590fb32Scz4e    // update mem dependency predictor
1012c590fb32Scz4e    // io.memPredUpdate(i) := DontCare
1013c590fb32Scz4e
1014c590fb32Scz4e    // --------------------------------
1015c590fb32Scz4e    // Load Triggers
1016c590fb32Scz4e    // --------------------------------
    // Every load unit receives the same registered trigger configuration.
1017c590fb32Scz4e    loadUnits(i).io.fromCsrTrigger.tdataVec := tdata
1018c590fb32Scz4e    loadUnits(i).io.fromCsrTrigger.tEnableVec := tEnable
1019c590fb32Scz4e    loadUnits(i).io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
1020c590fb32Scz4e    loadUnits(i).io.fromCsrTrigger.debugMode := debugMode
1021c590fb32Scz4e  }
1022c590fb32Scz4e
1023c590fb32Scz4e  for (i <- 0 until HyuCnt) {
                     // Wiring for hybrid unit i, which exposes both a load side (ldu_io) and a
                     // store-address side (stu_io). Load-side connections use index LduCnt + i on the
                     // load-indexed interfaces; store-side connections use index StaCnt + i on the
                     // store-address-indexed interfaces.
1024c590fb32Scz4e    hybridUnits(i).io.redirect <> redirect
1025c590fb32Scz4e
1026c590fb32Scz4e    // get input from dispatch
1027c590fb32Scz4e    hybridUnits(i).io.lsin <> io.ooo_to_mem.issueHya(i)
1028c590fb32Scz4e    hybridUnits(i).io.feedback_slow <> io.mem_to_ooo.hyuIqFeedback(i).feedbackSlow
1029c590fb32Scz4e    hybridUnits(i).io.feedback_fast <> io.mem_to_ooo.hyuIqFeedback(i).feedbackFast
1030c590fb32Scz4e    hybridUnits(i).io.correctMissTrain := correctMissTrain
                     // take(HyuCnt)(i) is element i of the first HyuCnt entries, so hybrid unit i
                     // drives ldCancel(i) / wakeup(i).
1031c590fb32Scz4e    io.mem_to_ooo.ldCancel.take(HyuCnt)(i) := hybridUnits(i).io.ldu_io.ldCancel
1032c590fb32Scz4e    io.mem_to_ooo.wakeup.take(HyuCnt)(i) := hybridUnits(i).io.ldu_io.wakeup
1033c590fb32Scz4e
1034c590fb32Scz4e    // ------------------------------------
1035c590fb32Scz4e    //  Load Port
1036c590fb32Scz4e    // ------------------------------------
1037c590fb32Scz4e    // fast replay: the unit's fast-replay output is fed straight back into its own input
1038c590fb32Scz4e    hybridUnits(i).io.ldu_io.fast_rep_in <> hybridUnits(i).io.ldu_io.fast_rep_out
1039c590fb32Scz4e
1040c590fb32Scz4e    // dcache access (load port and store-address port)
1041c590fb32Scz4e    hybridUnits(i).io.ldu_io.dcache <> dcache.io.lsu.load(LduCnt + i)
1042c590fb32Scz4e    hybridUnits(i).io.stu_io.dcache <> dcache.io.lsu.sta(StaCnt + i)
1043c590fb32Scz4e
1044c590fb32Scz4e    // forward from lsq
1045c590fb32Scz4e    hybridUnits(i).io.ldu_io.lsq.forward <> lsq.io.forward(LduCnt + i)
1046c590fb32Scz4e    // forward from sbuffer / uncache buffer / dcache
1047c590fb32Scz4e    hybridUnits(i).io.ldu_io.sbuffer <> sbuffer.io.forward(LduCnt + i)
1048c590fb32Scz4e    hybridUnits(i).io.ldu_io.ubuffer <> uncache.io.forward(LduCnt + i)
1049c590fb32Scz4e    // hybridUnits(i).io.ldu_io.vec_forward <> vsFlowQueue.io.forward(LduCnt + i)
1050c590fb32Scz4e    hybridUnits(i).io.ldu_io.vec_forward := DontCare
1051c590fb32Scz4e    hybridUnits(i).io.ldu_io.tl_d_channel := dcache.io.lsu.forward_D(LduCnt + i)
1052c590fb32Scz4e    hybridUnits(i).io.ldu_io.forward_mshr <> dcache.io.lsu.forward_mshr(LduCnt + i)
1053c590fb32Scz4e    // ld-ld / st-ld violation (nuke) queries to the lsq
1054c590fb32Scz4e    hybridUnits(i).io.ldu_io.lsq.ldld_nuke_query <> lsq.io.ldu.ldld_nuke_query(LduCnt + i)
1055c590fb32Scz4e    hybridUnits(i).io.ldu_io.lsq.stld_nuke_query <> lsq.io.ldu.stld_nuke_query(LduCnt + i)
1056c590fb32Scz4e    hybridUnits(i).io.csrCtrl <> csrCtrl
1057c590fb32Scz4e    // tlb hint: "full" is also raised by the two tlb-replay registers for this port
1058c590fb32Scz4e    hybridUnits(i).io.ldu_io.tlb_hint.id := dtlbRepeater.io.hint.get.req(LduCnt + i).id
1059c590fb32Scz4e    hybridUnits(i).io.ldu_io.tlb_hint.full := dtlbRepeater.io.hint.get.req(LduCnt + i).full ||
1060c590fb32Scz4e      tlbreplay_reg(LduCnt + i) || dtlb_ld0_tlbreplay_reg(LduCnt + i)
1061c590fb32Scz4e
1062c590fb32Scz4e    // dtlb
1063c590fb32Scz4e    hybridUnits(i).io.tlb <> dtlb_ld.head.requestor(LduCnt + i)
1064c590fb32Scz4e    // pmp
1065c590fb32Scz4e    hybridUnits(i).io.pmp <> pmp_check.drop(LduCnt)(i).resp
1066c590fb32Scz4e    // st-ld violation query: all store units first, then the store side of all hybrid units
1067c590fb32Scz4e    val stld_nuke_query = VecInit(storeUnits.map(_.io.stld_nuke_query) ++ hybridUnits.map(_.io.stu_io.stld_nuke_query))
1068c590fb32Scz4e    hybridUnits(i).io.ldu_io.stld_nuke_query := stld_nuke_query
1069c590fb32Scz4e    hybridUnits(i).io.ldu_io.lq_rep_full <> lsq.io.lq_rep_full
1070c590fb32Scz4e    // load prefetch train
                     // With pf_train_on_hit every valid training entry is used; otherwise only entries
                     // that are first-issue and missed.
1071c590fb32Scz4e    prefetcherOpt.foreach(pf => {
1072c590fb32Scz4e      val source = hybridUnits(i).io.prefetch_train
1073c590fb32Scz4e      pf.io.ld_in(LduCnt + i).valid := Mux(pf_train_on_hit,
1074c590fb32Scz4e        source.valid,
1075c590fb32Scz4e        source.valid && source.bits.isFirstIssue && source.bits.miss
1076c590fb32Scz4e      )
1077c590fb32Scz4e      pf.io.ld_in(LduCnt + i).bits := source.bits
                       // pc is overridden after the bulk assignment: the current hybridPc when
                       // s2_ptr_chasing is set, otherwise the value registered one cycle earlier.
1078c590fb32Scz4e      pf.io.ld_in(LduCnt + i).bits.uop.pc := Mux(hybridUnits(i).io.ldu_io.s2_ptr_chasing, io.ooo_to_mem.hybridPc(i), RegNext(io.ooo_to_mem.hybridPc(i)))
1079c590fb32Scz4e    })
1080c590fb32Scz4e    l1PrefetcherOpt.foreach(pf => {
1081c590fb32Scz4e      // stream will train on all load sources
                       // (only first-issue entries whose fuType is a load); the store input is tied off
1082c590fb32Scz4e      val source = hybridUnits(i).io.prefetch_train_l1
1083c590fb32Scz4e      pf.io.ld_in(LduCnt + i).valid := source.valid && source.bits.isFirstIssue &&
1084c590fb32Scz4e                                       FuType.isLoad(source.bits.uop.fuType)
1085c590fb32Scz4e      pf.io.ld_in(LduCnt + i).bits := source.bits
1086c590fb32Scz4e      pf.io.st_in(StaCnt + i).valid := false.B
1087c590fb32Scz4e      pf.io.st_in(StaCnt + i).bits := DontCare
1088c590fb32Scz4e    })
                     // store prefetch train: same selection as the load side, restricted to store fuTypes
1089c590fb32Scz4e    prefetcherOpt.foreach(pf => {
1090c590fb32Scz4e      val source = hybridUnits(i).io.prefetch_train
1091c590fb32Scz4e      pf.io.st_in(StaCnt + i).valid := Mux(pf_train_on_hit,
1092c590fb32Scz4e        source.valid,
1093c590fb32Scz4e        source.valid && source.bits.isFirstIssue && source.bits.miss
1094c590fb32Scz4e      ) && FuType.isStore(source.bits.uop.fuType)
1095c590fb32Scz4e      pf.io.st_in(StaCnt + i).bits := source.bits
1096c590fb32Scz4e      pf.io.st_in(StaCnt + i).bits.uop.pc := RegNext(io.ooo_to_mem.hybridPc(i))
1097c590fb32Scz4e    })
1098c590fb32Scz4e
1099c590fb32Scz4e    // load to load fast forward: load(i) prefers data(i)
                     // fastPriority starts at this unit's own index (LduCnt + i), continues through the
                     // remaining hybrid units, then wraps around to indices 0 until LduCnt + i.
1100c590fb32Scz4e    val l2l_fwd_out = loadUnits.map(_.io.l2l_fwd_out) ++ hybridUnits.map(_.io.ldu_io.l2l_fwd_out)
1101c590fb32Scz4e    val fastPriority = (LduCnt + i until LduCnt + HyuCnt) ++ (0 until LduCnt + i)
1102c590fb32Scz4e    val fastValidVec = fastPriority.map(j => l2l_fwd_out(j).valid)
1103c590fb32Scz4e    val fastDataVec = fastPriority.map(j => l2l_fwd_out(j).data)
1104c590fb32Scz4e    val fastErrorVec = fastPriority.map(j => l2l_fwd_out(j).dly_ld_err)
1105c590fb32Scz4e    val fastMatchVec = fastPriority.map(j => io.ooo_to_mem.loadFastMatch(LduCnt + i)(j))
1106c590fb32Scz4e    hybridUnits(i).io.ldu_io.l2l_fwd_in.valid := VecInit(fastValidVec).asUInt.orR
1107c590fb32Scz4e    hybridUnits(i).io.ldu_io.l2l_fwd_in.data := ParallelPriorityMux(fastValidVec, fastDataVec)
1108c590fb32Scz4e    hybridUnits(i).io.ldu_io.l2l_fwd_in.dly_ld_err := ParallelPriorityMux(fastValidVec, fastErrorVec)
1109c590fb32Scz4e    val fastMatch = ParallelPriorityMux(fastValidVec, fastMatchVec)
1110c590fb32Scz4e    hybridUnits(i).io.ldu_io.ld_fast_match := fastMatch
1111c590fb32Scz4e    hybridUnits(i).io.ldu_io.ld_fast_imm := io.ooo_to_mem.loadFastImm(LduCnt + i)
1112c590fb32Scz4e    hybridUnits(i).io.ldu_io.ld_fast_fuOpType := io.ooo_to_mem.loadFastFuOpType(LduCnt + i)
1113c590fb32Scz4e    hybridUnits(i).io.ldu_io.replay <> lsq.io.replay(LduCnt + i)
1114c590fb32Scz4e    hybridUnits(i).io.ldu_io.l2_hint <> io.l2_hint
1115c590fb32Scz4e
1116c590fb32Scz4e    // uncache
1117c590fb32Scz4e    lsq.io.ldout.drop(LduCnt)(i) <> hybridUnits(i).io.ldu_io.lsq.uncache
1118c590fb32Scz4e    lsq.io.ld_raw_data.drop(LduCnt)(i) <> hybridUnits(i).io.ldu_io.lsq.ld_raw_data
1119c590fb32Scz4e
1120c590fb32Scz4e
1121c590fb32Scz4e    // passdown to lsq (load s2); the nc_ldin input of the hybrid unit is tied off
1122c590fb32Scz4e    hybridUnits(i).io.ldu_io.lsq.nc_ldin.valid := false.B
1123c590fb32Scz4e    hybridUnits(i).io.ldu_io.lsq.nc_ldin.bits := DontCare
1124c590fb32Scz4e    lsq.io.ldu.ldin(LduCnt + i) <> hybridUnits(i).io.ldu_io.lsq.ldin
1125c590fb32Scz4e    // Lsq to sta unit
                     // NOTE(review): storeMaskIn is connected again in the "Store Port" section below
                     // via takeRight(HyuCnt)(i) from the same source; presumably both address the
                     // same element - confirm.
1126c590fb32Scz4e    lsq.io.sta.storeMaskIn(StaCnt + i) <> hybridUnits(i).io.stu_io.st_mask_out
1127c590fb32Scz4e
1128c590fb32Scz4e    // Lsq to std unit's rs
                     // Bulk assignment first, then valid is overridden so that entries flagged in
                     // st_data_atomics are not presented to the lsq as valid store data.
1129c590fb32Scz4e    lsq.io.std.storeDataIn(StaCnt + i) := stData(StaCnt + i)
1130c590fb32Scz4e    lsq.io.std.storeDataIn(StaCnt + i).valid := stData(StaCnt + i).valid && !st_data_atomics(StaCnt + i)
1131c590fb32Scz4e    // prefetch
1132c590fb32Scz4e    hybridUnits(i).io.stu_io.prefetch_req <> sbuffer.io.store_prefetch(StaCnt + i)
1133c590fb32Scz4e
1134c590fb32Scz4e    io.mem_to_ooo.s3_delayed_load_error(LduCnt + i) := hybridUnits(i).io.ldu_io.s3_dly_ld_err
1135c590fb32Scz4e
1136c590fb32Scz4e    // ------------------------------------
1137c590fb32Scz4e    //  Store Port
1138c590fb32Scz4e    // ------------------------------------
                     // takeRight(HyuCnt)(i) is element i of the last HyuCnt entries of each vector.
1139c590fb32Scz4e    hybridUnits(i).io.stu_io.lsq <> lsq.io.sta.storeAddrIn.takeRight(HyuCnt)(i)
1140c590fb32Scz4e    hybridUnits(i).io.stu_io.lsq_replenish <> lsq.io.sta.storeAddrInRe.takeRight(HyuCnt)(i)
1141c590fb32Scz4e
1142c590fb32Scz4e    lsq.io.sta.storeMaskIn.takeRight(HyuCnt)(i) <> hybridUnits(i).io.stu_io.st_mask_out
1143c590fb32Scz4e    io.mem_to_ooo.stIn.takeRight(HyuCnt)(i).valid := hybridUnits(i).io.stu_io.issue.valid
1144c590fb32Scz4e    io.mem_to_ooo.stIn.takeRight(HyuCnt)(i).bits := hybridUnits(i).io.stu_io.issue.bits
1145c590fb32Scz4e
1146c590fb32Scz4e    // ------------------------------------
1147c590fb32Scz4e    //  Vector Store Port
1148c590fb32Scz4e    // ------------------------------------
1149c590fb32Scz4e    hybridUnits(i).io.vec_stu_io.isFirstIssue := true.B
1150c590fb32Scz4e
1151c590fb32Scz4e    // -------------------------
1152c590fb32Scz4e    // Store Triggers
1153c590fb32Scz4e    // -------------------------
1154c590fb32Scz4e    hybridUnits(i).io.fromCsrTrigger.tdataVec := tdata
1155c590fb32Scz4e    hybridUnits(i).io.fromCsrTrigger.tEnableVec := tEnable
1156c590fb32Scz4e    hybridUnits(i).io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
1157c590fb32Scz4e    hybridUnits(i).io.fromCsrTrigger.debugMode := debugMode
1158c590fb32Scz4e  }
1159c590fb32Scz4e
1160c590fb32Scz4e  // misalignBuffer
                   // The load and store misalign buffers both receive the redirect and the same set of
                   // lsqio fields from io.ooo_to_mem on their rob interface.
1161c590fb32Scz4e  loadMisalignBuffer.io.redirect                <> redirect
1162c590fb32Scz4e  loadMisalignBuffer.io.rob.lcommit             := io.ooo_to_mem.lsqio.lcommit
1163c590fb32Scz4e  loadMisalignBuffer.io.rob.scommit             := io.ooo_to_mem.lsqio.scommit
1164c590fb32Scz4e  loadMisalignBuffer.io.rob.pendingMMIOld       := io.ooo_to_mem.lsqio.pendingMMIOld
1165c590fb32Scz4e  loadMisalignBuffer.io.rob.pendingld           := io.ooo_to_mem.lsqio.pendingld
1166c590fb32Scz4e  loadMisalignBuffer.io.rob.pendingst           := io.ooo_to_mem.lsqio.pendingst
1167c590fb32Scz4e  loadMisalignBuffer.io.rob.pendingVst          := io.ooo_to_mem.lsqio.pendingVst
1168c590fb32Scz4e  loadMisalignBuffer.io.rob.commit              := io.ooo_to_mem.lsqio.commit
1169c590fb32Scz4e  loadMisalignBuffer.io.rob.pendingPtr          := io.ooo_to_mem.lsqio.pendingPtr
1170c590fb32Scz4e  loadMisalignBuffer.io.rob.pendingPtrNext      := io.ooo_to_mem.lsqio.pendingPtrNext
1171c590fb32Scz4e
                   // load misalign buffer status and the misalign speculation setting, passed to the lsq
1172c590fb32Scz4e  lsq.io.loadMisalignFull                       := loadMisalignBuffer.io.loadMisalignFull
1173522c7f99SAnzo  lsq.io.misalignAllowSpec                      := misalign_allow_spec
1174c590fb32Scz4e
1175c590fb32Scz4e  storeMisalignBuffer.io.redirect               <> redirect
1176c590fb32Scz4e  storeMisalignBuffer.io.rob.lcommit            := io.ooo_to_mem.lsqio.lcommit
1177c590fb32Scz4e  storeMisalignBuffer.io.rob.scommit            := io.ooo_to_mem.lsqio.scommit
1178c590fb32Scz4e  storeMisalignBuffer.io.rob.pendingMMIOld      := io.ooo_to_mem.lsqio.pendingMMIOld
1179c590fb32Scz4e  storeMisalignBuffer.io.rob.pendingld          := io.ooo_to_mem.lsqio.pendingld
1180c590fb32Scz4e  storeMisalignBuffer.io.rob.pendingst          := io.ooo_to_mem.lsqio.pendingst
1181c590fb32Scz4e  storeMisalignBuffer.io.rob.pendingVst         := io.ooo_to_mem.lsqio.pendingVst
1182c590fb32Scz4e  storeMisalignBuffer.io.rob.commit             := io.ooo_to_mem.lsqio.commit
1183c590fb32Scz4e  storeMisalignBuffer.io.rob.pendingPtr         := io.ooo_to_mem.lsqio.pendingPtr
1184c590fb32Scz4e  storeMisalignBuffer.io.rob.pendingPtrNext     := io.ooo_to_mem.lsqio.pendingPtrNext
1185c590fb32Scz4e
1186c590fb32Scz4e  lsq.io.maControl                              <> storeMisalignBuffer.io.sqControl
1187c590fb32Scz4e
                   // cmo request/response channels between the lsq and the dcache
1188c590fb32Scz4e  lsq.io.cmoOpReq <> dcache.io.cmoOpReq
1189c590fb32Scz4e  lsq.io.cmoOpResp <> dcache.io.cmoOpResp
1190c590fb32Scz4e
1191c590fb32Scz4e  // Prefetcher
                   // DTLB / PMP port indices used by the prefetchers:
                   //   StreamDTLBPortIndex     - the port after the LduCnt + HyuCnt load ports of the load DTLB
                   //   PrefetcherDTLBPortIndex - the first port of the prefetch DTLB
                   //   L2toL1DLBPortIndex      - the second port of the prefetch DTLB, used by io.l2_tlb_req
1192c590fb32Scz4e  val StreamDTLBPortIndex = TlbStartVec(dtlb_ld_idx) + LduCnt + HyuCnt
1193c590fb32Scz4e  val PrefetcherDTLBPortIndex = TlbStartVec(dtlb_pf_idx)
1194c590fb32Scz4e  val L2toL1DLBPortIndex = TlbStartVec(dtlb_pf_idx) + 1
                   // When a prefetcher is absent its DTLB port is tied off: the request is never
                   // valid and responses are always accepted.
1195c590fb32Scz4e  prefetcherOpt match {
1196c590fb32Scz4e  case Some(pf) =>
1197c590fb32Scz4e    dtlb_reqs(PrefetcherDTLBPortIndex) <> pf.io.tlb_req
1198c590fb32Scz4e    pf.io.pmp_resp := pmp_check(PrefetcherDTLBPortIndex).resp
1199c590fb32Scz4e  case None =>
1200c590fb32Scz4e    dtlb_reqs(PrefetcherDTLBPortIndex) := DontCare
1201c590fb32Scz4e    dtlb_reqs(PrefetcherDTLBPortIndex).req.valid := false.B
1202c590fb32Scz4e    dtlb_reqs(PrefetcherDTLBPortIndex).resp.ready := true.B
1203c590fb32Scz4e  }
1204c590fb32Scz4e  l1PrefetcherOpt match {
1205c590fb32Scz4e    case Some(pf) =>
1206c590fb32Scz4e      dtlb_reqs(StreamDTLBPortIndex) <> pf.io.tlb_req
1207c590fb32Scz4e      pf.io.pmp_resp := pmp_check(StreamDTLBPortIndex).resp
1208c590fb32Scz4e    case None =>
1209c590fb32Scz4e        dtlb_reqs(StreamDTLBPortIndex) := DontCare
1210c590fb32Scz4e        dtlb_reqs(StreamDTLBPortIndex).req.valid := false.B
1211c590fb32Scz4e        dtlb_reqs(StreamDTLBPortIndex).resp.ready := true.B
1212c590fb32Scz4e  }
                   // The L2-to-L1 port is bulk-connected to io.l2_tlb_req, then its resp.ready is
                   // overridden so responses are always accepted.
1213c590fb32Scz4e  dtlb_reqs(L2toL1DLBPortIndex) <> io.l2_tlb_req
1214c590fb32Scz4e  dtlb_reqs(L2toL1DLBPortIndex).resp.ready := true.B
1215c590fb32Scz4e  io.l2_pmp_resp := pmp_check(L2toL1DLBPortIndex).resp
1216c590fb32Scz4e
1217c590fb32Scz4e  // Store data (std) execution units: flush on redirect and connect the issueStd
                   // handshake (valid / ready / bits) field by field.
1218c590fb32Scz4e  for (i <- 0 until StdCnt) {
1219c590fb32Scz4e    stdExeUnits(i).io.flush <> redirect
1220c590fb32Scz4e    stdExeUnits(i).io.in.valid := io.ooo_to_mem.issueStd(i).valid
1221c590fb32Scz4e    io.ooo_to_mem.issueStd(i).ready := stdExeUnits(i).io.in.ready
1222c590fb32Scz4e    stdExeUnits(i).io.in.bits := io.ooo_to_mem.issueStd(i).bits
1223c590fb32Scz4e  }
1224c590fb32Scz4e
1225c590fb32Scz4e  for (i <- 0 until StaCnt) {
                     // Wiring for store (address) unit i: issue, dcache, lsq, dtlb/pmp, triggers,
                     // misalign buffer, store data selection, prefetch training and vector input.
1226c590fb32Scz4e    val stu = storeUnits(i)
1227c590fb32Scz4e
1228c590fb32Scz4e    stu.io.redirect      <> redirect
1229c590fb32Scz4e    stu.io.csrCtrl       <> csrCtrl
1230c590fb32Scz4e    stu.io.dcache        <> dcache.io.lsu.sta(i)
1231c590fb32Scz4e    stu.io.feedback_slow <> io.mem_to_ooo.staIqFeedback(i).feedbackSlow
1232c590fb32Scz4e    stu.io.stin         <> io.ooo_to_mem.issueSta(i)
1233c590fb32Scz4e    stu.io.lsq          <> lsq.io.sta.storeAddrIn(i)
1234c590fb32Scz4e    stu.io.lsq_replenish <> lsq.io.sta.storeAddrInRe(i)
1235c590fb32Scz4e    // dtlb
1236c590fb32Scz4e    stu.io.tlb          <> dtlb_st.head.requestor(i)
                     // pmp: store unit i uses checker LduCnt + HyuCnt + 1 + i
1237c590fb32Scz4e    stu.io.pmp          <> pmp_check(LduCnt + HyuCnt + 1 + i).resp
1238c590fb32Scz4e
1239c590fb32Scz4e    // -------------------------
1240c590fb32Scz4e    // Store Triggers
1241c590fb32Scz4e    // -------------------------
1242c590fb32Scz4e    stu.io.fromCsrTrigger.tdataVec := tdata
1243c590fb32Scz4e    stu.io.fromCsrTrigger.tEnableVec := tEnable
1244c590fb32Scz4e    stu.io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
1245c590fb32Scz4e    stu.io.fromCsrTrigger.debugMode := debugMode
1246c590fb32Scz4e
1247c590fb32Scz4e    // prefetch
1248c590fb32Scz4e    stu.io.prefetch_req <> sbuffer.io.store_prefetch(i)
1249c590fb32Scz4e
1250c590fb32Scz4e    // store unit does not need fast feedback
1251c590fb32Scz4e    io.mem_to_ooo.staIqFeedback(i).feedbackFast := DontCare
1252c590fb32Scz4e
1253c590fb32Scz4e    // Lsq to sta unit
1254c590fb32Scz4e    lsq.io.sta.storeMaskIn(i) <> stu.io.st_mask_out
1255c590fb32Scz4e
1256c590fb32Scz4e    // connect misalignBuffer
                     // Every unit can send requests to the buffer, but only unit 0 receives the split
                     // store requests and returns their responses; other units have the input tied off.
1257c590fb32Scz4e    storeMisalignBuffer.io.req(i) <> stu.io.misalign_buf
1258c590fb32Scz4e
1259c590fb32Scz4e    if (i == 0) {
1260c590fb32Scz4e      stu.io.misalign_stin  <> storeMisalignBuffer.io.splitStoreReq
1261c590fb32Scz4e      stu.io.misalign_stout <> storeMisalignBuffer.io.splitStoreResp
1262c590fb32Scz4e    } else {
1263c590fb32Scz4e      stu.io.misalign_stin.valid := false.B
1264c590fb32Scz4e      stu.io.misalign_stin.bits := DontCare
1265c590fb32Scz4e    }
1266c590fb32Scz4e
1267c590fb32Scz4e    // Lsq to std unit's rs
                     // For ports that also have a vector store splitter (i < VstuCnt), valid vector
                     // store data takes the port and stData(i) is held off (ready low); otherwise the
                     // scalar store data is passed through with the vector-only fields zeroed and
                     // valid masked by st_data_atomics.
1268c590fb32Scz4e    if (i < VstuCnt){
1269c590fb32Scz4e      when (vsSplit(i).io.vstd.get.valid) {
1270c590fb32Scz4e        lsq.io.std.storeDataIn(i).valid := true.B
1271c590fb32Scz4e        lsq.io.std.storeDataIn(i).bits := vsSplit(i).io.vstd.get.bits
1272c590fb32Scz4e        stData(i).ready := false.B
1273c590fb32Scz4e      }.otherwise {
1274c590fb32Scz4e        lsq.io.std.storeDataIn(i).valid := stData(i).valid && !st_data_atomics(i)
1275c590fb32Scz4e        lsq.io.std.storeDataIn(i).bits.uop := stData(i).bits.uop
1276c590fb32Scz4e        lsq.io.std.storeDataIn(i).bits.data := stData(i).bits.data
1277c590fb32Scz4e        lsq.io.std.storeDataIn(i).bits.mask.map(_ := 0.U)
1278c590fb32Scz4e        lsq.io.std.storeDataIn(i).bits.vdIdx.map(_ := 0.U)
1279c590fb32Scz4e        lsq.io.std.storeDataIn(i).bits.vdIdxInField.map(_ := 0.U)
1280c590fb32Scz4e        stData(i).ready := true.B
1281c590fb32Scz4e      }
1282c590fb32Scz4e    } else {
1283c590fb32Scz4e        lsq.io.std.storeDataIn(i).valid := stData(i).valid && !st_data_atomics(i)
1284c590fb32Scz4e        lsq.io.std.storeDataIn(i).bits.uop := stData(i).bits.uop
1285c590fb32Scz4e        lsq.io.std.storeDataIn(i).bits.data := stData(i).bits.data
1286c590fb32Scz4e        lsq.io.std.storeDataIn(i).bits.mask.map(_ := 0.U)
1287c590fb32Scz4e        lsq.io.std.storeDataIn(i).bits.vdIdx.map(_ := 0.U)
1288c590fb32Scz4e        lsq.io.std.storeDataIn(i).bits.vdIdxInField.map(_ := 0.U)
1289c590fb32Scz4e        stData(i).ready := true.B
1290c590fb32Scz4e    }
                     // NOTE(review): the next two statements are not indexed by i; they assign the
                     // debug / isFromLoadUnit fields of every storeDataIn port on each loop iteration.
1291c590fb32Scz4e    lsq.io.std.storeDataIn.map(_.bits.debug := 0.U.asTypeOf(new DebugBundle))
1292c590fb32Scz4e    lsq.io.std.storeDataIn.foreach(_.bits.isFromLoadUnit := DontCare)
1293c590fb32Scz4e
1294c590fb32Scz4e
1295c590fb32Scz4e    // store prefetch train
1296c590fb32Scz4e    l1PrefetcherOpt.foreach(pf => {
1297c590fb32Scz4e      // the l1 prefetcher's store input for this unit is tied off
1298c590fb32Scz4e      pf.io.st_in(i).valid := false.B
1299c590fb32Scz4e      pf.io.st_in(i).bits := DontCare
1300c590fb32Scz4e    })
1301c590fb32Scz4e
                     // With pf_train_on_hit every valid training entry is used; otherwise only entries
                     // that are first-issue and missed. The pc is taken from storePc(i) through two
                     // enable registers gated by s1_prefetch_spec and s2_prefetch_spec.
1302c590fb32Scz4e    prefetcherOpt.foreach(pf => {
1303c590fb32Scz4e      pf.io.st_in(i).valid := Mux(pf_train_on_hit,
1304c590fb32Scz4e        stu.io.prefetch_train.valid,
1305c590fb32Scz4e        stu.io.prefetch_train.valid && stu.io.prefetch_train.bits.isFirstIssue && (
1306c590fb32Scz4e          stu.io.prefetch_train.bits.miss
1307c590fb32Scz4e          )
1308c590fb32Scz4e      )
1309c590fb32Scz4e      pf.io.st_in(i).bits := stu.io.prefetch_train.bits
1310c590fb32Scz4e      pf.io.st_in(i).bits.uop.pc := RegEnable(RegEnable(io.ooo_to_mem.storePc(i), stu.io.s1_prefetch_spec), stu.io.s2_prefetch_spec)
1311c590fb32Scz4e    })
1312c590fb32Scz4e
1313c590fb32Scz4e    // 1. sync issue info to store set LFST
1314c590fb32Scz4e    // 2. when store issue, broadcast issued sqPtr to wake up the following insts
1315c590fb32Scz4e    // io.stIn(i).valid := io.issue(exuParameters.LduCnt + i).valid
1316c590fb32Scz4e    // io.stIn(i).bits := io.issue(exuParameters.LduCnt + i).bits
1317c590fb32Scz4e    io.mem_to_ooo.stIn(i).valid := stu.io.issue.valid
1318c590fb32Scz4e    io.mem_to_ooo.stIn(i).bits := stu.io.issue.bits
1319c590fb32Scz4e
                     // the store unit's scalar writeback is always accepted
1320c590fb32Scz4e    stu.io.stout.ready := true.B
1321c590fb32Scz4e
1322c590fb32Scz4e    // vector: units without a vector store splitter have the vector input tied off
1323c590fb32Scz4e    if (i < VstuCnt) {
1324c590fb32Scz4e      stu.io.vecstin <> vsSplit(i).io.out
1325c590fb32Scz4e      // vsFlowQueue.io.pipeFeedback(i) <> stu.io.vec_feedback_slow // need connect
1326c590fb32Scz4e    } else {
1327c590fb32Scz4e      stu.io.vecstin.valid := false.B
1328c590fb32Scz4e      stu.io.vecstin.bits := DontCare
1329c590fb32Scz4e      stu.io.vecstout.ready := false.B
1330c590fb32Scz4e    }
1331c590fb32Scz4e    stu.io.vec_isFirstIssue := true.B // TODO
1332c590fb32Scz4e  }
1333c590fb32Scz4e
13343c808de0SAnzo  val sqOtherStout = WireInit(0.U.asTypeOf(DecoupledIO(new MemExuOutput)))
                   // sqOtherStout merges the two store-queue writeback sources: it is valid when either
                   // is valid, and carries the cboZero bits when cboZeroStout is valid, else the mmio bits.
13353c808de0SAnzo  sqOtherStout.valid := lsq.io.mmioStout.valid || lsq.io.cboZeroStout.valid
13363c808de0SAnzo  sqOtherStout.bits  := Mux(lsq.io.cboZeroStout.valid, lsq.io.cboZeroStout.bits, lsq.io.mmioStout.bits)
13373c808de0SAnzo  assert(!(lsq.io.mmioStout.valid && lsq.io.cboZeroStout.valid), "Cannot writeback to mmio and cboZero at the same time.")
13383c808de0SAnzo
13393c808de0SAnzo  // Store writeback by StoreQueue:
13403c808de0SAnzo  //   1. cbo Zero
13413c808de0SAnzo  //   2. mmio
13423c808de0SAnzo  // Currently, the two should not be present at the same time, so simply make cbo zero a higher priority.
                   // NewPipelineConnect is defined elsewhere; presumably it inserts a pipeline stage
                   // between sqOtherStout and otherStout - confirm.
13433c808de0SAnzo  val otherStout = WireInit(0.U.asTypeOf(lsq.io.mmioStout))
1344c590fb32Scz4e  NewPipelineConnect(
13453c808de0SAnzo    sqOtherStout, otherStout, otherStout.fire,
1346c590fb32Scz4e    false.B,
13473c808de0SAnzo    Option("otherStoutConnect")
1348c590fb32Scz4e  )
                   // otherStout is accepted onto stOut(0) only when store unit 0 has no valid
                   // writeback of its own; ready defaults to false and is raised inside the when.
13493c808de0SAnzo  otherStout.ready := false.B
13503c808de0SAnzo  when (otherStout.valid && !storeUnits(0).io.stout.valid) {
1351c590fb32Scz4e    stOut(0).valid := true.B
13523c808de0SAnzo    stOut(0).bits  := otherStout.bits
13533c808de0SAnzo    otherStout.ready := true.B
1354c590fb32Scz4e  }
                   // both sources see the same ready
13553c808de0SAnzo  lsq.io.mmioStout.ready := sqOtherStout.ready
13563c808de0SAnzo  lsq.io.cboZeroStout.ready := sqOtherStout.ready
1357c590fb32Scz4e
1358c590fb32Scz4e  // vec mmio writeback is never accepted
1359c590fb32Scz4e  lsq.io.vecmmioStout.ready := false.B
1360c590fb32Scz4e
1361c590fb32Scz4e  // misalign buffer will overwrite stOut(0), but only when neither otherStout nor store
                   // unit 0 (scalar or vector writeback) is using it
13623c808de0SAnzo  val storeMisalignCanWriteBack = !otherStout.valid && !storeUnits(0).io.stout.valid && !storeUnits(0).io.vecstout.valid
1363c590fb32Scz4e  storeMisalignBuffer.io.writeBack.ready := storeMisalignCanWriteBack
1364c590fb32Scz4e  storeMisalignBuffer.io.storeOutValid := storeUnits(0).io.stout.valid
1365c590fb32Scz4e  storeMisalignBuffer.io.storeVecOutValid := storeUnits(0).io.vecstout.valid
1366c590fb32Scz4e  when (storeMisalignBuffer.io.writeBack.valid && storeMisalignCanWriteBack) {
1367c590fb32Scz4e    stOut(0).valid := true.B
1368c590fb32Scz4e    stOut(0).bits  := storeMisalignBuffer.io.writeBack.bits
1369c590fb32Scz4e  }
1370c590fb32Scz4e
1371c590fb32Scz4e  // Uncache
                   // The same CSR control bit (uncache_write_outstanding_enable) is given to both the
                   // uncache module and the lsq.
1372c590fb32Scz4e  uncache.io.enableOutstanding := io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable
1373c590fb32Scz4e  uncache.io.hartId := io.hartId
1374c590fb32Scz4e  lsq.io.uncacheOutstanding := io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable
1375c590fb32Scz4e
1376c590fb32Scz4e  // Lsq
                   // mmio / uop go from the lsq to the backend; the remaining rob fields are inputs to
                   // the lsq, connected field by field.
1377c590fb32Scz4e  io.mem_to_ooo.lsqio.mmio       := lsq.io.rob.mmio
1378c590fb32Scz4e  io.mem_to_ooo.lsqio.uop        := lsq.io.rob.uop
1379c590fb32Scz4e  lsq.io.rob.lcommit             := io.ooo_to_mem.lsqio.lcommit
1380c590fb32Scz4e  lsq.io.rob.scommit             := io.ooo_to_mem.lsqio.scommit
1381c590fb32Scz4e  lsq.io.rob.pendingMMIOld       := io.ooo_to_mem.lsqio.pendingMMIOld
1382c590fb32Scz4e  lsq.io.rob.pendingld           := io.ooo_to_mem.lsqio.pendingld
1383c590fb32Scz4e  lsq.io.rob.pendingst           := io.ooo_to_mem.lsqio.pendingst
1384c590fb32Scz4e  lsq.io.rob.pendingVst          := io.ooo_to_mem.lsqio.pendingVst
1385c590fb32Scz4e  lsq.io.rob.commit              := io.ooo_to_mem.lsqio.commit
1386c590fb32Scz4e  lsq.io.rob.pendingPtr          := io.ooo_to_mem.lsqio.pendingPtr
1387c590fb32Scz4e  lsq.io.rob.pendingPtrNext      := io.ooo_to_mem.lsqio.pendingPtrNext
1388c590fb32Scz4e
1389c590fb32Scz4e  //  lsq.io.rob            <> io.lsqio.rob
1390c590fb32Scz4e  lsq.io.enq            <> io.ooo_to_mem.enqLsq
1391c590fb32Scz4e  lsq.io.brqRedirect    <> redirect
1392c590fb32Scz4e
1393c590fb32Scz4e  //  violation rollback
                   // selectOldestRedirect picks one entry out of a list of redirect candidates.
                   //   xs      - candidate redirects, each with a valid bit and a robIdx
                   //   returns - a Vec[Bool] with one bit per candidate
                   // compareVec(i)(j), defined for j < i, holds isAfter(xs(j).robIdx, xs(i).robIdx).
                   // Bit i of the result is set when xs(i) is valid and, for every other candidate j:
                   //   j < i: xs(j) is invalid, or xs(j) is after xs(i)
                   //   j > i: xs(j) is invalid, or xs(i) is not after xs(j)
                   // isAfter is defined elsewhere; presumably it is a strict "younger than" comparison
                   // on rob indices, in which case equal indices resolve to the lower position - confirm.
1394c590fb32Scz4e  def selectOldestRedirect(xs: Seq[Valid[Redirect]]): Vec[Bool] = {
1395c590fb32Scz4e    val compareVec = (0 until xs.length).map(i => (0 until i).map(j => isAfter(xs(j).bits.robIdx, xs(i).bits.robIdx)))
1396c590fb32Scz4e    val resultOnehot = VecInit((0 until xs.length).map(i => Cat((0 until xs.length).map(j =>
1397c590fb32Scz4e      (if (j < i) !xs(j).valid || compareVec(i)(j)
1398c590fb32Scz4e      else if (j == i) xs(i).valid
1399c590fb32Scz4e      else !xs(j).valid || !compareVec(j)(i))
1400c590fb32Scz4e    )).andR))
1401c590fb32Scz4e    resultOnehot
1402c590fb32Scz4e  }
1403c590fb32Scz4e  val allRedirect = loadUnits.map(_.io.rollback) ++ hybridUnits.map(_.io.ldu_io.rollback) ++ lsq.io.nack_rollback ++ lsq.io.nuke_rollback
                   // The candidate list above is ordered: load units, hybrid units (load side), then
                   // the lsq nack and nuke rollbacks. The selected candidate is muxed out with Mux1H and
                   // reported as the memory violation after the three backend fault flags are cleared.
1404c590fb32Scz4e  val oldestOneHot = selectOldestRedirect(allRedirect)
1405c590fb32Scz4e  val oldestRedirect = WireDefault(Mux1H(oldestOneHot, allRedirect))
1406c590fb32Scz4e  // memory replay would not cause IAF/IPF/IGPF
1407c590fb32Scz4e  oldestRedirect.bits.cfiUpdate.backendIAF := false.B
1408c590fb32Scz4e  oldestRedirect.bits.cfiUpdate.backendIPF := false.B
1409c590fb32Scz4e  oldestRedirect.bits.cfiUpdate.backendIGPF := false.B
1410c590fb32Scz4e  io.mem_to_ooo.memoryViolation := oldestRedirect
1411c590fb32Scz4e  io.mem_to_ooo.lsqio.lqCanAccept  := lsq.io.lqCanAccept
1412c590fb32Scz4e  io.mem_to_ooo.lsqio.sqCanAccept  := lsq.io.sqCanAccept
1413c590fb32Scz4e
1414c590fb32Scz4e  // lsq.io.uncache        <> uncache.io.lsq
                   // Uncache request/response path between the lsq and the uncache module.
                   // uncacheState tracks whether a request that must be waited on is in flight:
                   //   s_idle           - nothing tracked
                   //   s_scalar_uncache - entered when a request fires while lsq.io.uncache.req is valid,
                   //                      unless the request is nc and outstanding uncache writes are enabled
                   //   s_vector_uncache - entered when a request fires without a valid lsq request and
                   //                      outstanding uncache writes are disabled
                   // Both busy states return to s_idle when uncacheResp fires.
1415c590fb32Scz4e  val s_idle :: s_scalar_uncache :: s_vector_uncache :: Nil = Enum(3)
1416c590fb32Scz4e  val uncacheState = RegInit(s_idle)
1417c590fb32Scz4e  val uncacheReq = Wire(Decoupled(new UncacheWordReq))
1418c590fb32Scz4e  val uncacheIdResp = uncache.io.lsq.idResp
1419c590fb32Scz4e  val uncacheResp = Wire(Decoupled(new UncacheWordResp))
1420c590fb32Scz4e
                   // Default tie-offs; the conditional connections further down override them when
                   // their conditions hold (later connections take precedence).
1421c590fb32Scz4e  uncacheReq.bits := DontCare
1422c590fb32Scz4e  uncacheReq.valid := false.B
1423c590fb32Scz4e  uncacheReq.ready := false.B
1424c590fb32Scz4e  uncacheResp.bits := DontCare
1425c590fb32Scz4e  uncacheResp.valid := false.B
1426c590fb32Scz4e  uncacheResp.ready := false.B
1427c590fb32Scz4e  lsq.io.uncache.req.ready := false.B
1428c590fb32Scz4e  lsq.io.uncache.idResp.valid := false.B
1429c590fb32Scz4e  lsq.io.uncache.idResp.bits := DontCare
1430c590fb32Scz4e  lsq.io.uncache.resp.valid := false.B
1431c590fb32Scz4e  lsq.io.uncache.resp.bits := DontCare
1432c590fb32Scz4e
1433c590fb32Scz4e  switch (uncacheState) {
1434c590fb32Scz4e    is (s_idle) {
1435c590fb32Scz4e      when (uncacheReq.fire) {
1436c590fb32Scz4e        when (lsq.io.uncache.req.valid) {
1437c590fb32Scz4e          when (!lsq.io.uncache.req.bits.nc || !io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
1438c590fb32Scz4e            uncacheState := s_scalar_uncache
1439c590fb32Scz4e          }
1440c590fb32Scz4e        }.otherwise {
1441c590fb32Scz4e          // val isStore = vsFlowQueue.io.uncache.req.bits.cmd === MemoryOpConstants.M_XWR
1442c590fb32Scz4e          when (!io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
1443c590fb32Scz4e            uncacheState := s_vector_uncache
1444c590fb32Scz4e          }
1445c590fb32Scz4e        }
1446c590fb32Scz4e      }
1447c590fb32Scz4e    }
1448c590fb32Scz4e
1449c590fb32Scz4e    is (s_scalar_uncache) {
1450c590fb32Scz4e      when (uncacheResp.fire) {
1451c590fb32Scz4e        uncacheState := s_idle
1452c590fb32Scz4e      }
1453c590fb32Scz4e    }
1454c590fb32Scz4e
1455c590fb32Scz4e    is (s_vector_uncache) {
1456c590fb32Scz4e      when (uncacheResp.fire) {
1457c590fb32Scz4e        uncacheState := s_idle
1458c590fb32Scz4e      }
1459c590fb32Scz4e    }
1460c590fb32Scz4e  }
1461c590fb32Scz4e
                   // The lsq request is forwarded only while it is valid. Responses (and id responses)
                   // go back to the lsq unconditionally when outstanding uncache writes are enabled,
                   // otherwise only while a scalar request is being tracked.
1462c590fb32Scz4e  when (lsq.io.uncache.req.valid) {
1463c590fb32Scz4e    uncacheReq <> lsq.io.uncache.req
1464c590fb32Scz4e  }
1465c590fb32Scz4e  when (io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
1466c590fb32Scz4e    lsq.io.uncache.resp <> uncacheResp
1467c590fb32Scz4e    lsq.io.uncache.idResp <> uncacheIdResp
1468c590fb32Scz4e  }.otherwise {
1469c590fb32Scz4e    when (uncacheState === s_scalar_uncache) {
1470c590fb32Scz4e      lsq.io.uncache.resp <> uncacheResp
1471c590fb32Scz4e      lsq.io.uncache.idResp <> uncacheIdResp
1472c590fb32Scz4e    }
1473c590fb32Scz4e  }
1474c590fb32Scz4e  // AddPipelineReg is defined elsewhere; presumably it registers the uncache request and
                   // response paths for one cycle for better timing - confirm.
1475c590fb32Scz4e  AddPipelineReg(uncacheReq, uncache.io.lsq.req, false.B)
1476c590fb32Scz4e  AddPipelineReg(uncache.io.lsq.resp, uncacheResp, false.B)
1477c590fb32Scz4e
1478c590fb32Scz4e  //lsq.io.refill         := delayedDcacheRefill
1479c590fb32Scz4e  lsq.io.release        := dcache.io.lsu.release
1480c590fb32Scz4e  lsq.io.lqCancelCnt <> io.mem_to_ooo.lqCancelCnt
1481c590fb32Scz4e  lsq.io.sqCancelCnt <> io.mem_to_ooo.sqCancelCnt
1482c590fb32Scz4e  lsq.io.lqDeq <> io.mem_to_ooo.lqDeq
1483c590fb32Scz4e  lsq.io.sqDeq <> io.mem_to_ooo.sqDeq
1484c590fb32Scz4e  // Todo: assign these
1485c590fb32Scz4e  io.mem_to_ooo.sqDeqPtr := lsq.io.sqDeqPtr
1486c590fb32Scz4e  io.mem_to_ooo.lqDeqPtr := lsq.io.lqDeqPtr
1487c590fb32Scz4e  lsq.io.tl_d_channel <> dcache.io.lsu.tl_d_channel
1488c590fb32Scz4e
1489c590fb32Scz4e  // LSQ to store buffer
                   // All ports are bulk-connected first; port 0 is then overridden so that it is shared
                   // between the lsq and the vector segment unit. Mux1H selects by the two valid bits,
                   // which assumes they are never asserted in the same cycle - TODO confirm.
1490c590fb32Scz4e  lsq.io.sbuffer        <> sbuffer.io.in
1491c590fb32Scz4e  sbuffer.io.in(0).valid := lsq.io.sbuffer(0).valid || vSegmentUnit.io.sbuffer.valid
1492c590fb32Scz4e  sbuffer.io.in(0).bits  := Mux1H(Seq(
1493c590fb32Scz4e    vSegmentUnit.io.sbuffer.valid -> vSegmentUnit.io.sbuffer.bits,
1494c590fb32Scz4e    lsq.io.sbuffer(0).valid       -> lsq.io.sbuffer(0).bits
1495c590fb32Scz4e  ))
1496c590fb32Scz4e  vSegmentUnit.io.sbuffer.ready := sbuffer.io.in(0).ready
1497c590fb32Scz4e  lsq.io.sqEmpty        <> sbuffer.io.sqempty
1498c590fb32Scz4e  dcache.io.force_write := lsq.io.force_write
1499c590fb32Scz4e
1500c590fb32Scz4e  // Default the vector difftest channels to DontCare; they are only connected for real
                   // when difftest is enabled.
1501c590fb32Scz4e  sbuffer.io.vecDifftestInfo      := DontCare
1502c590fb32Scz4e  lsq.io.sbufferVecDifftestInfo   := DontCare
1503c590fb32Scz4e  vSegmentUnit.io.vecDifftestInfo := DontCare
1504c590fb32Scz4e  if (env.EnableDifftest) {
                     // Port 0 is shared: the vector segment unit's info is used whenever it is valid,
                     // otherwise the lsq's port-0 info; both sources see the port's ready. The other
                     // ports are connected directly to the lsq.
1505c590fb32Scz4e    sbuffer.io.vecDifftestInfo .zipWithIndex.map{ case (sbufferPort, index) =>
1506c590fb32Scz4e      if (index == 0) {
1507c590fb32Scz4e        val vSegmentDifftestValid = vSegmentUnit.io.vecDifftestInfo.valid
1508c590fb32Scz4e        sbufferPort.valid := Mux(vSegmentDifftestValid, vSegmentUnit.io.vecDifftestInfo.valid, lsq.io.sbufferVecDifftestInfo(0).valid)
1509c590fb32Scz4e        sbufferPort.bits  := Mux(vSegmentDifftestValid, vSegmentUnit.io.vecDifftestInfo.bits, lsq.io.sbufferVecDifftestInfo(0).bits)
1510c590fb32Scz4e
1511c590fb32Scz4e        vSegmentUnit.io.vecDifftestInfo.ready  := sbufferPort.ready
1512c590fb32Scz4e        lsq.io.sbufferVecDifftestInfo(0).ready := sbufferPort.ready
1513c590fb32Scz4e      } else {
1514c590fb32Scz4e         sbufferPort <> lsq.io.sbufferVecDifftestInfo(index)
1515c590fb32Scz4e      }
1516c590fb32Scz4e    }
1517c590fb32Scz4e  }
1518c590fb32Scz4e
1519c590fb32Scz4e  // lsq.io.vecStoreRetire <> vsFlowQueue.io.sqRelease
1520c590fb32Scz4e  // lsq.io.vecWriteback.valid := vlWrapper.io.uopWriteback.fire &&
1521c590fb32Scz4e  //   vlWrapper.io.uopWriteback.bits.uop.vpu.lastUop
1522c590fb32Scz4e  // lsq.io.vecWriteback.bits := vlWrapper.io.uopWriteback.bits
1523c590fb32Scz4e
1524c590fb32Scz4e  // vector
                   // Per-port issue predicates, all derived from io.ooo_to_mem.issueVldu:
                   //   vLoadCanAccept(i)  - load splitter i is ready and the uop's fuOpType is a vector load
                   //   vStoreCanAccept(i) - store splitter i is ready and the uop's fuOpType is a vector store
                   //                        (vector stores arrive on the same issueVldu ports)
                   //   isSegment          - the head port holds a valid uop whose fuType passes isVsegls
                   //   isFixVlUop(i)      - port i holds a valid uop with vpu.isVleff and vpu.lastUop set
1525c590fb32Scz4e  val vLoadCanAccept  = (0 until VlduCnt).map(i =>
1526c590fb32Scz4e    vlSplit(i).io.in.ready && VlduType.isVecLd(io.ooo_to_mem.issueVldu(i).bits.uop.fuOpType)
1527c590fb32Scz4e  )
1528c590fb32Scz4e  val vStoreCanAccept = (0 until VstuCnt).map(i =>
1529c590fb32Scz4e    vsSplit(i).io.in.ready && VstuType.isVecSt(io.ooo_to_mem.issueVldu(i).bits.uop.fuOpType)
1530c590fb32Scz4e  )
1531c590fb32Scz4e  val isSegment     = io.ooo_to_mem.issueVldu.head.valid && isVsegls(io.ooo_to_mem.issueVldu.head.bits.uop.fuType)
1532c590fb32Scz4e  val isFixVlUop    = io.ooo_to_mem.issueVldu.map{x =>
1533c590fb32Scz4e    x.bits.uop.vpu.isVleff && x.bits.uop.vpu.lastUop && x.valid
1534c590fb32Scz4e  }
1535c590fb32Scz4e
1536c590fb32Scz4e  // init port
1537c590fb32Scz4e  /**
1538c590fb32Scz4e   * TODO: the split vsMergebuffer may be removed if one RS can accept two feedbacks, or if the RS does not need to replay uops
1539c590fb32Scz4e   * for now:
1540c590fb32Scz4e   *  RS0 -> VsSplit0 -> stu0 -> vsMergebuffer0 -> feedback -> RS0
1541c590fb32Scz4e   *  RS1 -> VsSplit1 -> stu1 -> vsMergebuffer1 -> feedback -> RS1
1542c590fb32Scz4e   *
1543c590fb32Scz4e   * vector loads don't need feedback
1544c590fb32Scz4e   *
1545c590fb32Scz4e   *  RS0 -> VlSplit0  -> ldu0 -> |
1546c590fb32Scz4e   *  RS1 -> VlSplit1  -> ldu1 -> |  -> vlMergebuffer
1547c590fb32Scz4e   *        replayIO   -> ldu3 -> |
1548c590fb32Scz4e   * */
                   // Default the merge buffer inputs to DontCare (connections made later in the file
                   // take precedence), then pass the flush and mbIndex signals from the store
                   // misalign buffer to merge buffer i.
1549c590fb32Scz4e  (0 until VstuCnt).foreach{i =>
1550c590fb32Scz4e    vsMergeBuffer(i).io.fromPipeline := DontCare
1551c590fb32Scz4e    vsMergeBuffer(i).io.fromSplit := DontCare
1552c590fb32Scz4e
1553c590fb32Scz4e    vsMergeBuffer(i).io.fromMisalignBuffer.get.flush := storeMisalignBuffer.io.toVecStoreMergeBuffer(i).flush
1554c590fb32Scz4e    vsMergeBuffer(i).io.fromMisalignBuffer.get.mbIndex := storeMisalignBuffer.io.toVecStoreMergeBuffer(i).mbIndex
1555c590fb32Scz4e  }
1556c590fb32Scz4e
1557c590fb32Scz4e  (0 until VstuCnt).foreach{i =>
                     // Vector store splitter i: takes uops from issueVldu(i) and feeds store unit i.
1558c590fb32Scz4e    vsSplit(i).io.redirect <> redirect
                     // Bulk-connect the issue port, then override valid so the splitter only sees uops
                     // that it can accept as vector stores and that are not segment accesses.
1559c590fb32Scz4e    vsSplit(i).io.in <> io.ooo_to_mem.issueVldu(i)
1560c590fb32Scz4e    vsSplit(i).io.in.valid := io.ooo_to_mem.issueVldu(i).valid &&
1561c590fb32Scz4e                              vStoreCanAccept(i) && !isSegment
1562c590fb32Scz4e    vsSplit(i).io.toMergeBuffer <> vsMergeBuffer(i).io.fromSplit.head
                     // The flush condition uses the robIdx of the incoming uop when the splitter output
                     // fires, otherwise the robIdx of the uop currently presented to the store unit.
                     // NOTE(review): storeUnits(i).io.vecstin is also bulk-connected to
                     // vsSplit(i).io.out in the StaCnt loop earlier in the file; presumably this
                     // later NewPipelineConnect takes precedence - confirm.
1563c590fb32Scz4e    NewPipelineConnect(
1564c590fb32Scz4e      vsSplit(i).io.out, storeUnits(i).io.vecstin, storeUnits(i).io.vecstin.fire,
1565c590fb32Scz4e      Mux(vsSplit(i).io.out.fire, vsSplit(i).io.out.bits.uop.robIdx.needFlush(io.redirect), storeUnits(i).io.vecstin.bits.uop.robIdx.needFlush(io.redirect)),
1566c590fb32Scz4e      Option("VsSplitConnectStu")
1567c590fb32Scz4e    )
1568c590fb32Scz4e    vsSplit(i).io.vstd.get := DontCare // Todo: Discuss how to pass vector store data
1569c590fb32Scz4e
                     // status inputs: misalign buffer not full, and store unit i's s0_s1_valid low
1570c590fb32Scz4e    vsSplit(i).io.vstdMisalign.get.storeMisalignBufferEmpty := !storeMisalignBuffer.io.full
1571c590fb32Scz4e    vsSplit(i).io.vstdMisalign.get.storePipeEmpty := !storeUnits(i).io.s0_s1_valid
1572c590fb32Scz4e
1573c590fb32Scz4e  }
// Vector load lanes: issue port -> vlSplit -> load unit, one lane per vector load port.
for (port <- 0 until VlduCnt) {
  val splitIo = vlSplit(port).io
  val lduIo = loadUnits(port).io
  val issue = io.ooo_to_mem.issueVldu(port)

  splitIo.redirect <> redirect
  splitIo.in <> issue
  // Overrides the valid from the bulk connection: segment uops and the uops flagged by
  // isFixVlUop are not sent into the load split.
  splitIo.in.valid := issue.valid && vLoadCanAccept(port) && !isSegment && !isFixVlUop(port)
  splitIo.toMergeBuffer <> vlMergeBuffer.io.fromSplit(port)
  splitIo.threshold.get.valid := vlMergeBuffer.io.toSplit.get.threshold
  splitIo.threshold.get.bits  := lsq.io.lqDeqPtr

  // Same flush selection as on the store side: the split output's robIdx when it fires,
  // otherwise the robIdx of the uop on vecldin.
  val vecLoadFlush = Mux(
    splitIo.out.fire,
    splitIo.out.bits.uop.robIdx.needFlush(io.redirect),
    lduIo.vecldin.bits.uop.robIdx.needFlush(io.redirect)
  )
  NewPipelineConnect(
    splitIo.out, lduIo.vecldin, lduIo.vecldin.fire,
    vecLoadFlush,
    Option("VlSplitConnectLdu")
  )

  // Subsequent instructions will be blocked
  vfofBuffer.io.in(port).valid := issue.valid
  vfofBuffer.io.in(port).bits  := issue.bits
}
// Load pipelines feed vlMergeBuffer; the port numbered MisalignWBPort is shared with the
// load misalign buffer, which is only forwarded when the load unit has nothing valid.
for (port <- 0 until LduCnt) {
  val lduVecOut = loadUnits(port).io.vecldout
  val mergeIn = vlMergeBuffer.io.fromPipeline(port)
  val misalignWb = loadMisalignBuffer.io.vecWriteBack

  lduVecOut.ready := mergeIn.ready
  misalignWb.ready := true.B

  // Default source: the load unit itself.
  mergeIn.valid := lduVecOut.valid
  mergeIn.bits  := lduVecOut.bits
  if (port == MisalignWBPort) {
    // Last-connect override: fall back to the misalign buffer when the load unit is idle.
    when (!lduVecOut.valid) {
      mergeIn.valid := misalignWb.valid
      mergeIn.bits  := misalignWb.bits
    }
  }
}
1609c590fb32Scz4e
// Store pipelines feed their own vsMergeBuffer; only the first VstuCnt store ports take part.
for (port <- 0 until StaCnt if port < VstuCnt) {
  val stuVecOut = storeUnits(port).io.vecstout
  val misalignWb = storeMisalignBuffer.io.vecWriteBack(port)
  val mergeIn = vsMergeBuffer(port).io.fromPipeline.head

  stuVecOut.ready := true.B
  misalignWb.ready := mergeIn.ready

  // Default source: the store unit; the misalign buffer is forwarded only when it is idle.
  mergeIn.valid := stuVecOut.valid
  mergeIn.bits  := stuVecOut.bits
  when (!stuVecOut.valid) {
    mergeIn.valid := misalignWb.valid
    mergeIn.bits  := misalignWb.bits
  }
}
1624c590fb32Scz4e
// A vector issue port is ready when either the load side or the store side can accept it.
// This is a later connection than the `<>` bulk connections to the split units, so it
// determines the final value of ready.
for (port <- 0 until VlduCnt) {
  io.ooo_to_mem.issueVldu(port).ready := vLoadCanAccept(port) || vStoreCanAccept(port)
}

// Redirect is broadcast to every merge buffer.
vlMergeBuffer.io.redirect <> redirect
vsMergeBuffer.foreach(_.io.redirect <> redirect)

// Merge buffers report to the LSQ, one channel per port.
for (port <- 0 until VlduCnt) {
  vlMergeBuffer.io.toLsq(port) <> lsq.io.ldvecFeedback(port)
}
for (port <- 0 until VstuCnt) {
  vsMergeBuffer(port).io.toLsq.head <> lsq.io.stvecFeedback(port)
}
1637c590fb32Scz4e
// Vector load feedback: send to RS
for (port <- 0 until VlduCnt) {
  val iqFeedback = io.mem_to_ooo.vlduIqFeedback(port)
  vlMergeBuffer.io.feedback(port) <> iqFeedback.feedbackSlow
  iqFeedback.feedbackFast := DontCare
}

// Vector store feedback: send to RS
for (port <- 0 until VstuCnt) {
  val iqFeedback = io.mem_to_ooo.vstuIqFeedback(port)
  val mergeFeedback = vsMergeBuffer(port).io.feedback.head

  if (port == 0) {
    // Port 0 is shared with the segment unit. Mux1H ORs the selected inputs, so the two
    // valids are expected to be mutually exclusive.
    val segFeedback = vSegmentUnit.io.feedback
    iqFeedback.feedbackSlow.valid := mergeFeedback.valid || segFeedback.valid
    iqFeedback.feedbackSlow.bits := Mux1H(Seq(
      segFeedback.valid   -> segFeedback.bits,
      mergeFeedback.valid -> mergeFeedback.bits
    ))
  } else {
    mergeFeedback <> iqFeedback.feedbackSlow
  }
  iqFeedback.feedbackFast := DontCare
}
1657c590fb32Scz4e
// Vector writeback arbitration. Every port carries the load merge buffer (higher priority)
// and the store merge buffer (lower priority). Port 0 additionally carries the segment
// unit and port 1 the vfofBuffer; each of those outranks both merge buffers on its port.
for (port <- 0 until VlduCnt) {
  val wbPort = io.mem_to_ooo.writebackVldu(port)
  val loadWb = vlMergeBuffer.io.uopWriteback(port)
  val storeWb = vsMergeBuffer(port).io.uopWriteback.head

  port match {
    case 0 => // for segmentUnit, segmentUnit use port0 writeback
      val segWb = vSegmentUnit.io.uopwriteback
      wbPort.valid := loadWb.valid || storeWb.valid || segWb.valid
      wbPort.bits := PriorityMux(Seq(
        segWb.valid   -> segWb.bits,
        loadWb.valid  -> loadWb.bits,
        storeWb.valid -> storeWb.bits,
      ))
      // A source is ready only if the port is ready and no higher-priority source is valid.
      loadWb.ready  := wbPort.ready && !segWb.valid
      storeWb.ready := wbPort.ready && !loadWb.valid && !segWb.valid
      segWb.ready   := wbPort.ready

    case 1 =>
      val fofWb = vfofBuffer.io.uopWriteback
      wbPort.valid := loadWb.valid || storeWb.valid || fofWb.valid
      wbPort.bits := PriorityMux(Seq(
        fofWb.valid   -> fofWb.bits,
        loadWb.valid  -> loadWb.bits,
        storeWb.valid -> storeWb.bits,
      ))
      loadWb.ready  := wbPort.ready && !fofWb.valid
      storeWb.ready := wbPort.ready && !loadWb.valid && !fofWb.valid
      fofWb.ready   := wbPort.ready

    case _ =>
      wbPort.valid := loadWb.valid || storeWb.valid
      wbPort.bits := PriorityMux(Seq(
        loadWb.valid  -> loadWb.bits,
        storeWb.valid -> storeWb.bits,
      ))
      loadWb.ready  := wbPort.ready
      storeWb.ready := wbPort.ready && !loadWb.valid
  }

  // The vfofBuffer observes every load merge-buffer writeback.
  vfofBuffer.io.mergeUopWriteback(port).valid := loadWb.valid
  vfofBuffer.io.mergeUopWriteback(port).bits  := loadWb.bits
}


vfofBuffer.io.redirect <> redirect
1695c590fb32Scz4e
1696c590fb32Scz4e  // Sbuffer: connect CSR control, the dcache store channel, the dcache
  // memSetPattenDetected signal and the LSQ force_write interface.
1697c590fb32Scz4e  sbuffer.io.csrCtrl    <> csrCtrl
1698c590fb32Scz4e  sbuffer.io.dcache     <> dcache.io.lsu.store
1699c590fb32Scz4e  sbuffer.io.memSetPattenDetected := dcache.io.memSetPattenDetected
1700c590fb32Scz4e  sbuffer.io.force_write <> lsq.io.force_write
1701c590fb32Scz4e  // flush sbuffer
  // Three flush requesters: the LSQ flushSbuffer request (named cmoFlush here), the backend
  // flushSb signal, and the atomics unit / vector segment unit.
1702c590fb32Scz4e  val cmoFlush = lsq.io.flushSbuffer.valid
1703c590fb32Scz4e  val fenceFlush = io.ooo_to_mem.flushSb
1704c590fb32Scz4e  val atomicsFlush = atomicsUnit.io.flush_sbuffer.valid || vSegmentUnit.io.flush_sbuffer.valid
  // "Store side empty" requires both the sbuffer and the uncache unit to report empty.
1705c590fb32Scz4e  val stIsEmpty = sbuffer.io.flush.empty && uncache.io.flush.empty
  // The backend sees the empty status through a register stage (one cycle later).
1706c590fb32Scz4e  io.mem_to_ooo.sbIsEmpty := RegNext(stIsEmpty)
1707c590fb32Scz4e
1708c590fb32Scz4e  // if both of them tries to flush sbuffer at the same time
1709c590fb32Scz4e  // something must have gone wrong
  // NOTE(review): as written, this assertion only fires when all three requesters are
  // active in the same cycle; any two requesting together pass. Confirm that is intended.
1710c590fb32Scz4e  assert(!(fenceFlush && atomicsFlush && cmoFlush))
  // The combined request is registered once; uncache reuses the registered sbuffer request.
1711c590fb32Scz4e  sbuffer.io.flush.valid := RegNext(fenceFlush || atomicsFlush || cmoFlush)
1712c590fb32Scz4e  uncache.io.flush.valid := sbuffer.io.flush.valid
1713c590fb32Scz4e
1714c590fb32Scz4e  // AtomicsUnit: AtomicsUnit will override other control signals,
1715c590fb32Scz4e  // as atomics insts (LR/SC/AMO) will block the pipeline
  // State encoding: s_normal plus one s_atomics(k) state for each store-address port and
  // each hybrid port, recording which port issued the atomic.
1716c590fb32Scz4e  val s_normal +: s_atomics = Enum(StaCnt + HyuCnt + 1)
1717c590fb32Scz4e  val state = RegInit(s_normal)
1718c590fb32Scz4e
  // st_atomics(k): port k presents a valid uop whose fuType satisfies FuType.storeIsAMO.
  // Indices [0, StaCnt) are issueSta ports; [StaCnt, StaCnt + HyuCnt) are issueHya ports.
1719c590fb32Scz4e  val st_atomics = Seq.tabulate(StaCnt)(i =>
1720c590fb32Scz4e    io.ooo_to_mem.issueSta(i).valid && FuType.storeIsAMO((io.ooo_to_mem.issueSta(i).bits.uop.fuType))
1721c590fb32Scz4e  ) ++ Seq.tabulate(HyuCnt)(i =>
1722c590fb32Scz4e    io.ooo_to_mem.issueHya(i).valid && FuType.storeIsAMO((io.ooo_to_mem.issueHya(i).bits.uop.fuType))
1723c590fb32Scz4e  )
1724c590fb32Scz4e
  // AMO on a store port: the port's ready follows the atomics unit, the store unit's input
  // valid is forced low, and the state records the issuing port. These are conditional
  // overrides (last connect wins) of connections presumably made earlier in the file
  // (not visible in this section), so their position in the file matters.
1725c590fb32Scz4e  for (i <- 0 until StaCnt) when(st_atomics(i)) {
1726c590fb32Scz4e    io.ooo_to_mem.issueSta(i).ready := atomicsUnit.io.in.ready
1727c590fb32Scz4e    storeUnits(i).io.stin.valid := false.B
1728c590fb32Scz4e
1729c590fb32Scz4e    state := s_atomics(i)
1730c590fb32Scz4e  }
  // Same override for hybrid ports; additionally asserts that no other port presents an
  // AMO in the same cycle.
  // NOTE(review): no equivalent assertion is visible for two store ports presenting AMOs
  // simultaneously; the Mux1H below ORs its inputs if more than one select is set.
1731c590fb32Scz4e  for (i <- 0 until HyuCnt) when(st_atomics(StaCnt + i)) {
1732c590fb32Scz4e    io.ooo_to_mem.issueHya(i).ready := atomicsUnit.io.in.ready
1733c590fb32Scz4e    hybridUnits(i).io.lsin.valid := false.B
1734c590fb32Scz4e
1735c590fb32Scz4e    state := s_atomics(StaCnt + i)
1736c590fb32Scz4e    assert(!st_atomics.zipWithIndex.filterNot(_._2 == StaCnt + i).unzip._1.reduce(_ || _))
1737c590fb32Scz4e  }
  // Return to normal once the atomics unit produces its output. Being the last connection
  // to `state`, this wins over the assignments above if both apply in the same cycle.
1738c590fb32Scz4e  when (atomicsUnit.io.out.valid) {
1739c590fb32Scz4e    state := s_normal
1740c590fb32Scz4e  }
1741c590fb32Scz4e
  // The atomics unit takes its input from whichever port presents the AMO.
1742c590fb32Scz4e  atomicsUnit.io.in.valid := st_atomics.reduce(_ || _)
1743c590fb32Scz4e  atomicsUnit.io.in.bits  := Mux1H(Seq.tabulate(StaCnt)(i =>
1744c590fb32Scz4e    st_atomics(i) -> io.ooo_to_mem.issueSta(i).bits) ++
1745c590fb32Scz4e    Seq.tabulate(HyuCnt)(i => st_atomics(StaCnt+i) -> io.ooo_to_mem.issueHya(i).bits))
  // Store data for atomics; st_data_atomics and stData are defined outside this section.
1746c590fb32Scz4e  atomicsUnit.io.storeDataIn.zipWithIndex.foreach { case (stdin, i) =>
1747c590fb32Scz4e    stdin.valid := st_data_atomics(i)
1748c590fb32Scz4e    stdin.bits := stData(i).bits
1749c590fb32Scz4e  }
1750c590fb32Scz4e  atomicsUnit.io.redirect <> redirect
1751c590fb32Scz4e
1752c590fb32Scz4e  // TODO: complete amo's pmp support
  // Default TLB wiring for the atomics unit: no response is delivered unless the
  // `when (state =/= s_normal)` block below overrides these connections.
1753c590fb32Scz4e  val amoTlb = dtlb_ld(0).requestor(0)
1754c590fb32Scz4e  atomicsUnit.io.dtlb.resp.valid := false.B
1755c590fb32Scz4e  atomicsUnit.io.dtlb.resp.bits  := DontCare
1756c590fb32Scz4e  atomicsUnit.io.dtlb.req.ready  := amoTlb.req.ready
1757c590fb32Scz4e  atomicsUnit.io.pmpResp := pmp_check(0).resp
1758c590fb32Scz4e
1759c590fb32Scz4e  atomicsUnit.io.dcache <> dcache.io.lsu.atomics
  // The atomics unit sees the same combined empty status as the backend (sbuffer and uncache).
1760c590fb32Scz4e  atomicsUnit.io.flush_sbuffer.empty := stIsEmpty
1761c590fb32Scz4e
1762c590fb32Scz4e  atomicsUnit.io.csrCtrl := csrCtrl
1763c590fb32Scz4e
1764c590fb32Scz4e  // for atomicsUnit, it uses loadUnit(0)'s TLB port
1765c590fb32Scz4e
  // While an atomic is in flight, the following connections override the ones made
  // earlier (last connect wins), so this block must stay after them.
1766c590fb32Scz4e  when (state =/= s_normal) {
1767c590fb32Scz4e    // use store wb port instead of load
1768c590fb32Scz4e    loadUnits(0).io.ldout.ready := false.B
1769c590fb32Scz4e    // use load_0's TLB
1770c590fb32Scz4e    atomicsUnit.io.dtlb <> amoTlb
1771c590fb32Scz4e
1772c590fb32Scz4e    // hw prefetch should be disabled while executing atomic insts
1773c590fb32Scz4e    loadUnits.map(i => i.io.prefetch_req.valid := false.B)
1774c590fb32Scz4e
1775c590fb32Scz4e    // make sure there's no in-flight uops in load unit
1776c590fb32Scz4e    assert(!loadUnits(0).io.ldout.valid)
1777c590fb32Scz4e  }
1778c590fb32Scz4e
  // Note: the LSQ flush handshake uses sbuffer.io.sbempty, not the combined stIsEmpty.
1779c590fb32Scz4e  lsq.io.flushSbuffer.empty := sbuffer.io.sbempty
1780c590fb32Scz4e
  // While the atomic issued from port i is in flight, that port's slow feedback comes from
  // the atomics unit; the unit normally driving it must not raise feedback at the same time.
1781c590fb32Scz4e  for (i <- 0 until StaCnt) {
1782c590fb32Scz4e    when (state === s_atomics(i)) {
1783c590fb32Scz4e      io.mem_to_ooo.staIqFeedback(i).feedbackSlow := atomicsUnit.io.feedbackSlow
1784c590fb32Scz4e      assert(!storeUnits(i).io.feedback_slow.valid)
1785c590fb32Scz4e    }
1786c590fb32Scz4e  }
1787c590fb32Scz4e  for (i <- 0 until HyuCnt) {
1788c590fb32Scz4e    when (state === s_atomics(StaCnt + i)) {
1789c590fb32Scz4e      io.mem_to_ooo.hyuIqFeedback(i).feedbackSlow := atomicsUnit.io.feedbackSlow
1790c590fb32Scz4e      assert(!hybridUnits(i).io.feedback_slow.valid)
1791c590fb32Scz4e    }
1792c590fb32Scz4e  }
1793c590fb32Scz4e
1794c590fb32Scz4e  lsq.io.exceptionAddr.isStore := io.ooo_to_mem.isStoreException
1795c590fb32Scz4e  // Exception address is used several cycles after flush.
1796c590fb32Scz4e  // We delay it by 10 cycles to ensure its flush safety.
  // atomicsException is a sticky flag: set when the atomics unit reports an exception,
  // cleared 10 cycles after a redirect. Clearing takes priority over setting.
1797c590fb32Scz4e  val atomicsException = RegInit(false.B)
1798c590fb32Scz4e  when (DelayN(redirect.valid, 10) && atomicsException) {
1799c590fb32Scz4e    atomicsException := false.B
1800c590fb32Scz4e  }.elsewhen (atomicsUnit.io.exceptionInfo.valid) {
1801c590fb32Scz4e    atomicsException := true.B
1802c590fb32Scz4e  }
1803c590fb32Scz4e
  // Misalign buffers can overwrite the exception info. When both buffers request it in
  // the same cycle, the load misalign buffer's fields are selected.
1804c590fb32Scz4e  val misalignBufExceptionOverwrite = loadMisalignBuffer.io.overwriteExpBuf.valid || storeMisalignBuffer.io.overwriteExpBuf.valid
1805c590fb32Scz4e  val misalignBufExceptionVaddr = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
1806c590fb32Scz4e    loadMisalignBuffer.io.overwriteExpBuf.vaddr,
1807c590fb32Scz4e    storeMisalignBuffer.io.overwriteExpBuf.vaddr
1808c590fb32Scz4e  )
1809c590fb32Scz4e  val misalignBufExceptionIsHyper = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
1810c590fb32Scz4e    loadMisalignBuffer.io.overwriteExpBuf.isHyper,
1811c590fb32Scz4e    storeMisalignBuffer.io.overwriteExpBuf.isHyper
1812c590fb32Scz4e  )
1813c590fb32Scz4e  val misalignBufExceptionGpaddr = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
1814c590fb32Scz4e    loadMisalignBuffer.io.overwriteExpBuf.gpaddr,
1815c590fb32Scz4e    storeMisalignBuffer.io.overwriteExpBuf.gpaddr
1816c590fb32Scz4e  )
1817c590fb32Scz4e  val misalignBufExceptionIsForVSnonLeafPTE = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
1818c590fb32Scz4e    loadMisalignBuffer.io.overwriteExpBuf.isForVSnonLeafPTE,
1819c590fb32Scz4e    storeMisalignBuffer.io.overwriteExpBuf.isForVSnonLeafPTE
1820c590fb32Scz4e  )
1821c590fb32Scz4e
  // vSegmentException: same sticky set/clear scheme as atomicsException, driven by the
  // vector segment unit.
1822c590fb32Scz4e  val vSegmentException = RegInit(false.B)
1823c590fb32Scz4e  when (DelayN(redirect.valid, 10) && vSegmentException) {
1824c590fb32Scz4e    vSegmentException := false.B
1825c590fb32Scz4e  }.elsewhen (vSegmentUnit.io.exceptionInfo.valid) {
1826c590fb32Scz4e    vSegmentException := true.B
1827c590fb32Scz4e  }
  // Exception payloads are captured in the cycle the corresponding unit reports the
  // exception and held until the next report.
1828c590fb32Scz4e  val atomicsExceptionAddress = RegEnable(atomicsUnit.io.exceptionInfo.bits.vaddr, atomicsUnit.io.exceptionInfo.valid)
1829c590fb32Scz4e  val vSegmentExceptionVstart = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vstart, vSegmentUnit.io.exceptionInfo.valid)
1830c590fb32Scz4e  val vSegmentExceptionVl     = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vl, vSegmentUnit.io.exceptionInfo.valid)
1831c590fb32Scz4e  val vSegmentExceptionAddress = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vaddr, vSegmentUnit.io.exceptionInfo.valid)
1832c590fb32Scz4e  val atomicsExceptionGPAddress = RegEnable(atomicsUnit.io.exceptionInfo.bits.gpaddr, atomicsUnit.io.exceptionInfo.valid)
1833c590fb32Scz4e  val vSegmentExceptionGPAddress = RegEnable(vSegmentUnit.io.exceptionInfo.bits.gpaddr, vSegmentUnit.io.exceptionInfo.valid)
1834c590fb32Scz4e  val atomicsExceptionIsForVSnonLeafPTE = RegEnable(atomicsUnit.io.exceptionInfo.bits.isForVSnonLeafPTE, atomicsUnit.io.exceptionInfo.valid)
1835c590fb32Scz4e  val vSegmentExceptionIsForVSnonLeafPTE = RegEnable(vSegmentUnit.io.exceptionInfo.bits.isForVSnonLeafPTE, vSegmentUnit.io.exceptionInfo.valid)
1836c590fb32Scz4e
  // Source priority for the exception address:
  // atomics > misalign-buffer overwrite > vector segment > LSQ.
1837c590fb32Scz4e  val exceptionVaddr = Mux(
1838c590fb32Scz4e    atomicsException,
1839c590fb32Scz4e    atomicsExceptionAddress,
1840c590fb32Scz4e    Mux(misalignBufExceptionOverwrite,
1841c590fb32Scz4e      misalignBufExceptionVaddr,
1842c590fb32Scz4e      Mux(vSegmentException,
1843c590fb32Scz4e        vSegmentExceptionAddress,
1844c590fb32Scz4e        lsq.io.exceptionAddr.vaddr
1845c590fb32Scz4e      )
1846c590fb32Scz4e    )
1847c590fb32Scz4e  )
1848c590fb32Scz4e  // whether vaddr need ext or is hyper inst:
1849c590fb32Scz4e  // VaNeedExt: atomicsException -> false; misalignBufExceptionOverwrite -> true; vSegmentException -> false
1850c590fb32Scz4e  // IsHyper: atomicsException -> false; vSegmentException -> false
  // In both expressions the LSQ's own flag is used only when none of the three
  // higher-priority sources is active.
1851c590fb32Scz4e  val exceptionVaNeedExt = !atomicsException &&
1852c590fb32Scz4e    (misalignBufExceptionOverwrite ||
1853c590fb32Scz4e      (!vSegmentException && lsq.io.exceptionAddr.vaNeedExt))
1854c590fb32Scz4e  val exceptionIsHyper = !atomicsException &&
1855c590fb32Scz4e    (misalignBufExceptionOverwrite && misalignBufExceptionIsHyper ||
1856c590fb32Scz4e      (!vSegmentException && lsq.io.exceptionAddr.isHyper && !misalignBufExceptionOverwrite))
1857c590fb32Scz4e
/**
  * Builds the XLEN-wide exception virtual address from `vaddr`.
  *
  * When `vaNeedExt` is low, `vaddr` is passed through unchanged. Otherwise the address is
  * extended according to the translation scheme selected by the privilege mode and the
  * mode fields of satp / vsatp / hgatp:
  *  - bare:   zero-extend the low PAddrBits bits
  *  - Sv39:   sign-extend the low 39 bits
  *  - Sv48:   sign-extend the low 48 bits
  *  - Sv39x4: zero-extend the low 41 bits
  *  - Sv48x4: zero-extend the low 50 bits
  *
  * @param mode      privilege mode, compared against CSRConst.ModeM
  * @param isVirt    virtualization flag (the caller also sets it for hyper instructions)
  * @param vaNeedExt whether the address needs to be extended at all
  * @param satp      satp bundle; only `mode` is read
  * @param vsatp     vsatp bundle; only `mode` is read
  * @param hgatp     hgatp bundle; only `mode` is read
  * @param vaddr     raw exception address
  * @return the XLEN-wide exception address
  */
def GenExceptionVa(
  mode: UInt, isVirt: Bool, vaNeedExt: Bool,
  satp: TlbSatpBundle, vsatp: TlbSatpBundle, hgatp: TlbHgatpBundle,
  vaddr: UInt
) = {
  // The widest slice taken below is 48 + 2 bits.
  require(VAddrBits >= 50)

  // Mode encodings compared against: 0 = none, 8 = Sv39 / Sv39x4, 9 = Sv48 / Sv48x4.
  val satpNone = satp.mode === 0.U
  val satpSv39 = satp.mode === 8.U
  val satpSv48 = satp.mode === 9.U

  val vsatpNone = vsatp.mode === 0.U
  val vsatpSv39 = vsatp.mode === 8.U
  val vsatpSv48 = vsatp.mode === 9.U

  val hgatpNone = hgatp.mode === 0.U
  val hgatpSv39x4 = hgatp.mode === 8.U
  val hgatpSv48x4 = hgatp.mode === 9.U

  // For !isVirt, mode check is necessary, as we don't want virtual memory in M-mode.
  // For isVirt, mode check is unnecessary, as virt won't be 1 in M-mode.
  // Also, isVirt includes Hyper Insts, which don't care mode either.
  val inMachineMode = mode === CSRConst.ModeM
  val nonVirtBelowM = !isVirt && !inMachineMode
  val vsStageOff    = isVirt && vsatpNone

  val useBareAddr =
    (vsStageOff && hgatpNone) ||
    (!isVirt && inMachineMode) ||
    (nonVirtBelowM && satpNone)
  val useSv39Addr   = (isVirt && vsatpSv39) || (nonVirtBelowM && satpSv39)
  val useSv48Addr   = (isVirt && vsatpSv48) || (nonVirtBelowM && satpSv48)
  val useSv39x4Addr = vsStageOff && hgatpSv39x4
  val useSv48x4Addr = vsStageOff && hgatpSv48x4

  // One-hot selection among the candidate extensions.
  val extendedVa = Mux1H(Seq(
    useBareAddr   -> ZeroExt(vaddr(PAddrBits - 1, 0), XLEN),
    useSv39Addr   -> SignExt(vaddr.take(39), XLEN),
    useSv48Addr   -> SignExt(vaddr.take(48), XLEN),
    useSv39x4Addr -> ZeroExt(vaddr.take(39 + 2), XLEN),
    useSv48x4Addr -> ZeroExt(vaddr.take(48 + 2), XLEN),
  ))

  val ExceptionVa = Wire(UInt(XLEN.W))
  ExceptionVa := Mux(vaNeedExt, extendedVa, vaddr)
  ExceptionVa
}
1915c590fb32Scz4e
// Exception address reported to the backend, registered for one cycle.
io.mem_to_ooo.lsqio.vaddr := RegNext(GenExceptionVa(
  mode      = tlbcsr.priv.dmode,
  isVirt    = tlbcsr.priv.virt || exceptionIsHyper,
  vaNeedExt = exceptionVaNeedExt,
  satp      = tlbcsr.satp,
  vsatp     = tlbcsr.vsatp,
  hgatp     = tlbcsr.hgatp,
  vaddr     = exceptionVaddr
))

// vsegment instruction is executed atomic, which mean atomicsException and vSegmentException should not raise at the same time.
XSError(atomicsException && vSegmentException, "atomicsException and vSegmentException raise at the same time!")

// vstart / vl come from the segment unit while its exception flag is set, else from the LSQ.
io.mem_to_ooo.lsqio.vstart := RegNext(
  Mux(vSegmentException, vSegmentExceptionVstart, lsq.io.exceptionAddr.vstart)
)
io.mem_to_ooo.lsqio.vl := RegNext(
  Mux(vSegmentException, vSegmentExceptionVl, lsq.io.exceptionAddr.vl)
)

XSError(atomicsException && atomicsUnit.io.in.valid, "new instruction before exception triggers\n")

// Priority, first match wins: atomics > misalign-buffer overwrite > vector segment > LSQ.
io.mem_to_ooo.lsqio.gpaddr := RegNext(MuxCase(
  lsq.io.exceptionAddr.gpaddr,
  Seq(
    atomicsException              -> atomicsExceptionGPAddress,
    misalignBufExceptionOverwrite -> misalignBufExceptionGpaddr,
    vSegmentException             -> vSegmentExceptionGPAddress
  )
))
io.mem_to_ooo.lsqio.isForVSnonLeafPTE := RegNext(MuxCase(
  lsq.io.exceptionAddr.isForVSnonLeafPTE,
  Seq(
    atomicsException              -> atomicsExceptionIsForVSnonLeafPTE,
    misalignBufExceptionOverwrite -> misalignBufExceptionIsForVSnonLeafPTE,
    vSegmentException             -> vSegmentExceptionIsForVSnonLeafPTE
  )
))
// Signals forwarded from the top level to the backend through MemBlock.
locally {
  val bypass = io.mem_to_ooo.topToBackendBypass
  val extIntr = bypass.externalInterrupt

  bypass.hartId      := io.hartId
  bypass.l2FlushDone := RegNext(io.l2_flush_done)

  // Interrupt wires are taken from the interrupt sink nodes of the outer LazyModule.
  extIntr.msip  := outer.clint_int_sink.in.head._1(0)
  extIntr.mtip  := outer.clint_int_sink.in.head._1(1)
  extIntr.meip  := outer.plic_int_sink.in.head._1(0)
  extIntr.seip  := outer.plic_int_sink.in.last._1(0)
  extIntr.debug := outer.debug_int_sink.in.head._1(0)
  // nmi_31 also includes the first wire of beu_local_int_sink.
  extIntr.nmi.nmi_31 := outer.nmi_int_sink.in.head._1(0) | outer.beu_local_int_sink.in.head._1(0)
  extIntr.nmi.nmi_43 := outer.nmi_int_sink.in.head._1(1)

  bypass.msiInfo   := DelayNWithValid(io.fromTopToBackend.msiInfo, 1)
  bypass.clintTime := DelayNWithValid(io.fromTopToBackend.clintTime, 1)
}

// Memory status flags, each registered once before leaving the block.
io.memInfo.sqFull         := RegNext(lsq.io.sqFull)
io.memInfo.lqFull         := RegNext(lsq.io.lqFull)
io.memInfo.dcacheMSHRFull := RegNext(dcache.io.mshrFull)

// Pass-through signals; some go through a register stage, others are wired directly.
io.inner_hartId             := io.hartId
io.inner_reset_vector       := RegNext(io.outer_reset_vector)
io.outer_cpu_halt           := io.ooo_to_mem.backendToTopBypass.cpuHalted
io.outer_l2_flush_en        := io.ooo_to_mem.csrCtrl.flush_l2_enable
io.outer_power_down_en      := io.ooo_to_mem.csrCtrl.power_down_enable
io.outer_cpu_critical_error := io.ooo_to_mem.backendToTopBypass.cpuCriticalError
io.outer_beu_errors_icache  := RegNext(io.inner_beu_errors_icache)
io.inner_hc_perfEvents <> RegNext(io.outer_hc_perfEvents)
1981c590fb32Scz4e
// vector segmentUnit
locally {
  val segIo = vSegmentUnit.io
  val headIssue = io.ooo_to_mem.issueVldu.head
  val load0 = dcache.io.lsu.load(0)

  // Input comes from the first vector issue port.
  segIo.in.bits <> headIssue.bits
  segIo.in.valid := isSegment && headIssue.valid // is segment instruction

  // TLB response, PMP response and dcache read response are taken from the first
  // dtlb_reqs entry, the first pmp_check entry and dcache load port 0.
  segIo.dtlb.resp.bits <> dtlb_reqs.take(LduCnt).head.resp.bits
  segIo.dtlb.resp.valid <> dtlb_reqs.take(LduCnt).head.resp.valid
  segIo.pmpResp <> pmp_check.head.resp
  segIo.flush_sbuffer.empty := stIsEmpty
  segIo.redirect <> redirect
  segIo.rdcache.resp.bits := load0.resp.bits
  segIo.rdcache.resp.valid := load0.resp.valid
  segIo.rdcache.s2_bank_conflict := load0.s2_bank_conflict

  // -------------------------
  // Vector Segment Triggers
  // -------------------------
  val trigger = segIo.fromCsrTrigger
  trigger.tdataVec := tdata
  trigger.tEnableVec := tEnable
  trigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
  trigger.debugMode := debugMode
}
2000c590fb32Scz4e
// reset tree of MemBlock
if (p(DebugOptionsKey).ResetGen) {
  // Optional prefetchers join the left tree only when they are instantiated.
  val optionalPrefetcherNodes =
    prefetcherOpt.map(ModuleNode(_)).toList ++ l1PrefetcherOpt.map(ModuleNode(_)).toList

  val leftResetTree = ResetGenNode(
    Seq(
      ModuleNode(ptw),
      ModuleNode(ptw_to_l2_buffer),
      ModuleNode(lsq),
      ModuleNode(dtlb_st_tlb_st),
      ModuleNode(dtlb_prefetch_tlb_prefetch),
      ModuleNode(pmp)
    )
    ++ pmp_checkers.map(ModuleNode(_))
    ++ optionalPrefetcherNodes
  )
  // The right tree also carries the reset cell for io.reset_backend.
  val rightResetTree = ResetGenNode(
    Seq(
      ModuleNode(sbuffer),
      ModuleNode(dtlb_ld_tlb_ld),
      ModuleNode(dcache),
      ModuleNode(l1d_to_l2_buffer),
      CellNode(io.reset_backend)
    )
  )
  val mbistReset = io.sramTestBypass.fromL2Top.mbistReset
  ResetGen(leftResetTree, reset, sim = false, mbistReset)
  ResetGen(rightResetTree, reset, sim = false, mbistReset)
} else {
  io.reset_backend := DontCare
}
io.resetInFrontendBypass.toL2Top := io.resetInFrontendBypass.fromFrontend
// trace interface
val traceToL2Top = io.traceCoreInterfaceBypass.toL2Top
val traceFromBackend = io.traceCoreInterfaceBypass.fromBackend

// Encoder -> backend direction: one register stage.
traceFromBackend.fromEncoder := RegNext(traceToL2Top.fromEncoder)

// trap is latched only when group 0 is valid and its itype is a trap;
// priv is latched whenever group 0 is valid.
traceToL2Top.toEncoder.trap := RegEnable(
  traceFromBackend.toEncoder.trap,
  traceFromBackend.toEncoder.groups(0).valid && Itype.isTrap(traceFromBackend.toEncoder.groups(0).bits.itype)
)
traceToL2Top.toEncoder.priv := RegEnable(
  traceFromBackend.toEncoder.priv,
  traceFromBackend.toEncoder.groups(0).valid
)

for (group <- 0 until TraceGroupNum) {
  val fromBackend = traceFromBackend.toEncoder.groups(group)
  val toL2Top = traceToL2Top.toEncoder.groups(group)

  // valid, iretire and itype are registered every cycle.
  toL2Top.valid        := RegNext(fromBackend.valid)
  toL2Top.bits.iretire := RegNext(fromBackend.bits.iretire)
  toL2Top.bits.itype   := RegNext(fromBackend.bits.itype)

  // ilastsize and iaddr are latched only when the group is valid.
  toL2Top.bits.ilastsize := RegEnable(fromBackend.bits.ilastsize, fromBackend.valid)
  // iaddr = latched iaddr + (latched ftqOffset << instOffsetBits); the offset is 0 when
  // ftqOffset is absent.
  toL2Top.bits.iaddr :=
    RegEnable(fromBackend.bits.iaddr, fromBackend.valid) +
      (RegEnable(fromBackend.bits.ftqOffset.getOrElse(0.U), fromBackend.valid) << instOffsetBits)
}
2059c590fb32Scz4e
2060c590fb32Scz4e
// Default for the whole debug bundle; overridden below only when difftest is enabled.
io.mem_to_ooo.storeDebugInfo := DontCare
// store event difftest information
if (env.EnableDifftest) {
  for (port <- 0 until EnsbufferWidth) {
    val debugInfo = io.mem_to_ooo.storeDebugInfo(port)
    val difftestInfo = sbuffer.io.vecDifftestInfo(port)
    // robIdx goes out to the backend; the pc for that entry comes back in.
    debugInfo.robidx := difftestInfo.bits.robIdx
    difftestInfo.bits.pc := debugInfo.pc
  }
}
2069c590fb32Scz4e
// top-down info
locally {
  val topDown = io.debugTopDown
  val dcacheTopDown = dcache.io.debugTopDown
  val lsqTopDown = lsq.io.debugTopDown

  // The ROB head address is broadcast to dcache, the dtlb repeater and the LSQ.
  dcacheTopDown.robHeadVaddr := topDown.robHeadVaddr
  dtlbRepeater.io.debugTopDown.robHeadVaddr := topDown.robHeadVaddr
  lsqTopDown.robHeadVaddr := topDown.robHeadVaddr

  // Status reported back to the core.
  topDown.toCore.robHeadMissInDCache := dcacheTopDown.robHeadMissInDCache
  topDown.toCore.robHeadTlbReplay := lsqTopDown.robHeadTlbReplay
  topDown.toCore.robHeadTlbMiss := lsqTopDown.robHeadTlbMiss
  topDown.toCore.robHeadLoadVio := lsqTopDown.robHeadLoadVio
  topDown.toCore.robHeadLoadMSHR := lsqTopDown.robHeadLoadMSHR
  dcacheTopDown.robHeadOtherReplay := lsqTopDown.robHeadOtherReplay
  dcache.io.debugRolling := io.debugRolling

  val toBackend = io.topDownInfo.toBackend
  lsq.io.noUopsIssued := toBackend.noUopsIssued
  toBackend.lqEmpty := lsq.io.lqEmpty
  toBackend.sqEmpty := lsq.io.sqEmpty
  toBackend.l1Miss := dcache.io.l1Miss
  // L2 / L3 miss indications from L2Top are registered once on the way to the backend.
  toBackend.l2TopMiss.l2Miss := RegNext(io.topDownInfo.fromL2Top.l2Miss)
  toBackend.l2TopMiss.l3Miss := RegNext(io.topDownInfo.fromL2Top.l3Miss)
}
2088c590fb32Scz4e
// Per-cycle issue counters for the load/store issue ports, plus their perf counters.
  // Hybrid ports are counted as a load or a store according to the uop's fuType.
  val hyLdDeqCount = PopCount(io.ooo_to_mem.issueHya.map { hya =>
    hya.valid && FuType.isLoad(hya.bits.uop.fuType)
  })
  val hyStDeqCount = PopCount(io.ooo_to_mem.issueHya.map { hya =>
    hya.valid && FuType.isStore(hya.bits.uop.fuType)
  })
  // `+&` is the width-expanding add, so the sums cannot wrap.
  val ldDeqCount = PopCount(io.ooo_to_mem.issueLda.map(_.valid)) +& hyLdDeqCount
  val stDeqCount = PopCount(io.ooo_to_mem.issueSta.take(StaCnt).map(_.valid)) +& hyStDeqCount
  val iqDeqCount = ldDeqCount +& stDeqCount

  // Each of load/store gets an accumulator and a histogram under the same perf name.
  Seq(
    ("load_iq_deq_count", ldDeqCount, LdExuCnt + 1),
    ("store_iq_deq_count", stDeqCount, StAddrCnt + 1)
  ).foreach { case (perfName, deqCount, histStop) =>
    XSPerfAccumulate(perfName, deqCount)
    XSPerfHistogram(perfName, deqCount, true.B, 0, histStop)
  }
  XSPerfAccumulate("ls_iq_deq_count", iqDeqCount)
2099c590fb32Scz4e
// Performance-event collection for MemBlock.
  // PFEvent receives the distributed CSR writes; entries 16 until 24 of its hpmevent
  // vector are the ones handed to the monitor below.
  val pfevent = Module(new PFEvent)
  pfevent.io.distribute_csr := csrCtrl.distribute_csr
  val csrevents = pfevent.io.hpmevent.slice(16, 24)

  // Event sources: the load units, sbuffer, lsq and dcache, the PTW (names prefixed with
  // "PTW_"), and the two block-level dequeue counters.
  val perfFromUnits = (loadUnits ++ Seq(sbuffer, lsq, dcache)).flatMap(_.getPerfEvents)
  val perfFromPTW = perfEventsPTW.map(ptwEvent => (s"PTW_${ptwEvent._1}", ptwEvent._2))
  val perfBlock = Seq(
    ("ldDeqCount", ldDeqCount),
    ("stDeqCount", stDeqCount)
  )
  // let index = 0 be no event
  val allPerfEvents = Seq(("noEvent", 0.U)) ++ perfFromUnits ++ perfFromPTW ++ perfBlock

  // Optionally print the event-index table while the design is being elaborated.
  if (printEventCoding) {
    allPerfEvents.zipWithIndex.foreach { case ((name, inc), i) =>
      // Explicit tuple: prints exactly what the multi-argument println form prints.
      println(("MemBlock perfEvents Set", name, inc, i))
    }
  }

  val allPerfInc = allPerfEvents.map { case (_, inc) => inc.asTypeOf(new PerfEvent) }
  val perfEvents = HPerfMonitor(csrevents, allPerfInc).getPerfEvents
  generatePerfEvent()
21204b2c87baS梁森 Liang Sen
// MBIST pipeline/interface, SRAM broadcast signals and the clock-gate test enable.
  // `mbistPl` is only dereferenced under `hasMbist`, the same flag passed to the placer.
  private val mbistPl = MbistPipeline.PlaceMbistPipeline(Int.MaxValue, "MbistPipeMemBlk", hasMbist)
  private val mbistIntf = if (hasMbist) {
    val nodeParams = mbistPl.get.nodeParams
    val intf = Some(Module(new MbistInterface(
      params = Seq(nodeParams),
      ids = Seq(mbistPl.get.childrenIds),
      name = "MbistIntfMemBlk",
      pipelineNum = 1
    )))
    // Hook the single pipeline up to the interface and register it for the CSV output.
    intf.get.toPipeline.head <> mbistPl.get.mbist
    mbistPl.get.registerCSV(intf.get.info, "MbistMemBlk")
    // The interface's mbist port has no driver yet: tie it to DontCare and mark it
    // dontTouch so it is not optimized away.
    intf.get.mbist := DontCare
    dontTouch(intf.get.mbist)
    //TODO: add mbist controller connections here
    intf
  } else {
    None
  }
  // Top-level SRAM broadcast bundle, present only when SRAM test support is enabled.
  private val sigFromSrams = Option.when(hasSramTest)(SramHelper.genBroadCastBundleTop())
  private val cg = ClockGate.genTeSrc
  dontTouch(cg)

  // Default first; the hasMbist branch below overrides it (last connect wins).
  sigFromSrams.foreach(_.mbist := DontCare)
  if (hasMbist) {
    // NOTE(review): `.get` assumes hasMbist implies hasSramTest — confirm where they are defined.
    sigFromSrams.get.mbist := io.sramTestBypass.fromL2Top.mbist.get
    // Pass the MBIST signals and MBIST reset from L2Top on to the frontend and backend.
    io.sramTestBypass.toFrontend.mbist.get := io.sramTestBypass.fromL2Top.mbist.get
    io.sramTestBypass.toFrontend.mbistReset.get := io.sramTestBypass.fromL2Top.mbistReset.get
    io.sramTestBypass.toBackend.mbist.get := io.sramTestBypass.fromL2Top.mbist.get
    io.sramTestBypass.toBackend.mbistReset.get := io.sramTestBypass.fromL2Top.mbistReset.get
    cg.cgen := io.sramTestBypass.fromL2Top.mbist.get.cgen
  } else {
    cg.cgen := false.B
  }
2154*602aa9f1Scz4e
// sram debug
  // The SRAM control word from L2Top is registered once; it exists only when hasSramCtl.
  val sramCtl = Option.when(hasSramCtl)(RegNext(io.sramTestBypass.fromL2Top.sramCtl.get))
  // Default first; the field assignments below override it when both Options are defined.
  sigFromSrams.foreach(_.sramCtl := DontCare)
  for {
    sig <- sigFromSrams
    ctl <- sramCtl
  } {
    sig.sramCtl.RTSEL := ctl(1, 0) // CFG[1 : 0]
    sig.sramCtl.WTSEL := ctl(3, 2) // CFG[3 : 2]
    sig.sramCtl.MCR   := ctl(5, 4) // CFG[5 : 4]
    sig.sramCtl.MCW   := ctl(7, 6) // CFG[7 : 6]
  }
  // Hand the registered control word on to the frontend.
  sramCtl.foreach { ctl =>
    io.sramTestBypass.toFrontend.sramCtl.get := ctl
  }
2168c590fb32Scz4e}
2169c590fb32Scz4e
/**
  * LazyModule wrapper around [[MemBlockInlined]].
  *
  * It instantiates the inlined memory block as `inner` and exposes it through
  * [[MemBlockImp]]. This wrapper itself opts out of inlining (`shouldBeInlined = false`).
  */
class MemBlock()(implicit p: Parameters) extends LazyModule with HasXSParameter {
  override def shouldBeInlined: Boolean = false

  // The actual memory block; MemBlockImp reaches its ports through this member.
  val inner: MemBlockInlined = LazyModule(new MemBlockInlined())

  lazy val module: MemBlockImp = new MemBlockImp(this)
}
2178c590fb32Scz4e
/**
  * Module implementation of [[MemBlock]].
  *
  * Clones the `io` and `io_perf` ports of the wrapped inner module and bulk-connects
  * them, so the wrapper presents the same interface. When the `ResetGen` debug option
  * is set, it also invokes `ResetGen` for the inner module.
  *
  * @param wrapper the enclosing [[MemBlock]] LazyModule
  */
class MemBlockImp(wrapper: MemBlock) extends LazyModuleImp(wrapper) {
  // Shorthand for the wrapped implementation; a `def`, so no extra member value is created.
  private def innerImp = wrapper.inner.module

  val io = IO(innerImp.io.cloneType)
  val io_perf = IO(innerImp.io_perf.cloneType)
  io <> innerImp.io
  io_perf <> innerImp.io_perf

  if (p(DebugOptionsKey).ResetGen) {
    // Arguments: the inner module as the only reset node, this module's reset,
    // `sim = false`, and the mbistReset received from L2Top.
    ResetGen(
      ResetGenNode(Seq(ModuleNode(innerImp))),
      reset,
      sim = false,
      io.sramTestBypass.fromL2Top.mbistReset
    )
  }
}
2192