xref: /XiangShan/src/main/scala/xiangshan/mem/MemBlock.scala (revision 05cc6da9631ca9da7abb3d03ae37e832778a8a0e)
1c590fb32Scz4e/***************************************************************************************
2c590fb32Scz4e* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3c590fb32Scz4e* Copyright (c) 2020-2021 Peng Cheng Laboratory
4c590fb32Scz4e*
5c590fb32Scz4e* XiangShan is licensed under Mulan PSL v2.
6c590fb32Scz4e* You can use this software according to the terms and conditions of the Mulan PSL v2.
7c590fb32Scz4e* You may obtain a copy of Mulan PSL v2 at:
8c590fb32Scz4e*          http://license.coscl.org.cn/MulanPSL2
9c590fb32Scz4e*
10c590fb32Scz4e* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11c590fb32Scz4e* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12c590fb32Scz4e* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13c590fb32Scz4e*
14c590fb32Scz4e* See the Mulan PSL v2 for more details.
15c590fb32Scz4e***************************************************************************************/
16c590fb32Scz4e
17c590fb32Scz4epackage xiangshan.mem
18c590fb32Scz4e
19c590fb32Scz4eimport org.chipsalliance.cde.config.Parameters
20c590fb32Scz4eimport chisel3._
21c590fb32Scz4eimport chisel3.util._
22c590fb32Scz4eimport freechips.rocketchip.diplomacy._
23c590fb32Scz4eimport freechips.rocketchip.diplomacy.{BundleBridgeSource, LazyModule, LazyModuleImp}
24c590fb32Scz4eimport freechips.rocketchip.interrupts.{IntSinkNode, IntSinkPortSimple}
25c590fb32Scz4eimport freechips.rocketchip.tile.HasFPUParameters
26c590fb32Scz4eimport freechips.rocketchip.tilelink._
27c590fb32Scz4eimport utils._
28c590fb32Scz4eimport utility._
29602aa9f1Scz4eimport utility.mbist.{MbistInterface, MbistPipeline}
30602aa9f1Scz4eimport utility.sram.{SramMbistBundle, SramBroadcastBundle, SramHelper}
318cfc24b2STang Haojinimport system.{HasSoCParameter, SoCParamsKey}
32c590fb32Scz4eimport xiangshan._
33c590fb32Scz4eimport xiangshan.ExceptionNO._
34c590fb32Scz4eimport xiangshan.frontend.HasInstrMMIOConst
35c590fb32Scz4eimport xiangshan.backend.Bundles.{DynInst, MemExuInput, MemExuOutput}
36c590fb32Scz4eimport xiangshan.backend.ctrlblock.{DebugLSIO, LsTopdownInfo}
37c590fb32Scz4eimport xiangshan.backend.exu.MemExeUnit
38c590fb32Scz4eimport xiangshan.backend.fu._
39c590fb32Scz4eimport xiangshan.backend.fu.FuType._
40a67fd0f5SGuanghui Chengimport xiangshan.backend.fu.NewCSR.{CsrTriggerBundle, TriggerUtil, PFEvent}
41075d4937Sjunxiong-jiimport xiangshan.backend.fu.util.{CSRConst, SdtrigExt}
42c590fb32Scz4eimport xiangshan.backend.{BackendToTopBundle, TopToBackendBundle}
43c590fb32Scz4eimport xiangshan.backend.rob.{RobDebugRollingIO, RobPtr, RobLsqIO}
44c590fb32Scz4eimport xiangshan.backend.datapath.NewPipelineConnect
45c590fb32Scz4eimport xiangshan.backend.trace.{Itype, TraceCoreInterface}
46c590fb32Scz4eimport xiangshan.backend.Bundles._
47c590fb32Scz4eimport xiangshan.mem._
48c590fb32Scz4eimport xiangshan.mem.mdp._
499e12e8edScz4eimport xiangshan.mem.Bundles._
50c590fb32Scz4eimport xiangshan.mem.prefetch.{BasePrefecher, L1Prefetcher, SMSParams, SMSPrefetcher}
51c590fb32Scz4eimport xiangshan.cache._
52c590fb32Scz4eimport xiangshan.cache.mmu._
534b2c87baS梁森 Liang Senimport coupledL2.PrefetchRecv
548cfc24b2STang Haojinimport utility.mbist.{MbistInterface, MbistPipeline}
558cfc24b2STang Haojinimport utility.sram.{SramBroadcastBundle, SramHelper}
56602aa9f1Scz4e
trait HasMemBlockParameters extends HasXSParameter {
  // number of memory units
  val LduCnt  = backendParams.LduCnt   // pure load units
  val StaCnt  = backendParams.StaCnt   // store-address units
  val StdCnt  = backendParams.StdCnt   // store-data units
  val HyuCnt  = backendParams.HyuCnt   // hybrid (load + store) units
  val VlduCnt = backendParams.VlduCnt  // vector load units
  val VstuCnt = backendParams.VstuCnt  // vector store units

  // derived counts: hybrid units contribute to both load and store-address sides
  val LdExuCnt  = LduCnt + HyuCnt
  val StAddrCnt = StaCnt + HyuCnt
  val StDataCnt = StdCnt
  val MemExuCnt = LduCnt + HyuCnt + StaCnt + StdCnt
  val MemAddrExtCnt = LdExuCnt + StaCnt
  val MemVExuCnt = VlduCnt + VstuCnt

  // dedicated load-writeback port indices
  val AtomicWBPort   = 0
  val MisalignWBPort = 1
  val UncacheWBPort  = 2
  // writeback ports that may carry non-cacheable responses; expressed with the
  // named indices above instead of magic numbers so they cannot drift apart
  val NCWBPorts = Seq(MisalignWBPort, UncacheWBPort)
}
78c590fb32Scz4e
// Base class for all MemBlock IO bundles; mixes in the memory-unit count parameters.
abstract class MemBlockBundle(implicit val p: Parameters) extends Bundle with HasMemBlockParameters
80c590fb32Scz4e
// Store-data functional unit: a single-stage pass-through that forwards
// source operand 0 as the store data, tagged with the uop's robIdx.
class Std(cfg: FuConfig)(implicit p: Parameters) extends FuncUnit(cfg) {
  private val req  = io.in
  private val resp = io.out

  // No internal buffering: handshake signals pass straight through.
  req.ready  := resp.ready
  resp.valid := req.valid

  // Default-initialize the whole output bundle first, then drive the two
  // fields this unit actually produces (later connects override earlier ones).
  resp.bits := 0.U.asTypeOf(resp.bits)
  resp.bits.res.data    := req.bits.data.src(0)
  resp.bits.ctrl.robIdx := req.bits.ctrl.robIdx
}
88c590fb32Scz4e
// Out-of-order backend -> MemBlock interface: CSR/fence control, ROB-side
// load/store-queue bookkeeping, and the scalar/vector issue ports.
class ooo_to_mem(implicit p: Parameters) extends MemBlockBundle {
  // Backend-to-top signals that only pass through MemBlock on their way out.
  val backendToTopBypass = Flipped(new BackendToTopBundle)

  // Load fast-path hints, one entry per load-capable execution unit
  // (LduCnt pure load units + HyuCnt hybrid units).
  val loadFastMatch = Vec(LdExuCnt, Input(UInt(LdExuCnt.W)))
  val loadFastFuOpType = Vec(LdExuCnt, Input(FuOpType()))
  val loadFastImm = Vec(LdExuCnt, Input(UInt(12.W))) // 12-bit load immediate
  val sfence = Input(new SfenceBundle)
  val tlbCsr = Input(new TlbCsrBundle)
  // ROB-side queue status: commit counts, pending uncached/MMIO accesses, and
  // the ROB pointers tracking the pending entry.
  val lsqio = new Bundle {
    val lcommit = Input(UInt(log2Up(CommitWidth + 1).W)) // load commit count, up to CommitWidth
    val scommit = Input(UInt(log2Up(CommitWidth + 1).W)) // store commit count, up to CommitWidth
    val pendingMMIOld = Input(Bool())
    val pendingld = Input(Bool())
    val pendingst = Input(Bool())
    val pendingVst = Input(Bool())
    val commit = Input(Bool())
    val pendingPtr = Input(new RobPtr)
    val pendingPtrNext = Input(new RobPtr)
  }

  val isStoreException = Input(Bool())
  val isVlsException = Input(Bool())
  val csrCtrl = Flipped(new CustomCSRCtrlIO)
  val enqLsq = new LsqEnqIO
  val flushSb = Input(Bool()) // request to flush the store buffer

  val storePc = Vec(StaCnt, Input(UInt(VAddrBits.W))) // for hw prefetch
  val hybridPc = Vec(HyuCnt, Input(UInt(VAddrBits.W))) // for hw prefetch

  // Issue ports: one DecoupledIO per execution unit of each kind.
  val issueLda = MixedVec(Seq.fill(LduCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueSta = MixedVec(Seq.fill(StaCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueStd = MixedVec(Seq.fill(StdCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueHya = MixedVec(Seq.fill(HyuCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueVldu = MixedVec(Seq.fill(VlduCnt)(Flipped(DecoupledIO(new MemExuInput(isVector=true)))))

  // All issue ports flattened in a fixed order: lda, sta, std, hya, vldu.
  def issueUops = issueLda ++ issueSta ++ issueStd ++ issueHya ++ issueVldu
}
126c590fb32Scz4e
// MemBlock -> out-of-order backend interface: writeback ports, issue-queue
// feedback, LSQ occupancy/pointers, and debug/top-down information.
class mem_to_ooo(implicit p: Parameters) extends MemBlockBundle {
  val topToBackendBypass = new TopToBackendBundle

  val otherFastWakeup = Vec(LdExuCnt, ValidIO(new DynInst))
  // Queue bookkeeping consumed by the backend: cancel/dequeue counts sized by
  // the respective queue widths.
  val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize + 1).W))
  val sqCancelCnt = Output(UInt(log2Up(StoreQueueSize + 1).W))
  val sqDeq = Output(UInt(log2Ceil(EnsbufferWidth + 1).W))
  val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
  // used by VLSU issue queue, the vector store would wait all store before it, and the vector load would wait all load
  val sqDeqPtr = Output(new SqPtr)
  val lqDeqPtr = Output(new LqPtr)
  val stIn = Vec(StAddrCnt, ValidIO(new MemExuInput))
  val stIssuePtr = Output(new SqPtr())

  // Memory-ordering violation detected -> redirect request to the backend.
  val memoryViolation = ValidIO(new Redirect)
  val sbIsEmpty = Output(Bool()) // store buffer empty indication

  val lsTopdownInfo = Vec(LdExuCnt, Output(new LsTopdownInfo))

  // LSQ status reported to the backend (exception-related vaddr/gpaddr and
  // vector vstart/vl, plus per-load-pipe mmio flags and uops).
  val lsqio = new Bundle {
    val vaddr = Output(UInt(XLEN.W))
    val vstart = Output(UInt((log2Up(VLEN) + 1).W))
    val vl = Output(UInt((log2Up(VLEN) + 1).W))
    val gpaddr = Output(UInt(XLEN.W))
    val isForVSnonLeafPTE = Output(Bool())
    val mmio = Output(Vec(LoadPipelineWidth, Bool()))
    val uop = Output(Vec(LoadPipelineWidth, new DynInst))
    val lqCanAccept = Output(Bool())
    val sqCanAccept = Output(Bool())
  }

  // Per-ensbuffer-port store debug handshake: MemBlock reports robidx and
  // receives the corresponding pc back.
  val storeDebugInfo = Vec(EnsbufferWidth, new Bundle {
    val robidx = Output(new RobPtr)
    val pc     = Input(UInt(VAddrBits.W))
  })

  // Writeback ports, grouped per execution-unit kind.
  val writebackLda = Vec(LduCnt, DecoupledIO(new MemExuOutput))
  val writebackSta = Vec(StaCnt, DecoupledIO(new MemExuOutput))
  val writebackStd = Vec(StdCnt, DecoupledIO(new MemExuOutput))
  val writebackHyuLda = Vec(HyuCnt, DecoupledIO(new MemExuOutput))
  val writebackHyuSta = Vec(HyuCnt, DecoupledIO(new MemExuOutput))
  val writebackVldu = Vec(VlduCnt, DecoupledIO(new MemExuOutput(isVector = true)))
  // NOTE(review): the concatenation order below (sta, hyuLda, hyuSta, lda,
  // vldu, std) appears deliberate and presumably matches the backend's
  // writeback port layout — confirm before reordering.
  def writeBack: Seq[DecoupledIO[MemExuOutput]] = {
    writebackSta ++
      writebackHyuLda ++ writebackHyuSta ++
      writebackLda ++
      writebackVldu ++
      writebackStd
  }

  // Issue-queue feedback channels, one per unit of each kind.
  val ldaIqFeedback = Vec(LduCnt, new MemRSFeedbackIO)
  val staIqFeedback = Vec(StaCnt, new MemRSFeedbackIO)
  val hyuIqFeedback = Vec(HyuCnt, new MemRSFeedbackIO)
  val vstuIqFeedback= Vec(VstuCnt, new MemRSFeedbackIO(isVector = true))
  val vlduIqFeedback= Vec(VlduCnt, new MemRSFeedbackIO(isVector = true))
  val ldCancel = Vec(backendParams.LdExuCnt, new LoadCancelIO)
  val wakeup = Vec(backendParams.LdExuCnt, Valid(new DynInst))

  // Delayed load error indication, one bit per load-capable unit.
  val s3_delayed_load_error = Vec(LdExuCnt, Output(Bool()))
}
187c590fb32Scz4e
// Top-down performance-analysis outputs describing why the ROB-head
// instruction is stalled in the memory subsystem (signal names are the spec;
// all bits are driven by MemBlock).
class MemCoreTopDownIO extends Bundle {
  val robHeadMissInDCache = Output(Bool()) // ROB head misses in the DCache
  val robHeadTlbReplay = Output(Bool())    // ROB head replayed by the TLB
  val robHeadTlbMiss = Output(Bool())      // ROB head misses in the TLB
  val robHeadLoadVio = Output(Bool())      // ROB head hit a load violation
  val robHeadLoadMSHR = Output(Bool())     // ROB head waits on an MSHR
}
195c590fb32Scz4e
// Frontend (fetch) -> MemBlock interface: the ITLB's page-table-walk port.
class fetch_to_mem(implicit p: Parameters) extends XSBundle{
  val itlb = Flipped(new TlbPtwIO())
}
199c590fb32Scz4e
// triple buffer applied in i-mmio path (two at MemBlock, one at L2Top)
class InstrUncacheBuffer()(implicit p: Parameters) extends LazyModule with HasInstrMMIOConst {
  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
  lazy val module = new InstrUncacheBufferImpl

  class InstrUncacheBufferImpl extends LazyModuleImp(this) {
    for (((bundleIn, _), (bundleOut, _)) <- node.in zip node.out) {
      // two back-to-back default buffers on each direction of the link
      bundleOut.a <> BufferParams.default(BufferParams.default(bundleIn.a))
      bundleIn.d <> BufferParams.default(BufferParams.default(bundleOut.d))

      // Pin every A-channel field except valid/ready/address so that only
      // full-width Get requests flow here and the MemBlock port shape stays
      // unchanged after the buffer is inserted (later connects override the
      // bulk `<>` above).
      bundleOut.a.bits.data := 0.U
      bundleOut.a.bits.mask := Fill(mmioBusBytes, 1.U(1.W))
      bundleOut.a.bits.opcode := 4.U // TileLink Get
      bundleOut.a.bits.size := log2Ceil(mmioBusBytes).U
      bundleOut.a.bits.source := 0.U
    }
  }
}
220c590fb32Scz4e
// triple buffer applied in L1I$-L2 path (two at MemBlock, one at L2Top)
class ICacheBuffer()(implicit p: Parameters) extends LazyModule {
  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
  lazy val module = new ICacheBufferImpl

  class ICacheBufferImpl extends LazyModuleImp(this) {
    for (((fromICache, _), (toL2, _)) <- node.in zip node.out) {
      // two back-to-back default buffers in each direction
      toL2.a <> BufferParams.default(BufferParams.default(fromICache.a))
      fromICache.d <> BufferParams.default(BufferParams.default(toL2.d))
    }
  }
}
233c590fb32Scz4e
// Double TileLink buffer on the ICache control port (mirrors ICacheBuffer).
class ICacheCtrlBuffer()(implicit p: Parameters) extends LazyModule {
  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
  lazy val module = new ICacheCtrlBufferImpl

  class ICacheCtrlBufferImpl extends LazyModuleImp(this) {
    node.in.zip(node.out).foreach { case ((inBundle, _), (outBundle, _)) =>
      // two back-to-back default buffers in each direction
      outBundle.a <> BufferParams.default(BufferParams.default(inBundle.a))
      inBundle.d <> BufferParams.default(BufferParams.default(outBundle.d))
    }
  }
}
245c590fb32Scz4e
// Frontend bus goes through MemBlock: this bridge holds the buffered TileLink
// nodes for the ICache, its control port, and the instruction-uncache path.
class FrontendBridge()(implicit p: Parameters) extends LazyModule {
  // suggestName keeps the generated IO port names stable.
  val icache_node = LazyModule(new ICacheBuffer()).suggestName("icache").node// to keep IO port name
  val icachectrl_node = LazyModule(new ICacheCtrlBuffer()).suggestName("icachectrl").node
  val instr_uncache_node = LazyModule(new InstrUncacheBuffer()).suggestName("instr_uncache").node
  // No module logic of its own: the child buffers carry all the connections.
  lazy val module = new LazyModuleImp(this) {
  }
}
254c590fb32Scz4e
// Diplomacy (LazyModule) wrapper for the memory block: owns the DCache,
// uncache path, L2 TLB (PTW), prefetch senders, frontend bridge and interrupt
// sinks, and wires up their TileLink graph.
class MemBlockInlined()(implicit p: Parameters) extends LazyModule
  with HasXSParameter {
  // Inline this wrapper into its parent during module elaboration.
  override def shouldBeInlined: Boolean = true

  val dcache = LazyModule(new DCacheWrapper())
  val uncache = LazyModule(new Uncache())
  val uncache_port = TLTempNode()
  val uncache_xbar = TLXbar()
  val ptw = LazyModule(new L2TLBWrapper())
  // Buffers toward L2; null when the corresponding path is not built
  // (softPTW simulation / no DCache). NOTE(review): Option would be safer
  // than null, but callers elsewhere depend on these exact members.
  val ptw_to_l2_buffer = if (!coreParams.softPTW) LazyModule(new TLBuffer) else null
  val l1d_to_l2_buffer = if (coreParams.dcacheParametersOpt.nonEmpty) LazyModule(new TLBuffer) else null
  val dcache_port = TLNameNode("dcache_client") // to keep dcache-L2 port name
  // Prefetch request senders: to L2 whenever a prefetcher is configured,
  // to L3 only when the SoC actually has an L3 cache.
  val l2_pf_sender_opt = coreParams.prefetcher.map(_ =>
    BundleBridgeSource(() => new PrefetchRecv)
  )
  val l3_pf_sender_opt = if (p(SoCParamsKey).L3CacheParamsOpt.nonEmpty) coreParams.prefetcher.map(_ =>
    BundleBridgeSource(() => new huancun.PrefetchRecv)
  ) else None
  val frontendBridge = LazyModule(new FrontendBridge)
  // interrupt sinks
  val clint_int_sink = IntSinkNode(IntSinkPortSimple(1, 2))
  val debug_int_sink = IntSinkNode(IntSinkPortSimple(1, 1))
  val plic_int_sink = IntSinkNode(IntSinkPortSimple(2, 1))
  val nmi_int_sink = IntSinkNode(IntSinkPortSimple(1, (new NonmaskableInterruptIO).elements.size))
  val beu_local_int_sink = IntSinkNode(IntSinkPortSimple(1, 1))

  // PTW goes to L2 through its buffer only when a real (hardware) PTW exists.
  if (!coreParams.softPTW) {
    ptw_to_l2_buffer.node := ptw.node
  }
  // Uncache traffic: client -> buffer -> xbar, then fanned out to the DCache's
  // uncache node (if present) and the outward uncache_port, each behind a
  // 2-deep buffer chain.
  uncache_xbar := TLBuffer() := uncache.clientNode
  if (dcache.uncacheNode.isDefined) {
    dcache.uncacheNode.get := TLBuffer.chainNode(2) := uncache_xbar
  }
  uncache_port := TLBuffer.chainNode(2) := uncache_xbar

  lazy val module = new MemBlockInlinedImp(this)
}
292c590fb32Scz4e
293c590fb32Scz4eclass MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer)
294c590fb32Scz4e  with HasXSParameter
295c590fb32Scz4e  with HasFPUParameters
296c590fb32Scz4e  with HasPerfEvents
2978cfc24b2STang Haojin  with HasSoCParameter
298c590fb32Scz4e  with HasL1PrefetchSourceParameter
299c590fb32Scz4e  with HasCircularQueuePtrHelper
300c590fb32Scz4e  with HasMemBlockParameters
301c590fb32Scz4e  with HasTlbConst
302c590fb32Scz4e  with SdtrigExt
303c590fb32Scz4e{
304c590fb32Scz4e  val io = IO(new Bundle {
305c590fb32Scz4e    val hartId = Input(UInt(hartIdLen.W))
306c590fb32Scz4e    val redirect = Flipped(ValidIO(new Redirect))
307c590fb32Scz4e
308c590fb32Scz4e    val ooo_to_mem = new ooo_to_mem
309c590fb32Scz4e    val mem_to_ooo = new mem_to_ooo
310c590fb32Scz4e    val fetch_to_mem = new fetch_to_mem
311c590fb32Scz4e
312c590fb32Scz4e    val ifetchPrefetch = Vec(LduCnt, ValidIO(new SoftIfetchPrefetchBundle))
313c590fb32Scz4e
314c590fb32Scz4e    // misc
315c590fb32Scz4e    val error = ValidIO(new L1CacheErrorInfo)
316c590fb32Scz4e    val memInfo = new Bundle {
317c590fb32Scz4e      val sqFull = Output(Bool())
318c590fb32Scz4e      val lqFull = Output(Bool())
319c590fb32Scz4e      val dcacheMSHRFull = Output(Bool())
320c590fb32Scz4e    }
321c590fb32Scz4e    val debug_ls = new DebugLSIO
322c590fb32Scz4e    val l2_hint = Input(Valid(new L2ToL1Hint()))
323c590fb32Scz4e    val l2PfqBusy = Input(Bool())
324c590fb32Scz4e    val l2_tlb_req = Flipped(new TlbRequestIO(nRespDups = 2))
325c590fb32Scz4e    val l2_pmp_resp = new PMPRespBundle
326c590fb32Scz4e    val l2_flush_done = Input(Bool())
327c590fb32Scz4e
328c590fb32Scz4e    val debugTopDown = new Bundle {
329c590fb32Scz4e      val robHeadVaddr = Flipped(Valid(UInt(VAddrBits.W)))
330c590fb32Scz4e      val toCore = new MemCoreTopDownIO
331c590fb32Scz4e    }
332c590fb32Scz4e    val debugRolling = Flipped(new RobDebugRollingIO)
333c590fb32Scz4e
334c590fb32Scz4e    // All the signals from/to frontend/backend to/from bus will go through MemBlock
335c590fb32Scz4e    val fromTopToBackend = Input(new Bundle {
3368cfc24b2STang Haojin      val msiInfo   = ValidIO(UInt(soc.IMSICParams.MSI_INFO_WIDTH.W))
337c590fb32Scz4e      val clintTime = ValidIO(UInt(64.W))
338c590fb32Scz4e    })
339c590fb32Scz4e    val inner_hartId = Output(UInt(hartIdLen.W))
340c590fb32Scz4e    val inner_reset_vector = Output(UInt(PAddrBits.W))
341c590fb32Scz4e    val outer_reset_vector = Input(UInt(PAddrBits.W))
342c590fb32Scz4e    val outer_cpu_halt = Output(Bool())
343c590fb32Scz4e    val outer_l2_flush_en = Output(Bool())
344c590fb32Scz4e    val outer_power_down_en = Output(Bool())
345c590fb32Scz4e    val outer_cpu_critical_error = Output(Bool())
3468cfc24b2STang Haojin    val outer_msi_ack = Output(Bool())
347c590fb32Scz4e    val inner_beu_errors_icache = Input(new L1BusErrorUnitInfo)
348c590fb32Scz4e    val outer_beu_errors_icache = Output(new L1BusErrorUnitInfo)
349c590fb32Scz4e    val inner_hc_perfEvents = Output(Vec(numPCntHc * coreParams.L2NBanks + 1, new PerfEvent))
350c590fb32Scz4e    val outer_hc_perfEvents = Input(Vec(numPCntHc * coreParams.L2NBanks + 1, new PerfEvent))
351c590fb32Scz4e
352c590fb32Scz4e    // reset signals of frontend & backend are generated in memblock
353c590fb32Scz4e    val reset_backend = Output(Reset())
    // Reset signal from frontend.
355c590fb32Scz4e    val resetInFrontendBypass = new Bundle{
356c590fb32Scz4e      val fromFrontend = Input(Bool())
357c590fb32Scz4e      val toL2Top      = Output(Bool())
358c590fb32Scz4e    }
359c590fb32Scz4e    val traceCoreInterfaceBypass = new Bundle{
360c590fb32Scz4e      val fromBackend = Flipped(new TraceCoreInterface(hasOffset = true))
361c590fb32Scz4e      val toL2Top     = new TraceCoreInterface
362c590fb32Scz4e    }
363c590fb32Scz4e
364c590fb32Scz4e    val topDownInfo = new Bundle {
365c590fb32Scz4e      val fromL2Top = Input(new TopDownFromL2Top)
366c590fb32Scz4e      val toBackend = Flipped(new TopDownInfo)
367c590fb32Scz4e    }
368602aa9f1Scz4e    val sramTestBypass = new Bundle() {
369602aa9f1Scz4e      val fromL2Top = new Bundle() {
370602aa9f1Scz4e        val mbist      = Option.when(hasMbist)(Input(new SramMbistBundle))
371602aa9f1Scz4e        val mbistReset = Option.when(hasMbist)(Input(new DFTResetSignals()))
372602aa9f1Scz4e        val sramCtl    = Option.when(hasSramCtl)(Input(UInt(64.W)))
373602aa9f1Scz4e      }
374602aa9f1Scz4e      val toFrontend = new Bundle() {
375602aa9f1Scz4e        val mbist      = Option.when(hasMbist)(Output(new SramMbistBundle))
376602aa9f1Scz4e        val mbistReset = Option.when(hasMbist)(Output(new DFTResetSignals()))
377602aa9f1Scz4e        val sramCtl    = Option.when(hasSramCtl)(Output(UInt(64.W)))
378602aa9f1Scz4e      }
379602aa9f1Scz4e      val toBackend = new Bundle() {
380602aa9f1Scz4e        val mbist      = Option.when(hasMbist)(Output(new SramMbistBundle))
381602aa9f1Scz4e        val mbistReset = Option.when(hasMbist)(Output(new DFTResetSignals()))
382602aa9f1Scz4e      }
383602aa9f1Scz4e    }
384c590fb32Scz4e  })
385c590fb32Scz4e
3861592abd1SYan Xu  io.mem_to_ooo.writeBack.zipWithIndex.foreach{ case (wb, i) =>
3871592abd1SYan Xu    PerfCCT.updateInstPos(wb.bits.uop.debug_seqNum, PerfCCT.InstPos.AtBypassVal.id.U, wb.valid, clock, reset)
3881592abd1SYan Xu  }
3891592abd1SYan Xu
390c590fb32Scz4e  dontTouch(io.inner_hartId)
391c590fb32Scz4e  dontTouch(io.inner_reset_vector)
392c590fb32Scz4e  dontTouch(io.outer_reset_vector)
393c590fb32Scz4e  dontTouch(io.outer_cpu_halt)
394c590fb32Scz4e  dontTouch(io.outer_l2_flush_en)
395c590fb32Scz4e  dontTouch(io.outer_power_down_en)
396c590fb32Scz4e  dontTouch(io.outer_cpu_critical_error)
397c590fb32Scz4e  dontTouch(io.inner_beu_errors_icache)
398c590fb32Scz4e  dontTouch(io.outer_beu_errors_icache)
399c590fb32Scz4e  dontTouch(io.inner_hc_perfEvents)
400c590fb32Scz4e  dontTouch(io.outer_hc_perfEvents)
401c590fb32Scz4e
402c590fb32Scz4e  val redirect = RegNextWithEnable(io.redirect)
403c590fb32Scz4e
404c590fb32Scz4e  private val dcache = outer.dcache.module
405c590fb32Scz4e  val uncache = outer.uncache.module
406c590fb32Scz4e
407c590fb32Scz4e  //val delayedDcacheRefill = RegNext(dcache.io.lsu.lsq)
408c590fb32Scz4e
409c590fb32Scz4e  val csrCtrl = DelayN(io.ooo_to_mem.csrCtrl, 2)
410c590fb32Scz4e  dcache.io.l2_pf_store_only := RegNext(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_store_only, false.B)
411c590fb32Scz4e  io.error <> DelayNWithValid(dcache.io.error, 2)
412c590fb32Scz4e  when(!csrCtrl.cache_error_enable){
413c590fb32Scz4e    io.error.bits.report_to_beu := false.B
414c590fb32Scz4e    io.error.valid := false.B
415c590fb32Scz4e  }
416c590fb32Scz4e
417c590fb32Scz4e  val loadUnits = Seq.fill(LduCnt)(Module(new LoadUnit))
418c590fb32Scz4e  val storeUnits = Seq.fill(StaCnt)(Module(new StoreUnit))
419c590fb32Scz4e  val stdExeUnits = Seq.fill(StdCnt)(Module(new MemExeUnit(backendParams.memSchdParams.get.issueBlockParams.find(_.StdCnt != 0).get.exuBlockParams.head)))
420c590fb32Scz4e  val hybridUnits = Seq.fill(HyuCnt)(Module(new HybridUnit)) // Todo: replace it with HybridUnit
421c590fb32Scz4e  val stData = stdExeUnits.map(_.io.out)
422c590fb32Scz4e  val exeUnits = loadUnits ++ storeUnits
423c590fb32Scz4e
424c590fb32Scz4e  // The number of vector load/store units is decoupled with the number of load/store units
425c590fb32Scz4e  val vlSplit = Seq.fill(VlduCnt)(Module(new VLSplitImp))
426c590fb32Scz4e  val vsSplit = Seq.fill(VstuCnt)(Module(new VSSplitImp))
427c590fb32Scz4e  val vlMergeBuffer = Module(new VLMergeBufferImp)
428c590fb32Scz4e  val vsMergeBuffer = Seq.fill(VstuCnt)(Module(new VSMergeBufferImp))
429c590fb32Scz4e  val vSegmentUnit  = Module(new VSegmentUnit)
430c590fb32Scz4e  val vfofBuffer    = Module(new VfofBuffer)
431c590fb32Scz4e
432c590fb32Scz4e  // misalign Buffer
433c590fb32Scz4e  val loadMisalignBuffer = Module(new LoadMisalignBuffer)
434c590fb32Scz4e  val storeMisalignBuffer = Module(new StoreMisalignBuffer)
435c590fb32Scz4e
436c590fb32Scz4e  val l1_pf_req = Wire(Decoupled(new L1PrefetchReq()))
437c590fb32Scz4e  dcache.io.sms_agt_evict_req.ready := false.B
438*05cc6da9SYanqin Li  val l1D_pf_enable = GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable, 2, Some(false.B))
439c590fb32Scz4e  val prefetcherOpt: Option[BasePrefecher] = coreParams.prefetcher.map {
440c590fb32Scz4e    case _: SMSParams =>
441c590fb32Scz4e      val sms = Module(new SMSPrefetcher())
442*05cc6da9SYanqin Li      val enableSMS = Constantin.createRecord(s"enableSMS$hartId", initValue = true)
443*05cc6da9SYanqin Li      // constantinCtrl && master switch csrCtrl && single switch csrCtrl
444*05cc6da9SYanqin Li      sms.io.enable := enableSMS && l1D_pf_enable &&
445*05cc6da9SYanqin Li        GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_recv_enable, 2, Some(false.B))
446c590fb32Scz4e      sms.io_agt_en := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_agt, 2, Some(false.B))
447c590fb32Scz4e      sms.io_pht_en := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_pht, 2, Some(false.B))
448c590fb32Scz4e      sms.io_act_threshold := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_active_threshold, 2, Some(12.U))
449c590fb32Scz4e      sms.io_act_stride := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_active_stride, 2, Some(30.U))
450c590fb32Scz4e      sms.io_stride_en := false.B
451c590fb32Scz4e      sms.io_dcache_evict <> dcache.io.sms_agt_evict_req
4524b2c87baS梁森 Liang Sen      val mbistSmsPl = MbistPipeline.PlaceMbistPipeline(1, "MbistPipeSms", hasMbist)
453c590fb32Scz4e      sms
454c590fb32Scz4e  }
455c590fb32Scz4e  prefetcherOpt.foreach{ pf => pf.io.l1_req.ready := false.B }
456c590fb32Scz4e  val hartId = p(XSCoreParamsKey).HartId
457c590fb32Scz4e  val l1PrefetcherOpt: Option[BasePrefecher] = coreParams.prefetcher.map {
458c590fb32Scz4e    case _ =>
459c590fb32Scz4e      val l1Prefetcher = Module(new L1Prefetcher())
4609db05eaeScz4e      val enableL1StreamPrefetcher = Constantin.createRecord(s"enableL1StreamPrefetcher$hartId", initValue = true)
461*05cc6da9SYanqin Li      // constantinCtrl && master switch csrCtrl && single switch csrCtrl
462*05cc6da9SYanqin Li      l1Prefetcher.io.enable := enableL1StreamPrefetcher && l1D_pf_enable &&
463*05cc6da9SYanqin Li        GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_stride, 2, Some(false.B))
464c590fb32Scz4e      l1Prefetcher.pf_ctrl <> dcache.io.pf_ctrl
465c590fb32Scz4e      l1Prefetcher.l2PfqBusy := io.l2PfqBusy
466c590fb32Scz4e
467c590fb32Scz4e      // stride will train on miss or prefetch hit
468c590fb32Scz4e      for (i <- 0 until LduCnt) {
469c590fb32Scz4e        val source = loadUnits(i).io.prefetch_train_l1
470c590fb32Scz4e        l1Prefetcher.stride_train(i).valid := source.valid && source.bits.isFirstIssue && (
471c590fb32Scz4e          source.bits.miss || isFromStride(source.bits.meta_prefetch)
472c590fb32Scz4e        )
473c590fb32Scz4e        l1Prefetcher.stride_train(i).bits := source.bits
474c590fb32Scz4e        val loadPc = RegNext(io.ooo_to_mem.issueLda(i).bits.uop.pc) // for s1
475c590fb32Scz4e        l1Prefetcher.stride_train(i).bits.uop.pc := Mux(
476c590fb32Scz4e          loadUnits(i).io.s2_ptr_chasing,
477c590fb32Scz4e          RegEnable(loadPc, loadUnits(i).io.s2_prefetch_spec),
478c590fb32Scz4e          RegEnable(RegEnable(loadPc, loadUnits(i).io.s1_prefetch_spec), loadUnits(i).io.s2_prefetch_spec)
479c590fb32Scz4e        )
480c590fb32Scz4e      }
481c590fb32Scz4e      for (i <- 0 until HyuCnt) {
482c590fb32Scz4e        val source = hybridUnits(i).io.prefetch_train_l1
483c590fb32Scz4e        l1Prefetcher.stride_train.drop(LduCnt)(i).valid := source.valid && source.bits.isFirstIssue && (
484c590fb32Scz4e          source.bits.miss || isFromStride(source.bits.meta_prefetch)
485c590fb32Scz4e        )
486c590fb32Scz4e        l1Prefetcher.stride_train.drop(LduCnt)(i).bits := source.bits
487c590fb32Scz4e        l1Prefetcher.stride_train.drop(LduCnt)(i).bits.uop.pc := Mux(
488c590fb32Scz4e          hybridUnits(i).io.ldu_io.s2_ptr_chasing,
489c590fb32Scz4e          RegNext(io.ooo_to_mem.hybridPc(i)),
490c590fb32Scz4e          RegNext(RegNext(io.ooo_to_mem.hybridPc(i)))
491c590fb32Scz4e        )
492c590fb32Scz4e      }
493c590fb32Scz4e      l1Prefetcher
494c590fb32Scz4e  }
495c590fb32Scz4e  // load prefetch to l1 Dcache
496c590fb32Scz4e  l1PrefetcherOpt match {
497c590fb32Scz4e    case Some(pf) => l1_pf_req <> Pipeline(in = pf.io.l1_req, depth = 1, pipe = false, name = Some("pf_queue_to_ldu_reg"))
498c590fb32Scz4e    case None =>
499c590fb32Scz4e      l1_pf_req.valid := false.B
500c590fb32Scz4e      l1_pf_req.bits := DontCare
501c590fb32Scz4e  }
502c590fb32Scz4e  val pf_train_on_hit = RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_train_on_hit, 2, Some(true.B))
503c590fb32Scz4e
504c590fb32Scz4e  loadUnits.zipWithIndex.map(x => x._1.suggestName("LoadUnit_"+x._2))
505c590fb32Scz4e  storeUnits.zipWithIndex.map(x => x._1.suggestName("StoreUnit_"+x._2))
506c590fb32Scz4e  hybridUnits.zipWithIndex.map(x => x._1.suggestName("HybridUnit_"+x._2))
507c590fb32Scz4e  val atomicsUnit = Module(new AtomicsUnit)
508c590fb32Scz4e
509c590fb32Scz4e
510c590fb32Scz4e  val ldaExeWbReqs = Wire(Vec(LduCnt, Decoupled(new MemExuOutput)))
511c590fb32Scz4e  // atomicsUnit will overwrite the source from ldu if it is about to writeback
512c590fb32Scz4e  val atomicWritebackOverride = Mux(
513c590fb32Scz4e    atomicsUnit.io.out.valid,
514c590fb32Scz4e    atomicsUnit.io.out.bits,
515c590fb32Scz4e    loadUnits(AtomicWBPort).io.ldout.bits
516c590fb32Scz4e  )
517c590fb32Scz4e  ldaExeWbReqs(AtomicWBPort).valid := atomicsUnit.io.out.valid || loadUnits(AtomicWBPort).io.ldout.valid
518c590fb32Scz4e  ldaExeWbReqs(AtomicWBPort).bits  := atomicWritebackOverride
519c590fb32Scz4e  atomicsUnit.io.out.ready := ldaExeWbReqs(AtomicWBPort).ready
520c590fb32Scz4e  loadUnits(AtomicWBPort).io.ldout.ready := ldaExeWbReqs(AtomicWBPort).ready
521c590fb32Scz4e
522c590fb32Scz4e  val st_data_atomics = Seq.tabulate(StdCnt)(i =>
523c590fb32Scz4e    stData(i).valid && FuType.storeIsAMO(stData(i).bits.uop.fuType)
524c590fb32Scz4e  )
525c590fb32Scz4e
526c590fb32Scz4e  // misalignBuffer will overwrite the source from ldu if it is about to writeback
527c590fb32Scz4e  val misalignWritebackOverride = Mux(
528c590fb32Scz4e    loadUnits(MisalignWBPort).io.ldout.valid,
529c590fb32Scz4e    loadUnits(MisalignWBPort).io.ldout.bits,
530c590fb32Scz4e    loadMisalignBuffer.io.writeBack.bits
531c590fb32Scz4e  )
532c590fb32Scz4e  ldaExeWbReqs(MisalignWBPort).valid    := loadMisalignBuffer.io.writeBack.valid || loadUnits(MisalignWBPort).io.ldout.valid
533c590fb32Scz4e  ldaExeWbReqs(MisalignWBPort).bits     := misalignWritebackOverride
534c590fb32Scz4e  loadMisalignBuffer.io.writeBack.ready := ldaExeWbReqs(MisalignWBPort).ready && !loadUnits(MisalignWBPort).io.ldout.valid
535c590fb32Scz4e  loadMisalignBuffer.io.loadOutValid    := loadUnits(MisalignWBPort).io.ldout.valid
536c590fb32Scz4e  loadMisalignBuffer.io.loadVecOutValid := loadUnits(MisalignWBPort).io.vecldout.valid
537c590fb32Scz4e  loadUnits(MisalignWBPort).io.ldout.ready := ldaExeWbReqs(MisalignWBPort).ready
538c590fb32Scz4e  ldaExeWbReqs(MisalignWBPort).bits.isFromLoadUnit := loadUnits(MisalignWBPort).io.ldout.bits.isFromLoadUnit || loadMisalignBuffer.io.writeBack.valid
539c590fb32Scz4e
540c590fb32Scz4e  // loadUnit will overwrite the source from uncache if it is about to writeback
541c590fb32Scz4e  ldaExeWbReqs(UncacheWBPort) <> loadUnits(UncacheWBPort).io.ldout
542c590fb32Scz4e  io.mem_to_ooo.writebackLda <> ldaExeWbReqs
543c590fb32Scz4e  io.mem_to_ooo.writebackSta <> storeUnits.map(_.io.stout)
544c590fb32Scz4e  io.mem_to_ooo.writebackStd.zip(stdExeUnits).foreach {x =>
545c590fb32Scz4e    x._1.bits  := x._2.io.out.bits
546c590fb32Scz4e    // AMOs do not need to write back std now.
547c590fb32Scz4e    x._1.valid := x._2.io.out.fire && !FuType.storeIsAMO(x._2.io.out.bits.uop.fuType)
548c590fb32Scz4e  }
549c590fb32Scz4e  io.mem_to_ooo.writebackHyuLda <> hybridUnits.map(_.io.ldout)
550c590fb32Scz4e  io.mem_to_ooo.writebackHyuSta <> hybridUnits.map(_.io.stout)
551c590fb32Scz4e  io.mem_to_ooo.otherFastWakeup := DontCare
552c590fb32Scz4e  io.mem_to_ooo.otherFastWakeup.drop(HyuCnt).take(LduCnt).zip(loadUnits.map(_.io.fast_uop)).foreach{case(a,b)=> a := b}
553c590fb32Scz4e  io.mem_to_ooo.otherFastWakeup.take(HyuCnt).zip(hybridUnits.map(_.io.ldu_io.fast_uop)).foreach{case(a,b)=> a:=b}
554c590fb32Scz4e  val stOut = io.mem_to_ooo.writebackSta ++ io.mem_to_ooo.writebackHyuSta
555c590fb32Scz4e
556c590fb32Scz4e  // prefetch to l1 req
557c590fb32Scz4e  // Stream's confidence is always 1
558c590fb32Scz4e  // (LduCnt + HyuCnt) l1_pf_reqs ?
559c590fb32Scz4e  loadUnits.foreach(load_unit => {
560c590fb32Scz4e    load_unit.io.prefetch_req.valid <> l1_pf_req.valid
561c590fb32Scz4e    load_unit.io.prefetch_req.bits <> l1_pf_req.bits
562c590fb32Scz4e  })
563c590fb32Scz4e
564c590fb32Scz4e  hybridUnits.foreach(hybrid_unit => {
565c590fb32Scz4e    hybrid_unit.io.ldu_io.prefetch_req.valid <> l1_pf_req.valid
566c590fb32Scz4e    hybrid_unit.io.ldu_io.prefetch_req.bits <> l1_pf_req.bits
567c590fb32Scz4e  })
568c590fb32Scz4e
569c590fb32Scz4e  // NOTE: loadUnits(0) has higher bank conflict and miss queue arb priority than loadUnits(1) and loadUnits(2)
570c590fb32Scz4e  // when loadUnits(1)/loadUnits(2) stage 0 is busy, hw prefetch will never use that pipeline
571c590fb32Scz4e  val LowConfPorts = if (LduCnt == 2) Seq(1) else if (LduCnt == 3) Seq(1, 2) else Seq(0)
572c590fb32Scz4e  LowConfPorts.map{case i => loadUnits(i).io.prefetch_req.bits.confidence := 0.U}
573c590fb32Scz4e  hybridUnits.foreach(hybrid_unit => { hybrid_unit.io.ldu_io.prefetch_req.bits.confidence := 0.U })
574c590fb32Scz4e
575c590fb32Scz4e  val canAcceptHighConfPrefetch = loadUnits.map(_.io.canAcceptHighConfPrefetch) ++
576c590fb32Scz4e                                  hybridUnits.map(_.io.canAcceptLowConfPrefetch)
577c590fb32Scz4e  val canAcceptLowConfPrefetch = loadUnits.map(_.io.canAcceptLowConfPrefetch) ++
578c590fb32Scz4e                                 hybridUnits.map(_.io.canAcceptLowConfPrefetch)
579c590fb32Scz4e  l1_pf_req.ready := (0 until LduCnt + HyuCnt).map{
580c590fb32Scz4e    case i => {
581c590fb32Scz4e      if (LowConfPorts.contains(i)) {
582c590fb32Scz4e        loadUnits(i).io.canAcceptLowConfPrefetch
583c590fb32Scz4e      } else {
584c590fb32Scz4e        Mux(l1_pf_req.bits.confidence === 1.U, canAcceptHighConfPrefetch(i), canAcceptLowConfPrefetch(i))
585c590fb32Scz4e      }
586c590fb32Scz4e    }
587c590fb32Scz4e  }.reduce(_ || _)
588c590fb32Scz4e
589c590fb32Scz4e  // l1 pf fuzzer interface
590c590fb32Scz4e  val DebugEnableL1PFFuzzer = false
591c590fb32Scz4e  if (DebugEnableL1PFFuzzer) {
592c590fb32Scz4e    // l1 pf req fuzzer
593c590fb32Scz4e    val fuzzer = Module(new L1PrefetchFuzzer())
594c590fb32Scz4e    fuzzer.io.vaddr := DontCare
595c590fb32Scz4e    fuzzer.io.paddr := DontCare
596c590fb32Scz4e
597c590fb32Scz4e    // override load_unit prefetch_req
598c590fb32Scz4e    loadUnits.foreach(load_unit => {
599c590fb32Scz4e      load_unit.io.prefetch_req.valid <> fuzzer.io.req.valid
600c590fb32Scz4e      load_unit.io.prefetch_req.bits <> fuzzer.io.req.bits
601c590fb32Scz4e    })
602c590fb32Scz4e
603c590fb32Scz4e    // override hybrid_unit prefetch_req
604c590fb32Scz4e    hybridUnits.foreach(hybrid_unit => {
605c590fb32Scz4e      hybrid_unit.io.ldu_io.prefetch_req.valid <> fuzzer.io.req.valid
606c590fb32Scz4e      hybrid_unit.io.ldu_io.prefetch_req.bits <> fuzzer.io.req.bits
607c590fb32Scz4e    })
608c590fb32Scz4e
609c590fb32Scz4e    fuzzer.io.req.ready := l1_pf_req.ready
610c590fb32Scz4e  }
611c590fb32Scz4e
612c590fb32Scz4e  // TODO: fast load wakeup
613c590fb32Scz4e  val lsq     = Module(new LsqWrapper)
614c590fb32Scz4e  val sbuffer = Module(new Sbuffer)
615c590fb32Scz4e  // if you wants to stress test dcache store, use FakeSbuffer
616c590fb32Scz4e  // val sbuffer = Module(new FakeSbuffer) // out of date now
617c590fb32Scz4e  io.mem_to_ooo.stIssuePtr := lsq.io.issuePtrExt
618c590fb32Scz4e
619c590fb32Scz4e  dcache.io.hartId := io.hartId
620c590fb32Scz4e  lsq.io.hartId := io.hartId
621c590fb32Scz4e  sbuffer.io.hartId := io.hartId
622c590fb32Scz4e  atomicsUnit.io.hartId := io.hartId
623c590fb32Scz4e
624c590fb32Scz4e  dcache.io.lqEmpty := lsq.io.lqEmpty
625c590fb32Scz4e
626c590fb32Scz4e  // load/store prefetch to l2 cache
627c590fb32Scz4e  prefetcherOpt.foreach(sms_pf => {
628c590fb32Scz4e    l1PrefetcherOpt.foreach(l1_pf => {
629c590fb32Scz4e      val sms_pf_to_l2 = DelayNWithValid(sms_pf.io.l2_req, 2)
630c590fb32Scz4e      val l1_pf_to_l2 = DelayNWithValid(l1_pf.io.l2_req, 2)
631c590fb32Scz4e
632c590fb32Scz4e      outer.l2_pf_sender_opt.get.out.head._1.addr_valid := sms_pf_to_l2.valid || l1_pf_to_l2.valid
633c590fb32Scz4e      outer.l2_pf_sender_opt.get.out.head._1.addr := Mux(l1_pf_to_l2.valid, l1_pf_to_l2.bits.addr, sms_pf_to_l2.bits.addr)
634c590fb32Scz4e      outer.l2_pf_sender_opt.get.out.head._1.pf_source := Mux(l1_pf_to_l2.valid, l1_pf_to_l2.bits.source, sms_pf_to_l2.bits.source)
635c590fb32Scz4e      outer.l2_pf_sender_opt.get.out.head._1.l2_pf_en := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_enable, 2, Some(true.B))
636c590fb32Scz4e
637c590fb32Scz4e      val l2_trace = Wire(new LoadPfDbBundle)
638c590fb32Scz4e      l2_trace.paddr := outer.l2_pf_sender_opt.get.out.head._1.addr
639c590fb32Scz4e      val table = ChiselDB.createTable(s"L2PrefetchTrace$hartId", new LoadPfDbBundle, basicDB = false)
640c590fb32Scz4e      table.log(l2_trace, l1_pf_to_l2.valid, "StreamPrefetchTrace", clock, reset)
641c590fb32Scz4e      table.log(l2_trace, !l1_pf_to_l2.valid && sms_pf_to_l2.valid, "L2PrefetchTrace", clock, reset)
642c590fb32Scz4e
643c590fb32Scz4e      val l1_pf_to_l3 = ValidIODelay(l1_pf.io.l3_req, 4)
644c590fb32Scz4e      outer.l3_pf_sender_opt.foreach(_.out.head._1.addr_valid := l1_pf_to_l3.valid)
645c590fb32Scz4e      outer.l3_pf_sender_opt.foreach(_.out.head._1.addr := l1_pf_to_l3.bits)
646c590fb32Scz4e      outer.l3_pf_sender_opt.foreach(_.out.head._1.l2_pf_en := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_enable, 4, Some(true.B)))
647c590fb32Scz4e
648c590fb32Scz4e      val l3_trace = Wire(new LoadPfDbBundle)
649c590fb32Scz4e      l3_trace.paddr := outer.l3_pf_sender_opt.map(_.out.head._1.addr).getOrElse(0.U)
650c590fb32Scz4e      val l3_table = ChiselDB.createTable(s"L3PrefetchTrace$hartId", new LoadPfDbBundle, basicDB = false)
651c590fb32Scz4e      l3_table.log(l3_trace, l1_pf_to_l3.valid, "StreamPrefetchTrace", clock, reset)
652c590fb32Scz4e
653c590fb32Scz4e      XSPerfAccumulate("prefetch_fire_l2", outer.l2_pf_sender_opt.get.out.head._1.addr_valid)
654c590fb32Scz4e      XSPerfAccumulate("prefetch_fire_l3", outer.l3_pf_sender_opt.map(_.out.head._1.addr_valid).getOrElse(false.B))
655c590fb32Scz4e      XSPerfAccumulate("l1pf_fire_l2", l1_pf_to_l2.valid)
656c590fb32Scz4e      XSPerfAccumulate("sms_fire_l2", !l1_pf_to_l2.valid && sms_pf_to_l2.valid)
657c590fb32Scz4e      XSPerfAccumulate("sms_block_by_l1pf", l1_pf_to_l2.valid && sms_pf_to_l2.valid)
658c590fb32Scz4e    })
659c590fb32Scz4e  })
660c590fb32Scz4e
661c590fb32Scz4e  // ptw
662c590fb32Scz4e  val sfence = RegNext(RegNext(io.ooo_to_mem.sfence))
663c590fb32Scz4e  val tlbcsr = RegNext(RegNext(io.ooo_to_mem.tlbCsr))
664c590fb32Scz4e  private val ptw = outer.ptw.module
665c590fb32Scz4e  private val ptw_to_l2_buffer = outer.ptw_to_l2_buffer.module
666c590fb32Scz4e  private val l1d_to_l2_buffer = outer.l1d_to_l2_buffer.module
667c590fb32Scz4e  ptw.io.hartId := io.hartId
668c590fb32Scz4e  ptw.io.sfence <> sfence
669c590fb32Scz4e  ptw.io.csr.tlb <> tlbcsr
670c590fb32Scz4e  ptw.io.csr.distribute_csr <> csrCtrl.distribute_csr
671c590fb32Scz4e
672c590fb32Scz4e  val perfEventsPTW = if (!coreParams.softPTW) {
673c590fb32Scz4e    ptw.getPerfEvents
674c590fb32Scz4e  } else {
675c590fb32Scz4e    Seq()
676c590fb32Scz4e  }
677c590fb32Scz4e
678c590fb32Scz4e  // dtlb
679c590fb32Scz4e  val dtlb_ld_tlb_ld = Module(new TLBNonBlock(LduCnt + HyuCnt + 1, 2, ldtlbParams))
680c590fb32Scz4e  val dtlb_st_tlb_st = Module(new TLBNonBlock(StaCnt, 1, sttlbParams))
681c590fb32Scz4e  val dtlb_prefetch_tlb_prefetch = Module(new TLBNonBlock(2, 2, pftlbParams))
682c590fb32Scz4e  val dtlb_ld = Seq(dtlb_ld_tlb_ld.io)
683c590fb32Scz4e  val dtlb_st = Seq(dtlb_st_tlb_st.io)
684c590fb32Scz4e  val dtlb_prefetch = Seq(dtlb_prefetch_tlb_prefetch.io)
685c590fb32Scz4e  /* tlb vec && constant variable */
686c590fb32Scz4e  val dtlb = dtlb_ld ++ dtlb_st ++ dtlb_prefetch
687c590fb32Scz4e  val (dtlb_ld_idx, dtlb_st_idx, dtlb_pf_idx) = (0, 1, 2)
688c590fb32Scz4e  val TlbSubSizeVec = Seq(LduCnt + HyuCnt + 1, StaCnt, 2) // (load + hyu + stream pf, store, sms+l2bop)
689c590fb32Scz4e  val DTlbSize = TlbSubSizeVec.sum
690c590fb32Scz4e  val TlbStartVec = TlbSubSizeVec.scanLeft(0)(_ + _).dropRight(1)
691c590fb32Scz4e  val TlbEndVec = TlbSubSizeVec.scanLeft(0)(_ + _).drop(1)
692c590fb32Scz4e
693c590fb32Scz4e  val ptwio = Wire(new VectorTlbPtwIO(DTlbSize))
694c590fb32Scz4e  val dtlb_reqs = dtlb.map(_.requestor).flatten
695c590fb32Scz4e  val dtlb_pmps = dtlb.map(_.pmp).flatten
696c590fb32Scz4e  dtlb.map(_.hartId := io.hartId)
697c590fb32Scz4e  dtlb.map(_.sfence := sfence)
698c590fb32Scz4e  dtlb.map(_.csr := tlbcsr)
699c590fb32Scz4e  dtlb.map(_.flushPipe.map(a => a := false.B)) // non-block doesn't need
700c590fb32Scz4e  dtlb.map(_.redirect := redirect)
701c590fb32Scz4e  if (refillBothTlb) {
702c590fb32Scz4e    require(ldtlbParams.outReplace == sttlbParams.outReplace)
703c590fb32Scz4e    require(ldtlbParams.outReplace == hytlbParams.outReplace)
704c590fb32Scz4e    require(ldtlbParams.outReplace == pftlbParams.outReplace)
705c590fb32Scz4e    require(ldtlbParams.outReplace)
706c590fb32Scz4e
707c590fb32Scz4e    val replace = Module(new TlbReplace(DTlbSize, ldtlbParams))
708c590fb32Scz4e    replace.io.apply_sep(dtlb_ld.map(_.replace) ++ dtlb_st.map(_.replace) ++ dtlb_prefetch.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
709c590fb32Scz4e  } else {
710c590fb32Scz4e    // TODO: there will be bugs in TlbReplace when outReplace enable, since the order of Hyu is not right.
711c590fb32Scz4e    if (ldtlbParams.outReplace) {
712c590fb32Scz4e      val replace_ld = Module(new TlbReplace(LduCnt + 1, ldtlbParams))
713c590fb32Scz4e      replace_ld.io.apply_sep(dtlb_ld.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
714c590fb32Scz4e    }
715c590fb32Scz4e    if (hytlbParams.outReplace) {
716c590fb32Scz4e      val replace_hy = Module(new TlbReplace(HyuCnt, hytlbParams))
717c590fb32Scz4e      replace_hy.io.apply_sep(dtlb_ld.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
718c590fb32Scz4e    }
719c590fb32Scz4e    if (sttlbParams.outReplace) {
720c590fb32Scz4e      val replace_st = Module(new TlbReplace(StaCnt, sttlbParams))
721c590fb32Scz4e      replace_st.io.apply_sep(dtlb_st.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
722c590fb32Scz4e    }
723c590fb32Scz4e    if (pftlbParams.outReplace) {
724c590fb32Scz4e      val replace_pf = Module(new TlbReplace(2, pftlbParams))
725c590fb32Scz4e      replace_pf.io.apply_sep(dtlb_prefetch.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
726c590fb32Scz4e    }
727c590fb32Scz4e  }
728c590fb32Scz4e
729c590fb32Scz4e  val ptw_resp_next = RegEnable(ptwio.resp.bits, ptwio.resp.valid)
730c590fb32Scz4e  val ptw_resp_v = RegNext(ptwio.resp.valid && !(sfence.valid && tlbcsr.satp.changed && tlbcsr.vsatp.changed && tlbcsr.hgatp.changed), init = false.B)
731c590fb32Scz4e  ptwio.resp.ready := true.B
732c590fb32Scz4e
733c590fb32Scz4e  val tlbreplay = WireInit(VecInit(Seq.fill(LdExuCnt)(false.B)))
734c590fb32Scz4e  val tlbreplay_reg = GatedValidRegNext(tlbreplay)
735c590fb32Scz4e  val dtlb_ld0_tlbreplay_reg = GatedValidRegNext(dtlb_ld(0).tlbreplay)
736c590fb32Scz4e
737c590fb32Scz4e  if (backendParams.debugEn){ dontTouch(tlbreplay) }
738c590fb32Scz4e
739c590fb32Scz4e  for (i <- 0 until LdExuCnt) {
740c590fb32Scz4e    tlbreplay(i) := dtlb_ld(0).ptw.req(i).valid && ptw_resp_next.vector(0) && ptw_resp_v &&
741c590fb32Scz4e      ptw_resp_next.data.hit(dtlb_ld(0).ptw.req(i).bits.vpn, tlbcsr.satp.asid, tlbcsr.vsatp.asid, tlbcsr.hgatp.vmid, allType = true, ignoreAsid = true)
742c590fb32Scz4e  }
743c590fb32Scz4e
744c590fb32Scz4e  dtlb.flatMap(a => a.ptw.req)
745c590fb32Scz4e    .zipWithIndex
746c590fb32Scz4e    .foreach{ case (tlb, i) =>
747c590fb32Scz4e      tlb.ready := ptwio.req(i).ready
748c590fb32Scz4e      ptwio.req(i).bits := tlb.bits
749c590fb32Scz4e    val vector_hit = if (refillBothTlb) Cat(ptw_resp_next.vector).orR
750c590fb32Scz4e      else if (i < TlbEndVec(dtlb_ld_idx)) Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_ld_idx), TlbEndVec(dtlb_ld_idx))).orR
751c590fb32Scz4e      else if (i < TlbEndVec(dtlb_st_idx)) Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_st_idx), TlbEndVec(dtlb_st_idx))).orR
752c590fb32Scz4e      else                                 Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_pf_idx), TlbEndVec(dtlb_pf_idx))).orR
753c590fb32Scz4e    ptwio.req(i).valid := tlb.valid && !(ptw_resp_v && vector_hit && ptw_resp_next.data.hit(tlb.bits.vpn, tlbcsr.satp.asid, tlbcsr.vsatp.asid, tlbcsr.hgatp.vmid, allType = true, ignoreAsid = true))
754c590fb32Scz4e  }
755c590fb32Scz4e  dtlb.foreach(_.ptw.resp.bits := ptw_resp_next.data)
756c590fb32Scz4e  if (refillBothTlb) {
757c590fb32Scz4e    dtlb.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector).orR)
758c590fb32Scz4e  } else {
759c590fb32Scz4e    dtlb_ld.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_ld_idx), TlbEndVec(dtlb_ld_idx))).orR)
760c590fb32Scz4e    dtlb_st.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_st_idx), TlbEndVec(dtlb_st_idx))).orR)
761c590fb32Scz4e    dtlb_prefetch.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_pf_idx), TlbEndVec(dtlb_pf_idx))).orR)
762c590fb32Scz4e  }
763c590fb32Scz4e  dtlb_ld.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.take(LduCnt + HyuCnt + 1)).orR)
764c590fb32Scz4e  dtlb_st.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.slice(LduCnt + HyuCnt + 1, LduCnt + HyuCnt + 1 + StaCnt)).orR)
765c590fb32Scz4e  dtlb_prefetch.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.drop(LduCnt + HyuCnt + 1 + StaCnt)).orR)
766c590fb32Scz4e
767c590fb32Scz4e  val dtlbRepeater  = PTWNewFilter(ldtlbParams.fenceDelay, ptwio, ptw.io.tlb(1), sfence, tlbcsr, l2tlbParams.dfilterSize)
768c590fb32Scz4e  val itlbRepeater3 = PTWRepeaterNB(passReady = false, itlbParams.fenceDelay, io.fetch_to_mem.itlb, ptw.io.tlb(0), sfence, tlbcsr)
769c590fb32Scz4e
770c590fb32Scz4e  lsq.io.debugTopDown.robHeadMissInDTlb := dtlbRepeater.io.rob_head_miss_in_tlb
771c590fb32Scz4e
772c590fb32Scz4e  // pmp
773c590fb32Scz4e  val pmp = Module(new PMP())
774c590fb32Scz4e  pmp.io.distribute_csr <> csrCtrl.distribute_csr
775c590fb32Scz4e
776c590fb32Scz4e  val pmp_checkers = Seq.fill(DTlbSize)(Module(new PMPChecker(4, leaveHitMux = true)))
777c590fb32Scz4e  val pmp_check = pmp_checkers.map(_.io)
778c590fb32Scz4e  for ((p,d) <- pmp_check zip dtlb_pmps) {
7798882eb68SXin Tian    if (HasBitmapCheck) {
7808882eb68SXin Tian      p.apply(tlbcsr.mbmc.CMODE.asBool, tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, d)
7818882eb68SXin Tian    } else {
782c590fb32Scz4e      p.apply(tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, d)
7838882eb68SXin Tian    }
784c590fb32Scz4e    require(p.req.bits.size.getWidth == d.bits.size.getWidth)
785c590fb32Scz4e  }
786c590fb32Scz4e
787c590fb32Scz4e  for (i <- 0 until LduCnt) {
788c590fb32Scz4e    io.debug_ls.debugLsInfo(i) := loadUnits(i).io.debug_ls
789c590fb32Scz4e  }
790c590fb32Scz4e  for (i <- 0 until HyuCnt) {
791c590fb32Scz4e    io.debug_ls.debugLsInfo.drop(LduCnt)(i) := hybridUnits(i).io.ldu_io.debug_ls
792c590fb32Scz4e  }
793c590fb32Scz4e  for (i <- 0 until StaCnt) {
794c590fb32Scz4e    io.debug_ls.debugLsInfo.drop(LduCnt + HyuCnt)(i) := storeUnits(i).io.debug_ls
795c590fb32Scz4e  }
796c590fb32Scz4e  for (i <- 0 until HyuCnt) {
797c590fb32Scz4e    io.debug_ls.debugLsInfo.drop(LduCnt + HyuCnt + StaCnt)(i) := hybridUnits(i).io.stu_io.debug_ls
798c590fb32Scz4e  }
799c590fb32Scz4e
800c590fb32Scz4e  io.mem_to_ooo.lsTopdownInfo := loadUnits.map(_.io.lsTopdownInfo) ++ hybridUnits.map(_.io.ldu_io.lsTopdownInfo)
801c590fb32Scz4e
802c590fb32Scz4e  // trigger
803c590fb32Scz4e  val tdata = RegInit(VecInit(Seq.fill(TriggerNum)(0.U.asTypeOf(new MatchTriggerIO))))
804c590fb32Scz4e  val tEnable = RegInit(VecInit(Seq.fill(TriggerNum)(false.B)))
805c590fb32Scz4e  tEnable := csrCtrl.mem_trigger.tEnableVec
806c590fb32Scz4e  when(csrCtrl.mem_trigger.tUpdate.valid) {
807c590fb32Scz4e    tdata(csrCtrl.mem_trigger.tUpdate.bits.addr) := csrCtrl.mem_trigger.tUpdate.bits.tdata
808c590fb32Scz4e  }
809c590fb32Scz4e  val triggerCanRaiseBpExp = csrCtrl.mem_trigger.triggerCanRaiseBpExp
810c590fb32Scz4e  val debugMode = csrCtrl.mem_trigger.debugMode
811c590fb32Scz4e
812c590fb32Scz4e  val backendTriggerTimingVec = VecInit(tdata.map(_.timing))
813c590fb32Scz4e  val backendTriggerChainVec = VecInit(tdata.map(_.chain))
814c590fb32Scz4e
815c590fb32Scz4e  XSDebug(tEnable.asUInt.orR, "Debug Mode: At least one store trigger is enabled\n")
816c590fb32Scz4e  for (j <- 0 until TriggerNum)
817c590fb32Scz4e    PrintTriggerInfo(tEnable(j), tdata(j))
818c590fb32Scz4e
819c590fb32Scz4e  // The segment instruction is executed atomically.
820c590fb32Scz4e  // After the segment instruction directive starts executing, no other instructions should be executed.
821c590fb32Scz4e  val vSegmentFlag = RegInit(false.B)
822c590fb32Scz4e
823c590fb32Scz4e  when(GatedValidRegNext(vSegmentUnit.io.in.fire)) {
824c590fb32Scz4e    vSegmentFlag := true.B
825c590fb32Scz4e  }.elsewhen(GatedValidRegNext(vSegmentUnit.io.uopwriteback.valid)) {
826c590fb32Scz4e    vSegmentFlag := false.B
827c590fb32Scz4e  }
828c590fb32Scz4e
829522c7f99SAnzo  val misalign_allow_spec = RegInit(true.B)
830522c7f99SAnzo  val ldu_rollback_with_misalign_nack = loadUnits.map(ldu =>
831522c7f99SAnzo    ldu.io.lsq.ldin.bits.isFrmMisAlignBuf && ldu.io.lsq.ldin.bits.rep_info.rar_nack && ldu.io.rollback.valid
832522c7f99SAnzo  ).reduce(_ || _)
833522c7f99SAnzo  when (ldu_rollback_with_misalign_nack) {
834522c7f99SAnzo    misalign_allow_spec := false.B
835522c7f99SAnzo  } .elsewhen(lsq.io.rarValidCount < (LoadQueueRARSize - 4).U) {
836522c7f99SAnzo    misalign_allow_spec := true.B
837522c7f99SAnzo  }
838522c7f99SAnzo
839c590fb32Scz4e  // LoadUnit
840c590fb32Scz4e  val correctMissTrain = Constantin.createRecord(s"CorrectMissTrain$hartId", initValue = false)
841c590fb32Scz4e
842c590fb32Scz4e  for (i <- 0 until LduCnt) {
843c590fb32Scz4e    loadUnits(i).io.redirect <> redirect
844522c7f99SAnzo    loadUnits(i).io.misalign_allow_spec := misalign_allow_spec
845c590fb32Scz4e
846c590fb32Scz4e    // get input form dispatch
847c590fb32Scz4e    loadUnits(i).io.ldin <> io.ooo_to_mem.issueLda(i)
848c590fb32Scz4e    loadUnits(i).io.feedback_slow <> io.mem_to_ooo.ldaIqFeedback(i).feedbackSlow
849c590fb32Scz4e    io.mem_to_ooo.ldaIqFeedback(i).feedbackFast := DontCare
850c590fb32Scz4e    loadUnits(i).io.correctMissTrain := correctMissTrain
851c590fb32Scz4e    io.mem_to_ooo.ldCancel.drop(HyuCnt)(i) := loadUnits(i).io.ldCancel
852c590fb32Scz4e    io.mem_to_ooo.wakeup.drop(HyuCnt)(i) := loadUnits(i).io.wakeup
853c590fb32Scz4e
854c590fb32Scz4e    // vector
855c590fb32Scz4e    if (i < VlduCnt) {
856c590fb32Scz4e      loadUnits(i).io.vecldout.ready := false.B
857c590fb32Scz4e    } else {
858c590fb32Scz4e      loadUnits(i).io.vecldin.valid := false.B
859c590fb32Scz4e      loadUnits(i).io.vecldin.bits := DontCare
860c590fb32Scz4e      loadUnits(i).io.vecldout.ready := false.B
861c590fb32Scz4e    }
862c590fb32Scz4e
863c590fb32Scz4e    // fast replay
864c590fb32Scz4e    loadUnits(i).io.fast_rep_in <> loadUnits(i).io.fast_rep_out
865c590fb32Scz4e
866c590fb32Scz4e    // SoftPrefetch to frontend (prefetch.i)
867c590fb32Scz4e    loadUnits(i).io.ifetchPrefetch <> io.ifetchPrefetch(i)
868c590fb32Scz4e
869c590fb32Scz4e    // dcache access
870c590fb32Scz4e    loadUnits(i).io.dcache <> dcache.io.lsu.load(i)
871c590fb32Scz4e    if(i == 0){
872c590fb32Scz4e      vSegmentUnit.io.rdcache := DontCare
873c590fb32Scz4e      dcache.io.lsu.load(i).req.valid := loadUnits(i).io.dcache.req.valid || vSegmentUnit.io.rdcache.req.valid
874c590fb32Scz4e      dcache.io.lsu.load(i).req.bits  := Mux1H(Seq(
875c590fb32Scz4e        vSegmentUnit.io.rdcache.req.valid -> vSegmentUnit.io.rdcache.req.bits,
876c590fb32Scz4e        loadUnits(i).io.dcache.req.valid -> loadUnits(i).io.dcache.req.bits
877c590fb32Scz4e      ))
878c590fb32Scz4e      vSegmentUnit.io.rdcache.req.ready := dcache.io.lsu.load(i).req.ready
879c590fb32Scz4e    }
880c590fb32Scz4e
881c590fb32Scz4e    // Dcache requests must also be preempted by the segment.
882c590fb32Scz4e    when(vSegmentFlag){
883c590fb32Scz4e      loadUnits(i).io.dcache.req.ready             := false.B // Dcache is preempted.
884c590fb32Scz4e
885c590fb32Scz4e      dcache.io.lsu.load(0).pf_source              := vSegmentUnit.io.rdcache.pf_source
886c590fb32Scz4e      dcache.io.lsu.load(0).s1_paddr_dup_lsu       := vSegmentUnit.io.rdcache.s1_paddr_dup_lsu
887c590fb32Scz4e      dcache.io.lsu.load(0).s1_paddr_dup_dcache    := vSegmentUnit.io.rdcache.s1_paddr_dup_dcache
888c590fb32Scz4e      dcache.io.lsu.load(0).s1_kill                := vSegmentUnit.io.rdcache.s1_kill
889c590fb32Scz4e      dcache.io.lsu.load(0).s2_kill                := vSegmentUnit.io.rdcache.s2_kill
890c590fb32Scz4e      dcache.io.lsu.load(0).s0_pc                  := vSegmentUnit.io.rdcache.s0_pc
891c590fb32Scz4e      dcache.io.lsu.load(0).s1_pc                  := vSegmentUnit.io.rdcache.s1_pc
892c590fb32Scz4e      dcache.io.lsu.load(0).s2_pc                  := vSegmentUnit.io.rdcache.s2_pc
893c590fb32Scz4e      dcache.io.lsu.load(0).is128Req               := vSegmentUnit.io.rdcache.is128Req
894c590fb32Scz4e    }.otherwise {
895c590fb32Scz4e      loadUnits(i).io.dcache.req.ready             := dcache.io.lsu.load(i).req.ready
896c590fb32Scz4e
897c590fb32Scz4e      dcache.io.lsu.load(0).pf_source              := loadUnits(0).io.dcache.pf_source
898c590fb32Scz4e      dcache.io.lsu.load(0).s1_paddr_dup_lsu       := loadUnits(0).io.dcache.s1_paddr_dup_lsu
899c590fb32Scz4e      dcache.io.lsu.load(0).s1_paddr_dup_dcache    := loadUnits(0).io.dcache.s1_paddr_dup_dcache
900c590fb32Scz4e      dcache.io.lsu.load(0).s1_kill                := loadUnits(0).io.dcache.s1_kill
901c590fb32Scz4e      dcache.io.lsu.load(0).s2_kill                := loadUnits(0).io.dcache.s2_kill
902c590fb32Scz4e      dcache.io.lsu.load(0).s0_pc                  := loadUnits(0).io.dcache.s0_pc
903c590fb32Scz4e      dcache.io.lsu.load(0).s1_pc                  := loadUnits(0).io.dcache.s1_pc
904c590fb32Scz4e      dcache.io.lsu.load(0).s2_pc                  := loadUnits(0).io.dcache.s2_pc
905c590fb32Scz4e      dcache.io.lsu.load(0).is128Req               := loadUnits(0).io.dcache.is128Req
906c590fb32Scz4e    }
907c590fb32Scz4e
908c590fb32Scz4e    // forward
909c590fb32Scz4e    loadUnits(i).io.lsq.forward <> lsq.io.forward(i)
910c590fb32Scz4e    loadUnits(i).io.sbuffer <> sbuffer.io.forward(i)
911c590fb32Scz4e    loadUnits(i).io.ubuffer <> uncache.io.forward(i)
912c590fb32Scz4e    loadUnits(i).io.tl_d_channel := dcache.io.lsu.forward_D(i)
913c590fb32Scz4e    loadUnits(i).io.forward_mshr <> dcache.io.lsu.forward_mshr(i)
914c590fb32Scz4e    // ld-ld violation check
915c590fb32Scz4e    loadUnits(i).io.lsq.ldld_nuke_query <> lsq.io.ldu.ldld_nuke_query(i)
916c590fb32Scz4e    loadUnits(i).io.lsq.stld_nuke_query <> lsq.io.ldu.stld_nuke_query(i)
917522c7f99SAnzo    // loadqueue old ptr
918522c7f99SAnzo    loadUnits(i).io.lsq.lqDeqPtr := lsq.io.lqDeqPtr
919c590fb32Scz4e    loadUnits(i).io.csrCtrl       <> csrCtrl
920c590fb32Scz4e    // dcache refill req
921c590fb32Scz4e  // loadUnits(i).io.refill           <> delayedDcacheRefill
922c590fb32Scz4e    // dtlb
923c590fb32Scz4e    loadUnits(i).io.tlb <> dtlb_reqs.take(LduCnt)(i)
924c590fb32Scz4e    if(i == 0 ){ // port 0 assign to vsegmentUnit
925c590fb32Scz4e      val vsegmentDtlbReqValid = vSegmentUnit.io.dtlb.req.valid // segment tlb resquest need to delay 1 cycle
926c590fb32Scz4e      dtlb_reqs.take(LduCnt)(i).req.valid := loadUnits(i).io.tlb.req.valid || RegNext(vsegmentDtlbReqValid)
927c590fb32Scz4e      vSegmentUnit.io.dtlb.req.ready      := dtlb_reqs.take(LduCnt)(i).req.ready
928c590fb32Scz4e      dtlb_reqs.take(LduCnt)(i).req.bits  := ParallelPriorityMux(Seq(
929c590fb32Scz4e        RegNext(vsegmentDtlbReqValid)     -> RegEnable(vSegmentUnit.io.dtlb.req.bits, vsegmentDtlbReqValid),
930c590fb32Scz4e        loadUnits(i).io.tlb.req.valid     -> loadUnits(i).io.tlb.req.bits
931c590fb32Scz4e      ))
932c590fb32Scz4e    }
933c590fb32Scz4e    // pmp
934c590fb32Scz4e    loadUnits(i).io.pmp <> pmp_check(i).resp
935c590fb32Scz4e    // st-ld violation query
936c590fb32Scz4e    val stld_nuke_query = storeUnits.map(_.io.stld_nuke_query) ++ hybridUnits.map(_.io.stu_io.stld_nuke_query)
937c590fb32Scz4e    for (s <- 0 until StorePipelineWidth) {
938c590fb32Scz4e      loadUnits(i).io.stld_nuke_query(s) := stld_nuke_query(s)
939c590fb32Scz4e    }
940c590fb32Scz4e    loadUnits(i).io.lq_rep_full <> lsq.io.lq_rep_full
941c590fb32Scz4e    // load prefetch train
942c590fb32Scz4e    prefetcherOpt.foreach(pf => {
943c590fb32Scz4e      // sms will train on all miss load sources
944c590fb32Scz4e      val source = loadUnits(i).io.prefetch_train
945c590fb32Scz4e      pf.io.ld_in(i).valid := Mux(pf_train_on_hit,
946c590fb32Scz4e        source.valid,
947c590fb32Scz4e        source.valid && source.bits.isFirstIssue && source.bits.miss
948c590fb32Scz4e      )
949c590fb32Scz4e      pf.io.ld_in(i).bits := source.bits
950c590fb32Scz4e      val loadPc = RegNext(io.ooo_to_mem.issueLda(i).bits.uop.pc) // for s1
951c590fb32Scz4e      pf.io.ld_in(i).bits.uop.pc := Mux(
952c590fb32Scz4e        loadUnits(i).io.s2_ptr_chasing,
953c590fb32Scz4e        RegEnable(loadPc, loadUnits(i).io.s2_prefetch_spec),
954c590fb32Scz4e        RegEnable(RegEnable(loadPc, loadUnits(i).io.s1_prefetch_spec), loadUnits(i).io.s2_prefetch_spec)
955c590fb32Scz4e      )
956c590fb32Scz4e    })
957c590fb32Scz4e    l1PrefetcherOpt.foreach(pf => {
958c590fb32Scz4e      // stream will train on all load sources
959c590fb32Scz4e      val source = loadUnits(i).io.prefetch_train_l1
960c590fb32Scz4e      pf.io.ld_in(i).valid := source.valid && source.bits.isFirstIssue
961c590fb32Scz4e      pf.io.ld_in(i).bits := source.bits
962c590fb32Scz4e    })
963c590fb32Scz4e
964c590fb32Scz4e    // load to load fast forward: load(i) prefers data(i)
965c590fb32Scz4e    val l2l_fwd_out = loadUnits.map(_.io.l2l_fwd_out) ++ hybridUnits.map(_.io.ldu_io.l2l_fwd_out)
966c590fb32Scz4e    val fastPriority = (i until LduCnt + HyuCnt) ++ (0 until i)
967c590fb32Scz4e    val fastValidVec = fastPriority.map(j => l2l_fwd_out(j).valid)
968c590fb32Scz4e    val fastDataVec = fastPriority.map(j => l2l_fwd_out(j).data)
969c590fb32Scz4e    val fastErrorVec = fastPriority.map(j => l2l_fwd_out(j).dly_ld_err)
970c590fb32Scz4e    val fastMatchVec = fastPriority.map(j => io.ooo_to_mem.loadFastMatch(i)(j))
971c590fb32Scz4e    loadUnits(i).io.l2l_fwd_in.valid := VecInit(fastValidVec).asUInt.orR
972c590fb32Scz4e    loadUnits(i).io.l2l_fwd_in.data := ParallelPriorityMux(fastValidVec, fastDataVec)
973c590fb32Scz4e    loadUnits(i).io.l2l_fwd_in.dly_ld_err := ParallelPriorityMux(fastValidVec, fastErrorVec)
974c590fb32Scz4e    val fastMatch = ParallelPriorityMux(fastValidVec, fastMatchVec)
975c590fb32Scz4e    loadUnits(i).io.ld_fast_match := fastMatch
976c590fb32Scz4e    loadUnits(i).io.ld_fast_imm := io.ooo_to_mem.loadFastImm(i)
977c590fb32Scz4e    loadUnits(i).io.ld_fast_fuOpType := io.ooo_to_mem.loadFastFuOpType(i)
978c590fb32Scz4e    loadUnits(i).io.replay <> lsq.io.replay(i)
979c590fb32Scz4e
980c590fb32Scz4e    val l2_hint = RegNext(io.l2_hint)
981c590fb32Scz4e
982c590fb32Scz4e    // L2 Hint for DCache
983c590fb32Scz4e    dcache.io.l2_hint <> l2_hint
984c590fb32Scz4e
985c590fb32Scz4e    loadUnits(i).io.l2_hint <> l2_hint
986c590fb32Scz4e    loadUnits(i).io.tlb_hint.id := dtlbRepeater.io.hint.get.req(i).id
987c590fb32Scz4e    loadUnits(i).io.tlb_hint.full := dtlbRepeater.io.hint.get.req(i).full ||
988c590fb32Scz4e      tlbreplay_reg(i) || dtlb_ld0_tlbreplay_reg(i)
989c590fb32Scz4e
990c590fb32Scz4e    // passdown to lsq (load s2)
991c590fb32Scz4e    lsq.io.ldu.ldin(i) <> loadUnits(i).io.lsq.ldin
992c590fb32Scz4e    if (i == UncacheWBPort) {
993c590fb32Scz4e      lsq.io.ldout(i) <> loadUnits(i).io.lsq.uncache
994c590fb32Scz4e    } else {
995c590fb32Scz4e      lsq.io.ldout(i).ready := true.B
996c590fb32Scz4e      loadUnits(i).io.lsq.uncache.valid := false.B
997c590fb32Scz4e      loadUnits(i).io.lsq.uncache.bits := DontCare
998c590fb32Scz4e    }
999c590fb32Scz4e    lsq.io.ld_raw_data(i) <> loadUnits(i).io.lsq.ld_raw_data
1000c590fb32Scz4e    lsq.io.ncOut(i) <> loadUnits(i).io.lsq.nc_ldin
1001c590fb32Scz4e    lsq.io.l2_hint.valid := l2_hint.valid
1002c590fb32Scz4e    lsq.io.l2_hint.bits.sourceId := l2_hint.bits.sourceId
1003c590fb32Scz4e    lsq.io.l2_hint.bits.isKeyword := l2_hint.bits.isKeyword
1004c590fb32Scz4e
1005c590fb32Scz4e    lsq.io.tlb_hint <> dtlbRepeater.io.hint.get
1006c590fb32Scz4e
1007c590fb32Scz4e    // connect misalignBuffer
10084ec1f462Scz4e    loadMisalignBuffer.io.enq(i) <> loadUnits(i).io.misalign_enq
1009c590fb32Scz4e
1010c590fb32Scz4e    if (i == MisalignWBPort) {
1011c590fb32Scz4e      loadUnits(i).io.misalign_ldin  <> loadMisalignBuffer.io.splitLoadReq
1012c590fb32Scz4e      loadUnits(i).io.misalign_ldout <> loadMisalignBuffer.io.splitLoadResp
1013c590fb32Scz4e    } else {
1014c590fb32Scz4e      loadUnits(i).io.misalign_ldin.valid := false.B
1015c590fb32Scz4e      loadUnits(i).io.misalign_ldin.bits := DontCare
1016c590fb32Scz4e    }
1017c590fb32Scz4e
1018c590fb32Scz4e    // alter writeback exception info
1019c590fb32Scz4e    io.mem_to_ooo.s3_delayed_load_error(i) := loadUnits(i).io.s3_dly_ld_err
1020c590fb32Scz4e
1021c590fb32Scz4e    // update mem dependency predictor
1022c590fb32Scz4e    // io.memPredUpdate(i) := DontCare
1023c590fb32Scz4e
1024c590fb32Scz4e    // --------------------------------
1025c590fb32Scz4e    // Load Triggers
1026c590fb32Scz4e    // --------------------------------
1027c590fb32Scz4e    loadUnits(i).io.fromCsrTrigger.tdataVec := tdata
1028c590fb32Scz4e    loadUnits(i).io.fromCsrTrigger.tEnableVec := tEnable
1029c590fb32Scz4e    loadUnits(i).io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
1030c590fb32Scz4e    loadUnits(i).io.fromCsrTrigger.debugMode := debugMode
1031c590fb32Scz4e  }
1032c590fb32Scz4e
1033c590fb32Scz4e  for (i <- 0 until HyuCnt) {
1034c590fb32Scz4e    hybridUnits(i).io.redirect <> redirect
1035c590fb32Scz4e
1036c590fb32Scz4e    // get input from dispatch
1037c590fb32Scz4e    hybridUnits(i).io.lsin <> io.ooo_to_mem.issueHya(i)
1038c590fb32Scz4e    hybridUnits(i).io.feedback_slow <> io.mem_to_ooo.hyuIqFeedback(i).feedbackSlow
1039c590fb32Scz4e    hybridUnits(i).io.feedback_fast <> io.mem_to_ooo.hyuIqFeedback(i).feedbackFast
1040c590fb32Scz4e    hybridUnits(i).io.correctMissTrain := correctMissTrain
1041c590fb32Scz4e    io.mem_to_ooo.ldCancel.take(HyuCnt)(i) := hybridUnits(i).io.ldu_io.ldCancel
1042c590fb32Scz4e    io.mem_to_ooo.wakeup.take(HyuCnt)(i) := hybridUnits(i).io.ldu_io.wakeup
1043c590fb32Scz4e
1044c590fb32Scz4e    // ------------------------------------
1045c590fb32Scz4e    //  Load Port
1046c590fb32Scz4e    // ------------------------------------
1047c590fb32Scz4e    // fast replay
1048c590fb32Scz4e    hybridUnits(i).io.ldu_io.fast_rep_in <> hybridUnits(i).io.ldu_io.fast_rep_out
1049c590fb32Scz4e
1050c590fb32Scz4e    // get input from dispatch
1051c590fb32Scz4e    hybridUnits(i).io.ldu_io.dcache <> dcache.io.lsu.load(LduCnt + i)
1052c590fb32Scz4e    hybridUnits(i).io.stu_io.dcache <> dcache.io.lsu.sta(StaCnt + i)
1053c590fb32Scz4e
1054c590fb32Scz4e    // dcache access
1055c590fb32Scz4e    hybridUnits(i).io.ldu_io.lsq.forward <> lsq.io.forward(LduCnt + i)
1056c590fb32Scz4e    // forward
1057c590fb32Scz4e    hybridUnits(i).io.ldu_io.sbuffer <> sbuffer.io.forward(LduCnt + i)
1058c590fb32Scz4e    hybridUnits(i).io.ldu_io.ubuffer <> uncache.io.forward(LduCnt + i)
1059c590fb32Scz4e    // hybridUnits(i).io.ldu_io.vec_forward <> vsFlowQueue.io.forward(LduCnt + i)
1060c590fb32Scz4e    hybridUnits(i).io.ldu_io.vec_forward := DontCare
1061c590fb32Scz4e    hybridUnits(i).io.ldu_io.tl_d_channel := dcache.io.lsu.forward_D(LduCnt + i)
1062c590fb32Scz4e    hybridUnits(i).io.ldu_io.forward_mshr <> dcache.io.lsu.forward_mshr(LduCnt + i)
1063c590fb32Scz4e    // ld-ld violation check
1064c590fb32Scz4e    hybridUnits(i).io.ldu_io.lsq.ldld_nuke_query <> lsq.io.ldu.ldld_nuke_query(LduCnt + i)
1065c590fb32Scz4e    hybridUnits(i).io.ldu_io.lsq.stld_nuke_query <> lsq.io.ldu.stld_nuke_query(LduCnt + i)
1066c590fb32Scz4e    hybridUnits(i).io.csrCtrl <> csrCtrl
1067c590fb32Scz4e    // dcache refill req
1068c590fb32Scz4e    hybridUnits(i).io.ldu_io.tlb_hint.id := dtlbRepeater.io.hint.get.req(LduCnt + i).id
1069c590fb32Scz4e    hybridUnits(i).io.ldu_io.tlb_hint.full := dtlbRepeater.io.hint.get.req(LduCnt + i).full ||
1070c590fb32Scz4e      tlbreplay_reg(LduCnt + i) || dtlb_ld0_tlbreplay_reg(LduCnt + i)
1071c590fb32Scz4e
1072c590fb32Scz4e    // dtlb
1073c590fb32Scz4e    hybridUnits(i).io.tlb <> dtlb_ld.head.requestor(LduCnt + i)
1074c590fb32Scz4e    // pmp
1075c590fb32Scz4e    hybridUnits(i).io.pmp <> pmp_check.drop(LduCnt)(i).resp
1076c590fb32Scz4e    // st-ld violation query
1077c590fb32Scz4e    val stld_nuke_query = VecInit(storeUnits.map(_.io.stld_nuke_query) ++ hybridUnits.map(_.io.stu_io.stld_nuke_query))
1078c590fb32Scz4e    hybridUnits(i).io.ldu_io.stld_nuke_query := stld_nuke_query
1079c590fb32Scz4e    hybridUnits(i).io.ldu_io.lq_rep_full <> lsq.io.lq_rep_full
1080c590fb32Scz4e    // load prefetch train
1081c590fb32Scz4e    prefetcherOpt.foreach(pf => {
1082c590fb32Scz4e      val source = hybridUnits(i).io.prefetch_train
1083c590fb32Scz4e      pf.io.ld_in(LduCnt + i).valid := Mux(pf_train_on_hit,
1084c590fb32Scz4e        source.valid,
1085c590fb32Scz4e        source.valid && source.bits.isFirstIssue && source.bits.miss
1086c590fb32Scz4e      )
1087c590fb32Scz4e      pf.io.ld_in(LduCnt + i).bits := source.bits
1088c590fb32Scz4e      pf.io.ld_in(LduCnt + i).bits.uop.pc := Mux(hybridUnits(i).io.ldu_io.s2_ptr_chasing, io.ooo_to_mem.hybridPc(i), RegNext(io.ooo_to_mem.hybridPc(i)))
1089c590fb32Scz4e    })
1090c590fb32Scz4e    l1PrefetcherOpt.foreach(pf => {
1091c590fb32Scz4e      // stream will train on all load sources
1092c590fb32Scz4e      val source = hybridUnits(i).io.prefetch_train_l1
1093c590fb32Scz4e      pf.io.ld_in(LduCnt + i).valid := source.valid && source.bits.isFirstIssue &&
1094c590fb32Scz4e                                       FuType.isLoad(source.bits.uop.fuType)
1095c590fb32Scz4e      pf.io.ld_in(LduCnt + i).bits := source.bits
1096c590fb32Scz4e      pf.io.st_in(StaCnt + i).valid := false.B
1097c590fb32Scz4e      pf.io.st_in(StaCnt + i).bits := DontCare
1098c590fb32Scz4e    })
1099c590fb32Scz4e    prefetcherOpt.foreach(pf => {
1100c590fb32Scz4e      val source = hybridUnits(i).io.prefetch_train
1101c590fb32Scz4e      pf.io.st_in(StaCnt + i).valid := Mux(pf_train_on_hit,
1102c590fb32Scz4e        source.valid,
1103c590fb32Scz4e        source.valid && source.bits.isFirstIssue && source.bits.miss
1104c590fb32Scz4e      ) && FuType.isStore(source.bits.uop.fuType)
1105c590fb32Scz4e      pf.io.st_in(StaCnt + i).bits := source.bits
1106c590fb32Scz4e      pf.io.st_in(StaCnt + i).bits.uop.pc := RegNext(io.ooo_to_mem.hybridPc(i))
1107c590fb32Scz4e    })
1108c590fb32Scz4e
1109c590fb32Scz4e    // load to load fast forward: load(i) prefers data(i)
1110c590fb32Scz4e    val l2l_fwd_out = loadUnits.map(_.io.l2l_fwd_out) ++ hybridUnits.map(_.io.ldu_io.l2l_fwd_out)
1111c590fb32Scz4e    val fastPriority = (LduCnt + i until LduCnt + HyuCnt) ++ (0 until LduCnt + i)
1112c590fb32Scz4e    val fastValidVec = fastPriority.map(j => l2l_fwd_out(j).valid)
1113c590fb32Scz4e    val fastDataVec = fastPriority.map(j => l2l_fwd_out(j).data)
1114c590fb32Scz4e    val fastErrorVec = fastPriority.map(j => l2l_fwd_out(j).dly_ld_err)
1115c590fb32Scz4e    val fastMatchVec = fastPriority.map(j => io.ooo_to_mem.loadFastMatch(LduCnt + i)(j))
1116c590fb32Scz4e    hybridUnits(i).io.ldu_io.l2l_fwd_in.valid := VecInit(fastValidVec).asUInt.orR
1117c590fb32Scz4e    hybridUnits(i).io.ldu_io.l2l_fwd_in.data := ParallelPriorityMux(fastValidVec, fastDataVec)
1118c590fb32Scz4e    hybridUnits(i).io.ldu_io.l2l_fwd_in.dly_ld_err := ParallelPriorityMux(fastValidVec, fastErrorVec)
1119c590fb32Scz4e    val fastMatch = ParallelPriorityMux(fastValidVec, fastMatchVec)
1120c590fb32Scz4e    hybridUnits(i).io.ldu_io.ld_fast_match := fastMatch
1121c590fb32Scz4e    hybridUnits(i).io.ldu_io.ld_fast_imm := io.ooo_to_mem.loadFastImm(LduCnt + i)
1122c590fb32Scz4e    hybridUnits(i).io.ldu_io.ld_fast_fuOpType := io.ooo_to_mem.loadFastFuOpType(LduCnt + i)
1123c590fb32Scz4e    hybridUnits(i).io.ldu_io.replay <> lsq.io.replay(LduCnt + i)
1124c590fb32Scz4e    hybridUnits(i).io.ldu_io.l2_hint <> io.l2_hint
1125c590fb32Scz4e
1126c590fb32Scz4e    // uncache
1127c590fb32Scz4e    lsq.io.ldout.drop(LduCnt)(i) <> hybridUnits(i).io.ldu_io.lsq.uncache
1128c590fb32Scz4e    lsq.io.ld_raw_data.drop(LduCnt)(i) <> hybridUnits(i).io.ldu_io.lsq.ld_raw_data
1129c590fb32Scz4e
1130c590fb32Scz4e
1131c590fb32Scz4e    // passdown to lsq (load s2)
1132c590fb32Scz4e    hybridUnits(i).io.ldu_io.lsq.nc_ldin.valid := false.B
1133c590fb32Scz4e    hybridUnits(i).io.ldu_io.lsq.nc_ldin.bits := DontCare
1134c590fb32Scz4e    lsq.io.ldu.ldin(LduCnt + i) <> hybridUnits(i).io.ldu_io.lsq.ldin
1135c590fb32Scz4e    // Lsq to sta unit
1136c590fb32Scz4e    lsq.io.sta.storeMaskIn(StaCnt + i) <> hybridUnits(i).io.stu_io.st_mask_out
1137c590fb32Scz4e
1138c590fb32Scz4e    // Lsq to std unit's rs
1139c590fb32Scz4e    lsq.io.std.storeDataIn(StaCnt + i) := stData(StaCnt + i)
1140c590fb32Scz4e    lsq.io.std.storeDataIn(StaCnt + i).valid := stData(StaCnt + i).valid && !st_data_atomics(StaCnt + i)
1141c590fb32Scz4e    // prefetch
1142c590fb32Scz4e    hybridUnits(i).io.stu_io.prefetch_req <> sbuffer.io.store_prefetch(StaCnt + i)
1143c590fb32Scz4e
1144c590fb32Scz4e    io.mem_to_ooo.s3_delayed_load_error(LduCnt + i) := hybridUnits(i).io.ldu_io.s3_dly_ld_err
1145c590fb32Scz4e
1146c590fb32Scz4e    // ------------------------------------
1147c590fb32Scz4e    //  Store Port
1148c590fb32Scz4e    // ------------------------------------
1149c590fb32Scz4e    hybridUnits(i).io.stu_io.lsq <> lsq.io.sta.storeAddrIn.takeRight(HyuCnt)(i)
1150c590fb32Scz4e    hybridUnits(i).io.stu_io.lsq_replenish <> lsq.io.sta.storeAddrInRe.takeRight(HyuCnt)(i)
1151c590fb32Scz4e
1152c590fb32Scz4e    lsq.io.sta.storeMaskIn.takeRight(HyuCnt)(i) <> hybridUnits(i).io.stu_io.st_mask_out
1153c590fb32Scz4e    io.mem_to_ooo.stIn.takeRight(HyuCnt)(i).valid := hybridUnits(i).io.stu_io.issue.valid
1154c590fb32Scz4e    io.mem_to_ooo.stIn.takeRight(HyuCnt)(i).bits := hybridUnits(i).io.stu_io.issue.bits
1155c590fb32Scz4e
1156c590fb32Scz4e    // ------------------------------------
1157c590fb32Scz4e    //  Vector Store Port
1158c590fb32Scz4e    // ------------------------------------
1159c590fb32Scz4e    hybridUnits(i).io.vec_stu_io.isFirstIssue := true.B
1160c590fb32Scz4e
1161c590fb32Scz4e    // -------------------------
1162c590fb32Scz4e    // Store Triggers
1163c590fb32Scz4e    // -------------------------
1164c590fb32Scz4e    hybridUnits(i).io.fromCsrTrigger.tdataVec := tdata
1165c590fb32Scz4e    hybridUnits(i).io.fromCsrTrigger.tEnableVec := tEnable
1166c590fb32Scz4e    hybridUnits(i).io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
1167c590fb32Scz4e    hybridUnits(i).io.fromCsrTrigger.debugMode := debugMode
1168c590fb32Scz4e  }
1169c590fb32Scz4e
1170c590fb32Scz4e  // misalignBuffer
1171c590fb32Scz4e  loadMisalignBuffer.io.redirect                <> redirect
1172c590fb32Scz4e  loadMisalignBuffer.io.rob.lcommit             := io.ooo_to_mem.lsqio.lcommit
1173c590fb32Scz4e  loadMisalignBuffer.io.rob.scommit             := io.ooo_to_mem.lsqio.scommit
1174c590fb32Scz4e  loadMisalignBuffer.io.rob.pendingMMIOld       := io.ooo_to_mem.lsqio.pendingMMIOld
1175c590fb32Scz4e  loadMisalignBuffer.io.rob.pendingld           := io.ooo_to_mem.lsqio.pendingld
1176c590fb32Scz4e  loadMisalignBuffer.io.rob.pendingst           := io.ooo_to_mem.lsqio.pendingst
1177c590fb32Scz4e  loadMisalignBuffer.io.rob.pendingVst          := io.ooo_to_mem.lsqio.pendingVst
1178c590fb32Scz4e  loadMisalignBuffer.io.rob.commit              := io.ooo_to_mem.lsqio.commit
1179c590fb32Scz4e  loadMisalignBuffer.io.rob.pendingPtr          := io.ooo_to_mem.lsqio.pendingPtr
1180c590fb32Scz4e  loadMisalignBuffer.io.rob.pendingPtrNext      := io.ooo_to_mem.lsqio.pendingPtrNext
1181c590fb32Scz4e
1182c590fb32Scz4e  lsq.io.loadMisalignFull                       := loadMisalignBuffer.io.loadMisalignFull
1183522c7f99SAnzo  lsq.io.misalignAllowSpec                      := misalign_allow_spec
1184c590fb32Scz4e
1185c590fb32Scz4e  storeMisalignBuffer.io.redirect               <> redirect
1186c590fb32Scz4e  storeMisalignBuffer.io.rob.lcommit            := io.ooo_to_mem.lsqio.lcommit
1187c590fb32Scz4e  storeMisalignBuffer.io.rob.scommit            := io.ooo_to_mem.lsqio.scommit
1188c590fb32Scz4e  storeMisalignBuffer.io.rob.pendingMMIOld      := io.ooo_to_mem.lsqio.pendingMMIOld
1189c590fb32Scz4e  storeMisalignBuffer.io.rob.pendingld          := io.ooo_to_mem.lsqio.pendingld
1190c590fb32Scz4e  storeMisalignBuffer.io.rob.pendingst          := io.ooo_to_mem.lsqio.pendingst
1191c590fb32Scz4e  storeMisalignBuffer.io.rob.pendingVst         := io.ooo_to_mem.lsqio.pendingVst
1192c590fb32Scz4e  storeMisalignBuffer.io.rob.commit             := io.ooo_to_mem.lsqio.commit
1193c590fb32Scz4e  storeMisalignBuffer.io.rob.pendingPtr         := io.ooo_to_mem.lsqio.pendingPtr
1194c590fb32Scz4e  storeMisalignBuffer.io.rob.pendingPtrNext     := io.ooo_to_mem.lsqio.pendingPtrNext
1195c590fb32Scz4e
1196c590fb32Scz4e  lsq.io.maControl                              <> storeMisalignBuffer.io.sqControl
1197c590fb32Scz4e
1198c590fb32Scz4e  lsq.io.cmoOpReq <> dcache.io.cmoOpReq
1199c590fb32Scz4e  lsq.io.cmoOpResp <> dcache.io.cmoOpResp
1200c590fb32Scz4e
1201c590fb32Scz4e  // Prefetcher
1202c590fb32Scz4e  val StreamDTLBPortIndex = TlbStartVec(dtlb_ld_idx) + LduCnt + HyuCnt
1203c590fb32Scz4e  val PrefetcherDTLBPortIndex = TlbStartVec(dtlb_pf_idx)
1204c590fb32Scz4e  val L2toL1DLBPortIndex = TlbStartVec(dtlb_pf_idx) + 1
1205c590fb32Scz4e  prefetcherOpt match {
1206c590fb32Scz4e  case Some(pf) =>
1207c590fb32Scz4e    dtlb_reqs(PrefetcherDTLBPortIndex) <> pf.io.tlb_req
1208c590fb32Scz4e    pf.io.pmp_resp := pmp_check(PrefetcherDTLBPortIndex).resp
1209c590fb32Scz4e  case None =>
1210c590fb32Scz4e    dtlb_reqs(PrefetcherDTLBPortIndex) := DontCare
1211c590fb32Scz4e    dtlb_reqs(PrefetcherDTLBPortIndex).req.valid := false.B
1212c590fb32Scz4e    dtlb_reqs(PrefetcherDTLBPortIndex).resp.ready := true.B
1213c590fb32Scz4e  }
1214c590fb32Scz4e  l1PrefetcherOpt match {
1215c590fb32Scz4e    case Some(pf) =>
1216c590fb32Scz4e      dtlb_reqs(StreamDTLBPortIndex) <> pf.io.tlb_req
1217c590fb32Scz4e      pf.io.pmp_resp := pmp_check(StreamDTLBPortIndex).resp
1218c590fb32Scz4e    case None =>
1219c590fb32Scz4e        dtlb_reqs(StreamDTLBPortIndex) := DontCare
1220c590fb32Scz4e        dtlb_reqs(StreamDTLBPortIndex).req.valid := false.B
1221c590fb32Scz4e        dtlb_reqs(StreamDTLBPortIndex).resp.ready := true.B
1222c590fb32Scz4e  }
1223c590fb32Scz4e  dtlb_reqs(L2toL1DLBPortIndex) <> io.l2_tlb_req
1224c590fb32Scz4e  dtlb_reqs(L2toL1DLBPortIndex).resp.ready := true.B
1225c590fb32Scz4e  io.l2_pmp_resp := pmp_check(L2toL1DLBPortIndex).resp
1226c590fb32Scz4e
1227c590fb32Scz4e  // StoreUnit
1228c590fb32Scz4e  for (i <- 0 until StdCnt) {
1229c590fb32Scz4e    stdExeUnits(i).io.flush <> redirect
1230c590fb32Scz4e    stdExeUnits(i).io.in.valid := io.ooo_to_mem.issueStd(i).valid
1231c590fb32Scz4e    io.ooo_to_mem.issueStd(i).ready := stdExeUnits(i).io.in.ready
1232c590fb32Scz4e    stdExeUnits(i).io.in.bits := io.ooo_to_mem.issueStd(i).bits
1233c590fb32Scz4e  }
1234c590fb32Scz4e
1235c590fb32Scz4e  for (i <- 0 until StaCnt) {
1236c590fb32Scz4e    val stu = storeUnits(i)
1237c590fb32Scz4e
1238c590fb32Scz4e    stu.io.redirect      <> redirect
1239c590fb32Scz4e    stu.io.csrCtrl       <> csrCtrl
1240c590fb32Scz4e    stu.io.dcache        <> dcache.io.lsu.sta(i)
1241c590fb32Scz4e    stu.io.feedback_slow <> io.mem_to_ooo.staIqFeedback(i).feedbackSlow
1242c590fb32Scz4e    stu.io.stin         <> io.ooo_to_mem.issueSta(i)
1243c590fb32Scz4e    stu.io.lsq          <> lsq.io.sta.storeAddrIn(i)
1244c590fb32Scz4e    stu.io.lsq_replenish <> lsq.io.sta.storeAddrInRe(i)
1245c590fb32Scz4e    // dtlb
1246c590fb32Scz4e    stu.io.tlb          <> dtlb_st.head.requestor(i)
1247c590fb32Scz4e    stu.io.pmp          <> pmp_check(LduCnt + HyuCnt + 1 + i).resp
1248c590fb32Scz4e
1249c590fb32Scz4e    // -------------------------
1250c590fb32Scz4e    // Store Triggers
1251c590fb32Scz4e    // -------------------------
1252c590fb32Scz4e    stu.io.fromCsrTrigger.tdataVec := tdata
1253c590fb32Scz4e    stu.io.fromCsrTrigger.tEnableVec := tEnable
1254c590fb32Scz4e    stu.io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
1255c590fb32Scz4e    stu.io.fromCsrTrigger.debugMode := debugMode
1256c590fb32Scz4e
1257c590fb32Scz4e    // prefetch
1258c590fb32Scz4e    stu.io.prefetch_req <> sbuffer.io.store_prefetch(i)
1259c590fb32Scz4e
1260c590fb32Scz4e    // store unit does not need fast feedback
1261c590fb32Scz4e    io.mem_to_ooo.staIqFeedback(i).feedbackFast := DontCare
1262c590fb32Scz4e
1263c590fb32Scz4e    // Lsq to sta unit
1264c590fb32Scz4e    lsq.io.sta.storeMaskIn(i) <> stu.io.st_mask_out
1265c590fb32Scz4e
1266c590fb32Scz4e    // connect misalignBuffer
12674ec1f462Scz4e    storeMisalignBuffer.io.enq(i) <> stu.io.misalign_enq
1268c590fb32Scz4e
1269c590fb32Scz4e    if (i == 0) {
1270c590fb32Scz4e      stu.io.misalign_stin  <> storeMisalignBuffer.io.splitStoreReq
1271c590fb32Scz4e      stu.io.misalign_stout <> storeMisalignBuffer.io.splitStoreResp
1272c590fb32Scz4e    } else {
1273c590fb32Scz4e      stu.io.misalign_stin.valid := false.B
1274c590fb32Scz4e      stu.io.misalign_stin.bits := DontCare
1275c590fb32Scz4e    }
1276c590fb32Scz4e
1277c590fb32Scz4e    // Lsq to std unit's rs
1278c590fb32Scz4e    if (i < VstuCnt){
1279c590fb32Scz4e      when (vsSplit(i).io.vstd.get.valid) {
1280c590fb32Scz4e        lsq.io.std.storeDataIn(i).valid := true.B
1281c590fb32Scz4e        lsq.io.std.storeDataIn(i).bits := vsSplit(i).io.vstd.get.bits
1282c590fb32Scz4e        stData(i).ready := false.B
1283c590fb32Scz4e      }.otherwise {
1284c590fb32Scz4e        lsq.io.std.storeDataIn(i).valid := stData(i).valid && !st_data_atomics(i)
1285c590fb32Scz4e        lsq.io.std.storeDataIn(i).bits.uop := stData(i).bits.uop
1286c590fb32Scz4e        lsq.io.std.storeDataIn(i).bits.data := stData(i).bits.data
1287c590fb32Scz4e        lsq.io.std.storeDataIn(i).bits.mask.map(_ := 0.U)
1288c590fb32Scz4e        lsq.io.std.storeDataIn(i).bits.vdIdx.map(_ := 0.U)
1289c590fb32Scz4e        lsq.io.std.storeDataIn(i).bits.vdIdxInField.map(_ := 0.U)
1290c590fb32Scz4e        stData(i).ready := true.B
1291c590fb32Scz4e      }
1292c590fb32Scz4e    } else {
1293c590fb32Scz4e        lsq.io.std.storeDataIn(i).valid := stData(i).valid && !st_data_atomics(i)
1294c590fb32Scz4e        lsq.io.std.storeDataIn(i).bits.uop := stData(i).bits.uop
1295c590fb32Scz4e        lsq.io.std.storeDataIn(i).bits.data := stData(i).bits.data
1296c590fb32Scz4e        lsq.io.std.storeDataIn(i).bits.mask.map(_ := 0.U)
1297c590fb32Scz4e        lsq.io.std.storeDataIn(i).bits.vdIdx.map(_ := 0.U)
1298c590fb32Scz4e        lsq.io.std.storeDataIn(i).bits.vdIdxInField.map(_ := 0.U)
1299c590fb32Scz4e        stData(i).ready := true.B
1300c590fb32Scz4e    }
1301c590fb32Scz4e    lsq.io.std.storeDataIn.map(_.bits.debug := 0.U.asTypeOf(new DebugBundle))
1302c590fb32Scz4e    lsq.io.std.storeDataIn.foreach(_.bits.isFromLoadUnit := DontCare)
1303c590fb32Scz4e
1304c590fb32Scz4e
1305c590fb32Scz4e    // store prefetch train
1306c590fb32Scz4e    l1PrefetcherOpt.foreach(pf => {
1307c590fb32Scz4e      // stream will train on all load sources
1308c590fb32Scz4e      pf.io.st_in(i).valid := false.B
1309c590fb32Scz4e      pf.io.st_in(i).bits := DontCare
1310c590fb32Scz4e    })
1311c590fb32Scz4e
1312c590fb32Scz4e    prefetcherOpt.foreach(pf => {
1313c590fb32Scz4e      pf.io.st_in(i).valid := Mux(pf_train_on_hit,
1314c590fb32Scz4e        stu.io.prefetch_train.valid,
1315c590fb32Scz4e        stu.io.prefetch_train.valid && stu.io.prefetch_train.bits.isFirstIssue && (
1316c590fb32Scz4e          stu.io.prefetch_train.bits.miss
1317c590fb32Scz4e          )
1318c590fb32Scz4e      )
1319c590fb32Scz4e      pf.io.st_in(i).bits := stu.io.prefetch_train.bits
1320c590fb32Scz4e      pf.io.st_in(i).bits.uop.pc := RegEnable(RegEnable(io.ooo_to_mem.storePc(i), stu.io.s1_prefetch_spec), stu.io.s2_prefetch_spec)
1321c590fb32Scz4e    })
1322c590fb32Scz4e
1323c590fb32Scz4e    // 1. sync issue info to store set LFST
1324c590fb32Scz4e    // 2. when store issue, broadcast issued sqPtr to wake up the following insts
1325c590fb32Scz4e    // io.stIn(i).valid := io.issue(exuParameters.LduCnt + i).valid
1326c590fb32Scz4e    // io.stIn(i).bits := io.issue(exuParameters.LduCnt + i).bits
1327c590fb32Scz4e    io.mem_to_ooo.stIn(i).valid := stu.io.issue.valid
1328c590fb32Scz4e    io.mem_to_ooo.stIn(i).bits := stu.io.issue.bits
1329c590fb32Scz4e
1330c590fb32Scz4e    stu.io.stout.ready := true.B
1331c590fb32Scz4e
1332c590fb32Scz4e    // vector
1333c590fb32Scz4e    if (i < VstuCnt) {
1334c590fb32Scz4e      stu.io.vecstin <> vsSplit(i).io.out
1335c590fb32Scz4e      // vsFlowQueue.io.pipeFeedback(i) <> stu.io.vec_feedback_slow // need connect
1336c590fb32Scz4e    } else {
1337c590fb32Scz4e      stu.io.vecstin.valid := false.B
1338c590fb32Scz4e      stu.io.vecstin.bits := DontCare
1339c590fb32Scz4e      stu.io.vecstout.ready := false.B
1340c590fb32Scz4e    }
1341c590fb32Scz4e    stu.io.vec_isFirstIssue := true.B // TODO
1342c590fb32Scz4e  }
1343c590fb32Scz4e
13443c808de0SAnzo  val sqOtherStout = WireInit(0.U.asTypeOf(DecoupledIO(new MemExuOutput)))
13453c808de0SAnzo  sqOtherStout.valid := lsq.io.mmioStout.valid || lsq.io.cboZeroStout.valid
13463c808de0SAnzo  sqOtherStout.bits  := Mux(lsq.io.cboZeroStout.valid, lsq.io.cboZeroStout.bits, lsq.io.mmioStout.bits)
13473c808de0SAnzo  assert(!(lsq.io.mmioStout.valid && lsq.io.cboZeroStout.valid), "Cannot writeback to mmio and cboZero at the same time.")
13483c808de0SAnzo
13493c808de0SAnzo  // Store writeback by StoreQueue:
13503c808de0SAnzo  //   1. cbo Zero
13513c808de0SAnzo  //   2. mmio
13523c808de0SAnzo  // Currently, the two should not be present at the same time, so simply make cbo zero a higher priority.
13533c808de0SAnzo  val otherStout = WireInit(0.U.asTypeOf(lsq.io.mmioStout))
1354c590fb32Scz4e  NewPipelineConnect(
13553c808de0SAnzo    sqOtherStout, otherStout, otherStout.fire,
1356c590fb32Scz4e    false.B,
13573c808de0SAnzo    Option("otherStoutConnect")
1358c590fb32Scz4e  )
13593c808de0SAnzo  otherStout.ready := false.B
13603c808de0SAnzo  when (otherStout.valid && !storeUnits(0).io.stout.valid) {
1361c590fb32Scz4e    stOut(0).valid := true.B
13623c808de0SAnzo    stOut(0).bits  := otherStout.bits
13633c808de0SAnzo    otherStout.ready := true.B
1364c590fb32Scz4e  }
13653c808de0SAnzo  lsq.io.mmioStout.ready := sqOtherStout.ready
13663c808de0SAnzo  lsq.io.cboZeroStout.ready := sqOtherStout.ready
1367c590fb32Scz4e
1368c590fb32Scz4e  // vec mmio writeback
1369c590fb32Scz4e  lsq.io.vecmmioStout.ready := false.B
1370c590fb32Scz4e
1371c590fb32Scz4e  // miss align buffer will overwrite stOut(0)
13723c808de0SAnzo  val storeMisalignCanWriteBack = !otherStout.valid && !storeUnits(0).io.stout.valid && !storeUnits(0).io.vecstout.valid
1373c590fb32Scz4e  storeMisalignBuffer.io.writeBack.ready := storeMisalignCanWriteBack
1374c590fb32Scz4e  storeMisalignBuffer.io.storeOutValid := storeUnits(0).io.stout.valid
1375c590fb32Scz4e  storeMisalignBuffer.io.storeVecOutValid := storeUnits(0).io.vecstout.valid
1376c590fb32Scz4e  when (storeMisalignBuffer.io.writeBack.valid && storeMisalignCanWriteBack) {
1377c590fb32Scz4e    stOut(0).valid := true.B
1378c590fb32Scz4e    stOut(0).bits  := storeMisalignBuffer.io.writeBack.bits
1379c590fb32Scz4e  }
1380c590fb32Scz4e
1381c590fb32Scz4e  // Uncache
1382c590fb32Scz4e  uncache.io.enableOutstanding := io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable
1383c590fb32Scz4e  uncache.io.hartId := io.hartId
1384c590fb32Scz4e  lsq.io.uncacheOutstanding := io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable
1385c590fb32Scz4e
1386c590fb32Scz4e  // Lsq
1387c590fb32Scz4e  io.mem_to_ooo.lsqio.mmio       := lsq.io.rob.mmio
1388c590fb32Scz4e  io.mem_to_ooo.lsqio.uop        := lsq.io.rob.uop
1389c590fb32Scz4e  lsq.io.rob.lcommit             := io.ooo_to_mem.lsqio.lcommit
1390c590fb32Scz4e  lsq.io.rob.scommit             := io.ooo_to_mem.lsqio.scommit
1391c590fb32Scz4e  lsq.io.rob.pendingMMIOld       := io.ooo_to_mem.lsqio.pendingMMIOld
1392c590fb32Scz4e  lsq.io.rob.pendingld           := io.ooo_to_mem.lsqio.pendingld
1393c590fb32Scz4e  lsq.io.rob.pendingst           := io.ooo_to_mem.lsqio.pendingst
1394c590fb32Scz4e  lsq.io.rob.pendingVst          := io.ooo_to_mem.lsqio.pendingVst
1395c590fb32Scz4e  lsq.io.rob.commit              := io.ooo_to_mem.lsqio.commit
1396c590fb32Scz4e  lsq.io.rob.pendingPtr          := io.ooo_to_mem.lsqio.pendingPtr
1397c590fb32Scz4e  lsq.io.rob.pendingPtrNext      := io.ooo_to_mem.lsqio.pendingPtrNext
1398c590fb32Scz4e
1399c590fb32Scz4e  //  lsq.io.rob            <> io.lsqio.rob
1400c590fb32Scz4e  lsq.io.enq            <> io.ooo_to_mem.enqLsq
1401c590fb32Scz4e  lsq.io.brqRedirect    <> redirect
1402c590fb32Scz4e
1403c590fb32Scz4e  //  violation rollback
1404c590fb32Scz4e  def selectOldestRedirect(xs: Seq[Valid[Redirect]]): Vec[Bool] = {
1405c590fb32Scz4e    val compareVec = (0 until xs.length).map(i => (0 until i).map(j => isAfter(xs(j).bits.robIdx, xs(i).bits.robIdx)))
1406c590fb32Scz4e    val resultOnehot = VecInit((0 until xs.length).map(i => Cat((0 until xs.length).map(j =>
1407c590fb32Scz4e      (if (j < i) !xs(j).valid || compareVec(i)(j)
1408c590fb32Scz4e      else if (j == i) xs(i).valid
1409c590fb32Scz4e      else !xs(j).valid || !compareVec(j)(i))
1410c590fb32Scz4e    )).andR))
1411c590fb32Scz4e    resultOnehot
1412c590fb32Scz4e  }
1413c590fb32Scz4e  val allRedirect = loadUnits.map(_.io.rollback) ++ hybridUnits.map(_.io.ldu_io.rollback) ++ lsq.io.nack_rollback ++ lsq.io.nuke_rollback
1414c590fb32Scz4e  val oldestOneHot = selectOldestRedirect(allRedirect)
1415c590fb32Scz4e  val oldestRedirect = WireDefault(Mux1H(oldestOneHot, allRedirect))
1416c590fb32Scz4e  // memory replay would not cause IAF/IPF/IGPF
1417c590fb32Scz4e  oldestRedirect.bits.cfiUpdate.backendIAF := false.B
1418c590fb32Scz4e  oldestRedirect.bits.cfiUpdate.backendIPF := false.B
1419c590fb32Scz4e  oldestRedirect.bits.cfiUpdate.backendIGPF := false.B
1420c590fb32Scz4e  io.mem_to_ooo.memoryViolation := oldestRedirect
1421c590fb32Scz4e  io.mem_to_ooo.lsqio.lqCanAccept  := lsq.io.lqCanAccept
1422c590fb32Scz4e  io.mem_to_ooo.lsqio.sqCanAccept  := lsq.io.sqCanAccept
1423c590fb32Scz4e
1424c590fb32Scz4e  // lsq.io.uncache        <> uncache.io.lsq
1425c590fb32Scz4e  val s_idle :: s_scalar_uncache :: s_vector_uncache :: Nil = Enum(3)
1426c590fb32Scz4e  val uncacheState = RegInit(s_idle)
1427c590fb32Scz4e  val uncacheReq = Wire(Decoupled(new UncacheWordReq))
1428c590fb32Scz4e  val uncacheIdResp = uncache.io.lsq.idResp
1429c590fb32Scz4e  val uncacheResp = Wire(Decoupled(new UncacheWordResp))
1430c590fb32Scz4e
1431c590fb32Scz4e  uncacheReq.bits := DontCare
1432c590fb32Scz4e  uncacheReq.valid := false.B
1433c590fb32Scz4e  uncacheReq.ready := false.B
1434c590fb32Scz4e  uncacheResp.bits := DontCare
1435c590fb32Scz4e  uncacheResp.valid := false.B
1436c590fb32Scz4e  uncacheResp.ready := false.B
1437c590fb32Scz4e  lsq.io.uncache.req.ready := false.B
1438c590fb32Scz4e  lsq.io.uncache.idResp.valid := false.B
1439c590fb32Scz4e  lsq.io.uncache.idResp.bits := DontCare
1440c590fb32Scz4e  lsq.io.uncache.resp.valid := false.B
1441c590fb32Scz4e  lsq.io.uncache.resp.bits := DontCare
1442c590fb32Scz4e
1443c590fb32Scz4e  switch (uncacheState) {
1444c590fb32Scz4e    is (s_idle) {
1445c590fb32Scz4e      when (uncacheReq.fire) {
1446c590fb32Scz4e        when (lsq.io.uncache.req.valid) {
1447c590fb32Scz4e          when (!lsq.io.uncache.req.bits.nc || !io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
1448c590fb32Scz4e            uncacheState := s_scalar_uncache
1449c590fb32Scz4e          }
1450c590fb32Scz4e        }.otherwise {
1451c590fb32Scz4e          // val isStore = vsFlowQueue.io.uncache.req.bits.cmd === MemoryOpConstants.M_XWR
1452c590fb32Scz4e          when (!io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
1453c590fb32Scz4e            uncacheState := s_vector_uncache
1454c590fb32Scz4e          }
1455c590fb32Scz4e        }
1456c590fb32Scz4e      }
1457c590fb32Scz4e    }
1458c590fb32Scz4e
1459c590fb32Scz4e    is (s_scalar_uncache) {
1460c590fb32Scz4e      when (uncacheResp.fire) {
1461c590fb32Scz4e        uncacheState := s_idle
1462c590fb32Scz4e      }
1463c590fb32Scz4e    }
1464c590fb32Scz4e
1465c590fb32Scz4e    is (s_vector_uncache) {
1466c590fb32Scz4e      when (uncacheResp.fire) {
1467c590fb32Scz4e        uncacheState := s_idle
1468c590fb32Scz4e      }
1469c590fb32Scz4e    }
1470c590fb32Scz4e  }
1471c590fb32Scz4e
1472c590fb32Scz4e  when (lsq.io.uncache.req.valid) {
1473c590fb32Scz4e    uncacheReq <> lsq.io.uncache.req
1474c590fb32Scz4e  }
1475c590fb32Scz4e  when (io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
1476c590fb32Scz4e    lsq.io.uncache.resp <> uncacheResp
1477c590fb32Scz4e    lsq.io.uncache.idResp <> uncacheIdResp
1478c590fb32Scz4e  }.otherwise {
1479c590fb32Scz4e    when (uncacheState === s_scalar_uncache) {
1480c590fb32Scz4e      lsq.io.uncache.resp <> uncacheResp
1481c590fb32Scz4e      lsq.io.uncache.idResp <> uncacheIdResp
1482c590fb32Scz4e    }
1483c590fb32Scz4e  }
1484c590fb32Scz4e  // delay dcache refill for 1 cycle for better timing
1485c590fb32Scz4e  AddPipelineReg(uncacheReq, uncache.io.lsq.req, false.B)
1486c590fb32Scz4e  AddPipelineReg(uncache.io.lsq.resp, uncacheResp, false.B)
1487c590fb32Scz4e
1488c590fb32Scz4e  //lsq.io.refill         := delayedDcacheRefill
1489c590fb32Scz4e  lsq.io.release        := dcache.io.lsu.release
1490c590fb32Scz4e  lsq.io.lqCancelCnt <> io.mem_to_ooo.lqCancelCnt
1491c590fb32Scz4e  lsq.io.sqCancelCnt <> io.mem_to_ooo.sqCancelCnt
1492c590fb32Scz4e  lsq.io.lqDeq <> io.mem_to_ooo.lqDeq
1493c590fb32Scz4e  lsq.io.sqDeq <> io.mem_to_ooo.sqDeq
1494c590fb32Scz4e  // Todo: assign these
1495c590fb32Scz4e  io.mem_to_ooo.sqDeqPtr := lsq.io.sqDeqPtr
1496c590fb32Scz4e  io.mem_to_ooo.lqDeqPtr := lsq.io.lqDeqPtr
1497c590fb32Scz4e  lsq.io.tl_d_channel <> dcache.io.lsu.tl_d_channel
1498c590fb32Scz4e
1499c590fb32Scz4e  // LSQ to store buffer
1500c590fb32Scz4e  lsq.io.sbuffer        <> sbuffer.io.in
1501c590fb32Scz4e  sbuffer.io.in(0).valid := lsq.io.sbuffer(0).valid || vSegmentUnit.io.sbuffer.valid
1502c590fb32Scz4e  sbuffer.io.in(0).bits  := Mux1H(Seq(
1503c590fb32Scz4e    vSegmentUnit.io.sbuffer.valid -> vSegmentUnit.io.sbuffer.bits,
1504c590fb32Scz4e    lsq.io.sbuffer(0).valid       -> lsq.io.sbuffer(0).bits
1505c590fb32Scz4e  ))
1506c590fb32Scz4e  vSegmentUnit.io.sbuffer.ready := sbuffer.io.in(0).ready
1507c590fb32Scz4e  lsq.io.sqEmpty        <> sbuffer.io.sqempty
1508c590fb32Scz4e  dcache.io.force_write := lsq.io.force_write
1509c590fb32Scz4e
1510c590fb32Scz4e  // Initialize when unenabled difftest.
1511c590fb32Scz4e  sbuffer.io.vecDifftestInfo      := DontCare
1512c590fb32Scz4e  lsq.io.sbufferVecDifftestInfo   := DontCare
1513c590fb32Scz4e  vSegmentUnit.io.vecDifftestInfo := DontCare
1514c590fb32Scz4e  if (env.EnableDifftest) {
1515c590fb32Scz4e    sbuffer.io.vecDifftestInfo .zipWithIndex.map{ case (sbufferPort, index) =>
1516c590fb32Scz4e      if (index == 0) {
1517c590fb32Scz4e        val vSegmentDifftestValid = vSegmentUnit.io.vecDifftestInfo.valid
1518c590fb32Scz4e        sbufferPort.valid := Mux(vSegmentDifftestValid, vSegmentUnit.io.vecDifftestInfo.valid, lsq.io.sbufferVecDifftestInfo(0).valid)
1519c590fb32Scz4e        sbufferPort.bits  := Mux(vSegmentDifftestValid, vSegmentUnit.io.vecDifftestInfo.bits, lsq.io.sbufferVecDifftestInfo(0).bits)
1520c590fb32Scz4e
1521c590fb32Scz4e        vSegmentUnit.io.vecDifftestInfo.ready  := sbufferPort.ready
1522c590fb32Scz4e        lsq.io.sbufferVecDifftestInfo(0).ready := sbufferPort.ready
1523c590fb32Scz4e      } else {
1524c590fb32Scz4e         sbufferPort <> lsq.io.sbufferVecDifftestInfo(index)
1525c590fb32Scz4e      }
1526c590fb32Scz4e    }
1527c590fb32Scz4e  }
1528c590fb32Scz4e
1529c590fb32Scz4e  // lsq.io.vecStoreRetire <> vsFlowQueue.io.sqRelease
1530c590fb32Scz4e  // lsq.io.vecWriteback.valid := vlWrapper.io.uopWriteback.fire &&
1531c590fb32Scz4e  //   vlWrapper.io.uopWriteback.bits.uop.vpu.lastUop
1532c590fb32Scz4e  // lsq.io.vecWriteback.bits := vlWrapper.io.uopWriteback.bits
1533c590fb32Scz4e
1534c590fb32Scz4e  // vector
1535c590fb32Scz4e  val vLoadCanAccept  = (0 until VlduCnt).map(i =>
1536c590fb32Scz4e    vlSplit(i).io.in.ready && VlduType.isVecLd(io.ooo_to_mem.issueVldu(i).bits.uop.fuOpType)
1537c590fb32Scz4e  )
1538c590fb32Scz4e  val vStoreCanAccept = (0 until VstuCnt).map(i =>
1539c590fb32Scz4e    vsSplit(i).io.in.ready && VstuType.isVecSt(io.ooo_to_mem.issueVldu(i).bits.uop.fuOpType)
1540c590fb32Scz4e  )
1541c590fb32Scz4e  val isSegment     = io.ooo_to_mem.issueVldu.head.valid && isVsegls(io.ooo_to_mem.issueVldu.head.bits.uop.fuType)
1542c590fb32Scz4e  val isFixVlUop    = io.ooo_to_mem.issueVldu.map{x =>
1543c590fb32Scz4e    x.bits.uop.vpu.isVleff && x.bits.uop.vpu.lastUop && x.valid
1544c590fb32Scz4e  }
1545c590fb32Scz4e
1546c590fb32Scz4e  // init port
1547c590fb32Scz4e  /**
1548c590fb32Scz4e   * TODO: splited vsMergebuffer maybe remove, if one RS can accept two feedback, or don't need RS replay uop
1549c590fb32Scz4e   * for now:
1550c590fb32Scz4e   *  RS0 -> VsSplit0 -> stu0 -> vsMergebuffer0 -> feedback -> RS0
1551c590fb32Scz4e   *  RS1 -> VsSplit1 -> stu1 -> vsMergebuffer1 -> feedback -> RS1
1552c590fb32Scz4e   *
1553c590fb32Scz4e   * vector load don't need feedback
1554c590fb32Scz4e   *
1555c590fb32Scz4e   *  RS0 -> VlSplit0  -> ldu0 -> |
1556c590fb32Scz4e   *  RS1 -> VlSplit1  -> ldu1 -> |  -> vlMergebuffer
1557c590fb32Scz4e   *        replayIO   -> ldu3 -> |
1558c590fb32Scz4e   * */
1559c590fb32Scz4e  (0 until VstuCnt).foreach{i =>
1560c590fb32Scz4e    vsMergeBuffer(i).io.fromPipeline := DontCare
1561c590fb32Scz4e    vsMergeBuffer(i).io.fromSplit := DontCare
1562c590fb32Scz4e
1563c590fb32Scz4e    vsMergeBuffer(i).io.fromMisalignBuffer.get.flush := storeMisalignBuffer.io.toVecStoreMergeBuffer(i).flush
1564c590fb32Scz4e    vsMergeBuffer(i).io.fromMisalignBuffer.get.mbIndex := storeMisalignBuffer.io.toVecStoreMergeBuffer(i).mbIndex
1565c590fb32Scz4e  }
1566c590fb32Scz4e
1567c590fb32Scz4e  (0 until VstuCnt).foreach{i =>
1568c590fb32Scz4e    vsSplit(i).io.redirect <> redirect
1569c590fb32Scz4e    vsSplit(i).io.in <> io.ooo_to_mem.issueVldu(i)
1570c590fb32Scz4e    vsSplit(i).io.in.valid := io.ooo_to_mem.issueVldu(i).valid &&
1571c590fb32Scz4e                              vStoreCanAccept(i) && !isSegment
1572c590fb32Scz4e    vsSplit(i).io.toMergeBuffer <> vsMergeBuffer(i).io.fromSplit.head
1573c590fb32Scz4e    NewPipelineConnect(
1574c590fb32Scz4e      vsSplit(i).io.out, storeUnits(i).io.vecstin, storeUnits(i).io.vecstin.fire,
1575c590fb32Scz4e      Mux(vsSplit(i).io.out.fire, vsSplit(i).io.out.bits.uop.robIdx.needFlush(io.redirect), storeUnits(i).io.vecstin.bits.uop.robIdx.needFlush(io.redirect)),
1576c590fb32Scz4e      Option("VsSplitConnectStu")
1577c590fb32Scz4e    )
1578c590fb32Scz4e    vsSplit(i).io.vstd.get := DontCare // Todo: Discuss how to pass vector store data
1579c590fb32Scz4e
1580c590fb32Scz4e    vsSplit(i).io.vstdMisalign.get.storeMisalignBufferEmpty := !storeMisalignBuffer.io.full
1581c590fb32Scz4e    vsSplit(i).io.vstdMisalign.get.storePipeEmpty := !storeUnits(i).io.s0_s1_valid
1582c590fb32Scz4e
1583c590fb32Scz4e  }
1584c590fb32Scz4e  (0 until VlduCnt).foreach{i =>
1585c590fb32Scz4e    vlSplit(i).io.redirect <> redirect
1586c590fb32Scz4e    vlSplit(i).io.in <> io.ooo_to_mem.issueVldu(i)
1587c590fb32Scz4e    vlSplit(i).io.in.valid := io.ooo_to_mem.issueVldu(i).valid &&
1588c590fb32Scz4e                              vLoadCanAccept(i) && !isSegment && !isFixVlUop(i)
1589c590fb32Scz4e    vlSplit(i).io.toMergeBuffer <> vlMergeBuffer.io.fromSplit(i)
1590c590fb32Scz4e    vlSplit(i).io.threshold.get.valid := vlMergeBuffer.io.toSplit.get.threshold
1591c590fb32Scz4e    vlSplit(i).io.threshold.get.bits  := lsq.io.lqDeqPtr
1592c590fb32Scz4e    NewPipelineConnect(
1593c590fb32Scz4e      vlSplit(i).io.out, loadUnits(i).io.vecldin, loadUnits(i).io.vecldin.fire,
1594c590fb32Scz4e      Mux(vlSplit(i).io.out.fire, vlSplit(i).io.out.bits.uop.robIdx.needFlush(io.redirect), loadUnits(i).io.vecldin.bits.uop.robIdx.needFlush(io.redirect)),
1595c590fb32Scz4e      Option("VlSplitConnectLdu")
1596c590fb32Scz4e    )
1597c590fb32Scz4e
1598c590fb32Scz4e    //Subsequent instrction will be blocked
1599c590fb32Scz4e    vfofBuffer.io.in(i).valid := io.ooo_to_mem.issueVldu(i).valid
1600c590fb32Scz4e    vfofBuffer.io.in(i).bits  := io.ooo_to_mem.issueVldu(i).bits
1601c590fb32Scz4e  }
1602c590fb32Scz4e  (0 until LduCnt).foreach{i=>
1603c590fb32Scz4e    loadUnits(i).io.vecldout.ready         := vlMergeBuffer.io.fromPipeline(i).ready
1604c590fb32Scz4e    loadMisalignBuffer.io.vecWriteBack.ready := true.B
1605c590fb32Scz4e
1606c590fb32Scz4e    if (i == MisalignWBPort) {
1607c590fb32Scz4e      when(loadUnits(i).io.vecldout.valid) {
1608c590fb32Scz4e        vlMergeBuffer.io.fromPipeline(i).valid := loadUnits(i).io.vecldout.valid
1609c590fb32Scz4e        vlMergeBuffer.io.fromPipeline(i).bits  := loadUnits(i).io.vecldout.bits
1610c590fb32Scz4e      } .otherwise {
1611c590fb32Scz4e        vlMergeBuffer.io.fromPipeline(i).valid   := loadMisalignBuffer.io.vecWriteBack.valid
1612c590fb32Scz4e        vlMergeBuffer.io.fromPipeline(i).bits    := loadMisalignBuffer.io.vecWriteBack.bits
1613c590fb32Scz4e      }
1614c590fb32Scz4e    } else {
1615c590fb32Scz4e      vlMergeBuffer.io.fromPipeline(i).valid := loadUnits(i).io.vecldout.valid
1616c590fb32Scz4e      vlMergeBuffer.io.fromPipeline(i).bits  := loadUnits(i).io.vecldout.bits
1617c590fb32Scz4e    }
1618c590fb32Scz4e  }
1619c590fb32Scz4e
1620c590fb32Scz4e  (0 until StaCnt).foreach{i=>
1621c590fb32Scz4e    if(i < VstuCnt){
1622c590fb32Scz4e      storeUnits(i).io.vecstout.ready := true.B
1623c590fb32Scz4e      storeMisalignBuffer.io.vecWriteBack(i).ready := vsMergeBuffer(i).io.fromPipeline.head.ready
1624c590fb32Scz4e
1625c590fb32Scz4e      when(storeUnits(i).io.vecstout.valid) {
1626c590fb32Scz4e        vsMergeBuffer(i).io.fromPipeline.head.valid := storeUnits(i).io.vecstout.valid
1627c590fb32Scz4e        vsMergeBuffer(i).io.fromPipeline.head.bits  := storeUnits(i).io.vecstout.bits
1628c590fb32Scz4e      } .otherwise {
1629c590fb32Scz4e        vsMergeBuffer(i).io.fromPipeline.head.valid   := storeMisalignBuffer.io.vecWriteBack(i).valid
1630c590fb32Scz4e        vsMergeBuffer(i).io.fromPipeline.head.bits    := storeMisalignBuffer.io.vecWriteBack(i).bits
1631c590fb32Scz4e      }
1632c590fb32Scz4e    }
1633c590fb32Scz4e  }
1634c590fb32Scz4e
1635c590fb32Scz4e  (0 until VlduCnt).foreach{i=>
1636c590fb32Scz4e    io.ooo_to_mem.issueVldu(i).ready := vLoadCanAccept(i) || vStoreCanAccept(i)
1637c590fb32Scz4e  }
1638c590fb32Scz4e
1639c590fb32Scz4e  vlMergeBuffer.io.redirect <> redirect
1640c590fb32Scz4e  vsMergeBuffer.map(_.io.redirect <> redirect)
1641c590fb32Scz4e  (0 until VlduCnt).foreach{i=>
1642c590fb32Scz4e    vlMergeBuffer.io.toLsq(i) <> lsq.io.ldvecFeedback(i)
1643c590fb32Scz4e  }
1644c590fb32Scz4e  (0 until VstuCnt).foreach{i=>
1645c590fb32Scz4e    vsMergeBuffer(i).io.toLsq.head <> lsq.io.stvecFeedback(i)
1646c590fb32Scz4e  }
1647c590fb32Scz4e
1648c590fb32Scz4e  (0 until VlduCnt).foreach{i=>
1649c590fb32Scz4e    // send to RS
1650c590fb32Scz4e    vlMergeBuffer.io.feedback(i) <> io.mem_to_ooo.vlduIqFeedback(i).feedbackSlow
1651c590fb32Scz4e    io.mem_to_ooo.vlduIqFeedback(i).feedbackFast := DontCare
1652c590fb32Scz4e  }
1653c590fb32Scz4e  (0 until VstuCnt).foreach{i =>
1654c590fb32Scz4e    // send to RS
1655c590fb32Scz4e    if (i == 0){
1656c590fb32Scz4e      io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow.valid := vsMergeBuffer(i).io.feedback.head.valid || vSegmentUnit.io.feedback.valid
1657c590fb32Scz4e      io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow.bits := Mux1H(Seq(
1658c590fb32Scz4e        vSegmentUnit.io.feedback.valid -> vSegmentUnit.io.feedback.bits,
1659c590fb32Scz4e        vsMergeBuffer(i).io.feedback.head.valid ->  vsMergeBuffer(i).io.feedback.head.bits
1660c590fb32Scz4e      ))
1661c590fb32Scz4e      io.mem_to_ooo.vstuIqFeedback(i).feedbackFast := DontCare
1662c590fb32Scz4e    } else {
1663c590fb32Scz4e      vsMergeBuffer(i).io.feedback.head <> io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow
1664c590fb32Scz4e      io.mem_to_ooo.vstuIqFeedback(i).feedbackFast := DontCare
1665c590fb32Scz4e    }
1666c590fb32Scz4e  }
1667c590fb32Scz4e
1668c590fb32Scz4e  (0 until VlduCnt).foreach{i=>
1669c590fb32Scz4e    if (i == 0){ // for segmentUnit, segmentUnit use port0 writeback
1670c590fb32Scz4e      io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid || vSegmentUnit.io.uopwriteback.valid
1671c590fb32Scz4e      io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
1672c590fb32Scz4e        vSegmentUnit.io.uopwriteback.valid          -> vSegmentUnit.io.uopwriteback.bits,
1673c590fb32Scz4e        vlMergeBuffer.io.uopWriteback(i).valid      -> vlMergeBuffer.io.uopWriteback(i).bits,
1674c590fb32Scz4e        vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
1675c590fb32Scz4e      ))
1676c590fb32Scz4e      vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready && !vSegmentUnit.io.uopwriteback.valid
1677c590fb32Scz4e      vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid && !vSegmentUnit.io.uopwriteback.valid
1678c590fb32Scz4e      vSegmentUnit.io.uopwriteback.ready := io.mem_to_ooo.writebackVldu(i).ready
1679c590fb32Scz4e    } else if (i == 1) {
1680c590fb32Scz4e      io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid || vfofBuffer.io.uopWriteback.valid
1681c590fb32Scz4e      io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
1682c590fb32Scz4e        vfofBuffer.io.uopWriteback.valid            -> vfofBuffer.io.uopWriteback.bits,
1683c590fb32Scz4e        vlMergeBuffer.io.uopWriteback(i).valid      -> vlMergeBuffer.io.uopWriteback(i).bits,
1684c590fb32Scz4e        vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
1685c590fb32Scz4e      ))
1686c590fb32Scz4e      vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready && !vfofBuffer.io.uopWriteback.valid
1687c590fb32Scz4e      vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid && !vfofBuffer.io.uopWriteback.valid
1688c590fb32Scz4e      vfofBuffer.io.uopWriteback.ready := io.mem_to_ooo.writebackVldu(i).ready
1689c590fb32Scz4e    } else {
1690c590fb32Scz4e      io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid
1691c590fb32Scz4e      io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
1692c590fb32Scz4e        vlMergeBuffer.io.uopWriteback(i).valid -> vlMergeBuffer.io.uopWriteback(i).bits,
1693c590fb32Scz4e        vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
1694c590fb32Scz4e      ))
1695c590fb32Scz4e      vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready
1696c590fb32Scz4e      vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid
1697c590fb32Scz4e    }
1698c590fb32Scz4e
1699c590fb32Scz4e    vfofBuffer.io.mergeUopWriteback(i).valid := vlMergeBuffer.io.uopWriteback(i).valid
1700c590fb32Scz4e    vfofBuffer.io.mergeUopWriteback(i).bits  := vlMergeBuffer.io.uopWriteback(i).bits
1701c590fb32Scz4e  }
1702c590fb32Scz4e
1703c590fb32Scz4e
1704c590fb32Scz4e  vfofBuffer.io.redirect <> redirect
1705c590fb32Scz4e
1706c590fb32Scz4e  // Sbuffer
1707c590fb32Scz4e  sbuffer.io.csrCtrl    <> csrCtrl
1708c590fb32Scz4e  sbuffer.io.dcache     <> dcache.io.lsu.store
1709c590fb32Scz4e  sbuffer.io.memSetPattenDetected := dcache.io.memSetPattenDetected
1710c590fb32Scz4e  sbuffer.io.force_write <> lsq.io.force_write
1711c590fb32Scz4e  // flush sbuffer
1712c590fb32Scz4e  val cmoFlush = lsq.io.flushSbuffer.valid
1713c590fb32Scz4e  val fenceFlush = io.ooo_to_mem.flushSb
1714c590fb32Scz4e  val atomicsFlush = atomicsUnit.io.flush_sbuffer.valid || vSegmentUnit.io.flush_sbuffer.valid
1715c590fb32Scz4e  val stIsEmpty = sbuffer.io.flush.empty && uncache.io.flush.empty
1716c590fb32Scz4e  io.mem_to_ooo.sbIsEmpty := RegNext(stIsEmpty)
1717c590fb32Scz4e
1718c590fb32Scz4e  // if both of them tries to flush sbuffer at the same time
1719c590fb32Scz4e  // something must have gone wrong
1720c590fb32Scz4e  assert(!(fenceFlush && atomicsFlush && cmoFlush))
1721c590fb32Scz4e  sbuffer.io.flush.valid := RegNext(fenceFlush || atomicsFlush || cmoFlush)
1722c590fb32Scz4e  uncache.io.flush.valid := sbuffer.io.flush.valid
1723c590fb32Scz4e
1724c590fb32Scz4e  // AtomicsUnit: AtomicsUnit will override other control signials,
1725c590fb32Scz4e  // as atomics insts (LR/SC/AMO) will block the pipeline
1726c590fb32Scz4e  val s_normal +: s_atomics = Enum(StaCnt + HyuCnt + 1)
1727c590fb32Scz4e  val state = RegInit(s_normal)
1728c590fb32Scz4e
1729c590fb32Scz4e  val st_atomics = Seq.tabulate(StaCnt)(i =>
1730c590fb32Scz4e    io.ooo_to_mem.issueSta(i).valid && FuType.storeIsAMO((io.ooo_to_mem.issueSta(i).bits.uop.fuType))
1731c590fb32Scz4e  ) ++ Seq.tabulate(HyuCnt)(i =>
1732c590fb32Scz4e    io.ooo_to_mem.issueHya(i).valid && FuType.storeIsAMO((io.ooo_to_mem.issueHya(i).bits.uop.fuType))
1733c590fb32Scz4e  )
1734c590fb32Scz4e
1735c590fb32Scz4e  for (i <- 0 until StaCnt) when(st_atomics(i)) {
1736c590fb32Scz4e    io.ooo_to_mem.issueSta(i).ready := atomicsUnit.io.in.ready
1737c590fb32Scz4e    storeUnits(i).io.stin.valid := false.B
1738c590fb32Scz4e
1739c590fb32Scz4e    state := s_atomics(i)
1740c590fb32Scz4e  }
1741c590fb32Scz4e  for (i <- 0 until HyuCnt) when(st_atomics(StaCnt + i)) {
1742c590fb32Scz4e    io.ooo_to_mem.issueHya(i).ready := atomicsUnit.io.in.ready
1743c590fb32Scz4e    hybridUnits(i).io.lsin.valid := false.B
1744c590fb32Scz4e
1745c590fb32Scz4e    state := s_atomics(StaCnt + i)
1746c590fb32Scz4e    assert(!st_atomics.zipWithIndex.filterNot(_._2 == StaCnt + i).unzip._1.reduce(_ || _))
1747c590fb32Scz4e  }
1748c590fb32Scz4e  when (atomicsUnit.io.out.valid) {
1749c590fb32Scz4e    state := s_normal
1750c590fb32Scz4e  }
1751c590fb32Scz4e
1752c590fb32Scz4e  atomicsUnit.io.in.valid := st_atomics.reduce(_ || _)
1753c590fb32Scz4e  atomicsUnit.io.in.bits  := Mux1H(Seq.tabulate(StaCnt)(i =>
1754c590fb32Scz4e    st_atomics(i) -> io.ooo_to_mem.issueSta(i).bits) ++
1755c590fb32Scz4e    Seq.tabulate(HyuCnt)(i => st_atomics(StaCnt+i) -> io.ooo_to_mem.issueHya(i).bits))
1756c590fb32Scz4e  atomicsUnit.io.storeDataIn.zipWithIndex.foreach { case (stdin, i) =>
1757c590fb32Scz4e    stdin.valid := st_data_atomics(i)
1758c590fb32Scz4e    stdin.bits := stData(i).bits
1759c590fb32Scz4e  }
1760c590fb32Scz4e  atomicsUnit.io.redirect <> redirect
1761c590fb32Scz4e
1762c590fb32Scz4e  // TODO: complete amo's pmp support
1763c590fb32Scz4e  val amoTlb = dtlb_ld(0).requestor(0)
1764c590fb32Scz4e  atomicsUnit.io.dtlb.resp.valid := false.B
1765c590fb32Scz4e  atomicsUnit.io.dtlb.resp.bits  := DontCare
1766c590fb32Scz4e  atomicsUnit.io.dtlb.req.ready  := amoTlb.req.ready
1767c590fb32Scz4e  atomicsUnit.io.pmpResp := pmp_check(0).resp
1768c590fb32Scz4e
1769c590fb32Scz4e  atomicsUnit.io.dcache <> dcache.io.lsu.atomics
1770c590fb32Scz4e  atomicsUnit.io.flush_sbuffer.empty := stIsEmpty
1771c590fb32Scz4e
1772c590fb32Scz4e  atomicsUnit.io.csrCtrl := csrCtrl
1773c590fb32Scz4e
1774c590fb32Scz4e  // for atomicsUnit, it uses loadUnit(0)'s TLB port
1775c590fb32Scz4e
1776c590fb32Scz4e  when (state =/= s_normal) {
1777c590fb32Scz4e    // use store wb port instead of load
1778c590fb32Scz4e    loadUnits(0).io.ldout.ready := false.B
1779c590fb32Scz4e    // use load_0's TLB
1780c590fb32Scz4e    atomicsUnit.io.dtlb <> amoTlb
1781c590fb32Scz4e
1782c590fb32Scz4e    // hw prefetch should be disabled while executing atomic insts
1783c590fb32Scz4e    loadUnits.map(i => i.io.prefetch_req.valid := false.B)
1784c590fb32Scz4e
1785c590fb32Scz4e    // make sure there's no in-flight uops in load unit
1786c590fb32Scz4e    assert(!loadUnits(0).io.ldout.valid)
1787c590fb32Scz4e  }
1788c590fb32Scz4e
1789c590fb32Scz4e  lsq.io.flushSbuffer.empty := sbuffer.io.sbempty
1790c590fb32Scz4e
1791c590fb32Scz4e  for (i <- 0 until StaCnt) {
1792c590fb32Scz4e    when (state === s_atomics(i)) {
1793c590fb32Scz4e      io.mem_to_ooo.staIqFeedback(i).feedbackSlow := atomicsUnit.io.feedbackSlow
1794c590fb32Scz4e      assert(!storeUnits(i).io.feedback_slow.valid)
1795c590fb32Scz4e    }
1796c590fb32Scz4e  }
1797c590fb32Scz4e  for (i <- 0 until HyuCnt) {
1798c590fb32Scz4e    when (state === s_atomics(StaCnt + i)) {
1799c590fb32Scz4e      io.mem_to_ooo.hyuIqFeedback(i).feedbackSlow := atomicsUnit.io.feedbackSlow
1800c590fb32Scz4e      assert(!hybridUnits(i).io.feedback_slow.valid)
1801c590fb32Scz4e    }
1802c590fb32Scz4e  }
1803c590fb32Scz4e
1804c590fb32Scz4e  lsq.io.exceptionAddr.isStore := io.ooo_to_mem.isStoreException
1805c590fb32Scz4e  // Exception address is used several cycles after flush.
1806c590fb32Scz4e  // We delay it by 10 cycles to ensure its flush safety.
1807c590fb32Scz4e  val atomicsException = RegInit(false.B)
1808c590fb32Scz4e  when (DelayN(redirect.valid, 10) && atomicsException) {
1809c590fb32Scz4e    atomicsException := false.B
1810c590fb32Scz4e  }.elsewhen (atomicsUnit.io.exceptionInfo.valid) {
1811c590fb32Scz4e    atomicsException := true.B
1812c590fb32Scz4e  }
1813c590fb32Scz4e
1814c590fb32Scz4e  val misalignBufExceptionOverwrite = loadMisalignBuffer.io.overwriteExpBuf.valid || storeMisalignBuffer.io.overwriteExpBuf.valid
1815c590fb32Scz4e  val misalignBufExceptionVaddr = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
1816c590fb32Scz4e    loadMisalignBuffer.io.overwriteExpBuf.vaddr,
1817c590fb32Scz4e    storeMisalignBuffer.io.overwriteExpBuf.vaddr
1818c590fb32Scz4e  )
1819c590fb32Scz4e  val misalignBufExceptionIsHyper = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
1820c590fb32Scz4e    loadMisalignBuffer.io.overwriteExpBuf.isHyper,
1821c590fb32Scz4e    storeMisalignBuffer.io.overwriteExpBuf.isHyper
1822c590fb32Scz4e  )
1823c590fb32Scz4e  val misalignBufExceptionGpaddr = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
1824c590fb32Scz4e    loadMisalignBuffer.io.overwriteExpBuf.gpaddr,
1825c590fb32Scz4e    storeMisalignBuffer.io.overwriteExpBuf.gpaddr
1826c590fb32Scz4e  )
1827c590fb32Scz4e  val misalignBufExceptionIsForVSnonLeafPTE = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
1828c590fb32Scz4e    loadMisalignBuffer.io.overwriteExpBuf.isForVSnonLeafPTE,
1829c590fb32Scz4e    storeMisalignBuffer.io.overwriteExpBuf.isForVSnonLeafPTE
1830c590fb32Scz4e  )
1831c590fb32Scz4e
1832c590fb32Scz4e  val vSegmentException = RegInit(false.B)
1833c590fb32Scz4e  when (DelayN(redirect.valid, 10) && vSegmentException) {
1834c590fb32Scz4e    vSegmentException := false.B
1835c590fb32Scz4e  }.elsewhen (vSegmentUnit.io.exceptionInfo.valid) {
1836c590fb32Scz4e    vSegmentException := true.B
1837c590fb32Scz4e  }
1838c590fb32Scz4e  val atomicsExceptionAddress = RegEnable(atomicsUnit.io.exceptionInfo.bits.vaddr, atomicsUnit.io.exceptionInfo.valid)
1839c590fb32Scz4e  val vSegmentExceptionVstart = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vstart, vSegmentUnit.io.exceptionInfo.valid)
1840c590fb32Scz4e  val vSegmentExceptionVl     = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vl, vSegmentUnit.io.exceptionInfo.valid)
1841c590fb32Scz4e  val vSegmentExceptionAddress = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vaddr, vSegmentUnit.io.exceptionInfo.valid)
1842c590fb32Scz4e  val atomicsExceptionGPAddress = RegEnable(atomicsUnit.io.exceptionInfo.bits.gpaddr, atomicsUnit.io.exceptionInfo.valid)
1843c590fb32Scz4e  val vSegmentExceptionGPAddress = RegEnable(vSegmentUnit.io.exceptionInfo.bits.gpaddr, vSegmentUnit.io.exceptionInfo.valid)
1844c590fb32Scz4e  val atomicsExceptionIsForVSnonLeafPTE = RegEnable(atomicsUnit.io.exceptionInfo.bits.isForVSnonLeafPTE, atomicsUnit.io.exceptionInfo.valid)
1845c590fb32Scz4e  val vSegmentExceptionIsForVSnonLeafPTE = RegEnable(vSegmentUnit.io.exceptionInfo.bits.isForVSnonLeafPTE, vSegmentUnit.io.exceptionInfo.valid)
1846c590fb32Scz4e
1847c590fb32Scz4e  val exceptionVaddr = Mux(
1848c590fb32Scz4e    atomicsException,
1849c590fb32Scz4e    atomicsExceptionAddress,
1850c590fb32Scz4e    Mux(misalignBufExceptionOverwrite,
1851c590fb32Scz4e      misalignBufExceptionVaddr,
1852c590fb32Scz4e      Mux(vSegmentException,
1853c590fb32Scz4e        vSegmentExceptionAddress,
1854c590fb32Scz4e        lsq.io.exceptionAddr.vaddr
1855c590fb32Scz4e      )
1856c590fb32Scz4e    )
1857c590fb32Scz4e  )
1858c590fb32Scz4e  // whether vaddr need ext or is hyper inst:
1859c590fb32Scz4e  // VaNeedExt: atomicsException -> false; misalignBufExceptionOverwrite -> true; vSegmentException -> false
1860c590fb32Scz4e  // IsHyper: atomicsException -> false; vSegmentException -> false
1861c590fb32Scz4e  val exceptionVaNeedExt = !atomicsException &&
1862c590fb32Scz4e    (misalignBufExceptionOverwrite ||
1863c590fb32Scz4e      (!vSegmentException && lsq.io.exceptionAddr.vaNeedExt))
1864c590fb32Scz4e  val exceptionIsHyper = !atomicsException &&
1865c590fb32Scz4e    (misalignBufExceptionOverwrite && misalignBufExceptionIsHyper ||
1866c590fb32Scz4e      (!vSegmentException && lsq.io.exceptionAddr.isHyper && !misalignBufExceptionOverwrite))
1867c590fb32Scz4e
1868168f1995SXu, Zefan  def GenExceptionVa(
1869168f1995SXu, Zefan    mode: UInt, isVirt: Bool, vaNeedExt: Bool,
1870c590fb32Scz4e    satp: TlbSatpBundle, vsatp: TlbSatpBundle, hgatp: TlbHgatpBundle,
1871168f1995SXu, Zefan    vaddr: UInt
1872168f1995SXu, Zefan  ) = {
1873c590fb32Scz4e    require(VAddrBits >= 50)
1874c590fb32Scz4e
1875168f1995SXu, Zefan    val satpNone = satp.mode === 0.U
1876168f1995SXu, Zefan    val satpSv39 = satp.mode === 8.U
1877168f1995SXu, Zefan    val satpSv48 = satp.mode === 9.U
1878c590fb32Scz4e
1879168f1995SXu, Zefan    val vsatpNone = vsatp.mode === 0.U
1880168f1995SXu, Zefan    val vsatpSv39 = vsatp.mode === 8.U
1881168f1995SXu, Zefan    val vsatpSv48 = vsatp.mode === 9.U
1882168f1995SXu, Zefan
1883168f1995SXu, Zefan    val hgatpNone = hgatp.mode === 0.U
1884168f1995SXu, Zefan    val hgatpSv39x4 = hgatp.mode === 8.U
1885168f1995SXu, Zefan    val hgatpSv48x4 = hgatp.mode === 9.U
1886168f1995SXu, Zefan
1887168f1995SXu, Zefan    // For !isVirt, mode check is necessary, as we don't want virtual memory in M-mode.
1888168f1995SXu, Zefan    // For isVirt, mode check is unnecessary, as virt won't be 1 in M-mode.
1889168f1995SXu, Zefan    // Also, isVirt includes Hyper Insts, which don't care mode either.
1890168f1995SXu, Zefan
1891168f1995SXu, Zefan    val useBareAddr =
1892168f1995SXu, Zefan      (isVirt && vsatpNone && hgatpNone) ||
1893168f1995SXu, Zefan      (!isVirt && (mode === CSRConst.ModeM)) ||
1894168f1995SXu, Zefan      (!isVirt && (mode =/= CSRConst.ModeM) && satpNone)
1895168f1995SXu, Zefan    val useSv39Addr =
1896168f1995SXu, Zefan      (isVirt && vsatpSv39) ||
1897168f1995SXu, Zefan      (!isVirt && (mode =/= CSRConst.ModeM) && satpSv39)
1898168f1995SXu, Zefan    val useSv48Addr =
1899168f1995SXu, Zefan      (isVirt && vsatpSv48) ||
1900168f1995SXu, Zefan      (!isVirt && (mode =/= CSRConst.ModeM) && satpSv48)
1901168f1995SXu, Zefan    val useSv39x4Addr = isVirt && vsatpNone && hgatpSv39x4
1902168f1995SXu, Zefan    val useSv48x4Addr = isVirt && vsatpNone && hgatpSv48x4
1903c590fb32Scz4e
1904c590fb32Scz4e    val bareAddr   = ZeroExt(vaddr(PAddrBits - 1, 0), XLEN)
1905c590fb32Scz4e    val sv39Addr   = SignExt(vaddr.take(39), XLEN)
1906c590fb32Scz4e    val sv39x4Addr = ZeroExt(vaddr.take(39 + 2), XLEN)
1907c590fb32Scz4e    val sv48Addr   = SignExt(vaddr.take(48), XLEN)
1908c590fb32Scz4e    val sv48x4Addr = ZeroExt(vaddr.take(48 + 2), XLEN)
1909c590fb32Scz4e
1910c590fb32Scz4e    val ExceptionVa = Wire(UInt(XLEN.W))
1911c590fb32Scz4e    when (vaNeedExt) {
1912c590fb32Scz4e      ExceptionVa := Mux1H(Seq(
1913168f1995SXu, Zefan        (useBareAddr)   -> bareAddr,
1914168f1995SXu, Zefan        (useSv39Addr)   -> sv39Addr,
1915168f1995SXu, Zefan        (useSv48Addr)   -> sv48Addr,
1916168f1995SXu, Zefan        (useSv39x4Addr) -> sv39x4Addr,
1917168f1995SXu, Zefan        (useSv48x4Addr) -> sv48x4Addr,
1918c590fb32Scz4e      ))
1919c590fb32Scz4e    } .otherwise {
1920c590fb32Scz4e      ExceptionVa := vaddr
1921c590fb32Scz4e    }
1922c590fb32Scz4e
1923c590fb32Scz4e    ExceptionVa
1924c590fb32Scz4e  }
1925c590fb32Scz4e
1926c590fb32Scz4e  io.mem_to_ooo.lsqio.vaddr := RegNext(
1927c590fb32Scz4e    GenExceptionVa(tlbcsr.priv.dmode, tlbcsr.priv.virt || exceptionIsHyper, exceptionVaNeedExt,
1928c590fb32Scz4e    tlbcsr.satp, tlbcsr.vsatp, tlbcsr.hgatp, exceptionVaddr)
1929c590fb32Scz4e  )
1930c590fb32Scz4e
1931c590fb32Scz4e  // vsegment instruction is executed atomic, which mean atomicsException and vSegmentException should not raise at the same time.
1932c590fb32Scz4e  XSError(atomicsException && vSegmentException, "atomicsException and vSegmentException raise at the same time!")
1933c590fb32Scz4e  io.mem_to_ooo.lsqio.vstart := RegNext(Mux(vSegmentException,
1934c590fb32Scz4e                                            vSegmentExceptionVstart,
1935c590fb32Scz4e                                            lsq.io.exceptionAddr.vstart)
1936c590fb32Scz4e  )
1937c590fb32Scz4e  io.mem_to_ooo.lsqio.vl     := RegNext(Mux(vSegmentException,
1938c590fb32Scz4e                                            vSegmentExceptionVl,
1939c590fb32Scz4e                                            lsq.io.exceptionAddr.vl)
1940c590fb32Scz4e  )
1941c590fb32Scz4e
1942c590fb32Scz4e  XSError(atomicsException && atomicsUnit.io.in.valid, "new instruction before exception triggers\n")
1943c590fb32Scz4e  io.mem_to_ooo.lsqio.gpaddr := RegNext(Mux(
1944c590fb32Scz4e    atomicsException,
1945c590fb32Scz4e    atomicsExceptionGPAddress,
1946c590fb32Scz4e    Mux(misalignBufExceptionOverwrite,
1947c590fb32Scz4e      misalignBufExceptionGpaddr,
1948c590fb32Scz4e      Mux(vSegmentException,
1949c590fb32Scz4e        vSegmentExceptionGPAddress,
1950c590fb32Scz4e        lsq.io.exceptionAddr.gpaddr
1951c590fb32Scz4e      )
1952c590fb32Scz4e    )
1953c590fb32Scz4e  ))
1954c590fb32Scz4e  io.mem_to_ooo.lsqio.isForVSnonLeafPTE := RegNext(Mux(
1955c590fb32Scz4e    atomicsException,
1956c590fb32Scz4e    atomicsExceptionIsForVSnonLeafPTE,
1957c590fb32Scz4e    Mux(misalignBufExceptionOverwrite,
1958c590fb32Scz4e      misalignBufExceptionIsForVSnonLeafPTE,
1959c590fb32Scz4e      Mux(vSegmentException,
1960c590fb32Scz4e        vSegmentExceptionIsForVSnonLeafPTE,
1961c590fb32Scz4e        lsq.io.exceptionAddr.isForVSnonLeafPTE
1962c590fb32Scz4e      )
1963c590fb32Scz4e    )
1964c590fb32Scz4e  ))
1965c590fb32Scz4e  io.mem_to_ooo.topToBackendBypass match { case x =>
1966c590fb32Scz4e    x.hartId            := io.hartId
1967c590fb32Scz4e    x.l2FlushDone       := RegNext(io.l2_flush_done)
1968c590fb32Scz4e    x.externalInterrupt.msip  := outer.clint_int_sink.in.head._1(0)
1969c590fb32Scz4e    x.externalInterrupt.mtip  := outer.clint_int_sink.in.head._1(1)
1970c590fb32Scz4e    x.externalInterrupt.meip  := outer.plic_int_sink.in.head._1(0)
1971c590fb32Scz4e    x.externalInterrupt.seip  := outer.plic_int_sink.in.last._1(0)
1972c590fb32Scz4e    x.externalInterrupt.debug := outer.debug_int_sink.in.head._1(0)
197376cb49abScz4e    x.externalInterrupt.nmi.nmi_31 := outer.nmi_int_sink.in.head._1(0) | outer.beu_local_int_sink.in.head._1(0)
1974c590fb32Scz4e    x.externalInterrupt.nmi.nmi_43 := outer.nmi_int_sink.in.head._1(1)
1975c590fb32Scz4e    x.msiInfo           := DelayNWithValid(io.fromTopToBackend.msiInfo, 1)
1976c590fb32Scz4e    x.clintTime         := DelayNWithValid(io.fromTopToBackend.clintTime, 1)
1977c590fb32Scz4e  }
1978c590fb32Scz4e
1979c590fb32Scz4e  io.memInfo.sqFull := RegNext(lsq.io.sqFull)
1980c590fb32Scz4e  io.memInfo.lqFull := RegNext(lsq.io.lqFull)
1981c590fb32Scz4e  io.memInfo.dcacheMSHRFull := RegNext(dcache.io.mshrFull)
1982c590fb32Scz4e
1983c590fb32Scz4e  io.inner_hartId := io.hartId
1984c590fb32Scz4e  io.inner_reset_vector := RegNext(io.outer_reset_vector)
1985c590fb32Scz4e  io.outer_cpu_halt := io.ooo_to_mem.backendToTopBypass.cpuHalted
1986c590fb32Scz4e  io.outer_l2_flush_en := io.ooo_to_mem.csrCtrl.flush_l2_enable
1987c590fb32Scz4e  io.outer_power_down_en := io.ooo_to_mem.csrCtrl.power_down_enable
1988c590fb32Scz4e  io.outer_cpu_critical_error := io.ooo_to_mem.backendToTopBypass.cpuCriticalError
19898cfc24b2STang Haojin  io.outer_msi_ack := io.ooo_to_mem.backendToTopBypass.msiAck
1990c590fb32Scz4e  io.outer_beu_errors_icache := RegNext(io.inner_beu_errors_icache)
1991c590fb32Scz4e  io.inner_hc_perfEvents <> RegNext(io.outer_hc_perfEvents)
1992c590fb32Scz4e
1993c590fb32Scz4e  // vector segmentUnit
1994c590fb32Scz4e  vSegmentUnit.io.in.bits <> io.ooo_to_mem.issueVldu.head.bits
1995c590fb32Scz4e  vSegmentUnit.io.in.valid := isSegment && io.ooo_to_mem.issueVldu.head.valid// is segment instruction
1996c590fb32Scz4e  vSegmentUnit.io.dtlb.resp.bits <> dtlb_reqs.take(LduCnt).head.resp.bits
1997c590fb32Scz4e  vSegmentUnit.io.dtlb.resp.valid <> dtlb_reqs.take(LduCnt).head.resp.valid
1998c590fb32Scz4e  vSegmentUnit.io.pmpResp <> pmp_check.head.resp
1999c590fb32Scz4e  vSegmentUnit.io.flush_sbuffer.empty := stIsEmpty
2000c590fb32Scz4e  vSegmentUnit.io.redirect <> redirect
2001c590fb32Scz4e  vSegmentUnit.io.rdcache.resp.bits := dcache.io.lsu.load(0).resp.bits
2002c590fb32Scz4e  vSegmentUnit.io.rdcache.resp.valid := dcache.io.lsu.load(0).resp.valid
2003c590fb32Scz4e  vSegmentUnit.io.rdcache.s2_bank_conflict := dcache.io.lsu.load(0).s2_bank_conflict
2004c590fb32Scz4e  // -------------------------
2005c590fb32Scz4e  // Vector Segment Triggers
2006c590fb32Scz4e  // -------------------------
2007c590fb32Scz4e  vSegmentUnit.io.fromCsrTrigger.tdataVec := tdata
2008c590fb32Scz4e  vSegmentUnit.io.fromCsrTrigger.tEnableVec := tEnable
2009c590fb32Scz4e  vSegmentUnit.io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
2010c590fb32Scz4e  vSegmentUnit.io.fromCsrTrigger.debugMode := debugMode
2011c590fb32Scz4e
2012c590fb32Scz4e  // reset tree of MemBlock
2013c590fb32Scz4e  if (p(DebugOptionsKey).ResetGen) {
2014c590fb32Scz4e    val leftResetTree = ResetGenNode(
2015c590fb32Scz4e      Seq(
2016c590fb32Scz4e        ModuleNode(ptw),
2017c590fb32Scz4e        ModuleNode(ptw_to_l2_buffer),
2018c590fb32Scz4e        ModuleNode(lsq),
2019c590fb32Scz4e        ModuleNode(dtlb_st_tlb_st),
2020c590fb32Scz4e        ModuleNode(dtlb_prefetch_tlb_prefetch),
2021c590fb32Scz4e        ModuleNode(pmp)
2022c590fb32Scz4e      )
2023c590fb32Scz4e      ++ pmp_checkers.map(ModuleNode(_))
2024c590fb32Scz4e      ++ (if (prefetcherOpt.isDefined) Seq(ModuleNode(prefetcherOpt.get)) else Nil)
2025c590fb32Scz4e      ++ (if (l1PrefetcherOpt.isDefined) Seq(ModuleNode(l1PrefetcherOpt.get)) else Nil)
2026c590fb32Scz4e    )
2027c590fb32Scz4e    val rightResetTree = ResetGenNode(
2028c590fb32Scz4e      Seq(
2029c590fb32Scz4e        ModuleNode(sbuffer),
2030c590fb32Scz4e        ModuleNode(dtlb_ld_tlb_ld),
2031c590fb32Scz4e        ModuleNode(dcache),
2032c590fb32Scz4e        ModuleNode(l1d_to_l2_buffer),
2033c590fb32Scz4e        CellNode(io.reset_backend)
2034c590fb32Scz4e      )
2035c590fb32Scz4e    )
2036602aa9f1Scz4e    ResetGen(leftResetTree, reset, sim = false, io.sramTestBypass.fromL2Top.mbistReset)
2037602aa9f1Scz4e    ResetGen(rightResetTree, reset, sim = false, io.sramTestBypass.fromL2Top.mbistReset)
2038c590fb32Scz4e  } else {
2039c590fb32Scz4e    io.reset_backend := DontCare
2040c590fb32Scz4e  }
2041c590fb32Scz4e  io.resetInFrontendBypass.toL2Top := io.resetInFrontendBypass.fromFrontend
2042c590fb32Scz4e  // trace interface
2043c590fb32Scz4e  val traceToL2Top = io.traceCoreInterfaceBypass.toL2Top
2044c590fb32Scz4e  val traceFromBackend = io.traceCoreInterfaceBypass.fromBackend
2045c590fb32Scz4e  traceFromBackend.fromEncoder := RegNext(traceToL2Top.fromEncoder)
2046c590fb32Scz4e  traceToL2Top.toEncoder.trap  := RegEnable(
2047c590fb32Scz4e    traceFromBackend.toEncoder.trap,
2048c590fb32Scz4e    traceFromBackend.toEncoder.groups(0).valid && Itype.isTrap(traceFromBackend.toEncoder.groups(0).bits.itype)
2049c590fb32Scz4e  )
2050c590fb32Scz4e  traceToL2Top.toEncoder.priv := RegEnable(
2051c590fb32Scz4e    traceFromBackend.toEncoder.priv,
2052c590fb32Scz4e    traceFromBackend.toEncoder.groups(0).valid
2053c590fb32Scz4e  )
2054c590fb32Scz4e  (0 until TraceGroupNum).foreach { i =>
2055c590fb32Scz4e    traceToL2Top.toEncoder.groups(i).valid := RegNext(traceFromBackend.toEncoder.groups(i).valid)
2056c590fb32Scz4e    traceToL2Top.toEncoder.groups(i).bits.iretire := RegNext(traceFromBackend.toEncoder.groups(i).bits.iretire)
2057c590fb32Scz4e    traceToL2Top.toEncoder.groups(i).bits.itype := RegNext(traceFromBackend.toEncoder.groups(i).bits.itype)
2058c590fb32Scz4e    traceToL2Top.toEncoder.groups(i).bits.ilastsize := RegEnable(
2059c590fb32Scz4e      traceFromBackend.toEncoder.groups(i).bits.ilastsize,
2060c590fb32Scz4e      traceFromBackend.toEncoder.groups(i).valid
2061c590fb32Scz4e    )
2062c590fb32Scz4e    traceToL2Top.toEncoder.groups(i).bits.iaddr := RegEnable(
2063c590fb32Scz4e      traceFromBackend.toEncoder.groups(i).bits.iaddr,
2064c590fb32Scz4e      traceFromBackend.toEncoder.groups(i).valid
2065c590fb32Scz4e    ) + (RegEnable(
2066c590fb32Scz4e      traceFromBackend.toEncoder.groups(i).bits.ftqOffset.getOrElse(0.U),
2067c590fb32Scz4e      traceFromBackend.toEncoder.groups(i).valid
2068c590fb32Scz4e    ) << instOffsetBits)
2069c590fb32Scz4e  }
2070c590fb32Scz4e
2071c590fb32Scz4e
2072c590fb32Scz4e  io.mem_to_ooo.storeDebugInfo := DontCare
2073c590fb32Scz4e  // store event difftest information
2074c590fb32Scz4e  if (env.EnableDifftest) {
2075c590fb32Scz4e    (0 until EnsbufferWidth).foreach{i =>
2076c590fb32Scz4e        io.mem_to_ooo.storeDebugInfo(i).robidx := sbuffer.io.vecDifftestInfo(i).bits.robIdx
2077c590fb32Scz4e        sbuffer.io.vecDifftestInfo(i).bits.pc := io.mem_to_ooo.storeDebugInfo(i).pc
2078c590fb32Scz4e    }
2079c590fb32Scz4e  }
2080c590fb32Scz4e
2081c590fb32Scz4e  // top-down info
2082c590fb32Scz4e  dcache.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
2083c590fb32Scz4e  dtlbRepeater.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
2084c590fb32Scz4e  lsq.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
2085c590fb32Scz4e  io.debugTopDown.toCore.robHeadMissInDCache := dcache.io.debugTopDown.robHeadMissInDCache
2086c590fb32Scz4e  io.debugTopDown.toCore.robHeadTlbReplay := lsq.io.debugTopDown.robHeadTlbReplay
2087c590fb32Scz4e  io.debugTopDown.toCore.robHeadTlbMiss := lsq.io.debugTopDown.robHeadTlbMiss
2088c590fb32Scz4e  io.debugTopDown.toCore.robHeadLoadVio := lsq.io.debugTopDown.robHeadLoadVio
2089c590fb32Scz4e  io.debugTopDown.toCore.robHeadLoadMSHR := lsq.io.debugTopDown.robHeadLoadMSHR
2090c590fb32Scz4e  dcache.io.debugTopDown.robHeadOtherReplay := lsq.io.debugTopDown.robHeadOtherReplay
2091c590fb32Scz4e  dcache.io.debugRolling := io.debugRolling
2092c590fb32Scz4e
2093c590fb32Scz4e  lsq.io.noUopsIssued := io.topDownInfo.toBackend.noUopsIssued
2094c590fb32Scz4e  io.topDownInfo.toBackend.lqEmpty := lsq.io.lqEmpty
2095c590fb32Scz4e  io.topDownInfo.toBackend.sqEmpty := lsq.io.sqEmpty
2096c590fb32Scz4e  io.topDownInfo.toBackend.l1Miss := dcache.io.l1Miss
2097c590fb32Scz4e  io.topDownInfo.toBackend.l2TopMiss.l2Miss := RegNext(io.topDownInfo.fromL2Top.l2Miss)
2098c590fb32Scz4e  io.topDownInfo.toBackend.l2TopMiss.l3Miss := RegNext(io.topDownInfo.fromL2Top.l3Miss)
2099c590fb32Scz4e
2100c590fb32Scz4e  val hyLdDeqCount = PopCount(io.ooo_to_mem.issueHya.map(x => x.valid && FuType.isLoad(x.bits.uop.fuType)))
2101c590fb32Scz4e  val hyStDeqCount = PopCount(io.ooo_to_mem.issueHya.map(x => x.valid && FuType.isStore(x.bits.uop.fuType)))
2102c590fb32Scz4e  val ldDeqCount = PopCount(io.ooo_to_mem.issueLda.map(_.valid)) +& hyLdDeqCount
2103c590fb32Scz4e  val stDeqCount = PopCount(io.ooo_to_mem.issueSta.take(StaCnt).map(_.valid)) +& hyStDeqCount
2104c590fb32Scz4e  val iqDeqCount = ldDeqCount +& stDeqCount
2105c590fb32Scz4e  XSPerfAccumulate("load_iq_deq_count", ldDeqCount)
2106c590fb32Scz4e  XSPerfHistogram("load_iq_deq_count", ldDeqCount, true.B, 0, LdExuCnt + 1)
2107c590fb32Scz4e  XSPerfAccumulate("store_iq_deq_count", stDeqCount)
2108c590fb32Scz4e  XSPerfHistogram("store_iq_deq_count", stDeqCount, true.B, 0, StAddrCnt + 1)
2109c590fb32Scz4e  XSPerfAccumulate("ls_iq_deq_count", iqDeqCount)
2110c590fb32Scz4e
2111c590fb32Scz4e  val pfevent = Module(new PFEvent)
2112c590fb32Scz4e  pfevent.io.distribute_csr := csrCtrl.distribute_csr
2113c590fb32Scz4e  val csrevents = pfevent.io.hpmevent.slice(16,24)
2114c590fb32Scz4e
2115c590fb32Scz4e  val perfFromUnits = (loadUnits ++ Seq(sbuffer, lsq, dcache)).flatMap(_.getPerfEvents)
2116c590fb32Scz4e  val perfFromPTW = perfEventsPTW.map(x => ("PTW_" + x._1, x._2))
2117c590fb32Scz4e  val perfBlock     = Seq(("ldDeqCount", ldDeqCount),
2118c590fb32Scz4e                          ("stDeqCount", stDeqCount))
2119c590fb32Scz4e  // let index = 0 be no event
2120c590fb32Scz4e  val allPerfEvents = Seq(("noEvent", 0.U)) ++ perfFromUnits ++ perfFromPTW ++ perfBlock
2121c590fb32Scz4e
2122c590fb32Scz4e  if (printEventCoding) {
2123c590fb32Scz4e    for (((name, inc), i) <- allPerfEvents.zipWithIndex) {
2124c590fb32Scz4e      println("MemBlock perfEvents Set", name, inc, i)
2125c590fb32Scz4e    }
2126c590fb32Scz4e  }
2127c590fb32Scz4e
2128c590fb32Scz4e  val allPerfInc = allPerfEvents.map(_._2.asTypeOf(new PerfEvent))
2129c590fb32Scz4e  val perfEvents = HPerfMonitor(csrevents, allPerfInc).getPerfEvents
2130c590fb32Scz4e  generatePerfEvent()
21314b2c87baS梁森 Liang Sen
21324b2c87baS梁森 Liang Sen  private val mbistPl = MbistPipeline.PlaceMbistPipeline(Int.MaxValue, "MbistPipeMemBlk", hasMbist)
21334b2c87baS梁森 Liang Sen  private val mbistIntf = if(hasMbist) {
21344b2c87baS梁森 Liang Sen    val params = mbistPl.get.nodeParams
21354b2c87baS梁森 Liang Sen    val intf = Some(Module(new MbistInterface(
21364b2c87baS梁森 Liang Sen      params = Seq(params),
21374b2c87baS梁森 Liang Sen      ids = Seq(mbistPl.get.childrenIds),
21384b2c87baS梁森 Liang Sen      name = s"MbistIntfMemBlk",
21394b2c87baS梁森 Liang Sen      pipelineNum = 1
21404b2c87baS梁森 Liang Sen    )))
21414b2c87baS梁森 Liang Sen    intf.get.toPipeline.head <> mbistPl.get.mbist
21424b2c87baS梁森 Liang Sen    mbistPl.get.registerCSV(intf.get.info, "MbistMemBlk")
21434b2c87baS梁森 Liang Sen    intf.get.mbist := DontCare
21444b2c87baS梁森 Liang Sen    dontTouch(intf.get.mbist)
21454b2c87baS梁森 Liang Sen    //TODO: add mbist controller connections here
21464b2c87baS梁森 Liang Sen    intf
21474b2c87baS梁森 Liang Sen  } else {
21484b2c87baS梁森 Liang Sen    None
21494b2c87baS梁森 Liang Sen  }
2150602aa9f1Scz4e   private val sigFromSrams = if (hasSramTest) Some(SramHelper.genBroadCastBundleTop()) else None
21514b2c87baS梁森 Liang Sen  private val cg = ClockGate.genTeSrc
21524b2c87baS梁森 Liang Sen  dontTouch(cg)
2153602aa9f1Scz4e
2154602aa9f1Scz4e  sigFromSrams.foreach({ case sig => sig.mbist := DontCare })
21554b2c87baS梁森 Liang Sen  if (hasMbist) {
2156602aa9f1Scz4e    sigFromSrams.get.mbist := io.sramTestBypass.fromL2Top.mbist.get
2157602aa9f1Scz4e    io.sramTestBypass.toFrontend.mbist.get := io.sramTestBypass.fromL2Top.mbist.get
2158602aa9f1Scz4e    io.sramTestBypass.toFrontend.mbistReset.get := io.sramTestBypass.fromL2Top.mbistReset.get
2159602aa9f1Scz4e    io.sramTestBypass.toBackend.mbist.get := io.sramTestBypass.fromL2Top.mbist.get
2160602aa9f1Scz4e    io.sramTestBypass.toBackend.mbistReset.get := io.sramTestBypass.fromL2Top.mbistReset.get
2161602aa9f1Scz4e    cg.cgen := io.sramTestBypass.fromL2Top.mbist.get.cgen
21624b2c87baS梁森 Liang Sen  } else {
21634b2c87baS梁森 Liang Sen    cg.cgen := false.B
21644b2c87baS梁森 Liang Sen  }
2165602aa9f1Scz4e
2166602aa9f1Scz4e  // sram debug
2167602aa9f1Scz4e  val sramCtl = Option.when(hasSramCtl)(RegNext(io.sramTestBypass.fromL2Top.sramCtl.get))
2168602aa9f1Scz4e  sigFromSrams.foreach({ case sig => sig.sramCtl := DontCare })
2169602aa9f1Scz4e  sigFromSrams.zip(sramCtl).foreach {
2170602aa9f1Scz4e    case (sig, ctl) =>
2171602aa9f1Scz4e      sig.sramCtl.RTSEL := ctl(1, 0) // CFG[1 : 0]
2172602aa9f1Scz4e      sig.sramCtl.WTSEL := ctl(3, 2) // CFG[3 : 2]
2173602aa9f1Scz4e      sig.sramCtl.MCR   := ctl(5, 4) // CFG[5 : 4]
2174602aa9f1Scz4e      sig.sramCtl.MCW   := ctl(7, 6) // CFG[7 : 6]
2175602aa9f1Scz4e  }
2176602aa9f1Scz4e  if (hasSramCtl) {
2177602aa9f1Scz4e    io.sramTestBypass.toFrontend.sramCtl.get := sramCtl.get
2178602aa9f1Scz4e  }
2179c590fb32Scz4e}
2180c590fb32Scz4e
2181c590fb32Scz4eclass MemBlock()(implicit p: Parameters) extends LazyModule
2182c590fb32Scz4e  with HasXSParameter {
2183c590fb32Scz4e  override def shouldBeInlined: Boolean = false
2184c590fb32Scz4e
2185c590fb32Scz4e  val inner = LazyModule(new MemBlockInlined())
2186c590fb32Scz4e
2187c590fb32Scz4e  lazy val module = new MemBlockImp(this)
2188c590fb32Scz4e}
2189c590fb32Scz4e
2190c590fb32Scz4eclass MemBlockImp(wrapper: MemBlock) extends LazyModuleImp(wrapper) {
2191c590fb32Scz4e  val io = IO(wrapper.inner.module.io.cloneType)
2192c590fb32Scz4e  val io_perf = IO(wrapper.inner.module.io_perf.cloneType)
2193c590fb32Scz4e  io <> wrapper.inner.module.io
2194c590fb32Scz4e  io_perf <> wrapper.inner.module.io_perf
2195c590fb32Scz4e
2196c590fb32Scz4e  if (p(DebugOptionsKey).ResetGen) {
2197602aa9f1Scz4e    ResetGen(
2198602aa9f1Scz4e      ResetGenNode(Seq(ModuleNode(wrapper.inner.module))),
2199602aa9f1Scz4e      reset, sim = false, io.sramTestBypass.fromL2Top.mbistReset
2200602aa9f1Scz4e    )
2201c590fb32Scz4e  }
2202c590fb32Scz4e}
2203