xref: /XiangShan/src/main/scala/xiangshan/mem/MemBlock.scala (revision 1592abd11eecf7bec0f1453ffe4a7617167f8ba9)
1c590fb32Scz4e/***************************************************************************************
2c590fb32Scz4e* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3c590fb32Scz4e* Copyright (c) 2020-2021 Peng Cheng Laboratory
4c590fb32Scz4e*
5c590fb32Scz4e* XiangShan is licensed under Mulan PSL v2.
6c590fb32Scz4e* You can use this software according to the terms and conditions of the Mulan PSL v2.
7c590fb32Scz4e* You may obtain a copy of Mulan PSL v2 at:
8c590fb32Scz4e*          http://license.coscl.org.cn/MulanPSL2
9c590fb32Scz4e*
10c590fb32Scz4e* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11c590fb32Scz4e* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12c590fb32Scz4e* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13c590fb32Scz4e*
14c590fb32Scz4e* See the Mulan PSL v2 for more details.
15c590fb32Scz4e***************************************************************************************/
16c590fb32Scz4e
17c590fb32Scz4epackage xiangshan.mem
18c590fb32Scz4e
19c590fb32Scz4eimport org.chipsalliance.cde.config.Parameters
20c590fb32Scz4eimport chisel3._
21c590fb32Scz4eimport chisel3.util._
22c590fb32Scz4eimport freechips.rocketchip.diplomacy._
23c590fb32Scz4eimport freechips.rocketchip.diplomacy.{BundleBridgeSource, LazyModule, LazyModuleImp}
24c590fb32Scz4eimport freechips.rocketchip.interrupts.{IntSinkNode, IntSinkPortSimple}
25c590fb32Scz4eimport freechips.rocketchip.tile.HasFPUParameters
26c590fb32Scz4eimport freechips.rocketchip.tilelink._
27c590fb32Scz4eimport utils._
28c590fb32Scz4eimport utility._
29602aa9f1Scz4eimport utility.mbist.{MbistInterface, MbistPipeline}
30602aa9f1Scz4eimport utility.sram.{SramMbistBundle, SramBroadcastBundle, SramHelper}
318cfc24b2STang Haojinimport system.{HasSoCParameter, SoCParamsKey}
32c590fb32Scz4eimport xiangshan._
33c590fb32Scz4eimport xiangshan.ExceptionNO._
34c590fb32Scz4eimport xiangshan.frontend.HasInstrMMIOConst
35c590fb32Scz4eimport xiangshan.backend.Bundles.{DynInst, MemExuInput, MemExuOutput}
36c590fb32Scz4eimport xiangshan.backend.ctrlblock.{DebugLSIO, LsTopdownInfo}
37c590fb32Scz4eimport xiangshan.backend.exu.MemExeUnit
38c590fb32Scz4eimport xiangshan.backend.fu._
39c590fb32Scz4eimport xiangshan.backend.fu.FuType._
40a67fd0f5SGuanghui Chengimport xiangshan.backend.fu.NewCSR.{CsrTriggerBundle, TriggerUtil, PFEvent}
41075d4937Sjunxiong-jiimport xiangshan.backend.fu.util.{CSRConst, SdtrigExt}
42c590fb32Scz4eimport xiangshan.backend.{BackendToTopBundle, TopToBackendBundle}
43c590fb32Scz4eimport xiangshan.backend.rob.{RobDebugRollingIO, RobPtr, RobLsqIO}
44c590fb32Scz4eimport xiangshan.backend.datapath.NewPipelineConnect
45c590fb32Scz4eimport xiangshan.backend.trace.{Itype, TraceCoreInterface}
46c590fb32Scz4eimport xiangshan.backend.Bundles._
47c590fb32Scz4eimport xiangshan.mem._
48c590fb32Scz4eimport xiangshan.mem.mdp._
499e12e8edScz4eimport xiangshan.mem.Bundles._
50c590fb32Scz4eimport xiangshan.mem.prefetch.{BasePrefecher, L1Prefetcher, SMSParams, SMSPrefetcher}
51c590fb32Scz4eimport xiangshan.cache._
52c590fb32Scz4eimport xiangshan.cache.mmu._
534b2c87baS梁森 Liang Senimport coupledL2.PrefetchRecv
548cfc24b2STang Haojinimport utility.mbist.{MbistInterface, MbistPipeline}
558cfc24b2STang Haojinimport utility.sram.{SramBroadcastBundle, SramHelper}
56602aa9f1Scz4e
/**
 * Unit counts and fixed port assignments shared by MemBlock and its bundles.
 * All base counts come from [[backendParams]].
 */
trait HasMemBlockParameters extends HasXSParameter {
  // number of memory units
  val LduCnt  = backendParams.LduCnt   // pure load pipelines
  val StaCnt  = backendParams.StaCnt   // store-address pipelines
  val StdCnt  = backendParams.StdCnt   // store-data pipelines
  val HyuCnt  = backendParams.HyuCnt   // hybrid (load + store-address) pipelines
  val VlduCnt = backendParams.VlduCnt  // vector load units
  val VstuCnt = backendParams.VstuCnt  // vector store units

  // derived counts: a hybrid unit contributes one load port and one store-address port
  val LdExuCnt  = LduCnt + HyuCnt
  val StAddrCnt = StaCnt + HyuCnt
  val StDataCnt = StdCnt
  val MemExuCnt = LduCnt + HyuCnt + StaCnt + StdCnt
  val MemAddrExtCnt = LdExuCnt + StaCnt
  val MemVExuCnt = VlduCnt + VstuCnt

  // fixed load-writeback port assignment (indices into the lda writeback vector):
  // port 0 is shared with the atomics unit, port 1 with the load-misalign
  // buffer, port 2 with the uncache load path
  val AtomicWBPort   = 0
  val MisalignWBPort = 1
  val UncacheWBPort  = 2
  // ports usable for non-cacheable writeback — presumably must stay in sync
  // with MisalignWBPort/UncacheWBPort above; TODO confirm
  val NCWBPorts = Seq(1, 2)
}
78c590fb32Scz4e
/** Base class for bundles that need access to the MemBlock unit counts. */
abstract class MemBlockBundle(implicit val p: Parameters) extends Bundle with HasMemBlockParameters
80c590fb32Scz4e
/**
 * Store-data functional unit: a fully combinational pass-through that forwards
 * src(0) as the store data. Only `res.data` and `ctrl.robIdx` are meaningful;
 * all other output fields are zeroed first (last-connect overrides below).
 */
class Std(cfg: FuConfig)(implicit p: Parameters) extends FuncUnit(cfg) {
  io.in.ready := io.out.ready
  io.out.valid := io.in.valid
  // zero the whole output, then override the fields this unit actually produces
  io.out.bits := 0.U.asTypeOf(io.out.bits)
  io.out.bits.res.data := io.in.bits.data.src(0)
  io.out.bits.ctrl.robIdx := io.in.bits.ctrl.robIdx
}
88c590fb32Scz4e
/** Signals flowing from the out-of-order backend into MemBlock. */
class ooo_to_mem(implicit p: Parameters) extends MemBlockBundle {
  val backendToTopBypass = Flipped(new BackendToTopBundle)

  // per load-capable pipe: hints for the load fast path (match mask, op type,
  // 12-bit immediate) — semantics assumed from names, confirm with backend
  val loadFastMatch = Vec(LdExuCnt, Input(UInt(LdExuCnt.W)))
  val loadFastFuOpType = Vec(LdExuCnt, Input(FuOpType()))
  val loadFastImm = Vec(LdExuCnt, Input(UInt(12.W)))
  val sfence = Input(new SfenceBundle)
  val tlbCsr = Input(new TlbCsrBundle)
  // LSQ bookkeeping from the ROB
  val lsqio = new Bundle {
    val lcommit = Input(UInt(log2Up(CommitWidth + 1).W)) // loads committed this cycle (<= CommitWidth)
    val scommit = Input(UInt(log2Up(CommitWidth + 1).W)) // stores committed this cycle (<= CommitWidth)
    val pendingMMIOld = Input(Bool())
    val pendingld = Input(Bool())
    val pendingst = Input(Bool())
    val pendingVst = Input(Bool())
    val commit = Input(Bool())
    val pendingPtr = Input(new RobPtr)
    val pendingPtrNext = Input(new RobPtr)
  }

  val isStoreException = Input(Bool())
  val isVlsException = Input(Bool())
  val csrCtrl = Flipped(new CustomCSRCtrlIO)
  val enqLsq = new LsqEnqIO
  val flushSb = Input(Bool()) // request to flush the store buffer

  val storePc = Vec(StaCnt, Input(UInt(VAddrBits.W))) // for hw prefetch
  val hybridPc = Vec(HyuCnt, Input(UInt(VAddrBits.W))) // for hw prefetch

  // issue ports, one group per unit type
  val issueLda = MixedVec(Seq.fill(LduCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueSta = MixedVec(Seq.fill(StaCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueStd = MixedVec(Seq.fill(StdCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueHya = MixedVec(Seq.fill(HyuCnt)(Flipped(DecoupledIO(new MemExuInput))))
  val issueVldu = MixedVec(Seq.fill(VlduCnt)(Flipped(DecoupledIO(new MemExuInput(isVector=true)))))

  // all issue ports flattened in (lda, sta, std, hya, vldu) order
  def issueUops = issueLda ++ issueSta ++ issueStd ++ issueHya ++ issueVldu
}
126c590fb32Scz4e
/** Signals flowing from MemBlock back to the out-of-order backend. */
class mem_to_ooo(implicit p: Parameters) extends MemBlockBundle {
  val topToBackendBypass = new TopToBackendBundle

  val otherFastWakeup = Vec(LdExuCnt, ValidIO(new DynInst))
  // queue cancel/dequeue counts for backend resource accounting
  val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize + 1).W))
  val sqCancelCnt = Output(UInt(log2Up(StoreQueueSize + 1).W))
  val sqDeq = Output(UInt(log2Ceil(EnsbufferWidth + 1).W))
  val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
  // used by VLSU issue queue, the vector store would wait all store before it, and the vector load would wait all load
  val sqDeqPtr = Output(new SqPtr)
  val lqDeqPtr = Output(new LqPtr)
  val stIn = Vec(StAddrCnt, ValidIO(new MemExuInput))
  val stIssuePtr = Output(new SqPtr())

  // redirect raised on a detected memory ordering violation
  val memoryViolation = ValidIO(new Redirect)
  val sbIsEmpty = Output(Bool()) // store buffer is empty

  val lsTopdownInfo = Vec(LdExuCnt, Output(new LsTopdownInfo))

  // LSQ exception / uncache state reported to the backend
  val lsqio = new Bundle {
    val vaddr = Output(UInt(XLEN.W))
    val vstart = Output(UInt((log2Up(VLEN) + 1).W))
    val vl = Output(UInt((log2Up(VLEN) + 1).W))
    val gpaddr = Output(UInt(XLEN.W))
    val isForVSnonLeafPTE = Output(Bool())
    val mmio = Output(Vec(LoadPipelineWidth, Bool()))
    val uop = Output(Vec(LoadPipelineWidth, new DynInst))
    val lqCanAccept = Output(Bool())
    val sqCanAccept = Output(Bool())
  }

  // per-ensbuffer-port debug handshake: MemBlock supplies the robIdx,
  // the backend answers with the corresponding pc
  val storeDebugInfo = Vec(EnsbufferWidth, new Bundle {
    val robidx = Output(new RobPtr)
    val pc     = Input(UInt(VAddrBits.W))
  })

  // writeback ports grouped by unit type
  val writebackLda = Vec(LduCnt, DecoupledIO(new MemExuOutput))
  val writebackSta = Vec(StaCnt, DecoupledIO(new MemExuOutput))
  val writebackStd = Vec(StdCnt, DecoupledIO(new MemExuOutput))
  val writebackHyuLda = Vec(HyuCnt, DecoupledIO(new MemExuOutput))
  val writebackHyuSta = Vec(HyuCnt, DecoupledIO(new MemExuOutput))
  val writebackVldu = Vec(VlduCnt, DecoupledIO(new MemExuOutput(isVector = true)))
  // flattened writeback ports; NOTE the ordering (sta, hyuLda, hyuSta, lda,
  // vldu, std) — presumably fixed by the backend's port mapping, do not reorder
  def writeBack: Seq[DecoupledIO[MemExuOutput]] = {
    writebackSta ++
      writebackHyuLda ++ writebackHyuSta ++
      writebackLda ++
      writebackVldu ++
      writebackStd
  }

  // issue-queue feedback (replay / RS feedback) per unit type
  val ldaIqFeedback = Vec(LduCnt, new MemRSFeedbackIO)
  val staIqFeedback = Vec(StaCnt, new MemRSFeedbackIO)
  val hyuIqFeedback = Vec(HyuCnt, new MemRSFeedbackIO)
  val vstuIqFeedback= Vec(VstuCnt, new MemRSFeedbackIO(isVector = true))
  val vlduIqFeedback= Vec(VlduCnt, new MemRSFeedbackIO(isVector = true))
  val ldCancel = Vec(backendParams.LdExuCnt, new LoadCancelIO)
  val wakeup = Vec(backendParams.LdExuCnt, Valid(new DynInst))

  val s3_delayed_load_error = Vec(LdExuCnt, Output(Bool()))
}
187c590fb32Scz4e
/** Top-down performance-analysis flags describing the ROB-head instruction. */
class MemCoreTopDownIO extends Bundle {
  val robHeadMissInDCache = Output(Bool())
  val robHeadTlbReplay = Output(Bool())
  val robHeadTlbMiss = Output(Bool())
  val robHeadLoadVio = Output(Bool())
  val robHeadLoadMSHR = Output(Bool())
}
195c590fb32Scz4e
/** Frontend-to-MemBlock interface: the I-TLB's page-table-walk port. */
class fetch_to_mem(implicit p: Parameters) extends XSBundle{
  val itlb = Flipped(new TlbPtwIO())
}
199c590fb32Scz4e
// triple buffer applied in i-mmio path (two at MemBlock, one at L2Top)
class InstrUncacheBuffer()(implicit p: Parameters) extends LazyModule with HasInstrMMIOConst {
  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
  lazy val module = new InstrUncacheBufferImpl

  class InstrUncacheBufferImpl extends LazyModuleImp(this) {
    (node.in zip node.out) foreach { case ((in, edgeIn), (out, edgeOut)) =>
      // two buffer stages each on the A (request) and D (response) channels
      out.a <> BufferParams.default(BufferParams.default(in.a))
      in.d <> BufferParams.default(BufferParams.default(out.d))

      // only a.valid, a.ready, a.address can change
      // hoping that the rest would be optimized to keep MemBlock port unchanged after adding buffer
      out.a.bits.data := 0.U
      out.a.bits.mask := Fill(mmioBusBytes, 1.U(1.W)) // full-bus read mask
      out.a.bits.opcode := 4.U // TileLink A-channel opcode 4 = Get (read)
      out.a.bits.size := log2Ceil(mmioBusBytes).U
      out.a.bits.source := 0.U
    }
  }
}
220c590fb32Scz4e
// triple buffer applied in L1I$-L2 path (two at MemBlock, one at L2Top)
class ICacheBuffer()(implicit p: Parameters) extends LazyModule {
  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
  lazy val module = new ICacheBufferImpl

  class ICacheBufferImpl extends LazyModuleImp(this) {
    // For every diplomatic edge pair, insert two default buffer stages on the
    // request (A) channel and two on the response (D) channel.
    for (((bundleIn, _), (bundleOut, _)) <- node.in.zip(node.out)) {
      bundleOut.a <> BufferParams.default(BufferParams.default(bundleIn.a))
      bundleIn.d <> BufferParams.default(BufferParams.default(bundleOut.d))
    }
  }
}
233c590fb32Scz4e
// double buffer for the I-cache control path (same scheme as ICacheBuffer)
class ICacheCtrlBuffer()(implicit p: Parameters) extends LazyModule {
  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
  lazy val module = new ICacheCtrlBufferImpl

  class ICacheCtrlBufferImpl extends LazyModuleImp(this) {
    // Two default buffer stages on A (request) and D (response) per edge pair.
    for (((tlIn, _), (tlOut, _)) <- node.in.zip(node.out)) {
      tlOut.a <> BufferParams.default(BufferParams.default(tlIn.a))
      tlIn.d <> BufferParams.default(BufferParams.default(tlOut.d))
    }
  }
}
245c590fb32Scz4e
// Frontend bus goes through MemBlock
class FrontendBridge()(implicit p: Parameters) extends LazyModule {
  // each frontend path gets its own buffer; suggestName keeps the generated
  // IO port names stable
  val icache_node = LazyModule(new ICacheBuffer()).suggestName("icache").node// to keep IO port name
  val icachectrl_node = LazyModule(new ICacheCtrlBuffer()).suggestName("icachectrl").node
  val instr_uncache_node = LazyModule(new InstrUncacheBuffer()).suggestName("instr_uncache").node
  // no logic of its own: exists only to host the buffer nodes
  lazy val module = new LazyModuleImp(this) {
  }
}
254c590fb32Scz4e
/**
 * Diplomatic (LazyModule) container for MemBlock: instantiates the D-cache,
 * uncache, L2 TLB and their TileLink plumbing, plus interrupt sinks and
 * prefetch-hint senders towards L2/L3.
 */
class MemBlockInlined()(implicit p: Parameters) extends LazyModule
  with HasXSParameter {
  // flatten this LazyModule into its parent during elaboration
  override def shouldBeInlined: Boolean = true

  val dcache = LazyModule(new DCacheWrapper())
  val uncache = LazyModule(new Uncache())
  val uncache_port = TLTempNode()
  val uncache_xbar = TLXbar()
  val ptw = LazyModule(new L2TLBWrapper())
  // NOTE(review): null sentinels kept for interface compatibility with
  // existing callers; Option[TLBuffer] would be safer
  val ptw_to_l2_buffer = if (!coreParams.softPTW) LazyModule(new TLBuffer) else null
  val l1d_to_l2_buffer = if (coreParams.dcacheParametersOpt.nonEmpty) LazyModule(new TLBuffer) else null
  val dcache_port = TLNameNode("dcache_client") // to keep dcache-L2 port name
  // prefetch-hint senders: L2 always (when a prefetcher exists), L3 only when
  // the SoC actually has an L3 cache
  val l2_pf_sender_opt = coreParams.prefetcher.map(_ =>
    BundleBridgeSource(() => new PrefetchRecv)
  )
  val l3_pf_sender_opt = if (p(SoCParamsKey).L3CacheParamsOpt.nonEmpty) coreParams.prefetcher.map(_ =>
    BundleBridgeSource(() => new huancun.PrefetchRecv)
  ) else None
  val frontendBridge = LazyModule(new FrontendBridge)
  // interrupt sinks
  val clint_int_sink = IntSinkNode(IntSinkPortSimple(1, 2))
  val debug_int_sink = IntSinkNode(IntSinkPortSimple(1, 1))
  val plic_int_sink = IntSinkNode(IntSinkPortSimple(2, 1))
  val nmi_int_sink = IntSinkNode(IntSinkPortSimple(1, (new NonmaskableInterruptIO).elements.size))
  val beu_local_int_sink = IntSinkNode(IntSinkPortSimple(1, 1))

  if (!coreParams.softPTW) {
    ptw_to_l2_buffer.node := ptw.node
  }
  // uncache traffic: client -> 1 buffer -> xbar, then 2 buffers into the
  // dcache's uncache port (when present) and 2 buffers out to uncache_port
  uncache_xbar := TLBuffer() := uncache.clientNode
  // idiomatic Option traversal instead of the original isDefined/get pair
  dcache.uncacheNode.foreach { uncacheNode =>
    uncacheNode := TLBuffer.chainNode(2) := uncache_xbar
  }
  uncache_port := TLBuffer.chainNode(2) := uncache_xbar

  lazy val module = new MemBlockInlinedImp(this)
}
292c590fb32Scz4e
293c590fb32Scz4eclass MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer)
294c590fb32Scz4e  with HasXSParameter
295c590fb32Scz4e  with HasFPUParameters
296c590fb32Scz4e  with HasPerfEvents
2978cfc24b2STang Haojin  with HasSoCParameter
298c590fb32Scz4e  with HasL1PrefetchSourceParameter
299c590fb32Scz4e  with HasCircularQueuePtrHelper
300c590fb32Scz4e  with HasMemBlockParameters
301c590fb32Scz4e  with HasTlbConst
302c590fb32Scz4e  with SdtrigExt
303c590fb32Scz4e{
304c590fb32Scz4e  val io = IO(new Bundle {
305c590fb32Scz4e    val hartId = Input(UInt(hartIdLen.W))
306c590fb32Scz4e    val redirect = Flipped(ValidIO(new Redirect))
307c590fb32Scz4e
308c590fb32Scz4e    val ooo_to_mem = new ooo_to_mem
309c590fb32Scz4e    val mem_to_ooo = new mem_to_ooo
310c590fb32Scz4e    val fetch_to_mem = new fetch_to_mem
311c590fb32Scz4e
312c590fb32Scz4e    val ifetchPrefetch = Vec(LduCnt, ValidIO(new SoftIfetchPrefetchBundle))
313c590fb32Scz4e
314c590fb32Scz4e    // misc
315c590fb32Scz4e    val error = ValidIO(new L1CacheErrorInfo)
316c590fb32Scz4e    val memInfo = new Bundle {
317c590fb32Scz4e      val sqFull = Output(Bool())
318c590fb32Scz4e      val lqFull = Output(Bool())
319c590fb32Scz4e      val dcacheMSHRFull = Output(Bool())
320c590fb32Scz4e    }
321c590fb32Scz4e    val debug_ls = new DebugLSIO
322c590fb32Scz4e    val l2_hint = Input(Valid(new L2ToL1Hint()))
323c590fb32Scz4e    val l2PfqBusy = Input(Bool())
324c590fb32Scz4e    val l2_tlb_req = Flipped(new TlbRequestIO(nRespDups = 2))
325c590fb32Scz4e    val l2_pmp_resp = new PMPRespBundle
326c590fb32Scz4e    val l2_flush_done = Input(Bool())
327c590fb32Scz4e
328c590fb32Scz4e    val debugTopDown = new Bundle {
329c590fb32Scz4e      val robHeadVaddr = Flipped(Valid(UInt(VAddrBits.W)))
330c590fb32Scz4e      val toCore = new MemCoreTopDownIO
331c590fb32Scz4e    }
332c590fb32Scz4e    val debugRolling = Flipped(new RobDebugRollingIO)
333c590fb32Scz4e
334c590fb32Scz4e    // All the signals from/to frontend/backend to/from bus will go through MemBlock
335c590fb32Scz4e    val fromTopToBackend = Input(new Bundle {
3368cfc24b2STang Haojin      val msiInfo   = ValidIO(UInt(soc.IMSICParams.MSI_INFO_WIDTH.W))
337c590fb32Scz4e      val clintTime = ValidIO(UInt(64.W))
338c590fb32Scz4e    })
339c590fb32Scz4e    val inner_hartId = Output(UInt(hartIdLen.W))
340c590fb32Scz4e    val inner_reset_vector = Output(UInt(PAddrBits.W))
341c590fb32Scz4e    val outer_reset_vector = Input(UInt(PAddrBits.W))
342c590fb32Scz4e    val outer_cpu_halt = Output(Bool())
343c590fb32Scz4e    val outer_l2_flush_en = Output(Bool())
344c590fb32Scz4e    val outer_power_down_en = Output(Bool())
345c590fb32Scz4e    val outer_cpu_critical_error = Output(Bool())
3468cfc24b2STang Haojin    val outer_msi_ack = Output(Bool())
347c590fb32Scz4e    val inner_beu_errors_icache = Input(new L1BusErrorUnitInfo)
348c590fb32Scz4e    val outer_beu_errors_icache = Output(new L1BusErrorUnitInfo)
349c590fb32Scz4e    val inner_hc_perfEvents = Output(Vec(numPCntHc * coreParams.L2NBanks + 1, new PerfEvent))
350c590fb32Scz4e    val outer_hc_perfEvents = Input(Vec(numPCntHc * coreParams.L2NBanks + 1, new PerfEvent))
351c590fb32Scz4e
352c590fb32Scz4e    // reset signals of frontend & backend are generated in memblock
353c590fb32Scz4e    val reset_backend = Output(Reset())
354c590fb32Scz4e    // Reset singal from frontend.
355c590fb32Scz4e    val resetInFrontendBypass = new Bundle{
356c590fb32Scz4e      val fromFrontend = Input(Bool())
357c590fb32Scz4e      val toL2Top      = Output(Bool())
358c590fb32Scz4e    }
359c590fb32Scz4e    val traceCoreInterfaceBypass = new Bundle{
360c590fb32Scz4e      val fromBackend = Flipped(new TraceCoreInterface(hasOffset = true))
361c590fb32Scz4e      val toL2Top     = new TraceCoreInterface
362c590fb32Scz4e    }
363c590fb32Scz4e
364c590fb32Scz4e    val topDownInfo = new Bundle {
365c590fb32Scz4e      val fromL2Top = Input(new TopDownFromL2Top)
366c590fb32Scz4e      val toBackend = Flipped(new TopDownInfo)
367c590fb32Scz4e    }
368602aa9f1Scz4e    val sramTestBypass = new Bundle() {
369602aa9f1Scz4e      val fromL2Top = new Bundle() {
370602aa9f1Scz4e        val mbist      = Option.when(hasMbist)(Input(new SramMbistBundle))
371602aa9f1Scz4e        val mbistReset = Option.when(hasMbist)(Input(new DFTResetSignals()))
372602aa9f1Scz4e        val sramCtl    = Option.when(hasSramCtl)(Input(UInt(64.W)))
373602aa9f1Scz4e      }
374602aa9f1Scz4e      val toFrontend = new Bundle() {
375602aa9f1Scz4e        val mbist      = Option.when(hasMbist)(Output(new SramMbistBundle))
376602aa9f1Scz4e        val mbistReset = Option.when(hasMbist)(Output(new DFTResetSignals()))
377602aa9f1Scz4e        val sramCtl    = Option.when(hasSramCtl)(Output(UInt(64.W)))
378602aa9f1Scz4e      }
379602aa9f1Scz4e      val toBackend = new Bundle() {
380602aa9f1Scz4e        val mbist      = Option.when(hasMbist)(Output(new SramMbistBundle))
381602aa9f1Scz4e        val mbistReset = Option.when(hasMbist)(Output(new DFTResetSignals()))
382602aa9f1Scz4e      }
383602aa9f1Scz4e    }
384c590fb32Scz4e  })
385c590fb32Scz4e
  // Performance-counter tracing: mark each writeback port's result as
  // available (instruction position "AtBypassVal") when it is valid.
  io.mem_to_ooo.writeBack.zipWithIndex.foreach{ case (wb, i) =>
    PerfCCT.updateInstPos(wb.bits.uop.debug_seqNum, PerfCCT.InstPos.AtBypassVal.id.U, wb.valid, clock, reset)
  }

  // keep the pass-through top-level signals visible in the generated netlist
  dontTouch(io.inner_hartId)
  dontTouch(io.inner_reset_vector)
  dontTouch(io.outer_reset_vector)
  dontTouch(io.outer_cpu_halt)
  dontTouch(io.outer_l2_flush_en)
  dontTouch(io.outer_power_down_en)
  dontTouch(io.outer_cpu_critical_error)
  dontTouch(io.inner_beu_errors_icache)
  dontTouch(io.outer_beu_errors_icache)
  dontTouch(io.inner_hc_perfEvents)
  dontTouch(io.outer_hc_perfEvents)
401c590fb32Scz4e
  // register the redirect one cycle to relax timing into MemBlock
  val redirect = RegNextWithEnable(io.redirect)

  private val dcache = outer.dcache.module
  val uncache = outer.uncache.module

  //val delayedDcacheRefill = RegNext(dcache.io.lsu.lsq)

  // CSR control is delayed two cycles for timing; assumed quasi-static so
  // the delay is harmless — TODO confirm against CSR write timing
  val csrCtrl = DelayN(io.ooo_to_mem.csrCtrl, 2)
  dcache.io.l2_pf_store_only := RegNext(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_store_only, false.B)
  // D-cache error report is delayed two cycles, then suppressed entirely
  // when cache-error reporting is disabled by CSR
  io.error <> DelayNWithValid(dcache.io.error, 2)
  when(!csrCtrl.cache_error_enable){
    io.error.bits.report_to_beu := false.B
    io.error.valid := false.B
  }
416c590fb32Scz4e
  // scalar memory pipelines
  val loadUnits = Seq.fill(LduCnt)(Module(new LoadUnit))
  val storeUnits = Seq.fill(StaCnt)(Module(new StoreUnit))
  // store-data pipes: generic MemExeUnit configured from the Std issue block
  val stdExeUnits = Seq.fill(StdCnt)(Module(new MemExeUnit(backendParams.memSchdParams.get.issueBlockParams.find(_.StdCnt != 0).get.exuBlockParams.head)))
  val hybridUnits = Seq.fill(HyuCnt)(Module(new HybridUnit)) // Todo: replace it with HybridUnit
  val stData = stdExeUnits.map(_.io.out)
  val exeUnits = loadUnits ++ storeUnits

  // The number of vector load/store units is decoupled with the number of load/store units
  val vlSplit = Seq.fill(VlduCnt)(Module(new VLSplitImp))
  val vsSplit = Seq.fill(VstuCnt)(Module(new VSSplitImp))
  val vlMergeBuffer = Module(new VLMergeBufferImp)
  val vsMergeBuffer = Seq.fill(VstuCnt)(Module(new VSMergeBufferImp))
  val vSegmentUnit  = Module(new VSegmentUnit)
  val vfofBuffer    = Module(new VfofBuffer)

  // misalign Buffer: presumably handles accesses that cross alignment
  // boundaries — confirm against LoadMisalignBuffer implementation
  val loadMisalignBuffer = Module(new LoadMisalignBuffer)
  val storeMisalignBuffer = Module(new StoreMisalignBuffer)
435c590fb32Scz4e
  // request channel from the L1 prefetcher into the load pipelines
  val l1_pf_req = Wire(Decoupled(new L1PrefetchReq()))
  // default tie-off; re-connected via <> when SMS is instantiated below
  dcache.io.sms_agt_evict_req.ready := false.B
  // NOTE(review): this map is only defined for SMSParams — any other
  // prefetcher parameter type would raise a MatchError at elaboration
  val prefetcherOpt: Option[BasePrefecher] = coreParams.prefetcher.map {
    case _: SMSParams =>
      val sms = Module(new SMSPrefetcher())
      // SMS knobs come from the CSR prefetch-control fields, registered
      // twice for timing, with defaults applied on reset
      sms.io_agt_en := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_agt, 2, Some(false.B))
      sms.io_pht_en := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_pht, 2, Some(false.B))
      sms.io_act_threshold := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_active_threshold, 2, Some(12.U))
      sms.io_act_stride := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_active_stride, 2, Some(30.U))
      sms.io_stride_en := false.B
      sms.io_dcache_evict <> dcache.io.sms_agt_evict_req
      val mbistSmsPl = MbistPipeline.PlaceMbistPipeline(1, "MbistPipeSms", hasMbist)
      sms
  }
  // the SMS prefetcher's L1 request port is unused here
  prefetcherOpt.foreach{ pf => pf.io.l1_req.ready := false.B }
  val hartId = p(XSCoreParamsKey).HartId
  val l1PrefetcherOpt: Option[BasePrefecher] = coreParams.prefetcher.map {
    case _ =>
      val l1Prefetcher = Module(new L1Prefetcher())
      // runtime switch (Constantin) ANDed with the CSR enable bit
      val enableL1StreamPrefetcher = Constantin.createRecord(s"enableL1StreamPrefetcher$hartId", initValue = true)
      l1Prefetcher.io.enable := enableL1StreamPrefetcher &&
        GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable, 2, Some(false.B))
      l1Prefetcher.pf_ctrl <> dcache.io.pf_ctrl
      l1Prefetcher.l2PfqBusy := io.l2PfqBusy

      // stride will train on miss or prefetch hit
      for (i <- 0 until LduCnt) {
        val source = loadUnits(i).io.prefetch_train_l1
        l1Prefetcher.stride_train(i).valid := source.valid && source.bits.isFirstIssue && (
          source.bits.miss || isFromStride(source.bits.meta_prefetch)
        )
        l1Prefetcher.stride_train(i).bits := source.bits
        val loadPc = RegNext(io.ooo_to_mem.issueLda(i).bits.uop.pc) // for s1
        // align the PC with the training event: the pointer-chasing path
        // skips one pipeline stage, so it needs one fewer register stage
        l1Prefetcher.stride_train(i).bits.uop.pc := Mux(
          loadUnits(i).io.s2_ptr_chasing,
          RegEnable(loadPc, loadUnits(i).io.s2_prefetch_spec),
          RegEnable(RegEnable(loadPc, loadUnits(i).io.s1_prefetch_spec), loadUnits(i).io.s2_prefetch_spec)
        )
      }
      // hybrid units occupy stride_train entries after the pure load units
      for (i <- 0 until HyuCnt) {
        val source = hybridUnits(i).io.prefetch_train_l1
        l1Prefetcher.stride_train.drop(LduCnt)(i).valid := source.valid && source.bits.isFirstIssue && (
          source.bits.miss || isFromStride(source.bits.meta_prefetch)
        )
        l1Prefetcher.stride_train.drop(LduCnt)(i).bits := source.bits
        l1Prefetcher.stride_train.drop(LduCnt)(i).bits.uop.pc := Mux(
          hybridUnits(i).io.ldu_io.s2_ptr_chasing,
          RegNext(io.ooo_to_mem.hybridPc(i)),
          RegNext(RegNext(io.ooo_to_mem.hybridPc(i)))
        )
      }
      l1Prefetcher
  }
  // load prefetch to l1 Dcache
  l1PrefetcherOpt match {
    case Some(pf) => l1_pf_req <> Pipeline(in = pf.io.l1_req, depth = 1, pipe = false, name = Some("pf_queue_to_ldu_reg"))
    case None =>
      l1_pf_req.valid := false.B
      l1_pf_req.bits := DontCare
  }
  val pf_train_on_hit = RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_train_on_hit, 2, Some(true.B))
497c590fb32Scz4e
  // stable instance names for waveform/debug readability
  loadUnits.zipWithIndex.map(x => x._1.suggestName("LoadUnit_"+x._2))
  storeUnits.zipWithIndex.map(x => x._1.suggestName("StoreUnit_"+x._2))
  hybridUnits.zipWithIndex.map(x => x._1.suggestName("HybridUnit_"+x._2))
  // dedicated unit for atomic memory operations
  val atomicsUnit = Module(new AtomicsUnit)
502c590fb32Scz4e
503c590fb32Scz4e
  // load writeback requests, one per lda writeback port; ports 0 and 1 are
  // shared with the atomics unit and the load-misalign buffer respectively
  val ldaExeWbReqs = Wire(Vec(LduCnt, Decoupled(new MemExuOutput)))
  // atomicsUnit will overwrite the source from ldu if it is about to writeback
  // (the atomics unit has priority on this port)
  val atomicWritebackOverride = Mux(
    atomicsUnit.io.out.valid,
    atomicsUnit.io.out.bits,
    loadUnits(AtomicWBPort).io.ldout.bits
  )
  ldaExeWbReqs(AtomicWBPort).valid := atomicsUnit.io.out.valid || loadUnits(AtomicWBPort).io.ldout.valid
  ldaExeWbReqs(AtomicWBPort).bits  := atomicWritebackOverride
  // NOTE(review): both producers see the same ready, so if both were valid in
  // the same cycle the load's result would be lost — assumed mutually
  // exclusive by construction, confirm
  atomicsUnit.io.out.ready := ldaExeWbReqs(AtomicWBPort).ready
  loadUnits(AtomicWBPort).io.ldout.ready := ldaExeWbReqs(AtomicWBPort).ready

  // store-data beats that carry AMO data (not written back as std)
  val st_data_atomics = Seq.tabulate(StdCnt)(i =>
    stData(i).valid && FuType.storeIsAMO(stData(i).bits.uop.fuType)
  )

  // the misalign buffer shares the MisalignWBPort with the load unit; here
  // the LOAD UNIT has priority (Mux selects ldout when it is valid) and the
  // buffer only fires when the load unit is not writing back
  val misalignWritebackOverride = Mux(
    loadUnits(MisalignWBPort).io.ldout.valid,
    loadUnits(MisalignWBPort).io.ldout.bits,
    loadMisalignBuffer.io.writeBack.bits
  )
  ldaExeWbReqs(MisalignWBPort).valid    := loadMisalignBuffer.io.writeBack.valid || loadUnits(MisalignWBPort).io.ldout.valid
  ldaExeWbReqs(MisalignWBPort).bits     := misalignWritebackOverride
  loadMisalignBuffer.io.writeBack.ready := ldaExeWbReqs(MisalignWBPort).ready && !loadUnits(MisalignWBPort).io.ldout.valid
  loadMisalignBuffer.io.loadOutValid    := loadUnits(MisalignWBPort).io.ldout.valid
  loadMisalignBuffer.io.loadVecOutValid := loadUnits(MisalignWBPort).io.vecldout.valid
  loadUnits(MisalignWBPort).io.ldout.ready := ldaExeWbReqs(MisalignWBPort).ready
  // the port counts as "from load unit" if either source says so
  ldaExeWbReqs(MisalignWBPort).bits.isFromLoadUnit := loadUnits(MisalignWBPort).io.ldout.bits.isFromLoadUnit || loadMisalignBuffer.io.writeBack.valid
533c590fb32Scz4e
  // loadUnit will overwrite the source from uncache if it is about to writeback
  ldaExeWbReqs(UncacheWBPort) <> loadUnits(UncacheWBPort).io.ldout
  io.mem_to_ooo.writebackLda <> ldaExeWbReqs
  io.mem_to_ooo.writebackSta <> storeUnits.map(_.io.stout)
  io.mem_to_ooo.writebackStd.zip(stdExeUnits).foreach {x =>
    x._1.bits  := x._2.io.out.bits
    // AMOs do not need to write back std now.
    x._1.valid := x._2.io.out.fire && !FuType.storeIsAMO(x._2.io.out.bits.uop.fuType)
  }
  io.mem_to_ooo.writebackHyuLda <> hybridUnits.map(_.io.ldout)
  io.mem_to_ooo.writebackHyuSta <> hybridUnits.map(_.io.stout)
  // fast wakeup layout: hybrid units occupy the first HyuCnt entries,
  // pure load units the remaining LduCnt entries
  io.mem_to_ooo.otherFastWakeup := DontCare
  io.mem_to_ooo.otherFastWakeup.drop(HyuCnt).take(LduCnt).zip(loadUnits.map(_.io.fast_uop)).foreach{case(a,b)=> a := b}
  io.mem_to_ooo.otherFastWakeup.take(HyuCnt).zip(hybridUnits.map(_.io.ldu_io.fast_uop)).foreach{case(a,b)=> a:=b}
  // all store-address writeback ports (sta units then hybrid-sta)
  val stOut = io.mem_to_ooo.writebackSta ++ io.mem_to_ooo.writebackHyuSta
549c590fb32Scz4e
550c590fb32Scz4e  // prefetch to l1 req
551c590fb32Scz4e  // Stream's confidence is always 1
552c590fb32Scz4e  // (LduCnt + HyuCnt) l1_pf_reqs ?
553c590fb32Scz4e  loadUnits.foreach(load_unit => {
554c590fb32Scz4e    load_unit.io.prefetch_req.valid <> l1_pf_req.valid
555c590fb32Scz4e    load_unit.io.prefetch_req.bits <> l1_pf_req.bits
556c590fb32Scz4e  })
557c590fb32Scz4e
558c590fb32Scz4e  hybridUnits.foreach(hybrid_unit => {
559c590fb32Scz4e    hybrid_unit.io.ldu_io.prefetch_req.valid <> l1_pf_req.valid
560c590fb32Scz4e    hybrid_unit.io.ldu_io.prefetch_req.bits <> l1_pf_req.bits
561c590fb32Scz4e  })
562c590fb32Scz4e
563c590fb32Scz4e  // NOTE: loadUnits(0) has higher bank conflict and miss queue arb priority than loadUnits(1) and loadUnits(2)
564c590fb32Scz4e  // when loadUnits(1)/loadUnits(2) stage 0 is busy, hw prefetch will never use that pipeline
565c590fb32Scz4e  val LowConfPorts = if (LduCnt == 2) Seq(1) else if (LduCnt == 3) Seq(1, 2) else Seq(0)
566c590fb32Scz4e  LowConfPorts.map{case i => loadUnits(i).io.prefetch_req.bits.confidence := 0.U}
567c590fb32Scz4e  hybridUnits.foreach(hybrid_unit => { hybrid_unit.io.ldu_io.prefetch_req.bits.confidence := 0.U })
568c590fb32Scz4e
569c590fb32Scz4e  val canAcceptHighConfPrefetch = loadUnits.map(_.io.canAcceptHighConfPrefetch) ++
570c590fb32Scz4e                                  hybridUnits.map(_.io.canAcceptLowConfPrefetch)
571c590fb32Scz4e  val canAcceptLowConfPrefetch = loadUnits.map(_.io.canAcceptLowConfPrefetch) ++
572c590fb32Scz4e                                 hybridUnits.map(_.io.canAcceptLowConfPrefetch)
573c590fb32Scz4e  l1_pf_req.ready := (0 until LduCnt + HyuCnt).map{
574c590fb32Scz4e    case i => {
575c590fb32Scz4e      if (LowConfPorts.contains(i)) {
576c590fb32Scz4e        loadUnits(i).io.canAcceptLowConfPrefetch
577c590fb32Scz4e      } else {
578c590fb32Scz4e        Mux(l1_pf_req.bits.confidence === 1.U, canAcceptHighConfPrefetch(i), canAcceptLowConfPrefetch(i))
579c590fb32Scz4e      }
580c590fb32Scz4e    }
581c590fb32Scz4e  }.reduce(_ || _)
582c590fb32Scz4e
583c590fb32Scz4e  // l1 pf fuzzer interface
584c590fb32Scz4e  val DebugEnableL1PFFuzzer = false
585c590fb32Scz4e  if (DebugEnableL1PFFuzzer) {
586c590fb32Scz4e    // l1 pf req fuzzer
587c590fb32Scz4e    val fuzzer = Module(new L1PrefetchFuzzer())
588c590fb32Scz4e    fuzzer.io.vaddr := DontCare
589c590fb32Scz4e    fuzzer.io.paddr := DontCare
590c590fb32Scz4e
591c590fb32Scz4e    // override load_unit prefetch_req
592c590fb32Scz4e    loadUnits.foreach(load_unit => {
593c590fb32Scz4e      load_unit.io.prefetch_req.valid <> fuzzer.io.req.valid
594c590fb32Scz4e      load_unit.io.prefetch_req.bits <> fuzzer.io.req.bits
595c590fb32Scz4e    })
596c590fb32Scz4e
597c590fb32Scz4e    // override hybrid_unit prefetch_req
598c590fb32Scz4e    hybridUnits.foreach(hybrid_unit => {
599c590fb32Scz4e      hybrid_unit.io.ldu_io.prefetch_req.valid <> fuzzer.io.req.valid
600c590fb32Scz4e      hybrid_unit.io.ldu_io.prefetch_req.bits <> fuzzer.io.req.bits
601c590fb32Scz4e    })
602c590fb32Scz4e
603c590fb32Scz4e    fuzzer.io.req.ready := l1_pf_req.ready
604c590fb32Scz4e  }
605c590fb32Scz4e
606c590fb32Scz4e  // TODO: fast load wakeup
607c590fb32Scz4e  val lsq     = Module(new LsqWrapper)
608c590fb32Scz4e  val sbuffer = Module(new Sbuffer)
609c590fb32Scz4e  // if you wants to stress test dcache store, use FakeSbuffer
610c590fb32Scz4e  // val sbuffer = Module(new FakeSbuffer) // out of date now
611c590fb32Scz4e  io.mem_to_ooo.stIssuePtr := lsq.io.issuePtrExt
612c590fb32Scz4e
613c590fb32Scz4e  dcache.io.hartId := io.hartId
614c590fb32Scz4e  lsq.io.hartId := io.hartId
615c590fb32Scz4e  sbuffer.io.hartId := io.hartId
616c590fb32Scz4e  atomicsUnit.io.hartId := io.hartId
617c590fb32Scz4e
618c590fb32Scz4e  dcache.io.lqEmpty := lsq.io.lqEmpty
619c590fb32Scz4e
620c590fb32Scz4e  // load/store prefetch to l2 cache
621c590fb32Scz4e  prefetcherOpt.foreach(sms_pf => {
622c590fb32Scz4e    l1PrefetcherOpt.foreach(l1_pf => {
623c590fb32Scz4e      val sms_pf_to_l2 = DelayNWithValid(sms_pf.io.l2_req, 2)
624c590fb32Scz4e      val l1_pf_to_l2 = DelayNWithValid(l1_pf.io.l2_req, 2)
625c590fb32Scz4e
626c590fb32Scz4e      outer.l2_pf_sender_opt.get.out.head._1.addr_valid := sms_pf_to_l2.valid || l1_pf_to_l2.valid
627c590fb32Scz4e      outer.l2_pf_sender_opt.get.out.head._1.addr := Mux(l1_pf_to_l2.valid, l1_pf_to_l2.bits.addr, sms_pf_to_l2.bits.addr)
628c590fb32Scz4e      outer.l2_pf_sender_opt.get.out.head._1.pf_source := Mux(l1_pf_to_l2.valid, l1_pf_to_l2.bits.source, sms_pf_to_l2.bits.source)
629c590fb32Scz4e      outer.l2_pf_sender_opt.get.out.head._1.l2_pf_en := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_enable, 2, Some(true.B))
630c590fb32Scz4e
631c590fb32Scz4e      sms_pf.io.enable := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable, 2, Some(false.B))
632c590fb32Scz4e
633c590fb32Scz4e      val l2_trace = Wire(new LoadPfDbBundle)
634c590fb32Scz4e      l2_trace.paddr := outer.l2_pf_sender_opt.get.out.head._1.addr
635c590fb32Scz4e      val table = ChiselDB.createTable(s"L2PrefetchTrace$hartId", new LoadPfDbBundle, basicDB = false)
636c590fb32Scz4e      table.log(l2_trace, l1_pf_to_l2.valid, "StreamPrefetchTrace", clock, reset)
637c590fb32Scz4e      table.log(l2_trace, !l1_pf_to_l2.valid && sms_pf_to_l2.valid, "L2PrefetchTrace", clock, reset)
638c590fb32Scz4e
639c590fb32Scz4e      val l1_pf_to_l3 = ValidIODelay(l1_pf.io.l3_req, 4)
640c590fb32Scz4e      outer.l3_pf_sender_opt.foreach(_.out.head._1.addr_valid := l1_pf_to_l3.valid)
641c590fb32Scz4e      outer.l3_pf_sender_opt.foreach(_.out.head._1.addr := l1_pf_to_l3.bits)
642c590fb32Scz4e      outer.l3_pf_sender_opt.foreach(_.out.head._1.l2_pf_en := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_enable, 4, Some(true.B)))
643c590fb32Scz4e
644c590fb32Scz4e      val l3_trace = Wire(new LoadPfDbBundle)
645c590fb32Scz4e      l3_trace.paddr := outer.l3_pf_sender_opt.map(_.out.head._1.addr).getOrElse(0.U)
646c590fb32Scz4e      val l3_table = ChiselDB.createTable(s"L3PrefetchTrace$hartId", new LoadPfDbBundle, basicDB = false)
647c590fb32Scz4e      l3_table.log(l3_trace, l1_pf_to_l3.valid, "StreamPrefetchTrace", clock, reset)
648c590fb32Scz4e
649c590fb32Scz4e      XSPerfAccumulate("prefetch_fire_l2", outer.l2_pf_sender_opt.get.out.head._1.addr_valid)
650c590fb32Scz4e      XSPerfAccumulate("prefetch_fire_l3", outer.l3_pf_sender_opt.map(_.out.head._1.addr_valid).getOrElse(false.B))
651c590fb32Scz4e      XSPerfAccumulate("l1pf_fire_l2", l1_pf_to_l2.valid)
652c590fb32Scz4e      XSPerfAccumulate("sms_fire_l2", !l1_pf_to_l2.valid && sms_pf_to_l2.valid)
653c590fb32Scz4e      XSPerfAccumulate("sms_block_by_l1pf", l1_pf_to_l2.valid && sms_pf_to_l2.valid)
654c590fb32Scz4e    })
655c590fb32Scz4e  })
656c590fb32Scz4e
657c590fb32Scz4e  // ptw
658c590fb32Scz4e  val sfence = RegNext(RegNext(io.ooo_to_mem.sfence))
659c590fb32Scz4e  val tlbcsr = RegNext(RegNext(io.ooo_to_mem.tlbCsr))
660c590fb32Scz4e  private val ptw = outer.ptw.module
661c590fb32Scz4e  private val ptw_to_l2_buffer = outer.ptw_to_l2_buffer.module
662c590fb32Scz4e  private val l1d_to_l2_buffer = outer.l1d_to_l2_buffer.module
663c590fb32Scz4e  ptw.io.hartId := io.hartId
664c590fb32Scz4e  ptw.io.sfence <> sfence
665c590fb32Scz4e  ptw.io.csr.tlb <> tlbcsr
666c590fb32Scz4e  ptw.io.csr.distribute_csr <> csrCtrl.distribute_csr
667c590fb32Scz4e
668c590fb32Scz4e  val perfEventsPTW = if (!coreParams.softPTW) {
669c590fb32Scz4e    ptw.getPerfEvents
670c590fb32Scz4e  } else {
671c590fb32Scz4e    Seq()
672c590fb32Scz4e  }
673c590fb32Scz4e
674c590fb32Scz4e  // dtlb
675c590fb32Scz4e  val dtlb_ld_tlb_ld = Module(new TLBNonBlock(LduCnt + HyuCnt + 1, 2, ldtlbParams))
676c590fb32Scz4e  val dtlb_st_tlb_st = Module(new TLBNonBlock(StaCnt, 1, sttlbParams))
677c590fb32Scz4e  val dtlb_prefetch_tlb_prefetch = Module(new TLBNonBlock(2, 2, pftlbParams))
678c590fb32Scz4e  val dtlb_ld = Seq(dtlb_ld_tlb_ld.io)
679c590fb32Scz4e  val dtlb_st = Seq(dtlb_st_tlb_st.io)
680c590fb32Scz4e  val dtlb_prefetch = Seq(dtlb_prefetch_tlb_prefetch.io)
681c590fb32Scz4e  /* tlb vec && constant variable */
682c590fb32Scz4e  val dtlb = dtlb_ld ++ dtlb_st ++ dtlb_prefetch
683c590fb32Scz4e  val (dtlb_ld_idx, dtlb_st_idx, dtlb_pf_idx) = (0, 1, 2)
684c590fb32Scz4e  val TlbSubSizeVec = Seq(LduCnt + HyuCnt + 1, StaCnt, 2) // (load + hyu + stream pf, store, sms+l2bop)
685c590fb32Scz4e  val DTlbSize = TlbSubSizeVec.sum
686c590fb32Scz4e  val TlbStartVec = TlbSubSizeVec.scanLeft(0)(_ + _).dropRight(1)
687c590fb32Scz4e  val TlbEndVec = TlbSubSizeVec.scanLeft(0)(_ + _).drop(1)
688c590fb32Scz4e
689c590fb32Scz4e  val ptwio = Wire(new VectorTlbPtwIO(DTlbSize))
690c590fb32Scz4e  val dtlb_reqs = dtlb.map(_.requestor).flatten
691c590fb32Scz4e  val dtlb_pmps = dtlb.map(_.pmp).flatten
692c590fb32Scz4e  dtlb.map(_.hartId := io.hartId)
693c590fb32Scz4e  dtlb.map(_.sfence := sfence)
694c590fb32Scz4e  dtlb.map(_.csr := tlbcsr)
695c590fb32Scz4e  dtlb.map(_.flushPipe.map(a => a := false.B)) // non-block doesn't need
696c590fb32Scz4e  dtlb.map(_.redirect := redirect)
697c590fb32Scz4e  if (refillBothTlb) {
698c590fb32Scz4e    require(ldtlbParams.outReplace == sttlbParams.outReplace)
699c590fb32Scz4e    require(ldtlbParams.outReplace == hytlbParams.outReplace)
700c590fb32Scz4e    require(ldtlbParams.outReplace == pftlbParams.outReplace)
701c590fb32Scz4e    require(ldtlbParams.outReplace)
702c590fb32Scz4e
703c590fb32Scz4e    val replace = Module(new TlbReplace(DTlbSize, ldtlbParams))
704c590fb32Scz4e    replace.io.apply_sep(dtlb_ld.map(_.replace) ++ dtlb_st.map(_.replace) ++ dtlb_prefetch.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
705c590fb32Scz4e  } else {
706c590fb32Scz4e    // TODO: there will be bugs in TlbReplace when outReplace enable, since the order of Hyu is not right.
707c590fb32Scz4e    if (ldtlbParams.outReplace) {
708c590fb32Scz4e      val replace_ld = Module(new TlbReplace(LduCnt + 1, ldtlbParams))
709c590fb32Scz4e      replace_ld.io.apply_sep(dtlb_ld.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
710c590fb32Scz4e    }
711c590fb32Scz4e    if (hytlbParams.outReplace) {
712c590fb32Scz4e      val replace_hy = Module(new TlbReplace(HyuCnt, hytlbParams))
713c590fb32Scz4e      replace_hy.io.apply_sep(dtlb_ld.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
714c590fb32Scz4e    }
715c590fb32Scz4e    if (sttlbParams.outReplace) {
716c590fb32Scz4e      val replace_st = Module(new TlbReplace(StaCnt, sttlbParams))
717c590fb32Scz4e      replace_st.io.apply_sep(dtlb_st.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
718c590fb32Scz4e    }
719c590fb32Scz4e    if (pftlbParams.outReplace) {
720c590fb32Scz4e      val replace_pf = Module(new TlbReplace(2, pftlbParams))
721c590fb32Scz4e      replace_pf.io.apply_sep(dtlb_prefetch.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
722c590fb32Scz4e    }
723c590fb32Scz4e  }
724c590fb32Scz4e
725c590fb32Scz4e  val ptw_resp_next = RegEnable(ptwio.resp.bits, ptwio.resp.valid)
726c590fb32Scz4e  val ptw_resp_v = RegNext(ptwio.resp.valid && !(sfence.valid && tlbcsr.satp.changed && tlbcsr.vsatp.changed && tlbcsr.hgatp.changed), init = false.B)
727c590fb32Scz4e  ptwio.resp.ready := true.B
728c590fb32Scz4e
729c590fb32Scz4e  val tlbreplay = WireInit(VecInit(Seq.fill(LdExuCnt)(false.B)))
730c590fb32Scz4e  val tlbreplay_reg = GatedValidRegNext(tlbreplay)
731c590fb32Scz4e  val dtlb_ld0_tlbreplay_reg = GatedValidRegNext(dtlb_ld(0).tlbreplay)
732c590fb32Scz4e
733c590fb32Scz4e  if (backendParams.debugEn){ dontTouch(tlbreplay) }
734c590fb32Scz4e
735c590fb32Scz4e  for (i <- 0 until LdExuCnt) {
736c590fb32Scz4e    tlbreplay(i) := dtlb_ld(0).ptw.req(i).valid && ptw_resp_next.vector(0) && ptw_resp_v &&
737c590fb32Scz4e      ptw_resp_next.data.hit(dtlb_ld(0).ptw.req(i).bits.vpn, tlbcsr.satp.asid, tlbcsr.vsatp.asid, tlbcsr.hgatp.vmid, allType = true, ignoreAsid = true)
738c590fb32Scz4e  }
739c590fb32Scz4e
740c590fb32Scz4e  dtlb.flatMap(a => a.ptw.req)
741c590fb32Scz4e    .zipWithIndex
742c590fb32Scz4e    .foreach{ case (tlb, i) =>
743c590fb32Scz4e      tlb.ready := ptwio.req(i).ready
744c590fb32Scz4e      ptwio.req(i).bits := tlb.bits
745c590fb32Scz4e    val vector_hit = if (refillBothTlb) Cat(ptw_resp_next.vector).orR
746c590fb32Scz4e      else if (i < TlbEndVec(dtlb_ld_idx)) Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_ld_idx), TlbEndVec(dtlb_ld_idx))).orR
747c590fb32Scz4e      else if (i < TlbEndVec(dtlb_st_idx)) Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_st_idx), TlbEndVec(dtlb_st_idx))).orR
748c590fb32Scz4e      else                                 Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_pf_idx), TlbEndVec(dtlb_pf_idx))).orR
749c590fb32Scz4e    ptwio.req(i).valid := tlb.valid && !(ptw_resp_v && vector_hit && ptw_resp_next.data.hit(tlb.bits.vpn, tlbcsr.satp.asid, tlbcsr.vsatp.asid, tlbcsr.hgatp.vmid, allType = true, ignoreAsid = true))
750c590fb32Scz4e  }
751c590fb32Scz4e  dtlb.foreach(_.ptw.resp.bits := ptw_resp_next.data)
752c590fb32Scz4e  if (refillBothTlb) {
753c590fb32Scz4e    dtlb.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector).orR)
754c590fb32Scz4e  } else {
755c590fb32Scz4e    dtlb_ld.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_ld_idx), TlbEndVec(dtlb_ld_idx))).orR)
756c590fb32Scz4e    dtlb_st.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_st_idx), TlbEndVec(dtlb_st_idx))).orR)
757c590fb32Scz4e    dtlb_prefetch.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_pf_idx), TlbEndVec(dtlb_pf_idx))).orR)
758c590fb32Scz4e  }
759c590fb32Scz4e  dtlb_ld.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.take(LduCnt + HyuCnt + 1)).orR)
760c590fb32Scz4e  dtlb_st.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.slice(LduCnt + HyuCnt + 1, LduCnt + HyuCnt + 1 + StaCnt)).orR)
761c590fb32Scz4e  dtlb_prefetch.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.drop(LduCnt + HyuCnt + 1 + StaCnt)).orR)
762c590fb32Scz4e
763c590fb32Scz4e  val dtlbRepeater  = PTWNewFilter(ldtlbParams.fenceDelay, ptwio, ptw.io.tlb(1), sfence, tlbcsr, l2tlbParams.dfilterSize)
764c590fb32Scz4e  val itlbRepeater3 = PTWRepeaterNB(passReady = false, itlbParams.fenceDelay, io.fetch_to_mem.itlb, ptw.io.tlb(0), sfence, tlbcsr)
765c590fb32Scz4e
766c590fb32Scz4e  lsq.io.debugTopDown.robHeadMissInDTlb := dtlbRepeater.io.rob_head_miss_in_tlb
767c590fb32Scz4e
768c590fb32Scz4e  // pmp
769c590fb32Scz4e  val pmp = Module(new PMP())
770c590fb32Scz4e  pmp.io.distribute_csr <> csrCtrl.distribute_csr
771c590fb32Scz4e
772c590fb32Scz4e  val pmp_checkers = Seq.fill(DTlbSize)(Module(new PMPChecker(4, leaveHitMux = true)))
773c590fb32Scz4e  val pmp_check = pmp_checkers.map(_.io)
774c590fb32Scz4e  for ((p,d) <- pmp_check zip dtlb_pmps) {
7758882eb68SXin Tian    if (HasBitmapCheck) {
7768882eb68SXin Tian      p.apply(tlbcsr.mbmc.CMODE.asBool, tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, d)
7778882eb68SXin Tian    } else {
778c590fb32Scz4e      p.apply(tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, d)
7798882eb68SXin Tian    }
780c590fb32Scz4e    require(p.req.bits.size.getWidth == d.bits.size.getWidth)
781c590fb32Scz4e  }
782c590fb32Scz4e
783c590fb32Scz4e  for (i <- 0 until LduCnt) {
784c590fb32Scz4e    io.debug_ls.debugLsInfo(i) := loadUnits(i).io.debug_ls
785c590fb32Scz4e  }
786c590fb32Scz4e  for (i <- 0 until HyuCnt) {
787c590fb32Scz4e    io.debug_ls.debugLsInfo.drop(LduCnt)(i) := hybridUnits(i).io.ldu_io.debug_ls
788c590fb32Scz4e  }
789c590fb32Scz4e  for (i <- 0 until StaCnt) {
790c590fb32Scz4e    io.debug_ls.debugLsInfo.drop(LduCnt + HyuCnt)(i) := storeUnits(i).io.debug_ls
791c590fb32Scz4e  }
792c590fb32Scz4e  for (i <- 0 until HyuCnt) {
793c590fb32Scz4e    io.debug_ls.debugLsInfo.drop(LduCnt + HyuCnt + StaCnt)(i) := hybridUnits(i).io.stu_io.debug_ls
794c590fb32Scz4e  }
795c590fb32Scz4e
796c590fb32Scz4e  io.mem_to_ooo.lsTopdownInfo := loadUnits.map(_.io.lsTopdownInfo) ++ hybridUnits.map(_.io.ldu_io.lsTopdownInfo)
797c590fb32Scz4e
798c590fb32Scz4e  // trigger
799c590fb32Scz4e  val tdata = RegInit(VecInit(Seq.fill(TriggerNum)(0.U.asTypeOf(new MatchTriggerIO))))
800c590fb32Scz4e  val tEnable = RegInit(VecInit(Seq.fill(TriggerNum)(false.B)))
801c590fb32Scz4e  tEnable := csrCtrl.mem_trigger.tEnableVec
802c590fb32Scz4e  when(csrCtrl.mem_trigger.tUpdate.valid) {
803c590fb32Scz4e    tdata(csrCtrl.mem_trigger.tUpdate.bits.addr) := csrCtrl.mem_trigger.tUpdate.bits.tdata
804c590fb32Scz4e  }
805c590fb32Scz4e  val triggerCanRaiseBpExp = csrCtrl.mem_trigger.triggerCanRaiseBpExp
806c590fb32Scz4e  val debugMode = csrCtrl.mem_trigger.debugMode
807c590fb32Scz4e
808c590fb32Scz4e  val backendTriggerTimingVec = VecInit(tdata.map(_.timing))
809c590fb32Scz4e  val backendTriggerChainVec = VecInit(tdata.map(_.chain))
810c590fb32Scz4e
811c590fb32Scz4e  XSDebug(tEnable.asUInt.orR, "Debug Mode: At least one store trigger is enabled\n")
812c590fb32Scz4e  for (j <- 0 until TriggerNum)
813c590fb32Scz4e    PrintTriggerInfo(tEnable(j), tdata(j))
814c590fb32Scz4e
815c590fb32Scz4e  // The segment instruction is executed atomically.
816c590fb32Scz4e  // After the segment instruction directive starts executing, no other instructions should be executed.
817c590fb32Scz4e  val vSegmentFlag = RegInit(false.B)
818c590fb32Scz4e
819c590fb32Scz4e  when(GatedValidRegNext(vSegmentUnit.io.in.fire)) {
820c590fb32Scz4e    vSegmentFlag := true.B
821c590fb32Scz4e  }.elsewhen(GatedValidRegNext(vSegmentUnit.io.uopwriteback.valid)) {
822c590fb32Scz4e    vSegmentFlag := false.B
823c590fb32Scz4e  }
824c590fb32Scz4e
825522c7f99SAnzo  val misalign_allow_spec = RegInit(true.B)
826522c7f99SAnzo  val ldu_rollback_with_misalign_nack = loadUnits.map(ldu =>
827522c7f99SAnzo    ldu.io.lsq.ldin.bits.isFrmMisAlignBuf && ldu.io.lsq.ldin.bits.rep_info.rar_nack && ldu.io.rollback.valid
828522c7f99SAnzo  ).reduce(_ || _)
829522c7f99SAnzo  when (ldu_rollback_with_misalign_nack) {
830522c7f99SAnzo    misalign_allow_spec := false.B
831522c7f99SAnzo  } .elsewhen(lsq.io.rarValidCount < (LoadQueueRARSize - 4).U) {
832522c7f99SAnzo    misalign_allow_spec := true.B
833522c7f99SAnzo  }
834522c7f99SAnzo
835c590fb32Scz4e  // LoadUnit
836c590fb32Scz4e  val correctMissTrain = Constantin.createRecord(s"CorrectMissTrain$hartId", initValue = false)
837c590fb32Scz4e
838c590fb32Scz4e  for (i <- 0 until LduCnt) {
839c590fb32Scz4e    loadUnits(i).io.redirect <> redirect
840522c7f99SAnzo    loadUnits(i).io.misalign_allow_spec := misalign_allow_spec
841c590fb32Scz4e
842c590fb32Scz4e    // get input form dispatch
843c590fb32Scz4e    loadUnits(i).io.ldin <> io.ooo_to_mem.issueLda(i)
844c590fb32Scz4e    loadUnits(i).io.feedback_slow <> io.mem_to_ooo.ldaIqFeedback(i).feedbackSlow
845c590fb32Scz4e    io.mem_to_ooo.ldaIqFeedback(i).feedbackFast := DontCare
846c590fb32Scz4e    loadUnits(i).io.correctMissTrain := correctMissTrain
847c590fb32Scz4e    io.mem_to_ooo.ldCancel.drop(HyuCnt)(i) := loadUnits(i).io.ldCancel
848c590fb32Scz4e    io.mem_to_ooo.wakeup.drop(HyuCnt)(i) := loadUnits(i).io.wakeup
849c590fb32Scz4e
850c590fb32Scz4e    // vector
851c590fb32Scz4e    if (i < VlduCnt) {
852c590fb32Scz4e      loadUnits(i).io.vecldout.ready := false.B
853c590fb32Scz4e    } else {
854c590fb32Scz4e      loadUnits(i).io.vecldin.valid := false.B
855c590fb32Scz4e      loadUnits(i).io.vecldin.bits := DontCare
856c590fb32Scz4e      loadUnits(i).io.vecldout.ready := false.B
857c590fb32Scz4e    }
858c590fb32Scz4e
859c590fb32Scz4e    // fast replay
860c590fb32Scz4e    loadUnits(i).io.fast_rep_in <> loadUnits(i).io.fast_rep_out
861c590fb32Scz4e
862c590fb32Scz4e    // SoftPrefetch to frontend (prefetch.i)
863c590fb32Scz4e    loadUnits(i).io.ifetchPrefetch <> io.ifetchPrefetch(i)
864c590fb32Scz4e
865c590fb32Scz4e    // dcache access
866c590fb32Scz4e    loadUnits(i).io.dcache <> dcache.io.lsu.load(i)
867c590fb32Scz4e    if(i == 0){
868c590fb32Scz4e      vSegmentUnit.io.rdcache := DontCare
869c590fb32Scz4e      dcache.io.lsu.load(i).req.valid := loadUnits(i).io.dcache.req.valid || vSegmentUnit.io.rdcache.req.valid
870c590fb32Scz4e      dcache.io.lsu.load(i).req.bits  := Mux1H(Seq(
871c590fb32Scz4e        vSegmentUnit.io.rdcache.req.valid -> vSegmentUnit.io.rdcache.req.bits,
872c590fb32Scz4e        loadUnits(i).io.dcache.req.valid -> loadUnits(i).io.dcache.req.bits
873c590fb32Scz4e      ))
874c590fb32Scz4e      vSegmentUnit.io.rdcache.req.ready := dcache.io.lsu.load(i).req.ready
875c590fb32Scz4e    }
876c590fb32Scz4e
877c590fb32Scz4e    // Dcache requests must also be preempted by the segment.
878c590fb32Scz4e    when(vSegmentFlag){
879c590fb32Scz4e      loadUnits(i).io.dcache.req.ready             := false.B // Dcache is preempted.
880c590fb32Scz4e
881c590fb32Scz4e      dcache.io.lsu.load(0).pf_source              := vSegmentUnit.io.rdcache.pf_source
882c590fb32Scz4e      dcache.io.lsu.load(0).s1_paddr_dup_lsu       := vSegmentUnit.io.rdcache.s1_paddr_dup_lsu
883c590fb32Scz4e      dcache.io.lsu.load(0).s1_paddr_dup_dcache    := vSegmentUnit.io.rdcache.s1_paddr_dup_dcache
884c590fb32Scz4e      dcache.io.lsu.load(0).s1_kill                := vSegmentUnit.io.rdcache.s1_kill
885c590fb32Scz4e      dcache.io.lsu.load(0).s2_kill                := vSegmentUnit.io.rdcache.s2_kill
886c590fb32Scz4e      dcache.io.lsu.load(0).s0_pc                  := vSegmentUnit.io.rdcache.s0_pc
887c590fb32Scz4e      dcache.io.lsu.load(0).s1_pc                  := vSegmentUnit.io.rdcache.s1_pc
888c590fb32Scz4e      dcache.io.lsu.load(0).s2_pc                  := vSegmentUnit.io.rdcache.s2_pc
889c590fb32Scz4e      dcache.io.lsu.load(0).is128Req               := vSegmentUnit.io.rdcache.is128Req
890c590fb32Scz4e    }.otherwise {
891c590fb32Scz4e      loadUnits(i).io.dcache.req.ready             := dcache.io.lsu.load(i).req.ready
892c590fb32Scz4e
893c590fb32Scz4e      dcache.io.lsu.load(0).pf_source              := loadUnits(0).io.dcache.pf_source
894c590fb32Scz4e      dcache.io.lsu.load(0).s1_paddr_dup_lsu       := loadUnits(0).io.dcache.s1_paddr_dup_lsu
895c590fb32Scz4e      dcache.io.lsu.load(0).s1_paddr_dup_dcache    := loadUnits(0).io.dcache.s1_paddr_dup_dcache
896c590fb32Scz4e      dcache.io.lsu.load(0).s1_kill                := loadUnits(0).io.dcache.s1_kill
897c590fb32Scz4e      dcache.io.lsu.load(0).s2_kill                := loadUnits(0).io.dcache.s2_kill
898c590fb32Scz4e      dcache.io.lsu.load(0).s0_pc                  := loadUnits(0).io.dcache.s0_pc
899c590fb32Scz4e      dcache.io.lsu.load(0).s1_pc                  := loadUnits(0).io.dcache.s1_pc
900c590fb32Scz4e      dcache.io.lsu.load(0).s2_pc                  := loadUnits(0).io.dcache.s2_pc
901c590fb32Scz4e      dcache.io.lsu.load(0).is128Req               := loadUnits(0).io.dcache.is128Req
902c590fb32Scz4e    }
903c590fb32Scz4e
904c590fb32Scz4e    // forward
905c590fb32Scz4e    loadUnits(i).io.lsq.forward <> lsq.io.forward(i)
906c590fb32Scz4e    loadUnits(i).io.sbuffer <> sbuffer.io.forward(i)
907c590fb32Scz4e    loadUnits(i).io.ubuffer <> uncache.io.forward(i)
908c590fb32Scz4e    loadUnits(i).io.tl_d_channel := dcache.io.lsu.forward_D(i)
909c590fb32Scz4e    loadUnits(i).io.forward_mshr <> dcache.io.lsu.forward_mshr(i)
910c590fb32Scz4e    // ld-ld violation check
911c590fb32Scz4e    loadUnits(i).io.lsq.ldld_nuke_query <> lsq.io.ldu.ldld_nuke_query(i)
912c590fb32Scz4e    loadUnits(i).io.lsq.stld_nuke_query <> lsq.io.ldu.stld_nuke_query(i)
913522c7f99SAnzo    // loadqueue old ptr
914522c7f99SAnzo    loadUnits(i).io.lsq.lqDeqPtr := lsq.io.lqDeqPtr
915c590fb32Scz4e    loadUnits(i).io.csrCtrl       <> csrCtrl
916c590fb32Scz4e    // dcache refill req
917c590fb32Scz4e  // loadUnits(i).io.refill           <> delayedDcacheRefill
918c590fb32Scz4e    // dtlb
919c590fb32Scz4e    loadUnits(i).io.tlb <> dtlb_reqs.take(LduCnt)(i)
920c590fb32Scz4e    if(i == 0 ){ // port 0 assign to vsegmentUnit
921c590fb32Scz4e      val vsegmentDtlbReqValid = vSegmentUnit.io.dtlb.req.valid // segment tlb resquest need to delay 1 cycle
922c590fb32Scz4e      dtlb_reqs.take(LduCnt)(i).req.valid := loadUnits(i).io.tlb.req.valid || RegNext(vsegmentDtlbReqValid)
923c590fb32Scz4e      vSegmentUnit.io.dtlb.req.ready      := dtlb_reqs.take(LduCnt)(i).req.ready
924c590fb32Scz4e      dtlb_reqs.take(LduCnt)(i).req.bits  := ParallelPriorityMux(Seq(
925c590fb32Scz4e        RegNext(vsegmentDtlbReqValid)     -> RegEnable(vSegmentUnit.io.dtlb.req.bits, vsegmentDtlbReqValid),
926c590fb32Scz4e        loadUnits(i).io.tlb.req.valid     -> loadUnits(i).io.tlb.req.bits
927c590fb32Scz4e      ))
928c590fb32Scz4e    }
929c590fb32Scz4e    // pmp
930c590fb32Scz4e    loadUnits(i).io.pmp <> pmp_check(i).resp
931c590fb32Scz4e    // st-ld violation query
932c590fb32Scz4e    val stld_nuke_query = storeUnits.map(_.io.stld_nuke_query) ++ hybridUnits.map(_.io.stu_io.stld_nuke_query)
933c590fb32Scz4e    for (s <- 0 until StorePipelineWidth) {
934c590fb32Scz4e      loadUnits(i).io.stld_nuke_query(s) := stld_nuke_query(s)
935c590fb32Scz4e    }
936c590fb32Scz4e    loadUnits(i).io.lq_rep_full <> lsq.io.lq_rep_full
937c590fb32Scz4e    // load prefetch train
938c590fb32Scz4e    prefetcherOpt.foreach(pf => {
939c590fb32Scz4e      // sms will train on all miss load sources
940c590fb32Scz4e      val source = loadUnits(i).io.prefetch_train
941c590fb32Scz4e      pf.io.ld_in(i).valid := Mux(pf_train_on_hit,
942c590fb32Scz4e        source.valid,
943c590fb32Scz4e        source.valid && source.bits.isFirstIssue && source.bits.miss
944c590fb32Scz4e      )
945c590fb32Scz4e      pf.io.ld_in(i).bits := source.bits
946c590fb32Scz4e      val loadPc = RegNext(io.ooo_to_mem.issueLda(i).bits.uop.pc) // for s1
947c590fb32Scz4e      pf.io.ld_in(i).bits.uop.pc := Mux(
948c590fb32Scz4e        loadUnits(i).io.s2_ptr_chasing,
949c590fb32Scz4e        RegEnable(loadPc, loadUnits(i).io.s2_prefetch_spec),
950c590fb32Scz4e        RegEnable(RegEnable(loadPc, loadUnits(i).io.s1_prefetch_spec), loadUnits(i).io.s2_prefetch_spec)
951c590fb32Scz4e      )
952c590fb32Scz4e    })
953c590fb32Scz4e    l1PrefetcherOpt.foreach(pf => {
954c590fb32Scz4e      // stream will train on all load sources
955c590fb32Scz4e      val source = loadUnits(i).io.prefetch_train_l1
956c590fb32Scz4e      pf.io.ld_in(i).valid := source.valid && source.bits.isFirstIssue
957c590fb32Scz4e      pf.io.ld_in(i).bits := source.bits
958c590fb32Scz4e    })
959c590fb32Scz4e
960c590fb32Scz4e    // load to load fast forward: load(i) prefers data(i)
961c590fb32Scz4e    val l2l_fwd_out = loadUnits.map(_.io.l2l_fwd_out) ++ hybridUnits.map(_.io.ldu_io.l2l_fwd_out)
962c590fb32Scz4e    val fastPriority = (i until LduCnt + HyuCnt) ++ (0 until i)
963c590fb32Scz4e    val fastValidVec = fastPriority.map(j => l2l_fwd_out(j).valid)
964c590fb32Scz4e    val fastDataVec = fastPriority.map(j => l2l_fwd_out(j).data)
965c590fb32Scz4e    val fastErrorVec = fastPriority.map(j => l2l_fwd_out(j).dly_ld_err)
966c590fb32Scz4e    val fastMatchVec = fastPriority.map(j => io.ooo_to_mem.loadFastMatch(i)(j))
967c590fb32Scz4e    loadUnits(i).io.l2l_fwd_in.valid := VecInit(fastValidVec).asUInt.orR
968c590fb32Scz4e    loadUnits(i).io.l2l_fwd_in.data := ParallelPriorityMux(fastValidVec, fastDataVec)
969c590fb32Scz4e    loadUnits(i).io.l2l_fwd_in.dly_ld_err := ParallelPriorityMux(fastValidVec, fastErrorVec)
970c590fb32Scz4e    val fastMatch = ParallelPriorityMux(fastValidVec, fastMatchVec)
971c590fb32Scz4e    loadUnits(i).io.ld_fast_match := fastMatch
972c590fb32Scz4e    loadUnits(i).io.ld_fast_imm := io.ooo_to_mem.loadFastImm(i)
973c590fb32Scz4e    loadUnits(i).io.ld_fast_fuOpType := io.ooo_to_mem.loadFastFuOpType(i)
974c590fb32Scz4e    loadUnits(i).io.replay <> lsq.io.replay(i)
975c590fb32Scz4e
976c590fb32Scz4e    val l2_hint = RegNext(io.l2_hint)
977c590fb32Scz4e
978c590fb32Scz4e    // L2 Hint for DCache
979c590fb32Scz4e    dcache.io.l2_hint <> l2_hint
980c590fb32Scz4e
981c590fb32Scz4e    loadUnits(i).io.l2_hint <> l2_hint
982c590fb32Scz4e    loadUnits(i).io.tlb_hint.id := dtlbRepeater.io.hint.get.req(i).id
983c590fb32Scz4e    loadUnits(i).io.tlb_hint.full := dtlbRepeater.io.hint.get.req(i).full ||
984c590fb32Scz4e      tlbreplay_reg(i) || dtlb_ld0_tlbreplay_reg(i)
985c590fb32Scz4e
986c590fb32Scz4e    // passdown to lsq (load s2)
987c590fb32Scz4e    lsq.io.ldu.ldin(i) <> loadUnits(i).io.lsq.ldin
988c590fb32Scz4e    if (i == UncacheWBPort) {
989c590fb32Scz4e      lsq.io.ldout(i) <> loadUnits(i).io.lsq.uncache
990c590fb32Scz4e    } else {
991c590fb32Scz4e      lsq.io.ldout(i).ready := true.B
992c590fb32Scz4e      loadUnits(i).io.lsq.uncache.valid := false.B
993c590fb32Scz4e      loadUnits(i).io.lsq.uncache.bits := DontCare
994c590fb32Scz4e    }
995c590fb32Scz4e    lsq.io.ld_raw_data(i) <> loadUnits(i).io.lsq.ld_raw_data
996c590fb32Scz4e    lsq.io.ncOut(i) <> loadUnits(i).io.lsq.nc_ldin
997c590fb32Scz4e    lsq.io.l2_hint.valid := l2_hint.valid
998c590fb32Scz4e    lsq.io.l2_hint.bits.sourceId := l2_hint.bits.sourceId
999c590fb32Scz4e    lsq.io.l2_hint.bits.isKeyword := l2_hint.bits.isKeyword
1000c590fb32Scz4e
1001c590fb32Scz4e    lsq.io.tlb_hint <> dtlbRepeater.io.hint.get
1002c590fb32Scz4e
1003c590fb32Scz4e    // connect misalignBuffer
1004c590fb32Scz4e    loadMisalignBuffer.io.req(i) <> loadUnits(i).io.misalign_buf
1005c590fb32Scz4e
1006c590fb32Scz4e    if (i == MisalignWBPort) {
1007c590fb32Scz4e      loadUnits(i).io.misalign_ldin  <> loadMisalignBuffer.io.splitLoadReq
1008c590fb32Scz4e      loadUnits(i).io.misalign_ldout <> loadMisalignBuffer.io.splitLoadResp
1009c590fb32Scz4e    } else {
1010c590fb32Scz4e      loadUnits(i).io.misalign_ldin.valid := false.B
1011c590fb32Scz4e      loadUnits(i).io.misalign_ldin.bits := DontCare
1012c590fb32Scz4e    }
1013c590fb32Scz4e
1014c590fb32Scz4e    // alter writeback exception info
1015c590fb32Scz4e    io.mem_to_ooo.s3_delayed_load_error(i) := loadUnits(i).io.s3_dly_ld_err
1016c590fb32Scz4e
1017c590fb32Scz4e    // update mem dependency predictor
1018c590fb32Scz4e    // io.memPredUpdate(i) := DontCare
1019c590fb32Scz4e
1020c590fb32Scz4e    // --------------------------------
1021c590fb32Scz4e    // Load Triggers
1022c590fb32Scz4e    // --------------------------------
1023c590fb32Scz4e    loadUnits(i).io.fromCsrTrigger.tdataVec := tdata
1024c590fb32Scz4e    loadUnits(i).io.fromCsrTrigger.tEnableVec := tEnable
1025c590fb32Scz4e    loadUnits(i).io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
1026c590fb32Scz4e    loadUnits(i).io.fromCsrTrigger.debugMode := debugMode
1027c590fb32Scz4e  }
1028c590fb32Scz4e
  // Hybrid units can behave as either a load (ldu_io) or a store-address
  // (stu_io) pipeline. Load-side shared resources are indexed after the
  // dedicated load units (LduCnt + i); store-side resources after the
  // dedicated store units (StaCnt + i).
  for (i <- 0 until HyuCnt) {
    hybridUnits(i).io.redirect <> redirect

    // get input from dispatch
    hybridUnits(i).io.lsin <> io.ooo_to_mem.issueHya(i)
    hybridUnits(i).io.feedback_slow <> io.mem_to_ooo.hyuIqFeedback(i).feedbackSlow
    hybridUnits(i).io.feedback_fast <> io.mem_to_ooo.hyuIqFeedback(i).feedbackFast
    hybridUnits(i).io.correctMissTrain := correctMissTrain
    // NOTE(review): these use take(HyuCnt) (i.e. the FIRST HyuCnt slots) while
    // the store-side below uses takeRight(HyuCnt) — confirm the intended
    // ldCancel/wakeup slot layout against the backend's port ordering.
    io.mem_to_ooo.ldCancel.take(HyuCnt)(i) := hybridUnits(i).io.ldu_io.ldCancel
    io.mem_to_ooo.wakeup.take(HyuCnt)(i) := hybridUnits(i).io.ldu_io.wakeup

    // ------------------------------------
    //  Load Port
    // ------------------------------------
    // fast replay
    // Fast replay loops back to the same unit (no cross-unit fast replay).
    hybridUnits(i).io.ldu_io.fast_rep_in <> hybridUnits(i).io.ldu_io.fast_rep_out

    // get input from dispatch
    hybridUnits(i).io.ldu_io.dcache <> dcache.io.lsu.load(LduCnt + i)
    hybridUnits(i).io.stu_io.dcache <> dcache.io.lsu.sta(StaCnt + i)

    // dcache access
    hybridUnits(i).io.ldu_io.lsq.forward <> lsq.io.forward(LduCnt + i)
    // forward
    // Store-to-load forwarding sources: sbuffer, uncache buffer, in-flight
    // TileLink D-channel data and MSHR data.
    hybridUnits(i).io.ldu_io.sbuffer <> sbuffer.io.forward(LduCnt + i)
    hybridUnits(i).io.ldu_io.ubuffer <> uncache.io.forward(LduCnt + i)
    // hybridUnits(i).io.ldu_io.vec_forward <> vsFlowQueue.io.forward(LduCnt + i)
    hybridUnits(i).io.ldu_io.vec_forward := DontCare
    hybridUnits(i).io.ldu_io.tl_d_channel := dcache.io.lsu.forward_D(LduCnt + i)
    hybridUnits(i).io.ldu_io.forward_mshr <> dcache.io.lsu.forward_mshr(LduCnt + i)
    // ld-ld violation check
    hybridUnits(i).io.ldu_io.lsq.ldld_nuke_query <> lsq.io.ldu.ldld_nuke_query(LduCnt + i)
    hybridUnits(i).io.ldu_io.lsq.stld_nuke_query <> lsq.io.ldu.stld_nuke_query(LduCnt + i)
    hybridUnits(i).io.csrCtrl <> csrCtrl
    // dcache refill req
    // TLB hint for replay scheduling; full also accounts for registered
    // tlbreplay conditions.
    hybridUnits(i).io.ldu_io.tlb_hint.id := dtlbRepeater.io.hint.get.req(LduCnt + i).id
    hybridUnits(i).io.ldu_io.tlb_hint.full := dtlbRepeater.io.hint.get.req(LduCnt + i).full ||
      tlbreplay_reg(LduCnt + i) || dtlb_ld0_tlbreplay_reg(LduCnt + i)

    // dtlb
    hybridUnits(i).io.tlb <> dtlb_ld.head.requestor(LduCnt + i)
    // pmp
    hybridUnits(i).io.pmp <> pmp_check.drop(LduCnt)(i).resp
    // st-ld violation query
    // Broadcast nuke queries from every store-address pipeline (dedicated
    // and hybrid) into this unit's load side.
    val stld_nuke_query = VecInit(storeUnits.map(_.io.stld_nuke_query) ++ hybridUnits.map(_.io.stu_io.stld_nuke_query))
    hybridUnits(i).io.ldu_io.stld_nuke_query := stld_nuke_query
    hybridUnits(i).io.ldu_io.lq_rep_full <> lsq.io.lq_rep_full
    // load prefetch train
    // Train the L2/SMS prefetcher either on every access or only on
    // first-issue misses, depending on pf_train_on_hit.
    prefetcherOpt.foreach(pf => {
      val source = hybridUnits(i).io.prefetch_train
      pf.io.ld_in(LduCnt + i).valid := Mux(pf_train_on_hit,
        source.valid,
        source.valid && source.bits.isFirstIssue && source.bits.miss
      )
      pf.io.ld_in(LduCnt + i).bits := source.bits
      // PC arrives one cycle early on the pointer-chasing path.
      pf.io.ld_in(LduCnt + i).bits.uop.pc := Mux(hybridUnits(i).io.ldu_io.s2_ptr_chasing, io.ooo_to_mem.hybridPc(i), RegNext(io.ooo_to_mem.hybridPc(i)))
    })
    l1PrefetcherOpt.foreach(pf => {
      // stream will train on all load sources
      val source = hybridUnits(i).io.prefetch_train_l1
      pf.io.ld_in(LduCnt + i).valid := source.valid && source.bits.isFirstIssue &&
                                       FuType.isLoad(source.bits.uop.fuType)
      pf.io.ld_in(LduCnt + i).bits := source.bits
      // L1 stream prefetcher does not train on hybrid store traffic.
      pf.io.st_in(StaCnt + i).valid := false.B
      pf.io.st_in(StaCnt + i).bits := DontCare
    })
    prefetcherOpt.foreach(pf => {
      val source = hybridUnits(i).io.prefetch_train
      pf.io.st_in(StaCnt + i).valid := Mux(pf_train_on_hit,
        source.valid,
        source.valid && source.bits.isFirstIssue && source.bits.miss
      ) && FuType.isStore(source.bits.uop.fuType)
      pf.io.st_in(StaCnt + i).bits := source.bits
      pf.io.st_in(StaCnt + i).bits.uop.pc := RegNext(io.ooo_to_mem.hybridPc(i))
    })

    // load to load fast forward: load(i) prefers data(i)
    // Rotate the priority list so each consumer prefers nearby producers;
    // ParallelPriorityMux picks the first valid source in that order.
    val l2l_fwd_out = loadUnits.map(_.io.l2l_fwd_out) ++ hybridUnits.map(_.io.ldu_io.l2l_fwd_out)
    val fastPriority = (LduCnt + i until LduCnt + HyuCnt) ++ (0 until LduCnt + i)
    val fastValidVec = fastPriority.map(j => l2l_fwd_out(j).valid)
    val fastDataVec = fastPriority.map(j => l2l_fwd_out(j).data)
    val fastErrorVec = fastPriority.map(j => l2l_fwd_out(j).dly_ld_err)
    val fastMatchVec = fastPriority.map(j => io.ooo_to_mem.loadFastMatch(LduCnt + i)(j))
    hybridUnits(i).io.ldu_io.l2l_fwd_in.valid := VecInit(fastValidVec).asUInt.orR
    hybridUnits(i).io.ldu_io.l2l_fwd_in.data := ParallelPriorityMux(fastValidVec, fastDataVec)
    hybridUnits(i).io.ldu_io.l2l_fwd_in.dly_ld_err := ParallelPriorityMux(fastValidVec, fastErrorVec)
    val fastMatch = ParallelPriorityMux(fastValidVec, fastMatchVec)
    hybridUnits(i).io.ldu_io.ld_fast_match := fastMatch
    hybridUnits(i).io.ldu_io.ld_fast_imm := io.ooo_to_mem.loadFastImm(LduCnt + i)
    hybridUnits(i).io.ldu_io.ld_fast_fuOpType := io.ooo_to_mem.loadFastFuOpType(LduCnt + i)
    hybridUnits(i).io.ldu_io.replay <> lsq.io.replay(LduCnt + i)
    hybridUnits(i).io.ldu_io.l2_hint <> io.l2_hint

    // uncache
    lsq.io.ldout.drop(LduCnt)(i) <> hybridUnits(i).io.ldu_io.lsq.uncache
    lsq.io.ld_raw_data.drop(LduCnt)(i) <> hybridUnits(i).io.ldu_io.lsq.ld_raw_data


    // passdown to lsq (load s2)
    // Hybrid units have no non-cacheable load-in path.
    hybridUnits(i).io.ldu_io.lsq.nc_ldin.valid := false.B
    hybridUnits(i).io.ldu_io.lsq.nc_ldin.bits := DontCare
    lsq.io.ldu.ldin(LduCnt + i) <> hybridUnits(i).io.ldu_io.lsq.ldin
    // Lsq to sta unit
    // NOTE(review): if storeMaskIn has StaCnt + HyuCnt entries, this is the
    // same port as the takeRight(HyuCnt)(i) connect below — an apparently
    // redundant double connect of the same source; confirm.
    lsq.io.sta.storeMaskIn(StaCnt + i) <> hybridUnits(i).io.stu_io.st_mask_out

    // Lsq to std unit's rs
    // Store data goes to the SQ unless it belongs to an atomic op.
    lsq.io.std.storeDataIn(StaCnt + i) := stData(StaCnt + i)
    lsq.io.std.storeDataIn(StaCnt + i).valid := stData(StaCnt + i).valid && !st_data_atomics(StaCnt + i)
    // prefetch
    hybridUnits(i).io.stu_io.prefetch_req <> sbuffer.io.store_prefetch(StaCnt + i)

    io.mem_to_ooo.s3_delayed_load_error(LduCnt + i) := hybridUnits(i).io.ldu_io.s3_dly_ld_err

    // ------------------------------------
    //  Store Port
    // ------------------------------------
    hybridUnits(i).io.stu_io.lsq <> lsq.io.sta.storeAddrIn.takeRight(HyuCnt)(i)
    hybridUnits(i).io.stu_io.lsq_replenish <> lsq.io.sta.storeAddrInRe.takeRight(HyuCnt)(i)

    lsq.io.sta.storeMaskIn.takeRight(HyuCnt)(i) <> hybridUnits(i).io.stu_io.st_mask_out
    // Broadcast issued store info to the store-set LFST / dependency wakeup.
    io.mem_to_ooo.stIn.takeRight(HyuCnt)(i).valid := hybridUnits(i).io.stu_io.issue.valid
    io.mem_to_ooo.stIn.takeRight(HyuCnt)(i).bits := hybridUnits(i).io.stu_io.issue.bits

    // ------------------------------------
    //  Vector Store Port
    // ------------------------------------
    hybridUnits(i).io.vec_stu_io.isFirstIssue := true.B

    // -------------------------
    // Store Triggers
    // -------------------------
    // Same debug-trigger CSR fan-out as the scalar pipelines.
    hybridUnits(i).io.fromCsrTrigger.tdataVec := tdata
    hybridUnits(i).io.fromCsrTrigger.tEnableVec := tEnable
    hybridUnits(i).io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
    hybridUnits(i).io.fromCsrTrigger.debugMode := debugMode
  }
1165c590fb32Scz4e
  // misalignBuffer
  // Both misalign buffers mirror the ROB commit/pending state so they can
  // decide when a split access is safe to replay or write back.
  loadMisalignBuffer.io.redirect                <> redirect
  loadMisalignBuffer.io.rob.lcommit             := io.ooo_to_mem.lsqio.lcommit
  loadMisalignBuffer.io.rob.scommit             := io.ooo_to_mem.lsqio.scommit
  loadMisalignBuffer.io.rob.pendingMMIOld       := io.ooo_to_mem.lsqio.pendingMMIOld
  loadMisalignBuffer.io.rob.pendingld           := io.ooo_to_mem.lsqio.pendingld
  loadMisalignBuffer.io.rob.pendingst           := io.ooo_to_mem.lsqio.pendingst
  loadMisalignBuffer.io.rob.pendingVst          := io.ooo_to_mem.lsqio.pendingVst
  loadMisalignBuffer.io.rob.commit              := io.ooo_to_mem.lsqio.commit
  loadMisalignBuffer.io.rob.pendingPtr          := io.ooo_to_mem.lsqio.pendingPtr
  loadMisalignBuffer.io.rob.pendingPtrNext      := io.ooo_to_mem.lsqio.pendingPtrNext

  lsq.io.loadMisalignFull                       := loadMisalignBuffer.io.loadMisalignFull
  lsq.io.misalignAllowSpec                      := misalign_allow_spec

  storeMisalignBuffer.io.redirect               <> redirect
  storeMisalignBuffer.io.rob.lcommit            := io.ooo_to_mem.lsqio.lcommit
  storeMisalignBuffer.io.rob.scommit            := io.ooo_to_mem.lsqio.scommit
  storeMisalignBuffer.io.rob.pendingMMIOld      := io.ooo_to_mem.lsqio.pendingMMIOld
  storeMisalignBuffer.io.rob.pendingld          := io.ooo_to_mem.lsqio.pendingld
  storeMisalignBuffer.io.rob.pendingst          := io.ooo_to_mem.lsqio.pendingst
  storeMisalignBuffer.io.rob.pendingVst         := io.ooo_to_mem.lsqio.pendingVst
  storeMisalignBuffer.io.rob.commit             := io.ooo_to_mem.lsqio.commit
  storeMisalignBuffer.io.rob.pendingPtr         := io.ooo_to_mem.lsqio.pendingPtr
  storeMisalignBuffer.io.rob.pendingPtrNext     := io.ooo_to_mem.lsqio.pendingPtrNext

  // Store-queue control handshake for misaligned (cross-line) stores.
  lsq.io.maControl                              <> storeMisalignBuffer.io.sqControl

  // Cache-maintenance (CMO) request/response between SQ and dcache.
  lsq.io.cmoOpReq <> dcache.io.cmoOpReq
  lsq.io.cmoOpResp <> dcache.io.cmoOpResp
1196c590fb32Scz4e
1197c590fb32Scz4e  // Prefetcher
1198c590fb32Scz4e  val StreamDTLBPortIndex = TlbStartVec(dtlb_ld_idx) + LduCnt + HyuCnt
1199c590fb32Scz4e  val PrefetcherDTLBPortIndex = TlbStartVec(dtlb_pf_idx)
1200c590fb32Scz4e  val L2toL1DLBPortIndex = TlbStartVec(dtlb_pf_idx) + 1
1201c590fb32Scz4e  prefetcherOpt match {
1202c590fb32Scz4e  case Some(pf) =>
1203c590fb32Scz4e    dtlb_reqs(PrefetcherDTLBPortIndex) <> pf.io.tlb_req
1204c590fb32Scz4e    pf.io.pmp_resp := pmp_check(PrefetcherDTLBPortIndex).resp
1205c590fb32Scz4e  case None =>
1206c590fb32Scz4e    dtlb_reqs(PrefetcherDTLBPortIndex) := DontCare
1207c590fb32Scz4e    dtlb_reqs(PrefetcherDTLBPortIndex).req.valid := false.B
1208c590fb32Scz4e    dtlb_reqs(PrefetcherDTLBPortIndex).resp.ready := true.B
1209c590fb32Scz4e  }
1210c590fb32Scz4e  l1PrefetcherOpt match {
1211c590fb32Scz4e    case Some(pf) =>
1212c590fb32Scz4e      dtlb_reqs(StreamDTLBPortIndex) <> pf.io.tlb_req
1213c590fb32Scz4e      pf.io.pmp_resp := pmp_check(StreamDTLBPortIndex).resp
1214c590fb32Scz4e    case None =>
1215c590fb32Scz4e        dtlb_reqs(StreamDTLBPortIndex) := DontCare
1216c590fb32Scz4e        dtlb_reqs(StreamDTLBPortIndex).req.valid := false.B
1217c590fb32Scz4e        dtlb_reqs(StreamDTLBPortIndex).resp.ready := true.B
1218c590fb32Scz4e  }
1219c590fb32Scz4e  dtlb_reqs(L2toL1DLBPortIndex) <> io.l2_tlb_req
1220c590fb32Scz4e  dtlb_reqs(L2toL1DLBPortIndex).resp.ready := true.B
1221c590fb32Scz4e  io.l2_pmp_resp := pmp_check(L2toL1DLBPortIndex).resp
1222c590fb32Scz4e
1223c590fb32Scz4e  // StoreUnit
1224c590fb32Scz4e  for (i <- 0 until StdCnt) {
1225c590fb32Scz4e    stdExeUnits(i).io.flush <> redirect
1226c590fb32Scz4e    stdExeUnits(i).io.in.valid := io.ooo_to_mem.issueStd(i).valid
1227c590fb32Scz4e    io.ooo_to_mem.issueStd(i).ready := stdExeUnits(i).io.in.ready
1228c590fb32Scz4e    stdExeUnits(i).io.in.bits := io.ooo_to_mem.issueStd(i).bits
1229c590fb32Scz4e  }
1230c590fb32Scz4e
  // Store-address units: per-unit wiring of dcache, TLB/PMP, LSQ, misalign
  // buffer, triggers, prefetch training and vector store input.
  for (i <- 0 until StaCnt) {
    val stu = storeUnits(i)

    stu.io.redirect      <> redirect
    stu.io.csrCtrl       <> csrCtrl
    stu.io.dcache        <> dcache.io.lsu.sta(i)
    stu.io.feedback_slow <> io.mem_to_ooo.staIqFeedback(i).feedbackSlow
    stu.io.stin         <> io.ooo_to_mem.issueSta(i)
    stu.io.lsq          <> lsq.io.sta.storeAddrIn(i)
    stu.io.lsq_replenish <> lsq.io.sta.storeAddrInRe(i)
    // dtlb
    // PMP ports: loads/hybrids occupy the first LduCnt + HyuCnt slots, plus
    // one extra slot before the store ports.
    stu.io.tlb          <> dtlb_st.head.requestor(i)
    stu.io.pmp          <> pmp_check(LduCnt + HyuCnt + 1 + i).resp

    // -------------------------
    // Store Triggers
    // -------------------------
    // Debug-trigger CSR state fan-out, same as the load pipelines.
    stu.io.fromCsrTrigger.tdataVec := tdata
    stu.io.fromCsrTrigger.tEnableVec := tEnable
    stu.io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
    stu.io.fromCsrTrigger.debugMode := debugMode

    // prefetch
    stu.io.prefetch_req <> sbuffer.io.store_prefetch(i)

    // store unit does not need fast feedback
    io.mem_to_ooo.staIqFeedback(i).feedbackFast := DontCare

    // Lsq to sta unit
    lsq.io.sta.storeMaskIn(i) <> stu.io.st_mask_out

    // connect misalignBuffer
    storeMisalignBuffer.io.req(i) <> stu.io.misalign_buf

    // Only store unit 0 services split (misaligned) store replays.
    if (i == 0) {
      stu.io.misalign_stin  <> storeMisalignBuffer.io.splitStoreReq
      stu.io.misalign_stout <> storeMisalignBuffer.io.splitStoreResp
    } else {
      stu.io.misalign_stin.valid := false.B
      stu.io.misalign_stin.bits := DontCare
    }

    // Lsq to std unit's rs
    // Vector store data (from vsSplit) takes priority over scalar store data
    // on the shared storeDataIn port; when it wins, scalar data is stalled
    // by deasserting stData(i).ready.
    if (i < VstuCnt){
      when (vsSplit(i).io.vstd.get.valid) {
        lsq.io.std.storeDataIn(i).valid := true.B
        lsq.io.std.storeDataIn(i).bits := vsSplit(i).io.vstd.get.bits
        stData(i).ready := false.B
      }.otherwise {
        lsq.io.std.storeDataIn(i).valid := stData(i).valid && !st_data_atomics(i)
        lsq.io.std.storeDataIn(i).bits.uop := stData(i).bits.uop
        lsq.io.std.storeDataIn(i).bits.data := stData(i).bits.data
        // Optional vector-only fields are zeroed for scalar stores
        // (.map over Option is used for the side effect).
        lsq.io.std.storeDataIn(i).bits.mask.map(_ := 0.U)
        lsq.io.std.storeDataIn(i).bits.vdIdx.map(_ := 0.U)
        lsq.io.std.storeDataIn(i).bits.vdIdxInField.map(_ := 0.U)
        stData(i).ready := true.B
      }
    } else {
        lsq.io.std.storeDataIn(i).valid := stData(i).valid && !st_data_atomics(i)
        lsq.io.std.storeDataIn(i).bits.uop := stData(i).bits.uop
        lsq.io.std.storeDataIn(i).bits.data := stData(i).bits.data
        lsq.io.std.storeDataIn(i).bits.mask.map(_ := 0.U)
        lsq.io.std.storeDataIn(i).bits.vdIdx.map(_ := 0.U)
        lsq.io.std.storeDataIn(i).bits.vdIdxInField.map(_ := 0.U)
        stData(i).ready := true.B
    }
    // NOTE(review): these two lines touch ALL storeDataIn ports and are
    // re-applied on every loop iteration — loop-invariant, harmless but
    // could be hoisted out of the loop.
    lsq.io.std.storeDataIn.map(_.bits.debug := 0.U.asTypeOf(new DebugBundle))
    lsq.io.std.storeDataIn.foreach(_.bits.isFromLoadUnit := DontCare)


    // store prefetch train
    l1PrefetcherOpt.foreach(pf => {
      // stream will train on all load sources
      // (so the L1 stream prefetcher ignores store traffic here)
      pf.io.st_in(i).valid := false.B
      pf.io.st_in(i).bits := DontCare
    })

    // Train the L2/SMS prefetcher on stores: every access, or only
    // first-issue misses, depending on pf_train_on_hit.
    prefetcherOpt.foreach(pf => {
      pf.io.st_in(i).valid := Mux(pf_train_on_hit,
        stu.io.prefetch_train.valid,
        stu.io.prefetch_train.valid && stu.io.prefetch_train.bits.isFirstIssue && (
          stu.io.prefetch_train.bits.miss
          )
      )
      pf.io.st_in(i).bits := stu.io.prefetch_train.bits
      // Align the PC with the s2 training payload (two speculative stages).
      pf.io.st_in(i).bits.uop.pc := RegEnable(RegEnable(io.ooo_to_mem.storePc(i), stu.io.s1_prefetch_spec), stu.io.s2_prefetch_spec)
    })

    // 1. sync issue info to store set LFST
    // 2. when store issue, broadcast issued sqPtr to wake up the following insts
    // io.stIn(i).valid := io.issue(exuParameters.LduCnt + i).valid
    // io.stIn(i).bits := io.issue(exuParameters.LduCnt + i).bits
    io.mem_to_ooo.stIn(i).valid := stu.io.issue.valid
    io.mem_to_ooo.stIn(i).bits := stu.io.issue.bits

    stu.io.stout.ready := true.B

    // vector
    // Only the first VstuCnt store units accept vector store uops.
    if (i < VstuCnt) {
      stu.io.vecstin <> vsSplit(i).io.out
      // vsFlowQueue.io.pipeFeedback(i) <> stu.io.vec_feedback_slow // need connect
    } else {
      stu.io.vecstin.valid := false.B
      stu.io.vecstin.bits := DontCare
      stu.io.vecstout.ready := false.B
    }
    stu.io.vec_isFirstIssue := true.B // TODO
  }
1339c590fb32Scz4e
  // Merge the two store-queue writeback sources (mmio and cbo.zero) into a
  // single stream; cbo.zero is given priority and the two are asserted to be
  // mutually exclusive.
  val sqOtherStout = WireInit(0.U.asTypeOf(DecoupledIO(new MemExuOutput)))
  sqOtherStout.valid := lsq.io.mmioStout.valid || lsq.io.cboZeroStout.valid
  sqOtherStout.bits  := Mux(lsq.io.cboZeroStout.valid, lsq.io.cboZeroStout.bits, lsq.io.mmioStout.bits)
  assert(!(lsq.io.mmioStout.valid && lsq.io.cboZeroStout.valid), "Cannot writeback to mmio and cboZero at the same time.")

  // Store writeback by StoreQueue:
  //   1. cbo Zero
  //   2. mmio
  // Currently, the two should not be present at the same time, so simply make cbo zero a higher priority.
  val otherStout = WireInit(0.U.asTypeOf(lsq.io.mmioStout))
  // One pipeline stage between the SQ sources and the writeback mux below.
  NewPipelineConnect(
    sqOtherStout, otherStout, otherStout.fire,
    false.B,
    Option("otherStoutConnect")
  )
  // otherStout may steal stOut(0) only when store unit 0 has no writeback
  // of its own this cycle (last-connect overrides the default below).
  otherStout.ready := false.B
  when (otherStout.valid && !storeUnits(0).io.stout.valid) {
    stOut(0).valid := true.B
    stOut(0).bits  := otherStout.bits
    otherStout.ready := true.B
  }
  // Both SQ sources share the merged stream's backpressure.
  lsq.io.mmioStout.ready := sqOtherStout.ready
  lsq.io.cboZeroStout.ready := sqOtherStout.ready

  // vec mmio writeback
  lsq.io.vecmmioStout.ready := false.B

  // miss align buffer will overwrite stOut(0)
  // Lowest priority: misalign writeback only when neither otherStout nor any
  // store-unit-0 (scalar/vector) writeback is pending.
  val storeMisalignCanWriteBack = !otherStout.valid && !storeUnits(0).io.stout.valid && !storeUnits(0).io.vecstout.valid
  storeMisalignBuffer.io.writeBack.ready := storeMisalignCanWriteBack
  storeMisalignBuffer.io.storeOutValid := storeUnits(0).io.stout.valid
  storeMisalignBuffer.io.storeVecOutValid := storeUnits(0).io.vecstout.valid
  when (storeMisalignBuffer.io.writeBack.valid && storeMisalignCanWriteBack) {
    stOut(0).valid := true.B
    stOut(0).bits  := storeMisalignBuffer.io.writeBack.bits
  }
1376c590fb32Scz4e
  // Uncache
  // Outstanding uncached writes are a CSR-controlled feature; both the
  // uncache agent and the LSQ must agree on the setting.
  uncache.io.enableOutstanding := io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable
  uncache.io.hartId := io.hartId
  lsq.io.uncacheOutstanding := io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable

  // Lsq
  // Mirror ROB commit/pending state into the LSQ (same bundle as the
  // misalign buffers above).
  io.mem_to_ooo.lsqio.mmio       := lsq.io.rob.mmio
  io.mem_to_ooo.lsqio.uop        := lsq.io.rob.uop
  lsq.io.rob.lcommit             := io.ooo_to_mem.lsqio.lcommit
  lsq.io.rob.scommit             := io.ooo_to_mem.lsqio.scommit
  lsq.io.rob.pendingMMIOld       := io.ooo_to_mem.lsqio.pendingMMIOld
  lsq.io.rob.pendingld           := io.ooo_to_mem.lsqio.pendingld
  lsq.io.rob.pendingst           := io.ooo_to_mem.lsqio.pendingst
  lsq.io.rob.pendingVst          := io.ooo_to_mem.lsqio.pendingVst
  lsq.io.rob.commit              := io.ooo_to_mem.lsqio.commit
  lsq.io.rob.pendingPtr          := io.ooo_to_mem.lsqio.pendingPtr
  lsq.io.rob.pendingPtrNext      := io.ooo_to_mem.lsqio.pendingPtrNext

  //  lsq.io.rob            <> io.lsqio.rob
  lsq.io.enq            <> io.ooo_to_mem.enqLsq
  lsq.io.brqRedirect    <> redirect
1398c590fb32Scz4e
  //  violation rollback
  /**
   * Build a one-hot vector selecting the oldest valid redirect in `xs`,
   * compared by robIdx age. Element i is true iff xs(i) is valid and is
   * strictly older than every valid entry before it and no later valid
   * entry is older — so equal robIdx ties resolve to the lower index.
   * The result is purely combinational hardware.
   */
  def selectOldestRedirect(xs: Seq[Valid[Redirect]]): Vec[Bool] = {
    // compareVec(i)(j) (j < i): xs(j) is younger than xs(i), i.e. xs(i) is older.
    val compareVec = (0 until xs.length).map(i => (0 until i).map(j => isAfter(xs(j).bits.robIdx, xs(i).bits.robIdx)))
    val resultOnehot = VecInit((0 until xs.length).map(i => Cat((0 until xs.length).map(j =>
      (if (j < i) !xs(j).valid || compareVec(i)(j)
      else if (j == i) xs(i).valid
      else !xs(j).valid || !compareVec(j)(i))
    )).andR))
    resultOnehot
  }
  // Collect rollback requests from every load pipeline (dedicated + hybrid)
  // and the LSQ (nack + nuke), then report only the oldest one to the backend.
  val allRedirect = loadUnits.map(_.io.rollback) ++ hybridUnits.map(_.io.ldu_io.rollback) ++ lsq.io.nack_rollback ++ lsq.io.nuke_rollback
  val oldestOneHot = selectOldestRedirect(allRedirect)
  val oldestRedirect = WireDefault(Mux1H(oldestOneHot, allRedirect))
  // memory replay would not cause IAF/IPF/IGPF
  oldestRedirect.bits.cfiUpdate.backendIAF := false.B
  oldestRedirect.bits.cfiUpdate.backendIPF := false.B
  oldestRedirect.bits.cfiUpdate.backendIGPF := false.B
  io.mem_to_ooo.memoryViolation := oldestRedirect
  io.mem_to_ooo.lsqio.lqCanAccept  := lsq.io.lqCanAccept
  io.mem_to_ooo.lsqio.sqCanAccept  := lsq.io.sqCanAccept
1419c590fb32Scz4e
  // lsq.io.uncache        <> uncache.io.lsq
  // Arbitration FSM between scalar (LSQ) and vector uncache traffic over the
  // single uncache port. In s_idle a request may fire; the FSM then tracks
  // which requester owns the port until its response fires — except that
  // outstanding-enabled NC writes stay in s_idle (no response tracking).
  val s_idle :: s_scalar_uncache :: s_vector_uncache :: Nil = Enum(3)
  val uncacheState = RegInit(s_idle)
  val uncacheReq = Wire(Decoupled(new UncacheWordReq))
  val uncacheIdResp = uncache.io.lsq.idResp
  val uncacheResp = Wire(Decoupled(new UncacheWordResp))

  // Safe defaults for every wire/port; the when-blocks below override them
  // via last-connect semantics.
  uncacheReq.bits := DontCare
  uncacheReq.valid := false.B
  uncacheReq.ready := false.B
  uncacheResp.bits := DontCare
  uncacheResp.valid := false.B
  uncacheResp.ready := false.B
  lsq.io.uncache.req.ready := false.B
  lsq.io.uncache.idResp.valid := false.B
  lsq.io.uncache.idResp.bits := DontCare
  lsq.io.uncache.resp.valid := false.B
  lsq.io.uncache.resp.bits := DontCare

  switch (uncacheState) {
    is (s_idle) {
      when (uncacheReq.fire) {
        when (lsq.io.uncache.req.valid) {
          // Scalar request wins; NC requests with outstanding writes enabled
          // do not occupy the port state.
          when (!lsq.io.uncache.req.bits.nc || !io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
            uncacheState := s_scalar_uncache
          }
        }.otherwise {
          // val isStore = vsFlowQueue.io.uncache.req.bits.cmd === MemoryOpConstants.M_XWR
          when (!io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
            uncacheState := s_vector_uncache
          }
        }
      }
    }

    is (s_scalar_uncache) {
      when (uncacheResp.fire) {
        uncacheState := s_idle
      }
    }

    is (s_vector_uncache) {
      when (uncacheResp.fire) {
        uncacheState := s_idle
      }
    }
  }

  // Route the scalar request onto the shared request wire when present.
  when (lsq.io.uncache.req.valid) {
    uncacheReq <> lsq.io.uncache.req
  }
  // Response routing: with outstanding writes enabled responses always go to
  // the LSQ; otherwise only while the FSM marks a scalar transaction.
  when (io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
    lsq.io.uncache.resp <> uncacheResp
    lsq.io.uncache.idResp <> uncacheIdResp
  }.otherwise {
    when (uncacheState === s_scalar_uncache) {
      lsq.io.uncache.resp <> uncacheResp
      lsq.io.uncache.idResp <> uncacheIdResp
    }
  }
  // delay dcache refill for 1 cycle for better timing
  AddPipelineReg(uncacheReq, uncache.io.lsq.req, false.B)
  AddPipelineReg(uncache.io.lsq.resp, uncacheResp, false.B)
1483c590fb32Scz4e
  //lsq.io.refill         := delayedDcacheRefill
  lsq.io.release        := dcache.io.lsu.release
  lsq.io.lqCancelCnt <> io.mem_to_ooo.lqCancelCnt
  lsq.io.sqCancelCnt <> io.mem_to_ooo.sqCancelCnt
  lsq.io.lqDeq <> io.mem_to_ooo.lqDeq
  lsq.io.sqDeq <> io.mem_to_ooo.sqDeq
  // Todo: assign these
  io.mem_to_ooo.sqDeqPtr := lsq.io.sqDeqPtr
  io.mem_to_ooo.lqDeqPtr := lsq.io.lqDeqPtr
  lsq.io.tl_d_channel <> dcache.io.lsu.tl_d_channel

  // LSQ to store buffer
  // Port 0 is shared with vSegmentUnit: its valid is OR-ed in and the bits
  // selected by Mux1H. NOTE(review): Mux1H assumes the two valids are
  // mutually exclusive — presumably guaranteed by the segment unit draining
  // the SQ first; confirm, since simultaneous valids would corrupt the data.
  lsq.io.sbuffer        <> sbuffer.io.in
  sbuffer.io.in(0).valid := lsq.io.sbuffer(0).valid || vSegmentUnit.io.sbuffer.valid
  sbuffer.io.in(0).bits  := Mux1H(Seq(
    vSegmentUnit.io.sbuffer.valid -> vSegmentUnit.io.sbuffer.bits,
    lsq.io.sbuffer(0).valid       -> lsq.io.sbuffer(0).bits
  ))
  vSegmentUnit.io.sbuffer.ready := sbuffer.io.in(0).ready
  lsq.io.sqEmpty        <> sbuffer.io.sqempty
  dcache.io.force_write := lsq.io.force_write
1505c590fb32Scz4e
  // Initialize when unenabled difftest.
  sbuffer.io.vecDifftestInfo      := DontCare
  lsq.io.sbufferVecDifftestInfo   := DontCare
  vSegmentUnit.io.vecDifftestInfo := DontCare
  if (env.EnableDifftest) {
    // Difftest metadata mirrors the sbuffer.io.in(0) sharing above: port 0
    // muxes between vSegmentUnit and the SQ, remaining ports connect 1:1.
    // (.map is used for its side effect here; foreach would be equivalent.)
    sbuffer.io.vecDifftestInfo .zipWithIndex.map{ case (sbufferPort, index) =>
      if (index == 0) {
        val vSegmentDifftestValid = vSegmentUnit.io.vecDifftestInfo.valid
        sbufferPort.valid := Mux(vSegmentDifftestValid, vSegmentUnit.io.vecDifftestInfo.valid, lsq.io.sbufferVecDifftestInfo(0).valid)
        sbufferPort.bits  := Mux(vSegmentDifftestValid, vSegmentUnit.io.vecDifftestInfo.bits, lsq.io.sbufferVecDifftestInfo(0).bits)

        // Both producers see the same ready; only one should be valid at a time.
        vSegmentUnit.io.vecDifftestInfo.ready  := sbufferPort.ready
        lsq.io.sbufferVecDifftestInfo(0).ready := sbufferPort.ready
      } else {
         sbufferPort <> lsq.io.sbufferVecDifftestInfo(index)
      }
    }
  }
1524c590fb32Scz4e
  // lsq.io.vecStoreRetire <> vsFlowQueue.io.sqRelease
  // lsq.io.vecWriteback.valid := vlWrapper.io.uopWriteback.fire &&
  //   vlWrapper.io.uopWriteback.bits.uop.vpu.lastUop
  // lsq.io.vecWriteback.bits := vlWrapper.io.uopWriteback.bits

  // vector
  // Per-port acceptance predicates for vector loads/stores: the split unit
  // must be ready and the incoming uop must actually be of that class.
  val vLoadCanAccept  = (0 until VlduCnt).map(i =>
    vlSplit(i).io.in.ready && VlduType.isVecLd(io.ooo_to_mem.issueVldu(i).bits.uop.fuOpType)
  )
  val vStoreCanAccept = (0 until VstuCnt).map(i =>
    vsSplit(i).io.in.ready && VstuType.isVecSt(io.ooo_to_mem.issueVldu(i).bits.uop.fuOpType)
  )
  // Segment instructions are handled by vSegmentUnit, not the split units;
  // the trailing "fix-vl" uop of vleff also bypasses the load split path.
  val isSegment     = io.ooo_to_mem.issueVldu.head.valid && isVsegls(io.ooo_to_mem.issueVldu.head.bits.uop.fuType)
  val isFixVlUop    = io.ooo_to_mem.issueVldu.map{x =>
    x.bits.uop.vpu.isVleff && x.bits.uop.vpu.lastUop && x.valid
  }
1541c590fb32Scz4e
  // init port
  /**
   * TODO: splited vsMergebuffer maybe remove, if one RS can accept two feedback, or don't need RS replay uop
   * for now:
   *  RS0 -> VsSplit0 -> stu0 -> vsMergebuffer0 -> feedback -> RS0
   *  RS1 -> VsSplit1 -> stu1 -> vsMergebuffer1 -> feedback -> RS1
   *
   * vector load don't need feedback
   *
   *  RS0 -> VlSplit0  -> ldu0 -> |
   *  RS1 -> VlSplit1  -> ldu1 -> |  -> vlMergebuffer
   *        replayIO   -> ldu3 -> |
   * */
  // Default-initialize merge-buffer inputs, then wire the misalign-buffer
  // flush/index overrides (last-connect over the DontCare defaults).
  (0 until VstuCnt).foreach{i =>
    vsMergeBuffer(i).io.fromPipeline := DontCare
    vsMergeBuffer(i).io.fromSplit := DontCare

    vsMergeBuffer(i).io.fromMisalignBuffer.get.flush := storeMisalignBuffer.io.toVecStoreMergeBuffer(i).flush
    vsMergeBuffer(i).io.fromMisalignBuffer.get.mbIndex := storeMisalignBuffer.io.toVecStoreMergeBuffer(i).mbIndex
  }

  (0 until VstuCnt).foreach{i =>
    vsSplit(i).io.redirect <> redirect
    vsSplit(i).io.in <> io.ooo_to_mem.issueVldu(i)
    // Gate valid: only accepted vector-store uops enter the split unit,
    // and segment instructions are excluded (handled by vSegmentUnit).
    vsSplit(i).io.in.valid := io.ooo_to_mem.issueVldu(i).valid &&
                              vStoreCanAccept(i) && !isSegment
    vsSplit(i).io.toMergeBuffer <> vsMergeBuffer(i).io.fromSplit.head
    // One pipeline stage into the store unit, flushed on redirect.
    NewPipelineConnect(
      vsSplit(i).io.out, storeUnits(i).io.vecstin, storeUnits(i).io.vecstin.fire,
      Mux(vsSplit(i).io.out.fire, vsSplit(i).io.out.bits.uop.robIdx.needFlush(io.redirect), storeUnits(i).io.vecstin.bits.uop.robIdx.needFlush(io.redirect)),
      Option("VsSplitConnectStu")
    )
    vsSplit(i).io.vstd.get := DontCare // Todo: Discuss how to pass vector store data

    vsSplit(i).io.vstdMisalign.get.storeMisalignBufferEmpty := !storeMisalignBuffer.io.full
    vsSplit(i).io.vstdMisalign.get.storePipeEmpty := !storeUnits(i).io.s0_s1_valid

  }
1580c590fb32Scz4e  (0 until VlduCnt).foreach{i =>
1581c590fb32Scz4e    vlSplit(i).io.redirect <> redirect
1582c590fb32Scz4e    vlSplit(i).io.in <> io.ooo_to_mem.issueVldu(i)
1583c590fb32Scz4e    vlSplit(i).io.in.valid := io.ooo_to_mem.issueVldu(i).valid &&
1584c590fb32Scz4e                              vLoadCanAccept(i) && !isSegment && !isFixVlUop(i)
1585c590fb32Scz4e    vlSplit(i).io.toMergeBuffer <> vlMergeBuffer.io.fromSplit(i)
1586c590fb32Scz4e    vlSplit(i).io.threshold.get.valid := vlMergeBuffer.io.toSplit.get.threshold
1587c590fb32Scz4e    vlSplit(i).io.threshold.get.bits  := lsq.io.lqDeqPtr
1588c590fb32Scz4e    NewPipelineConnect(
1589c590fb32Scz4e      vlSplit(i).io.out, loadUnits(i).io.vecldin, loadUnits(i).io.vecldin.fire,
1590c590fb32Scz4e      Mux(vlSplit(i).io.out.fire, vlSplit(i).io.out.bits.uop.robIdx.needFlush(io.redirect), loadUnits(i).io.vecldin.bits.uop.robIdx.needFlush(io.redirect)),
1591c590fb32Scz4e      Option("VlSplitConnectLdu")
1592c590fb32Scz4e    )
1593c590fb32Scz4e
1594c590fb32Scz4e    //Subsequent instrction will be blocked
1595c590fb32Scz4e    vfofBuffer.io.in(i).valid := io.ooo_to_mem.issueVldu(i).valid
1596c590fb32Scz4e    vfofBuffer.io.in(i).bits  := io.ooo_to_mem.issueVldu(i).bits
1597c590fb32Scz4e  }
1598c590fb32Scz4e  (0 until LduCnt).foreach{i=>
1599c590fb32Scz4e    loadUnits(i).io.vecldout.ready         := vlMergeBuffer.io.fromPipeline(i).ready
1600c590fb32Scz4e    loadMisalignBuffer.io.vecWriteBack.ready := true.B
1601c590fb32Scz4e
1602c590fb32Scz4e    if (i == MisalignWBPort) {
1603c590fb32Scz4e      when(loadUnits(i).io.vecldout.valid) {
1604c590fb32Scz4e        vlMergeBuffer.io.fromPipeline(i).valid := loadUnits(i).io.vecldout.valid
1605c590fb32Scz4e        vlMergeBuffer.io.fromPipeline(i).bits  := loadUnits(i).io.vecldout.bits
1606c590fb32Scz4e      } .otherwise {
1607c590fb32Scz4e        vlMergeBuffer.io.fromPipeline(i).valid   := loadMisalignBuffer.io.vecWriteBack.valid
1608c590fb32Scz4e        vlMergeBuffer.io.fromPipeline(i).bits    := loadMisalignBuffer.io.vecWriteBack.bits
1609c590fb32Scz4e      }
1610c590fb32Scz4e    } else {
1611c590fb32Scz4e      vlMergeBuffer.io.fromPipeline(i).valid := loadUnits(i).io.vecldout.valid
1612c590fb32Scz4e      vlMergeBuffer.io.fromPipeline(i).bits  := loadUnits(i).io.vecldout.bits
1613c590fb32Scz4e    }
1614c590fb32Scz4e  }
1615c590fb32Scz4e
1616c590fb32Scz4e  (0 until StaCnt).foreach{i=>
1617c590fb32Scz4e    if(i < VstuCnt){
1618c590fb32Scz4e      storeUnits(i).io.vecstout.ready := true.B
1619c590fb32Scz4e      storeMisalignBuffer.io.vecWriteBack(i).ready := vsMergeBuffer(i).io.fromPipeline.head.ready
1620c590fb32Scz4e
1621c590fb32Scz4e      when(storeUnits(i).io.vecstout.valid) {
1622c590fb32Scz4e        vsMergeBuffer(i).io.fromPipeline.head.valid := storeUnits(i).io.vecstout.valid
1623c590fb32Scz4e        vsMergeBuffer(i).io.fromPipeline.head.bits  := storeUnits(i).io.vecstout.bits
1624c590fb32Scz4e      } .otherwise {
1625c590fb32Scz4e        vsMergeBuffer(i).io.fromPipeline.head.valid   := storeMisalignBuffer.io.vecWriteBack(i).valid
1626c590fb32Scz4e        vsMergeBuffer(i).io.fromPipeline.head.bits    := storeMisalignBuffer.io.vecWriteBack(i).bits
1627c590fb32Scz4e      }
1628c590fb32Scz4e    }
1629c590fb32Scz4e  }
1630c590fb32Scz4e
1631c590fb32Scz4e  (0 until VlduCnt).foreach{i=>
1632c590fb32Scz4e    io.ooo_to_mem.issueVldu(i).ready := vLoadCanAccept(i) || vStoreCanAccept(i)
1633c590fb32Scz4e  }
1634c590fb32Scz4e
1635c590fb32Scz4e  vlMergeBuffer.io.redirect <> redirect
1636c590fb32Scz4e  vsMergeBuffer.map(_.io.redirect <> redirect)
1637c590fb32Scz4e  (0 until VlduCnt).foreach{i=>
1638c590fb32Scz4e    vlMergeBuffer.io.toLsq(i) <> lsq.io.ldvecFeedback(i)
1639c590fb32Scz4e  }
1640c590fb32Scz4e  (0 until VstuCnt).foreach{i=>
1641c590fb32Scz4e    vsMergeBuffer(i).io.toLsq.head <> lsq.io.stvecFeedback(i)
1642c590fb32Scz4e  }
1643c590fb32Scz4e
1644c590fb32Scz4e  (0 until VlduCnt).foreach{i=>
1645c590fb32Scz4e    // send to RS
1646c590fb32Scz4e    vlMergeBuffer.io.feedback(i) <> io.mem_to_ooo.vlduIqFeedback(i).feedbackSlow
1647c590fb32Scz4e    io.mem_to_ooo.vlduIqFeedback(i).feedbackFast := DontCare
1648c590fb32Scz4e  }
1649c590fb32Scz4e  (0 until VstuCnt).foreach{i =>
1650c590fb32Scz4e    // send to RS
1651c590fb32Scz4e    if (i == 0){
1652c590fb32Scz4e      io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow.valid := vsMergeBuffer(i).io.feedback.head.valid || vSegmentUnit.io.feedback.valid
1653c590fb32Scz4e      io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow.bits := Mux1H(Seq(
1654c590fb32Scz4e        vSegmentUnit.io.feedback.valid -> vSegmentUnit.io.feedback.bits,
1655c590fb32Scz4e        vsMergeBuffer(i).io.feedback.head.valid ->  vsMergeBuffer(i).io.feedback.head.bits
1656c590fb32Scz4e      ))
1657c590fb32Scz4e      io.mem_to_ooo.vstuIqFeedback(i).feedbackFast := DontCare
1658c590fb32Scz4e    } else {
1659c590fb32Scz4e      vsMergeBuffer(i).io.feedback.head <> io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow
1660c590fb32Scz4e      io.mem_to_ooo.vstuIqFeedback(i).feedbackFast := DontCare
1661c590fb32Scz4e    }
1662c590fb32Scz4e  }
1663c590fb32Scz4e
1664c590fb32Scz4e  (0 until VlduCnt).foreach{i=>
1665c590fb32Scz4e    if (i == 0){ // for segmentUnit, segmentUnit use port0 writeback
1666c590fb32Scz4e      io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid || vSegmentUnit.io.uopwriteback.valid
1667c590fb32Scz4e      io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
1668c590fb32Scz4e        vSegmentUnit.io.uopwriteback.valid          -> vSegmentUnit.io.uopwriteback.bits,
1669c590fb32Scz4e        vlMergeBuffer.io.uopWriteback(i).valid      -> vlMergeBuffer.io.uopWriteback(i).bits,
1670c590fb32Scz4e        vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
1671c590fb32Scz4e      ))
1672c590fb32Scz4e      vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready && !vSegmentUnit.io.uopwriteback.valid
1673c590fb32Scz4e      vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid && !vSegmentUnit.io.uopwriteback.valid
1674c590fb32Scz4e      vSegmentUnit.io.uopwriteback.ready := io.mem_to_ooo.writebackVldu(i).ready
1675c590fb32Scz4e    } else if (i == 1) {
1676c590fb32Scz4e      io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid || vfofBuffer.io.uopWriteback.valid
1677c590fb32Scz4e      io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
1678c590fb32Scz4e        vfofBuffer.io.uopWriteback.valid            -> vfofBuffer.io.uopWriteback.bits,
1679c590fb32Scz4e        vlMergeBuffer.io.uopWriteback(i).valid      -> vlMergeBuffer.io.uopWriteback(i).bits,
1680c590fb32Scz4e        vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
1681c590fb32Scz4e      ))
1682c590fb32Scz4e      vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready && !vfofBuffer.io.uopWriteback.valid
1683c590fb32Scz4e      vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid && !vfofBuffer.io.uopWriteback.valid
1684c590fb32Scz4e      vfofBuffer.io.uopWriteback.ready := io.mem_to_ooo.writebackVldu(i).ready
1685c590fb32Scz4e    } else {
1686c590fb32Scz4e      io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid
1687c590fb32Scz4e      io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
1688c590fb32Scz4e        vlMergeBuffer.io.uopWriteback(i).valid -> vlMergeBuffer.io.uopWriteback(i).bits,
1689c590fb32Scz4e        vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
1690c590fb32Scz4e      ))
1691c590fb32Scz4e      vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready
1692c590fb32Scz4e      vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid
1693c590fb32Scz4e    }
1694c590fb32Scz4e
1695c590fb32Scz4e    vfofBuffer.io.mergeUopWriteback(i).valid := vlMergeBuffer.io.uopWriteback(i).valid
1696c590fb32Scz4e    vfofBuffer.io.mergeUopWriteback(i).bits  := vlMergeBuffer.io.uopWriteback(i).bits
1697c590fb32Scz4e  }
1698c590fb32Scz4e
1699c590fb32Scz4e
1700c590fb32Scz4e  vfofBuffer.io.redirect <> redirect
1701c590fb32Scz4e
1702c590fb32Scz4e  // Sbuffer
1703c590fb32Scz4e  sbuffer.io.csrCtrl    <> csrCtrl
1704c590fb32Scz4e  sbuffer.io.dcache     <> dcache.io.lsu.store
1705c590fb32Scz4e  sbuffer.io.memSetPattenDetected := dcache.io.memSetPattenDetected
1706c590fb32Scz4e  sbuffer.io.force_write <> lsq.io.force_write
1707c590fb32Scz4e  // flush sbuffer
1708c590fb32Scz4e  val cmoFlush = lsq.io.flushSbuffer.valid
1709c590fb32Scz4e  val fenceFlush = io.ooo_to_mem.flushSb
1710c590fb32Scz4e  val atomicsFlush = atomicsUnit.io.flush_sbuffer.valid || vSegmentUnit.io.flush_sbuffer.valid
1711c590fb32Scz4e  val stIsEmpty = sbuffer.io.flush.empty && uncache.io.flush.empty
1712c590fb32Scz4e  io.mem_to_ooo.sbIsEmpty := RegNext(stIsEmpty)
1713c590fb32Scz4e
1714c590fb32Scz4e  // if both of them tries to flush sbuffer at the same time
1715c590fb32Scz4e  // something must have gone wrong
1716c590fb32Scz4e  assert(!(fenceFlush && atomicsFlush && cmoFlush))
1717c590fb32Scz4e  sbuffer.io.flush.valid := RegNext(fenceFlush || atomicsFlush || cmoFlush)
1718c590fb32Scz4e  uncache.io.flush.valid := sbuffer.io.flush.valid
1719c590fb32Scz4e
1720c590fb32Scz4e  // AtomicsUnit: AtomicsUnit will override other control signials,
1721c590fb32Scz4e  // as atomics insts (LR/SC/AMO) will block the pipeline
1722c590fb32Scz4e  val s_normal +: s_atomics = Enum(StaCnt + HyuCnt + 1)
1723c590fb32Scz4e  val state = RegInit(s_normal)
1724c590fb32Scz4e
1725c590fb32Scz4e  val st_atomics = Seq.tabulate(StaCnt)(i =>
1726c590fb32Scz4e    io.ooo_to_mem.issueSta(i).valid && FuType.storeIsAMO((io.ooo_to_mem.issueSta(i).bits.uop.fuType))
1727c590fb32Scz4e  ) ++ Seq.tabulate(HyuCnt)(i =>
1728c590fb32Scz4e    io.ooo_to_mem.issueHya(i).valid && FuType.storeIsAMO((io.ooo_to_mem.issueHya(i).bits.uop.fuType))
1729c590fb32Scz4e  )
1730c590fb32Scz4e
1731c590fb32Scz4e  for (i <- 0 until StaCnt) when(st_atomics(i)) {
1732c590fb32Scz4e    io.ooo_to_mem.issueSta(i).ready := atomicsUnit.io.in.ready
1733c590fb32Scz4e    storeUnits(i).io.stin.valid := false.B
1734c590fb32Scz4e
1735c590fb32Scz4e    state := s_atomics(i)
1736c590fb32Scz4e  }
1737c590fb32Scz4e  for (i <- 0 until HyuCnt) when(st_atomics(StaCnt + i)) {
1738c590fb32Scz4e    io.ooo_to_mem.issueHya(i).ready := atomicsUnit.io.in.ready
1739c590fb32Scz4e    hybridUnits(i).io.lsin.valid := false.B
1740c590fb32Scz4e
1741c590fb32Scz4e    state := s_atomics(StaCnt + i)
1742c590fb32Scz4e    assert(!st_atomics.zipWithIndex.filterNot(_._2 == StaCnt + i).unzip._1.reduce(_ || _))
1743c590fb32Scz4e  }
1744c590fb32Scz4e  when (atomicsUnit.io.out.valid) {
1745c590fb32Scz4e    state := s_normal
1746c590fb32Scz4e  }
1747c590fb32Scz4e
1748c590fb32Scz4e  atomicsUnit.io.in.valid := st_atomics.reduce(_ || _)
1749c590fb32Scz4e  atomicsUnit.io.in.bits  := Mux1H(Seq.tabulate(StaCnt)(i =>
1750c590fb32Scz4e    st_atomics(i) -> io.ooo_to_mem.issueSta(i).bits) ++
1751c590fb32Scz4e    Seq.tabulate(HyuCnt)(i => st_atomics(StaCnt+i) -> io.ooo_to_mem.issueHya(i).bits))
1752c590fb32Scz4e  atomicsUnit.io.storeDataIn.zipWithIndex.foreach { case (stdin, i) =>
1753c590fb32Scz4e    stdin.valid := st_data_atomics(i)
1754c590fb32Scz4e    stdin.bits := stData(i).bits
1755c590fb32Scz4e  }
1756c590fb32Scz4e  atomicsUnit.io.redirect <> redirect
1757c590fb32Scz4e
1758c590fb32Scz4e  // TODO: complete amo's pmp support
1759c590fb32Scz4e  val amoTlb = dtlb_ld(0).requestor(0)
1760c590fb32Scz4e  atomicsUnit.io.dtlb.resp.valid := false.B
1761c590fb32Scz4e  atomicsUnit.io.dtlb.resp.bits  := DontCare
1762c590fb32Scz4e  atomicsUnit.io.dtlb.req.ready  := amoTlb.req.ready
1763c590fb32Scz4e  atomicsUnit.io.pmpResp := pmp_check(0).resp
1764c590fb32Scz4e
1765c590fb32Scz4e  atomicsUnit.io.dcache <> dcache.io.lsu.atomics
1766c590fb32Scz4e  atomicsUnit.io.flush_sbuffer.empty := stIsEmpty
1767c590fb32Scz4e
1768c590fb32Scz4e  atomicsUnit.io.csrCtrl := csrCtrl
1769c590fb32Scz4e
1770c590fb32Scz4e  // for atomicsUnit, it uses loadUnit(0)'s TLB port
1771c590fb32Scz4e
1772c590fb32Scz4e  when (state =/= s_normal) {
1773c590fb32Scz4e    // use store wb port instead of load
1774c590fb32Scz4e    loadUnits(0).io.ldout.ready := false.B
1775c590fb32Scz4e    // use load_0's TLB
1776c590fb32Scz4e    atomicsUnit.io.dtlb <> amoTlb
1777c590fb32Scz4e
1778c590fb32Scz4e    // hw prefetch should be disabled while executing atomic insts
1779c590fb32Scz4e    loadUnits.map(i => i.io.prefetch_req.valid := false.B)
1780c590fb32Scz4e
1781c590fb32Scz4e    // make sure there's no in-flight uops in load unit
1782c590fb32Scz4e    assert(!loadUnits(0).io.ldout.valid)
1783c590fb32Scz4e  }
1784c590fb32Scz4e
1785c590fb32Scz4e  lsq.io.flushSbuffer.empty := sbuffer.io.sbempty
1786c590fb32Scz4e
1787c590fb32Scz4e  for (i <- 0 until StaCnt) {
1788c590fb32Scz4e    when (state === s_atomics(i)) {
1789c590fb32Scz4e      io.mem_to_ooo.staIqFeedback(i).feedbackSlow := atomicsUnit.io.feedbackSlow
1790c590fb32Scz4e      assert(!storeUnits(i).io.feedback_slow.valid)
1791c590fb32Scz4e    }
1792c590fb32Scz4e  }
1793c590fb32Scz4e  for (i <- 0 until HyuCnt) {
1794c590fb32Scz4e    when (state === s_atomics(StaCnt + i)) {
1795c590fb32Scz4e      io.mem_to_ooo.hyuIqFeedback(i).feedbackSlow := atomicsUnit.io.feedbackSlow
1796c590fb32Scz4e      assert(!hybridUnits(i).io.feedback_slow.valid)
1797c590fb32Scz4e    }
1798c590fb32Scz4e  }
1799c590fb32Scz4e
1800c590fb32Scz4e  lsq.io.exceptionAddr.isStore := io.ooo_to_mem.isStoreException
1801c590fb32Scz4e  // Exception address is used several cycles after flush.
1802c590fb32Scz4e  // We delay it by 10 cycles to ensure its flush safety.
1803c590fb32Scz4e  val atomicsException = RegInit(false.B)
1804c590fb32Scz4e  when (DelayN(redirect.valid, 10) && atomicsException) {
1805c590fb32Scz4e    atomicsException := false.B
1806c590fb32Scz4e  }.elsewhen (atomicsUnit.io.exceptionInfo.valid) {
1807c590fb32Scz4e    atomicsException := true.B
1808c590fb32Scz4e  }
1809c590fb32Scz4e
1810c590fb32Scz4e  val misalignBufExceptionOverwrite = loadMisalignBuffer.io.overwriteExpBuf.valid || storeMisalignBuffer.io.overwriteExpBuf.valid
1811c590fb32Scz4e  val misalignBufExceptionVaddr = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
1812c590fb32Scz4e    loadMisalignBuffer.io.overwriteExpBuf.vaddr,
1813c590fb32Scz4e    storeMisalignBuffer.io.overwriteExpBuf.vaddr
1814c590fb32Scz4e  )
1815c590fb32Scz4e  val misalignBufExceptionIsHyper = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
1816c590fb32Scz4e    loadMisalignBuffer.io.overwriteExpBuf.isHyper,
1817c590fb32Scz4e    storeMisalignBuffer.io.overwriteExpBuf.isHyper
1818c590fb32Scz4e  )
1819c590fb32Scz4e  val misalignBufExceptionGpaddr = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
1820c590fb32Scz4e    loadMisalignBuffer.io.overwriteExpBuf.gpaddr,
1821c590fb32Scz4e    storeMisalignBuffer.io.overwriteExpBuf.gpaddr
1822c590fb32Scz4e  )
1823c590fb32Scz4e  val misalignBufExceptionIsForVSnonLeafPTE = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
1824c590fb32Scz4e    loadMisalignBuffer.io.overwriteExpBuf.isForVSnonLeafPTE,
1825c590fb32Scz4e    storeMisalignBuffer.io.overwriteExpBuf.isForVSnonLeafPTE
1826c590fb32Scz4e  )
1827c590fb32Scz4e
1828c590fb32Scz4e  val vSegmentException = RegInit(false.B)
1829c590fb32Scz4e  when (DelayN(redirect.valid, 10) && vSegmentException) {
1830c590fb32Scz4e    vSegmentException := false.B
1831c590fb32Scz4e  }.elsewhen (vSegmentUnit.io.exceptionInfo.valid) {
1832c590fb32Scz4e    vSegmentException := true.B
1833c590fb32Scz4e  }
1834c590fb32Scz4e  val atomicsExceptionAddress = RegEnable(atomicsUnit.io.exceptionInfo.bits.vaddr, atomicsUnit.io.exceptionInfo.valid)
1835c590fb32Scz4e  val vSegmentExceptionVstart = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vstart, vSegmentUnit.io.exceptionInfo.valid)
1836c590fb32Scz4e  val vSegmentExceptionVl     = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vl, vSegmentUnit.io.exceptionInfo.valid)
1837c590fb32Scz4e  val vSegmentExceptionAddress = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vaddr, vSegmentUnit.io.exceptionInfo.valid)
1838c590fb32Scz4e  val atomicsExceptionGPAddress = RegEnable(atomicsUnit.io.exceptionInfo.bits.gpaddr, atomicsUnit.io.exceptionInfo.valid)
1839c590fb32Scz4e  val vSegmentExceptionGPAddress = RegEnable(vSegmentUnit.io.exceptionInfo.bits.gpaddr, vSegmentUnit.io.exceptionInfo.valid)
1840c590fb32Scz4e  val atomicsExceptionIsForVSnonLeafPTE = RegEnable(atomicsUnit.io.exceptionInfo.bits.isForVSnonLeafPTE, atomicsUnit.io.exceptionInfo.valid)
1841c590fb32Scz4e  val vSegmentExceptionIsForVSnonLeafPTE = RegEnable(vSegmentUnit.io.exceptionInfo.bits.isForVSnonLeafPTE, vSegmentUnit.io.exceptionInfo.valid)
1842c590fb32Scz4e
1843c590fb32Scz4e  val exceptionVaddr = Mux(
1844c590fb32Scz4e    atomicsException,
1845c590fb32Scz4e    atomicsExceptionAddress,
1846c590fb32Scz4e    Mux(misalignBufExceptionOverwrite,
1847c590fb32Scz4e      misalignBufExceptionVaddr,
1848c590fb32Scz4e      Mux(vSegmentException,
1849c590fb32Scz4e        vSegmentExceptionAddress,
1850c590fb32Scz4e        lsq.io.exceptionAddr.vaddr
1851c590fb32Scz4e      )
1852c590fb32Scz4e    )
1853c590fb32Scz4e  )
1854c590fb32Scz4e  // whether vaddr need ext or is hyper inst:
1855c590fb32Scz4e  // VaNeedExt: atomicsException -> false; misalignBufExceptionOverwrite -> true; vSegmentException -> false
1856c590fb32Scz4e  // IsHyper: atomicsException -> false; vSegmentException -> false
1857c590fb32Scz4e  val exceptionVaNeedExt = !atomicsException &&
1858c590fb32Scz4e    (misalignBufExceptionOverwrite ||
1859c590fb32Scz4e      (!vSegmentException && lsq.io.exceptionAddr.vaNeedExt))
1860c590fb32Scz4e  val exceptionIsHyper = !atomicsException &&
1861c590fb32Scz4e    (misalignBufExceptionOverwrite && misalignBufExceptionIsHyper ||
1862c590fb32Scz4e      (!vSegmentException && lsq.io.exceptionAddr.isHyper && !misalignBufExceptionOverwrite))
1863c590fb32Scz4e
1864168f1995SXu, Zefan  def GenExceptionVa(
1865168f1995SXu, Zefan    mode: UInt, isVirt: Bool, vaNeedExt: Bool,
1866c590fb32Scz4e    satp: TlbSatpBundle, vsatp: TlbSatpBundle, hgatp: TlbHgatpBundle,
1867168f1995SXu, Zefan    vaddr: UInt
1868168f1995SXu, Zefan  ) = {
1869c590fb32Scz4e    require(VAddrBits >= 50)
1870c590fb32Scz4e
1871168f1995SXu, Zefan    val satpNone = satp.mode === 0.U
1872168f1995SXu, Zefan    val satpSv39 = satp.mode === 8.U
1873168f1995SXu, Zefan    val satpSv48 = satp.mode === 9.U
1874c590fb32Scz4e
1875168f1995SXu, Zefan    val vsatpNone = vsatp.mode === 0.U
1876168f1995SXu, Zefan    val vsatpSv39 = vsatp.mode === 8.U
1877168f1995SXu, Zefan    val vsatpSv48 = vsatp.mode === 9.U
1878168f1995SXu, Zefan
1879168f1995SXu, Zefan    val hgatpNone = hgatp.mode === 0.U
1880168f1995SXu, Zefan    val hgatpSv39x4 = hgatp.mode === 8.U
1881168f1995SXu, Zefan    val hgatpSv48x4 = hgatp.mode === 9.U
1882168f1995SXu, Zefan
1883168f1995SXu, Zefan    // For !isVirt, mode check is necessary, as we don't want virtual memory in M-mode.
1884168f1995SXu, Zefan    // For isVirt, mode check is unnecessary, as virt won't be 1 in M-mode.
1885168f1995SXu, Zefan    // Also, isVirt includes Hyper Insts, which don't care mode either.
1886168f1995SXu, Zefan
1887168f1995SXu, Zefan    val useBareAddr =
1888168f1995SXu, Zefan      (isVirt && vsatpNone && hgatpNone) ||
1889168f1995SXu, Zefan      (!isVirt && (mode === CSRConst.ModeM)) ||
1890168f1995SXu, Zefan      (!isVirt && (mode =/= CSRConst.ModeM) && satpNone)
1891168f1995SXu, Zefan    val useSv39Addr =
1892168f1995SXu, Zefan      (isVirt && vsatpSv39) ||
1893168f1995SXu, Zefan      (!isVirt && (mode =/= CSRConst.ModeM) && satpSv39)
1894168f1995SXu, Zefan    val useSv48Addr =
1895168f1995SXu, Zefan      (isVirt && vsatpSv48) ||
1896168f1995SXu, Zefan      (!isVirt && (mode =/= CSRConst.ModeM) && satpSv48)
1897168f1995SXu, Zefan    val useSv39x4Addr = isVirt && vsatpNone && hgatpSv39x4
1898168f1995SXu, Zefan    val useSv48x4Addr = isVirt && vsatpNone && hgatpSv48x4
1899c590fb32Scz4e
1900c590fb32Scz4e    val bareAddr   = ZeroExt(vaddr(PAddrBits - 1, 0), XLEN)
1901c590fb32Scz4e    val sv39Addr   = SignExt(vaddr.take(39), XLEN)
1902c590fb32Scz4e    val sv39x4Addr = ZeroExt(vaddr.take(39 + 2), XLEN)
1903c590fb32Scz4e    val sv48Addr   = SignExt(vaddr.take(48), XLEN)
1904c590fb32Scz4e    val sv48x4Addr = ZeroExt(vaddr.take(48 + 2), XLEN)
1905c590fb32Scz4e
1906c590fb32Scz4e    val ExceptionVa = Wire(UInt(XLEN.W))
1907c590fb32Scz4e    when (vaNeedExt) {
1908c590fb32Scz4e      ExceptionVa := Mux1H(Seq(
1909168f1995SXu, Zefan        (useBareAddr)   -> bareAddr,
1910168f1995SXu, Zefan        (useSv39Addr)   -> sv39Addr,
1911168f1995SXu, Zefan        (useSv48Addr)   -> sv48Addr,
1912168f1995SXu, Zefan        (useSv39x4Addr) -> sv39x4Addr,
1913168f1995SXu, Zefan        (useSv48x4Addr) -> sv48x4Addr,
1914c590fb32Scz4e      ))
1915c590fb32Scz4e    } .otherwise {
1916c590fb32Scz4e      ExceptionVa := vaddr
1917c590fb32Scz4e    }
1918c590fb32Scz4e
1919c590fb32Scz4e    ExceptionVa
1920c590fb32Scz4e  }
1921c590fb32Scz4e
1922c590fb32Scz4e  io.mem_to_ooo.lsqio.vaddr := RegNext(
1923c590fb32Scz4e    GenExceptionVa(tlbcsr.priv.dmode, tlbcsr.priv.virt || exceptionIsHyper, exceptionVaNeedExt,
1924c590fb32Scz4e    tlbcsr.satp, tlbcsr.vsatp, tlbcsr.hgatp, exceptionVaddr)
1925c590fb32Scz4e  )
1926c590fb32Scz4e
1927c590fb32Scz4e  // vsegment instruction is executed atomic, which mean atomicsException and vSegmentException should not raise at the same time.
1928c590fb32Scz4e  XSError(atomicsException && vSegmentException, "atomicsException and vSegmentException raise at the same time!")
1929c590fb32Scz4e  io.mem_to_ooo.lsqio.vstart := RegNext(Mux(vSegmentException,
1930c590fb32Scz4e                                            vSegmentExceptionVstart,
1931c590fb32Scz4e                                            lsq.io.exceptionAddr.vstart)
1932c590fb32Scz4e  )
1933c590fb32Scz4e  io.mem_to_ooo.lsqio.vl     := RegNext(Mux(vSegmentException,
1934c590fb32Scz4e                                            vSegmentExceptionVl,
1935c590fb32Scz4e                                            lsq.io.exceptionAddr.vl)
1936c590fb32Scz4e  )
1937c590fb32Scz4e
1938c590fb32Scz4e  XSError(atomicsException && atomicsUnit.io.in.valid, "new instruction before exception triggers\n")
1939c590fb32Scz4e  io.mem_to_ooo.lsqio.gpaddr := RegNext(Mux(
1940c590fb32Scz4e    atomicsException,
1941c590fb32Scz4e    atomicsExceptionGPAddress,
1942c590fb32Scz4e    Mux(misalignBufExceptionOverwrite,
1943c590fb32Scz4e      misalignBufExceptionGpaddr,
1944c590fb32Scz4e      Mux(vSegmentException,
1945c590fb32Scz4e        vSegmentExceptionGPAddress,
1946c590fb32Scz4e        lsq.io.exceptionAddr.gpaddr
1947c590fb32Scz4e      )
1948c590fb32Scz4e    )
1949c590fb32Scz4e  ))
1950c590fb32Scz4e  io.mem_to_ooo.lsqio.isForVSnonLeafPTE := RegNext(Mux(
1951c590fb32Scz4e    atomicsException,
1952c590fb32Scz4e    atomicsExceptionIsForVSnonLeafPTE,
1953c590fb32Scz4e    Mux(misalignBufExceptionOverwrite,
1954c590fb32Scz4e      misalignBufExceptionIsForVSnonLeafPTE,
1955c590fb32Scz4e      Mux(vSegmentException,
1956c590fb32Scz4e        vSegmentExceptionIsForVSnonLeafPTE,
1957c590fb32Scz4e        lsq.io.exceptionAddr.isForVSnonLeafPTE
1958c590fb32Scz4e      )
1959c590fb32Scz4e    )
1960c590fb32Scz4e  ))
1961c590fb32Scz4e  io.mem_to_ooo.topToBackendBypass match { case x =>
1962c590fb32Scz4e    x.hartId            := io.hartId
1963c590fb32Scz4e    x.l2FlushDone       := RegNext(io.l2_flush_done)
1964c590fb32Scz4e    x.externalInterrupt.msip  := outer.clint_int_sink.in.head._1(0)
1965c590fb32Scz4e    x.externalInterrupt.mtip  := outer.clint_int_sink.in.head._1(1)
1966c590fb32Scz4e    x.externalInterrupt.meip  := outer.plic_int_sink.in.head._1(0)
1967c590fb32Scz4e    x.externalInterrupt.seip  := outer.plic_int_sink.in.last._1(0)
1968c590fb32Scz4e    x.externalInterrupt.debug := outer.debug_int_sink.in.head._1(0)
196976cb49abScz4e    x.externalInterrupt.nmi.nmi_31 := outer.nmi_int_sink.in.head._1(0) | outer.beu_local_int_sink.in.head._1(0)
1970c590fb32Scz4e    x.externalInterrupt.nmi.nmi_43 := outer.nmi_int_sink.in.head._1(1)
1971c590fb32Scz4e    x.msiInfo           := DelayNWithValid(io.fromTopToBackend.msiInfo, 1)
1972c590fb32Scz4e    x.clintTime         := DelayNWithValid(io.fromTopToBackend.clintTime, 1)
1973c590fb32Scz4e  }
1974c590fb32Scz4e
1975c590fb32Scz4e  io.memInfo.sqFull := RegNext(lsq.io.sqFull)
1976c590fb32Scz4e  io.memInfo.lqFull := RegNext(lsq.io.lqFull)
1977c590fb32Scz4e  io.memInfo.dcacheMSHRFull := RegNext(dcache.io.mshrFull)
1978c590fb32Scz4e
1979c590fb32Scz4e  io.inner_hartId := io.hartId
1980c590fb32Scz4e  io.inner_reset_vector := RegNext(io.outer_reset_vector)
1981c590fb32Scz4e  io.outer_cpu_halt := io.ooo_to_mem.backendToTopBypass.cpuHalted
1982c590fb32Scz4e  io.outer_l2_flush_en := io.ooo_to_mem.csrCtrl.flush_l2_enable
1983c590fb32Scz4e  io.outer_power_down_en := io.ooo_to_mem.csrCtrl.power_down_enable
1984c590fb32Scz4e  io.outer_cpu_critical_error := io.ooo_to_mem.backendToTopBypass.cpuCriticalError
19858cfc24b2STang Haojin  io.outer_msi_ack := io.ooo_to_mem.backendToTopBypass.msiAck
1986c590fb32Scz4e  io.outer_beu_errors_icache := RegNext(io.inner_beu_errors_icache)
1987c590fb32Scz4e  io.inner_hc_perfEvents <> RegNext(io.outer_hc_perfEvents)
1988c590fb32Scz4e
1989c590fb32Scz4e  // vector segmentUnit
1990c590fb32Scz4e  vSegmentUnit.io.in.bits <> io.ooo_to_mem.issueVldu.head.bits
1991c590fb32Scz4e  vSegmentUnit.io.in.valid := isSegment && io.ooo_to_mem.issueVldu.head.valid// is segment instruction
1992c590fb32Scz4e  vSegmentUnit.io.dtlb.resp.bits <> dtlb_reqs.take(LduCnt).head.resp.bits
1993c590fb32Scz4e  vSegmentUnit.io.dtlb.resp.valid <> dtlb_reqs.take(LduCnt).head.resp.valid
1994c590fb32Scz4e  vSegmentUnit.io.pmpResp <> pmp_check.head.resp
1995c590fb32Scz4e  vSegmentUnit.io.flush_sbuffer.empty := stIsEmpty
1996c590fb32Scz4e  vSegmentUnit.io.redirect <> redirect
1997c590fb32Scz4e  vSegmentUnit.io.rdcache.resp.bits := dcache.io.lsu.load(0).resp.bits
1998c590fb32Scz4e  vSegmentUnit.io.rdcache.resp.valid := dcache.io.lsu.load(0).resp.valid
1999c590fb32Scz4e  vSegmentUnit.io.rdcache.s2_bank_conflict := dcache.io.lsu.load(0).s2_bank_conflict
2000c590fb32Scz4e  // -------------------------
2001c590fb32Scz4e  // Vector Segment Triggers
2002c590fb32Scz4e  // -------------------------
2003c590fb32Scz4e  vSegmentUnit.io.fromCsrTrigger.tdataVec := tdata
2004c590fb32Scz4e  vSegmentUnit.io.fromCsrTrigger.tEnableVec := tEnable
2005c590fb32Scz4e  vSegmentUnit.io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
2006c590fb32Scz4e  vSegmentUnit.io.fromCsrTrigger.debugMode := debugMode
2007c590fb32Scz4e
2008c590fb32Scz4e  // reset tree of MemBlock
2009c590fb32Scz4e  if (p(DebugOptionsKey).ResetGen) {
2010c590fb32Scz4e    val leftResetTree = ResetGenNode(
2011c590fb32Scz4e      Seq(
2012c590fb32Scz4e        ModuleNode(ptw),
2013c590fb32Scz4e        ModuleNode(ptw_to_l2_buffer),
2014c590fb32Scz4e        ModuleNode(lsq),
2015c590fb32Scz4e        ModuleNode(dtlb_st_tlb_st),
2016c590fb32Scz4e        ModuleNode(dtlb_prefetch_tlb_prefetch),
2017c590fb32Scz4e        ModuleNode(pmp)
2018c590fb32Scz4e      )
2019c590fb32Scz4e      ++ pmp_checkers.map(ModuleNode(_))
2020c590fb32Scz4e      ++ (if (prefetcherOpt.isDefined) Seq(ModuleNode(prefetcherOpt.get)) else Nil)
2021c590fb32Scz4e      ++ (if (l1PrefetcherOpt.isDefined) Seq(ModuleNode(l1PrefetcherOpt.get)) else Nil)
2022c590fb32Scz4e    )
2023c590fb32Scz4e    val rightResetTree = ResetGenNode(
2024c590fb32Scz4e      Seq(
2025c590fb32Scz4e        ModuleNode(sbuffer),
2026c590fb32Scz4e        ModuleNode(dtlb_ld_tlb_ld),
2027c590fb32Scz4e        ModuleNode(dcache),
2028c590fb32Scz4e        ModuleNode(l1d_to_l2_buffer),
2029c590fb32Scz4e        CellNode(io.reset_backend)
2030c590fb32Scz4e      )
2031c590fb32Scz4e    )
2032602aa9f1Scz4e    ResetGen(leftResetTree, reset, sim = false, io.sramTestBypass.fromL2Top.mbistReset)
2033602aa9f1Scz4e    ResetGen(rightResetTree, reset, sim = false, io.sramTestBypass.fromL2Top.mbistReset)
2034c590fb32Scz4e  } else {
2035c590fb32Scz4e    io.reset_backend := DontCare
2036c590fb32Scz4e  }
2037c590fb32Scz4e  io.resetInFrontendBypass.toL2Top := io.resetInFrontendBypass.fromFrontend
2038c590fb32Scz4e  // trace interface
2039c590fb32Scz4e  val traceToL2Top = io.traceCoreInterfaceBypass.toL2Top
2040c590fb32Scz4e  val traceFromBackend = io.traceCoreInterfaceBypass.fromBackend
2041c590fb32Scz4e  traceFromBackend.fromEncoder := RegNext(traceToL2Top.fromEncoder)
2042c590fb32Scz4e  traceToL2Top.toEncoder.trap  := RegEnable(
2043c590fb32Scz4e    traceFromBackend.toEncoder.trap,
2044c590fb32Scz4e    traceFromBackend.toEncoder.groups(0).valid && Itype.isTrap(traceFromBackend.toEncoder.groups(0).bits.itype)
2045c590fb32Scz4e  )
2046c590fb32Scz4e  traceToL2Top.toEncoder.priv := RegEnable(
2047c590fb32Scz4e    traceFromBackend.toEncoder.priv,
2048c590fb32Scz4e    traceFromBackend.toEncoder.groups(0).valid
2049c590fb32Scz4e  )
2050c590fb32Scz4e  (0 until TraceGroupNum).foreach { i =>
2051c590fb32Scz4e    traceToL2Top.toEncoder.groups(i).valid := RegNext(traceFromBackend.toEncoder.groups(i).valid)
2052c590fb32Scz4e    traceToL2Top.toEncoder.groups(i).bits.iretire := RegNext(traceFromBackend.toEncoder.groups(i).bits.iretire)
2053c590fb32Scz4e    traceToL2Top.toEncoder.groups(i).bits.itype := RegNext(traceFromBackend.toEncoder.groups(i).bits.itype)
2054c590fb32Scz4e    traceToL2Top.toEncoder.groups(i).bits.ilastsize := RegEnable(
2055c590fb32Scz4e      traceFromBackend.toEncoder.groups(i).bits.ilastsize,
2056c590fb32Scz4e      traceFromBackend.toEncoder.groups(i).valid
2057c590fb32Scz4e    )
2058c590fb32Scz4e    traceToL2Top.toEncoder.groups(i).bits.iaddr := RegEnable(
2059c590fb32Scz4e      traceFromBackend.toEncoder.groups(i).bits.iaddr,
2060c590fb32Scz4e      traceFromBackend.toEncoder.groups(i).valid
2061c590fb32Scz4e    ) + (RegEnable(
2062c590fb32Scz4e      traceFromBackend.toEncoder.groups(i).bits.ftqOffset.getOrElse(0.U),
2063c590fb32Scz4e      traceFromBackend.toEncoder.groups(i).valid
2064c590fb32Scz4e    ) << instOffsetBits)
2065c590fb32Scz4e  }
2066c590fb32Scz4e
2067c590fb32Scz4e
2068c590fb32Scz4e  io.mem_to_ooo.storeDebugInfo := DontCare
2069c590fb32Scz4e  // store event difftest information
2070c590fb32Scz4e  if (env.EnableDifftest) {
2071c590fb32Scz4e    (0 until EnsbufferWidth).foreach{i =>
2072c590fb32Scz4e        io.mem_to_ooo.storeDebugInfo(i).robidx := sbuffer.io.vecDifftestInfo(i).bits.robIdx
2073c590fb32Scz4e        sbuffer.io.vecDifftestInfo(i).bits.pc := io.mem_to_ooo.storeDebugInfo(i).pc
2074c590fb32Scz4e    }
2075c590fb32Scz4e  }
2076c590fb32Scz4e
2077c590fb32Scz4e  // top-down info
2078c590fb32Scz4e  dcache.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
2079c590fb32Scz4e  dtlbRepeater.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
2080c590fb32Scz4e  lsq.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
2081c590fb32Scz4e  io.debugTopDown.toCore.robHeadMissInDCache := dcache.io.debugTopDown.robHeadMissInDCache
2082c590fb32Scz4e  io.debugTopDown.toCore.robHeadTlbReplay := lsq.io.debugTopDown.robHeadTlbReplay
2083c590fb32Scz4e  io.debugTopDown.toCore.robHeadTlbMiss := lsq.io.debugTopDown.robHeadTlbMiss
2084c590fb32Scz4e  io.debugTopDown.toCore.robHeadLoadVio := lsq.io.debugTopDown.robHeadLoadVio
2085c590fb32Scz4e  io.debugTopDown.toCore.robHeadLoadMSHR := lsq.io.debugTopDown.robHeadLoadMSHR
2086c590fb32Scz4e  dcache.io.debugTopDown.robHeadOtherReplay := lsq.io.debugTopDown.robHeadOtherReplay
2087c590fb32Scz4e  dcache.io.debugRolling := io.debugRolling
2088c590fb32Scz4e
2089c590fb32Scz4e  lsq.io.noUopsIssued := io.topDownInfo.toBackend.noUopsIssued
2090c590fb32Scz4e  io.topDownInfo.toBackend.lqEmpty := lsq.io.lqEmpty
2091c590fb32Scz4e  io.topDownInfo.toBackend.sqEmpty := lsq.io.sqEmpty
2092c590fb32Scz4e  io.topDownInfo.toBackend.l1Miss := dcache.io.l1Miss
2093c590fb32Scz4e  io.topDownInfo.toBackend.l2TopMiss.l2Miss := RegNext(io.topDownInfo.fromL2Top.l2Miss)
2094c590fb32Scz4e  io.topDownInfo.toBackend.l2TopMiss.l3Miss := RegNext(io.topDownInfo.fromL2Top.l3Miss)
2095c590fb32Scz4e
2096c590fb32Scz4e  val hyLdDeqCount = PopCount(io.ooo_to_mem.issueHya.map(x => x.valid && FuType.isLoad(x.bits.uop.fuType)))
2097c590fb32Scz4e  val hyStDeqCount = PopCount(io.ooo_to_mem.issueHya.map(x => x.valid && FuType.isStore(x.bits.uop.fuType)))
2098c590fb32Scz4e  val ldDeqCount = PopCount(io.ooo_to_mem.issueLda.map(_.valid)) +& hyLdDeqCount
2099c590fb32Scz4e  val stDeqCount = PopCount(io.ooo_to_mem.issueSta.take(StaCnt).map(_.valid)) +& hyStDeqCount
2100c590fb32Scz4e  val iqDeqCount = ldDeqCount +& stDeqCount
2101c590fb32Scz4e  XSPerfAccumulate("load_iq_deq_count", ldDeqCount)
2102c590fb32Scz4e  XSPerfHistogram("load_iq_deq_count", ldDeqCount, true.B, 0, LdExuCnt + 1)
2103c590fb32Scz4e  XSPerfAccumulate("store_iq_deq_count", stDeqCount)
2104c590fb32Scz4e  XSPerfHistogram("store_iq_deq_count", stDeqCount, true.B, 0, StAddrCnt + 1)
2105c590fb32Scz4e  XSPerfAccumulate("ls_iq_deq_count", iqDeqCount)
2106c590fb32Scz4e
2107c590fb32Scz4e  val pfevent = Module(new PFEvent)
2108c590fb32Scz4e  pfevent.io.distribute_csr := csrCtrl.distribute_csr
2109c590fb32Scz4e  val csrevents = pfevent.io.hpmevent.slice(16,24)
2110c590fb32Scz4e
2111c590fb32Scz4e  val perfFromUnits = (loadUnits ++ Seq(sbuffer, lsq, dcache)).flatMap(_.getPerfEvents)
2112c590fb32Scz4e  val perfFromPTW = perfEventsPTW.map(x => ("PTW_" + x._1, x._2))
2113c590fb32Scz4e  val perfBlock     = Seq(("ldDeqCount", ldDeqCount),
2114c590fb32Scz4e                          ("stDeqCount", stDeqCount))
2115c590fb32Scz4e  // let index = 0 be no event
2116c590fb32Scz4e  val allPerfEvents = Seq(("noEvent", 0.U)) ++ perfFromUnits ++ perfFromPTW ++ perfBlock
2117c590fb32Scz4e
2118c590fb32Scz4e  if (printEventCoding) {
2119c590fb32Scz4e    for (((name, inc), i) <- allPerfEvents.zipWithIndex) {
2120c590fb32Scz4e      println("MemBlock perfEvents Set", name, inc, i)
2121c590fb32Scz4e    }
2122c590fb32Scz4e  }
2123c590fb32Scz4e
2124c590fb32Scz4e  val allPerfInc = allPerfEvents.map(_._2.asTypeOf(new PerfEvent))
2125c590fb32Scz4e  val perfEvents = HPerfMonitor(csrevents, allPerfInc).getPerfEvents
2126c590fb32Scz4e  generatePerfEvent()
21274b2c87baS梁森 Liang Sen
21284b2c87baS梁森 Liang Sen  private val mbistPl = MbistPipeline.PlaceMbistPipeline(Int.MaxValue, "MbistPipeMemBlk", hasMbist)
21294b2c87baS梁森 Liang Sen  private val mbistIntf = if(hasMbist) {
21304b2c87baS梁森 Liang Sen    val params = mbistPl.get.nodeParams
21314b2c87baS梁森 Liang Sen    val intf = Some(Module(new MbistInterface(
21324b2c87baS梁森 Liang Sen      params = Seq(params),
21334b2c87baS梁森 Liang Sen      ids = Seq(mbistPl.get.childrenIds),
21344b2c87baS梁森 Liang Sen      name = s"MbistIntfMemBlk",
21354b2c87baS梁森 Liang Sen      pipelineNum = 1
21364b2c87baS梁森 Liang Sen    )))
21374b2c87baS梁森 Liang Sen    intf.get.toPipeline.head <> mbistPl.get.mbist
21384b2c87baS梁森 Liang Sen    mbistPl.get.registerCSV(intf.get.info, "MbistMemBlk")
21394b2c87baS梁森 Liang Sen    intf.get.mbist := DontCare
21404b2c87baS梁森 Liang Sen    dontTouch(intf.get.mbist)
21414b2c87baS梁森 Liang Sen    //TODO: add mbist controller connections here
21424b2c87baS梁森 Liang Sen    intf
21434b2c87baS梁森 Liang Sen  } else {
21444b2c87baS梁森 Liang Sen    None
21454b2c87baS梁森 Liang Sen  }
2146602aa9f1Scz4e   private val sigFromSrams = if (hasSramTest) Some(SramHelper.genBroadCastBundleTop()) else None
21474b2c87baS梁森 Liang Sen  private val cg = ClockGate.genTeSrc
21484b2c87baS梁森 Liang Sen  dontTouch(cg)
2149602aa9f1Scz4e
2150602aa9f1Scz4e  sigFromSrams.foreach({ case sig => sig.mbist := DontCare })
21514b2c87baS梁森 Liang Sen  if (hasMbist) {
2152602aa9f1Scz4e    sigFromSrams.get.mbist := io.sramTestBypass.fromL2Top.mbist.get
2153602aa9f1Scz4e    io.sramTestBypass.toFrontend.mbist.get := io.sramTestBypass.fromL2Top.mbist.get
2154602aa9f1Scz4e    io.sramTestBypass.toFrontend.mbistReset.get := io.sramTestBypass.fromL2Top.mbistReset.get
2155602aa9f1Scz4e    io.sramTestBypass.toBackend.mbist.get := io.sramTestBypass.fromL2Top.mbist.get
2156602aa9f1Scz4e    io.sramTestBypass.toBackend.mbistReset.get := io.sramTestBypass.fromL2Top.mbistReset.get
2157602aa9f1Scz4e    cg.cgen := io.sramTestBypass.fromL2Top.mbist.get.cgen
21584b2c87baS梁森 Liang Sen  } else {
21594b2c87baS梁森 Liang Sen    cg.cgen := false.B
21604b2c87baS梁森 Liang Sen  }
2161602aa9f1Scz4e
2162602aa9f1Scz4e  // sram debug
2163602aa9f1Scz4e  val sramCtl = Option.when(hasSramCtl)(RegNext(io.sramTestBypass.fromL2Top.sramCtl.get))
2164602aa9f1Scz4e  sigFromSrams.foreach({ case sig => sig.sramCtl := DontCare })
2165602aa9f1Scz4e  sigFromSrams.zip(sramCtl).foreach {
2166602aa9f1Scz4e    case (sig, ctl) =>
2167602aa9f1Scz4e      sig.sramCtl.RTSEL := ctl(1, 0) // CFG[1 : 0]
2168602aa9f1Scz4e      sig.sramCtl.WTSEL := ctl(3, 2) // CFG[3 : 2]
2169602aa9f1Scz4e      sig.sramCtl.MCR   := ctl(5, 4) // CFG[5 : 4]
2170602aa9f1Scz4e      sig.sramCtl.MCW   := ctl(7, 6) // CFG[7 : 6]
2171602aa9f1Scz4e  }
2172602aa9f1Scz4e  if (hasSramCtl) {
2173602aa9f1Scz4e    io.sramTestBypass.toFrontend.sramCtl.get := sramCtl.get
2174602aa9f1Scz4e  }
2175c590fb32Scz4e}
2176c590fb32Scz4e
2177c590fb32Scz4eclass MemBlock()(implicit p: Parameters) extends LazyModule
2178c590fb32Scz4e  with HasXSParameter {
2179c590fb32Scz4e  override def shouldBeInlined: Boolean = false
2180c590fb32Scz4e
2181c590fb32Scz4e  val inner = LazyModule(new MemBlockInlined())
2182c590fb32Scz4e
2183c590fb32Scz4e  lazy val module = new MemBlockImp(this)
2184c590fb32Scz4e}
2185c590fb32Scz4e
2186c590fb32Scz4eclass MemBlockImp(wrapper: MemBlock) extends LazyModuleImp(wrapper) {
2187c590fb32Scz4e  val io = IO(wrapper.inner.module.io.cloneType)
2188c590fb32Scz4e  val io_perf = IO(wrapper.inner.module.io_perf.cloneType)
2189c590fb32Scz4e  io <> wrapper.inner.module.io
2190c590fb32Scz4e  io_perf <> wrapper.inner.module.io_perf
2191c590fb32Scz4e
2192c590fb32Scz4e  if (p(DebugOptionsKey).ResetGen) {
2193602aa9f1Scz4e    ResetGen(
2194602aa9f1Scz4e      ResetGenNode(Seq(ModuleNode(wrapper.inner.module))),
2195602aa9f1Scz4e      reset, sim = false, io.sramTestBypass.fromL2Top.mbistReset
2196602aa9f1Scz4e    )
2197c590fb32Scz4e  }
2198c590fb32Scz4e}
2199