xref: /XiangShan/src/main/scala/xiangshan/mem/MemBlock.scala (revision 42b75a597e916f6a6887cb8bc626483d0d2645dd)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.mem
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import freechips.rocketchip.diplomacy._
23import freechips.rocketchip.diplomacy.{BundleBridgeSource, LazyModule, LazyModuleImp}
24import freechips.rocketchip.interrupts.{IntSinkNode, IntSinkPortSimple}
25import freechips.rocketchip.tile.HasFPUParameters
26import freechips.rocketchip.tilelink._
27import utils._
28import utility._
29import system.SoCParamsKey
30import xiangshan._
31import xiangshan.ExceptionNO._
32import xiangshan.frontend.HasInstrMMIOConst
33import xiangshan.backend.Bundles.{DynInst, MemExuInput, MemExuOutput}
34import xiangshan.backend.ctrlblock.{DebugLSIO, LsTopdownInfo}
35import xiangshan.backend.exu.MemExeUnit
36import xiangshan.backend.fu._
37import xiangshan.backend.fu.FuType._
38import xiangshan.backend.fu.NewCSR.{CsrTriggerBundle, TriggerUtil, PFEvent}
39import xiangshan.backend.fu.util.{CSRConst, SdtrigExt}
40import xiangshan.backend.{BackendToTopBundle, TopToBackendBundle}
41import xiangshan.backend.rob.{RobDebugRollingIO, RobPtr, RobLsqIO}
42import xiangshan.backend.datapath.NewPipelineConnect
43import xiangshan.backend.trace.{Itype, TraceCoreInterface}
44import xiangshan.backend.Bundles._
45import xiangshan.mem._
46import xiangshan.mem.mdp._
47import xiangshan.mem.Bundles._
48import xiangshan.mem.prefetch.{BasePrefecher, L1Prefetcher, SMSParams, SMSPrefetcher}
49import xiangshan.cache._
50import xiangshan.cache.mmu._
51import coupledL2.PrefetchRecv
52import utility.mbist.{MbistInterface, MbistPipeline}
53import utility.sram.{SramBroadcastBundle, SramHelper}
54import system.HasSoCParameter
55trait HasMemBlockParameters extends HasXSParameter {
56  // number of memory units
57  val LduCnt  = backendParams.LduCnt
58  val StaCnt  = backendParams.StaCnt
59  val StdCnt  = backendParams.StdCnt
60  val HyuCnt  = backendParams.HyuCnt
61  val VlduCnt = backendParams.VlduCnt
62  val VstuCnt = backendParams.VstuCnt
63
64  val LdExuCnt  = LduCnt + HyuCnt
65  val StAddrCnt = StaCnt + HyuCnt
66  val StDataCnt = StdCnt
67  val MemExuCnt = LduCnt + HyuCnt + StaCnt + StdCnt
68  val MemAddrExtCnt = LdExuCnt + StaCnt
69  val MemVExuCnt = VlduCnt + VstuCnt
70
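  // Load-path writeback port assignment: port 0 is shared with the atomics unit,
  // port 1 with the load misalign buffer, and port 2 with uncached load results;
  // NCWBPorts lists the ports (1 and 2) used for non-cacheable (NC) load writeback.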
71  val AtomicWBPort   = 0
72  val MisalignWBPort = 1
73  val UncacheWBPort  = 2
74  val NCWBPorts = Seq(1, 2)
75}
76
77abstract class MemBlockBundle(implicit val p: Parameters) extends Bundle with HasMemBlockParameters
78
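// Std (store data) functional unit: a combinational pass-through that forwards
// src(0) as the store data result and carries the uop's robIdx along with it.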
79class Std(cfg: FuConfig)(implicit p: Parameters) extends FuncUnit(cfg) {
80  io.in.ready := io.out.ready
81  io.out.valid := io.in.valid
82  io.out.bits := 0.U.asTypeOf(io.out.bits)
83  io.out.bits.res.data := io.in.bits.data.src(0)
84  io.out.bits.ctrl.robIdx := io.in.bits.ctrl.robIdx
85}
86
87class ooo_to_mem(implicit p: Parameters) extends MemBlockBundle {
88  val backendToTopBypass = Flipped(new BackendToTopBundle)
89
90  val loadFastMatch = Vec(LdExuCnt, Input(UInt(LdExuCnt.W)))
91  val loadFastFuOpType = Vec(LdExuCnt, Input(FuOpType()))
92  val loadFastImm = Vec(LdExuCnt, Input(UInt(12.W)))
93  val sfence = Input(new SfenceBundle)
94  val tlbCsr = Input(new TlbCsrBundle)
95  val lsqio = new Bundle {
96    val lcommit = Input(UInt(log2Up(CommitWidth + 1).W))
97    val scommit = Input(UInt(log2Up(CommitWidth + 1).W))
98    val pendingMMIOld = Input(Bool())
99    val pendingld = Input(Bool())
100    val pendingst = Input(Bool())
101    val pendingVst = Input(Bool())
102    val commit = Input(Bool())
103    val pendingPtr = Input(new RobPtr)
104    val pendingPtrNext = Input(new RobPtr)
105  }
106
107  val isStoreException = Input(Bool())
108  val isVlsException = Input(Bool())
109  val csrCtrl = Flipped(new CustomCSRCtrlIO)
110  val enqLsq = new LsqEnqIO
111  val flushSb = Input(Bool())
112
113  val storePc = Vec(StaCnt, Input(UInt(VAddrBits.W))) // for hw prefetch
114  val hybridPc = Vec(HyuCnt, Input(UInt(VAddrBits.W))) // for hw prefetch
115
116  val issueLda = MixedVec(Seq.fill(LduCnt)(Flipped(DecoupledIO(new MemExuInput))))
117  val issueSta = MixedVec(Seq.fill(StaCnt)(Flipped(DecoupledIO(new MemExuInput))))
118  val issueStd = MixedVec(Seq.fill(StdCnt)(Flipped(DecoupledIO(new MemExuInput))))
119  val issueHya = MixedVec(Seq.fill(HyuCnt)(Flipped(DecoupledIO(new MemExuInput))))
120  val issueVldu = MixedVec(Seq.fill(VlduCnt)(Flipped(DecoupledIO(new MemExuInput(isVector=true)))))
121
122  def issueUops = issueLda ++ issueSta ++ issueStd ++ issueHya ++ issueVldu
123}
124
125class mem_to_ooo(implicit p: Parameters) extends MemBlockBundle {
126  val topToBackendBypass = new TopToBackendBundle
127
128  val otherFastWakeup = Vec(LdExuCnt, ValidIO(new DynInst))
129  val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize + 1).W))
130  val sqCancelCnt = Output(UInt(log2Up(StoreQueueSize + 1).W))
131  val sqDeq = Output(UInt(log2Ceil(EnsbufferWidth + 1).W))
132  val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
133  // used by the VLSU issue queue: a vector store waits for all stores before it, and a vector load waits for all loads before it
134  val sqDeqPtr = Output(new SqPtr)
135  val lqDeqPtr = Output(new LqPtr)
136  val stIn = Vec(StAddrCnt, ValidIO(new MemExuInput))
137  val stIssuePtr = Output(new SqPtr())
138
139  val memoryViolation = ValidIO(new Redirect)
140  val sbIsEmpty = Output(Bool())
141
142  val lsTopdownInfo = Vec(LdExuCnt, Output(new LsTopdownInfo))
143
144  val lsqio = new Bundle {
145    val vaddr = Output(UInt(XLEN.W))
146    val vstart = Output(UInt((log2Up(VLEN) + 1).W))
147    val vl = Output(UInt((log2Up(VLEN) + 1).W))
148    val gpaddr = Output(UInt(XLEN.W))
149    val isForVSnonLeafPTE = Output(Bool())
150    val mmio = Output(Vec(LoadPipelineWidth, Bool()))
151    val uop = Output(Vec(LoadPipelineWidth, new DynInst))
152    val lqCanAccept = Output(Bool())
153    val sqCanAccept = Output(Bool())
154  }
155
156  val storeDebugInfo = Vec(EnsbufferWidth, new Bundle {
157    val robidx = Output(new RobPtr)
158    val pc     = Input(UInt(VAddrBits.W))
159  })
160
161  val writebackLda = Vec(LduCnt, DecoupledIO(new MemExuOutput))
162  val writebackSta = Vec(StaCnt, DecoupledIO(new MemExuOutput))
163  val writebackStd = Vec(StdCnt, DecoupledIO(new MemExuOutput))
164  val writebackHyuLda = Vec(HyuCnt, DecoupledIO(new MemExuOutput))
165  val writebackHyuSta = Vec(HyuCnt, DecoupledIO(new MemExuOutput))
166  val writebackVldu = Vec(VlduCnt, DecoupledIO(new MemExuOutput(isVector = true)))
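  // Flattened writeback view; note the order here differs from the declaration
  // order above: sta, hybrid-load, hybrid-store, lda, vldu, std.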
167  def writeBack: Seq[DecoupledIO[MemExuOutput]] = {
168    writebackSta ++
169      writebackHyuLda ++ writebackHyuSta ++
170      writebackLda ++
171      writebackVldu ++
172      writebackStd
173  }
174
175  val ldaIqFeedback = Vec(LduCnt, new MemRSFeedbackIO)
176  val staIqFeedback = Vec(StaCnt, new MemRSFeedbackIO)
177  val hyuIqFeedback = Vec(HyuCnt, new MemRSFeedbackIO)
178  val vstuIqFeedback= Vec(VstuCnt, new MemRSFeedbackIO(isVector = true))
179  val vlduIqFeedback= Vec(VlduCnt, new MemRSFeedbackIO(isVector = true))
180  val ldCancel = Vec(backendParams.LdExuCnt, new LoadCancelIO)
181  val wakeup = Vec(backendParams.LdExuCnt, Valid(new DynInst))
182
183  val s3_delayed_load_error = Vec(LdExuCnt, Output(Bool()))
184}
185
186class MemCoreTopDownIO extends Bundle {
187  val robHeadMissInDCache = Output(Bool())
188  val robHeadTlbReplay = Output(Bool())
189  val robHeadTlbMiss = Output(Bool())
190  val robHeadLoadVio = Output(Bool())
191  val robHeadLoadMSHR = Output(Bool())
192}
193
194class fetch_to_mem(implicit p: Parameters) extends XSBundle{
195  val itlb = Flipped(new TlbPtwIO())
196}
197
198// triple buffer applied in i-mmio path (two at MemBlock, one at L2Top)
199class InstrUncacheBuffer()(implicit p: Parameters) extends LazyModule with HasInstrMMIOConst {
200  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
201  lazy val module = new InstrUncacheBufferImpl
202
203  class InstrUncacheBufferImpl extends LazyModuleImp(this) {
204    (node.in zip node.out) foreach { case ((in, edgeIn), (out, edgeOut)) =>
205      out.a <> BufferParams.default(BufferParams.default(in.a))
206      in.d <> BufferParams.default(BufferParams.default(out.d))
207
208      // only a.valid, a.ready and a.address can change
209      // hoping that the rest is optimized so that the MemBlock port stays unchanged after adding the buffer
210      out.a.bits.data := 0.U
211      out.a.bits.mask := Fill(mmioBusBytes, 1.U(1.W))
212      out.a.bits.opcode := 4.U // Get
213      out.a.bits.size := log2Ceil(mmioBusBytes).U
214      out.a.bits.source := 0.U
215    }
216  }
217}
218
219// triple buffer applied in L1I$-L2 path (two at MemBlock, one at L2Top)
220class ICacheBuffer()(implicit p: Parameters) extends LazyModule {
221  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
222  lazy val module = new ICacheBufferImpl
223
224  class ICacheBufferImpl extends LazyModuleImp(this) {
225    (node.in zip node.out) foreach { case ((in, edgeIn), (out, edgeOut)) =>
226      out.a <> BufferParams.default(BufferParams.default(in.a))
227      in.d <> BufferParams.default(BufferParams.default(out.d))
228    }
229  }
230}
231
232class ICacheCtrlBuffer()(implicit p: Parameters) extends LazyModule {
233  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
234  lazy val module = new ICacheCtrlBufferImpl
235
236  class ICacheCtrlBufferImpl extends LazyModuleImp(this) {
237    (node.in zip node.out) foreach { case ((in, edgeIn), (out, edgeOut)) =>
238      out.a <> BufferParams.default(BufferParams.default(in.a))
239      in.d <> BufferParams.default(BufferParams.default(out.d))
240    }
241  }
242}
243
244// Frontend bus goes through MemBlock
245class FrontendBridge()(implicit p: Parameters) extends LazyModule {
246  val icache_node = LazyModule(new ICacheBuffer()).suggestName("icache").node // to keep IO port name
247  val icachectrl_node = LazyModule(new ICacheCtrlBuffer()).suggestName("icachectrl").node
248  val instr_uncache_node = LazyModule(new InstrUncacheBuffer()).suggestName("instr_uncache").node
249  lazy val module = new LazyModuleImp(this) {
250  }
251}
252
253class MemBlockInlined()(implicit p: Parameters) extends LazyModule
254  with HasXSParameter {
255  override def shouldBeInlined: Boolean = true
256
257  val dcache = LazyModule(new DCacheWrapper())
258  val uncache = LazyModule(new Uncache())
259  val uncache_port = TLTempNode()
260  val uncache_xbar = TLXbar()
261  val ptw = LazyModule(new L2TLBWrapper())
262  val ptw_to_l2_buffer = if (!coreParams.softPTW) LazyModule(new TLBuffer) else null
263  val l1d_to_l2_buffer = if (coreParams.dcacheParametersOpt.nonEmpty) LazyModule(new TLBuffer) else null
264  val dcache_port = TLNameNode("dcache_client") // to keep dcache-L2 port name
265  val l2_pf_sender_opt = coreParams.prefetcher.map(_ =>
266    BundleBridgeSource(() => new PrefetchRecv)
267  )
268  val l3_pf_sender_opt = if (p(SoCParamsKey).L3CacheParamsOpt.nonEmpty) coreParams.prefetcher.map(_ =>
269    BundleBridgeSource(() => new huancun.PrefetchRecv)
270  ) else None
271  val frontendBridge = LazyModule(new FrontendBridge)
272  // interrupt sinks
273  val clint_int_sink = IntSinkNode(IntSinkPortSimple(1, 2))
274  val debug_int_sink = IntSinkNode(IntSinkPortSimple(1, 1))
275  val plic_int_sink = IntSinkNode(IntSinkPortSimple(2, 1))
276  val nmi_int_sink = IntSinkNode(IntSinkPortSimple(1, (new NonmaskableInterruptIO).elements.size))
277  val beu_local_int_sink = IntSinkNode(IntSinkPortSimple(1, 1))
278
279  if (!coreParams.softPTW) {
280    ptw_to_l2_buffer.node := ptw.node
281  }
282  uncache_xbar := TLBuffer() := uncache.clientNode
283  if (dcache.uncacheNode.isDefined) {
284    dcache.uncacheNode.get := TLBuffer.chainNode(2) := uncache_xbar
285  }
286  uncache_port := TLBuffer.chainNode(2) := uncache_xbar
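  // Uncache traffic topology: uncache.clientNode -> TLBuffer -> uncache_xbar, which
  // fans out to dcache.uncacheNode (if present) and to the outer uncache_port, each
  // through a 2-stage TLBuffer chain.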
287
288  lazy val module = new MemBlockInlinedImp(this)
289}
290
291class MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer)
292  with HasXSParameter
293  with HasFPUParameters
294  with HasPerfEvents
295  with HasSoCParameter
296  with HasL1PrefetchSourceParameter
297  with HasCircularQueuePtrHelper
298  with HasMemBlockParameters
299  with HasTlbConst
300  with SdtrigExt
301{
302  val io = IO(new Bundle {
303    val hartId = Input(UInt(hartIdLen.W))
304    val redirect = Flipped(ValidIO(new Redirect))
305
306    val ooo_to_mem = new ooo_to_mem
307    val mem_to_ooo = new mem_to_ooo
308    val fetch_to_mem = new fetch_to_mem
309
310    val ifetchPrefetch = Vec(LduCnt, ValidIO(new SoftIfetchPrefetchBundle))
311
312    // misc
313    val error = ValidIO(new L1CacheErrorInfo)
314    val memInfo = new Bundle {
315      val sqFull = Output(Bool())
316      val lqFull = Output(Bool())
317      val dcacheMSHRFull = Output(Bool())
318    }
319    val debug_ls = new DebugLSIO
320    val l2_hint = Input(Valid(new L2ToL1Hint()))
321    val l2PfqBusy = Input(Bool())
322    val l2_tlb_req = Flipped(new TlbRequestIO(nRespDups = 2))
323    val l2_pmp_resp = new PMPRespBundle
324    val l2_flush_done = Input(Bool())
325
326    val debugTopDown = new Bundle {
327      val robHeadVaddr = Flipped(Valid(UInt(VAddrBits.W)))
328      val toCore = new MemCoreTopDownIO
329    }
330    val debugRolling = Flipped(new RobDebugRollingIO)
331
332    // All signals between the frontend/backend and the bus go through MemBlock
333    val fromTopToBackend = Input(new Bundle {
334      val msiInfo   = ValidIO(UInt(soc.IMSICParams.MSI_INFO_WIDTH.W))
335      val clintTime = ValidIO(UInt(64.W))
336    })
337    val inner_hartId = Output(UInt(hartIdLen.W))
338    val inner_reset_vector = Output(UInt(PAddrBits.W))
339    val outer_reset_vector = Input(UInt(PAddrBits.W))
340    val outer_cpu_halt = Output(Bool())
341    val outer_l2_flush_en = Output(Bool())
342    val outer_power_down_en = Output(Bool())
343    val outer_cpu_critical_error = Output(Bool())
344    val outer_msi_ack = Output(Bool())
345    val inner_beu_errors_icache = Input(new L1BusErrorUnitInfo)
346    val outer_beu_errors_icache = Output(new L1BusErrorUnitInfo)
347    val inner_hc_perfEvents = Output(Vec(numPCntHc * coreParams.L2NBanks + 1, new PerfEvent))
348    val outer_hc_perfEvents = Input(Vec(numPCntHc * coreParams.L2NBanks + 1, new PerfEvent))
349
350    // reset signals of frontend & backend are generated in memblock
351    val reset_backend = Output(Reset())
352    // Reset signal from frontend.
353    val resetInFrontendBypass = new Bundle{
354      val fromFrontend = Input(Bool())
355      val toL2Top      = Output(Bool())
356    }
357    val traceCoreInterfaceBypass = new Bundle{
358      val fromBackend = Flipped(new TraceCoreInterface(hasOffset = true))
359      val toL2Top     = new TraceCoreInterface
360    }
361
362    val topDownInfo = new Bundle {
363      val fromL2Top = Input(new TopDownFromL2Top)
364      val toBackend = Flipped(new TopDownInfo)
365    }
366    val dft = if (hasMbist) Some(Input(new SramBroadcastBundle)) else None
367    val dft_reset = if(hasMbist) Some(Input(new DFTResetSignals())) else None
368    val dft_frnt = if (hasMbist) Some(Output(new SramBroadcastBundle)) else None
369    val dft_reset_frnt = if(hasMbist) Some(Output(new DFTResetSignals())) else None
370    val dft_bcknd = if (hasMbist) Some(Output(new SramBroadcastBundle)) else None
371    val dft_reset_bcknd = if(hasMbist) Some(Output(new DFTResetSignals())) else None
372  })
373
374  dontTouch(io.inner_hartId)
375  dontTouch(io.inner_reset_vector)
376  dontTouch(io.outer_reset_vector)
377  dontTouch(io.outer_cpu_halt)
378  dontTouch(io.outer_l2_flush_en)
379  dontTouch(io.outer_power_down_en)
380  dontTouch(io.outer_cpu_critical_error)
381  dontTouch(io.inner_beu_errors_icache)
382  dontTouch(io.outer_beu_errors_icache)
383  dontTouch(io.inner_hc_perfEvents)
384  dontTouch(io.outer_hc_perfEvents)
385
386  val redirect = RegNextWithEnable(io.redirect)
387
388  private val dcache = outer.dcache.module
389  val uncache = outer.uncache.module
390
391  //val delayedDcacheRefill = RegNext(dcache.io.lsu.lsq)
392
393  val csrCtrl = DelayN(io.ooo_to_mem.csrCtrl, 2)
394  dcache.io.l2_pf_store_only := RegNext(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_store_only, false.B)
395  io.error <> DelayNWithValid(dcache.io.error, 2)
396  when(!csrCtrl.cache_error_enable){
397    io.error.bits.report_to_beu := false.B
398    io.error.valid := false.B
399  }
400
401  val loadUnits = Seq.fill(LduCnt)(Module(new LoadUnit))
402  val storeUnits = Seq.fill(StaCnt)(Module(new StoreUnit))
403  val stdExeUnits = Seq.fill(StdCnt)(Module(new MemExeUnit(backendParams.memSchdParams.get.issueBlockParams.find(_.StdCnt != 0).get.exuBlockParams.head)))
404  val hybridUnits = Seq.fill(HyuCnt)(Module(new HybridUnit)) // Todo: replace it with HybridUnit
405  val stData = stdExeUnits.map(_.io.out)
406  val exeUnits = loadUnits ++ storeUnits
407
408  // The number of vector load/store units is decoupled from the number of scalar load/store units
409  val vlSplit = Seq.fill(VlduCnt)(Module(new VLSplitImp))
410  val vsSplit = Seq.fill(VstuCnt)(Module(new VSSplitImp))
411  val vlMergeBuffer = Module(new VLMergeBufferImp)
412  val vsMergeBuffer = Seq.fill(VstuCnt)(Module(new VSMergeBufferImp))
413  val vSegmentUnit  = Module(new VSegmentUnit)
414  val vfofBuffer    = Module(new VfofBuffer)
415
416  // misalign Buffer
417  val loadMisalignBuffer = Module(new LoadMisalignBuffer)
418  val storeMisalignBuffer = Module(new StoreMisalignBuffer)
419
420  val l1_pf_req = Wire(Decoupled(new L1PrefetchReq()))
421  dcache.io.sms_agt_evict_req.ready := false.B
422  val prefetcherOpt: Option[BasePrefecher] = coreParams.prefetcher.map {
423    case _: SMSParams =>
424      val sms = Module(new SMSPrefetcher())
425      sms.io_agt_en := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_agt, 2, Some(false.B))
426      sms.io_pht_en := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_pht, 2, Some(false.B))
427      sms.io_act_threshold := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_active_threshold, 2, Some(12.U))
428      sms.io_act_stride := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_active_stride, 2, Some(30.U))
429      sms.io_stride_en := false.B
430      sms.io_dcache_evict <> dcache.io.sms_agt_evict_req
431      val mbistSmsPl = MbistPipeline.PlaceMbistPipeline(1, "MbistPipeSms", hasMbist)
432      sms
433  }
434  prefetcherOpt.foreach{ pf => pf.io.l1_req.ready := false.B }
435  val hartId = p(XSCoreParamsKey).HartId
436  val l1PrefetcherOpt: Option[BasePrefecher] = coreParams.prefetcher.map {
437    case _ =>
438      val l1Prefetcher = Module(new L1Prefetcher())
439      l1Prefetcher.io.enable := Constantin.createRecord(s"enableL1StreamPrefetcher$hartId", initValue = true)
440      l1Prefetcher.pf_ctrl <> dcache.io.pf_ctrl
441      l1Prefetcher.l2PfqBusy := io.l2PfqBusy
442
443      // stride will train on miss or prefetch hit
444      for (i <- 0 until LduCnt) {
445        val source = loadUnits(i).io.prefetch_train_l1
446        l1Prefetcher.stride_train(i).valid := source.valid && source.bits.isFirstIssue && (
447          source.bits.miss || isFromStride(source.bits.meta_prefetch)
448        )
449        l1Prefetcher.stride_train(i).bits := source.bits
450        val loadPc = RegNext(io.ooo_to_mem.issueLda(i).bits.uop.pc) // for s1
451        l1Prefetcher.stride_train(i).bits.uop.pc := Mux(
452          loadUnits(i).io.s2_ptr_chasing,
453          RegEnable(loadPc, loadUnits(i).io.s2_prefetch_spec),
454          RegEnable(RegEnable(loadPc, loadUnits(i).io.s1_prefetch_spec), loadUnits(i).io.s2_prefetch_spec)
455        )
456      }
457      for (i <- 0 until HyuCnt) {
458        val source = hybridUnits(i).io.prefetch_train_l1
459        l1Prefetcher.stride_train.drop(LduCnt)(i).valid := source.valid && source.bits.isFirstIssue && (
460          source.bits.miss || isFromStride(source.bits.meta_prefetch)
461        )
462        l1Prefetcher.stride_train.drop(LduCnt)(i).bits := source.bits
463        l1Prefetcher.stride_train.drop(LduCnt)(i).bits.uop.pc := Mux(
464          hybridUnits(i).io.ldu_io.s2_ptr_chasing,
465          RegNext(io.ooo_to_mem.hybridPc(i)),
466          RegNext(RegNext(io.ooo_to_mem.hybridPc(i)))
467        )
468      }
469      l1Prefetcher
470  }
471  // load prefetch to l1 Dcache
472  l1PrefetcherOpt match {
473    case Some(pf) => l1_pf_req <> Pipeline(in = pf.io.l1_req, depth = 1, pipe = false, name = Some("pf_queue_to_ldu_reg"))
474    case None =>
475      l1_pf_req.valid := false.B
476      l1_pf_req.bits := DontCare
477  }
478  val pf_train_on_hit = RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_train_on_hit, 2, Some(true.B))
479
480  loadUnits.zipWithIndex.map(x => x._1.suggestName("LoadUnit_"+x._2))
481  storeUnits.zipWithIndex.map(x => x._1.suggestName("StoreUnit_"+x._2))
482  hybridUnits.zipWithIndex.map(x => x._1.suggestName("HybridUnit_"+x._2))
483  val atomicsUnit = Module(new AtomicsUnit)
484
485
486  val ldaExeWbReqs = Wire(Vec(LduCnt, Decoupled(new MemExuOutput)))
487  // atomicsUnit will overwrite the source from ldu when it is about to write back
488  val atomicWritebackOverride = Mux(
489    atomicsUnit.io.out.valid,
490    atomicsUnit.io.out.bits,
491    loadUnits(AtomicWBPort).io.ldout.bits
492  )
493  ldaExeWbReqs(AtomicWBPort).valid := atomicsUnit.io.out.valid || loadUnits(AtomicWBPort).io.ldout.valid
494  ldaExeWbReqs(AtomicWBPort).bits  := atomicWritebackOverride
495  atomicsUnit.io.out.ready := ldaExeWbReqs(AtomicWBPort).ready
496  loadUnits(AtomicWBPort).io.ldout.ready := ldaExeWbReqs(AtomicWBPort).ready
497
498  val st_data_atomics = Seq.tabulate(StdCnt)(i =>
499    stData(i).valid && FuType.storeIsAMO(stData(i).bits.uop.fuType)
500  )
501
502  // the misalign buffer shares this writeback port with ldu; ldu has priority when both want to write back
503  val misalignWritebackOverride = Mux(
504    loadUnits(MisalignWBPort).io.ldout.valid,
505    loadUnits(MisalignWBPort).io.ldout.bits,
506    loadMisalignBuffer.io.writeBack.bits
507  )
508  ldaExeWbReqs(MisalignWBPort).valid    := loadMisalignBuffer.io.writeBack.valid || loadUnits(MisalignWBPort).io.ldout.valid
509  ldaExeWbReqs(MisalignWBPort).bits     := misalignWritebackOverride
510  loadMisalignBuffer.io.writeBack.ready := ldaExeWbReqs(MisalignWBPort).ready && !loadUnits(MisalignWBPort).io.ldout.valid
511  loadMisalignBuffer.io.loadOutValid    := loadUnits(MisalignWBPort).io.ldout.valid
512  loadMisalignBuffer.io.loadVecOutValid := loadUnits(MisalignWBPort).io.vecldout.valid
513  loadUnits(MisalignWBPort).io.ldout.ready := ldaExeWbReqs(MisalignWBPort).ready
514  ldaExeWbReqs(MisalignWBPort).bits.isFromLoadUnit := loadUnits(MisalignWBPort).io.ldout.bits.isFromLoadUnit || loadMisalignBuffer.io.writeBack.valid
515
516  // loadUnit will overwrite the source from uncache if it is about to write back
517  ldaExeWbReqs(UncacheWBPort) <> loadUnits(UncacheWBPort).io.ldout
518  io.mem_to_ooo.writebackLda <> ldaExeWbReqs
519  io.mem_to_ooo.writebackSta <> storeUnits.map(_.io.stout)
520  io.mem_to_ooo.writebackStd.zip(stdExeUnits).foreach {x =>
521    x._1.bits  := x._2.io.out.bits
522    // AMOs do not need to write back std now.
523    x._1.valid := x._2.io.out.fire && !FuType.storeIsAMO(x._2.io.out.bits.uop.fuType)
524  }
525  io.mem_to_ooo.writebackHyuLda <> hybridUnits.map(_.io.ldout)
526  io.mem_to_ooo.writebackHyuSta <> hybridUnits.map(_.io.stout)
527  io.mem_to_ooo.otherFastWakeup := DontCare
528  io.mem_to_ooo.otherFastWakeup.drop(HyuCnt).take(LduCnt).zip(loadUnits.map(_.io.fast_uop)).foreach{case(a,b)=> a := b}
529  io.mem_to_ooo.otherFastWakeup.take(HyuCnt).zip(hybridUnits.map(_.io.ldu_io.fast_uop)).foreach{case(a,b)=> a:=b}
530  val stOut = io.mem_to_ooo.writebackSta ++ io.mem_to_ooo.writebackHyuSta
531
532  // prefetch to l1 req
533  // Stream's confidence is always 1
534  // (LduCnt + HyuCnt) l1_pf_reqs ?
535  loadUnits.foreach(load_unit => {
536    load_unit.io.prefetch_req.valid <> l1_pf_req.valid
537    load_unit.io.prefetch_req.bits <> l1_pf_req.bits
538  })
539
540  hybridUnits.foreach(hybrid_unit => {
541    hybrid_unit.io.ldu_io.prefetch_req.valid <> l1_pf_req.valid
542    hybrid_unit.io.ldu_io.prefetch_req.bits <> l1_pf_req.bits
543  })
544
545  // NOTE: loadUnits(0) has higher bank-conflict and miss-queue arbitration priority than loadUnits(1) and loadUnits(2)
546  // when loadUnits(1)/loadUnits(2) stage 0 is busy, hw prefetch will never use that pipeline
547  val LowConfPorts = if (LduCnt == 2) Seq(1) else if (LduCnt == 3) Seq(1, 2) else Seq(0)
548  LowConfPorts.map{case i => loadUnits(i).io.prefetch_req.bits.confidence := 0.U}
549  hybridUnits.foreach(hybrid_unit => { hybrid_unit.io.ldu_io.prefetch_req.bits.confidence := 0.U })
550
551  val canAcceptHighConfPrefetch = loadUnits.map(_.io.canAcceptHighConfPrefetch) ++
552                                  hybridUnits.map(_.io.canAcceptLowConfPrefetch)
553  val canAcceptLowConfPrefetch = loadUnits.map(_.io.canAcceptLowConfPrefetch) ++
554                                 hybridUnits.map(_.io.canAcceptLowConfPrefetch)
555  l1_pf_req.ready := (0 until LduCnt + HyuCnt).map{
556    case i => {
557      if (LowConfPorts.contains(i)) {
558        loadUnits(i).io.canAcceptLowConfPrefetch
559      } else {
560        Mux(l1_pf_req.bits.confidence === 1.U, canAcceptHighConfPrefetch(i), canAcceptLowConfPrefetch(i))
561      }
562    }
563  }.reduce(_ || _)
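  // A prefetch request is accepted when any load/hybrid pipe can take it: the
  // low-confidence-only ports check canAcceptLowConfPrefetch, while the remaining
  // ports select the high- or low-confidence acceptance signal based on the
  // request's confidence field.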
564
565  // l1 pf fuzzer interface
566  val DebugEnableL1PFFuzzer = false
567  if (DebugEnableL1PFFuzzer) {
568    // l1 pf req fuzzer
569    val fuzzer = Module(new L1PrefetchFuzzer())
570    fuzzer.io.vaddr := DontCare
571    fuzzer.io.paddr := DontCare
572
573    // override load_unit prefetch_req
574    loadUnits.foreach(load_unit => {
575      load_unit.io.prefetch_req.valid <> fuzzer.io.req.valid
576      load_unit.io.prefetch_req.bits <> fuzzer.io.req.bits
577    })
578
579    // override hybrid_unit prefetch_req
580    hybridUnits.foreach(hybrid_unit => {
581      hybrid_unit.io.ldu_io.prefetch_req.valid <> fuzzer.io.req.valid
582      hybrid_unit.io.ldu_io.prefetch_req.bits <> fuzzer.io.req.bits
583    })
584
585    fuzzer.io.req.ready := l1_pf_req.ready
586  }
587
588  // TODO: fast load wakeup
589  val lsq     = Module(new LsqWrapper)
590  val sbuffer = Module(new Sbuffer)
591  // if you want to stress-test dcache stores, use FakeSbuffer
592  // val sbuffer = Module(new FakeSbuffer) // out of date now
593  io.mem_to_ooo.stIssuePtr := lsq.io.issuePtrExt
594
595  dcache.io.hartId := io.hartId
596  lsq.io.hartId := io.hartId
597  sbuffer.io.hartId := io.hartId
598  atomicsUnit.io.hartId := io.hartId
599
600  dcache.io.lqEmpty := lsq.io.lqEmpty
601
602  // load/store prefetch to l2 cache
603  prefetcherOpt.foreach(sms_pf => {
604    l1PrefetcherOpt.foreach(l1_pf => {
605      val sms_pf_to_l2 = DelayNWithValid(sms_pf.io.l2_req, 2)
606      val l1_pf_to_l2 = DelayNWithValid(l1_pf.io.l2_req, 2)
607
608      outer.l2_pf_sender_opt.get.out.head._1.addr_valid := sms_pf_to_l2.valid || l1_pf_to_l2.valid
609      outer.l2_pf_sender_opt.get.out.head._1.addr := Mux(l1_pf_to_l2.valid, l1_pf_to_l2.bits.addr, sms_pf_to_l2.bits.addr)
610      outer.l2_pf_sender_opt.get.out.head._1.pf_source := Mux(l1_pf_to_l2.valid, l1_pf_to_l2.bits.source, sms_pf_to_l2.bits.source)
611      outer.l2_pf_sender_opt.get.out.head._1.l2_pf_en := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_enable, 2, Some(true.B))
612
613      sms_pf.io.enable := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable, 2, Some(false.B))
614
615      val l2_trace = Wire(new LoadPfDbBundle)
616      l2_trace.paddr := outer.l2_pf_sender_opt.get.out.head._1.addr
617      val table = ChiselDB.createTable(s"L2PrefetchTrace$hartId", new LoadPfDbBundle, basicDB = false)
618      table.log(l2_trace, l1_pf_to_l2.valid, "StreamPrefetchTrace", clock, reset)
619      table.log(l2_trace, !l1_pf_to_l2.valid && sms_pf_to_l2.valid, "L2PrefetchTrace", clock, reset)
620
621      val l1_pf_to_l3 = ValidIODelay(l1_pf.io.l3_req, 4)
622      outer.l3_pf_sender_opt.foreach(_.out.head._1.addr_valid := l1_pf_to_l3.valid)
623      outer.l3_pf_sender_opt.foreach(_.out.head._1.addr := l1_pf_to_l3.bits)
624      outer.l3_pf_sender_opt.foreach(_.out.head._1.l2_pf_en := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_enable, 4, Some(true.B)))
625
626      val l3_trace = Wire(new LoadPfDbBundle)
627      l3_trace.paddr := outer.l3_pf_sender_opt.map(_.out.head._1.addr).getOrElse(0.U)
628      val l3_table = ChiselDB.createTable(s"L3PrefetchTrace$hartId", new LoadPfDbBundle, basicDB = false)
629      l3_table.log(l3_trace, l1_pf_to_l3.valid, "StreamPrefetchTrace", clock, reset)
630
631      XSPerfAccumulate("prefetch_fire_l2", outer.l2_pf_sender_opt.get.out.head._1.addr_valid)
632      XSPerfAccumulate("prefetch_fire_l3", outer.l3_pf_sender_opt.map(_.out.head._1.addr_valid).getOrElse(false.B))
633      XSPerfAccumulate("l1pf_fire_l2", l1_pf_to_l2.valid)
634      XSPerfAccumulate("sms_fire_l2", !l1_pf_to_l2.valid && sms_pf_to_l2.valid)
635      XSPerfAccumulate("sms_block_by_l1pf", l1_pf_to_l2.valid && sms_pf_to_l2.valid)
636    })
637  })
638
639  // ptw
640  val sfence = RegNext(RegNext(io.ooo_to_mem.sfence))
641  val tlbcsr = RegNext(RegNext(io.ooo_to_mem.tlbCsr))
642  private val ptw = outer.ptw.module
643  private val ptw_to_l2_buffer = outer.ptw_to_l2_buffer.module
644  private val l1d_to_l2_buffer = outer.l1d_to_l2_buffer.module
645  ptw.io.hartId := io.hartId
646  ptw.io.sfence <> sfence
647  ptw.io.csr.tlb <> tlbcsr
648  ptw.io.csr.distribute_csr <> csrCtrl.distribute_csr
649
650  val perfEventsPTW = if (!coreParams.softPTW) {
651    ptw.getPerfEvents
652  } else {
653    Seq()
654  }
655
656  // dtlb
657  val dtlb_ld_tlb_ld = Module(new TLBNonBlock(LduCnt + HyuCnt + 1, 2, ldtlbParams))
658  val dtlb_st_tlb_st = Module(new TLBNonBlock(StaCnt, 1, sttlbParams))
659  val dtlb_prefetch_tlb_prefetch = Module(new TLBNonBlock(2, 2, pftlbParams))
660  val dtlb_ld = Seq(dtlb_ld_tlb_ld.io)
661  val dtlb_st = Seq(dtlb_st_tlb_st.io)
662  val dtlb_prefetch = Seq(dtlb_prefetch_tlb_prefetch.io)
663  /* tlb vec and related constants */
664  val dtlb = dtlb_ld ++ dtlb_st ++ dtlb_prefetch
665  val (dtlb_ld_idx, dtlb_st_idx, dtlb_pf_idx) = (0, 1, 2)
666  val TlbSubSizeVec = Seq(LduCnt + HyuCnt + 1, StaCnt, 2) // (load + hyu + stream pf, store, sms+l2bop)
667  val DTlbSize = TlbSubSizeVec.sum
668  val TlbStartVec = TlbSubSizeVec.scanLeft(0)(_ + _).dropRight(1)
669  val TlbEndVec = TlbSubSizeVec.scanLeft(0)(_ + _).drop(1)
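  // Resulting DTLB requestor port layout: ports [0, LduCnt+HyuCnt) serve the
  // load/hybrid pipes, port LduCnt+HyuCnt serves the L1 stream prefetcher, the next
  // StaCnt ports serve the store pipes, and the last two ports serve the SMS
  // prefetcher and the L2 TLB request (io.l2_tlb_req).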
670
671  val ptwio = Wire(new VectorTlbPtwIO(DTlbSize))
672  val dtlb_reqs = dtlb.map(_.requestor).flatten
673  val dtlb_pmps = dtlb.map(_.pmp).flatten
674  dtlb.map(_.hartId := io.hartId)
675  dtlb.map(_.sfence := sfence)
676  dtlb.map(_.csr := tlbcsr)
677  dtlb.map(_.flushPipe.map(a => a := false.B)) // non-blocking TLB does not need flushPipe
678  dtlb.map(_.redirect := redirect)
679  if (refillBothTlb) {
680    require(ldtlbParams.outReplace == sttlbParams.outReplace)
681    require(ldtlbParams.outReplace == hytlbParams.outReplace)
682    require(ldtlbParams.outReplace == pftlbParams.outReplace)
683    require(ldtlbParams.outReplace)
684
685    val replace = Module(new TlbReplace(DTlbSize, ldtlbParams))
686    replace.io.apply_sep(dtlb_ld.map(_.replace) ++ dtlb_st.map(_.replace) ++ dtlb_prefetch.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
687  } else {
688    // TODO: there will be bugs in TlbReplace when outReplace is enabled, since the order of Hyu is not right.
689    if (ldtlbParams.outReplace) {
690      val replace_ld = Module(new TlbReplace(LduCnt + 1, ldtlbParams))
691      replace_ld.io.apply_sep(dtlb_ld.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
692    }
693    if (hytlbParams.outReplace) {
694      val replace_hy = Module(new TlbReplace(HyuCnt, hytlbParams))
695      replace_hy.io.apply_sep(dtlb_ld.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
696    }
697    if (sttlbParams.outReplace) {
698      val replace_st = Module(new TlbReplace(StaCnt, sttlbParams))
699      replace_st.io.apply_sep(dtlb_st.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
700    }
701    if (pftlbParams.outReplace) {
702      val replace_pf = Module(new TlbReplace(2, pftlbParams))
703      replace_pf.io.apply_sep(dtlb_prefetch.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
704    }
705  }
706
707  val ptw_resp_next = RegEnable(ptwio.resp.bits, ptwio.resp.valid)
708  val ptw_resp_v = RegNext(ptwio.resp.valid && !(sfence.valid && tlbcsr.satp.changed && tlbcsr.vsatp.changed && tlbcsr.hgatp.changed), init = false.B)
709  ptwio.resp.ready := true.B
710
711  val tlbreplay = WireInit(VecInit(Seq.fill(LdExuCnt)(false.B)))
712  val tlbreplay_reg = GatedValidRegNext(tlbreplay)
713  val dtlb_ld0_tlbreplay_reg = GatedValidRegNext(dtlb_ld(0).tlbreplay)
714
715  if (backendParams.debugEn){ dontTouch(tlbreplay) }
716
717  for (i <- 0 until LdExuCnt) {
718    tlbreplay(i) := dtlb_ld(0).ptw.req(i).valid && ptw_resp_next.vector(0) && ptw_resp_v &&
719      ptw_resp_next.data.hit(dtlb_ld(0).ptw.req(i).bits.vpn, tlbcsr.satp.asid, tlbcsr.vsatp.asid, tlbcsr.hgatp.vmid, allType = true, ignoreAsid = true)
720  }
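  // A load PTW request that hits the just-registered PTW response (ptw_resp_next)
  // is filtered out of ptwio.req below and instead flagged as tlbreplay, which feeds
  // the load units' tlb_hint.full so the access is replayed rather than re-requested.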
721
722  dtlb.flatMap(a => a.ptw.req)
723    .zipWithIndex
724    .foreach{ case (tlb, i) =>
725      tlb.ready := ptwio.req(i).ready
726      ptwio.req(i).bits := tlb.bits
727    val vector_hit = if (refillBothTlb) Cat(ptw_resp_next.vector).orR
728      else if (i < TlbEndVec(dtlb_ld_idx)) Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_ld_idx), TlbEndVec(dtlb_ld_idx))).orR
729      else if (i < TlbEndVec(dtlb_st_idx)) Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_st_idx), TlbEndVec(dtlb_st_idx))).orR
730      else                                 Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_pf_idx), TlbEndVec(dtlb_pf_idx))).orR
731    ptwio.req(i).valid := tlb.valid && !(ptw_resp_v && vector_hit && ptw_resp_next.data.hit(tlb.bits.vpn, tlbcsr.satp.asid, tlbcsr.vsatp.asid, tlbcsr.hgatp.vmid, allType = true, ignoreAsid = true))
732  }
733  dtlb.foreach(_.ptw.resp.bits := ptw_resp_next.data)
734  if (refillBothTlb) {
735    dtlb.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector).orR)
736  } else {
737    dtlb_ld.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_ld_idx), TlbEndVec(dtlb_ld_idx))).orR)
738    dtlb_st.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_st_idx), TlbEndVec(dtlb_st_idx))).orR)
739    dtlb_prefetch.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_pf_idx), TlbEndVec(dtlb_pf_idx))).orR)
740  }
741  dtlb_ld.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.take(LduCnt + HyuCnt + 1)).orR)
742  dtlb_st.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.slice(LduCnt + HyuCnt + 1, LduCnt + HyuCnt + 1 + StaCnt)).orR)
743  dtlb_prefetch.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.drop(LduCnt + HyuCnt + 1 + StaCnt)).orR)
744
745  val dtlbRepeater  = PTWNewFilter(ldtlbParams.fenceDelay, ptwio, ptw.io.tlb(1), sfence, tlbcsr, l2tlbParams.dfilterSize)
746  val itlbRepeater3 = PTWRepeaterNB(passReady = false, itlbParams.fenceDelay, io.fetch_to_mem.itlb, ptw.io.tlb(0), sfence, tlbcsr)
747
748  lsq.io.debugTopDown.robHeadMissInDTlb := dtlbRepeater.io.rob_head_miss_in_tlb
749
750  // pmp
751  val pmp = Module(new PMP())
752  pmp.io.distribute_csr <> csrCtrl.distribute_csr
753
754  val pmp_checkers = Seq.fill(DTlbSize)(Module(new PMPChecker(4, leaveHitMux = true)))
755  val pmp_check = pmp_checkers.map(_.io)
756  for ((p,d) <- pmp_check zip dtlb_pmps) {
757    if (HasBitmapCheck) {
758      p.apply(tlbcsr.mbmc.CMODE.asBool, tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, d)
759    } else {
760      p.apply(tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, d)
761    }
762    require(p.req.bits.size.getWidth == d.bits.size.getWidth)
763  }
764
765  for (i <- 0 until LduCnt) {
766    io.debug_ls.debugLsInfo(i) := loadUnits(i).io.debug_ls
767  }
768  for (i <- 0 until HyuCnt) {
769    io.debug_ls.debugLsInfo.drop(LduCnt)(i) := hybridUnits(i).io.ldu_io.debug_ls
770  }
771  for (i <- 0 until StaCnt) {
772    io.debug_ls.debugLsInfo.drop(LduCnt + HyuCnt)(i) := storeUnits(i).io.debug_ls
773  }
774  for (i <- 0 until HyuCnt) {
775    io.debug_ls.debugLsInfo.drop(LduCnt + HyuCnt + StaCnt)(i) := hybridUnits(i).io.stu_io.debug_ls
776  }
777
778  io.mem_to_ooo.lsTopdownInfo := loadUnits.map(_.io.lsTopdownInfo) ++ hybridUnits.map(_.io.ldu_io.lsTopdownInfo)
779
780  // trigger
781  val tdata = RegInit(VecInit(Seq.fill(TriggerNum)(0.U.asTypeOf(new MatchTriggerIO))))
782  val tEnable = RegInit(VecInit(Seq.fill(TriggerNum)(false.B)))
783  tEnable := csrCtrl.mem_trigger.tEnableVec
784  when(csrCtrl.mem_trigger.tUpdate.valid) {
785    tdata(csrCtrl.mem_trigger.tUpdate.bits.addr) := csrCtrl.mem_trigger.tUpdate.bits.tdata
786  }
787  val triggerCanRaiseBpExp = csrCtrl.mem_trigger.triggerCanRaiseBpExp
788  val debugMode = csrCtrl.mem_trigger.debugMode
789
790  val backendTriggerTimingVec = VecInit(tdata.map(_.timing))
791  val backendTriggerChainVec = VecInit(tdata.map(_.chain))
792
793  XSDebug(tEnable.asUInt.orR, "Debug Mode: At least one store trigger is enabled\n")
794  for (j <- 0 until TriggerNum)
795    PrintTriggerInfo(tEnable(j), tdata(j))
796
797  // The segment instruction is executed atomically.
798  // After a segment instruction starts executing, no other instructions should be executed.
799  val vSegmentFlag = RegInit(false.B)
800
801  when(GatedValidRegNext(vSegmentUnit.io.in.fire)) {
802    vSegmentFlag := true.B
803  }.elsewhen(GatedValidRegNext(vSegmentUnit.io.uopwriteback.valid)) {
804    vSegmentFlag := false.B
805  }
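  // While vSegmentFlag is set, the load units' dcache requests are stalled and
  // dcache load port 0 is driven by vSegmentUnit's rdcache interface (see the
  // per-load-unit wiring below).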
806
807  val misalign_allow_spec = RegInit(true.B)
808  val ldu_rollback_with_misalign_nack = loadUnits.map(ldu =>
809    ldu.io.lsq.ldin.bits.isFrmMisAlignBuf && ldu.io.lsq.ldin.bits.rep_info.rar_nack && ldu.io.rollback.valid
810  ).reduce(_ || _)
811  when (ldu_rollback_with_misalign_nack) {
812    misalign_allow_spec := false.B
813  } .elsewhen(lsq.io.rarValidCount < (LoadQueueRARSize - 4).U) {
814    misalign_allow_spec := true.B
815  }
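  // Speculation for misaligned loads is disabled after a rollback caused by a
  // RAR-queue nack from the misalign buffer, and re-enabled once the RAR queue has
  // drained below (LoadQueueRARSize - 4) valid entries.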
816
817  // LoadUnit
818  val correctMissTrain = Constantin.createRecord(s"CorrectMissTrain$hartId", initValue = false)
819
820  for (i <- 0 until LduCnt) {
821    loadUnits(i).io.redirect <> redirect
822    loadUnits(i).io.misalign_allow_spec := misalign_allow_spec
823
824    // get input from dispatch
825    loadUnits(i).io.ldin <> io.ooo_to_mem.issueLda(i)
826    loadUnits(i).io.feedback_slow <> io.mem_to_ooo.ldaIqFeedback(i).feedbackSlow
827    io.mem_to_ooo.ldaIqFeedback(i).feedbackFast := DontCare
828    loadUnits(i).io.correctMissTrain := correctMissTrain
829    io.mem_to_ooo.ldCancel.drop(HyuCnt)(i) := loadUnits(i).io.ldCancel
830    io.mem_to_ooo.wakeup.drop(HyuCnt)(i) := loadUnits(i).io.wakeup
831
832    // vector
833    if (i < VlduCnt) {
834      loadUnits(i).io.vecldout.ready := false.B
835    } else {
836      loadUnits(i).io.vecldin.valid := false.B
837      loadUnits(i).io.vecldin.bits := DontCare
838      loadUnits(i).io.vecldout.ready := false.B
839    }
840
841    // fast replay
842    loadUnits(i).io.fast_rep_in <> loadUnits(i).io.fast_rep_out
843
844    // SoftPrefetch to frontend (prefetch.i)
845    loadUnits(i).io.ifetchPrefetch <> io.ifetchPrefetch(i)
846
847    // dcache access
848    loadUnits(i).io.dcache <> dcache.io.lsu.load(i)
849    if(i == 0){
850      vSegmentUnit.io.rdcache := DontCare
851      dcache.io.lsu.load(i).req.valid := loadUnits(i).io.dcache.req.valid || vSegmentUnit.io.rdcache.req.valid
852      dcache.io.lsu.load(i).req.bits  := Mux1H(Seq(
853        vSegmentUnit.io.rdcache.req.valid -> vSegmentUnit.io.rdcache.req.bits,
854        loadUnits(i).io.dcache.req.valid -> loadUnits(i).io.dcache.req.bits
855      ))
856      vSegmentUnit.io.rdcache.req.ready := dcache.io.lsu.load(i).req.ready
857    }
858
859    // Dcache requests must also be preempted by the segment unit.
860    when(vSegmentFlag){
861      loadUnits(i).io.dcache.req.ready             := false.B // Dcache is preempted.
862
863      dcache.io.lsu.load(0).pf_source              := vSegmentUnit.io.rdcache.pf_source
864      dcache.io.lsu.load(0).s1_paddr_dup_lsu       := vSegmentUnit.io.rdcache.s1_paddr_dup_lsu
865      dcache.io.lsu.load(0).s1_paddr_dup_dcache    := vSegmentUnit.io.rdcache.s1_paddr_dup_dcache
866      dcache.io.lsu.load(0).s1_kill                := vSegmentUnit.io.rdcache.s1_kill
867      dcache.io.lsu.load(0).s2_kill                := vSegmentUnit.io.rdcache.s2_kill
868      dcache.io.lsu.load(0).s0_pc                  := vSegmentUnit.io.rdcache.s0_pc
869      dcache.io.lsu.load(0).s1_pc                  := vSegmentUnit.io.rdcache.s1_pc
870      dcache.io.lsu.load(0).s2_pc                  := vSegmentUnit.io.rdcache.s2_pc
871      dcache.io.lsu.load(0).is128Req               := vSegmentUnit.io.rdcache.is128Req
872    }.otherwise {
873      loadUnits(i).io.dcache.req.ready             := dcache.io.lsu.load(i).req.ready
874
875      dcache.io.lsu.load(0).pf_source              := loadUnits(0).io.dcache.pf_source
876      dcache.io.lsu.load(0).s1_paddr_dup_lsu       := loadUnits(0).io.dcache.s1_paddr_dup_lsu
877      dcache.io.lsu.load(0).s1_paddr_dup_dcache    := loadUnits(0).io.dcache.s1_paddr_dup_dcache
878      dcache.io.lsu.load(0).s1_kill                := loadUnits(0).io.dcache.s1_kill
879      dcache.io.lsu.load(0).s2_kill                := loadUnits(0).io.dcache.s2_kill
880      dcache.io.lsu.load(0).s0_pc                  := loadUnits(0).io.dcache.s0_pc
881      dcache.io.lsu.load(0).s1_pc                  := loadUnits(0).io.dcache.s1_pc
882      dcache.io.lsu.load(0).s2_pc                  := loadUnits(0).io.dcache.s2_pc
883      dcache.io.lsu.load(0).is128Req               := loadUnits(0).io.dcache.is128Req
884    }
885
886    // forward
887    loadUnits(i).io.lsq.forward <> lsq.io.forward(i)
888    loadUnits(i).io.sbuffer <> sbuffer.io.forward(i)
889    loadUnits(i).io.ubuffer <> uncache.io.forward(i)
890    loadUnits(i).io.tl_d_channel := dcache.io.lsu.forward_D(i)
891    loadUnits(i).io.forward_mshr <> dcache.io.lsu.forward_mshr(i)
892    // ld-ld violation check
893    loadUnits(i).io.lsq.ldld_nuke_query <> lsq.io.ldu.ldld_nuke_query(i)
894    loadUnits(i).io.lsq.stld_nuke_query <> lsq.io.ldu.stld_nuke_query(i)
895    // load queue oldest (dequeue) pointer
896    loadUnits(i).io.lsq.lqDeqPtr := lsq.io.lqDeqPtr
897    loadUnits(i).io.csrCtrl       <> csrCtrl
898    // dcache refill req
899  // loadUnits(i).io.refill           <> delayedDcacheRefill
900    // dtlb
901    loadUnits(i).io.tlb <> dtlb_reqs.take(LduCnt)(i)
902    if(i == 0 ){ // port 0 assigned to vSegmentUnit
903      val vsegmentDtlbReqValid = vSegmentUnit.io.dtlb.req.valid // segment tlb request needs to be delayed by 1 cycle
904      dtlb_reqs.take(LduCnt)(i).req.valid := loadUnits(i).io.tlb.req.valid || RegNext(vsegmentDtlbReqValid)
905      vSegmentUnit.io.dtlb.req.ready      := dtlb_reqs.take(LduCnt)(i).req.ready
906      dtlb_reqs.take(LduCnt)(i).req.bits  := ParallelPriorityMux(Seq(
907        RegNext(vsegmentDtlbReqValid)     -> RegEnable(vSegmentUnit.io.dtlb.req.bits, vsegmentDtlbReqValid),
908        loadUnits(i).io.tlb.req.valid     -> loadUnits(i).io.tlb.req.bits
909      ))
910    }
911    // pmp
912    loadUnits(i).io.pmp <> pmp_check(i).resp
913    // st-ld violation query
914    val stld_nuke_query = storeUnits.map(_.io.stld_nuke_query) ++ hybridUnits.map(_.io.stu_io.stld_nuke_query)
915    for (s <- 0 until StorePipelineWidth) {
916      loadUnits(i).io.stld_nuke_query(s) := stld_nuke_query(s)
917    }
918    loadUnits(i).io.lq_rep_full <> lsq.io.lq_rep_full
919    // load prefetch train
920    prefetcherOpt.foreach(pf => {
921      // sms will train on all miss load sources
922      val source = loadUnits(i).io.prefetch_train
923      pf.io.ld_in(i).valid := Mux(pf_train_on_hit,
924        source.valid,
925        source.valid && source.bits.isFirstIssue && source.bits.miss
926      )
927      pf.io.ld_in(i).bits := source.bits
928      val loadPc = RegNext(io.ooo_to_mem.issueLda(i).bits.uop.pc) // for s1
929      pf.io.ld_in(i).bits.uop.pc := Mux(
930        loadUnits(i).io.s2_ptr_chasing,
931        RegEnable(loadPc, loadUnits(i).io.s2_prefetch_spec),
932        RegEnable(RegEnable(loadPc, loadUnits(i).io.s1_prefetch_spec), loadUnits(i).io.s2_prefetch_spec)
933      )
934    })
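    // In the prefetch-training pc selection above, a pointer-chasing load
    // (s2_ptr_chasing) applies only a single RegEnable stage to the pc instead of two.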
935    l1PrefetcherOpt.foreach(pf => {
936      // stream will train on all load sources
937      val source = loadUnits(i).io.prefetch_train_l1
938      pf.io.ld_in(i).valid := source.valid && source.bits.isFirstIssue
939      pf.io.ld_in(i).bits := source.bits
940    })
941
942    // load to load fast forward: load(i) prefers data(i)
943    val l2l_fwd_out = loadUnits.map(_.io.l2l_fwd_out) ++ hybridUnits.map(_.io.ldu_io.l2l_fwd_out)
944    val fastPriority = (i until LduCnt + HyuCnt) ++ (0 until i)
945    val fastValidVec = fastPriority.map(j => l2l_fwd_out(j).valid)
946    val fastDataVec = fastPriority.map(j => l2l_fwd_out(j).data)
947    val fastErrorVec = fastPriority.map(j => l2l_fwd_out(j).dly_ld_err)
948    val fastMatchVec = fastPriority.map(j => io.ooo_to_mem.loadFastMatch(i)(j))
949    loadUnits(i).io.l2l_fwd_in.valid := VecInit(fastValidVec).asUInt.orR
950    loadUnits(i).io.l2l_fwd_in.data := ParallelPriorityMux(fastValidVec, fastDataVec)
951    loadUnits(i).io.l2l_fwd_in.dly_ld_err := ParallelPriorityMux(fastValidVec, fastErrorVec)
952    val fastMatch = ParallelPriorityMux(fastValidVec, fastMatchVec)
953    loadUnits(i).io.ld_fast_match := fastMatch
954    loadUnits(i).io.ld_fast_imm := io.ooo_to_mem.loadFastImm(i)
955    loadUnits(i).io.ld_fast_fuOpType := io.ooo_to_mem.loadFastFuOpType(i)
956    loadUnits(i).io.replay <> lsq.io.replay(i)
957
958    val l2_hint = RegNext(io.l2_hint)
959
960    // L2 Hint for DCache
961    dcache.io.l2_hint <> l2_hint
962
963    loadUnits(i).io.l2_hint <> l2_hint
964    loadUnits(i).io.tlb_hint.id := dtlbRepeater.io.hint.get.req(i).id
965    loadUnits(i).io.tlb_hint.full := dtlbRepeater.io.hint.get.req(i).full ||
966      tlbreplay_reg(i) || dtlb_ld0_tlbreplay_reg(i)
967
968    // passdown to lsq (load s2)
969    lsq.io.ldu.ldin(i) <> loadUnits(i).io.lsq.ldin
970    if (i == UncacheWBPort) {
971      lsq.io.ldout(i) <> loadUnits(i).io.lsq.uncache
972    } else {
973      lsq.io.ldout(i).ready := true.B
974      loadUnits(i).io.lsq.uncache.valid := false.B
975      loadUnits(i).io.lsq.uncache.bits := DontCare
976    }
977    lsq.io.ld_raw_data(i) <> loadUnits(i).io.lsq.ld_raw_data
978    lsq.io.ncOut(i) <> loadUnits(i).io.lsq.nc_ldin
979    lsq.io.l2_hint.valid := l2_hint.valid
980    lsq.io.l2_hint.bits.sourceId := l2_hint.bits.sourceId
981    lsq.io.l2_hint.bits.isKeyword := l2_hint.bits.isKeyword
982
983    lsq.io.tlb_hint <> dtlbRepeater.io.hint.get
984
985    // connect misalignBuffer
986    loadMisalignBuffer.io.req(i) <> loadUnits(i).io.misalign_buf
987
988    if (i == MisalignWBPort) {
989      loadUnits(i).io.misalign_ldin  <> loadMisalignBuffer.io.splitLoadReq
990      loadUnits(i).io.misalign_ldout <> loadMisalignBuffer.io.splitLoadResp
991    } else {
992      loadUnits(i).io.misalign_ldin.valid := false.B
993      loadUnits(i).io.misalign_ldin.bits := DontCare
994    }
995
996    // alter writeback exception info
997    io.mem_to_ooo.s3_delayed_load_error(i) := loadUnits(i).io.s3_dly_ld_err
998
999    // update mem dependency predictor
1000    // io.memPredUpdate(i) := DontCare
1001
1002    // --------------------------------
1003    // Load Triggers
1004    // --------------------------------
1005    loadUnits(i).io.fromCsrTrigger.tdataVec := tdata
1006    loadUnits(i).io.fromCsrTrigger.tEnableVec := tEnable
1007    loadUnits(i).io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
1008    loadUnits(i).io.fromCsrTrigger.debugMode := debugMode
1009  }
1010
1011  for (i <- 0 until HyuCnt) {
1012    hybridUnits(i).io.redirect <> redirect
1013
1014    // get input from dispatch
1015    hybridUnits(i).io.lsin <> io.ooo_to_mem.issueHya(i)
1016    hybridUnits(i).io.feedback_slow <> io.mem_to_ooo.hyuIqFeedback(i).feedbackSlow
1017    hybridUnits(i).io.feedback_fast <> io.mem_to_ooo.hyuIqFeedback(i).feedbackFast
1018    hybridUnits(i).io.correctMissTrain := correctMissTrain
1019    io.mem_to_ooo.ldCancel.take(HyuCnt)(i) := hybridUnits(i).io.ldu_io.ldCancel
1020    io.mem_to_ooo.wakeup.take(HyuCnt)(i) := hybridUnits(i).io.ldu_io.wakeup
1021
1022    // ------------------------------------
1023    //  Load Port
1024    // ------------------------------------
1025    // fast replay
1026    hybridUnits(i).io.ldu_io.fast_rep_in <> hybridUnits(i).io.ldu_io.fast_rep_out
1027
1028    // dcache access
1029    hybridUnits(i).io.ldu_io.dcache <> dcache.io.lsu.load(LduCnt + i)
1030    hybridUnits(i).io.stu_io.dcache <> dcache.io.lsu.sta(StaCnt + i)
1031
1032    // forward from lsq
1033    hybridUnits(i).io.ldu_io.lsq.forward <> lsq.io.forward(LduCnt + i)
1034    // forward from sbuffer / uncache buffer
1035    hybridUnits(i).io.ldu_io.sbuffer <> sbuffer.io.forward(LduCnt + i)
1036    hybridUnits(i).io.ldu_io.ubuffer <> uncache.io.forward(LduCnt + i)
1037    // hybridUnits(i).io.ldu_io.vec_forward <> vsFlowQueue.io.forward(LduCnt + i)
1038    hybridUnits(i).io.ldu_io.vec_forward := DontCare
1039    hybridUnits(i).io.ldu_io.tl_d_channel := dcache.io.lsu.forward_D(LduCnt + i)
1040    hybridUnits(i).io.ldu_io.forward_mshr <> dcache.io.lsu.forward_mshr(LduCnt + i)
1041    // ld-ld violation check
1042    hybridUnits(i).io.ldu_io.lsq.ldld_nuke_query <> lsq.io.ldu.ldld_nuke_query(LduCnt + i)
1043    hybridUnits(i).io.ldu_io.lsq.stld_nuke_query <> lsq.io.ldu.stld_nuke_query(LduCnt + i)
1044    hybridUnits(i).io.csrCtrl <> csrCtrl
1045    // tlb hint
1046    hybridUnits(i).io.ldu_io.tlb_hint.id := dtlbRepeater.io.hint.get.req(LduCnt + i).id
1047    hybridUnits(i).io.ldu_io.tlb_hint.full := dtlbRepeater.io.hint.get.req(LduCnt + i).full ||
1048      tlbreplay_reg(LduCnt + i) || dtlb_ld0_tlbreplay_reg(LduCnt + i)
1049
1050    // dtlb
1051    hybridUnits(i).io.tlb <> dtlb_ld.head.requestor(LduCnt + i)
1052    // pmp
1053    hybridUnits(i).io.pmp <> pmp_check.drop(LduCnt)(i).resp
1054    // st-ld violation query
1055    val stld_nuke_query = VecInit(storeUnits.map(_.io.stld_nuke_query) ++ hybridUnits.map(_.io.stu_io.stld_nuke_query))
1056    hybridUnits(i).io.ldu_io.stld_nuke_query := stld_nuke_query
1057    hybridUnits(i).io.ldu_io.lq_rep_full <> lsq.io.lq_rep_full
1058    // load prefetch train
1059    prefetcherOpt.foreach(pf => {
1060      val source = hybridUnits(i).io.prefetch_train
1061      pf.io.ld_in(LduCnt + i).valid := Mux(pf_train_on_hit,
1062        source.valid,
1063        source.valid && source.bits.isFirstIssue && source.bits.miss
1064      )
1065      pf.io.ld_in(LduCnt + i).bits := source.bits
1066      pf.io.ld_in(LduCnt + i).bits.uop.pc := Mux(hybridUnits(i).io.ldu_io.s2_ptr_chasing, io.ooo_to_mem.hybridPc(i), RegNext(io.ooo_to_mem.hybridPc(i)))
1067    })
1068    l1PrefetcherOpt.foreach(pf => {
1069      // stream will train on all load sources
1070      val source = hybridUnits(i).io.prefetch_train_l1
1071      pf.io.ld_in(LduCnt + i).valid := source.valid && source.bits.isFirstIssue &&
1072                                       FuType.isLoad(source.bits.uop.fuType)
1073      pf.io.ld_in(LduCnt + i).bits := source.bits
1074      pf.io.st_in(StaCnt + i).valid := false.B
1075      pf.io.st_in(StaCnt + i).bits := DontCare
1076    })
1077    prefetcherOpt.foreach(pf => {
1078      val source = hybridUnits(i).io.prefetch_train
1079      pf.io.st_in(StaCnt + i).valid := Mux(pf_train_on_hit,
1080        source.valid,
1081        source.valid && source.bits.isFirstIssue && source.bits.miss
1082      ) && FuType.isStore(source.bits.uop.fuType)
1083      pf.io.st_in(StaCnt + i).bits := source.bits
1084      pf.io.st_in(StaCnt + i).bits.uop.pc := RegNext(io.ooo_to_mem.hybridPc(i))
1085    })
1086
1087    // load to load fast forward: load(i) prefers data(i)
1088    val l2l_fwd_out = loadUnits.map(_.io.l2l_fwd_out) ++ hybridUnits.map(_.io.ldu_io.l2l_fwd_out)
1089    val fastPriority = (LduCnt + i until LduCnt + HyuCnt) ++ (0 until LduCnt + i)
1090    val fastValidVec = fastPriority.map(j => l2l_fwd_out(j).valid)
1091    val fastDataVec = fastPriority.map(j => l2l_fwd_out(j).data)
1092    val fastErrorVec = fastPriority.map(j => l2l_fwd_out(j).dly_ld_err)
1093    val fastMatchVec = fastPriority.map(j => io.ooo_to_mem.loadFastMatch(LduCnt + i)(j))
1094    hybridUnits(i).io.ldu_io.l2l_fwd_in.valid := VecInit(fastValidVec).asUInt.orR
1095    hybridUnits(i).io.ldu_io.l2l_fwd_in.data := ParallelPriorityMux(fastValidVec, fastDataVec)
1096    hybridUnits(i).io.ldu_io.l2l_fwd_in.dly_ld_err := ParallelPriorityMux(fastValidVec, fastErrorVec)
1097    val fastMatch = ParallelPriorityMux(fastValidVec, fastMatchVec)
1098    hybridUnits(i).io.ldu_io.ld_fast_match := fastMatch
1099    hybridUnits(i).io.ldu_io.ld_fast_imm := io.ooo_to_mem.loadFastImm(LduCnt + i)
1100    hybridUnits(i).io.ldu_io.ld_fast_fuOpType := io.ooo_to_mem.loadFastFuOpType(LduCnt + i)
1101    hybridUnits(i).io.ldu_io.replay <> lsq.io.replay(LduCnt + i)
1102    hybridUnits(i).io.ldu_io.l2_hint <> io.l2_hint
1103
1104    // uncache
1105    lsq.io.ldout.drop(LduCnt)(i) <> hybridUnits(i).io.ldu_io.lsq.uncache
1106    lsq.io.ld_raw_data.drop(LduCnt)(i) <> hybridUnits(i).io.ldu_io.lsq.ld_raw_data
1107
1108
1109    // passdown to lsq (load s2)
1110    hybridUnits(i).io.ldu_io.lsq.nc_ldin.valid := false.B
1111    hybridUnits(i).io.ldu_io.lsq.nc_ldin.bits := DontCare
1112    lsq.io.ldu.ldin(LduCnt + i) <> hybridUnits(i).io.ldu_io.lsq.ldin
1113    // Lsq to sta unit
1114    lsq.io.sta.storeMaskIn(StaCnt + i) <> hybridUnits(i).io.stu_io.st_mask_out
1115
1116    // Lsq to std unit's rs
1117    lsq.io.std.storeDataIn(StaCnt + i) := stData(StaCnt + i)
1118    lsq.io.std.storeDataIn(StaCnt + i).valid := stData(StaCnt + i).valid && !st_data_atomics(StaCnt + i)
1119    // prefetch
1120    hybridUnits(i).io.stu_io.prefetch_req <> sbuffer.io.store_prefetch(StaCnt + i)
1121
1122    io.mem_to_ooo.s3_delayed_load_error(LduCnt + i) := hybridUnits(i).io.ldu_io.s3_dly_ld_err
1123
1124    // ------------------------------------
1125    //  Store Port
1126    // ------------------------------------
1127    hybridUnits(i).io.stu_io.lsq <> lsq.io.sta.storeAddrIn.takeRight(HyuCnt)(i)
1128    hybridUnits(i).io.stu_io.lsq_replenish <> lsq.io.sta.storeAddrInRe.takeRight(HyuCnt)(i)
1129
1130    lsq.io.sta.storeMaskIn.takeRight(HyuCnt)(i) <> hybridUnits(i).io.stu_io.st_mask_out
1131    io.mem_to_ooo.stIn.takeRight(HyuCnt)(i).valid := hybridUnits(i).io.stu_io.issue.valid
1132    io.mem_to_ooo.stIn.takeRight(HyuCnt)(i).bits := hybridUnits(i).io.stu_io.issue.bits
1133
1134    // ------------------------------------
1135    //  Vector Store Port
1136    // ------------------------------------
1137    hybridUnits(i).io.vec_stu_io.isFirstIssue := true.B
1138
1139    // -------------------------
1140    // Store Triggers
1141    // -------------------------
1142    hybridUnits(i).io.fromCsrTrigger.tdataVec := tdata
1143    hybridUnits(i).io.fromCsrTrigger.tEnableVec := tEnable
1144    hybridUnits(i).io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
1145    hybridUnits(i).io.fromCsrTrigger.debugMode := debugMode
1146  }
1147
1148  // misalignBuffer
1149  loadMisalignBuffer.io.redirect                <> redirect
1150  loadMisalignBuffer.io.rob.lcommit             := io.ooo_to_mem.lsqio.lcommit
1151  loadMisalignBuffer.io.rob.scommit             := io.ooo_to_mem.lsqio.scommit
1152  loadMisalignBuffer.io.rob.pendingMMIOld       := io.ooo_to_mem.lsqio.pendingMMIOld
1153  loadMisalignBuffer.io.rob.pendingld           := io.ooo_to_mem.lsqio.pendingld
1154  loadMisalignBuffer.io.rob.pendingst           := io.ooo_to_mem.lsqio.pendingst
1155  loadMisalignBuffer.io.rob.pendingVst          := io.ooo_to_mem.lsqio.pendingVst
1156  loadMisalignBuffer.io.rob.commit              := io.ooo_to_mem.lsqio.commit
1157  loadMisalignBuffer.io.rob.pendingPtr          := io.ooo_to_mem.lsqio.pendingPtr
1158  loadMisalignBuffer.io.rob.pendingPtrNext      := io.ooo_to_mem.lsqio.pendingPtrNext
1159
1160  lsq.io.loadMisalignFull                       := loadMisalignBuffer.io.loadMisalignFull
1161  lsq.io.misalignAllowSpec                      := misalign_allow_spec
1162
1163  storeMisalignBuffer.io.redirect               <> redirect
1164  storeMisalignBuffer.io.rob.lcommit            := io.ooo_to_mem.lsqio.lcommit
1165  storeMisalignBuffer.io.rob.scommit            := io.ooo_to_mem.lsqio.scommit
1166  storeMisalignBuffer.io.rob.pendingMMIOld      := io.ooo_to_mem.lsqio.pendingMMIOld
1167  storeMisalignBuffer.io.rob.pendingld          := io.ooo_to_mem.lsqio.pendingld
1168  storeMisalignBuffer.io.rob.pendingst          := io.ooo_to_mem.lsqio.pendingst
1169  storeMisalignBuffer.io.rob.pendingVst         := io.ooo_to_mem.lsqio.pendingVst
1170  storeMisalignBuffer.io.rob.commit             := io.ooo_to_mem.lsqio.commit
1171  storeMisalignBuffer.io.rob.pendingPtr         := io.ooo_to_mem.lsqio.pendingPtr
1172  storeMisalignBuffer.io.rob.pendingPtrNext     := io.ooo_to_mem.lsqio.pendingPtrNext
1173
1174  lsq.io.maControl                              <> storeMisalignBuffer.io.sqControl
1175
1176  lsq.io.cmoOpReq <> dcache.io.cmoOpReq
1177  lsq.io.cmoOpResp <> dcache.io.cmoOpResp
1178
1179  // Prefetcher
1180  val StreamDTLBPortIndex = TlbStartVec(dtlb_ld_idx) + LduCnt + HyuCnt
1181  val PrefetcherDTLBPortIndex = TlbStartVec(dtlb_pf_idx)
1182  val L2toL1DLBPortIndex = TlbStartVec(dtlb_pf_idx) + 1
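      // DTLB port assignment, as computed above: the stream (L1) prefetcher uses the load-TLB
      //  port right after the load/hybrid units, while the SMS prefetcher and the L2-to-L1
      //  request path use the two dedicated prefetch-TLB ports.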
1183  prefetcherOpt match {
1184    case Some(pf) =>
1185      dtlb_reqs(PrefetcherDTLBPortIndex) <> pf.io.tlb_req
1186      pf.io.pmp_resp := pmp_check(PrefetcherDTLBPortIndex).resp
1187    case None =>
1188      dtlb_reqs(PrefetcherDTLBPortIndex) := DontCare
1189      dtlb_reqs(PrefetcherDTLBPortIndex).req.valid := false.B
1190      dtlb_reqs(PrefetcherDTLBPortIndex).resp.ready := true.B
1191  }
1192  l1PrefetcherOpt match {
1193    case Some(pf) =>
1194      dtlb_reqs(StreamDTLBPortIndex) <> pf.io.tlb_req
1195      pf.io.pmp_resp := pmp_check(StreamDTLBPortIndex).resp
1196    case None =>
1197      dtlb_reqs(StreamDTLBPortIndex) := DontCare
1198      dtlb_reqs(StreamDTLBPortIndex).req.valid := false.B
1199      dtlb_reqs(StreamDTLBPortIndex).resp.ready := true.B
1200  }
1201  dtlb_reqs(L2toL1DLBPortIndex) <> io.l2_tlb_req
1202  dtlb_reqs(L2toL1DLBPortIndex).resp.ready := true.B
1203  io.l2_pmp_resp := pmp_check(L2toL1DLBPortIndex).resp
1204
1205  // StoreUnit
1206  for (i <- 0 until StdCnt) {
1207    stdExeUnits(i).io.flush <> redirect
1208    stdExeUnits(i).io.in.valid := io.ooo_to_mem.issueStd(i).valid
1209    io.ooo_to_mem.issueStd(i).ready := stdExeUnits(i).io.in.ready
1210    stdExeUnits(i).io.in.bits := io.ooo_to_mem.issueStd(i).bits
1211  }
1212
1213  for (i <- 0 until StaCnt) {
1214    val stu = storeUnits(i)
1215
1216    stu.io.redirect      <> redirect
1217    stu.io.csrCtrl       <> csrCtrl
1218    stu.io.dcache        <> dcache.io.lsu.sta(i)
1219    stu.io.feedback_slow <> io.mem_to_ooo.staIqFeedback(i).feedbackSlow
1220    stu.io.stin         <> io.ooo_to_mem.issueSta(i)
1221    stu.io.lsq          <> lsq.io.sta.storeAddrIn(i)
1222    stu.io.lsq_replenish <> lsq.io.sta.storeAddrInRe(i)
1223    // dtlb
1224    stu.io.tlb          <> dtlb_st.head.requestor(i)
1225    stu.io.pmp          <> pmp_check(LduCnt + HyuCnt + 1 + i).resp
1226
1227    // -------------------------
1228    // Store Triggers
1229    // -------------------------
1230    stu.io.fromCsrTrigger.tdataVec := tdata
1231    stu.io.fromCsrTrigger.tEnableVec := tEnable
1232    stu.io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
1233    stu.io.fromCsrTrigger.debugMode := debugMode
1234
1235    // prefetch
1236    stu.io.prefetch_req <> sbuffer.io.store_prefetch(i)
1237
1238    // store unit does not need fast feedback
1239    io.mem_to_ooo.staIqFeedback(i).feedbackFast := DontCare
1240
1241    // Lsq to sta unit
1242    lsq.io.sta.storeMaskIn(i) <> stu.io.st_mask_out
1243
1244    // connect misalignBuffer
1245    storeMisalignBuffer.io.req(i) <> stu.io.misalign_buf
1246
1247    if (i == 0) {
1248      stu.io.misalign_stin  <> storeMisalignBuffer.io.splitStoreReq
1249      stu.io.misalign_stout <> storeMisalignBuffer.io.splitStoreResp
1250    } else {
1251      stu.io.misalign_stin.valid := false.B
1252      stu.io.misalign_stin.bits := DontCare
1253    }
1254
1255    // Lsq to std unit's rs
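        // For ports that also carry vector stores (i < VstuCnt), store data coming from vsSplit
        //  takes priority over scalar std data when writing into the store queue; otherwise only
        //  scalar std data (excluding atomics) is forwarded.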
1256    if (i < VstuCnt){
1257      when (vsSplit(i).io.vstd.get.valid) {
1258        lsq.io.std.storeDataIn(i).valid := true.B
1259        lsq.io.std.storeDataIn(i).bits := vsSplit(i).io.vstd.get.bits
1260        stData(i).ready := false.B
1261      }.otherwise {
1262        lsq.io.std.storeDataIn(i).valid := stData(i).valid && !st_data_atomics(i)
1263        lsq.io.std.storeDataIn(i).bits.uop := stData(i).bits.uop
1264        lsq.io.std.storeDataIn(i).bits.data := stData(i).bits.data
1265        lsq.io.std.storeDataIn(i).bits.mask.map(_ := 0.U)
1266        lsq.io.std.storeDataIn(i).bits.vdIdx.map(_ := 0.U)
1267        lsq.io.std.storeDataIn(i).bits.vdIdxInField.map(_ := 0.U)
1268        stData(i).ready := true.B
1269      }
1270    } else {
1271        lsq.io.std.storeDataIn(i).valid := stData(i).valid && !st_data_atomics(i)
1272        lsq.io.std.storeDataIn(i).bits.uop := stData(i).bits.uop
1273        lsq.io.std.storeDataIn(i).bits.data := stData(i).bits.data
1274        lsq.io.std.storeDataIn(i).bits.mask.map(_ := 0.U)
1275        lsq.io.std.storeDataIn(i).bits.vdIdx.map(_ := 0.U)
1276        lsq.io.std.storeDataIn(i).bits.vdIdxInField.map(_ := 0.U)
1277        stData(i).ready := true.B
1278    }
1279    lsq.io.std.storeDataIn.map(_.bits.debug := 0.U.asTypeOf(new DebugBundle))
1280    lsq.io.std.storeDataIn.foreach(_.bits.isFromLoadUnit := DontCare)
1281
1282
1283    // store prefetch train
1284    l1PrefetcherOpt.foreach(pf => {
1285      // the stream (L1) prefetcher trains only on load sources, so its store training port is tied off
1286      pf.io.st_in(i).valid := false.B
1287      pf.io.st_in(i).bits := DontCare
1288    })
1289
1290    prefetcherOpt.foreach(pf => {
1291      pf.io.st_in(i).valid := Mux(pf_train_on_hit,
1292        stu.io.prefetch_train.valid,
1293        stu.io.prefetch_train.valid && stu.io.prefetch_train.bits.isFirstIssue && (
1294          stu.io.prefetch_train.bits.miss
1295          )
1296      )
1297      pf.io.st_in(i).bits := stu.io.prefetch_train.bits
1298      pf.io.st_in(i).bits.uop.pc := RegEnable(RegEnable(io.ooo_to_mem.storePc(i), stu.io.s1_prefetch_spec), stu.io.s2_prefetch_spec)
1299    })
1300
1301    // 1. sync issue info to store set LFST
1302    // 2. when a store issues, broadcast its sqPtr to wake up the following insts
1303    // io.stIn(i).valid := io.issue(exuParameters.LduCnt + i).valid
1304    // io.stIn(i).bits := io.issue(exuParameters.LduCnt + i).bits
1305    io.mem_to_ooo.stIn(i).valid := stu.io.issue.valid
1306    io.mem_to_ooo.stIn(i).bits := stu.io.issue.bits
1307
1308    stu.io.stout.ready := true.B
1309
1310    // vector
1311    if (i < VstuCnt) {
1312      stu.io.vecstin <> vsSplit(i).io.out
1313      // vsFlowQueue.io.pipeFeedback(i) <> stu.io.vec_feedback_slow // need connect
1314    } else {
1315      stu.io.vecstin.valid := false.B
1316      stu.io.vecstin.bits := DontCare
1317      stu.io.vecstout.ready := false.B
1318    }
1319    stu.io.vec_isFirstIssue := true.B // TODO
1320  }
1321
1322  val sqOtherStout = WireInit(0.U.asTypeOf(DecoupledIO(new MemExuOutput)))
1323  sqOtherStout.valid := lsq.io.mmioStout.valid || lsq.io.cboZeroStout.valid
1324  sqOtherStout.bits  := Mux(lsq.io.cboZeroStout.valid, lsq.io.cboZeroStout.bits, lsq.io.mmioStout.bits)
1325  assert(!(lsq.io.mmioStout.valid && lsq.io.cboZeroStout.valid), "Cannot writeback to mmio and cboZero at the same time.")
1326
1327  // Store writeback by StoreQueue:
1328  //   1. cbo Zero
1329  //   2. mmio
1330  // Currently the two should never be valid at the same time, so cbo zero is simply given higher priority.
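      // NewPipelineConnect inserts one register stage between the StoreQueue writeback and the
      //  stOut(0) arbitration below (presumably for timing); otherStout is drained only on cycles
      //  when store unit 0 has no scalar writeback of its own.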
1331  val otherStout = WireInit(0.U.asTypeOf(lsq.io.mmioStout))
1332  NewPipelineConnect(
1333    sqOtherStout, otherStout, otherStout.fire,
1334    false.B,
1335    Option("otherStoutConnect")
1336  )
1337  otherStout.ready := false.B
1338  when (otherStout.valid && !storeUnits(0).io.stout.valid) {
1339    stOut(0).valid := true.B
1340    stOut(0).bits  := otherStout.bits
1341    otherStout.ready := true.B
1342  }
1343  lsq.io.mmioStout.ready := sqOtherStout.ready
1344  lsq.io.cboZeroStout.ready := sqOtherStout.ready
1345
1346  // vec mmio writeback
1347  lsq.io.vecmmioStout.ready := false.B
1348
1349  // misalign buffer writeback will overwrite stOut(0)
1350  val storeMisalignCanWriteBack = !otherStout.valid && !storeUnits(0).io.stout.valid && !storeUnits(0).io.vecstout.valid
1351  storeMisalignBuffer.io.writeBack.ready := storeMisalignCanWriteBack
1352  storeMisalignBuffer.io.storeOutValid := storeUnits(0).io.stout.valid
1353  storeMisalignBuffer.io.storeVecOutValid := storeUnits(0).io.vecstout.valid
1354  when (storeMisalignBuffer.io.writeBack.valid && storeMisalignCanWriteBack) {
1355    stOut(0).valid := true.B
1356    stOut(0).bits  := storeMisalignBuffer.io.writeBack.bits
1357  }
1358
1359  // Uncache
1360  uncache.io.enableOutstanding := io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable
1361  uncache.io.hartId := io.hartId
1362  lsq.io.uncacheOutstanding := io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable
1363
1364  // Lsq
1365  io.mem_to_ooo.lsqio.mmio       := lsq.io.rob.mmio
1366  io.mem_to_ooo.lsqio.uop        := lsq.io.rob.uop
1367  lsq.io.rob.lcommit             := io.ooo_to_mem.lsqio.lcommit
1368  lsq.io.rob.scommit             := io.ooo_to_mem.lsqio.scommit
1369  lsq.io.rob.pendingMMIOld       := io.ooo_to_mem.lsqio.pendingMMIOld
1370  lsq.io.rob.pendingld           := io.ooo_to_mem.lsqio.pendingld
1371  lsq.io.rob.pendingst           := io.ooo_to_mem.lsqio.pendingst
1372  lsq.io.rob.pendingVst          := io.ooo_to_mem.lsqio.pendingVst
1373  lsq.io.rob.commit              := io.ooo_to_mem.lsqio.commit
1374  lsq.io.rob.pendingPtr          := io.ooo_to_mem.lsqio.pendingPtr
1375  lsq.io.rob.pendingPtrNext      := io.ooo_to_mem.lsqio.pendingPtrNext
1376
1377  //  lsq.io.rob            <> io.lsqio.rob
1378  lsq.io.enq            <> io.ooo_to_mem.enqLsq
1379  lsq.io.brqRedirect    <> redirect
1380
1381  //  violation rollback
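      // selectOldestRedirect returns a one-hot vector marking the valid redirect whose robIdx is
      //  oldest among all rollback sources (load units, hybrid load pipes, LSQ nack/nuke).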
1382  def selectOldestRedirect(xs: Seq[Valid[Redirect]]): Vec[Bool] = {
1383    val compareVec = (0 until xs.length).map(i => (0 until i).map(j => isAfter(xs(j).bits.robIdx, xs(i).bits.robIdx)))
1384    val resultOnehot = VecInit((0 until xs.length).map(i => Cat((0 until xs.length).map(j =>
1385      (if (j < i) !xs(j).valid || compareVec(i)(j)
1386      else if (j == i) xs(i).valid
1387      else !xs(j).valid || !compareVec(j)(i))
1388    )).andR))
1389    resultOnehot
1390  }
1391  val allRedirect = loadUnits.map(_.io.rollback) ++ hybridUnits.map(_.io.ldu_io.rollback) ++ lsq.io.nack_rollback ++ lsq.io.nuke_rollback
1392  val oldestOneHot = selectOldestRedirect(allRedirect)
1393  val oldestRedirect = WireDefault(Mux1H(oldestOneHot, allRedirect))
1394  // memory replay would not cause IAF/IPF/IGPF
1395  oldestRedirect.bits.cfiUpdate.backendIAF := false.B
1396  oldestRedirect.bits.cfiUpdate.backendIPF := false.B
1397  oldestRedirect.bits.cfiUpdate.backendIGPF := false.B
1398  io.mem_to_ooo.memoryViolation := oldestRedirect
1399  io.mem_to_ooo.lsqio.lqCanAccept  := lsq.io.lqCanAccept
1400  io.mem_to_ooo.lsqio.sqCanAccept  := lsq.io.sqCanAccept
1401
1402  // lsq.io.uncache        <> uncache.io.lsq
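      // A small FSM serializes uncache accesses between the LSQ and the uncache module: s_idle
      //  accepts a new request; a scalar request moves to s_scalar_uncache unless it is an NC
      //  access with outstanding writes enabled; s_vector_uncache is kept for the (currently
      //  tied-off) vector path; both busy states return to s_idle when the response fires.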
1403  val s_idle :: s_scalar_uncache :: s_vector_uncache :: Nil = Enum(3)
1404  val uncacheState = RegInit(s_idle)
1405  val uncacheReq = Wire(Decoupled(new UncacheWordReq))
1406  val uncacheIdResp = uncache.io.lsq.idResp
1407  val uncacheResp = Wire(Decoupled(new UncacheWordResp))
1408
1409  uncacheReq.bits := DontCare
1410  uncacheReq.valid := false.B
1411  uncacheReq.ready := false.B
1412  uncacheResp.bits := DontCare
1413  uncacheResp.valid := false.B
1414  uncacheResp.ready := false.B
1415  lsq.io.uncache.req.ready := false.B
1416  lsq.io.uncache.idResp.valid := false.B
1417  lsq.io.uncache.idResp.bits := DontCare
1418  lsq.io.uncache.resp.valid := false.B
1419  lsq.io.uncache.resp.bits := DontCare
1420
1421  switch (uncacheState) {
1422    is (s_idle) {
1423      when (uncacheReq.fire) {
1424        when (lsq.io.uncache.req.valid) {
1425          when (!lsq.io.uncache.req.bits.nc || !io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
1426            uncacheState := s_scalar_uncache
1427          }
1428        }.otherwise {
1429          // val isStore = vsFlowQueue.io.uncache.req.bits.cmd === MemoryOpConstants.M_XWR
1430          when (!io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
1431            uncacheState := s_vector_uncache
1432          }
1433        }
1434      }
1435    }
1436
1437    is (s_scalar_uncache) {
1438      when (uncacheResp.fire) {
1439        uncacheState := s_idle
1440      }
1441    }
1442
1443    is (s_vector_uncache) {
1444      when (uncacheResp.fire) {
1445        uncacheState := s_idle
1446      }
1447    }
1448  }
1449
1450  when (lsq.io.uncache.req.valid) {
1451    uncacheReq <> lsq.io.uncache.req
1452  }
1453  when (io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
1454    lsq.io.uncache.resp <> uncacheResp
1455    lsq.io.uncache.idResp <> uncacheIdResp
1456  }.otherwise {
1457    when (uncacheState === s_scalar_uncache) {
1458      lsq.io.uncache.resp <> uncacheResp
1459      lsq.io.uncache.idResp <> uncacheIdResp
1460    }
1461  }
1462  // add one pipeline stage on the uncache request/response path for better timing
1463  AddPipelineReg(uncacheReq, uncache.io.lsq.req, false.B)
1464  AddPipelineReg(uncache.io.lsq.resp, uncacheResp, false.B)
1465
1466  //lsq.io.refill         := delayedDcacheRefill
1467  lsq.io.release        := dcache.io.lsu.release
1468  lsq.io.lqCancelCnt <> io.mem_to_ooo.lqCancelCnt
1469  lsq.io.sqCancelCnt <> io.mem_to_ooo.sqCancelCnt
1470  lsq.io.lqDeq <> io.mem_to_ooo.lqDeq
1471  lsq.io.sqDeq <> io.mem_to_ooo.sqDeq
1472  // Todo: assign these
1473  io.mem_to_ooo.sqDeqPtr := lsq.io.sqDeqPtr
1474  io.mem_to_ooo.lqDeqPtr := lsq.io.lqDeqPtr
1475  lsq.io.tl_d_channel <> dcache.io.lsu.tl_d_channel
1476
1477  // LSQ to store buffer
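      // sbuffer port 0 is shared: vSegmentUnit's writes are merged with LSQ port 0 here. The
      //  Mux1H below assumes the two are never valid in the same cycle (segment instructions are
      //  presumed to execute exclusively, with the scalar store path drained).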
1478  lsq.io.sbuffer        <> sbuffer.io.in
1479  sbuffer.io.in(0).valid := lsq.io.sbuffer(0).valid || vSegmentUnit.io.sbuffer.valid
1480  sbuffer.io.in(0).bits  := Mux1H(Seq(
1481    vSegmentUnit.io.sbuffer.valid -> vSegmentUnit.io.sbuffer.bits,
1482    lsq.io.sbuffer(0).valid       -> lsq.io.sbuffer(0).bits
1483  ))
1484  vSegmentUnit.io.sbuffer.ready := sbuffer.io.in(0).ready
1485  lsq.io.sqEmpty        <> sbuffer.io.sqempty
1486  dcache.io.force_write := lsq.io.force_write
1487
1488  // Initialize to DontCare when difftest is not enabled.
1489  sbuffer.io.vecDifftestInfo      := DontCare
1490  lsq.io.sbufferVecDifftestInfo   := DontCare
1491  vSegmentUnit.io.vecDifftestInfo := DontCare
1492  if (env.EnableDifftest) {
1493    sbuffer.io.vecDifftestInfo .zipWithIndex.map{ case (sbufferPort, index) =>
1494      if (index == 0) {
1495        val vSegmentDifftestValid = vSegmentUnit.io.vecDifftestInfo.valid
1496        sbufferPort.valid := Mux(vSegmentDifftestValid, vSegmentUnit.io.vecDifftestInfo.valid, lsq.io.sbufferVecDifftestInfo(0).valid)
1497        sbufferPort.bits  := Mux(vSegmentDifftestValid, vSegmentUnit.io.vecDifftestInfo.bits, lsq.io.sbufferVecDifftestInfo(0).bits)
1498
1499        vSegmentUnit.io.vecDifftestInfo.ready  := sbufferPort.ready
1500        lsq.io.sbufferVecDifftestInfo(0).ready := sbufferPort.ready
1501      } else {
1502         sbufferPort <> lsq.io.sbufferVecDifftestInfo(index)
1503      }
1504    }
1505  }
1506
1507  // lsq.io.vecStoreRetire <> vsFlowQueue.io.sqRelease
1508  // lsq.io.vecWriteback.valid := vlWrapper.io.uopWriteback.fire &&
1509  //   vlWrapper.io.uopWriteback.bits.uop.vpu.lastUop
1510  // lsq.io.vecWriteback.bits := vlWrapper.io.uopWriteback.bits
1511
1512  // vector
1513  val vLoadCanAccept  = (0 until VlduCnt).map(i =>
1514    vlSplit(i).io.in.ready && VlduType.isVecLd(io.ooo_to_mem.issueVldu(i).bits.uop.fuOpType)
1515  )
1516  val vStoreCanAccept = (0 until VstuCnt).map(i =>
1517    vsSplit(i).io.in.ready && VstuType.isVecSt(io.ooo_to_mem.issueVldu(i).bits.uop.fuOpType)
1518  )
1519  val isSegment     = io.ooo_to_mem.issueVldu.head.valid && isVsegls(io.ooo_to_mem.issueVldu.head.bits.uop.fuType)
1520  val isFixVlUop    = io.ooo_to_mem.issueVldu.map{x =>
1521    x.bits.uop.vpu.isVleff && x.bits.uop.vpu.lastUop && x.valid
1522  }
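      // Segment instructions are steered to vSegmentUnit instead of the split units, and the
      //  trailing fault-only-first (vleff) "fix-vl" uop is presumably consumed by vfofBuffer, so
      //  both are masked off from the vlSplit/vsSplit valid signals below.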
1523
1524  // init port
1525  /**
1526   * TODO: the split vsMergeBuffers may be removed if one RS can accept two feedbacks, or if the RS does not need to replay uops
1527   * for now:
1528   *  RS0 -> VsSplit0 -> stu0 -> vsMergebuffer0 -> feedback -> RS0
1529   *  RS1 -> VsSplit1 -> stu1 -> vsMergebuffer1 -> feedback -> RS1
1530   *
1531   * vector loads don't need feedback
1532   *
1533   *  RS0 -> VlSplit0  -> ldu0 -> |
1534   *  RS1 -> VlSplit1  -> ldu1 -> |  -> vlMergebuffer
1535   *        replayIO   -> ldu3 -> |
1536   * */
1537  (0 until VstuCnt).foreach{i =>
1538    vsMergeBuffer(i).io.fromPipeline := DontCare
1539    vsMergeBuffer(i).io.fromSplit := DontCare
1540
1541    vsMergeBuffer(i).io.fromMisalignBuffer.get.flush := storeMisalignBuffer.io.toVecStoreMergeBuffer(i).flush
1542    vsMergeBuffer(i).io.fromMisalignBuffer.get.mbIndex := storeMisalignBuffer.io.toVecStoreMergeBuffer(i).mbIndex
1543  }
1544
1545  (0 until VstuCnt).foreach{i =>
1546    vsSplit(i).io.redirect <> redirect
1547    vsSplit(i).io.in <> io.ooo_to_mem.issueVldu(i)
1548    vsSplit(i).io.in.valid := io.ooo_to_mem.issueVldu(i).valid &&
1549                              vStoreCanAccept(i) && !isSegment
1550    vsSplit(i).io.toMergeBuffer <> vsMergeBuffer(i).io.fromSplit.head
1551    NewPipelineConnect(
1552      vsSplit(i).io.out, storeUnits(i).io.vecstin, storeUnits(i).io.vecstin.fire,
1553      Mux(vsSplit(i).io.out.fire, vsSplit(i).io.out.bits.uop.robIdx.needFlush(io.redirect), storeUnits(i).io.vecstin.bits.uop.robIdx.needFlush(io.redirect)),
1554      Option("VsSplitConnectStu")
1555    )
1556    vsSplit(i).io.vstd.get := DontCare // Todo: Discuss how to pass vector store data
1557
1558    vsSplit(i).io.vstdMisalign.get.storeMisalignBufferEmpty := !storeMisalignBuffer.io.full
1559    vsSplit(i).io.vstdMisalign.get.storePipeEmpty := !storeUnits(i).io.s0_s1_valid
1560
1561  }
1562  (0 until VlduCnt).foreach{i =>
1563    vlSplit(i).io.redirect <> redirect
1564    vlSplit(i).io.in <> io.ooo_to_mem.issueVldu(i)
1565    vlSplit(i).io.in.valid := io.ooo_to_mem.issueVldu(i).valid &&
1566                              vLoadCanAccept(i) && !isSegment && !isFixVlUop(i)
1567    vlSplit(i).io.toMergeBuffer <> vlMergeBuffer.io.fromSplit(i)
1568    vlSplit(i).io.threshold.get.valid := vlMergeBuffer.io.toSplit.get.threshold
1569    vlSplit(i).io.threshold.get.bits  := lsq.io.lqDeqPtr
1570    NewPipelineConnect(
1571      vlSplit(i).io.out, loadUnits(i).io.vecldin, loadUnits(i).io.vecldin.fire,
1572      Mux(vlSplit(i).io.out.fire, vlSplit(i).io.out.bits.uop.robIdx.needFlush(io.redirect), loadUnits(i).io.vecldin.bits.uop.robIdx.needFlush(io.redirect)),
1573      Option("VlSplitConnectLdu")
1574    )
1575
1576    // Subsequent instructions will be blocked
1577    vfofBuffer.io.in(i).valid := io.ooo_to_mem.issueVldu(i).valid
1578    vfofBuffer.io.in(i).bits  := io.ooo_to_mem.issueVldu(i).bits
1579  }
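      // Vector load writeback into vlMergeBuffer: on the misalign writeback port the load
      //  pipeline has priority, and loadMisalignBuffer's vector writeback is muxed in only when
      //  that pipeline has no vector result this cycle; other ports connect straight through.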
1580  (0 until LduCnt).foreach{i=>
1581    loadUnits(i).io.vecldout.ready         := vlMergeBuffer.io.fromPipeline(i).ready
1582    loadMisalignBuffer.io.vecWriteBack.ready := true.B
1583
1584    if (i == MisalignWBPort) {
1585      when(loadUnits(i).io.vecldout.valid) {
1586        vlMergeBuffer.io.fromPipeline(i).valid := loadUnits(i).io.vecldout.valid
1587        vlMergeBuffer.io.fromPipeline(i).bits  := loadUnits(i).io.vecldout.bits
1588      } .otherwise {
1589        vlMergeBuffer.io.fromPipeline(i).valid   := loadMisalignBuffer.io.vecWriteBack.valid
1590        vlMergeBuffer.io.fromPipeline(i).bits    := loadMisalignBuffer.io.vecWriteBack.bits
1591      }
1592    } else {
1593      vlMergeBuffer.io.fromPipeline(i).valid := loadUnits(i).io.vecldout.valid
1594      vlMergeBuffer.io.fromPipeline(i).bits  := loadUnits(i).io.vecldout.bits
1595    }
1596  }
1597
1598  (0 until StaCnt).foreach{i=>
1599    if(i < VstuCnt){
1600      storeUnits(i).io.vecstout.ready := true.B
1601      storeMisalignBuffer.io.vecWriteBack(i).ready := vsMergeBuffer(i).io.fromPipeline.head.ready
1602
1603      when(storeUnits(i).io.vecstout.valid) {
1604        vsMergeBuffer(i).io.fromPipeline.head.valid := storeUnits(i).io.vecstout.valid
1605        vsMergeBuffer(i).io.fromPipeline.head.bits  := storeUnits(i).io.vecstout.bits
1606      } .otherwise {
1607        vsMergeBuffer(i).io.fromPipeline.head.valid   := storeMisalignBuffer.io.vecWriteBack(i).valid
1608        vsMergeBuffer(i).io.fromPipeline.head.bits    := storeMisalignBuffer.io.vecWriteBack(i).bits
1609      }
1610    }
1611  }
1612
1613  (0 until VlduCnt).foreach{i=>
1614    io.ooo_to_mem.issueVldu(i).ready := vLoadCanAccept(i) || vStoreCanAccept(i)
1615  }
1616
1617  vlMergeBuffer.io.redirect <> redirect
1618  vsMergeBuffer.map(_.io.redirect <> redirect)
1619  (0 until VlduCnt).foreach{i=>
1620    vlMergeBuffer.io.toLsq(i) <> lsq.io.ldvecFeedback(i)
1621  }
1622  (0 until VstuCnt).foreach{i=>
1623    vsMergeBuffer(i).io.toLsq.head <> lsq.io.stvecFeedback(i)
1624  }
1625
1626  (0 until VlduCnt).foreach{i=>
1627    // send to RS
1628    vlMergeBuffer.io.feedback(i) <> io.mem_to_ooo.vlduIqFeedback(i).feedbackSlow
1629    io.mem_to_ooo.vlduIqFeedback(i).feedbackFast := DontCare
1630  }
1631  (0 until VstuCnt).foreach{i =>
1632    // send to RS
1633    if (i == 0){
1634      io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow.valid := vsMergeBuffer(i).io.feedback.head.valid || vSegmentUnit.io.feedback.valid
1635      io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow.bits := Mux1H(Seq(
1636        vSegmentUnit.io.feedback.valid -> vSegmentUnit.io.feedback.bits,
1637        vsMergeBuffer(i).io.feedback.head.valid ->  vsMergeBuffer(i).io.feedback.head.bits
1638      ))
1639      io.mem_to_ooo.vstuIqFeedback(i).feedbackFast := DontCare
1640    } else {
1641      vsMergeBuffer(i).io.feedback.head <> io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow
1642      io.mem_to_ooo.vstuIqFeedback(i).feedbackFast := DontCare
1643    }
1644  }
1645
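      // Vector writeback port arbitration (PriorityMux order below):
      //  port 0: vSegmentUnit > vlMergeBuffer > vsMergeBuffer
      //  port 1: vfofBuffer   > vlMergeBuffer > vsMergeBuffer
      //  others: vlMergeBuffer > vsMergeBuffer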
1646  (0 until VlduCnt).foreach{i=>
1647    if (i == 0){ // vSegmentUnit uses writeback port 0
1648      io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid || vSegmentUnit.io.uopwriteback.valid
1649      io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
1650        vSegmentUnit.io.uopwriteback.valid          -> vSegmentUnit.io.uopwriteback.bits,
1651        vlMergeBuffer.io.uopWriteback(i).valid      -> vlMergeBuffer.io.uopWriteback(i).bits,
1652        vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
1653      ))
1654      vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready && !vSegmentUnit.io.uopwriteback.valid
1655      vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid && !vSegmentUnit.io.uopwriteback.valid
1656      vSegmentUnit.io.uopwriteback.ready := io.mem_to_ooo.writebackVldu(i).ready
1657    } else if (i == 1) {
1658      io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid || vfofBuffer.io.uopWriteback.valid
1659      io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
1660        vfofBuffer.io.uopWriteback.valid            -> vfofBuffer.io.uopWriteback.bits,
1661        vlMergeBuffer.io.uopWriteback(i).valid      -> vlMergeBuffer.io.uopWriteback(i).bits,
1662        vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
1663      ))
1664      vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready && !vfofBuffer.io.uopWriteback.valid
1665      vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid && !vfofBuffer.io.uopWriteback.valid
1666      vfofBuffer.io.uopWriteback.ready := io.mem_to_ooo.writebackVldu(i).ready
1667    } else {
1668      io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid
1669      io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
1670        vlMergeBuffer.io.uopWriteback(i).valid -> vlMergeBuffer.io.uopWriteback(i).bits,
1671        vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
1672      ))
1673      vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready
1674      vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid
1675    }
1676
1677    vfofBuffer.io.mergeUopWriteback(i).valid := vlMergeBuffer.io.uopWriteback(i).valid
1678    vfofBuffer.io.mergeUopWriteback(i).bits  := vlMergeBuffer.io.uopWriteback(i).bits
1679  }
1680
1681
1682  vfofBuffer.io.redirect <> redirect
1683
1684  // Sbuffer
1685  sbuffer.io.csrCtrl    <> csrCtrl
1686  sbuffer.io.dcache     <> dcache.io.lsu.store
1687  sbuffer.io.memSetPattenDetected := dcache.io.memSetPattenDetected
1688  sbuffer.io.force_write <> lsq.io.force_write
1689  // flush sbuffer
1690  val cmoFlush = lsq.io.flushSbuffer.valid
1691  val fenceFlush = io.ooo_to_mem.flushSb
1692  val atomicsFlush = atomicsUnit.io.flush_sbuffer.valid || vSegmentUnit.io.flush_sbuffer.valid
1693  val stIsEmpty = sbuffer.io.flush.empty && uncache.io.flush.empty
1694  io.mem_to_ooo.sbIsEmpty := RegNext(stIsEmpty)
1695
1696  // if all of these sources try to flush the sbuffer at the same time
1697  // something must have gone wrong
1698  assert(!(fenceFlush && atomicsFlush && cmoFlush))
1699  sbuffer.io.flush.valid := RegNext(fenceFlush || atomicsFlush || cmoFlush)
1700  uncache.io.flush.valid := sbuffer.io.flush.valid
1701
1702  // AtomicsUnit: AtomicsUnit will override other control signals,
1703  // as atomics insts (LR/SC/AMO) will block the pipeline
1704  val s_normal +: s_atomics = Enum(StaCnt + HyuCnt + 1)
1705  val state = RegInit(s_normal)
1706
1707  val st_atomics = Seq.tabulate(StaCnt)(i =>
1708    io.ooo_to_mem.issueSta(i).valid && FuType.storeIsAMO((io.ooo_to_mem.issueSta(i).bits.uop.fuType))
1709  ) ++ Seq.tabulate(HyuCnt)(i =>
1710    io.ooo_to_mem.issueHya(i).valid && FuType.storeIsAMO((io.ooo_to_mem.issueHya(i).bits.uop.fuType))
1711  )
1712
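      // When an AMO/LR/SC is seen on a sta or hybrid issue port, that pipeline's input is
      //  squashed for the cycle and the uop is steered to atomicsUnit; the FSM leaves s_atomics
      //  once atomicsUnit writes back (atomicsUnit.io.out.valid).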
1713  for (i <- 0 until StaCnt) when(st_atomics(i)) {
1714    io.ooo_to_mem.issueSta(i).ready := atomicsUnit.io.in.ready
1715    storeUnits(i).io.stin.valid := false.B
1716
1717    state := s_atomics(i)
1718  }
1719  for (i <- 0 until HyuCnt) when(st_atomics(StaCnt + i)) {
1720    io.ooo_to_mem.issueHya(i).ready := atomicsUnit.io.in.ready
1721    hybridUnits(i).io.lsin.valid := false.B
1722
1723    state := s_atomics(StaCnt + i)
1724    assert(!st_atomics.zipWithIndex.filterNot(_._2 == StaCnt + i).unzip._1.reduce(_ || _))
1725  }
1726  when (atomicsUnit.io.out.valid) {
1727    state := s_normal
1728  }
1729
1730  atomicsUnit.io.in.valid := st_atomics.reduce(_ || _)
1731  atomicsUnit.io.in.bits  := Mux1H(Seq.tabulate(StaCnt)(i =>
1732    st_atomics(i) -> io.ooo_to_mem.issueSta(i).bits) ++
1733    Seq.tabulate(HyuCnt)(i => st_atomics(StaCnt+i) -> io.ooo_to_mem.issueHya(i).bits))
1734  atomicsUnit.io.storeDataIn.zipWithIndex.foreach { case (stdin, i) =>
1735    stdin.valid := st_data_atomics(i)
1736    stdin.bits := stData(i).bits
1737  }
1738  atomicsUnit.io.redirect <> redirect
1739
1740  // TODO: complete amo's pmp support
1741  val amoTlb = dtlb_ld(0).requestor(0)
1742  atomicsUnit.io.dtlb.resp.valid := false.B
1743  atomicsUnit.io.dtlb.resp.bits  := DontCare
1744  atomicsUnit.io.dtlb.req.ready  := amoTlb.req.ready
1745  atomicsUnit.io.pmpResp := pmp_check(0).resp
1746
1747  atomicsUnit.io.dcache <> dcache.io.lsu.atomics
1748  atomicsUnit.io.flush_sbuffer.empty := stIsEmpty
1749
1750  atomicsUnit.io.csrCtrl := csrCtrl
1751
1752  // for atomicsUnit, it uses loadUnit(0)'s TLB port
1753
1754  when (state =/= s_normal) {
1755    // use store wb port instead of load
1756    loadUnits(0).io.ldout.ready := false.B
1757    // use load_0's TLB
1758    atomicsUnit.io.dtlb <> amoTlb
1759
1760    // hw prefetch should be disabled while executing atomic insts
1761    loadUnits.map(i => i.io.prefetch_req.valid := false.B)
1762
1763    // make sure there are no in-flight uops in the load unit
1764    assert(!loadUnits(0).io.ldout.valid)
1765  }
1766
1767  lsq.io.flushSbuffer.empty := sbuffer.io.sbempty
1768
1769  for (i <- 0 until StaCnt) {
1770    when (state === s_atomics(i)) {
1771      io.mem_to_ooo.staIqFeedback(i).feedbackSlow := atomicsUnit.io.feedbackSlow
1772      assert(!storeUnits(i).io.feedback_slow.valid)
1773    }
1774  }
1775  for (i <- 0 until HyuCnt) {
1776    when (state === s_atomics(StaCnt + i)) {
1777      io.mem_to_ooo.hyuIqFeedback(i).feedbackSlow := atomicsUnit.io.feedbackSlow
1778      assert(!hybridUnits(i).io.feedback_slow.valid)
1779    }
1780  }
1781
1782  lsq.io.exceptionAddr.isStore := io.ooo_to_mem.isStoreException
1783  // Exception address is used several cycles after flush.
1784  // We delay the redirect-triggered clear by 10 cycles to ensure flush safety.
1785  val atomicsException = RegInit(false.B)
1786  when (DelayN(redirect.valid, 10) && atomicsException) {
1787    atomicsException := false.B
1788  }.elsewhen (atomicsUnit.io.exceptionInfo.valid) {
1789    atomicsException := true.B
1790  }
1791
1792  val misalignBufExceptionOverwrite = loadMisalignBuffer.io.overwriteExpBuf.valid || storeMisalignBuffer.io.overwriteExpBuf.valid
1793  val misalignBufExceptionVaddr = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
1794    loadMisalignBuffer.io.overwriteExpBuf.vaddr,
1795    storeMisalignBuffer.io.overwriteExpBuf.vaddr
1796  )
1797  val misalignBufExceptionIsHyper = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
1798    loadMisalignBuffer.io.overwriteExpBuf.isHyper,
1799    storeMisalignBuffer.io.overwriteExpBuf.isHyper
1800  )
1801  val misalignBufExceptionGpaddr = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
1802    loadMisalignBuffer.io.overwriteExpBuf.gpaddr,
1803    storeMisalignBuffer.io.overwriteExpBuf.gpaddr
1804  )
1805  val misalignBufExceptionIsForVSnonLeafPTE = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
1806    loadMisalignBuffer.io.overwriteExpBuf.isForVSnonLeafPTE,
1807    storeMisalignBuffer.io.overwriteExpBuf.isForVSnonLeafPTE
1808  )
1809
1810  val vSegmentException = RegInit(false.B)
1811  when (DelayN(redirect.valid, 10) && vSegmentException) {
1812    vSegmentException := false.B
1813  }.elsewhen (vSegmentUnit.io.exceptionInfo.valid) {
1814    vSegmentException := true.B
1815  }
1816  val atomicsExceptionAddress = RegEnable(atomicsUnit.io.exceptionInfo.bits.vaddr, atomicsUnit.io.exceptionInfo.valid)
1817  val vSegmentExceptionVstart = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vstart, vSegmentUnit.io.exceptionInfo.valid)
1818  val vSegmentExceptionVl     = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vl, vSegmentUnit.io.exceptionInfo.valid)
1819  val vSegmentExceptionAddress = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vaddr, vSegmentUnit.io.exceptionInfo.valid)
1820  val atomicsExceptionGPAddress = RegEnable(atomicsUnit.io.exceptionInfo.bits.gpaddr, atomicsUnit.io.exceptionInfo.valid)
1821  val vSegmentExceptionGPAddress = RegEnable(vSegmentUnit.io.exceptionInfo.bits.gpaddr, vSegmentUnit.io.exceptionInfo.valid)
1822  val atomicsExceptionIsForVSnonLeafPTE = RegEnable(atomicsUnit.io.exceptionInfo.bits.isForVSnonLeafPTE, atomicsUnit.io.exceptionInfo.valid)
1823  val vSegmentExceptionIsForVSnonLeafPTE = RegEnable(vSegmentUnit.io.exceptionInfo.bits.isForVSnonLeafPTE, vSegmentUnit.io.exceptionInfo.valid)
1824
1825  val exceptionVaddr = Mux(
1826    atomicsException,
1827    atomicsExceptionAddress,
1828    Mux(misalignBufExceptionOverwrite,
1829      misalignBufExceptionVaddr,
1830      Mux(vSegmentException,
1831        vSegmentExceptionAddress,
1832        lsq.io.exceptionAddr.vaddr
1833      )
1834    )
1835  )
1836  // whether vaddr needs extension or is from a hyper inst:
1837  // VaNeedExt: atomicsException -> false; misalignBufExceptionOverwrite -> true; vSegmentException -> false
1838  // IsHyper: atomicsException -> false; vSegmentException -> false
1839  val exceptionVaNeedExt = !atomicsException &&
1840    (misalignBufExceptionOverwrite ||
1841      (!vSegmentException && lsq.io.exceptionAddr.vaNeedExt))
1842  val exceptionIsHyper = !atomicsException &&
1843    (misalignBufExceptionOverwrite && misalignBufExceptionIsHyper ||
1844      (!vSegmentException && lsq.io.exceptionAddr.isHyper && !misalignBufExceptionOverwrite))
1845
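      // GenExceptionVa canonicalizes the faulting vaddr (used for xTVAL reporting, an assumption):
      //  depending on the active translation mode it is zero-extended (Bare / Sv39x4 / Sv48x4) or
      //  sign-extended (Sv39 / Sv48) to XLEN. For example, under Sv39 a 39-bit vaddr with bit 38
      //  set, 0x40_0000_0000, becomes 0xFFFF_FFC0_0000_0000.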
1846  def GenExceptionVa(
1847    mode: UInt, isVirt: Bool, vaNeedExt: Bool,
1848    satp: TlbSatpBundle, vsatp: TlbSatpBundle, hgatp: TlbHgatpBundle,
1849    vaddr: UInt
1850  ) = {
1851    require(VAddrBits >= 50)
1852
1853    val satpNone = satp.mode === 0.U
1854    val satpSv39 = satp.mode === 8.U
1855    val satpSv48 = satp.mode === 9.U
1856
1857    val vsatpNone = vsatp.mode === 0.U
1858    val vsatpSv39 = vsatp.mode === 8.U
1859    val vsatpSv48 = vsatp.mode === 9.U
1860
1861    val hgatpNone = hgatp.mode === 0.U
1862    val hgatpSv39x4 = hgatp.mode === 8.U
1863    val hgatpSv48x4 = hgatp.mode === 9.U
1864
1865    // For !isVirt, mode check is necessary, as we don't want virtual memory in M-mode.
1866    // For isVirt, mode check is unnecessary, as virt won't be 1 in M-mode.
1867    // Also, isVirt includes Hyper Insts, which do not depend on the mode either.
1868
1869    val useBareAddr =
1870      (isVirt && vsatpNone && hgatpNone) ||
1871      (!isVirt && (mode === CSRConst.ModeM)) ||
1872      (!isVirt && (mode =/= CSRConst.ModeM) && satpNone)
1873    val useSv39Addr =
1874      (isVirt && vsatpSv39) ||
1875      (!isVirt && (mode =/= CSRConst.ModeM) && satpSv39)
1876    val useSv48Addr =
1877      (isVirt && vsatpSv48) ||
1878      (!isVirt && (mode =/= CSRConst.ModeM) && satpSv48)
1879    val useSv39x4Addr = isVirt && vsatpNone && hgatpSv39x4
1880    val useSv48x4Addr = isVirt && vsatpNone && hgatpSv48x4
1881
1882    val bareAddr   = ZeroExt(vaddr(PAddrBits - 1, 0), XLEN)
1883    val sv39Addr   = SignExt(vaddr.take(39), XLEN)
1884    val sv39x4Addr = ZeroExt(vaddr.take(39 + 2), XLEN)
1885    val sv48Addr   = SignExt(vaddr.take(48), XLEN)
1886    val sv48x4Addr = ZeroExt(vaddr.take(48 + 2), XLEN)
1887
1888    val ExceptionVa = Wire(UInt(XLEN.W))
1889    when (vaNeedExt) {
1890      ExceptionVa := Mux1H(Seq(
1891        (useBareAddr)   -> bareAddr,
1892        (useSv39Addr)   -> sv39Addr,
1893        (useSv48Addr)   -> sv48Addr,
1894        (useSv39x4Addr) -> sv39x4Addr,
1895        (useSv48x4Addr) -> sv48x4Addr,
1896      ))
1897    } .otherwise {
1898      ExceptionVa := vaddr
1899    }
1900
1901    ExceptionVa
1902  }
1903
1904  io.mem_to_ooo.lsqio.vaddr := RegNext(
1905    GenExceptionVa(tlbcsr.priv.dmode, tlbcsr.priv.virt || exceptionIsHyper, exceptionVaNeedExt,
1906    tlbcsr.satp, tlbcsr.vsatp, tlbcsr.hgatp, exceptionVaddr)
1907  )
1908
1909  // vsegment instructions are executed atomically, which means atomicsException and vSegmentException should not be raised at the same time.
1910  XSError(atomicsException && vSegmentException, "atomicsException and vSegmentException raise at the same time!")
1911  io.mem_to_ooo.lsqio.vstart := RegNext(Mux(vSegmentException,
1912                                            vSegmentExceptionVstart,
1913                                            lsq.io.exceptionAddr.vstart)
1914  )
1915  io.mem_to_ooo.lsqio.vl     := RegNext(Mux(vSegmentException,
1916                                            vSegmentExceptionVl,
1917                                            lsq.io.exceptionAddr.vl)
1918  )
1919
1920  XSError(atomicsException && atomicsUnit.io.in.valid, "new instruction before exception triggers\n")
1921  io.mem_to_ooo.lsqio.gpaddr := RegNext(Mux(
1922    atomicsException,
1923    atomicsExceptionGPAddress,
1924    Mux(misalignBufExceptionOverwrite,
1925      misalignBufExceptionGpaddr,
1926      Mux(vSegmentException,
1927        vSegmentExceptionGPAddress,
1928        lsq.io.exceptionAddr.gpaddr
1929      )
1930    )
1931  ))
1932  io.mem_to_ooo.lsqio.isForVSnonLeafPTE := RegNext(Mux(
1933    atomicsException,
1934    atomicsExceptionIsForVSnonLeafPTE,
1935    Mux(misalignBufExceptionOverwrite,
1936      misalignBufExceptionIsForVSnonLeafPTE,
1937      Mux(vSegmentException,
1938        vSegmentExceptionIsForVSnonLeafPTE,
1939        lsq.io.exceptionAddr.isForVSnonLeafPTE
1940      )
1941    )
1942  ))
1943  io.mem_to_ooo.topToBackendBypass match { case x =>
1944    x.hartId            := io.hartId
1945    x.l2FlushDone       := RegNext(io.l2_flush_done)
1946    x.externalInterrupt.msip  := outer.clint_int_sink.in.head._1(0)
1947    x.externalInterrupt.mtip  := outer.clint_int_sink.in.head._1(1)
1948    x.externalInterrupt.meip  := outer.plic_int_sink.in.head._1(0)
1949    x.externalInterrupt.seip  := outer.plic_int_sink.in.last._1(0)
1950    x.externalInterrupt.debug := outer.debug_int_sink.in.head._1(0)
1951    x.externalInterrupt.nmi.nmi_31 := outer.nmi_int_sink.in.head._1(0) | outer.beu_local_int_sink.in.head._1(0)
1952    x.externalInterrupt.nmi.nmi_43 := outer.nmi_int_sink.in.head._1(1)
1953    x.msiInfo           := DelayNWithValid(io.fromTopToBackend.msiInfo, 1)
1954    x.clintTime         := DelayNWithValid(io.fromTopToBackend.clintTime, 1)
1955  }
1956
1957  io.memInfo.sqFull := RegNext(lsq.io.sqFull)
1958  io.memInfo.lqFull := RegNext(lsq.io.lqFull)
1959  io.memInfo.dcacheMSHRFull := RegNext(dcache.io.mshrFull)
1960
1961  io.inner_hartId := io.hartId
1962  io.inner_reset_vector := RegNext(io.outer_reset_vector)
1963  io.outer_cpu_halt := io.ooo_to_mem.backendToTopBypass.cpuHalted
1964  io.outer_l2_flush_en := io.ooo_to_mem.csrCtrl.flush_l2_enable
1965  io.outer_power_down_en := io.ooo_to_mem.csrCtrl.power_down_enable
1966  io.outer_cpu_critical_error := io.ooo_to_mem.backendToTopBypass.cpuCriticalError
1967  io.outer_msi_ack := io.ooo_to_mem.backendToTopBypass.msiAck
1968  io.outer_beu_errors_icache := RegNext(io.inner_beu_errors_icache)
1969  io.inner_hc_perfEvents <> RegNext(io.outer_hc_perfEvents)
1970
1971  // vector segmentUnit
1972  vSegmentUnit.io.in.bits <> io.ooo_to_mem.issueVldu.head.bits
1973  vSegmentUnit.io.in.valid := isSegment && io.ooo_to_mem.issueVldu.head.valid // is a segment instruction
1974  vSegmentUnit.io.dtlb.resp.bits <> dtlb_reqs.take(LduCnt).head.resp.bits
1975  vSegmentUnit.io.dtlb.resp.valid <> dtlb_reqs.take(LduCnt).head.resp.valid
1976  vSegmentUnit.io.pmpResp <> pmp_check.head.resp
1977  vSegmentUnit.io.flush_sbuffer.empty := stIsEmpty
1978  vSegmentUnit.io.redirect <> redirect
1979  vSegmentUnit.io.rdcache.resp.bits := dcache.io.lsu.load(0).resp.bits
1980  vSegmentUnit.io.rdcache.resp.valid := dcache.io.lsu.load(0).resp.valid
1981  vSegmentUnit.io.rdcache.s2_bank_conflict := dcache.io.lsu.load(0).s2_bank_conflict
1982  // -------------------------
1983  // Vector Segment Triggers
1984  // -------------------------
1985  vSegmentUnit.io.fromCsrTrigger.tdataVec := tdata
1986  vSegmentUnit.io.fromCsrTrigger.tEnableVec := tEnable
1987  vSegmentUnit.io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
1988  vSegmentUnit.io.fromCsrTrigger.debugMode := debugMode
1989
1990  // reset tree of MemBlock
1991  if (p(DebugOptionsKey).ResetGen) {
1992    val leftResetTree = ResetGenNode(
1993      Seq(
1994        ModuleNode(ptw),
1995        ModuleNode(ptw_to_l2_buffer),
1996        ModuleNode(lsq),
1997        ModuleNode(dtlb_st_tlb_st),
1998        ModuleNode(dtlb_prefetch_tlb_prefetch),
1999        ModuleNode(pmp)
2000      )
2001      ++ pmp_checkers.map(ModuleNode(_))
2002      ++ (if (prefetcherOpt.isDefined) Seq(ModuleNode(prefetcherOpt.get)) else Nil)
2003      ++ (if (l1PrefetcherOpt.isDefined) Seq(ModuleNode(l1PrefetcherOpt.get)) else Nil)
2004    )
2005    val rightResetTree = ResetGenNode(
2006      Seq(
2007        ModuleNode(sbuffer),
2008        ModuleNode(dtlb_ld_tlb_ld),
2009        ModuleNode(dcache),
2010        ModuleNode(l1d_to_l2_buffer),
2011        CellNode(io.reset_backend)
2012      )
2013    )
2014    ResetGen(leftResetTree, reset, sim = false, io.dft_reset)
2015    ResetGen(rightResetTree, reset, sim = false, io.dft_reset)
2016  } else {
2017    io.reset_backend := DontCare
2018  }
2019  io.resetInFrontendBypass.toL2Top := io.resetInFrontendBypass.fromFrontend
2020  // trace interface
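      // The toEncoder fields are registered for one cycle on their way to L2Top (RegNext/RegEnable
      //  on group valid), and iaddr is reconstructed as the recorded address plus
      //  (ftqOffset << instOffsetBits).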
2021  val traceToL2Top = io.traceCoreInterfaceBypass.toL2Top
2022  val traceFromBackend = io.traceCoreInterfaceBypass.fromBackend
2023  traceFromBackend.fromEncoder := RegNext(traceToL2Top.fromEncoder)
2024  traceToL2Top.toEncoder.trap  := RegEnable(
2025    traceFromBackend.toEncoder.trap,
2026    traceFromBackend.toEncoder.groups(0).valid && Itype.isTrap(traceFromBackend.toEncoder.groups(0).bits.itype)
2027  )
2028  traceToL2Top.toEncoder.priv := RegEnable(
2029    traceFromBackend.toEncoder.priv,
2030    traceFromBackend.toEncoder.groups(0).valid
2031  )
2032  (0 until TraceGroupNum).foreach { i =>
2033    traceToL2Top.toEncoder.groups(i).valid := RegNext(traceFromBackend.toEncoder.groups(i).valid)
2034    traceToL2Top.toEncoder.groups(i).bits.iretire := RegNext(traceFromBackend.toEncoder.groups(i).bits.iretire)
2035    traceToL2Top.toEncoder.groups(i).bits.itype := RegNext(traceFromBackend.toEncoder.groups(i).bits.itype)
2036    traceToL2Top.toEncoder.groups(i).bits.ilastsize := RegEnable(
2037      traceFromBackend.toEncoder.groups(i).bits.ilastsize,
2038      traceFromBackend.toEncoder.groups(i).valid
2039    )
2040    traceToL2Top.toEncoder.groups(i).bits.iaddr := RegEnable(
2041      traceFromBackend.toEncoder.groups(i).bits.iaddr,
2042      traceFromBackend.toEncoder.groups(i).valid
2043    ) + (RegEnable(
2044      traceFromBackend.toEncoder.groups(i).bits.ftqOffset.getOrElse(0.U),
2045      traceFromBackend.toEncoder.groups(i).valid
2046    ) << instOffsetBits)
2047  }
2048
2049
2050  io.mem_to_ooo.storeDebugInfo := DontCare
2051  // store event difftest information
2052  if (env.EnableDifftest) {
2053    (0 until EnsbufferWidth).foreach{i =>
2054        io.mem_to_ooo.storeDebugInfo(i).robidx := sbuffer.io.vecDifftestInfo(i).bits.robIdx
2055        sbuffer.io.vecDifftestInfo(i).bits.pc := io.mem_to_ooo.storeDebugInfo(i).pc
2056    }
2057  }
2058
2059  // top-down info
2060  dcache.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
2061  dtlbRepeater.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
2062  lsq.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
2063  io.debugTopDown.toCore.robHeadMissInDCache := dcache.io.debugTopDown.robHeadMissInDCache
2064  io.debugTopDown.toCore.robHeadTlbReplay := lsq.io.debugTopDown.robHeadTlbReplay
2065  io.debugTopDown.toCore.robHeadTlbMiss := lsq.io.debugTopDown.robHeadTlbMiss
2066  io.debugTopDown.toCore.robHeadLoadVio := lsq.io.debugTopDown.robHeadLoadVio
2067  io.debugTopDown.toCore.robHeadLoadMSHR := lsq.io.debugTopDown.robHeadLoadMSHR
2068  dcache.io.debugTopDown.robHeadOtherReplay := lsq.io.debugTopDown.robHeadOtherReplay
2069  dcache.io.debugRolling := io.debugRolling
2070
2071  lsq.io.noUopsIssued := io.topDownInfo.toBackend.noUopsIssued
2072  io.topDownInfo.toBackend.lqEmpty := lsq.io.lqEmpty
2073  io.topDownInfo.toBackend.sqEmpty := lsq.io.sqEmpty
2074  io.topDownInfo.toBackend.l1Miss := dcache.io.l1Miss
2075  io.topDownInfo.toBackend.l2TopMiss.l2Miss := RegNext(io.topDownInfo.fromL2Top.l2Miss)
2076  io.topDownInfo.toBackend.l2TopMiss.l3Miss := RegNext(io.topDownInfo.fromL2Top.l3Miss)
2077
2078  val hyLdDeqCount = PopCount(io.ooo_to_mem.issueHya.map(x => x.valid && FuType.isLoad(x.bits.uop.fuType)))
2079  val hyStDeqCount = PopCount(io.ooo_to_mem.issueHya.map(x => x.valid && FuType.isStore(x.bits.uop.fuType)))
2080  val ldDeqCount = PopCount(io.ooo_to_mem.issueLda.map(_.valid)) +& hyLdDeqCount
2081  val stDeqCount = PopCount(io.ooo_to_mem.issueSta.take(StaCnt).map(_.valid)) +& hyStDeqCount
2082  val iqDeqCount = ldDeqCount +& stDeqCount
2083  XSPerfAccumulate("load_iq_deq_count", ldDeqCount)
2084  XSPerfHistogram("load_iq_deq_count", ldDeqCount, true.B, 0, LdExuCnt + 1)
2085  XSPerfAccumulate("store_iq_deq_count", stDeqCount)
2086  XSPerfHistogram("store_iq_deq_count", stDeqCount, true.B, 0, StAddrCnt + 1)
2087  XSPerfAccumulate("ls_iq_deq_count", iqDeqCount)
2088
2089  val pfevent = Module(new PFEvent)
2090  pfevent.io.distribute_csr := csrCtrl.distribute_csr
2091  val csrevents = pfevent.io.hpmevent.slice(16,24)
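      // hpmevent selectors 16..23 are the slice used by MemBlock; HPerfMonitor below matches them
      //  against allPerfEvents, where index 0 is reserved as "no event".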
2092
2093  val perfFromUnits = (loadUnits ++ Seq(sbuffer, lsq, dcache)).flatMap(_.getPerfEvents)
2094  val perfFromPTW = perfEventsPTW.map(x => ("PTW_" + x._1, x._2))
2095  val perfBlock     = Seq(("ldDeqCount", ldDeqCount),
2096                          ("stDeqCount", stDeqCount))
2097  // let index = 0 be no event
2098  val allPerfEvents = Seq(("noEvent", 0.U)) ++ perfFromUnits ++ perfFromPTW ++ perfBlock
2099
2100  if (printEventCoding) {
2101    for (((name, inc), i) <- allPerfEvents.zipWithIndex) {
2102      println("MemBlock perfEvents Set", name, inc, i)
2103    }
2104  }
2105
2106  val allPerfInc = allPerfEvents.map(_._2.asTypeOf(new PerfEvent))
2107  val perfEvents = HPerfMonitor(csrevents, allPerfInc).getPerfEvents
2108  generatePerfEvent()
2109
2110  private val mbistPl = MbistPipeline.PlaceMbistPipeline(Int.MaxValue, "MbistPipeMemBlk", hasMbist)
2111  private val mbistIntf = if(hasMbist) {
2112    val params = mbistPl.get.nodeParams
2113    val intf = Some(Module(new MbistInterface(
2114      params = Seq(params),
2115      ids = Seq(mbistPl.get.childrenIds),
2116      name = s"MbistIntfMemBlk",
2117      pipelineNum = 1
2118    )))
2119    intf.get.toPipeline.head <> mbistPl.get.mbist
2120    mbistPl.get.registerCSV(intf.get.info, "MbistMemBlk")
2121    intf.get.mbist := DontCare
2122    dontTouch(intf.get.mbist)
2123    //TODO: add mbist controller connections here
2124    intf
2125  } else {
2126    None
2127  }
2128  private val sigFromSrams = if (hasMbist) Some(SramHelper.genBroadCastBundleTop()) else None
2129  private val cg = ClockGate.genTeSrc
2130  dontTouch(cg)
2131  if (hasMbist) {
2132    sigFromSrams.get := io.dft.get
2133    cg.cgen := io.dft.get.cgen
2134    io.dft_frnt.get := io.dft.get
2135    io.dft_reset_frnt.get := io.dft_reset.get
2136    io.dft_bcknd.get := io.dft.get
2137    io.dft_reset_bcknd.get := io.dft_reset.get
2138  } else {
2139    cg.cgen := false.B
2140  }
2141}
2142
2143class MemBlock()(implicit p: Parameters) extends LazyModule
2144  with HasXSParameter {
2145  override def shouldBeInlined: Boolean = false
2146
2147  val inner = LazyModule(new MemBlockInlined())
2148
2149  lazy val module = new MemBlockImp(this)
2150}
2151
2152class MemBlockImp(wrapper: MemBlock) extends LazyModuleImp(wrapper) {
2153  val io = IO(wrapper.inner.module.io.cloneType)
2154  val io_perf = IO(wrapper.inner.module.io_perf.cloneType)
2155  io <> wrapper.inner.module.io
2156  io_perf <> wrapper.inner.module.io_perf
2157
2158  if (p(DebugOptionsKey).ResetGen) {
2159    ResetGen(ResetGenNode(Seq(ModuleNode(wrapper.inner.module))), reset, sim = false, io.dft_reset)
2160  }
2161}
2162