xref: /XiangShan/src/main/scala/xiangshan/mem/MemBlock.scala (revision 05cc6da9631ca9da7abb3d03ae37e832778a8a0e)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.mem
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import freechips.rocketchip.diplomacy._
23import freechips.rocketchip.diplomacy.{BundleBridgeSource, LazyModule, LazyModuleImp}
24import freechips.rocketchip.interrupts.{IntSinkNode, IntSinkPortSimple}
25import freechips.rocketchip.tile.HasFPUParameters
26import freechips.rocketchip.tilelink._
27import utils._
28import utility._
29import utility.mbist.{MbistInterface, MbistPipeline}
30import utility.sram.{SramMbistBundle, SramBroadcastBundle, SramHelper}
31import system.{HasSoCParameter, SoCParamsKey}
32import xiangshan._
33import xiangshan.ExceptionNO._
34import xiangshan.frontend.HasInstrMMIOConst
35import xiangshan.backend.Bundles.{DynInst, MemExuInput, MemExuOutput}
36import xiangshan.backend.ctrlblock.{DebugLSIO, LsTopdownInfo}
37import xiangshan.backend.exu.MemExeUnit
38import xiangshan.backend.fu._
39import xiangshan.backend.fu.FuType._
40import xiangshan.backend.fu.NewCSR.{CsrTriggerBundle, TriggerUtil, PFEvent}
41import xiangshan.backend.fu.util.{CSRConst, SdtrigExt}
42import xiangshan.backend.{BackendToTopBundle, TopToBackendBundle}
43import xiangshan.backend.rob.{RobDebugRollingIO, RobPtr, RobLsqIO}
44import xiangshan.backend.datapath.NewPipelineConnect
45import xiangshan.backend.trace.{Itype, TraceCoreInterface}
46import xiangshan.backend.Bundles._
47import xiangshan.mem._
48import xiangshan.mem.mdp._
49import xiangshan.mem.Bundles._
50import xiangshan.mem.prefetch.{BasePrefecher, L1Prefetcher, SMSParams, SMSPrefetcher}
51import xiangshan.cache._
52import xiangshan.cache.mmu._
53import coupledL2.PrefetchRecv
56
57trait HasMemBlockParameters extends HasXSParameter {
58  // number of memory units
59  val LduCnt  = backendParams.LduCnt
60  val StaCnt  = backendParams.StaCnt
61  val StdCnt  = backendParams.StdCnt
62  val HyuCnt  = backendParams.HyuCnt
63  val VlduCnt = backendParams.VlduCnt
64  val VstuCnt = backendParams.VstuCnt
65
66  val LdExuCnt  = LduCnt + HyuCnt
67  val StAddrCnt = StaCnt + HyuCnt
68  val StDataCnt = StdCnt
69  val MemExuCnt = LduCnt + HyuCnt + StaCnt + StdCnt
70  val MemAddrExtCnt = LdExuCnt + StaCnt
71  val MemVExuCnt = VlduCnt + VstuCnt
72
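  // fixed load writeback port assignment (matches the ldaExeWbReqs muxing below):
  // port 0 is shared with the atomics unit, port 1 with the load misalign buffer,
  // port 2 with uncache responses; NCWBPorts lists the ports that non-cacheable (NC)
  // load responses may use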
73  val AtomicWBPort   = 0
74  val MisalignWBPort = 1
75  val UncacheWBPort  = 2
76  val NCWBPorts = Seq(1, 2)
77}
78
79abstract class MemBlockBundle(implicit val p: Parameters) extends Bundle with HasMemBlockParameters
80
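// Store-data (std) functional unit: a combinational pass-through that forwards
// operand src(0) as the store data and carries the robIdx along with it.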
81class Std(cfg: FuConfig)(implicit p: Parameters) extends FuncUnit(cfg) {
82  io.in.ready := io.out.ready
83  io.out.valid := io.in.valid
84  io.out.bits := 0.U.asTypeOf(io.out.bits)
85  io.out.bits.res.data := io.in.bits.data.src(0)
86  io.out.bits.ctrl.robIdx := io.in.bits.ctrl.robIdx
87}
88
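// Inputs from the out-of-order backend to MemBlock: issue ports, sfence/TLB CSR state,
// LSQ enqueue and commit information, and PCs used for hardware prefetch training.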
89class ooo_to_mem(implicit p: Parameters) extends MemBlockBundle {
90  val backendToTopBypass = Flipped(new BackendToTopBundle)
91
92  val loadFastMatch = Vec(LdExuCnt, Input(UInt(LdExuCnt.W)))
93  val loadFastFuOpType = Vec(LdExuCnt, Input(FuOpType()))
94  val loadFastImm = Vec(LdExuCnt, Input(UInt(12.W)))
95  val sfence = Input(new SfenceBundle)
96  val tlbCsr = Input(new TlbCsrBundle)
97  val lsqio = new Bundle {
98    val lcommit = Input(UInt(log2Up(CommitWidth + 1).W))
99    val scommit = Input(UInt(log2Up(CommitWidth + 1).W))
100    val pendingMMIOld = Input(Bool())
101    val pendingld = Input(Bool())
102    val pendingst = Input(Bool())
103    val pendingVst = Input(Bool())
104    val commit = Input(Bool())
105    val pendingPtr = Input(new RobPtr)
106    val pendingPtrNext = Input(new RobPtr)
107  }
108
109  val isStoreException = Input(Bool())
110  val isVlsException = Input(Bool())
111  val csrCtrl = Flipped(new CustomCSRCtrlIO)
112  val enqLsq = new LsqEnqIO
113  val flushSb = Input(Bool())
114
115  val storePc = Vec(StaCnt, Input(UInt(VAddrBits.W))) // for hw prefetch
116  val hybridPc = Vec(HyuCnt, Input(UInt(VAddrBits.W))) // for hw prefetch
117
118  val issueLda = MixedVec(Seq.fill(LduCnt)(Flipped(DecoupledIO(new MemExuInput))))
119  val issueSta = MixedVec(Seq.fill(StaCnt)(Flipped(DecoupledIO(new MemExuInput))))
120  val issueStd = MixedVec(Seq.fill(StdCnt)(Flipped(DecoupledIO(new MemExuInput))))
121  val issueHya = MixedVec(Seq.fill(HyuCnt)(Flipped(DecoupledIO(new MemExuInput))))
122  val issueVldu = MixedVec(Seq.fill(VlduCnt)(Flipped(DecoupledIO(new MemExuInput(isVector=true)))))
123
124  def issueUops = issueLda ++ issueSta ++ issueStd ++ issueHya ++ issueVldu
125}
126
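// Outputs from MemBlock back to the out-of-order backend: writeback ports, load/store
// queue counters and pointers, issue-queue feedback, load wakeup/cancel and exception info.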
127class mem_to_ooo(implicit p: Parameters) extends MemBlockBundle {
128  val topToBackendBypass = new TopToBackendBundle
129
130  val otherFastWakeup = Vec(LdExuCnt, ValidIO(new DynInst))
131  val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize + 1).W))
132  val sqCancelCnt = Output(UInt(log2Up(StoreQueueSize + 1).W))
133  val sqDeq = Output(UInt(log2Ceil(EnsbufferWidth + 1).W))
134  val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
135  // used by the VLSU issue queue: a vector store waits for all older stores, and a vector load waits for all older loads
136  val sqDeqPtr = Output(new SqPtr)
137  val lqDeqPtr = Output(new LqPtr)
138  val stIn = Vec(StAddrCnt, ValidIO(new MemExuInput))
139  val stIssuePtr = Output(new SqPtr())
140
141  val memoryViolation = ValidIO(new Redirect)
142  val sbIsEmpty = Output(Bool())
143
144  val lsTopdownInfo = Vec(LdExuCnt, Output(new LsTopdownInfo))
145
146  val lsqio = new Bundle {
147    val vaddr = Output(UInt(XLEN.W))
148    val vstart = Output(UInt((log2Up(VLEN) + 1).W))
149    val vl = Output(UInt((log2Up(VLEN) + 1).W))
150    val gpaddr = Output(UInt(XLEN.W))
151    val isForVSnonLeafPTE = Output(Bool())
152    val mmio = Output(Vec(LoadPipelineWidth, Bool()))
153    val uop = Output(Vec(LoadPipelineWidth, new DynInst))
154    val lqCanAccept = Output(Bool())
155    val sqCanAccept = Output(Bool())
156  }
157
158  val storeDebugInfo = Vec(EnsbufferWidth, new Bundle {
159    val robidx = Output(new RobPtr)
160    val pc     = Input(UInt(VAddrBits.W))
161  })
162
163  val writebackLda = Vec(LduCnt, DecoupledIO(new MemExuOutput))
164  val writebackSta = Vec(StaCnt, DecoupledIO(new MemExuOutput))
165  val writebackStd = Vec(StdCnt, DecoupledIO(new MemExuOutput))
166  val writebackHyuLda = Vec(HyuCnt, DecoupledIO(new MemExuOutput))
167  val writebackHyuSta = Vec(HyuCnt, DecoupledIO(new MemExuOutput))
168  val writebackVldu = Vec(VlduCnt, DecoupledIO(new MemExuOutput(isVector = true)))
169  def writeBack: Seq[DecoupledIO[MemExuOutput]] = {
170    writebackSta ++
171      writebackHyuLda ++ writebackHyuSta ++
172      writebackLda ++
173      writebackVldu ++
174      writebackStd
175  }
176
177  val ldaIqFeedback = Vec(LduCnt, new MemRSFeedbackIO)
178  val staIqFeedback = Vec(StaCnt, new MemRSFeedbackIO)
179  val hyuIqFeedback = Vec(HyuCnt, new MemRSFeedbackIO)
180  val vstuIqFeedback = Vec(VstuCnt, new MemRSFeedbackIO(isVector = true))
181  val vlduIqFeedback = Vec(VlduCnt, new MemRSFeedbackIO(isVector = true))
182  val ldCancel = Vec(backendParams.LdExuCnt, new LoadCancelIO)
183  val wakeup = Vec(backendParams.LdExuCnt, Valid(new DynInst))
184
185  val s3_delayed_load_error = Vec(LdExuCnt, Output(Bool()))
186}
187
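// Memory-side top-down performance flags, sampled for the instruction at the ROB head
// and reported to the core for top-down analysis.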
188class MemCoreTopDownIO extends Bundle {
189  val robHeadMissInDCache = Output(Bool())
190  val robHeadTlbReplay = Output(Bool())
191  val robHeadTlbMiss = Output(Bool())
192  val robHeadLoadVio = Output(Bool())
193  val robHeadLoadMSHR = Output(Bool())
194}
195
196class fetch_to_mem(implicit p: Parameters) extends XSBundle{
197  val itlb = Flipped(new TlbPtwIO())
198}
199
200// triple buffer applied in i-mmio path (two at MemBlock, one at L2Top)
201class InstrUncacheBuffer()(implicit p: Parameters) extends LazyModule with HasInstrMMIOConst {
202  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
203  lazy val module = new InstrUncacheBufferImpl
204
205  class InstrUncacheBufferImpl extends LazyModuleImp(this) {
206    (node.in zip node.out) foreach { case ((in, edgeIn), (out, edgeOut)) =>
207      out.a <> BufferParams.default(BufferParams.default(in.a))
208      in.d <> BufferParams.default(BufferParams.default(out.d))
209
210      // only a.valid, a.ready, a.address can change
211      // hoping that the rest would be optimized to keep MemBlock port unchanged after adding buffer
212      out.a.bits.data := 0.U
213      out.a.bits.mask := Fill(mmioBusBytes, 1.U(1.W))
214      out.a.bits.opcode := 4.U // Get
215      out.a.bits.size := log2Ceil(mmioBusBytes).U
216      out.a.bits.source := 0.U
217    }
218  }
219}
220
221// triple buffer applied in L1I$-L2 path (two at MemBlock, one at L2Top)
222class ICacheBuffer()(implicit p: Parameters) extends LazyModule {
223  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
224  lazy val module = new ICacheBufferImpl
225
226  class ICacheBufferImpl extends LazyModuleImp(this) {
227    (node.in zip node.out) foreach { case ((in, edgeIn), (out, edgeOut)) =>
228      out.a <> BufferParams.default(BufferParams.default(in.a))
229      in.d <> BufferParams.default(BufferParams.default(out.d))
230    }
231  }
232}
233
234class ICacheCtrlBuffer()(implicit p: Parameters) extends LazyModule {
235  val node = new TLBufferNode(BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default, BufferParams.default)
236  lazy val module = new ICacheCtrlBufferImpl
237
238  class ICacheCtrlBufferImpl extends LazyModuleImp(this) {
239    (node.in zip node.out) foreach { case ((in, edgeIn), (out, edgeOut)) =>
240      out.a <> BufferParams.default(BufferParams.default(in.a))
241      in.d <> BufferParams.default(BufferParams.default(out.d))
242    }
243  }
244}
245
246// Frontend bus goes through MemBlock
247class FrontendBridge()(implicit p: Parameters) extends LazyModule {
248  val icache_node = LazyModule(new ICacheBuffer()).suggestName("icache").node // to keep IO port name
249  val icachectrl_node = LazyModule(new ICacheCtrlBuffer()).suggestName("icachectrl").node
250  val instr_uncache_node = LazyModule(new InstrUncacheBuffer()).suggestName("instr_uncache").node
251  lazy val module = new LazyModuleImp(this) {
252  }
253}
254
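// MemBlockInlined: lazy (diplomatic) wrapper holding the DCache, the Uncache unit, the
// L2 TLB (page table walker), their TileLink buffers and ports, the frontend bus bridge,
// the L2/L3 prefetch senders and the interrupt sinks routed into the core.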
255class MemBlockInlined()(implicit p: Parameters) extends LazyModule
256  with HasXSParameter {
257  override def shouldBeInlined: Boolean = true
258
259  val dcache = LazyModule(new DCacheWrapper())
260  val uncache = LazyModule(new Uncache())
261  val uncache_port = TLTempNode()
262  val uncache_xbar = TLXbar()
263  val ptw = LazyModule(new L2TLBWrapper())
264  val ptw_to_l2_buffer = if (!coreParams.softPTW) LazyModule(new TLBuffer) else null
265  val l1d_to_l2_buffer = if (coreParams.dcacheParametersOpt.nonEmpty) LazyModule(new TLBuffer) else null
266  val dcache_port = TLNameNode("dcache_client") // to keep dcache-L2 port name
267  val l2_pf_sender_opt = coreParams.prefetcher.map(_ =>
268    BundleBridgeSource(() => new PrefetchRecv)
269  )
270  val l3_pf_sender_opt = if (p(SoCParamsKey).L3CacheParamsOpt.nonEmpty) coreParams.prefetcher.map(_ =>
271    BundleBridgeSource(() => new huancun.PrefetchRecv)
272  ) else None
273  val frontendBridge = LazyModule(new FrontendBridge)
274  // interrupt sinks
275  val clint_int_sink = IntSinkNode(IntSinkPortSimple(1, 2))
276  val debug_int_sink = IntSinkNode(IntSinkPortSimple(1, 1))
277  val plic_int_sink = IntSinkNode(IntSinkPortSimple(2, 1))
278  val nmi_int_sink = IntSinkNode(IntSinkPortSimple(1, (new NonmaskableInterruptIO).elements.size))
279  val beu_local_int_sink = IntSinkNode(IntSinkPortSimple(1, 1))
280
281  if (!coreParams.softPTW) {
282    ptw_to_l2_buffer.node := ptw.node
283  }
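  // the Uncache client is buffered into uncache_xbar, which fans out to the DCache's
  // uncache node (when present) and to the external uncache_port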
284  uncache_xbar := TLBuffer() := uncache.clientNode
285  if (dcache.uncacheNode.isDefined) {
286    dcache.uncacheNode.get := TLBuffer.chainNode(2) := uncache_xbar
287  }
288  uncache_port := TLBuffer.chainNode(2) := uncache_xbar
289
290  lazy val module = new MemBlockInlinedImp(this)
291}
292
293class MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer)
294  with HasXSParameter
295  with HasFPUParameters
296  with HasPerfEvents
297  with HasSoCParameter
298  with HasL1PrefetchSourceParameter
299  with HasCircularQueuePtrHelper
300  with HasMemBlockParameters
301  with HasTlbConst
302  with SdtrigExt
303{
304  val io = IO(new Bundle {
305    val hartId = Input(UInt(hartIdLen.W))
306    val redirect = Flipped(ValidIO(new Redirect))
307
308    val ooo_to_mem = new ooo_to_mem
309    val mem_to_ooo = new mem_to_ooo
310    val fetch_to_mem = new fetch_to_mem
311
312    val ifetchPrefetch = Vec(LduCnt, ValidIO(new SoftIfetchPrefetchBundle))
313
314    // misc
315    val error = ValidIO(new L1CacheErrorInfo)
316    val memInfo = new Bundle {
317      val sqFull = Output(Bool())
318      val lqFull = Output(Bool())
319      val dcacheMSHRFull = Output(Bool())
320    }
321    val debug_ls = new DebugLSIO
322    val l2_hint = Input(Valid(new L2ToL1Hint()))
323    val l2PfqBusy = Input(Bool())
324    val l2_tlb_req = Flipped(new TlbRequestIO(nRespDups = 2))
325    val l2_pmp_resp = new PMPRespBundle
326    val l2_flush_done = Input(Bool())
327
328    val debugTopDown = new Bundle {
329      val robHeadVaddr = Flipped(Valid(UInt(VAddrBits.W)))
330      val toCore = new MemCoreTopDownIO
331    }
332    val debugRolling = Flipped(new RobDebugRollingIO)
333
334    // All the signals from/to frontend/backend to/from bus will go through MemBlock
335    val fromTopToBackend = Input(new Bundle {
336      val msiInfo   = ValidIO(UInt(soc.IMSICParams.MSI_INFO_WIDTH.W))
337      val clintTime = ValidIO(UInt(64.W))
338    })
339    val inner_hartId = Output(UInt(hartIdLen.W))
340    val inner_reset_vector = Output(UInt(PAddrBits.W))
341    val outer_reset_vector = Input(UInt(PAddrBits.W))
342    val outer_cpu_halt = Output(Bool())
343    val outer_l2_flush_en = Output(Bool())
344    val outer_power_down_en = Output(Bool())
345    val outer_cpu_critical_error = Output(Bool())
346    val outer_msi_ack = Output(Bool())
347    val inner_beu_errors_icache = Input(new L1BusErrorUnitInfo)
348    val outer_beu_errors_icache = Output(new L1BusErrorUnitInfo)
349    val inner_hc_perfEvents = Output(Vec(numPCntHc * coreParams.L2NBanks + 1, new PerfEvent))
350    val outer_hc_perfEvents = Input(Vec(numPCntHc * coreParams.L2NBanks + 1, new PerfEvent))
351
352    // reset signals of frontend & backend are generated in memblock
353    val reset_backend = Output(Reset())
354    // Reset signal from frontend.
355    val resetInFrontendBypass = new Bundle{
356      val fromFrontend = Input(Bool())
357      val toL2Top      = Output(Bool())
358    }
359    val traceCoreInterfaceBypass = new Bundle{
360      val fromBackend = Flipped(new TraceCoreInterface(hasOffset = true))
361      val toL2Top     = new TraceCoreInterface
362    }
363
364    val topDownInfo = new Bundle {
365      val fromL2Top = Input(new TopDownFromL2Top)
366      val toBackend = Flipped(new TopDownInfo)
367    }
368    val sramTestBypass = new Bundle() {
369      val fromL2Top = new Bundle() {
370        val mbist      = Option.when(hasMbist)(Input(new SramMbistBundle))
371        val mbistReset = Option.when(hasMbist)(Input(new DFTResetSignals()))
372        val sramCtl    = Option.when(hasSramCtl)(Input(UInt(64.W)))
373      }
374      val toFrontend = new Bundle() {
375        val mbist      = Option.when(hasMbist)(Output(new SramMbistBundle))
376        val mbistReset = Option.when(hasMbist)(Output(new DFTResetSignals()))
377        val sramCtl    = Option.when(hasSramCtl)(Output(UInt(64.W)))
378      }
379      val toBackend = new Bundle() {
380        val mbist      = Option.when(hasMbist)(Output(new SramMbistBundle))
381        val mbistReset = Option.when(hasMbist)(Output(new DFTResetSignals()))
382      }
383    }
384  })
385
386  io.mem_to_ooo.writeBack.zipWithIndex.foreach{ case (wb, i) =>
387    PerfCCT.updateInstPos(wb.bits.uop.debug_seqNum, PerfCCT.InstPos.AtBypassVal.id.U, wb.valid, clock, reset)
388  }
389
390  dontTouch(io.inner_hartId)
391  dontTouch(io.inner_reset_vector)
392  dontTouch(io.outer_reset_vector)
393  dontTouch(io.outer_cpu_halt)
394  dontTouch(io.outer_l2_flush_en)
395  dontTouch(io.outer_power_down_en)
396  dontTouch(io.outer_cpu_critical_error)
397  dontTouch(io.inner_beu_errors_icache)
398  dontTouch(io.outer_beu_errors_icache)
399  dontTouch(io.inner_hc_perfEvents)
400  dontTouch(io.outer_hc_perfEvents)
401
402  val redirect = RegNextWithEnable(io.redirect)
403
404  private val dcache = outer.dcache.module
405  val uncache = outer.uncache.module
406
407  //val delayedDcacheRefill = RegNext(dcache.io.lsu.lsq)
408
409  val csrCtrl = DelayN(io.ooo_to_mem.csrCtrl, 2)
410  dcache.io.l2_pf_store_only := RegNext(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_store_only, false.B)
411  io.error <> DelayNWithValid(dcache.io.error, 2)
412  when(!csrCtrl.cache_error_enable){
413    io.error.bits.report_to_beu := false.B
414    io.error.valid := false.B
415  }
416
417  val loadUnits = Seq.fill(LduCnt)(Module(new LoadUnit))
418  val storeUnits = Seq.fill(StaCnt)(Module(new StoreUnit))
419  val stdExeUnits = Seq.fill(StdCnt)(Module(new MemExeUnit(backendParams.memSchdParams.get.issueBlockParams.find(_.StdCnt != 0).get.exuBlockParams.head)))
420  val hybridUnits = Seq.fill(HyuCnt)(Module(new HybridUnit)) // Todo: replace it with HybridUnit
421  val stData = stdExeUnits.map(_.io.out)
422  val exeUnits = loadUnits ++ storeUnits
423
424  // The number of vector load/store units is decoupled with the number of load/store units
425  val vlSplit = Seq.fill(VlduCnt)(Module(new VLSplitImp))
426  val vsSplit = Seq.fill(VstuCnt)(Module(new VSSplitImp))
427  val vlMergeBuffer = Module(new VLMergeBufferImp)
428  val vsMergeBuffer = Seq.fill(VstuCnt)(Module(new VSMergeBufferImp))
429  val vSegmentUnit  = Module(new VSegmentUnit)
430  val vfofBuffer    = Module(new VfofBuffer)
431
432  // misalign Buffer
433  val loadMisalignBuffer = Module(new LoadMisalignBuffer)
434  val storeMisalignBuffer = Module(new StoreMisalignBuffer)
435
436  val l1_pf_req = Wire(Decoupled(new L1PrefetchReq()))
437  dcache.io.sms_agt_evict_req.ready := false.B
438  val l1D_pf_enable = GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable, 2, Some(false.B))
439  val prefetcherOpt: Option[BasePrefecher] = coreParams.prefetcher.map {
440    case _: SMSParams =>
441      val sms = Module(new SMSPrefetcher())
442      val enableSMS = Constantin.createRecord(s"enableSMS$hartId", initValue = true)
443      // enabled only when the Constantin switch, the master L1D prefetch CSR switch and the per-prefetcher CSR switch are all set
444      sms.io.enable := enableSMS && l1D_pf_enable &&
445        GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_recv_enable, 2, Some(false.B))
446      sms.io_agt_en := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_agt, 2, Some(false.B))
447      sms.io_pht_en := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_pht, 2, Some(false.B))
448      sms.io_act_threshold := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_active_threshold, 2, Some(12.U))
449      sms.io_act_stride := GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_active_stride, 2, Some(30.U))
450      sms.io_stride_en := false.B
451      sms.io_dcache_evict <> dcache.io.sms_agt_evict_req
452      val mbistSmsPl = MbistPipeline.PlaceMbistPipeline(1, "MbistPipeSms", hasMbist)
453      sms
454  }
455  prefetcherOpt.foreach{ pf => pf.io.l1_req.ready := false.B }
456  val hartId = p(XSCoreParamsKey).HartId
457  val l1PrefetcherOpt: Option[BasePrefecher] = coreParams.prefetcher.map {
458    case _ =>
459      val l1Prefetcher = Module(new L1Prefetcher())
460      val enableL1StreamPrefetcher = Constantin.createRecord(s"enableL1StreamPrefetcher$hartId", initValue = true)
461      // enabled only when the Constantin switch, the master L1D prefetch CSR switch and the per-prefetcher CSR switch are all set
462      l1Prefetcher.io.enable := enableL1StreamPrefetcher && l1D_pf_enable &&
463        GatedRegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_enable_stride, 2, Some(false.B))
464      l1Prefetcher.pf_ctrl <> dcache.io.pf_ctrl
465      l1Prefetcher.l2PfqBusy := io.l2PfqBusy
466
467      // stride will train on miss or prefetch hit
468      for (i <- 0 until LduCnt) {
469        val source = loadUnits(i).io.prefetch_train_l1
470        l1Prefetcher.stride_train(i).valid := source.valid && source.bits.isFirstIssue && (
471          source.bits.miss || isFromStride(source.bits.meta_prefetch)
472        )
473        l1Prefetcher.stride_train(i).bits := source.bits
474        val loadPc = RegNext(io.ooo_to_mem.issueLda(i).bits.uop.pc) // for s1
475        l1Prefetcher.stride_train(i).bits.uop.pc := Mux(
476          loadUnits(i).io.s2_ptr_chasing,
477          RegEnable(loadPc, loadUnits(i).io.s2_prefetch_spec),
478          RegEnable(RegEnable(loadPc, loadUnits(i).io.s1_prefetch_spec), loadUnits(i).io.s2_prefetch_spec)
479        )
480      }
481      for (i <- 0 until HyuCnt) {
482        val source = hybridUnits(i).io.prefetch_train_l1
483        l1Prefetcher.stride_train.drop(LduCnt)(i).valid := source.valid && source.bits.isFirstIssue && (
484          source.bits.miss || isFromStride(source.bits.meta_prefetch)
485        )
486        l1Prefetcher.stride_train.drop(LduCnt)(i).bits := source.bits
487        l1Prefetcher.stride_train.drop(LduCnt)(i).bits.uop.pc := Mux(
488          hybridUnits(i).io.ldu_io.s2_ptr_chasing,
489          RegNext(io.ooo_to_mem.hybridPc(i)),
490          RegNext(RegNext(io.ooo_to_mem.hybridPc(i)))
491        )
492      }
493      l1Prefetcher
494  }
495  // load prefetch to l1 Dcache
496  l1PrefetcherOpt match {
497    case Some(pf) => l1_pf_req <> Pipeline(in = pf.io.l1_req, depth = 1, pipe = false, name = Some("pf_queue_to_ldu_reg"))
498    case None =>
499      l1_pf_req.valid := false.B
500      l1_pf_req.bits := DontCare
501  }
502  val pf_train_on_hit = RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l1D_pf_train_on_hit, 2, Some(true.B))
503
504  loadUnits.zipWithIndex.map(x => x._1.suggestName("LoadUnit_"+x._2))
505  storeUnits.zipWithIndex.map(x => x._1.suggestName("StoreUnit_"+x._2))
506  hybridUnits.zipWithIndex.map(x => x._1.suggestName("HybridUnit_"+x._2))
507  val atomicsUnit = Module(new AtomicsUnit)
508
509
510  val ldaExeWbReqs = Wire(Vec(LduCnt, Decoupled(new MemExuOutput)))
511  // atomicsUnit will overwrite the source from ldu if it is about to writeback
512  val atomicWritebackOverride = Mux(
513    atomicsUnit.io.out.valid,
514    atomicsUnit.io.out.bits,
515    loadUnits(AtomicWBPort).io.ldout.bits
516  )
517  ldaExeWbReqs(AtomicWBPort).valid := atomicsUnit.io.out.valid || loadUnits(AtomicWBPort).io.ldout.valid
518  ldaExeWbReqs(AtomicWBPort).bits  := atomicWritebackOverride
519  atomicsUnit.io.out.ready := ldaExeWbReqs(AtomicWBPort).ready
520  loadUnits(AtomicWBPort).io.ldout.ready := ldaExeWbReqs(AtomicWBPort).ready
521
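  // store-data uops that carry AMO operands; they are filtered out of the LSQ
  // store-data path below and handled by the atomics unit instead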
522  val st_data_atomics = Seq.tabulate(StdCnt)(i =>
523    stData(i).valid && FuType.storeIsAMO(stData(i).bits.uop.fuType)
524  )
525
526  // misalignBuffer will overwrite the source from ldu if it is about to writeback
527  val misalignWritebackOverride = Mux(
528    loadUnits(MisalignWBPort).io.ldout.valid,
529    loadUnits(MisalignWBPort).io.ldout.bits,
530    loadMisalignBuffer.io.writeBack.bits
531  )
532  ldaExeWbReqs(MisalignWBPort).valid    := loadMisalignBuffer.io.writeBack.valid || loadUnits(MisalignWBPort).io.ldout.valid
533  ldaExeWbReqs(MisalignWBPort).bits     := misalignWritebackOverride
534  loadMisalignBuffer.io.writeBack.ready := ldaExeWbReqs(MisalignWBPort).ready && !loadUnits(MisalignWBPort).io.ldout.valid
535  loadMisalignBuffer.io.loadOutValid    := loadUnits(MisalignWBPort).io.ldout.valid
536  loadMisalignBuffer.io.loadVecOutValid := loadUnits(MisalignWBPort).io.vecldout.valid
537  loadUnits(MisalignWBPort).io.ldout.ready := ldaExeWbReqs(MisalignWBPort).ready
538  ldaExeWbReqs(MisalignWBPort).bits.isFromLoadUnit := loadUnits(MisalignWBPort).io.ldout.bits.isFromLoadUnit || loadMisalignBuffer.io.writeBack.valid
539
540  // loadUnit will overwrite the source from uncache if it is about to writeback
541  ldaExeWbReqs(UncacheWBPort) <> loadUnits(UncacheWBPort).io.ldout
542  io.mem_to_ooo.writebackLda <> ldaExeWbReqs
543  io.mem_to_ooo.writebackSta <> storeUnits.map(_.io.stout)
544  io.mem_to_ooo.writebackStd.zip(stdExeUnits).foreach {x =>
545    x._1.bits  := x._2.io.out.bits
546    // AMOs do not need to write back std now.
547    x._1.valid := x._2.io.out.fire && !FuType.storeIsAMO(x._2.io.out.bits.uop.fuType)
548  }
549  io.mem_to_ooo.writebackHyuLda <> hybridUnits.map(_.io.ldout)
550  io.mem_to_ooo.writebackHyuSta <> hybridUnits.map(_.io.stout)
551  io.mem_to_ooo.otherFastWakeup := DontCare
552  io.mem_to_ooo.otherFastWakeup.drop(HyuCnt).take(LduCnt).zip(loadUnits.map(_.io.fast_uop)).foreach { case (a, b) => a := b }
553  io.mem_to_ooo.otherFastWakeup.take(HyuCnt).zip(hybridUnits.map(_.io.ldu_io.fast_uop)).foreach { case (a, b) => a := b }
554  val stOut = io.mem_to_ooo.writebackSta ++ io.mem_to_ooo.writebackHyuSta
555
556  // prefetch to l1 req
557  // Stream's confidence is always 1
558  // (LduCnt + HyuCnt) l1_pf_reqs ?
559  loadUnits.foreach(load_unit => {
560    load_unit.io.prefetch_req.valid <> l1_pf_req.valid
561    load_unit.io.prefetch_req.bits <> l1_pf_req.bits
562  })
563
564  hybridUnits.foreach(hybrid_unit => {
565    hybrid_unit.io.ldu_io.prefetch_req.valid <> l1_pf_req.valid
566    hybrid_unit.io.ldu_io.prefetch_req.bits <> l1_pf_req.bits
567  })
568
569  // NOTE: loadUnits(0) has higher bank conflict and miss queue arb priority than loadUnits(1) and loadUnits(2)
570  // when loadUnits(1)/loadUnits(2) stage 0 is busy, hw prefetch will never use that pipeline
571  val LowConfPorts = if (LduCnt == 2) Seq(1) else if (LduCnt == 3) Seq(1, 2) else Seq(0)
572  LowConfPorts.map{case i => loadUnits(i).io.prefetch_req.bits.confidence := 0.U}
573  hybridUnits.foreach(hybrid_unit => { hybrid_unit.io.ldu_io.prefetch_req.bits.confidence := 0.U })
574
575  val canAcceptHighConfPrefetch = loadUnits.map(_.io.canAcceptHighConfPrefetch) ++
576                                  hybridUnits.map(_.io.canAcceptLowConfPrefetch)
577  val canAcceptLowConfPrefetch = loadUnits.map(_.io.canAcceptLowConfPrefetch) ++
578                                 hybridUnits.map(_.io.canAcceptLowConfPrefetch)
579  l1_pf_req.ready := (0 until LduCnt + HyuCnt).map{
580    case i => {
581      if (LowConfPorts.contains(i)) {
582        loadUnits(i).io.canAcceptLowConfPrefetch
583      } else {
584        Mux(l1_pf_req.bits.confidence === 1.U, canAcceptHighConfPrefetch(i), canAcceptLowConfPrefetch(i))
585      }
586    }
587  }.reduce(_ || _)
588
589  // l1 pf fuzzer interface
590  val DebugEnableL1PFFuzzer = false
591  if (DebugEnableL1PFFuzzer) {
592    // l1 pf req fuzzer
593    val fuzzer = Module(new L1PrefetchFuzzer())
594    fuzzer.io.vaddr := DontCare
595    fuzzer.io.paddr := DontCare
596
597    // override load_unit prefetch_req
598    loadUnits.foreach(load_unit => {
599      load_unit.io.prefetch_req.valid <> fuzzer.io.req.valid
600      load_unit.io.prefetch_req.bits <> fuzzer.io.req.bits
601    })
602
603    // override hybrid_unit prefetch_req
604    hybridUnits.foreach(hybrid_unit => {
605      hybrid_unit.io.ldu_io.prefetch_req.valid <> fuzzer.io.req.valid
606      hybrid_unit.io.ldu_io.prefetch_req.bits <> fuzzer.io.req.bits
607    })
608
609    fuzzer.io.req.ready := l1_pf_req.ready
610  }
611
612  // TODO: fast load wakeup
613  val lsq     = Module(new LsqWrapper)
614  val sbuffer = Module(new Sbuffer)
615  // if you want to stress-test dcache stores, use FakeSbuffer
616  // val sbuffer = Module(new FakeSbuffer) // out of date now
617  io.mem_to_ooo.stIssuePtr := lsq.io.issuePtrExt
618
619  dcache.io.hartId := io.hartId
620  lsq.io.hartId := io.hartId
621  sbuffer.io.hartId := io.hartId
622  atomicsUnit.io.hartId := io.hartId
623
624  dcache.io.lqEmpty := lsq.io.lqEmpty
625
626  // load/store prefetch to l2 cache
627  prefetcherOpt.foreach(sms_pf => {
628    l1PrefetcherOpt.foreach(l1_pf => {
629      val sms_pf_to_l2 = DelayNWithValid(sms_pf.io.l2_req, 2)
630      val l1_pf_to_l2 = DelayNWithValid(l1_pf.io.l2_req, 2)
631
632      outer.l2_pf_sender_opt.get.out.head._1.addr_valid := sms_pf_to_l2.valid || l1_pf_to_l2.valid
633      outer.l2_pf_sender_opt.get.out.head._1.addr := Mux(l1_pf_to_l2.valid, l1_pf_to_l2.bits.addr, sms_pf_to_l2.bits.addr)
634      outer.l2_pf_sender_opt.get.out.head._1.pf_source := Mux(l1_pf_to_l2.valid, l1_pf_to_l2.bits.source, sms_pf_to_l2.bits.source)
635      outer.l2_pf_sender_opt.get.out.head._1.l2_pf_en := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_enable, 2, Some(true.B))
636
637      val l2_trace = Wire(new LoadPfDbBundle)
638      l2_trace.paddr := outer.l2_pf_sender_opt.get.out.head._1.addr
639      val table = ChiselDB.createTable(s"L2PrefetchTrace$hartId", new LoadPfDbBundle, basicDB = false)
640      table.log(l2_trace, l1_pf_to_l2.valid, "StreamPrefetchTrace", clock, reset)
641      table.log(l2_trace, !l1_pf_to_l2.valid && sms_pf_to_l2.valid, "L2PrefetchTrace", clock, reset)
642
643      val l1_pf_to_l3 = ValidIODelay(l1_pf.io.l3_req, 4)
644      outer.l3_pf_sender_opt.foreach(_.out.head._1.addr_valid := l1_pf_to_l3.valid)
645      outer.l3_pf_sender_opt.foreach(_.out.head._1.addr := l1_pf_to_l3.bits)
646      outer.l3_pf_sender_opt.foreach(_.out.head._1.l2_pf_en := RegNextN(io.ooo_to_mem.csrCtrl.pf_ctrl.l2_pf_enable, 4, Some(true.B)))
647
648      val l3_trace = Wire(new LoadPfDbBundle)
649      l3_trace.paddr := outer.l3_pf_sender_opt.map(_.out.head._1.addr).getOrElse(0.U)
650      val l3_table = ChiselDB.createTable(s"L3PrefetchTrace$hartId", new LoadPfDbBundle, basicDB = false)
651      l3_table.log(l3_trace, l1_pf_to_l3.valid, "StreamPrefetchTrace", clock, reset)
652
653      XSPerfAccumulate("prefetch_fire_l2", outer.l2_pf_sender_opt.get.out.head._1.addr_valid)
654      XSPerfAccumulate("prefetch_fire_l3", outer.l3_pf_sender_opt.map(_.out.head._1.addr_valid).getOrElse(false.B))
655      XSPerfAccumulate("l1pf_fire_l2", l1_pf_to_l2.valid)
656      XSPerfAccumulate("sms_fire_l2", !l1_pf_to_l2.valid && sms_pf_to_l2.valid)
657      XSPerfAccumulate("sms_block_by_l1pf", l1_pf_to_l2.valid && sms_pf_to_l2.valid)
658    })
659  })
660
661  // ptw
662  val sfence = RegNext(RegNext(io.ooo_to_mem.sfence))
663  val tlbcsr = RegNext(RegNext(io.ooo_to_mem.tlbCsr))
664  private val ptw = outer.ptw.module
665  private val ptw_to_l2_buffer = outer.ptw_to_l2_buffer.module
666  private val l1d_to_l2_buffer = outer.l1d_to_l2_buffer.module
667  ptw.io.hartId := io.hartId
668  ptw.io.sfence <> sfence
669  ptw.io.csr.tlb <> tlbcsr
670  ptw.io.csr.distribute_csr <> csrCtrl.distribute_csr
671
672  val perfEventsPTW = if (!coreParams.softPTW) {
673    ptw.getPerfEvents
674  } else {
675    Seq()
676  }
677
678  // dtlb
679  val dtlb_ld_tlb_ld = Module(new TLBNonBlock(LduCnt + HyuCnt + 1, 2, ldtlbParams))
680  val dtlb_st_tlb_st = Module(new TLBNonBlock(StaCnt, 1, sttlbParams))
681  val dtlb_prefetch_tlb_prefetch = Module(new TLBNonBlock(2, 2, pftlbParams))
682  val dtlb_ld = Seq(dtlb_ld_tlb_ld.io)
683  val dtlb_st = Seq(dtlb_st_tlb_st.io)
684  val dtlb_prefetch = Seq(dtlb_prefetch_tlb_prefetch.io)
685  /* tlb vec && constant variable */
686  val dtlb = dtlb_ld ++ dtlb_st ++ dtlb_prefetch
687  val (dtlb_ld_idx, dtlb_st_idx, dtlb_pf_idx) = (0, 1, 2)
688  val TlbSubSizeVec = Seq(LduCnt + HyuCnt + 1, StaCnt, 2) // (load + hyu + stream pf, store, sms+l2bop)
689  val DTlbSize = TlbSubSizeVec.sum
690  val TlbStartVec = TlbSubSizeVec.scanLeft(0)(_ + _).dropRight(1)
691  val TlbEndVec = TlbSubSizeVec.scanLeft(0)(_ + _).drop(1)
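  // prefix sums over TlbSubSizeVec: TlbStartVec/TlbEndVec give each sub-TLB's
  // [start, end) range inside the flattened dtlb request vector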
692
693  val ptwio = Wire(new VectorTlbPtwIO(DTlbSize))
694  val dtlb_reqs = dtlb.map(_.requestor).flatten
695  val dtlb_pmps = dtlb.map(_.pmp).flatten
696  dtlb.map(_.hartId := io.hartId)
697  dtlb.map(_.sfence := sfence)
698  dtlb.map(_.csr := tlbcsr)
699  dtlb.map(_.flushPipe.map(a => a := false.B)) // non-blocking TLBs do not need a pipeline flush
700  dtlb.map(_.redirect := redirect)
701  if (refillBothTlb) {
702    require(ldtlbParams.outReplace == sttlbParams.outReplace)
703    require(ldtlbParams.outReplace == hytlbParams.outReplace)
704    require(ldtlbParams.outReplace == pftlbParams.outReplace)
705    require(ldtlbParams.outReplace)
706
707    val replace = Module(new TlbReplace(DTlbSize, ldtlbParams))
708    replace.io.apply_sep(dtlb_ld.map(_.replace) ++ dtlb_st.map(_.replace) ++ dtlb_prefetch.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
709  } else {
710    // TODO: there will be bugs in TlbReplace when outReplace enable, since the order of Hyu is not right.
711    if (ldtlbParams.outReplace) {
712      val replace_ld = Module(new TlbReplace(LduCnt + 1, ldtlbParams))
713      replace_ld.io.apply_sep(dtlb_ld.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
714    }
715    if (hytlbParams.outReplace) {
716      val replace_hy = Module(new TlbReplace(HyuCnt, hytlbParams))
717      replace_hy.io.apply_sep(dtlb_ld.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
718    }
719    if (sttlbParams.outReplace) {
720      val replace_st = Module(new TlbReplace(StaCnt, sttlbParams))
721      replace_st.io.apply_sep(dtlb_st.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
722    }
723    if (pftlbParams.outReplace) {
724      val replace_pf = Module(new TlbReplace(2, pftlbParams))
725      replace_pf.io.apply_sep(dtlb_prefetch.map(_.replace), ptwio.resp.bits.data.s1.entry.tag)
726    }
727  }
728
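  // register the PTW response for one extra cycle; the registered copy is invalidated
  // when an sfence arrives together with satp/vsatp/hgatp changes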
729  val ptw_resp_next = RegEnable(ptwio.resp.bits, ptwio.resp.valid)
730  val ptw_resp_v = RegNext(ptwio.resp.valid && !(sfence.valid && tlbcsr.satp.changed && tlbcsr.vsatp.changed && tlbcsr.hgatp.changed), init = false.B)
731  ptwio.resp.ready := true.B
732
733  val tlbreplay = WireInit(VecInit(Seq.fill(LdExuCnt)(false.B)))
734  val tlbreplay_reg = GatedValidRegNext(tlbreplay)
735  val dtlb_ld0_tlbreplay_reg = GatedValidRegNext(dtlb_ld(0).tlbreplay)
736
737  if (backendParams.debugEn){ dontTouch(tlbreplay) }
738
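  // a load TLB request whose VPN is already covered by the registered PTW response is
  // flagged for replay (it feeds tlb_hint.full below) instead of waiting on a new walk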
739  for (i <- 0 until LdExuCnt) {
740    tlbreplay(i) := dtlb_ld(0).ptw.req(i).valid && ptw_resp_next.vector(0) && ptw_resp_v &&
741      ptw_resp_next.data.hit(dtlb_ld(0).ptw.req(i).bits.vpn, tlbcsr.satp.asid, tlbcsr.vsatp.asid, tlbcsr.hgatp.vmid, allType = true, ignoreAsid = true)
742  }
743
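  // forward DTLB miss requests to the PTW; a request is suppressed when the registered
  // PTW response for its sub-TLB group already hits the requested VPN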
744  dtlb.flatMap(a => a.ptw.req)
745    .zipWithIndex
746    .foreach{ case (tlb, i) =>
747      tlb.ready := ptwio.req(i).ready
748      ptwio.req(i).bits := tlb.bits
749      val vector_hit = if (refillBothTlb) Cat(ptw_resp_next.vector).orR
750        else if (i < TlbEndVec(dtlb_ld_idx)) Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_ld_idx), TlbEndVec(dtlb_ld_idx))).orR
751        else if (i < TlbEndVec(dtlb_st_idx)) Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_st_idx), TlbEndVec(dtlb_st_idx))).orR
752        else                                 Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_pf_idx), TlbEndVec(dtlb_pf_idx))).orR
753      ptwio.req(i).valid := tlb.valid && !(ptw_resp_v && vector_hit && ptw_resp_next.data.hit(tlb.bits.vpn, tlbcsr.satp.asid, tlbcsr.vsatp.asid, tlbcsr.hgatp.vmid, allType = true, ignoreAsid = true))
754  }
755  dtlb.foreach(_.ptw.resp.bits := ptw_resp_next.data)
756  if (refillBothTlb) {
757    dtlb.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector).orR)
758  } else {
759    dtlb_ld.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_ld_idx), TlbEndVec(dtlb_ld_idx))).orR)
760    dtlb_st.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_st_idx), TlbEndVec(dtlb_st_idx))).orR)
761    dtlb_prefetch.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.slice(TlbStartVec(dtlb_pf_idx), TlbEndVec(dtlb_pf_idx))).orR)
762  }
763  dtlb_ld.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.take(LduCnt + HyuCnt + 1)).orR)
764  dtlb_st.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.slice(LduCnt + HyuCnt + 1, LduCnt + HyuCnt + 1 + StaCnt)).orR)
765  dtlb_prefetch.foreach(_.ptw.resp.bits.getGpa := Cat(ptw_resp_next.getGpa.drop(LduCnt + HyuCnt + 1 + StaCnt)).orR)
766
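  // PTW request filter/repeater: data-side DTLB misses are filtered into ptw.io.tlb(1),
  // the frontend ITLB is repeated into ptw.io.tlb(0)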
767  val dtlbRepeater  = PTWNewFilter(ldtlbParams.fenceDelay, ptwio, ptw.io.tlb(1), sfence, tlbcsr, l2tlbParams.dfilterSize)
768  val itlbRepeater3 = PTWRepeaterNB(passReady = false, itlbParams.fenceDelay, io.fetch_to_mem.itlb, ptw.io.tlb(0), sfence, tlbcsr)
769
770  lsq.io.debugTopDown.robHeadMissInDTlb := dtlbRepeater.io.rob_head_miss_in_tlb
771
772  // pmp
773  val pmp = Module(new PMP())
774  pmp.io.distribute_csr <> csrCtrl.distribute_csr
775
776  val pmp_checkers = Seq.fill(DTlbSize)(Module(new PMPChecker(4, leaveHitMux = true)))
777  val pmp_check = pmp_checkers.map(_.io)
778  for ((p,d) <- pmp_check zip dtlb_pmps) {
779    if (HasBitmapCheck) {
780      p.apply(tlbcsr.mbmc.CMODE.asBool, tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, d)
781    } else {
782      p.apply(tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, d)
783    }
784    require(p.req.bits.size.getWidth == d.bits.size.getWidth)
785  }
786
787  for (i <- 0 until LduCnt) {
788    io.debug_ls.debugLsInfo(i) := loadUnits(i).io.debug_ls
789  }
790  for (i <- 0 until HyuCnt) {
791    io.debug_ls.debugLsInfo.drop(LduCnt)(i) := hybridUnits(i).io.ldu_io.debug_ls
792  }
793  for (i <- 0 until StaCnt) {
794    io.debug_ls.debugLsInfo.drop(LduCnt + HyuCnt)(i) := storeUnits(i).io.debug_ls
795  }
796  for (i <- 0 until HyuCnt) {
797    io.debug_ls.debugLsInfo.drop(LduCnt + HyuCnt + StaCnt)(i) := hybridUnits(i).io.stu_io.debug_ls
798  }
799
800  io.mem_to_ooo.lsTopdownInfo := loadUnits.map(_.io.lsTopdownInfo) ++ hybridUnits.map(_.io.ldu_io.lsTopdownInfo)
801
802  // trigger
803  val tdata = RegInit(VecInit(Seq.fill(TriggerNum)(0.U.asTypeOf(new MatchTriggerIO))))
804  val tEnable = RegInit(VecInit(Seq.fill(TriggerNum)(false.B)))
805  tEnable := csrCtrl.mem_trigger.tEnableVec
806  when(csrCtrl.mem_trigger.tUpdate.valid) {
807    tdata(csrCtrl.mem_trigger.tUpdate.bits.addr) := csrCtrl.mem_trigger.tUpdate.bits.tdata
808  }
809  val triggerCanRaiseBpExp = csrCtrl.mem_trigger.triggerCanRaiseBpExp
810  val debugMode = csrCtrl.mem_trigger.debugMode
811
812  val backendTriggerTimingVec = VecInit(tdata.map(_.timing))
813  val backendTriggerChainVec = VecInit(tdata.map(_.chain))
814
815  XSDebug(tEnable.asUInt.orR, "Debug Mode: At least one store trigger is enabled\n")
816  for (j <- 0 until TriggerNum)
817    PrintTriggerInfo(tEnable(j), tdata(j))
818
819  // Segment instructions are executed atomically:
820  // once a segment instruction starts executing, no other instruction is allowed to execute until it completes.
821  val vSegmentFlag = RegInit(false.B)
822
823  when(GatedValidRegNext(vSegmentUnit.io.in.fire)) {
824    vSegmentFlag := true.B
825  }.elsewhen(GatedValidRegNext(vSegmentUnit.io.uopwriteback.valid)) {
826    vSegmentFlag := false.B
827  }
828
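  // speculation gate for misaligned loads: a rollback caused by a RAR-queue nack from the
  // misalign buffer disables speculation until the RAR queue drains below the threshold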
829  val misalign_allow_spec = RegInit(true.B)
830  val ldu_rollback_with_misalign_nack = loadUnits.map(ldu =>
831    ldu.io.lsq.ldin.bits.isFrmMisAlignBuf && ldu.io.lsq.ldin.bits.rep_info.rar_nack && ldu.io.rollback.valid
832  ).reduce(_ || _)
833  when (ldu_rollback_with_misalign_nack) {
834    misalign_allow_spec := false.B
835  } .elsewhen(lsq.io.rarValidCount < (LoadQueueRARSize - 4).U) {
836    misalign_allow_spec := true.B
837  }
838
839  // LoadUnit
840  val correctMissTrain = Constantin.createRecord(s"CorrectMissTrain$hartId", initValue = false)
841
842  for (i <- 0 until LduCnt) {
843    loadUnits(i).io.redirect <> redirect
844    loadUnits(i).io.misalign_allow_spec := misalign_allow_spec
845
846    // get input from dispatch
847    loadUnits(i).io.ldin <> io.ooo_to_mem.issueLda(i)
848    loadUnits(i).io.feedback_slow <> io.mem_to_ooo.ldaIqFeedback(i).feedbackSlow
849    io.mem_to_ooo.ldaIqFeedback(i).feedbackFast := DontCare
850    loadUnits(i).io.correctMissTrain := correctMissTrain
851    io.mem_to_ooo.ldCancel.drop(HyuCnt)(i) := loadUnits(i).io.ldCancel
852    io.mem_to_ooo.wakeup.drop(HyuCnt)(i) := loadUnits(i).io.wakeup
853
854    // vector
855    if (i < VlduCnt) {
856      loadUnits(i).io.vecldout.ready := false.B
857    } else {
858      loadUnits(i).io.vecldin.valid := false.B
859      loadUnits(i).io.vecldin.bits := DontCare
860      loadUnits(i).io.vecldout.ready := false.B
861    }
862
863    // fast replay
864    loadUnits(i).io.fast_rep_in <> loadUnits(i).io.fast_rep_out
865
866    // SoftPrefetch to frontend (prefetch.i)
867    loadUnits(i).io.ifetchPrefetch <> io.ifetchPrefetch(i)
868
869    // dcache access
870    loadUnits(i).io.dcache <> dcache.io.lsu.load(i)
871    if(i == 0){
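      // dcache load port 0 is shared between loadUnits(0) and the vector segment unit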
872      vSegmentUnit.io.rdcache := DontCare
873      dcache.io.lsu.load(i).req.valid := loadUnits(i).io.dcache.req.valid || vSegmentUnit.io.rdcache.req.valid
874      dcache.io.lsu.load(i).req.bits  := Mux1H(Seq(
875        vSegmentUnit.io.rdcache.req.valid -> vSegmentUnit.io.rdcache.req.bits,
876        loadUnits(i).io.dcache.req.valid -> loadUnits(i).io.dcache.req.bits
877      ))
878      vSegmentUnit.io.rdcache.req.ready := dcache.io.lsu.load(i).req.ready
879    }
880
881    // Dcache requests must also be preempted by the segment.
882    when(vSegmentFlag){
883      loadUnits(i).io.dcache.req.ready             := false.B // Dcache is preempted.
884
885      dcache.io.lsu.load(0).pf_source              := vSegmentUnit.io.rdcache.pf_source
886      dcache.io.lsu.load(0).s1_paddr_dup_lsu       := vSegmentUnit.io.rdcache.s1_paddr_dup_lsu
887      dcache.io.lsu.load(0).s1_paddr_dup_dcache    := vSegmentUnit.io.rdcache.s1_paddr_dup_dcache
888      dcache.io.lsu.load(0).s1_kill                := vSegmentUnit.io.rdcache.s1_kill
889      dcache.io.lsu.load(0).s2_kill                := vSegmentUnit.io.rdcache.s2_kill
890      dcache.io.lsu.load(0).s0_pc                  := vSegmentUnit.io.rdcache.s0_pc
891      dcache.io.lsu.load(0).s1_pc                  := vSegmentUnit.io.rdcache.s1_pc
892      dcache.io.lsu.load(0).s2_pc                  := vSegmentUnit.io.rdcache.s2_pc
893      dcache.io.lsu.load(0).is128Req               := vSegmentUnit.io.rdcache.is128Req
894    }.otherwise {
895      loadUnits(i).io.dcache.req.ready             := dcache.io.lsu.load(i).req.ready
896
897      dcache.io.lsu.load(0).pf_source              := loadUnits(0).io.dcache.pf_source
898      dcache.io.lsu.load(0).s1_paddr_dup_lsu       := loadUnits(0).io.dcache.s1_paddr_dup_lsu
899      dcache.io.lsu.load(0).s1_paddr_dup_dcache    := loadUnits(0).io.dcache.s1_paddr_dup_dcache
900      dcache.io.lsu.load(0).s1_kill                := loadUnits(0).io.dcache.s1_kill
901      dcache.io.lsu.load(0).s2_kill                := loadUnits(0).io.dcache.s2_kill
902      dcache.io.lsu.load(0).s0_pc                  := loadUnits(0).io.dcache.s0_pc
903      dcache.io.lsu.load(0).s1_pc                  := loadUnits(0).io.dcache.s1_pc
904      dcache.io.lsu.load(0).s2_pc                  := loadUnits(0).io.dcache.s2_pc
905      dcache.io.lsu.load(0).is128Req               := loadUnits(0).io.dcache.is128Req
906    }
907
908    // forward
909    loadUnits(i).io.lsq.forward <> lsq.io.forward(i)
910    loadUnits(i).io.sbuffer <> sbuffer.io.forward(i)
911    loadUnits(i).io.ubuffer <> uncache.io.forward(i)
912    loadUnits(i).io.tl_d_channel := dcache.io.lsu.forward_D(i)
913    loadUnits(i).io.forward_mshr <> dcache.io.lsu.forward_mshr(i)
914    // ld-ld violation check
915    loadUnits(i).io.lsq.ldld_nuke_query <> lsq.io.ldu.ldld_nuke_query(i)
916    loadUnits(i).io.lsq.stld_nuke_query <> lsq.io.ldu.stld_nuke_query(i)
917    // load queue dequeue (oldest entry) pointer
918    loadUnits(i).io.lsq.lqDeqPtr := lsq.io.lqDeqPtr
919    loadUnits(i).io.csrCtrl       <> csrCtrl
920    // dcache refill req
921  // loadUnits(i).io.refill           <> delayedDcacheRefill
922    // dtlb
923    loadUnits(i).io.tlb <> dtlb_reqs.take(LduCnt)(i)
924    if (i == 0) { // dtlb port 0 is also used by vSegmentUnit
925      val vsegmentDtlbReqValid = vSegmentUnit.io.dtlb.req.valid // the segment unit's tlb request is delayed by 1 cycle
926      dtlb_reqs.take(LduCnt)(i).req.valid := loadUnits(i).io.tlb.req.valid || RegNext(vsegmentDtlbReqValid)
927      vSegmentUnit.io.dtlb.req.ready      := dtlb_reqs.take(LduCnt)(i).req.ready
928      dtlb_reqs.take(LduCnt)(i).req.bits  := ParallelPriorityMux(Seq(
929        RegNext(vsegmentDtlbReqValid)     -> RegEnable(vSegmentUnit.io.dtlb.req.bits, vsegmentDtlbReqValid),
930        loadUnits(i).io.tlb.req.valid     -> loadUnits(i).io.tlb.req.bits
931      ))
932    }
933    // pmp
934    loadUnits(i).io.pmp <> pmp_check(i).resp
935    // st-ld violation query
936    val stld_nuke_query = storeUnits.map(_.io.stld_nuke_query) ++ hybridUnits.map(_.io.stu_io.stld_nuke_query)
937    for (s <- 0 until StorePipelineWidth) {
938      loadUnits(i).io.stld_nuke_query(s) := stld_nuke_query(s)
939    }
940    loadUnits(i).io.lq_rep_full <> lsq.io.lq_rep_full
941    // load prefetch train
942    prefetcherOpt.foreach(pf => {
943      // sms will train on all miss load sources
944      val source = loadUnits(i).io.prefetch_train
945      pf.io.ld_in(i).valid := Mux(pf_train_on_hit,
946        source.valid,
947        source.valid && source.bits.isFirstIssue && source.bits.miss
948      )
949      pf.io.ld_in(i).bits := source.bits
950      val loadPc = RegNext(io.ooo_to_mem.issueLda(i).bits.uop.pc) // for s1
951      pf.io.ld_in(i).bits.uop.pc := Mux(
952        loadUnits(i).io.s2_ptr_chasing,
953        RegEnable(loadPc, loadUnits(i).io.s2_prefetch_spec),
954        RegEnable(RegEnable(loadPc, loadUnits(i).io.s1_prefetch_spec), loadUnits(i).io.s2_prefetch_spec)
955      )
956    })
957    l1PrefetcherOpt.foreach(pf => {
958      // stream will train on all load sources
959      val source = loadUnits(i).io.prefetch_train_l1
960      pf.io.ld_in(i).valid := source.valid && source.bits.isFirstIssue
961      pf.io.ld_in(i).bits := source.bits
962    })
963
964    // load to load fast forward: load(i) prefers data(i)
965    val l2l_fwd_out = loadUnits.map(_.io.l2l_fwd_out) ++ hybridUnits.map(_.io.ldu_io.l2l_fwd_out)
966    val fastPriority = (i until LduCnt + HyuCnt) ++ (0 until i)
967    val fastValidVec = fastPriority.map(j => l2l_fwd_out(j).valid)
968    val fastDataVec = fastPriority.map(j => l2l_fwd_out(j).data)
969    val fastErrorVec = fastPriority.map(j => l2l_fwd_out(j).dly_ld_err)
970    val fastMatchVec = fastPriority.map(j => io.ooo_to_mem.loadFastMatch(i)(j))
971    loadUnits(i).io.l2l_fwd_in.valid := VecInit(fastValidVec).asUInt.orR
972    loadUnits(i).io.l2l_fwd_in.data := ParallelPriorityMux(fastValidVec, fastDataVec)
973    loadUnits(i).io.l2l_fwd_in.dly_ld_err := ParallelPriorityMux(fastValidVec, fastErrorVec)
974    val fastMatch = ParallelPriorityMux(fastValidVec, fastMatchVec)
975    loadUnits(i).io.ld_fast_match := fastMatch
976    loadUnits(i).io.ld_fast_imm := io.ooo_to_mem.loadFastImm(i)
977    loadUnits(i).io.ld_fast_fuOpType := io.ooo_to_mem.loadFastFuOpType(i)
978    loadUnits(i).io.replay <> lsq.io.replay(i)
979
980    val l2_hint = RegNext(io.l2_hint)
981
982    // L2 Hint for DCache
983    dcache.io.l2_hint <> l2_hint
984
985    loadUnits(i).io.l2_hint <> l2_hint
986    loadUnits(i).io.tlb_hint.id := dtlbRepeater.io.hint.get.req(i).id
987    loadUnits(i).io.tlb_hint.full := dtlbRepeater.io.hint.get.req(i).full ||
988      tlbreplay_reg(i) || dtlb_ld0_tlbreplay_reg(i)
989
990    // passdown to lsq (load s2)
991    lsq.io.ldu.ldin(i) <> loadUnits(i).io.lsq.ldin
992    if (i == UncacheWBPort) {
993      lsq.io.ldout(i) <> loadUnits(i).io.lsq.uncache
994    } else {
995      lsq.io.ldout(i).ready := true.B
996      loadUnits(i).io.lsq.uncache.valid := false.B
997      loadUnits(i).io.lsq.uncache.bits := DontCare
998    }
999    lsq.io.ld_raw_data(i) <> loadUnits(i).io.lsq.ld_raw_data
1000    lsq.io.ncOut(i) <> loadUnits(i).io.lsq.nc_ldin
1001    lsq.io.l2_hint.valid := l2_hint.valid
1002    lsq.io.l2_hint.bits.sourceId := l2_hint.bits.sourceId
1003    lsq.io.l2_hint.bits.isKeyword := l2_hint.bits.isKeyword
1004
1005    lsq.io.tlb_hint <> dtlbRepeater.io.hint.get
1006
1007    // connect misalignBuffer
1008    loadMisalignBuffer.io.enq(i) <> loadUnits(i).io.misalign_enq
1009
1010    if (i == MisalignWBPort) {
1011      loadUnits(i).io.misalign_ldin  <> loadMisalignBuffer.io.splitLoadReq
1012      loadUnits(i).io.misalign_ldout <> loadMisalignBuffer.io.splitLoadResp
1013    } else {
1014      loadUnits(i).io.misalign_ldin.valid := false.B
1015      loadUnits(i).io.misalign_ldin.bits := DontCare
1016    }
1017
1018    // alter writeback exception info
1019    io.mem_to_ooo.s3_delayed_load_error(i) := loadUnits(i).io.s3_dly_ld_err
1020
1021    // update mem dependency predictor
1022    // io.memPredUpdate(i) := DontCare
1023
1024    // --------------------------------
1025    // Load Triggers
1026    // --------------------------------
1027    loadUnits(i).io.fromCsrTrigger.tdataVec := tdata
1028    loadUnits(i).io.fromCsrTrigger.tEnableVec := tEnable
1029    loadUnits(i).io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
1030    loadUnits(i).io.fromCsrTrigger.debugMode := debugMode
1031  }
1032
1033  for (i <- 0 until HyuCnt) {
1034    hybridUnits(i).io.redirect <> redirect
1035
1036    // get input from dispatch
1037    hybridUnits(i).io.lsin <> io.ooo_to_mem.issueHya(i)
1038    hybridUnits(i).io.feedback_slow <> io.mem_to_ooo.hyuIqFeedback(i).feedbackSlow
1039    hybridUnits(i).io.feedback_fast <> io.mem_to_ooo.hyuIqFeedback(i).feedbackFast
1040    hybridUnits(i).io.correctMissTrain := correctMissTrain
1041    io.mem_to_ooo.ldCancel.take(HyuCnt)(i) := hybridUnits(i).io.ldu_io.ldCancel
1042    io.mem_to_ooo.wakeup.take(HyuCnt)(i) := hybridUnits(i).io.ldu_io.wakeup
1043
1044    // ------------------------------------
1045    //  Load Port
1046    // ------------------------------------
1047    // fast replay
1048    hybridUnits(i).io.ldu_io.fast_rep_in <> hybridUnits(i).io.ldu_io.fast_rep_out
1049
1050    // dcache access
1051    hybridUnits(i).io.ldu_io.dcache <> dcache.io.lsu.load(LduCnt + i)
1052    hybridUnits(i).io.stu_io.dcache <> dcache.io.lsu.sta(StaCnt + i)
1053
1054    // forward from lsq
1055    hybridUnits(i).io.ldu_io.lsq.forward <> lsq.io.forward(LduCnt + i)
1056    // forward from sbuffer / uncache buffer
1057    hybridUnits(i).io.ldu_io.sbuffer <> sbuffer.io.forward(LduCnt + i)
1058    hybridUnits(i).io.ldu_io.ubuffer <> uncache.io.forward(LduCnt + i)
1059    // hybridUnits(i).io.ldu_io.vec_forward <> vsFlowQueue.io.forward(LduCnt + i)
1060    hybridUnits(i).io.ldu_io.vec_forward := DontCare
1061    hybridUnits(i).io.ldu_io.tl_d_channel := dcache.io.lsu.forward_D(LduCnt + i)
1062    hybridUnits(i).io.ldu_io.forward_mshr <> dcache.io.lsu.forward_mshr(LduCnt + i)
1063    // ld-ld violation check
1064    hybridUnits(i).io.ldu_io.lsq.ldld_nuke_query <> lsq.io.ldu.ldld_nuke_query(LduCnt + i)
1065    hybridUnits(i).io.ldu_io.lsq.stld_nuke_query <> lsq.io.ldu.stld_nuke_query(LduCnt + i)
1066    hybridUnits(i).io.csrCtrl <> csrCtrl
1067    // dcache refill req
1068    hybridUnits(i).io.ldu_io.tlb_hint.id := dtlbRepeater.io.hint.get.req(LduCnt + i).id
1069    hybridUnits(i).io.ldu_io.tlb_hint.full := dtlbRepeater.io.hint.get.req(LduCnt + i).full ||
1070      tlbreplay_reg(LduCnt + i) || dtlb_ld0_tlbreplay_reg(LduCnt + i)
1071
1072    // dtlb
1073    hybridUnits(i).io.tlb <> dtlb_ld.head.requestor(LduCnt + i)
1074    // pmp
1075    hybridUnits(i).io.pmp <> pmp_check.drop(LduCnt)(i).resp
1076    // st-ld violation query
1077    val stld_nuke_query = VecInit(storeUnits.map(_.io.stld_nuke_query) ++ hybridUnits.map(_.io.stu_io.stld_nuke_query))
1078    hybridUnits(i).io.ldu_io.stld_nuke_query := stld_nuke_query
1079    hybridUnits(i).io.ldu_io.lq_rep_full <> lsq.io.lq_rep_full
1080    // load prefetch train
1081    prefetcherOpt.foreach(pf => {
1082      val source = hybridUnits(i).io.prefetch_train
1083      pf.io.ld_in(LduCnt + i).valid := Mux(pf_train_on_hit,
1084        source.valid,
1085        source.valid && source.bits.isFirstIssue && source.bits.miss
1086      )
1087      pf.io.ld_in(LduCnt + i).bits := source.bits
1088      pf.io.ld_in(LduCnt + i).bits.uop.pc := Mux(hybridUnits(i).io.ldu_io.s2_ptr_chasing, io.ooo_to_mem.hybridPc(i), RegNext(io.ooo_to_mem.hybridPc(i)))
1089    })
1090    l1PrefetcherOpt.foreach(pf => {
1091      // stream will train on all load sources
1092      val source = hybridUnits(i).io.prefetch_train_l1
1093      pf.io.ld_in(LduCnt + i).valid := source.valid && source.bits.isFirstIssue &&
1094                                       FuType.isLoad(source.bits.uop.fuType)
1095      pf.io.ld_in(LduCnt + i).bits := source.bits
1096      pf.io.st_in(StaCnt + i).valid := false.B
1097      pf.io.st_in(StaCnt + i).bits := DontCare
1098    })
1099    prefetcherOpt.foreach(pf => {
1100      val source = hybridUnits(i).io.prefetch_train
1101      pf.io.st_in(StaCnt + i).valid := Mux(pf_train_on_hit,
1102        source.valid,
1103        source.valid && source.bits.isFirstIssue && source.bits.miss
1104      ) && FuType.isStore(source.bits.uop.fuType)
1105      pf.io.st_in(StaCnt + i).bits := source.bits
1106      pf.io.st_in(StaCnt + i).bits.uop.pc := RegNext(io.ooo_to_mem.hybridPc(i))
1107    })
1108
1109    // load to load fast forward: load(i) prefers data(i)
1110    val l2l_fwd_out = loadUnits.map(_.io.l2l_fwd_out) ++ hybridUnits.map(_.io.ldu_io.l2l_fwd_out)
1111    val fastPriority = (LduCnt + i until LduCnt + HyuCnt) ++ (0 until LduCnt + i)
1112    val fastValidVec = fastPriority.map(j => l2l_fwd_out(j).valid)
1113    val fastDataVec = fastPriority.map(j => l2l_fwd_out(j).data)
1114    val fastErrorVec = fastPriority.map(j => l2l_fwd_out(j).dly_ld_err)
1115    val fastMatchVec = fastPriority.map(j => io.ooo_to_mem.loadFastMatch(LduCnt + i)(j))
1116    hybridUnits(i).io.ldu_io.l2l_fwd_in.valid := VecInit(fastValidVec).asUInt.orR
1117    hybridUnits(i).io.ldu_io.l2l_fwd_in.data := ParallelPriorityMux(fastValidVec, fastDataVec)
1118    hybridUnits(i).io.ldu_io.l2l_fwd_in.dly_ld_err := ParallelPriorityMux(fastValidVec, fastErrorVec)
1119    val fastMatch = ParallelPriorityMux(fastValidVec, fastMatchVec)
1120    hybridUnits(i).io.ldu_io.ld_fast_match := fastMatch
1121    hybridUnits(i).io.ldu_io.ld_fast_imm := io.ooo_to_mem.loadFastImm(LduCnt + i)
1122    hybridUnits(i).io.ldu_io.ld_fast_fuOpType := io.ooo_to_mem.loadFastFuOpType(LduCnt + i)
1123    hybridUnits(i).io.ldu_io.replay <> lsq.io.replay(LduCnt + i)
1124    hybridUnits(i).io.ldu_io.l2_hint <> io.l2_hint
1125
1126    // uncache
1127    lsq.io.ldout.drop(LduCnt)(i) <> hybridUnits(i).io.ldu_io.lsq.uncache
1128    lsq.io.ld_raw_data.drop(LduCnt)(i) <> hybridUnits(i).io.ldu_io.lsq.ld_raw_data
1129
1130
1131    // passdown to lsq (load s2)
1132    hybridUnits(i).io.ldu_io.lsq.nc_ldin.valid := false.B
1133    hybridUnits(i).io.ldu_io.lsq.nc_ldin.bits := DontCare
1134    lsq.io.ldu.ldin(LduCnt + i) <> hybridUnits(i).io.ldu_io.lsq.ldin
1135    // Lsq to sta unit
1136    lsq.io.sta.storeMaskIn(StaCnt + i) <> hybridUnits(i).io.stu_io.st_mask_out
1137
1138    // Lsq to std unit's rs
1139    lsq.io.std.storeDataIn(StaCnt + i) := stData(StaCnt + i)
1140    lsq.io.std.storeDataIn(StaCnt + i).valid := stData(StaCnt + i).valid && !st_data_atomics(StaCnt + i)
1141    // prefetch
1142    hybridUnits(i).io.stu_io.prefetch_req <> sbuffer.io.store_prefetch(StaCnt + i)
1143
1144    io.mem_to_ooo.s3_delayed_load_error(LduCnt + i) := hybridUnits(i).io.ldu_io.s3_dly_ld_err
1145
1146    // ------------------------------------
1147    //  Store Port
1148    // ------------------------------------
1149    hybridUnits(i).io.stu_io.lsq <> lsq.io.sta.storeAddrIn.takeRight(HyuCnt)(i)
1150    hybridUnits(i).io.stu_io.lsq_replenish <> lsq.io.sta.storeAddrInRe.takeRight(HyuCnt)(i)
1151
1152    lsq.io.sta.storeMaskIn.takeRight(HyuCnt)(i) <> hybridUnits(i).io.stu_io.st_mask_out
1153    io.mem_to_ooo.stIn.takeRight(HyuCnt)(i).valid := hybridUnits(i).io.stu_io.issue.valid
1154    io.mem_to_ooo.stIn.takeRight(HyuCnt)(i).bits := hybridUnits(i).io.stu_io.issue.bits
1155
1156    // ------------------------------------
1157    //  Vector Store Port
1158    // ------------------------------------
1159    hybridUnits(i).io.vec_stu_io.isFirstIssue := true.B
1160
1161    // -------------------------
1162    // Store Triggers
1163    // -------------------------
1164    hybridUnits(i).io.fromCsrTrigger.tdataVec := tdata
1165    hybridUnits(i).io.fromCsrTrigger.tEnableVec := tEnable
1166    hybridUnits(i).io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
1167    hybridUnits(i).io.fromCsrTrigger.debugMode := debugMode
1168  }
1169
1170  // misalignBuffer
1171  loadMisalignBuffer.io.redirect                <> redirect
1172  loadMisalignBuffer.io.rob.lcommit             := io.ooo_to_mem.lsqio.lcommit
1173  loadMisalignBuffer.io.rob.scommit             := io.ooo_to_mem.lsqio.scommit
1174  loadMisalignBuffer.io.rob.pendingMMIOld       := io.ooo_to_mem.lsqio.pendingMMIOld
1175  loadMisalignBuffer.io.rob.pendingld           := io.ooo_to_mem.lsqio.pendingld
1176  loadMisalignBuffer.io.rob.pendingst           := io.ooo_to_mem.lsqio.pendingst
1177  loadMisalignBuffer.io.rob.pendingVst          := io.ooo_to_mem.lsqio.pendingVst
1178  loadMisalignBuffer.io.rob.commit              := io.ooo_to_mem.lsqio.commit
1179  loadMisalignBuffer.io.rob.pendingPtr          := io.ooo_to_mem.lsqio.pendingPtr
1180  loadMisalignBuffer.io.rob.pendingPtrNext      := io.ooo_to_mem.lsqio.pendingPtrNext
1181
1182  lsq.io.loadMisalignFull                       := loadMisalignBuffer.io.loadMisalignFull
1183  lsq.io.misalignAllowSpec                      := misalign_allow_spec
1184
1185  storeMisalignBuffer.io.redirect               <> redirect
1186  storeMisalignBuffer.io.rob.lcommit            := io.ooo_to_mem.lsqio.lcommit
1187  storeMisalignBuffer.io.rob.scommit            := io.ooo_to_mem.lsqio.scommit
1188  storeMisalignBuffer.io.rob.pendingMMIOld      := io.ooo_to_mem.lsqio.pendingMMIOld
1189  storeMisalignBuffer.io.rob.pendingld          := io.ooo_to_mem.lsqio.pendingld
1190  storeMisalignBuffer.io.rob.pendingst          := io.ooo_to_mem.lsqio.pendingst
1191  storeMisalignBuffer.io.rob.pendingVst         := io.ooo_to_mem.lsqio.pendingVst
1192  storeMisalignBuffer.io.rob.commit             := io.ooo_to_mem.lsqio.commit
1193  storeMisalignBuffer.io.rob.pendingPtr         := io.ooo_to_mem.lsqio.pendingPtr
1194  storeMisalignBuffer.io.rob.pendingPtrNext     := io.ooo_to_mem.lsqio.pendingPtrNext
1195
1196  lsq.io.maControl                              <> storeMisalignBuffer.io.sqControl
1197
1198  lsq.io.cmoOpReq <> dcache.io.cmoOpReq
1199  lsq.io.cmoOpResp <> dcache.io.cmoOpResp
1200
1201  // Prefetcher
1202  val StreamDTLBPortIndex = TlbStartVec(dtlb_ld_idx) + LduCnt + HyuCnt
1203  val PrefetcherDTLBPortIndex = TlbStartVec(dtlb_pf_idx)
1204  val L2toL1DLBPortIndex = TlbStartVec(dtlb_pf_idx) + 1
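  // DTLB port layout for prefetch requests: l1PrefetcherOpt (the stream prefetcher) uses the
  // port right after the load/hybrid ports, prefetcherOpt uses the first dedicated prefetch
  // port, and the L2-to-L1 request (io.l2_tlb_req) uses the one after it.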
1205  prefetcherOpt match {
1206    case Some(pf) =>
1207      dtlb_reqs(PrefetcherDTLBPortIndex) <> pf.io.tlb_req
1208      pf.io.pmp_resp := pmp_check(PrefetcherDTLBPortIndex).resp
1209    case None =>
1210      dtlb_reqs(PrefetcherDTLBPortIndex) := DontCare
1211      dtlb_reqs(PrefetcherDTLBPortIndex).req.valid := false.B
1212      dtlb_reqs(PrefetcherDTLBPortIndex).resp.ready := true.B
1213  }
1214  l1PrefetcherOpt match {
1215    case Some(pf) =>
1216      dtlb_reqs(StreamDTLBPortIndex) <> pf.io.tlb_req
1217      pf.io.pmp_resp := pmp_check(StreamDTLBPortIndex).resp
1218    case None =>
1219      dtlb_reqs(StreamDTLBPortIndex) := DontCare
1220      dtlb_reqs(StreamDTLBPortIndex).req.valid := false.B
1221      dtlb_reqs(StreamDTLBPortIndex).resp.ready := true.B
1222  }
1223  dtlb_reqs(L2toL1DLBPortIndex) <> io.l2_tlb_req
1224  dtlb_reqs(L2toL1DLBPortIndex).resp.ready := true.B
1225  io.l2_pmp_resp := pmp_check(L2toL1DLBPortIndex).resp
1226
1227  // StoreUnit
1228  for (i <- 0 until StdCnt) {
1229    stdExeUnits(i).io.flush <> redirect
1230    stdExeUnits(i).io.in.valid := io.ooo_to_mem.issueStd(i).valid
1231    io.ooo_to_mem.issueStd(i).ready := stdExeUnits(i).io.in.ready
1232    stdExeUnits(i).io.in.bits := io.ooo_to_mem.issueStd(i).bits
1233  }
1234
1235  for (i <- 0 until StaCnt) {
1236    val stu = storeUnits(i)
1237
1238    stu.io.redirect      <> redirect
1239    stu.io.csrCtrl       <> csrCtrl
1240    stu.io.dcache        <> dcache.io.lsu.sta(i)
1241    stu.io.feedback_slow <> io.mem_to_ooo.staIqFeedback(i).feedbackSlow
1242    stu.io.stin         <> io.ooo_to_mem.issueSta(i)
1243    stu.io.lsq          <> lsq.io.sta.storeAddrIn(i)
1244    stu.io.lsq_replenish <> lsq.io.sta.storeAddrInRe(i)
1245    // dtlb
1246    stu.io.tlb          <> dtlb_st.head.requestor(i)
1247    stu.io.pmp          <> pmp_check(LduCnt + HyuCnt + 1 + i).resp
1248
1249    // -------------------------
1250    // Store Triggers
1251    // -------------------------
1252    stu.io.fromCsrTrigger.tdataVec := tdata
1253    stu.io.fromCsrTrigger.tEnableVec := tEnable
1254    stu.io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
1255    stu.io.fromCsrTrigger.debugMode := debugMode
1256
1257    // prefetch
1258    stu.io.prefetch_req <> sbuffer.io.store_prefetch(i)
1259
1260    // store unit does not need fast feedback
1261    io.mem_to_ooo.staIqFeedback(i).feedbackFast := DontCare
1262
1263    // Lsq to sta unit
1264    lsq.io.sta.storeMaskIn(i) <> stu.io.st_mask_out
1265
1266    // connect misalignBuffer
1267    storeMisalignBuffer.io.enq(i) <> stu.io.misalign_enq
1268
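    // Only store unit 0 services split requests from the store misalign buffer; the other
    // store units tie off their misalign issue ports.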
1269    if (i == 0) {
1270      stu.io.misalign_stin  <> storeMisalignBuffer.io.splitStoreReq
1271      stu.io.misalign_stout <> storeMisalignBuffer.io.splitStoreResp
1272    } else {
1273      stu.io.misalign_stin.valid := false.B
1274      stu.io.misalign_stin.bits := DontCare
1275    }
1276
1277    // Lsq to std unit's rs
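    // On ports shared with vector stores, store data from vsSplit takes priority over the
    // scalar std data, and the scalar data is back-pressured (stData.ready := false.B) in
    // that cycle.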
1278    if (i < VstuCnt){
1279      when (vsSplit(i).io.vstd.get.valid) {
1280        lsq.io.std.storeDataIn(i).valid := true.B
1281        lsq.io.std.storeDataIn(i).bits := vsSplit(i).io.vstd.get.bits
1282        stData(i).ready := false.B
1283      }.otherwise {
1284        lsq.io.std.storeDataIn(i).valid := stData(i).valid && !st_data_atomics(i)
1285        lsq.io.std.storeDataIn(i).bits.uop := stData(i).bits.uop
1286        lsq.io.std.storeDataIn(i).bits.data := stData(i).bits.data
1287        lsq.io.std.storeDataIn(i).bits.mask.map(_ := 0.U)
1288        lsq.io.std.storeDataIn(i).bits.vdIdx.map(_ := 0.U)
1289        lsq.io.std.storeDataIn(i).bits.vdIdxInField.map(_ := 0.U)
1290        stData(i).ready := true.B
1291      }
1292    } else {
1293      lsq.io.std.storeDataIn(i).valid := stData(i).valid && !st_data_atomics(i)
1294      lsq.io.std.storeDataIn(i).bits.uop := stData(i).bits.uop
1295      lsq.io.std.storeDataIn(i).bits.data := stData(i).bits.data
1296      lsq.io.std.storeDataIn(i).bits.mask.map(_ := 0.U)
1297      lsq.io.std.storeDataIn(i).bits.vdIdx.map(_ := 0.U)
1298      lsq.io.std.storeDataIn(i).bits.vdIdxInField.map(_ := 0.U)
1299      stData(i).ready := true.B
1300    }
1301    lsq.io.std.storeDataIn.map(_.bits.debug := 0.U.asTypeOf(new DebugBundle))
1302    lsq.io.std.storeDataIn.foreach(_.bits.isFromLoadUnit := DontCare)
1303
1304
1305    // store prefetch train
1306    l1PrefetcherOpt.foreach(pf => {
1307      // the stream prefetcher trains on loads only; tie off its store training port here
1308      pf.io.st_in(i).valid := false.B
1309      pf.io.st_in(i).bits := DontCare
1310    })
1311
1312    prefetcherOpt.foreach(pf => {
1313      pf.io.st_in(i).valid := Mux(pf_train_on_hit,
1314        stu.io.prefetch_train.valid,
1315        stu.io.prefetch_train.valid && stu.io.prefetch_train.bits.isFirstIssue && (
1316          stu.io.prefetch_train.bits.miss
1317          )
1318      )
1319      pf.io.st_in(i).bits := stu.io.prefetch_train.bits
1320      pf.io.st_in(i).bits.uop.pc := RegEnable(RegEnable(io.ooo_to_mem.storePc(i), stu.io.s1_prefetch_spec), stu.io.s2_prefetch_spec)
1321    })
1322
1323    // 1. sync issue info to store set LFST
1324    // 2. when a store issues, broadcast the issued sqPtr to wake up the following insts
1325    // io.stIn(i).valid := io.issue(exuParameters.LduCnt + i).valid
1326    // io.stIn(i).bits := io.issue(exuParameters.LduCnt + i).bits
1327    io.mem_to_ooo.stIn(i).valid := stu.io.issue.valid
1328    io.mem_to_ooo.stIn(i).bits := stu.io.issue.bits
1329
1330    stu.io.stout.ready := true.B
1331
1332    // vector
1333    if (i < VstuCnt) {
1334      stu.io.vecstin <> vsSplit(i).io.out
1335      // vsFlowQueue.io.pipeFeedback(i) <> stu.io.vec_feedback_slow // need connect
1336    } else {
1337      stu.io.vecstin.valid := false.B
1338      stu.io.vecstin.bits := DontCare
1339      stu.io.vecstout.ready := false.B
1340    }
1341    stu.io.vec_isFirstIssue := true.B // TODO
1342  }
1343
1344  val sqOtherStout = WireInit(0.U.asTypeOf(DecoupledIO(new MemExuOutput)))
1345  sqOtherStout.valid := lsq.io.mmioStout.valid || lsq.io.cboZeroStout.valid
1346  sqOtherStout.bits  := Mux(lsq.io.cboZeroStout.valid, lsq.io.cboZeroStout.bits, lsq.io.mmioStout.bits)
1347  assert(!(lsq.io.mmioStout.valid && lsq.io.cboZeroStout.valid), "Cannot writeback to mmio and cboZero at the same time.")
1348
1349  // Store writeback by StoreQueue:
1350  //   1. cbo Zero
1351  //   2. mmio
1352  // Currently, the two should not be valid at the same time, so cbo zero is simply given higher priority.
1353  val otherStout = WireInit(0.U.asTypeOf(lsq.io.mmioStout))
1354  NewPipelineConnect(
1355    sqOtherStout, otherStout, otherStout.fire,
1356    false.B,
1357    Option("otherStoutConnect")
1358  )
1359  otherStout.ready := false.B
1360  when (otherStout.valid && !storeUnits(0).io.stout.valid) {
1361    stOut(0).valid := true.B
1362    stOut(0).bits  := otherStout.bits
1363    otherStout.ready := true.B
1364  }
1365  lsq.io.mmioStout.ready := sqOtherStout.ready
1366  lsq.io.cboZeroStout.ready := sqOtherStout.ready
1367
1368  // vec mmio writeback
1369  lsq.io.vecmmioStout.ready := false.B
1370
1371  // misalign buffer writeback will overwrite stOut(0)
1372  val storeMisalignCanWriteBack = !otherStout.valid && !storeUnits(0).io.stout.valid && !storeUnits(0).io.vecstout.valid
1373  storeMisalignBuffer.io.writeBack.ready := storeMisalignCanWriteBack
1374  storeMisalignBuffer.io.storeOutValid := storeUnits(0).io.stout.valid
1375  storeMisalignBuffer.io.storeVecOutValid := storeUnits(0).io.vecstout.valid
1376  when (storeMisalignBuffer.io.writeBack.valid && storeMisalignCanWriteBack) {
1377    stOut(0).valid := true.B
1378    stOut(0).bits  := storeMisalignBuffer.io.writeBack.bits
1379  }
1380
1381  // Uncache
1382  uncache.io.enableOutstanding := io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable
1383  uncache.io.hartId := io.hartId
1384  lsq.io.uncacheOutstanding := io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable
1385
1386  // Lsq
1387  io.mem_to_ooo.lsqio.mmio       := lsq.io.rob.mmio
1388  io.mem_to_ooo.lsqio.uop        := lsq.io.rob.uop
1389  lsq.io.rob.lcommit             := io.ooo_to_mem.lsqio.lcommit
1390  lsq.io.rob.scommit             := io.ooo_to_mem.lsqio.scommit
1391  lsq.io.rob.pendingMMIOld       := io.ooo_to_mem.lsqio.pendingMMIOld
1392  lsq.io.rob.pendingld           := io.ooo_to_mem.lsqio.pendingld
1393  lsq.io.rob.pendingst           := io.ooo_to_mem.lsqio.pendingst
1394  lsq.io.rob.pendingVst          := io.ooo_to_mem.lsqio.pendingVst
1395  lsq.io.rob.commit              := io.ooo_to_mem.lsqio.commit
1396  lsq.io.rob.pendingPtr          := io.ooo_to_mem.lsqio.pendingPtr
1397  lsq.io.rob.pendingPtrNext      := io.ooo_to_mem.lsqio.pendingPtrNext
1398
1399  //  lsq.io.rob            <> io.lsqio.rob
1400  lsq.io.enq            <> io.ooo_to_mem.enqLsq
1401  lsq.io.brqRedirect    <> redirect
1402
1403  //  violation rollback
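  // selectOldestRedirect returns a one-hot vector marking the valid rollback request with
  // the oldest robIdx; entry i is set only if source i is valid and no other valid source
  // is strictly older (ties are broken towards the lower index).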
1404  def selectOldestRedirect(xs: Seq[Valid[Redirect]]): Vec[Bool] = {
1405    val compareVec = (0 until xs.length).map(i => (0 until i).map(j => isAfter(xs(j).bits.robIdx, xs(i).bits.robIdx)))
1406    val resultOnehot = VecInit((0 until xs.length).map(i => Cat((0 until xs.length).map(j =>
1407      (if (j < i) !xs(j).valid || compareVec(i)(j)
1408      else if (j == i) xs(i).valid
1409      else !xs(j).valid || !compareVec(j)(i))
1410    )).andR))
1411    resultOnehot
1412  }
1413  val allRedirect = loadUnits.map(_.io.rollback) ++ hybridUnits.map(_.io.ldu_io.rollback) ++ lsq.io.nack_rollback ++ lsq.io.nuke_rollback
1414  val oldestOneHot = selectOldestRedirect(allRedirect)
1415  val oldestRedirect = WireDefault(Mux1H(oldestOneHot, allRedirect))
1416  // memory replay would not cause IAF/IPF/IGPF
1417  oldestRedirect.bits.cfiUpdate.backendIAF := false.B
1418  oldestRedirect.bits.cfiUpdate.backendIPF := false.B
1419  oldestRedirect.bits.cfiUpdate.backendIGPF := false.B
1420  io.mem_to_ooo.memoryViolation := oldestRedirect
1421  io.mem_to_ooo.lsqio.lqCanAccept  := lsq.io.lqCanAccept
1422  io.mem_to_ooo.lsqio.sqCanAccept  := lsq.io.sqCanAccept
1423
1424  // lsq.io.uncache        <> uncache.io.lsq
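  // Uncache arbitration FSM: the scalar (LSQ) path and the vector path share one uncache
  // port. On a fired request the FSM moves to s_scalar_uncache or s_vector_uncache and
  // returns to s_idle when the response fires; with outstanding uncache writes enabled,
  // NC stores and vector requests are not tracked and the FSM stays in s_idle.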
1425  val s_idle :: s_scalar_uncache :: s_vector_uncache :: Nil = Enum(3)
1426  val uncacheState = RegInit(s_idle)
1427  val uncacheReq = Wire(Decoupled(new UncacheWordReq))
1428  val uncacheIdResp = uncache.io.lsq.idResp
1429  val uncacheResp = Wire(Decoupled(new UncacheWordResp))
1430
1431  uncacheReq.bits := DontCare
1432  uncacheReq.valid := false.B
1433  uncacheReq.ready := false.B
1434  uncacheResp.bits := DontCare
1435  uncacheResp.valid := false.B
1436  uncacheResp.ready := false.B
1437  lsq.io.uncache.req.ready := false.B
1438  lsq.io.uncache.idResp.valid := false.B
1439  lsq.io.uncache.idResp.bits := DontCare
1440  lsq.io.uncache.resp.valid := false.B
1441  lsq.io.uncache.resp.bits := DontCare
1442
1443  switch (uncacheState) {
1444    is (s_idle) {
1445      when (uncacheReq.fire) {
1446        when (lsq.io.uncache.req.valid) {
1447          when (!lsq.io.uncache.req.bits.nc || !io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
1448            uncacheState := s_scalar_uncache
1449          }
1450        }.otherwise {
1451          // val isStore = vsFlowQueue.io.uncache.req.bits.cmd === MemoryOpConstants.M_XWR
1452          when (!io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
1453            uncacheState := s_vector_uncache
1454          }
1455        }
1456      }
1457    }
1458
1459    is (s_scalar_uncache) {
1460      when (uncacheResp.fire) {
1461        uncacheState := s_idle
1462      }
1463    }
1464
1465    is (s_vector_uncache) {
1466      when (uncacheResp.fire) {
1467        uncacheState := s_idle
1468      }
1469    }
1470  }
1471
1472  when (lsq.io.uncache.req.valid) {
1473    uncacheReq <> lsq.io.uncache.req
1474  }
1475  when (io.ooo_to_mem.csrCtrl.uncache_write_outstanding_enable) {
1476    lsq.io.uncache.resp <> uncacheResp
1477    lsq.io.uncache.idResp <> uncacheIdResp
1478  }.otherwise {
1479    when (uncacheState === s_scalar_uncache) {
1480      lsq.io.uncache.resp <> uncacheResp
1481      lsq.io.uncache.idResp <> uncacheIdResp
1482    }
1483  }
1484  // add a pipeline stage on the uncache req/resp path for better timing
1485  AddPipelineReg(uncacheReq, uncache.io.lsq.req, false.B)
1486  AddPipelineReg(uncache.io.lsq.resp, uncacheResp, false.B)
1487
1488  //lsq.io.refill         := delayedDcacheRefill
1489  lsq.io.release        := dcache.io.lsu.release
1490  lsq.io.lqCancelCnt <> io.mem_to_ooo.lqCancelCnt
1491  lsq.io.sqCancelCnt <> io.mem_to_ooo.sqCancelCnt
1492  lsq.io.lqDeq <> io.mem_to_ooo.lqDeq
1493  lsq.io.sqDeq <> io.mem_to_ooo.sqDeq
1494  // Todo: assign these
1495  io.mem_to_ooo.sqDeqPtr := lsq.io.sqDeqPtr
1496  io.mem_to_ooo.lqDeqPtr := lsq.io.lqDeqPtr
1497  lsq.io.tl_d_channel <> dcache.io.lsu.tl_d_channel
1498
1499  // LSQ to store buffer
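  // sbuffer port 0 is shared with vSegmentUnit; the Mux1H below assumes the segment unit
  // and lsq.io.sbuffer(0) are never valid in the same cycle.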
1500  lsq.io.sbuffer        <> sbuffer.io.in
1501  sbuffer.io.in(0).valid := lsq.io.sbuffer(0).valid || vSegmentUnit.io.sbuffer.valid
1502  sbuffer.io.in(0).bits  := Mux1H(Seq(
1503    vSegmentUnit.io.sbuffer.valid -> vSegmentUnit.io.sbuffer.bits,
1504    lsq.io.sbuffer(0).valid       -> lsq.io.sbuffer(0).bits
1505  ))
1506  vSegmentUnit.io.sbuffer.ready := sbuffer.io.in(0).ready
1507  lsq.io.sqEmpty        <> sbuffer.io.sqempty
1508  dcache.io.force_write := lsq.io.force_write
1509
1510  // Default initialization for when difftest is disabled.
1511  sbuffer.io.vecDifftestInfo      := DontCare
1512  lsq.io.sbufferVecDifftestInfo   := DontCare
1513  vSegmentUnit.io.vecDifftestInfo := DontCare
1514  if (env.EnableDifftest) {
1515    sbuffer.io.vecDifftestInfo.zipWithIndex.map { case (sbufferPort, index) =>
1516      if (index == 0) {
1517        val vSegmentDifftestValid = vSegmentUnit.io.vecDifftestInfo.valid
1518        sbufferPort.valid := Mux(vSegmentDifftestValid, vSegmentUnit.io.vecDifftestInfo.valid, lsq.io.sbufferVecDifftestInfo(0).valid)
1519        sbufferPort.bits  := Mux(vSegmentDifftestValid, vSegmentUnit.io.vecDifftestInfo.bits, lsq.io.sbufferVecDifftestInfo(0).bits)
1520
1521        vSegmentUnit.io.vecDifftestInfo.ready  := sbufferPort.ready
1522        lsq.io.sbufferVecDifftestInfo(0).ready := sbufferPort.ready
1523      } else {
1524        sbufferPort <> lsq.io.sbufferVecDifftestInfo(index)
1525      }
1526    }
1527  }
1528
1529  // lsq.io.vecStoreRetire <> vsFlowQueue.io.sqRelease
1530  // lsq.io.vecWriteback.valid := vlWrapper.io.uopWriteback.fire &&
1531  //   vlWrapper.io.uopWriteback.bits.uop.vpu.lastUop
1532  // lsq.io.vecWriteback.bits := vlWrapper.io.uopWriteback.bits
1533
1534  // vector
1535  val vLoadCanAccept  = (0 until VlduCnt).map(i =>
1536    vlSplit(i).io.in.ready && VlduType.isVecLd(io.ooo_to_mem.issueVldu(i).bits.uop.fuOpType)
1537  )
1538  val vStoreCanAccept = (0 until VstuCnt).map(i =>
1539    vsSplit(i).io.in.ready && VstuType.isVecSt(io.ooo_to_mem.issueVldu(i).bits.uop.fuOpType)
1540  )
1541  val isSegment     = io.ooo_to_mem.issueVldu.head.valid && isVsegls(io.ooo_to_mem.issueVldu.head.bits.uop.fuType)
1542  val isFixVlUop    = io.ooo_to_mem.issueVldu.map{x =>
1543    x.bits.uop.vpu.isVleff && x.bits.uop.vpu.lastUop && x.valid
1544  }
1545
1546  // init port
1547  /**
1548   * TODO: the split vsMergeBuffer may be removed if one RS can accept two feedbacks, or if the RS does not need to replay uops
1549   * for now:
1550   *  RS0 -> VsSplit0 -> stu0 -> vsMergebuffer0 -> feedback -> RS0
1551   *  RS1 -> VsSplit1 -> stu1 -> vsMergebuffer1 -> feedback -> RS1
1552   *
1553   * vector loads do not need feedback
1554   *
1555   *  RS0 -> VlSplit0  -> ldu0 -> |
1556   *  RS1 -> VlSplit1  -> ldu1 -> |  -> vlMergebuffer
1557   *        replayIO   -> ldu3 -> |
1558   * */
1559  (0 until VstuCnt).foreach{i =>
1560    vsMergeBuffer(i).io.fromPipeline := DontCare
1561    vsMergeBuffer(i).io.fromSplit := DontCare
1562
1563    vsMergeBuffer(i).io.fromMisalignBuffer.get.flush := storeMisalignBuffer.io.toVecStoreMergeBuffer(i).flush
1564    vsMergeBuffer(i).io.fromMisalignBuffer.get.mbIndex := storeMisalignBuffer.io.toVecStoreMergeBuffer(i).mbIndex
1565  }
1566
1567  (0 until VstuCnt).foreach{i =>
1568    vsSplit(i).io.redirect <> redirect
1569    vsSplit(i).io.in <> io.ooo_to_mem.issueVldu(i)
1570    vsSplit(i).io.in.valid := io.ooo_to_mem.issueVldu(i).valid &&
1571                              vStoreCanAccept(i) && !isSegment
1572    vsSplit(i).io.toMergeBuffer <> vsMergeBuffer(i).io.fromSplit.head
1573    NewPipelineConnect(
1574      vsSplit(i).io.out, storeUnits(i).io.vecstin, storeUnits(i).io.vecstin.fire,
1575      Mux(vsSplit(i).io.out.fire, vsSplit(i).io.out.bits.uop.robIdx.needFlush(io.redirect), storeUnits(i).io.vecstin.bits.uop.robIdx.needFlush(io.redirect)),
1576      Option("VsSplitConnectStu")
1577    )
1578    vsSplit(i).io.vstd.get := DontCare // Todo: Discuss how to pass vector store data
1579
1580    vsSplit(i).io.vstdMisalign.get.storeMisalignBufferEmpty := !storeMisalignBuffer.io.full
1581    vsSplit(i).io.vstdMisalign.get.storePipeEmpty := !storeUnits(i).io.s0_s1_valid
1582
1583  }
1584  (0 until VlduCnt).foreach{i =>
1585    vlSplit(i).io.redirect <> redirect
1586    vlSplit(i).io.in <> io.ooo_to_mem.issueVldu(i)
1587    vlSplit(i).io.in.valid := io.ooo_to_mem.issueVldu(i).valid &&
1588                              vLoadCanAccept(i) && !isSegment && !isFixVlUop(i)
1589    vlSplit(i).io.toMergeBuffer <> vlMergeBuffer.io.fromSplit(i)
1590    vlSplit(i).io.threshold.get.valid := vlMergeBuffer.io.toSplit.get.threshold
1591    vlSplit(i).io.threshold.get.bits  := lsq.io.lqDeqPtr
1592    NewPipelineConnect(
1593      vlSplit(i).io.out, loadUnits(i).io.vecldin, loadUnits(i).io.vecldin.fire,
1594      Mux(vlSplit(i).io.out.fire, vlSplit(i).io.out.bits.uop.robIdx.needFlush(io.redirect), loadUnits(i).io.vecldin.bits.uop.robIdx.needFlush(io.redirect)),
1595      Option("VlSplitConnectLdu")
1596    )
1597
1598    // Subsequent instructions will be blocked
1599    vfofBuffer.io.in(i).valid := io.ooo_to_mem.issueVldu(i).valid
1600    vfofBuffer.io.in(i).bits  := io.ooo_to_mem.issueVldu(i).bits
1601  }
1602  (0 until LduCnt).foreach{i=>
1603    loadUnits(i).io.vecldout.ready         := vlMergeBuffer.io.fromPipeline(i).ready
1604    loadMisalignBuffer.io.vecWriteBack.ready := true.B
1605
1606    if (i == MisalignWBPort) {
1607      when(loadUnits(i).io.vecldout.valid) {
1608        vlMergeBuffer.io.fromPipeline(i).valid := loadUnits(i).io.vecldout.valid
1609        vlMergeBuffer.io.fromPipeline(i).bits  := loadUnits(i).io.vecldout.bits
1610      } .otherwise {
1611        vlMergeBuffer.io.fromPipeline(i).valid   := loadMisalignBuffer.io.vecWriteBack.valid
1612        vlMergeBuffer.io.fromPipeline(i).bits    := loadMisalignBuffer.io.vecWriteBack.bits
1613      }
1614    } else {
1615      vlMergeBuffer.io.fromPipeline(i).valid := loadUnits(i).io.vecldout.valid
1616      vlMergeBuffer.io.fromPipeline(i).bits  := loadUnits(i).io.vecldout.bits
1617    }
1618  }
1619
1620  (0 until StaCnt).foreach{i=>
1621    if(i < VstuCnt){
1622      storeUnits(i).io.vecstout.ready := true.B
1623      storeMisalignBuffer.io.vecWriteBack(i).ready := vsMergeBuffer(i).io.fromPipeline.head.ready
1624
1625      when(storeUnits(i).io.vecstout.valid) {
1626        vsMergeBuffer(i).io.fromPipeline.head.valid := storeUnits(i).io.vecstout.valid
1627        vsMergeBuffer(i).io.fromPipeline.head.bits  := storeUnits(i).io.vecstout.bits
1628      } .otherwise {
1629        vsMergeBuffer(i).io.fromPipeline.head.valid   := storeMisalignBuffer.io.vecWriteBack(i).valid
1630        vsMergeBuffer(i).io.fromPipeline.head.bits    := storeMisalignBuffer.io.vecWriteBack(i).bits
1631      }
1632    }
1633  }
1634
1635  (0 until VlduCnt).foreach{i=>
1636    io.ooo_to_mem.issueVldu(i).ready := vLoadCanAccept(i) || vStoreCanAccept(i)
1637  }
1638
1639  vlMergeBuffer.io.redirect <> redirect
1640  vsMergeBuffer.map(_.io.redirect <> redirect)
1641  (0 until VlduCnt).foreach{i=>
1642    vlMergeBuffer.io.toLsq(i) <> lsq.io.ldvecFeedback(i)
1643  }
1644  (0 until VstuCnt).foreach{i=>
1645    vsMergeBuffer(i).io.toLsq.head <> lsq.io.stvecFeedback(i)
1646  }
1647
1648  (0 until VlduCnt).foreach{i=>
1649    // send to RS
1650    vlMergeBuffer.io.feedback(i) <> io.mem_to_ooo.vlduIqFeedback(i).feedbackSlow
1651    io.mem_to_ooo.vlduIqFeedback(i).feedbackFast := DontCare
1652  }
1653  (0 until VstuCnt).foreach{i =>
1654    // send to RS
1655    if (i == 0){
1656      io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow.valid := vsMergeBuffer(i).io.feedback.head.valid || vSegmentUnit.io.feedback.valid
1657      io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow.bits := Mux1H(Seq(
1658        vSegmentUnit.io.feedback.valid -> vSegmentUnit.io.feedback.bits,
1659        vsMergeBuffer(i).io.feedback.head.valid ->  vsMergeBuffer(i).io.feedback.head.bits
1660      ))
1661      io.mem_to_ooo.vstuIqFeedback(i).feedbackFast := DontCare
1662    } else {
1663      vsMergeBuffer(i).io.feedback.head <> io.mem_to_ooo.vstuIqFeedback(i).feedbackSlow
1664      io.mem_to_ooo.vstuIqFeedback(i).feedbackFast := DontCare
1665    }
1666  }
1667
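  // Vector writeback arbitration: port 0 is shared with vSegmentUnit and port 1 with
  // vfofBuffer. On each port the PriorityMux prefers the segment/fof writeback, then the
  // vector-load merge buffer, then the vector-store merge buffer, and the ready signals
  // propagate the same priority back to the producers.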
1668  (0 until VlduCnt).foreach{i=>
1669    if (i == 0){ // for segmentUnit, segmentUnit use port0 writeback
1670      io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid || vSegmentUnit.io.uopwriteback.valid
1671      io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
1672        vSegmentUnit.io.uopwriteback.valid          -> vSegmentUnit.io.uopwriteback.bits,
1673        vlMergeBuffer.io.uopWriteback(i).valid      -> vlMergeBuffer.io.uopWriteback(i).bits,
1674        vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
1675      ))
1676      vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready && !vSegmentUnit.io.uopwriteback.valid
1677      vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid && !vSegmentUnit.io.uopwriteback.valid
1678      vSegmentUnit.io.uopwriteback.ready := io.mem_to_ooo.writebackVldu(i).ready
1679    } else if (i == 1) {
1680      io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid || vfofBuffer.io.uopWriteback.valid
1681      io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
1682        vfofBuffer.io.uopWriteback.valid            -> vfofBuffer.io.uopWriteback.bits,
1683        vlMergeBuffer.io.uopWriteback(i).valid      -> vlMergeBuffer.io.uopWriteback(i).bits,
1684        vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
1685      ))
1686      vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready && !vfofBuffer.io.uopWriteback.valid
1687      vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid && !vfofBuffer.io.uopWriteback.valid
1688      vfofBuffer.io.uopWriteback.ready := io.mem_to_ooo.writebackVldu(i).ready
1689    } else {
1690      io.mem_to_ooo.writebackVldu(i).valid := vlMergeBuffer.io.uopWriteback(i).valid || vsMergeBuffer(i).io.uopWriteback.head.valid
1691      io.mem_to_ooo.writebackVldu(i).bits := PriorityMux(Seq(
1692        vlMergeBuffer.io.uopWriteback(i).valid -> vlMergeBuffer.io.uopWriteback(i).bits,
1693        vsMergeBuffer(i).io.uopWriteback.head.valid -> vsMergeBuffer(i).io.uopWriteback.head.bits,
1694      ))
1695      vlMergeBuffer.io.uopWriteback(i).ready := io.mem_to_ooo.writebackVldu(i).ready
1696      vsMergeBuffer(i).io.uopWriteback.head.ready := io.mem_to_ooo.writebackVldu(i).ready && !vlMergeBuffer.io.uopWriteback(i).valid
1697    }
1698
1699    vfofBuffer.io.mergeUopWriteback(i).valid := vlMergeBuffer.io.uopWriteback(i).valid
1700    vfofBuffer.io.mergeUopWriteback(i).bits  := vlMergeBuffer.io.uopWriteback(i).bits
1701  }
1702
1703
1704  vfofBuffer.io.redirect <> redirect
1705
1706  // Sbuffer
1707  sbuffer.io.csrCtrl    <> csrCtrl
1708  sbuffer.io.dcache     <> dcache.io.lsu.store
1709  sbuffer.io.memSetPattenDetected := dcache.io.memSetPattenDetected
1710  sbuffer.io.force_write <> lsq.io.force_write
1711  // flush sbuffer
1712  val cmoFlush = lsq.io.flushSbuffer.valid
1713  val fenceFlush = io.ooo_to_mem.flushSb
1714  val atomicsFlush = atomicsUnit.io.flush_sbuffer.valid || vSegmentUnit.io.flush_sbuffer.valid
1715  val stIsEmpty = sbuffer.io.flush.empty && uncache.io.flush.empty
1716  io.mem_to_ooo.sbIsEmpty := RegNext(stIsEmpty)
1717
1718  // if more than one of these tries to flush the sbuffer at the same time,
1719  // something must have gone wrong
1720  assert(!(fenceFlush && atomicsFlush && cmoFlush))
1721  sbuffer.io.flush.valid := RegNext(fenceFlush || atomicsFlush || cmoFlush)
1722  uncache.io.flush.valid := sbuffer.io.flush.valid
1723
1724  // AtomicsUnit: the atomics unit will override other control signals,
1725  // as atomics insts (LR/SC/AMO) will block the pipeline
1726  val s_normal +: s_atomics = Enum(StaCnt + HyuCnt + 1)
1727  val state = RegInit(s_normal)
1728
1729  val st_atomics = Seq.tabulate(StaCnt)(i =>
1730    io.ooo_to_mem.issueSta(i).valid && FuType.storeIsAMO((io.ooo_to_mem.issueSta(i).bits.uop.fuType))
1731  ) ++ Seq.tabulate(HyuCnt)(i =>
1732    io.ooo_to_mem.issueHya(i).valid && FuType.storeIsAMO((io.ooo_to_mem.issueHya(i).bits.uop.fuType))
1733  )
1734
1735  for (i <- 0 until StaCnt) when(st_atomics(i)) {
1736    io.ooo_to_mem.issueSta(i).ready := atomicsUnit.io.in.ready
1737    storeUnits(i).io.stin.valid := false.B
1738
1739    state := s_atomics(i)
1740  }
1741  for (i <- 0 until HyuCnt) when(st_atomics(StaCnt + i)) {
1742    io.ooo_to_mem.issueHya(i).ready := atomicsUnit.io.in.ready
1743    hybridUnits(i).io.lsin.valid := false.B
1744
1745    state := s_atomics(StaCnt + i)
1746    assert(!st_atomics.zipWithIndex.filterNot(_._2 == StaCnt + i).unzip._1.reduce(_ || _))
1747  }
1748  when (atomicsUnit.io.out.valid) {
1749    state := s_normal
1750  }
1751
1752  atomicsUnit.io.in.valid := st_atomics.reduce(_ || _)
1753  atomicsUnit.io.in.bits  := Mux1H(Seq.tabulate(StaCnt)(i =>
1754    st_atomics(i) -> io.ooo_to_mem.issueSta(i).bits) ++
1755    Seq.tabulate(HyuCnt)(i => st_atomics(StaCnt+i) -> io.ooo_to_mem.issueHya(i).bits))
1756  atomicsUnit.io.storeDataIn.zipWithIndex.foreach { case (stdin, i) =>
1757    stdin.valid := st_data_atomics(i)
1758    stdin.bits := stData(i).bits
1759  }
1760  atomicsUnit.io.redirect <> redirect
1761
1762  // TODO: complete amo's pmp support
1763  val amoTlb = dtlb_ld(0).requestor(0)
1764  atomicsUnit.io.dtlb.resp.valid := false.B
1765  atomicsUnit.io.dtlb.resp.bits  := DontCare
1766  atomicsUnit.io.dtlb.req.ready  := amoTlb.req.ready
1767  atomicsUnit.io.pmpResp := pmp_check(0).resp
1768
1769  atomicsUnit.io.dcache <> dcache.io.lsu.atomics
1770  atomicsUnit.io.flush_sbuffer.empty := stIsEmpty
1771
1772  atomicsUnit.io.csrCtrl := csrCtrl
1773
1774  // for atomicsUnit, it uses loadUnit(0)'s TLB port
1775
1776  when (state =/= s_normal) {
1777    // use store wb port instead of load
1778    loadUnits(0).io.ldout.ready := false.B
1779    // use load_0's TLB
1780    atomicsUnit.io.dtlb <> amoTlb
1781
1782    // hw prefetch should be disabled while executing atomic insts
1783    loadUnits.map(i => i.io.prefetch_req.valid := false.B)
1784
1785    // make sure there are no in-flight uops in the load unit
1786    assert(!loadUnits(0).io.ldout.valid)
1787  }
1788
1789  lsq.io.flushSbuffer.empty := sbuffer.io.sbempty
1790
1791  for (i <- 0 until StaCnt) {
1792    when (state === s_atomics(i)) {
1793      io.mem_to_ooo.staIqFeedback(i).feedbackSlow := atomicsUnit.io.feedbackSlow
1794      assert(!storeUnits(i).io.feedback_slow.valid)
1795    }
1796  }
1797  for (i <- 0 until HyuCnt) {
1798    when (state === s_atomics(StaCnt + i)) {
1799      io.mem_to_ooo.hyuIqFeedback(i).feedbackSlow := atomicsUnit.io.feedbackSlow
1800      assert(!hybridUnits(i).io.feedback_slow.valid)
1801    }
1802  }
1803
1804  lsq.io.exceptionAddr.isStore := io.ooo_to_mem.isStoreException
1805  // The exception address is used several cycles after the flush.
1806  // Delay clearing it by 10 cycles so that it stays valid long enough.
1807  val atomicsException = RegInit(false.B)
1808  when (DelayN(redirect.valid, 10) && atomicsException) {
1809    atomicsException := false.B
1810  }.elsewhen (atomicsUnit.io.exceptionInfo.valid) {
1811    atomicsException := true.B
1812  }
1813
1814  val misalignBufExceptionOverwrite = loadMisalignBuffer.io.overwriteExpBuf.valid || storeMisalignBuffer.io.overwriteExpBuf.valid
1815  val misalignBufExceptionVaddr = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
1816    loadMisalignBuffer.io.overwriteExpBuf.vaddr,
1817    storeMisalignBuffer.io.overwriteExpBuf.vaddr
1818  )
1819  val misalignBufExceptionIsHyper = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
1820    loadMisalignBuffer.io.overwriteExpBuf.isHyper,
1821    storeMisalignBuffer.io.overwriteExpBuf.isHyper
1822  )
1823  val misalignBufExceptionGpaddr = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
1824    loadMisalignBuffer.io.overwriteExpBuf.gpaddr,
1825    storeMisalignBuffer.io.overwriteExpBuf.gpaddr
1826  )
1827  val misalignBufExceptionIsForVSnonLeafPTE = Mux(loadMisalignBuffer.io.overwriteExpBuf.valid,
1828    loadMisalignBuffer.io.overwriteExpBuf.isForVSnonLeafPTE,
1829    storeMisalignBuffer.io.overwriteExpBuf.isForVSnonLeafPTE
1830  )
1831
1832  val vSegmentException = RegInit(false.B)
1833  when (DelayN(redirect.valid, 10) && vSegmentException) {
1834    vSegmentException := false.B
1835  }.elsewhen (vSegmentUnit.io.exceptionInfo.valid) {
1836    vSegmentException := true.B
1837  }
1838  val atomicsExceptionAddress = RegEnable(atomicsUnit.io.exceptionInfo.bits.vaddr, atomicsUnit.io.exceptionInfo.valid)
1839  val vSegmentExceptionVstart = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vstart, vSegmentUnit.io.exceptionInfo.valid)
1840  val vSegmentExceptionVl     = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vl, vSegmentUnit.io.exceptionInfo.valid)
1841  val vSegmentExceptionAddress = RegEnable(vSegmentUnit.io.exceptionInfo.bits.vaddr, vSegmentUnit.io.exceptionInfo.valid)
1842  val atomicsExceptionGPAddress = RegEnable(atomicsUnit.io.exceptionInfo.bits.gpaddr, atomicsUnit.io.exceptionInfo.valid)
1843  val vSegmentExceptionGPAddress = RegEnable(vSegmentUnit.io.exceptionInfo.bits.gpaddr, vSegmentUnit.io.exceptionInfo.valid)
1844  val atomicsExceptionIsForVSnonLeafPTE = RegEnable(atomicsUnit.io.exceptionInfo.bits.isForVSnonLeafPTE, atomicsUnit.io.exceptionInfo.valid)
1845  val vSegmentExceptionIsForVSnonLeafPTE = RegEnable(vSegmentUnit.io.exceptionInfo.bits.isForVSnonLeafPTE, vSegmentUnit.io.exceptionInfo.valid)
1846
1847  val exceptionVaddr = Mux(
1848    atomicsException,
1849    atomicsExceptionAddress,
1850    Mux(misalignBufExceptionOverwrite,
1851      misalignBufExceptionVaddr,
1852      Mux(vSegmentException,
1853        vSegmentExceptionAddress,
1854        lsq.io.exceptionAddr.vaddr
1855      )
1856    )
1857  )
1858  // whether vaddr needs extension or comes from a hyper inst:
1859  // VaNeedExt: atomicsException -> false; misalignBufExceptionOverwrite -> true; vSegmentException -> false
1860  // IsHyper: atomicsException -> false; vSegmentException -> false
1861  val exceptionVaNeedExt = !atomicsException &&
1862    (misalignBufExceptionOverwrite ||
1863      (!vSegmentException && lsq.io.exceptionAddr.vaNeedExt))
1864  val exceptionIsHyper = !atomicsException &&
1865    (misalignBufExceptionOverwrite && misalignBufExceptionIsHyper ||
1866      (!vSegmentException && lsq.io.exceptionAddr.isHyper && !misalignBufExceptionOverwrite))
1867
1868  def GenExceptionVa(
1869    mode: UInt, isVirt: Bool, vaNeedExt: Bool,
1870    satp: TlbSatpBundle, vsatp: TlbSatpBundle, hgatp: TlbHgatpBundle,
1871    vaddr: UInt
1872  ) = {
1873    require(VAddrBits >= 50)
1874
1875    val satpNone = satp.mode === 0.U
1876    val satpSv39 = satp.mode === 8.U
1877    val satpSv48 = satp.mode === 9.U
1878
1879    val vsatpNone = vsatp.mode === 0.U
1880    val vsatpSv39 = vsatp.mode === 8.U
1881    val vsatpSv48 = vsatp.mode === 9.U
1882
1883    val hgatpNone = hgatp.mode === 0.U
1884    val hgatpSv39x4 = hgatp.mode === 8.U
1885    val hgatpSv48x4 = hgatp.mode === 9.U
1886
1887    // For !isVirt, mode check is necessary, as we don't want virtual memory in M-mode.
1888    // For isVirt, mode check is unnecessary, as virt won't be 1 in M-mode.
1889    // Also, isVirt includes Hyper Insts, which do not care about the mode either.
1890
1891    val useBareAddr =
1892      (isVirt && vsatpNone && hgatpNone) ||
1893      (!isVirt && (mode === CSRConst.ModeM)) ||
1894      (!isVirt && (mode =/= CSRConst.ModeM) && satpNone)
1895    val useSv39Addr =
1896      (isVirt && vsatpSv39) ||
1897      (!isVirt && (mode =/= CSRConst.ModeM) && satpSv39)
1898    val useSv48Addr =
1899      (isVirt && vsatpSv48) ||
1900      (!isVirt && (mode =/= CSRConst.ModeM) && satpSv48)
1901    val useSv39x4Addr = isVirt && vsatpNone && hgatpSv39x4
1902    val useSv48x4Addr = isVirt && vsatpNone && hgatpSv48x4
1903
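    // Per the RISC-V privileged spec, Sv39/Sv48 virtual addresses are sign-extended from
    // their top bit, while Sv39x4/Sv48x4 guest-physical addresses are two bits wider and
    // zero-extended; bare addresses are treated as zero-extended physical addresses.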
1904    val bareAddr   = ZeroExt(vaddr(PAddrBits - 1, 0), XLEN)
1905    val sv39Addr   = SignExt(vaddr.take(39), XLEN)
1906    val sv39x4Addr = ZeroExt(vaddr.take(39 + 2), XLEN)
1907    val sv48Addr   = SignExt(vaddr.take(48), XLEN)
1908    val sv48x4Addr = ZeroExt(vaddr.take(48 + 2), XLEN)
1909
1910    val ExceptionVa = Wire(UInt(XLEN.W))
1911    when (vaNeedExt) {
1912      ExceptionVa := Mux1H(Seq(
1913        (useBareAddr)   -> bareAddr,
1914        (useSv39Addr)   -> sv39Addr,
1915        (useSv48Addr)   -> sv48Addr,
1916        (useSv39x4Addr) -> sv39x4Addr,
1917        (useSv48x4Addr) -> sv48x4Addr,
1918      ))
1919    } .otherwise {
1920      ExceptionVa := vaddr
1921    }
1922
1923    ExceptionVa
1924  }
1925
1926  io.mem_to_ooo.lsqio.vaddr := RegNext(
1927    GenExceptionVa(tlbcsr.priv.dmode, tlbcsr.priv.virt || exceptionIsHyper, exceptionVaNeedExt,
1928    tlbcsr.satp, tlbcsr.vsatp, tlbcsr.hgatp, exceptionVaddr)
1929  )
1930
1931  // A vsegment instruction is executed atomically, which means atomicsException and vSegmentException should not be raised at the same time.
1932  XSError(atomicsException && vSegmentException, "atomicsException and vSegmentException raise at the same time!")
1933  io.mem_to_ooo.lsqio.vstart := RegNext(Mux(vSegmentException,
1934                                            vSegmentExceptionVstart,
1935                                            lsq.io.exceptionAddr.vstart)
1936  )
1937  io.mem_to_ooo.lsqio.vl     := RegNext(Mux(vSegmentException,
1938                                            vSegmentExceptionVl,
1939                                            lsq.io.exceptionAddr.vl)
1940  )
1941
1942  XSError(atomicsException && atomicsUnit.io.in.valid, "new instruction before exception triggers\n")
1943  io.mem_to_ooo.lsqio.gpaddr := RegNext(Mux(
1944    atomicsException,
1945    atomicsExceptionGPAddress,
1946    Mux(misalignBufExceptionOverwrite,
1947      misalignBufExceptionGpaddr,
1948      Mux(vSegmentException,
1949        vSegmentExceptionGPAddress,
1950        lsq.io.exceptionAddr.gpaddr
1951      )
1952    )
1953  ))
1954  io.mem_to_ooo.lsqio.isForVSnonLeafPTE := RegNext(Mux(
1955    atomicsException,
1956    atomicsExceptionIsForVSnonLeafPTE,
1957    Mux(misalignBufExceptionOverwrite,
1958      misalignBufExceptionIsForVSnonLeafPTE,
1959      Mux(vSegmentException,
1960        vSegmentExceptionIsForVSnonLeafPTE,
1961        lsq.io.exceptionAddr.isForVSnonLeafPTE
1962      )
1963    )
1964  ))
1965  io.mem_to_ooo.topToBackendBypass match { case x =>
1966    x.hartId            := io.hartId
1967    x.l2FlushDone       := RegNext(io.l2_flush_done)
1968    x.externalInterrupt.msip  := outer.clint_int_sink.in.head._1(0)
1969    x.externalInterrupt.mtip  := outer.clint_int_sink.in.head._1(1)
1970    x.externalInterrupt.meip  := outer.plic_int_sink.in.head._1(0)
1971    x.externalInterrupt.seip  := outer.plic_int_sink.in.last._1(0)
1972    x.externalInterrupt.debug := outer.debug_int_sink.in.head._1(0)
1973    x.externalInterrupt.nmi.nmi_31 := outer.nmi_int_sink.in.head._1(0) | outer.beu_local_int_sink.in.head._1(0)
1974    x.externalInterrupt.nmi.nmi_43 := outer.nmi_int_sink.in.head._1(1)
1975    x.msiInfo           := DelayNWithValid(io.fromTopToBackend.msiInfo, 1)
1976    x.clintTime         := DelayNWithValid(io.fromTopToBackend.clintTime, 1)
1977  }
1978
1979  io.memInfo.sqFull := RegNext(lsq.io.sqFull)
1980  io.memInfo.lqFull := RegNext(lsq.io.lqFull)
1981  io.memInfo.dcacheMSHRFull := RegNext(dcache.io.mshrFull)
1982
1983  io.inner_hartId := io.hartId
1984  io.inner_reset_vector := RegNext(io.outer_reset_vector)
1985  io.outer_cpu_halt := io.ooo_to_mem.backendToTopBypass.cpuHalted
1986  io.outer_l2_flush_en := io.ooo_to_mem.csrCtrl.flush_l2_enable
1987  io.outer_power_down_en := io.ooo_to_mem.csrCtrl.power_down_enable
1988  io.outer_cpu_critical_error := io.ooo_to_mem.backendToTopBypass.cpuCriticalError
1989  io.outer_msi_ack := io.ooo_to_mem.backendToTopBypass.msiAck
1990  io.outer_beu_errors_icache := RegNext(io.inner_beu_errors_icache)
1991  io.inner_hc_perfEvents <> RegNext(io.outer_hc_perfEvents)
1992
1993  // vector segmentUnit
1994  vSegmentUnit.io.in.bits <> io.ooo_to_mem.issueVldu.head.bits
1995  vSegmentUnit.io.in.valid := isSegment && io.ooo_to_mem.issueVldu.head.valid // is a segment instruction
1996  vSegmentUnit.io.dtlb.resp.bits <> dtlb_reqs.take(LduCnt).head.resp.bits
1997  vSegmentUnit.io.dtlb.resp.valid <> dtlb_reqs.take(LduCnt).head.resp.valid
1998  vSegmentUnit.io.pmpResp <> pmp_check.head.resp
1999  vSegmentUnit.io.flush_sbuffer.empty := stIsEmpty
2000  vSegmentUnit.io.redirect <> redirect
2001  vSegmentUnit.io.rdcache.resp.bits := dcache.io.lsu.load(0).resp.bits
2002  vSegmentUnit.io.rdcache.resp.valid := dcache.io.lsu.load(0).resp.valid
2003  vSegmentUnit.io.rdcache.s2_bank_conflict := dcache.io.lsu.load(0).s2_bank_conflict
2004  // -------------------------
2005  // Vector Segment Triggers
2006  // -------------------------
2007  vSegmentUnit.io.fromCsrTrigger.tdataVec := tdata
2008  vSegmentUnit.io.fromCsrTrigger.tEnableVec := tEnable
2009  vSegmentUnit.io.fromCsrTrigger.triggerCanRaiseBpExp := triggerCanRaiseBpExp
2010  vSegmentUnit.io.fromCsrTrigger.debugMode := debugMode
2011
2012  // reset tree of MemBlock
2013  if (p(DebugOptionsKey).ResetGen) {
2014    val leftResetTree = ResetGenNode(
2015      Seq(
2016        ModuleNode(ptw),
2017        ModuleNode(ptw_to_l2_buffer),
2018        ModuleNode(lsq),
2019        ModuleNode(dtlb_st_tlb_st),
2020        ModuleNode(dtlb_prefetch_tlb_prefetch),
2021        ModuleNode(pmp)
2022      )
2023      ++ pmp_checkers.map(ModuleNode(_))
2024      ++ (if (prefetcherOpt.isDefined) Seq(ModuleNode(prefetcherOpt.get)) else Nil)
2025      ++ (if (l1PrefetcherOpt.isDefined) Seq(ModuleNode(l1PrefetcherOpt.get)) else Nil)
2026    )
2027    val rightResetTree = ResetGenNode(
2028      Seq(
2029        ModuleNode(sbuffer),
2030        ModuleNode(dtlb_ld_tlb_ld),
2031        ModuleNode(dcache),
2032        ModuleNode(l1d_to_l2_buffer),
2033        CellNode(io.reset_backend)
2034      )
2035    )
2036    ResetGen(leftResetTree, reset, sim = false, io.sramTestBypass.fromL2Top.mbistReset)
2037    ResetGen(rightResetTree, reset, sim = false, io.sramTestBypass.fromL2Top.mbistReset)
2038  } else {
2039    io.reset_backend := DontCare
2040  }
2041  io.resetInFrontendBypass.toL2Top := io.resetInFrontendBypass.fromFrontend
2042  // trace interface
2043  val traceToL2Top = io.traceCoreInterfaceBypass.toL2Top
2044  val traceFromBackend = io.traceCoreInterfaceBypass.fromBackend
2045  traceFromBackend.fromEncoder := RegNext(traceToL2Top.fromEncoder)
2046  traceToL2Top.toEncoder.trap  := RegEnable(
2047    traceFromBackend.toEncoder.trap,
2048    traceFromBackend.toEncoder.groups(0).valid && Itype.isTrap(traceFromBackend.toEncoder.groups(0).bits.itype)
2049  )
2050  traceToL2Top.toEncoder.priv := RegEnable(
2051    traceFromBackend.toEncoder.priv,
2052    traceFromBackend.toEncoder.groups(0).valid
2053  )
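  // Each trace group is registered on its way to the encoder; iaddr is presumably the FTQ
  // block start address, to which the instruction offset (ftqOffset << instOffsetBits),
  // captured while the group is valid, is added.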
2054  (0 until TraceGroupNum).foreach { i =>
2055    traceToL2Top.toEncoder.groups(i).valid := RegNext(traceFromBackend.toEncoder.groups(i).valid)
2056    traceToL2Top.toEncoder.groups(i).bits.iretire := RegNext(traceFromBackend.toEncoder.groups(i).bits.iretire)
2057    traceToL2Top.toEncoder.groups(i).bits.itype := RegNext(traceFromBackend.toEncoder.groups(i).bits.itype)
2058    traceToL2Top.toEncoder.groups(i).bits.ilastsize := RegEnable(
2059      traceFromBackend.toEncoder.groups(i).bits.ilastsize,
2060      traceFromBackend.toEncoder.groups(i).valid
2061    )
2062    traceToL2Top.toEncoder.groups(i).bits.iaddr := RegEnable(
2063      traceFromBackend.toEncoder.groups(i).bits.iaddr,
2064      traceFromBackend.toEncoder.groups(i).valid
2065    ) + (RegEnable(
2066      traceFromBackend.toEncoder.groups(i).bits.ftqOffset.getOrElse(0.U),
2067      traceFromBackend.toEncoder.groups(i).valid
2068    ) << instOffsetBits)
2069  }
2070
2071
2072  io.mem_to_ooo.storeDebugInfo := DontCare
2073  // store event difftest information
2074  if (env.EnableDifftest) {
2075    (0 until EnsbufferWidth).foreach{i =>
2076        io.mem_to_ooo.storeDebugInfo(i).robidx := sbuffer.io.vecDifftestInfo(i).bits.robIdx
2077        sbuffer.io.vecDifftestInfo(i).bits.pc := io.mem_to_ooo.storeDebugInfo(i).pc
2078    }
2079  }
2080
2081  // top-down info
2082  dcache.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
2083  dtlbRepeater.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
2084  lsq.io.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr
2085  io.debugTopDown.toCore.robHeadMissInDCache := dcache.io.debugTopDown.robHeadMissInDCache
2086  io.debugTopDown.toCore.robHeadTlbReplay := lsq.io.debugTopDown.robHeadTlbReplay
2087  io.debugTopDown.toCore.robHeadTlbMiss := lsq.io.debugTopDown.robHeadTlbMiss
2088  io.debugTopDown.toCore.robHeadLoadVio := lsq.io.debugTopDown.robHeadLoadVio
2089  io.debugTopDown.toCore.robHeadLoadMSHR := lsq.io.debugTopDown.robHeadLoadMSHR
2090  dcache.io.debugTopDown.robHeadOtherReplay := lsq.io.debugTopDown.robHeadOtherReplay
2091  dcache.io.debugRolling := io.debugRolling
2092
2093  lsq.io.noUopsIssued := io.topDownInfo.toBackend.noUopsIssued
2094  io.topDownInfo.toBackend.lqEmpty := lsq.io.lqEmpty
2095  io.topDownInfo.toBackend.sqEmpty := lsq.io.sqEmpty
2096  io.topDownInfo.toBackend.l1Miss := dcache.io.l1Miss
2097  io.topDownInfo.toBackend.l2TopMiss.l2Miss := RegNext(io.topDownInfo.fromL2Top.l2Miss)
2098  io.topDownInfo.toBackend.l2TopMiss.l3Miss := RegNext(io.topDownInfo.fromL2Top.l3Miss)
2099
2100  val hyLdDeqCount = PopCount(io.ooo_to_mem.issueHya.map(x => x.valid && FuType.isLoad(x.bits.uop.fuType)))
2101  val hyStDeqCount = PopCount(io.ooo_to_mem.issueHya.map(x => x.valid && FuType.isStore(x.bits.uop.fuType)))
2102  val ldDeqCount = PopCount(io.ooo_to_mem.issueLda.map(_.valid)) +& hyLdDeqCount
2103  val stDeqCount = PopCount(io.ooo_to_mem.issueSta.take(StaCnt).map(_.valid)) +& hyStDeqCount
2104  val iqDeqCount = ldDeqCount +& stDeqCount
2105  XSPerfAccumulate("load_iq_deq_count", ldDeqCount)
2106  XSPerfHistogram("load_iq_deq_count", ldDeqCount, true.B, 0, LdExuCnt + 1)
2107  XSPerfAccumulate("store_iq_deq_count", stDeqCount)
2108  XSPerfHistogram("store_iq_deq_count", stDeqCount, true.B, 0, StAddrCnt + 1)
2109  XSPerfAccumulate("ls_iq_deq_count", iqDeqCount)
2110
2111  val pfevent = Module(new PFEvent)
2112  pfevent.io.distribute_csr := csrCtrl.distribute_csr
2113  val csrevents = pfevent.io.hpmevent.slice(16,24)
2114
2115  val perfFromUnits = (loadUnits ++ Seq(sbuffer, lsq, dcache)).flatMap(_.getPerfEvents)
2116  val perfFromPTW = perfEventsPTW.map(x => ("PTW_" + x._1, x._2))
2117  val perfBlock     = Seq(("ldDeqCount", ldDeqCount),
2118                          ("stDeqCount", stDeqCount))
2119  // let index = 0 be no event
2120  val allPerfEvents = Seq(("noEvent", 0.U)) ++ perfFromUnits ++ perfFromPTW ++ perfBlock
2121
2122  if (printEventCoding) {
2123    for (((name, inc), i) <- allPerfEvents.zipWithIndex) {
2124      println("MemBlock perfEvents Set", name, inc, i)
2125    }
2126  }
2127
2128  val allPerfInc = allPerfEvents.map(_._2.asTypeOf(new PerfEvent))
2129  val perfEvents = HPerfMonitor(csrevents, allPerfInc).getPerfEvents
2130  generatePerfEvent()
2131
2132  private val mbistPl = MbistPipeline.PlaceMbistPipeline(Int.MaxValue, "MbistPipeMemBlk", hasMbist)
2133  private val mbistIntf = if(hasMbist) {
2134    val params = mbistPl.get.nodeParams
2135    val intf = Some(Module(new MbistInterface(
2136      params = Seq(params),
2137      ids = Seq(mbistPl.get.childrenIds),
2138      name = s"MbistIntfMemBlk",
2139      pipelineNum = 1
2140    )))
2141    intf.get.toPipeline.head <> mbistPl.get.mbist
2142    mbistPl.get.registerCSV(intf.get.info, "MbistMemBlk")
2143    intf.get.mbist := DontCare
2144    dontTouch(intf.get.mbist)
2145    // TODO: add mbist controller connections here
2146    intf
2147  } else {
2148    None
2149  }
2150  private val sigFromSrams = if (hasSramTest) Some(SramHelper.genBroadCastBundleTop()) else None
2151  private val cg = ClockGate.genTeSrc
2152  dontTouch(cg)
2153
2154  sigFromSrams.foreach({ case sig => sig.mbist := DontCare })
2155  if (hasMbist) {
2156    sigFromSrams.get.mbist := io.sramTestBypass.fromL2Top.mbist.get
2157    io.sramTestBypass.toFrontend.mbist.get := io.sramTestBypass.fromL2Top.mbist.get
2158    io.sramTestBypass.toFrontend.mbistReset.get := io.sramTestBypass.fromL2Top.mbistReset.get
2159    io.sramTestBypass.toBackend.mbist.get := io.sramTestBypass.fromL2Top.mbist.get
2160    io.sramTestBypass.toBackend.mbistReset.get := io.sramTestBypass.fromL2Top.mbistReset.get
2161    cg.cgen := io.sramTestBypass.fromL2Top.mbist.get.cgen
2162  } else {
2163    cg.cgen := false.B
2164  }
2165
2166  // sram debug
2167  val sramCtl = Option.when(hasSramCtl)(RegNext(io.sramTestBypass.fromL2Top.sramCtl.get))
2168  sigFromSrams.foreach({ case sig => sig.sramCtl := DontCare })
2169  sigFromSrams.zip(sramCtl).foreach {
2170    case (sig, ctl) =>
2171      sig.sramCtl.RTSEL := ctl(1, 0) // CFG[1 : 0]
2172      sig.sramCtl.WTSEL := ctl(3, 2) // CFG[3 : 2]
2173      sig.sramCtl.MCR   := ctl(5, 4) // CFG[5 : 4]
2174      sig.sramCtl.MCW   := ctl(7, 6) // CFG[7 : 6]
2175  }
2176  if (hasSramCtl) {
2177    io.sramTestBypass.toFrontend.sramCtl.get := sramCtl.get
2178  }
2179}
2180
2181class MemBlock()(implicit p: Parameters) extends LazyModule
2182  with HasXSParameter {
2183  override def shouldBeInlined: Boolean = false
2184
2185  val inner = LazyModule(new MemBlockInlined())
2186
2187  lazy val module = new MemBlockImp(this)
2188}
2189
2190class MemBlockImp(wrapper: MemBlock) extends LazyModuleImp(wrapper) {
2191  val io = IO(wrapper.inner.module.io.cloneType)
2192  val io_perf = IO(wrapper.inner.module.io_perf.cloneType)
2193  io <> wrapper.inner.module.io
2194  io_perf <> wrapper.inner.module.io_perf
2195
2196  if (p(DebugOptionsKey).ResetGen) {
2197    ResetGen(
2198      ResetGenNode(Seq(ModuleNode(wrapper.inner.module))),
2199      reset, sim = false, io.sramTestBypass.fromL2Top.mbistReset
2200    )
2201  }
2202}
2203