xref: /XiangShan/src/main/scala/xiangshan/backend/Backend.scala (revision 45f43e6e5f88874a7573ff096d1e5c2855bd16c7)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.backend

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.diplomacy.{LazyModule, LazyModuleImp}
import utils._
import utility._
import xiangshan._
import xiangshan.backend.exu._
import xiangshan.backend.fu._
import xiangshan.backend.rob._
import xiangshan.backend.dispatch._
import xiangshan.mem._

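// Stand-in for MemBlock on the diplomacy-based writeback network: it advertises the
// load/store exu configs as a writeback source and simply forwards io.in to io.out,
// so BackendImp can feed the real MemBlock writeback in through plain IO
// (io.memBlock.writeback) while the downstream writeback sinks see an ordinary source.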
class FakeMemBlockWbSource()(implicit p: Parameters) extends LazyModule
  with HasXSParameter with HasWritebackSource {
  lazy val module = new FakeMemBlockWbSourceImp(this)

  override val writebackSourceParams: Seq[WritebackSourceParams] = {
    val params = new WritebackSourceParams
    params.exuConfigs = (loadExuConfigs ++ storeExuConfigs).map(cfg => Seq(cfg))
    Seq(params)
  }
  override lazy val writebackSourceImp: HasWritebackSourceImp = module
}

class FakeMemBlockWbSourceImp(outer: FakeMemBlockWbSource) extends LazyModuleImp(outer)
  with HasXSParameter
  with HasWritebackSourceImp
{
  val io = IO(new Bundle() {
    val in = Vec(exuParameters.LsExuCnt + exuParameters.StuCnt, Flipped(DecoupledIO(new ExuOutput)))
    val out = Vec(exuParameters.LsExuCnt + exuParameters.StuCnt, DecoupledIO(new ExuOutput))
  })
  override def writebackSource1: Option[Seq[Seq[DecoupledIO[ExuOutput]]]] = Some(Seq(io.out))
  io.out <> io.in
}

// Merges CtrlBlock, the exu blocks, wbArbiter, wb2Ctrl, etc. into a single Backend module
class Backend(memWbSource: HasWritebackSource)(implicit p: Parameters) extends LazyModule
  with HasXSParameter
  with HasExuWbHelper
{
  val wbArbiter = LazyModule(new WbArbiterWrapper(exuConfigs, NRIntWritePorts, NRFpWritePorts))
  val intWbPorts = wbArbiter.intWbPorts
  val fpWbPorts = wbArbiter.fpWbPorts

  // TODO: better RS organization
  // generate rs according to number of function units
  require(exuParameters.JmpCnt == 1)
  require(exuParameters.MduCnt <= exuParameters.AluCnt && exuParameters.MduCnt > 0)
  require(exuParameters.FmiscCnt <= exuParameters.FmacCnt && exuParameters.FmiscCnt > 0)
  require(exuParameters.LduCnt == exuParameters.StuCnt) // TODO: remove this limitation

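  // schedulePorts(0) is the integer/memory scheduler (ALU, MDU, Jump/CSR, Ld, Sta, Std);
  // schedulePorts(1) is the floating-point scheduler (FMAC, FMISC).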
  // one RS every 2 MDUs
  val schedulePorts = Seq(
    // exuCfg, numDeq, intFastWakeupTarget, fpFastWakeupTarget
    Seq(
      (AluExeUnitCfg, exuParameters.AluCnt, Seq(AluExeUnitCfg, LdExeUnitCfg, StaExeUnitCfg), Seq()),
      (MulDivExeUnitCfg, exuParameters.MduCnt, Seq(AluExeUnitCfg, MulDivExeUnitCfg), Seq()),
      (JumpCSRExeUnitCfg, 1, Seq(), Seq()),
      (LdExeUnitCfg, exuParameters.LduCnt, Seq(AluExeUnitCfg, LdExeUnitCfg), Seq()),
      (StaExeUnitCfg, exuParameters.StuCnt, Seq(), Seq()),
      (StdExeUnitCfg, exuParameters.StuCnt, Seq(), Seq())
    ),
    Seq(
      (FmacExeUnitCfg, exuParameters.FmacCnt, Seq(), Seq(FmacExeUnitCfg, FmiscExeUnitCfg)),
      (FmiscExeUnitCfg, exuParameters.FmiscCnt, Seq(), Seq())
    )
  )

  // For each scheduler, work out which writeback ports provide fast wakeup from the exu side
  // (both from its own wakeup-from-exu units and from the other scheduler)
  val otherFastPorts = schedulePorts.zipWithIndex.map { case (sche, i) =>
    val otherCfg = schedulePorts.zipWithIndex.filter(_._2 != i).map(_._1).reduce(_ ++ _)
    val outerPorts = sche.map(cfg => {
      // exe units in this scheduler whose fast wakeup is driven from the exu side
      val outerWakeupInSche = sche.filter(_._1.wakeupFromExu)
      val intraIntScheOuter = outerWakeupInSche.filter(_._3.contains(cfg._1)).map(_._1)
      val intraFpScheOuter = outerWakeupInSche.filter(_._4.contains(cfg._1)).map(_._1)
      // exe units in other schedulers deliver their fastUop from outside
      val otherIntSource = otherCfg.filter(_._3.contains(cfg._1)).map(_._1)
      val otherFpSource = otherCfg.filter(_._4.contains(cfg._1)).map(_._1)
      val intSource = findInWbPorts(intWbPorts, intraIntScheOuter ++ otherIntSource)
      val fpSource = findInWbPorts(fpWbPorts, intraFpScheOuter ++ otherFpSource)
      getFastWakeupIndex(cfg._1, intSource, fpSource, intWbPorts.length).sorted
    })
    println(s"inter-scheduler wakeup sources for $i: $outerPorts")
    outerPorts
  }

  // allow mdu and fmisc to have 2*numDeq enqueue ports
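  // Each (x, y) pair routes a dispatch port to enqueue port y of the x-th issue-queue config
  // in the corresponding schedulePorts entry (so 3/4/5 in lsDpPorts are Ld/Sta/Std).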
  val intDpPorts = (0 until exuParameters.AluCnt).map(i => {
    if (i < exuParameters.JmpCnt) Seq((0, i), (1, i), (2, i))
    else if (i < 2 * exuParameters.MduCnt) Seq((0, i), (1, i))
    else Seq((0, i))
  })
  val lsDpPorts = (0 until exuParameters.LduCnt).map(i => Seq((3, i))) ++
                  (0 until exuParameters.StuCnt).map(i => Seq((4, i))) ++
                  (0 until exuParameters.StuCnt).map(i => Seq((5, i)))
  val fpDpPorts = (0 until exuParameters.FmacCnt).map(i => {
    if (i < 2 * exuParameters.FmiscCnt) Seq((0, i), (1, i))
    else Seq((0, i))
  })

  val dispatchPorts = Seq(intDpPorts ++ lsDpPorts, fpDpPorts)

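  // Per exu block (int/mem, fp): number of regfile read ports exposed to the outside, and
  // whether the block instantiates an integer / floating-point regfile of its own.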
  val outIntRfReadPorts = Seq(0, 0)
  val outFpRfReadPorts = Seq(0, StorePipelineWidth)
  val hasIntRf = Seq(true, false)
  val hasFpRf = Seq(false, true)

  val exuBlocks = schedulePorts.zip(dispatchPorts).zip(otherFastPorts).zipWithIndex.map {
    case (((sche, disp), other), i) =>
      LazyModule(new ExuBlock(sche, disp, intWbPorts, fpWbPorts, other, outIntRfReadPorts(i), outFpRfReadPorts(i), hasIntRf(i), hasFpRf(i)))
  }

  val fakeMemBlockWbSource = LazyModule(new FakeMemBlockWbSource())

  val wb2Ctrl = LazyModule(new Wb2Ctrl(exuConfigs))
  wb2Ctrl.addWritebackSink(exuBlocks :+ fakeMemBlockWbSource)
  val dpExuConfigs = exuBlocks.flatMap(_.scheduler.dispatch2.map(_.configs))
  val ctrlBlock = LazyModule(new CtrlBlock(dpExuConfigs))
  val writebackSources = Seq(Seq(wb2Ctrl), Seq(wbArbiter))
  writebackSources.foreach(s => ctrlBlock.addWritebackSink(s))

  lazy val module = new BackendImp(this)
}

class BackendImp(outer: Backend)(implicit p: Parameters) extends LazyModuleImp(outer)
  with HasXSParameter
{
  val io = IO(new Bundle() {
    val hartId = Input(UInt(64.W))
    val cpu_halt = Output(Bool())

    val memBlock = new Bundle() { // TODO: use class
      val redirect = ValidIO(new Redirect)
      val issue = Vec(exuParameters.LsExuCnt + exuParameters.StuCnt, DecoupledIO(new ExuInput))
      val loadFastMatch = Vec(exuParameters.LduCnt, Output(UInt(exuParameters.LduCnt.W)))
      val loadFastFuOpType = Vec(exuParameters.LduCnt, Output(FuOpType()))
      val loadFastImm = Vec(exuParameters.LduCnt, Output(UInt(12.W)))
      val rsfeedback = Vec(exuParameters.LsExuCnt, Flipped(new MemRSFeedbackIO()(p.alter((site, here, up) => {
        case XSCoreParamsKey => up(XSCoreParamsKey).copy(
          IssQueSize = IssQueSize * (if (Enable3Load3Store) 3 else 2)
        )
      }))))
      val loadPc = Vec(exuParameters.LduCnt, Output(UInt(VAddrBits.W)))
      val storePc = Vec(exuParameters.StuCnt, Output(UInt(VAddrBits.W)))
      val stIssuePtr = Input(new SqPtr())
      val writeback = Vec(exuParameters.LsExuCnt + exuParameters.StuCnt, Flipped(DecoupledIO(new ExuOutput)))
      val s3_delayed_load_error = Vec(exuParameters.LduCnt, Input(Bool()))
      val otherFastWakeup = Vec(exuParameters.LduCnt + 2 * exuParameters.StuCnt, Flipped(ValidIO(new MicroOp)))
      val stIn = Vec(exuParameters.StuCnt, Flipped(ValidIO(new ExuInput)))
      val memoryViolation = Flipped(ValidIO(new Redirect))
      val sfence = Output(new SfenceBundle)
      val tlbCsr = Output(new TlbCsrBundle)
      val fenceToSbuffer = new FenceToSbuffer
      val enqLsq = Flipped(new LsqEnqIO)
      val lsqio = new Bundle {
        val exceptionAddr = Flipped(new ExceptionAddrIO) // to csr
        val rob = new RobLsqIO // rob to lsq
        val lqCanAccept = Input(Bool())
        val sqCanAccept = Input(Bool())
      }
      val csrCtrl = new CustomCSRCtrlIO
      val lqCancelCnt = Input(UInt(log2Up(VirtualLoadQueueSize + 1).W))
      val sqCancelCnt = Input(UInt(log2Up(StoreQueueSize + 1).W))
      val scommit = Input(UInt(log2Ceil(EnsbufferWidth + 1).W))
      val lcommit = Input(UInt(log2Up(CommitWidth + 1).W))
      val debug_ls = Flipped(new DebugLSIO)
      val lsTopdownInfo = Vec(exuParameters.LduCnt, Input(new LsTopdownInfo))
    }

    val frontend = new Bundle() { // TODO: use class
      val frontend2Ctrl = Flipped(new FrontendToCtrlIO)
      val sfence = Output(new SfenceBundle)
      val tlbCsr = Output(new TlbCsrBundle)
      val csrCtrl = Output(new CustomCSRCtrlIO)
      val fencei = Output(Bool())
    }

    // CSR related
    val perf = Input(new PerfCounterIO)
    val externalInterrupt = new ExternalInterruptIO
    val distributedUpdate = Vec(2, Flipped(new DistributedCSRUpdateReq))

    val l2_pf_enable = Output(Bool())

    val debugTopDown = new Bundle {
      val fromRob = new RobCoreTopDownIO
      val fromCore = new CoreDispatchTopDownIO
    }
    val debugRolling = new RobDebugRollingIO
  })

  private val ctrlBlock = outer.ctrlBlock.module
  private val wb2Ctrl = outer.wb2Ctrl.module
  private val exuBlocks = outer.exuBlocks.map(_.module)
  private val wbArbiter = outer.wbArbiter.module

  val mem = io.memBlock
  val frontend = io.frontend

  outer.fakeMemBlockWbSource.module.io.in <> mem.writeback

  ctrlBlock.io.hartId := io.hartId
  exuBlocks.foreach(_.io.hartId := io.hartId)
  wbArbiter.io.hartId := io.hartId

  io.cpu_halt := ctrlBlock.io.cpu_halt

  wbArbiter.io.redirect <> ctrlBlock.io.redirect

  val allWriteback = exuBlocks.flatMap(_.io.fuWriteback) ++ outer.fakeMemBlockWbSource.module.io.out
  require(exuConfigs.length == allWriteback.length, s"${exuConfigs.length} != ${allWriteback.length}")
  wbArbiter.io.in <> allWriteback
  val rfWriteback = wbArbiter.io.out

  // memblock error exception writeback, 1 cycle after normal writeback
  wb2Ctrl.io.s3_delayed_load_error <> mem.s3_delayed_load_error

  wb2Ctrl.io.redirect <> ctrlBlock.io.redirect
  outer.wb2Ctrl.generateWritebackIO()

  require(exuBlocks.count(_.fuConfigs.map(_._1).contains(JumpCSRExeUnitCfg)) == 1)
  val csrFenceMod = exuBlocks.filter(_.fuConfigs.map(_._1).contains(JumpCSRExeUnitCfg)).head
  val csrioIn = csrFenceMod.io.fuExtra.csrio.get
  val fenceio = csrFenceMod.io.fuExtra.fenceio.get

  ctrlBlock.io.frontend <> frontend.frontend2Ctrl
  frontend.sfence <> fenceio.sfence
  frontend.tlbCsr <> csrioIn.tlb
  frontend.csrCtrl <> csrioIn.customCtrl
  frontend.fencei := fenceio.fencei

  ctrlBlock.io.csrCtrl <> csrioIn.customCtrl
  val redirectBlocks = exuBlocks.reverse.filter(_.fuConfigs.map(_._1).map(_.hasRedirect).reduce(_ || _))
  ctrlBlock.io.exuRedirect <> redirectBlocks.flatMap(_.io.fuExtra.exuRedirect)
  ctrlBlock.io.stIn <> mem.stIn
  ctrlBlock.io.memoryViolation <> mem.memoryViolation
  exuBlocks.head.io.scheExtra.enqLsq.get <> mem.enqLsq
  exuBlocks.foreach(b => {
    b.io.scheExtra.lcommit := mem.lcommit
    b.io.scheExtra.scommit := mem.scommit
    b.io.scheExtra.lqCancelCnt := mem.lqCancelCnt
    b.io.scheExtra.sqCancelCnt := mem.sqCancelCnt
  })
  val sourceModules = outer.writebackSources.map(_.map(_.module.asInstanceOf[HasWritebackSourceImp]))
  outer.ctrlBlock.generateWritebackIO()

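  // Fast wakeup: gather fastUop outputs from the exu blocks and MemBlock, split them by
  // int/fp destination regfile, and reorder them to match the arbitrated writeback port
  // order before broadcasting them to the schedulers via fastUopIn.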
  val allFastUop = exuBlocks.flatMap(b => b.io.fastUopOut.dropRight(b.numOutFu)) ++ mem.otherFastWakeup
  require(allFastUop.length == exuConfigs.length, s"${allFastUop.length} != ${exuConfigs.length}")
  val intFastUop = allFastUop.zip(exuConfigs).filter(_._2.writeIntRf).map(_._1)
  val fpFastUop = allFastUop.zip(exuConfigs).filter(_._2.writeFpRf).map(_._1)
  val intFastUop1 = outer.wbArbiter.intConnections.map(c => intFastUop(c.head))
  val fpFastUop1 = outer.wbArbiter.fpConnections.map(c => fpFastUop(c.head))
  val allFastUop1 = intFastUop1 ++ fpFastUop1

  ctrlBlock.io.dispatch <> exuBlocks.flatMap(_.io.in)
  ctrlBlock.io.rsReady := exuBlocks.flatMap(_.io.scheExtra.rsReady)
  ctrlBlock.io.enqLsq <> mem.enqLsq
  ctrlBlock.io.lqDeq := mem.lcommit
  ctrlBlock.io.sqDeq := mem.scommit
  ctrlBlock.io.lqCanAccept := mem.lsqio.lqCanAccept
  ctrlBlock.io.sqCanAccept := mem.lsqio.sqCanAccept
  ctrlBlock.io.lqCancelCnt := mem.lqCancelCnt
  ctrlBlock.io.sqCancelCnt := mem.sqCancelCnt
  ctrlBlock.io.robHeadLsIssue := exuBlocks.map(_.io.scheExtra.robHeadLsIssue).reduce(_ || _)

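  // Block 0 (int/mem) has no FP regfile of its own (hasFpRf above), so its FP operand reads
  // and FP register-state reads are served by the read ports exported by block 1.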
  exuBlocks(0).io.scheExtra.fpRfReadIn.get <> exuBlocks(1).io.scheExtra.fpRfReadOut.get
  exuBlocks(0).io.scheExtra.fpStateReadIn.get <> exuBlocks(1).io.scheExtra.fpStateReadOut.get

  for ((c, e) <- ctrlBlock.io.ld_pc_read.zip(exuBlocks(0).io.issue.get)) {
    // read load pc at load s0
    c.ptr := e.bits.uop.cf.ftqPtr
    c.offset := e.bits.uop.cf.ftqOffset
  }
  // return load pc at load s2
  mem.loadPc <> VecInit(ctrlBlock.io.ld_pc_read.map(_.data))

  for ((c, e) <- ctrlBlock.io.st_pc_read.zip(exuBlocks(0).io.issue.get.drop(exuParameters.LduCnt))) {
    // read store pc at store s0
    c.ptr := e.bits.uop.cf.ftqPtr
    c.offset := e.bits.uop.cf.ftqOffset
  }
  // return store pc at store s2
  mem.storePc <> VecInit(ctrlBlock.io.st_pc_read.map(_.data))

  mem.issue <> exuBlocks(0).io.issue.get
  // By default, instructions do not have exceptions when they enter the function units.
  mem.issue.map(_.bits.uop.clearExceptions())
  exuBlocks(0).io.scheExtra.loadFastMatch.get <> mem.loadFastMatch
  exuBlocks(0).io.scheExtra.loadFastFuOpType.get <> mem.loadFastFuOpType
  exuBlocks(0).io.scheExtra.loadFastImm.get <> mem.loadFastImm

  val stdIssue = exuBlocks(0).io.issue.get.takeRight(exuParameters.StuCnt)
  exuBlocks.map(_.io).foreach { exu =>
    exu.redirect <> ctrlBlock.io.redirect
    exu.allocPregs <> ctrlBlock.io.allocPregs
    exu.rfWriteback <> rfWriteback
    exu.fastUopIn <> allFastUop1
    exu.scheExtra.jumpPc <> ctrlBlock.io.jumpPc
    exu.scheExtra.jalr_target <> ctrlBlock.io.jalr_target
    exu.scheExtra.stIssuePtr <> mem.stIssuePtr
    exu.scheExtra.debug_fp_rat <> ctrlBlock.io.debug_fp_rat
    exu.scheExtra.debug_int_rat <> ctrlBlock.io.debug_int_rat
    exu.scheExtra.robDeqPtr := ctrlBlock.io.robDeqPtr
    exu.scheExtra.memWaitUpdateReq.staIssue.zip(mem.stIn).foreach { case (sink, src) =>
      sink.bits := src.bits
      sink.valid := src.valid
    }
    exu.scheExtra.memWaitUpdateReq.stdIssue.zip(stdIssue).foreach { case (sink, src) =>
      sink.valid := src.valid
      sink.bits := src.bits
    }
  }

  XSPerfHistogram("fastIn_count", PopCount(allFastUop1.map(_.valid)), true.B, 0, allFastUop1.length, 1)
  XSPerfHistogram("wakeup_count", PopCount(rfWriteback.map(_.valid)), true.B, 0, rfWriteback.length, 1)

  ctrlBlock.perfinfo.perfEventsEu0 := exuBlocks(0).getPerf.dropRight(outer.exuBlocks(0).scheduler.numRs)
  ctrlBlock.perfinfo.perfEventsEu1 := exuBlocks(1).getPerf.dropRight(outer.exuBlocks(1).scheduler.numRs)

  if (Enable3Load3Store) {
    ctrlBlock.perfinfo.perfEventsRs := DontCare // outer.exuBlocks.flatMap(b => b.module.getPerf.takeRight(b.scheduler.numRs))
  } else {
    ctrlBlock.perfinfo.perfEventsRs := outer.exuBlocks.flatMap(b => b.module.getPerf.takeRight(b.scheduler.numRs))
  }

  csrioIn.hartId <> io.hartId

  val perf = WireInit(io.perf) // other perf events are assigned outside the backend
  perf.retiredInstr <> ctrlBlock.io.robio.toCSR.perfinfo.retiredInstr
  perf.ctrlInfo <> ctrlBlock.io.perfInfo.ctrlInfo
  perf.perfEventsCtrl <> ctrlBlock.getPerf
  csrioIn.perf <> perf

  csrioIn.fpu.fflags <> ctrlBlock.io.robio.toCSR.fflags
  csrioIn.fpu.isIllegal := false.B
  csrioIn.fpu.dirty_fs <> ctrlBlock.io.robio.toCSR.dirty_fs
  csrioIn.fpu.frm <> exuBlocks(1).io.fuExtra.frm.get
  csrioIn.exception <> ctrlBlock.io.robio.exception
  csrioIn.isXRet <> ctrlBlock.io.robio.toCSR.isXRet
  csrioIn.trapTarget <> ctrlBlock.io.robio.toCSR.trapTarget
  csrioIn.interrupt <> ctrlBlock.io.robio.toCSR.intrBitSet
  csrioIn.wfi_event <> ctrlBlock.io.robio.toCSR.wfiEvent
  csrioIn.memExceptionVAddr <> mem.lsqio.exceptionAddr.vaddr

  csrioIn.externalInterrupt := io.externalInterrupt

  csrioIn.distributedUpdate := io.distributedUpdate

  mem.sfence <> fenceio.sfence
  mem.fenceToSbuffer <> fenceio.sbuffer

  mem.redirect <> ctrlBlock.io.redirect
  mem.rsfeedback <> exuBlocks(0).io.scheExtra.feedback.get
  mem.csrCtrl <> csrioIn.customCtrl
  mem.tlbCsr <> csrioIn.tlb
  mem.lsqio.rob <> ctrlBlock.io.robio.lsq
  mem.lsqio.exceptionAddr.isStore := CommitType.lsInstIsStore(ctrlBlock.io.robio.exception.bits.uop.ctrl.commitType)
  mem.debug_ls <> ctrlBlock.io.robio.debug_ls
  mem.lsTopdownInfo <> ctrlBlock.io.robio.lsTopdownInfo

  // if the L2 prefetcher uses stream prefetch, it should be placed in XSCore
  io.l2_pf_enable := csrioIn.customCtrl.l2_pf_enable

  io.debugTopDown.fromRob := ctrlBlock.io.debugTopDown.fromRob
  ctrlBlock.io.debugTopDown.fromCore := io.debugTopDown.fromCore
  io.debugRolling := ctrlBlock.io.debugRolling

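  // Reset tree for the backend: wbArbiter, ctrlBlock and every exu block except the first
  // (exuBlocks.head is left out of this tree).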
  val resetTree = ResetGenNode(
    exuBlocks.tail.map(m => ModuleNode(m))
      :+ ModuleNode(wbArbiter)
      :+ ModuleNode(ctrlBlock)
  )
  ResetGen(resetTree, reset, !p(DebugOptionsKey).FPGAPlatform)
}