/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.backend

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.diplomacy.{LazyModule, LazyModuleImp}
import utils._
import utility._
import xiangshan._
import xiangshan.backend.exu._
import xiangshan.backend.fu._
import xiangshan.backend.rob._
import xiangshan.backend.dispatch._
import xiangshan.mem._

/**
 * Stand-in writeback source for the memory block.
 *
 * The real MemBlock lives outside this Backend module. This LazyModule
 * advertises the load/store writeback ports (`loadExuConfigs ++ storeExuConfigs`)
 * to the diplomacy-based writeback network, so the wbArbiter / wb2Ctrl graph
 * can be constructed without a direct elaboration-time dependency on MemBlock.
 * MemBlock's actual writeback signals are wired into `module.io.in` inside
 * [[BackendImp]].
 */
class FakeMemBlockWbSource()(implicit p: Parameters) extends LazyModule
  with HasXSParameter with HasWritebackSource {
  lazy val module = new FakeMemBlockWbSourceImp(this)

  // One writeback source entry per load/store EXU configuration.
  override val writebackSourceParams: Seq[WritebackSourceParams] = {
    val params = new WritebackSourceParams
    params.exuConfigs = (loadExuConfigs ++ storeExuConfigs).map(cfg => Seq(cfg))
    Seq(params)
  }
  override lazy val writebackSourceImp: HasWritebackSourceImp = module
}

/**
 * Implementation of [[FakeMemBlockWbSource]]: a pure combinational pass-through.
 * `io.in` is driven from MemBlock's writeback (see BackendImp) and forwarded
 * unchanged to `io.out`, which is what the writeback network observes.
 *
 * Port count: LsExuCnt covers load + sta pipes; the additional StuCnt entries
 * presumably carry the std (store-data) writebacks — matches `mem.writeback`
 * width in BackendImp.
 */
class FakeMemBlockWbSourceImp(outer: FakeMemBlockWbSource) extends LazyModuleImp(outer)
  with HasXSParameter
  with HasWritebackSourceImp
{
  val io = IO(new Bundle() {
    val in = Vec(exuParameters.LsExuCnt + exuParameters.StuCnt, Flipped(DecoupledIO(new ExuOutput)))
    val out = Vec(exuParameters.LsExuCnt + exuParameters.StuCnt, DecoupledIO(new ExuOutput))
  })
  override def writebackSource1: Option[Seq[Seq[DecoupledIO[ExuOutput]]]] = Some(Seq(io.out))
  // Bulk-connect: out mirrors in, including ready back-pressure.
  io.out <> io.in
}

// Merge CtrlBlock, exuBlocks, wbArbiter, wb2Ctrl, etc into 1 module
class Backend(memWbSource: HasWritebackSource)(implicit p: Parameters) extends LazyModule
  with HasXSParameter
  with HasExuWbHelper
{
  // Arbitrates all EXU writebacks down to NRIntWritePorts / NRFpWritePorts
  // register-file write ports.
  val wbArbiter = LazyModule(new WbArbiterWrapper(exuConfigs, NRIntWritePorts, NRFpWritePorts))
  val intWbPorts = wbArbiter.intWbPorts
  val fpWbPorts = wbArbiter.fpWbPorts

  // TODO: better RS organization
  // generate rs according to number of function units
  require(exuParameters.JmpCnt == 1)
  require(exuParameters.MduCnt <= exuParameters.AluCnt && exuParameters.MduCnt > 0)
  require(exuParameters.FmiscCnt <= exuParameters.FmacCnt && exuParameters.FmiscCnt > 0)
  require(exuParameters.LduCnt == exuParameters.StuCnt) // TODO: remove this limitation

  // one RS every 2 MDUs
  // Two schedulers: index 0 = integer + memory issue, index 1 = floating point.
  val schedulePorts = Seq(
    // exuCfg, numDeq, intFastWakeupTarget, fpFastWakeupTarget
    Seq(
      (AluExeUnitCfg, exuParameters.AluCnt, Seq(AluExeUnitCfg, LdExeUnitCfg, StaExeUnitCfg), Seq()),
      (MulDivExeUnitCfg, exuParameters.MduCnt, Seq(AluExeUnitCfg, MulDivExeUnitCfg), Seq()),
      (JumpCSRExeUnitCfg, 1, Seq(), Seq()),
      (LdExeUnitCfg, exuParameters.LduCnt, Seq(AluExeUnitCfg, LdExeUnitCfg), Seq()),
      (StaExeUnitCfg, exuParameters.StuCnt, Seq(), Seq()),
      (StdExeUnitCfg, exuParameters.StuCnt, Seq(), Seq())
    ),
    Seq(
      (FmacExeUnitCfg, exuParameters.FmacCnt, Seq(), Seq(FmacExeUnitCfg, FmiscExeUnitCfg)),
      (FmiscExeUnitCfg, exuParameters.FmiscCnt, Seq(), Seq())
    )
  )

  // should do outer fast wakeup ports here
  // For each scheduler, compute (per EXU config) the sorted writeback-port
  // indices that deliver fast-wakeup signals: both from wakeup-from-exu units
  // inside the same scheduler and from units in the other scheduler.
  val otherFastPorts = schedulePorts.zipWithIndex.map { case (sche, i) =>
    val otherCfg = schedulePorts.zipWithIndex.filter(_._2 != i).map(_._1).reduce(_ ++ _)
    val outerPorts = sche.map(cfg => {
      // exe units from this scheduler need fastUops from exeunits
      val outerWakeupInSche = sche.filter(_._1.wakeupFromExu)
      val intraIntScheOuter = outerWakeupInSche.filter(_._3.contains(cfg._1)).map(_._1)
      val intraFpScheOuter = outerWakeupInSche.filter(_._4.contains(cfg._1)).map(_._1)
      // exe units from other schedulers need fastUop from outside
      val otherIntSource = otherCfg.filter(_._3.contains(cfg._1)).map(_._1)
      val otherFpSource = otherCfg.filter(_._4.contains(cfg._1)).map(_._1)
      val intSource = findInWbPorts(intWbPorts, intraIntScheOuter ++ otherIntSource)
      val fpSource = findInWbPorts(fpWbPorts, intraFpScheOuter ++ otherFpSource)
      getFastWakeupIndex(cfg._1, intSource, fpSource, intWbPorts.length).sorted
    })
    println(s"inter-scheduler wakeup sources for $i: $outerPorts")
    outerPorts
  }

  // allow mdu and fmisc to have 2*numDeq enqueue ports
  // Dispatch port maps: (rsIndex, deqPortIndex) per dispatch slot.
  val intDpPorts = (0 until exuParameters.AluCnt).map(i => {
    if (i < exuParameters.JmpCnt) Seq((0, i), (1, i), (2, i))
    else if (i < 2 * exuParameters.MduCnt) Seq((0, i), (1, i))
    else Seq((0, i))
  })
  // RS indices 3/4/5 are Ld/Sta/Std in schedulePorts(0) above.
  val lsDpPorts = (0 until exuParameters.LduCnt).map(i => Seq((3, i))) ++
    (0 until exuParameters.StuCnt).map(i => Seq((4, i))) ++
    (0 until exuParameters.StuCnt).map(i => Seq((5, i)))
  val fpDpPorts = (0 until exuParameters.FmacCnt).map(i => {
    if (i < 2 * exuParameters.FmiscCnt) Seq((0, i), (1, i))
    else Seq((0, i))
  })

  val dispatchPorts = Seq(intDpPorts ++ lsDpPorts, fpDpPorts)

  // Per-scheduler regfile configuration: the fp scheduler exposes
  // StorePipelineWidth external fp read ports (used by store data).
  val outIntRfReadPorts = Seq(0, 0)
  val outFpRfReadPorts = Seq(0, StorePipelineWidth)
  val hasIntRf = Seq(true, false)
  val hasFpRf = Seq(false, true)

  val exuBlocks = schedulePorts.zip(dispatchPorts).zip(otherFastPorts).zipWithIndex.map {
    case (((sche, disp), other), i) =>
      LazyModule(new ExuBlock(sche, disp, intWbPorts, fpWbPorts, other, outIntRfReadPorts(i), outFpRfReadPorts(i), hasIntRf(i), hasFpRf(i)))
  }

  // NOTE(review): the constructor's `memWbSource` parameter is not used here;
  // the fake source below stands in for MemBlock instead — confirm intent.
  val fakeMemBlockWbSource = LazyModule(new FakeMemBlockWbSource())

  // wb2Ctrl collects writebacks (EXU blocks + memory) for the control block.
  val wb2Ctrl = LazyModule(new Wb2Ctrl(exuConfigs))
  wb2Ctrl.addWritebackSink(exuBlocks :+ fakeMemBlockWbSource)
  val dpExuConfigs = exuBlocks.flatMap(_.scheduler.dispatch2.map(_.configs))
  val ctrlBlock = LazyModule(new CtrlBlock(dpExuConfigs))
  val writebackSources = Seq(Seq(wb2Ctrl), Seq(wbArbiter))
  writebackSources.foreach(s => ctrlBlock.addWritebackSink(s))

  lazy val module = new BackendImp(this)
}

/**
 * Hardware implementation of [[Backend]]: instantiates the lazy-module graph
 * built above and wires CtrlBlock, the two ExuBlocks, the writeback arbiter,
 * CSR/fence units, and the external MemBlock / Frontend interfaces together.
 */
class BackendImp(outer: Backend)(implicit p: Parameters) extends LazyModuleImp(outer)
  with HasXSParameter
{
  val io = IO(new Bundle() {
    val hartId = Input(UInt(64.W))
    val cpu_halt = Output(Bool())

    // Interface to the external memory block (loads/stores, LSQ, fences).
    val memBlock = new Bundle() { // TODO: use class
      val redirect = ValidIO(new Redirect)
      val issue = Vec(exuParameters.LsExuCnt + exuParameters.StuCnt, DecoupledIO(new ExuInput))
      val loadFastMatch = Vec(exuParameters.LduCnt, Output(UInt(exuParameters.LduCnt.W)))
      val loadFastFuOpType = Vec(exuParameters.LduCnt, Output(FuOpType()))
      val loadFastImm = Vec(exuParameters.LduCnt, Output(UInt(12.W)))
      // Feedback bundle elaborated with an enlarged IssQueSize — presumably so
      // RS-index fields are wide enough for MemBlock's view; TODO confirm.
      val rsfeedback = Vec(exuParameters.LsExuCnt, Flipped(new MemRSFeedbackIO()(p.alter((site, here, up) => {
        case XSCoreParamsKey => up(XSCoreParamsKey).copy(
          IssQueSize = IssQueSize * (if (Enable3Load3Store) 3 else 2)
        )
      }))))
      val loadPc = Vec(exuParameters.LduCnt, Output(UInt(VAddrBits.W)))
      val storePc = Vec(exuParameters.StuCnt, Output(UInt(VAddrBits.W)))
      val stIssuePtr = Input(new SqPtr())
      val writeback = Vec(exuParameters.LsExuCnt + exuParameters.StuCnt, Flipped(DecoupledIO(new ExuOutput)))
      val s3_delayed_load_error = Vec(exuParameters.LduCnt, Input(Bool()))
      val otherFastWakeup = Vec(exuParameters.LduCnt + 2 * exuParameters.StuCnt, Flipped(ValidIO(new MicroOp)))
      val stIn = Vec(exuParameters.StuCnt, Flipped(ValidIO(new ExuInput)))
      val memoryViolation = Flipped(ValidIO(new Redirect))
      val sfence = Output(new SfenceBundle)
      val tlbCsr = Output(new TlbCsrBundle)
      val fenceToSbuffer = new FenceToSbuffer
      val enqLsq = Flipped(new LsqEnqIO)
      val lsqio = new Bundle {
        val exceptionAddr = Flipped(new ExceptionAddrIO) // to csr
        val rob = new RobLsqIO // rob to lsq
        val lqCanAccept = Input(Bool())
        val sqCanAccept = Input(Bool())
      }
      val csrCtrl = new CustomCSRCtrlIO
      val lqCancelCnt = Input(UInt(log2Up(VirtualLoadQueueSize + 1).W))
      val sqCancelCnt = Input(UInt(log2Up(StoreQueueSize + 1).W))
      val scommit = Input(UInt(log2Ceil(EnsbufferWidth + 1).W))
      val lcommit = Input(UInt(log2Up(CommitWidth + 1).W))
      val debug_ls = Flipped(new DebugLSIO)
      val lsTopdownInfo = Vec(exuParameters.LduCnt, Input(new LsTopdownInfo))
    }

    // Interface to the frontend (fetch/decode side).
    val frontend = new Bundle() { // TODO: use class
      val frontend2Ctrl = Flipped(new FrontendToCtrlIO)
      val sfence = Output(new SfenceBundle)
      val tlbCsr = Output(new TlbCsrBundle)
      val csrCtrl = Output(new CustomCSRCtrlIO)
      val fencei = Output(Bool())
    }

    // CSR related
    val perf = Input(new PerfCounterIO)
    val externalInterrupt = new ExternalInterruptIO
    val distributedUpdate = Vec(2, Flipped(new DistributedCSRUpdateReq))

    val l2_pf_enable = Output(Bool())

    val debugTopDown = new Bundle {
      val fromRob = new RobCoreTopDownIO
      val fromCore = new CoreDispatchTopDownIO
    }
    val debugRolling = new RobDebugRollingIO
  })

  private val ctrlBlock = outer.ctrlBlock.module
  private val wb2Ctrl = outer.wb2Ctrl.module
  private val exuBlocks = outer.exuBlocks.map(_.module)
  private val wbArbiter = outer.wbArbiter.module

  // Short aliases for the two big external interfaces.
  val mem = io.memBlock
  val frontend = io.frontend

  // Feed MemBlock's writebacks into the fake diplomacy source declared in Backend.
  outer.fakeMemBlockWbSource.module.io.in <> mem.writeback

  ctrlBlock.io.hartId := io.hartId
  exuBlocks.foreach(_.io.hartId := io.hartId)
  wbArbiter.io.hartId := io.hartId

  io.cpu_halt := ctrlBlock.io.cpu_halt

  wbArbiter.io.redirect <> ctrlBlock.io.redirect

  // All writebacks (EXU blocks + memory pass-through) must match exuConfigs
  // one-to-one before entering the arbiter.
  val allWriteback = exuBlocks.flatMap(_.io.fuWriteback) ++ outer.fakeMemBlockWbSource.module.io.out
  require(exuConfigs.length == allWriteback.length, s"${exuConfigs.length} != ${allWriteback.length}")
  wbArbiter.io.in <> allWriteback
  val rfWriteback = wbArbiter.io.out

  // memblock error exception writeback, 1 cycle after normal writeback
  wb2Ctrl.io.s3_delayed_load_error <> mem.s3_delayed_load_error

  wb2Ctrl.io.redirect <> ctrlBlock.io.redirect
  outer.wb2Ctrl.generateWritebackIO()

  // Exactly one EXU block hosts the Jump/CSR unit; it also owns the fence unit.
  require(exuBlocks.count(_.fuConfigs.map(_._1).contains(JumpCSRExeUnitCfg)) == 1)
  val csrFenceMod = exuBlocks.filter(_.fuConfigs.map(_._1).contains(JumpCSRExeUnitCfg)).head
  val csrioIn = csrFenceMod.io.fuExtra.csrio.get
  val fenceio = csrFenceMod.io.fuExtra.fenceio.get

  // Frontend <-> control/CSR wiring.
  ctrlBlock.io.frontend <> frontend.frontend2Ctrl
  frontend.sfence <> fenceio.sfence
  frontend.tlbCsr <> csrioIn.tlb
  frontend.csrCtrl <> csrioIn.customCtrl
  frontend.fencei := fenceio.fencei

  ctrlBlock.io.csrCtrl <> csrioIn.customCtrl
  // Blocks whose FUs can trigger a redirect (branch/jump misprediction etc.).
  val redirectBlocks = exuBlocks.reverse.filter(_.fuConfigs.map(_._1).map(_.hasRedirect).reduce(_ || _))
  ctrlBlock.io.exuRedirect <> redirectBlocks.flatMap(_.io.fuExtra.exuRedirect)
  ctrlBlock.io.stIn <> mem.stIn
  ctrlBlock.io.memoryViolation <> mem.memoryViolation
  // Only the int/mem scheduler (block 0) enqueues into the LSQ.
  exuBlocks.head.io.scheExtra.enqLsq.get <> mem.enqLsq
  exuBlocks.foreach(b => {
    b.io.scheExtra.lcommit := mem.lcommit
    b.io.scheExtra.scommit := mem.scommit
    b.io.scheExtra.lqCancelCnt := mem.lqCancelCnt
    b.io.scheExtra.sqCancelCnt := mem.sqCancelCnt
  })
  val sourceModules = outer.writebackSources.map(_.map(_.module.asInstanceOf[HasWritebackSourceImp]))
  outer.ctrlBlock.generateWritebackIO()

  // Fast wakeup network: per-EXU fast uops plus MemBlock's, filtered by which
  // regfile each config writes, then reduced to one port per arbiter connection.
  val allFastUop = exuBlocks.flatMap(b => b.io.fastUopOut.dropRight(b.numOutFu)) ++ mem.otherFastWakeup
  require(allFastUop.length == exuConfigs.length, s"${allFastUop.length} != ${exuConfigs.length}")
  val intFastUop = allFastUop.zip(exuConfigs).filter(_._2.writeIntRf).map(_._1)
  val fpFastUop = allFastUop.zip(exuConfigs).filter(_._2.writeFpRf).map(_._1)
  val intFastUop1 = outer.wbArbiter.intConnections.map(c => intFastUop(c.head))
  val fpFastUop1 = outer.wbArbiter.fpConnections.map(c => fpFastUop(c.head))
  val allFastUop1 = intFastUop1 ++ fpFastUop1

  ctrlBlock.io.dispatch <> exuBlocks.flatMap(_.io.in)
  ctrlBlock.io.rsReady := exuBlocks.flatMap(_.io.scheExtra.rsReady)
  ctrlBlock.io.enqLsq <> mem.enqLsq
  ctrlBlock.io.lqDeq := mem.lcommit
  ctrlBlock.io.sqDeq := mem.scommit
  ctrlBlock.io.lqCanAccept := mem.lsqio.lqCanAccept
  ctrlBlock.io.sqCanAccept := mem.lsqio.sqCanAccept
  ctrlBlock.io.lqCancelCnt := mem.lqCancelCnt
  ctrlBlock.io.sqCancelCnt := mem.sqCancelCnt
  ctrlBlock.io.robHeadLsIssue := exuBlocks.map(_.io.scheExtra.robHeadLsIssue).reduce(_ || _)

  // The fp block (1) exposes its regfile read ports to the int/mem block (0),
  // presumably for store-data fp reads — see outFpRfReadPorts above.
  exuBlocks(0).io.scheExtra.fpRfReadIn.get <> exuBlocks(1).io.scheExtra.fpRfReadOut.get
  exuBlocks(0).io.scheExtra.fpStateReadIn.get <> exuBlocks(1).io.scheExtra.fpStateReadOut.get

  for((c, e) <- ctrlBlock.io.ld_pc_read.zip(exuBlocks(0).io.issue.get)){
    // read load pc at load s0
    c.ptr := e.bits.uop.cf.ftqPtr
    c.offset := e.bits.uop.cf.ftqOffset
  }
  // return load pc at load s2
  mem.loadPc <> VecInit(ctrlBlock.io.ld_pc_read.map(_.data))

  for((c, e) <- ctrlBlock.io.st_pc_read.zip(exuBlocks(0).io.issue.get.drop(exuParameters.LduCnt))){
    // read store pc at store s0
    c.ptr := e.bits.uop.cf.ftqPtr
    c.offset := e.bits.uop.cf.ftqOffset
  }
  // return store pc at store s2
  mem.storePc <> VecInit(ctrlBlock.io.st_pc_read.map(_.data))

  mem.issue <> exuBlocks(0).io.issue.get
  // By default, instructions do not have exceptions when they enter the function units.
  mem.issue.map(_.bits.uop.clearExceptions())
  exuBlocks(0).io.scheExtra.loadFastMatch.get <> mem.loadFastMatch
  exuBlocks(0).io.scheExtra.loadFastFuOpType.get <> mem.loadFastFuOpType
  exuBlocks(0).io.scheExtra.loadFastImm.get <> mem.loadFastImm

  // The last StuCnt issue ports of block 0 are the std (store data) issues —
  // matches the Std entries at the tail of schedulePorts(0).
  val stdIssue = exuBlocks(0).io.issue.get.takeRight(exuParameters.StuCnt)
  exuBlocks.map(_.io).foreach { exu =>
    exu.redirect <> ctrlBlock.io.redirect
    exu.allocPregs <> ctrlBlock.io.allocPregs
    exu.rfWriteback <> rfWriteback
    exu.fastUopIn <> allFastUop1
    exu.scheExtra.jumpPc <> ctrlBlock.io.jumpPc
    exu.scheExtra.jalr_target <> ctrlBlock.io.jalr_target
    exu.scheExtra.stIssuePtr <> mem.stIssuePtr
    exu.scheExtra.debug_fp_rat <> ctrlBlock.io.debug_fp_rat
    exu.scheExtra.debug_int_rat <> ctrlBlock.io.debug_int_rat
    exu.scheExtra.robDeqPtr := ctrlBlock.io.robDeqPtr
    // Store address/data issue notifications wake up memory-dependent uops.
    exu.scheExtra.memWaitUpdateReq.staIssue.zip(mem.stIn).foreach{case (sink, src) => {
      sink.bits := src.bits
      sink.valid := src.valid
    }}
    exu.scheExtra.memWaitUpdateReq.stdIssue.zip(stdIssue).foreach{case (sink, src) => {
      sink.valid := src.valid
      sink.bits := src.bits
    }}
  }

  XSPerfHistogram("fastIn_count", PopCount(allFastUop1.map(_.valid)), true.B, 0, allFastUop1.length, 1)
  XSPerfHistogram("wakeup_count", PopCount(rfWriteback.map(_.valid)), true.B, 0, rfWriteback.length, 1)

  // Per-block perf events, excluding each block's trailing RS events.
  ctrlBlock.perfinfo.perfEventsEu0 := exuBlocks(0).getPerf.dropRight(outer.exuBlocks(0).scheduler.numRs)
  ctrlBlock.perfinfo.perfEventsEu1 := exuBlocks(1).getPerf.dropRight(outer.exuBlocks(1).scheduler.numRs)

  if (Enable3Load3Store) {
    // RS perf events intentionally tied off in the 3-load/3-store config.
    ctrlBlock.perfinfo.perfEventsRs := DontCare // outer.exuBlocks.flatMap(b => b.module.getPerf.takeRight(b.scheduler.numRs))
  } else {
    ctrlBlock.perfinfo.perfEventsRs := outer.exuBlocks.flatMap(b => b.module.getPerf.takeRight(b.scheduler.numRs))
  }

  csrioIn.hartId <> io.hartId

  val perf = WireInit(io.perf) // other perf events are assigned outside the backend
  perf.retiredInstr <> ctrlBlock.io.robio.toCSR.perfinfo.retiredInstr
  perf.ctrlInfo <> ctrlBlock.io.perfInfo.ctrlInfo
  perf.perfEventsCtrl <> ctrlBlock.getPerf
  csrioIn.perf <> perf

  // CSR <-> ROB / FPU status wiring.
  csrioIn.fpu.fflags <> ctrlBlock.io.robio.toCSR.fflags
  csrioIn.fpu.isIllegal := false.B
  csrioIn.fpu.dirty_fs <> ctrlBlock.io.robio.toCSR.dirty_fs
  csrioIn.fpu.frm <> exuBlocks(1).io.fuExtra.frm.get
  csrioIn.exception <> ctrlBlock.io.robio.exception
  csrioIn.isXRet <> ctrlBlock.io.robio.toCSR.isXRet
  csrioIn.trapTarget <> ctrlBlock.io.robio.toCSR.trapTarget
  csrioIn.interrupt <> ctrlBlock.io.robio.toCSR.intrBitSet
  csrioIn.wfi_event <> ctrlBlock.io.robio.toCSR.wfiEvent
  csrioIn.memExceptionVAddr <> mem.lsqio.exceptionAddr.vaddr

  csrioIn.externalInterrupt := io.externalInterrupt

  csrioIn.distributedUpdate := io.distributedUpdate

  mem.sfence <> fenceio.sfence
  mem.fenceToSbuffer <> fenceio.sbuffer

  // MemBlock control/side-channel wiring.
  mem.redirect <> ctrlBlock.io.redirect
  mem.rsfeedback <> exuBlocks(0).io.scheExtra.feedback.get
  mem.csrCtrl <> csrioIn.customCtrl
  mem.tlbCsr <> csrioIn.tlb
  mem.lsqio.rob <> ctrlBlock.io.robio.lsq
  mem.lsqio.exceptionAddr.isStore := CommitType.lsInstIsStore(ctrlBlock.io.robio.exception.bits.uop.ctrl.commitType)
  mem.debug_ls <> ctrlBlock.io.robio.debug_ls
  mem.lsTopdownInfo <> ctrlBlock.io.robio.lsTopdownInfo

  // if l2 prefetcher use stream prefetch, it should be placed in XSCore
  io.l2_pf_enable := csrioIn.customCtrl.l2_pf_enable

  io.debugTopDown.fromRob := ctrlBlock.io.debugTopDown.fromRob
  ctrlBlock.io.debugTopDown.fromCore := io.debugTopDown.fromCore
  io.debugRolling := ctrlBlock.io.debugRolling

  // Staged reset tree (skipped on FPGA platforms).
  // NOTE(review): exuBlocks.head is deliberately absent here (`.tail`) —
  // presumably it is reset elsewhere in the reset chain; confirm against XSCore.
  val resetTree = ResetGenNode(
    exuBlocks.tail.map(m => ModuleNode(m))
      :+ ModuleNode(wbArbiter)
      :+ ModuleNode(ctrlBlock)
  )
  ResetGen(resetTree, reset, !p(DebugOptionsKey).FPGAPlatform)
}