1/*************************************************************************************** 2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences 3* Copyright (c) 2020-2021 Peng Cheng Laboratory 4* 5* XiangShan is licensed under Mulan PSL v2. 6* You can use this software according to the terms and conditions of the Mulan PSL v2. 7* You may obtain a copy of Mulan PSL v2 at: 8* http://license.coscl.org.cn/MulanPSL2 9* 10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, 11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, 12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 13* 14* See the Mulan PSL v2 for more details. 15***************************************************************************************/ 16 17package xiangshan.frontend 18 19import org.chipsalliance.cde.config.Parameters 20import chisel3._ 21import chisel3.util._ 22import utils._ 23import utility._ 24import xiangshan._ 25import xiangshan.frontend.icache._ 26import xiangshan.backend.CtrlToFtqIO 27import xiangshan.backend.decode.ImmUnion 28import utility.ChiselDB 29 30class FtqDebugBundle extends Bundle { 31 val pc = UInt(39.W) 32 val target = UInt(39.W) 33 val isBr = Bool() 34 val isJmp = Bool() 35 val isCall = Bool() 36 val isRet = Bool() 37 val misPred = Bool() 38 val isTaken = Bool() 39 val predStage = UInt(2.W) 40} 41 42class FtqPtr(implicit p: Parameters) extends CircularQueuePtr[FtqPtr]( 43 p => p(XSCoreParamsKey).FtqSize 44){ 45} 46 47object FtqPtr { 48 def apply(f: Bool, v: UInt)(implicit p: Parameters): FtqPtr = { 49 val ptr = Wire(new FtqPtr) 50 ptr.flag := f 51 ptr.value := v 52 ptr 53 } 54 def inverse(ptr: FtqPtr)(implicit p: Parameters): FtqPtr = { 55 apply(!ptr.flag, ptr.value) 56 } 57} 58 59class FtqNRSRAM[T <: Data](gen: T, numRead: Int)(implicit p: Parameters) extends XSModule { 60 61 val io = IO(new Bundle() { 62 val raddr = Input(Vec(numRead, UInt(log2Up(FtqSize).W))) 63 val ren = Input(Vec(numRead, Bool())) 64 val rdata = Output(Vec(numRead, gen)) 65 val waddr = Input(UInt(log2Up(FtqSize).W)) 66 val wen = Input(Bool()) 67 val wdata = Input(gen) 68 }) 69 70 for(i <- 0 until numRead){ 71 val sram = Module(new SRAMTemplate(gen, FtqSize)) 72 sram.io.r.req.valid := io.ren(i) 73 sram.io.r.req.bits.setIdx := io.raddr(i) 74 io.rdata(i) := sram.io.r.resp.data(0) 75 sram.io.w.req.valid := io.wen 76 sram.io.w.req.bits.setIdx := io.waddr 77 sram.io.w.req.bits.data := VecInit(io.wdata) 78 } 79 80} 81 82class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils { 83 val startAddr = UInt(VAddrBits.W) 84 val nextLineAddr = UInt(VAddrBits.W) 85 val isNextMask = Vec(PredictWidth, Bool()) 86 val fallThruError = Bool() 87 // val carry = Bool() 88 def getPc(offset: UInt) = { 89 def getHigher(pc: UInt) = pc(VAddrBits-1, log2Ceil(PredictWidth)+instOffsetBits+1) 90 def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth)+instOffsetBits, instOffsetBits) 91 Cat(getHigher(Mux(isNextMask(offset) && startAddr(log2Ceil(PredictWidth)+instOffsetBits), nextLineAddr, startAddr)), 92 getOffset(startAddr)+offset, 0.U(instOffsetBits.W)) 93 } 94 def fromBranchPrediction(resp: BranchPredictionBundle) = { 95 def carryPos(addr: UInt) = addr(instOffsetBits+log2Ceil(PredictWidth)+1) 96 this.startAddr := resp.pc(3) 97 this.nextLineAddr := resp.pc(3) + (FetchWidth * 4 * 2).U // may be broken on other configs 98 this.isNextMask := VecInit((0 until PredictWidth).map(i => 99 (resp.pc(3)(log2Ceil(PredictWidth), 1) +& 
i.U)(log2Ceil(PredictWidth)).asBool 100 )) 101 this.fallThruError := resp.fallThruError(3) 102 this 103 } 104 override def toPrintable: Printable = { 105 p"startAddr:${Hexadecimal(startAddr)}" 106 } 107} 108 109class Ftq_pd_Entry(implicit p: Parameters) extends XSBundle { 110 val brMask = Vec(PredictWidth, Bool()) 111 val jmpInfo = ValidUndirectioned(Vec(3, Bool())) 112 val jmpOffset = UInt(log2Ceil(PredictWidth).W) 113 val jalTarget = UInt(VAddrBits.W) 114 val rvcMask = Vec(PredictWidth, Bool()) 115 def hasJal = jmpInfo.valid && !jmpInfo.bits(0) 116 def hasJalr = jmpInfo.valid && jmpInfo.bits(0) 117 def hasCall = jmpInfo.valid && jmpInfo.bits(1) 118 def hasRet = jmpInfo.valid && jmpInfo.bits(2) 119 120 def fromPdWb(pdWb: PredecodeWritebackBundle) = { 121 val pds = pdWb.pd 122 this.brMask := VecInit(pds.map(pd => pd.isBr && pd.valid)) 123 this.jmpInfo.valid := VecInit(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)).asUInt.orR 124 this.jmpInfo.bits := ParallelPriorityMux(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid), 125 pds.map(pd => VecInit(pd.isJalr, pd.isCall, pd.isRet))) 126 this.jmpOffset := ParallelPriorityEncoder(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)) 127 this.rvcMask := VecInit(pds.map(pd => pd.isRVC)) 128 this.jalTarget := pdWb.jalTarget 129 } 130 131 def toPd(offset: UInt) = { 132 require(offset.getWidth == log2Ceil(PredictWidth)) 133 val pd = Wire(new PreDecodeInfo) 134 pd.valid := true.B 135 pd.isRVC := rvcMask(offset) 136 val isBr = brMask(offset) 137 val isJalr = offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(0) 138 pd.brType := Cat(offset === jmpOffset && jmpInfo.valid, isJalr || isBr) 139 pd.isCall := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(1) 140 pd.isRet := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(2) 141 pd 142 } 143} 144 145class PrefetchPtrDB(implicit p: Parameters) extends Bundle { 146 val fromFtqPtr = UInt(log2Up(p(XSCoreParamsKey).FtqSize).W) 147 val fromIfuPtr = UInt(log2Up(p(XSCoreParamsKey).FtqSize).W) 148} 149 150class Ftq_Redirect_SRAMEntry(implicit p: Parameters) extends SpeculativeInfo { 151 val sc_disagree = Vec(numBr, Bool()) 152} 153 154class Ftq_1R_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst { 155 val meta = UInt(MaxMetaLength.W) 156} 157 158class Ftq_Pred_Info(implicit p: Parameters) extends XSBundle { 159 val target = UInt(VAddrBits.W) 160 val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W)) 161} 162 163 164class FtqRead[T <: Data](private val gen: T)(implicit p: Parameters) extends XSBundle { 165 val ptr = Output(new FtqPtr) 166 val offset = Output(UInt(log2Ceil(PredictWidth).W)) 167 val data = Input(gen) 168 def apply(ptr: FtqPtr, offset: UInt) = { 169 this.ptr := ptr 170 this.offset := offset 171 this.data 172 } 173} 174 175 176class FtqToBpuIO(implicit p: Parameters) extends XSBundle { 177 val redirect = Valid(new BranchPredictionRedirect) 178 val update = Valid(new BranchPredictionUpdate) 179 val enq_ptr = Output(new FtqPtr) 180} 181 182class FtqToIfuIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper { 183 val req = Decoupled(new FetchRequestBundle) 184 val redirect = Valid(new BranchPredictionRedirect) 185 val topdown_redirect = Valid(new BranchPredictionRedirect) 186 val flushFromBpu = new Bundle { 187 // when ifu pipeline is not stalled, 188 // a packet from bpu s3 can reach f1 at most 189 val s2 = Valid(new FtqPtr) 190 val s3 = Valid(new FtqPtr) 191 def shouldFlushBy(src: Valid[FtqPtr], idx_to_flush: FtqPtr) = { 192 src.valid && 
!isAfter(src.bits, idx_to_flush) 193 } 194 def shouldFlushByStage2(idx: FtqPtr) = shouldFlushBy(s2, idx) 195 def shouldFlushByStage3(idx: FtqPtr) = shouldFlushBy(s3, idx) 196 } 197} 198 199class FtqToICacheIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper { 200 //NOTE: req.bits must be prepare in T cycle 201 // while req.valid is set true in T + 1 cycle 202 val req = Decoupled(new FtqToICacheRequestBundle) 203} 204 205trait HasBackendRedirectInfo extends HasXSParameter { 206 def numRedirectPcRead = exuParameters.JmpCnt + exuParameters.AluCnt + 1 207 def isLoadReplay(r: Valid[Redirect]) = r.bits.flushItself() 208} 209 210class FtqToCtrlIO(implicit p: Parameters) extends XSBundle with HasBackendRedirectInfo { 211 // write to backend pc mem 212 val pc_mem_wen = Output(Bool()) 213 val pc_mem_waddr = Output(UInt(log2Ceil(FtqSize).W)) 214 val pc_mem_wdata = Output(new Ftq_RF_Components) 215 // newest target 216 val newest_entry_target = Output(UInt(VAddrBits.W)) 217 val newest_entry_ptr = Output(new FtqPtr) 218} 219 220 221class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo with HasBPUParameter { 222 val io = IO(new Bundle { 223 val start_addr = Input(UInt(VAddrBits.W)) 224 val old_entry = Input(new FTBEntry) 225 val pd = Input(new Ftq_pd_Entry) 226 val cfiIndex = Flipped(Valid(UInt(log2Ceil(PredictWidth).W))) 227 val target = Input(UInt(VAddrBits.W)) 228 val hit = Input(Bool()) 229 val mispredict_vec = Input(Vec(PredictWidth, Bool())) 230 231 val new_entry = Output(new FTBEntry) 232 val new_br_insert_pos = Output(Vec(numBr, Bool())) 233 val taken_mask = Output(Vec(numBr, Bool())) 234 val jmp_taken = Output(Bool()) 235 val mispred_mask = Output(Vec(numBr+1, Bool())) 236 237 // for perf counters 238 val is_init_entry = Output(Bool()) 239 val is_old_entry = Output(Bool()) 240 val is_new_br = Output(Bool()) 241 val is_jalr_target_modified = Output(Bool()) 242 val is_always_taken_modified = Output(Bool()) 243 val is_br_full = Output(Bool()) 244 }) 245 246 // no mispredictions detected at predecode 247 val hit = io.hit 248 val pd = io.pd 249 250 val init_entry = WireInit(0.U.asTypeOf(new FTBEntry)) 251 252 253 val cfi_is_br = pd.brMask(io.cfiIndex.bits) && io.cfiIndex.valid 254 val entry_has_jmp = pd.jmpInfo.valid 255 val new_jmp_is_jal = entry_has_jmp && !pd.jmpInfo.bits(0) && io.cfiIndex.valid 256 val new_jmp_is_jalr = entry_has_jmp && pd.jmpInfo.bits(0) && io.cfiIndex.valid 257 val new_jmp_is_call = entry_has_jmp && pd.jmpInfo.bits(1) && io.cfiIndex.valid 258 val new_jmp_is_ret = entry_has_jmp && pd.jmpInfo.bits(2) && io.cfiIndex.valid 259 val last_jmp_rvi = entry_has_jmp && pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask.last 260 // val last_br_rvi = cfi_is_br && io.cfiIndex.bits === (PredictWidth-1).U && !pd.rvcMask.last 261 262 val cfi_is_jal = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jal 263 val cfi_is_jalr = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jalr 264 265 def carryPos = log2Ceil(PredictWidth)+instOffsetBits 266 def getLower(pc: UInt) = pc(carryPos-1, instOffsetBits) 267 // if not hit, establish a new entry 268 init_entry.valid := true.B 269 // tag is left for ftb to assign 270 271 // case br 272 val init_br_slot = init_entry.getSlotForBr(0) 273 when (cfi_is_br) { 274 init_br_slot.valid := true.B 275 init_br_slot.offset := io.cfiIndex.bits 276 init_br_slot.setLowerStatByTarget(io.start_addr, io.target, numBr == 1) 277 init_entry.always_taken(0) := true.B // set to always taken on init 278 } 279 280 // case jmp 
281 when (entry_has_jmp) { 282 init_entry.tailSlot.offset := pd.jmpOffset 283 init_entry.tailSlot.valid := new_jmp_is_jal || new_jmp_is_jalr 284 init_entry.tailSlot.setLowerStatByTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget), isShare=false) 285 } 286 287 val jmpPft = getLower(io.start_addr) +& pd.jmpOffset +& Mux(pd.rvcMask(pd.jmpOffset), 1.U, 2.U) 288 init_entry.pftAddr := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft, getLower(io.start_addr)) 289 init_entry.carry := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft(carryPos-instOffsetBits), true.B) 290 init_entry.isJalr := new_jmp_is_jalr 291 init_entry.isCall := new_jmp_is_call 292 init_entry.isRet := new_jmp_is_ret 293 // that means fall thru points to the middle of an inst 294 init_entry.last_may_be_rvi_call := pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask(pd.jmpOffset) 295 296 // if hit, check whether a new cfi(only br is possible) is detected 297 val oe = io.old_entry 298 val br_recorded_vec = oe.getBrRecordedVec(io.cfiIndex.bits) 299 val br_recorded = br_recorded_vec.asUInt.orR 300 val is_new_br = cfi_is_br && !br_recorded 301 val new_br_offset = io.cfiIndex.bits 302 // vec(i) means new br will be inserted BEFORE old br(i) 303 val allBrSlotsVec = oe.allSlotsForBr 304 val new_br_insert_onehot = VecInit((0 until numBr).map{ 305 i => i match { 306 case 0 => 307 !allBrSlotsVec(0).valid || new_br_offset < allBrSlotsVec(0).offset 308 case idx => 309 allBrSlotsVec(idx-1).valid && new_br_offset > allBrSlotsVec(idx-1).offset && 310 (!allBrSlotsVec(idx).valid || new_br_offset < allBrSlotsVec(idx).offset) 311 } 312 }) 313 314 val old_entry_modified = WireInit(io.old_entry) 315 for (i <- 0 until numBr) { 316 val slot = old_entry_modified.allSlotsForBr(i) 317 when (new_br_insert_onehot(i)) { 318 slot.valid := true.B 319 slot.offset := new_br_offset 320 slot.setLowerStatByTarget(io.start_addr, io.target, i == numBr-1) 321 old_entry_modified.always_taken(i) := true.B 322 }.elsewhen (new_br_offset > oe.allSlotsForBr(i).offset) { 323 old_entry_modified.always_taken(i) := false.B 324 // all other fields remain unchanged 325 }.otherwise { 326 // case i == 0, remain unchanged 327 if (i != 0) { 328 val noNeedToMoveFromFormerSlot = (i == numBr-1).B && !oe.brSlots.last.valid 329 when (!noNeedToMoveFromFormerSlot) { 330 slot.fromAnotherSlot(oe.allSlotsForBr(i-1)) 331 old_entry_modified.always_taken(i) := oe.always_taken(i) 332 } 333 } 334 } 335 } 336 337 // two circumstances: 338 // 1. oe: | br | j |, new br should be in front of j, thus addr of j should be new pft 339 // 2. 
oe: | br | br |, new br could be anywhere between, thus new pft is the addr of either 340 // the previous last br or the new br 341 val may_have_to_replace = oe.noEmptySlotForNewBr 342 val pft_need_to_change = is_new_br && may_have_to_replace 343 // it should either be the given last br or the new br 344 when (pft_need_to_change) { 345 val new_pft_offset = 346 Mux(!new_br_insert_onehot.asUInt.orR, 347 new_br_offset, oe.allSlotsForBr.last.offset) 348 349 // set jmp to invalid 350 old_entry_modified.pftAddr := getLower(io.start_addr) + new_pft_offset 351 old_entry_modified.carry := (getLower(io.start_addr) +& new_pft_offset).head(1).asBool 352 old_entry_modified.last_may_be_rvi_call := false.B 353 old_entry_modified.isCall := false.B 354 old_entry_modified.isRet := false.B 355 old_entry_modified.isJalr := false.B 356 } 357 358 val old_entry_jmp_target_modified = WireInit(oe) 359 val old_target = oe.tailSlot.getTarget(io.start_addr) // may be wrong because we store only 20 lowest bits 360 val old_tail_is_jmp = !oe.tailSlot.sharing 361 val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) && old_tail_is_jmp // TODO: pass full jalr target 362 when (jalr_target_modified) { 363 old_entry_jmp_target_modified.setByJmpTarget(io.start_addr, io.target) 364 old_entry_jmp_target_modified.always_taken := 0.U.asTypeOf(Vec(numBr, Bool())) 365 } 366 367 val old_entry_always_taken = WireInit(oe) 368 val always_taken_modified_vec = Wire(Vec(numBr, Bool())) // whether modified or not 369 for (i <- 0 until numBr) { 370 old_entry_always_taken.always_taken(i) := 371 oe.always_taken(i) && io.cfiIndex.valid && oe.brValids(i) && io.cfiIndex.bits === oe.brOffset(i) 372 always_taken_modified_vec(i) := oe.always_taken(i) && !old_entry_always_taken.always_taken(i) 373 } 374 val always_taken_modified = always_taken_modified_vec.reduce(_||_) 375 376 377 378 val derived_from_old_entry = 379 Mux(is_new_br, old_entry_modified, 380 Mux(jalr_target_modified, old_entry_jmp_target_modified, old_entry_always_taken)) 381 382 383 io.new_entry := Mux(!hit, init_entry, derived_from_old_entry) 384 385 io.new_br_insert_pos := new_br_insert_onehot 386 io.taken_mask := VecInit((io.new_entry.brOffset zip io.new_entry.brValids).map{ 387 case (off, v) => io.cfiIndex.bits === off && io.cfiIndex.valid && v 388 }) 389 io.jmp_taken := io.new_entry.jmpValid && io.new_entry.tailSlot.offset === io.cfiIndex.bits 390 for (i <- 0 until numBr) { 391 io.mispred_mask(i) := io.new_entry.brValids(i) && io.mispredict_vec(io.new_entry.brOffset(i)) 392 } 393 io.mispred_mask.last := io.new_entry.jmpValid && io.mispredict_vec(pd.jmpOffset) 394 395 // for perf counters 396 io.is_init_entry := !hit 397 io.is_old_entry := hit && !is_new_br && !jalr_target_modified && !always_taken_modified 398 io.is_new_br := hit && is_new_br 399 io.is_jalr_target_modified := hit && jalr_target_modified 400 io.is_always_taken_modified := hit && always_taken_modified 401 io.is_br_full := hit && is_new_br && may_have_to_replace 402} 403 404class FtqPcMemWrapper(numOtherReads: Int)(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo { 405 val io = IO(new Bundle { 406 val ifuPtr_w = Input(new FtqPtr) 407 val ifuPtrPlus1_w = Input(new FtqPtr) 408 val ifuPtrPlus2_w = Input(new FtqPtr) 409 val commPtr_w = Input(new FtqPtr) 410 val commPtrPlus1_w = Input(new FtqPtr) 411 val ifuPtr_rdata = Output(new Ftq_RF_Components) 412 val ifuPtrPlus1_rdata = Output(new Ftq_RF_Components) 413 val ifuPtrPlus2_rdata = Output(new Ftq_RF_Components) 414 val commPtr_rdata = 
Output(new Ftq_RF_Components) 415 val commPtrPlus1_rdata = Output(new Ftq_RF_Components) 416 417 val other_raddrs = Input(Vec(numOtherReads, UInt(log2Ceil(FtqSize).W))) 418 val other_rdatas = Output(Vec(numOtherReads, new Ftq_RF_Components)) 419 420 val wen = Input(Bool()) 421 val waddr = Input(UInt(log2Ceil(FtqSize).W)) 422 val wdata = Input(new Ftq_RF_Components) 423 }) 424 425 val num_pc_read = numOtherReads + 5 426 val mem = Module(new SyncDataModuleTemplate(new Ftq_RF_Components, FtqSize, 427 num_pc_read, 1, "FtqPC")) 428 mem.io.wen(0) := io.wen 429 mem.io.waddr(0) := io.waddr 430 mem.io.wdata(0) := io.wdata 431 432 // read one cycle ahead for ftq local reads 433 val raddr_vec = VecInit(io.other_raddrs ++ 434 Seq(io.ifuPtr_w.value, io.ifuPtrPlus1_w.value, io.ifuPtrPlus2_w.value, io.commPtrPlus1_w.value, io.commPtr_w.value)) 435 436 mem.io.raddr := raddr_vec 437 438 io.other_rdatas := mem.io.rdata.dropRight(5) 439 io.ifuPtr_rdata := mem.io.rdata.dropRight(4).last 440 io.ifuPtrPlus1_rdata := mem.io.rdata.dropRight(3).last 441 io.ifuPtrPlus2_rdata := mem.io.rdata.dropRight(2).last 442 io.commPtrPlus1_rdata := mem.io.rdata.dropRight(1).last 443 io.commPtr_rdata := mem.io.rdata.last 444} 445 446class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper 447 with HasBackendRedirectInfo with BPUUtils with HasBPUConst with HasPerfEvents 448 with HasICacheParameters{ 449 val io = IO(new Bundle { 450 val fromBpu = Flipped(new BpuToFtqIO) 451 val fromIfu = Flipped(new IfuToFtqIO) 452 val fromBackend = Flipped(new CtrlToFtqIO) 453 454 val toBpu = new FtqToBpuIO 455 val toIfu = new FtqToIfuIO 456 val toICache = new FtqToICacheIO 457 val toBackend = new FtqToCtrlIO 458 459 val toPrefetch = new FtqPrefechBundle 460 461 val bpuInfo = new Bundle { 462 val bpRight = Output(UInt(XLEN.W)) 463 val bpWrong = Output(UInt(XLEN.W)) 464 } 465 466 val mmioCommitRead = Flipped(new mmioCommitRead) 467 468 // for perf 469 val ControlBTBMissBubble = Output(Bool()) 470 val TAGEMissBubble = Output(Bool()) 471 val SCMissBubble = Output(Bool()) 472 val ITTAGEMissBubble = Output(Bool()) 473 val RASMissBubble = Output(Bool()) 474 }) 475 io.bpuInfo := DontCare 476 477 val topdown_stage = RegInit(0.U.asTypeOf(new FrontendTopDownBundle)) 478 dontTouch(topdown_stage) 479 // only driven by clock, not valid-ready 480 topdown_stage := io.fromBpu.resp.bits.topdown_info 481 io.toIfu.req.bits.topdown_info := topdown_stage 482 483 val ifuRedirected = RegInit(VecInit(Seq.fill(FtqSize)(false.B))) 484 485 val backendRedirect = Wire(Valid(new BranchPredictionRedirect)) 486 when(io.fromBackend.redirect.valid) { 487 assert(RegNext(io.fromBackend.ftqIdxAhead.map(_.valid).reduce(_|_))) 488 assert(io.fromBackend.ftqIdxSelOH.valid) 489 assert(PopCount(io.fromBackend.ftqIdxSelOH.bits) === 1.U) 490 } 491 492 val stage2Flush = backendRedirect.valid 493 val backendFlush = stage2Flush || RegNext(stage2Flush) 494 val ifuFlush = Wire(Bool()) 495 496 val flush = stage2Flush || RegNext(stage2Flush) 497 498 val allowBpuIn, allowToIfu = WireInit(false.B) 499 val flushToIfu = !allowToIfu 500 allowBpuIn := !ifuFlush && !backendRedirect.valid 501 allowToIfu := !ifuFlush && !backendRedirect.valid 502 503 def copyNum = 5 504 val bpuPtr, ifuPtr, ifuWbPtr, commPtr = RegInit(FtqPtr(false.B, 0.U)) 505 val ifuPtrPlus1 = RegInit(FtqPtr(false.B, 1.U)) 506 val ifuPtrPlus2 = RegInit(FtqPtr(false.B, 2.U)) 507 val commPtrPlus1 = RegInit(FtqPtr(false.B, 1.U)) 508 val copied_ifu_ptr = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U))) 509 val 
copied_bpu_ptr = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U))) 510 require(FtqSize >= 4) 511 val ifuPtr_write = WireInit(ifuPtr) 512 val ifuPtrPlus1_write = WireInit(ifuPtrPlus1) 513 val ifuPtrPlus2_write = WireInit(ifuPtrPlus2) 514 val ifuWbPtr_write = WireInit(ifuWbPtr) 515 val commPtr_write = WireInit(commPtr) 516 val commPtrPlus1_write = WireInit(commPtrPlus1) 517 ifuPtr := ifuPtr_write 518 ifuPtrPlus1 := ifuPtrPlus1_write 519 ifuPtrPlus2 := ifuPtrPlus2_write 520 ifuWbPtr := ifuWbPtr_write 521 commPtr := commPtr_write 522 commPtrPlus1 := commPtrPlus1_write 523 copied_ifu_ptr.map{ptr => 524 ptr := ifuPtr_write 525 dontTouch(ptr) 526 } 527 val validEntries = distanceBetween(bpuPtr, commPtr) 528 val canCommit = Wire(Bool()) 529 530 // ********************************************************************** 531 // **************************** enq from bpu **************************** 532 // ********************************************************************** 533 val new_entry_ready = validEntries < FtqSize.U || canCommit 534 io.fromBpu.resp.ready := new_entry_ready 535 536 val bpu_s2_resp = io.fromBpu.resp.bits.s2 537 val bpu_s3_resp = io.fromBpu.resp.bits.s3 538 val bpu_s2_redirect = bpu_s2_resp.valid(3) && bpu_s2_resp.hasRedirect(3) 539 val bpu_s3_redirect = bpu_s3_resp.valid(3) && bpu_s3_resp.hasRedirect(3) 540 541 io.toBpu.enq_ptr := bpuPtr 542 val enq_fire = io.fromBpu.resp.fire && allowBpuIn // from bpu s1 543 val bpu_in_fire = (io.fromBpu.resp.fire || bpu_s2_redirect || bpu_s3_redirect) && allowBpuIn 544 545 val bpu_in_resp = io.fromBpu.resp.bits.selectedResp 546 val bpu_in_stage = io.fromBpu.resp.bits.selectedRespIdxForFtq 547 val bpu_in_resp_ptr = Mux(bpu_in_stage === BP_S1, bpuPtr, bpu_in_resp.ftq_idx) 548 val bpu_in_resp_idx = bpu_in_resp_ptr.value 549 550 // read ports: prefetchReq ++ ifuReq1 + ifuReq2 + ifuReq3 + commitUpdate2 + commitUpdate 551 val ftq_pc_mem = Module(new FtqPcMemWrapper(1)) 552 // resp from uBTB 553 ftq_pc_mem.io.wen := bpu_in_fire 554 ftq_pc_mem.io.waddr := bpu_in_resp_idx 555 ftq_pc_mem.io.wdata.fromBranchPrediction(bpu_in_resp) 556 557 // ifuRedirect + backendRedirect + commit 558 val ftq_redirect_sram = Module(new FtqNRSRAM(new Ftq_Redirect_SRAMEntry, 1+BackendRedirectNum+1)) 559 // these info is intended to enq at the last stage of bpu 560 ftq_redirect_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid(3) 561 ftq_redirect_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value 562 ftq_redirect_sram.io.wdata := io.fromBpu.resp.bits.last_stage_spec_info 563 println(f"ftq redirect SRAM: entry ${ftq_redirect_sram.io.wdata.getWidth} * ${FtqSize} * 3") 564 println(f"ftq redirect SRAM: ahead fh ${ftq_redirect_sram.io.wdata.afhob.getWidth} * ${FtqSize} * 3") 565 566 val ftq_meta_1r_sram = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, 1)) 567 // these info is intended to enq at the last stage of bpu 568 ftq_meta_1r_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid(3) 569 ftq_meta_1r_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value 570 ftq_meta_1r_sram.io.wdata.meta := io.fromBpu.resp.bits.last_stage_meta 571 // ifuRedirect + backendRedirect + commit 572 val ftb_entry_mem = Module(new SyncDataModuleTemplate(new FTBEntry, FtqSize, 1+BackendRedirectNum+1, 1)) 573 ftb_entry_mem.io.wen(0) := io.fromBpu.resp.bits.lastStage.valid(3) 574 ftb_entry_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value 575 ftb_entry_mem.io.wdata(0) := io.fromBpu.resp.bits.last_stage_ftb_entry 576 577 578 // multi-write 579 val update_target = 
Reg(Vec(FtqSize, UInt(VAddrBits.W))) // could be taken target or fallThrough //TODO: remove this 580 val newest_entry_target = Reg(UInt(VAddrBits.W)) 581 val newest_entry_ptr = Reg(new FtqPtr) 582 val cfiIndex_vec = Reg(Vec(FtqSize, ValidUndirectioned(UInt(log2Ceil(PredictWidth).W)))) 583 val mispredict_vec = Reg(Vec(FtqSize, Vec(PredictWidth, Bool()))) 584 val pred_stage = Reg(Vec(FtqSize, UInt(2.W))) 585 val pred_s1_cycle = if (!env.FPGAPlatform) Some(Reg(Vec(FtqSize, UInt(64.W)))) else None 586 587 val c_invalid :: c_valid :: c_commited :: Nil = Enum(3) 588 val commitStateQueue = RegInit(VecInit(Seq.fill(FtqSize) { 589 VecInit(Seq.fill(PredictWidth)(c_invalid)) 590 })) 591 592 val f_to_send :: f_sent :: Nil = Enum(2) 593 val entry_fetch_status = RegInit(VecInit(Seq.fill(FtqSize)(f_sent))) 594 595 val h_not_hit :: h_false_hit :: h_hit :: Nil = Enum(3) 596 val entry_hit_status = RegInit(VecInit(Seq.fill(FtqSize)(h_not_hit))) 597 598 // modify registers one cycle later to cut critical path 599 val last_cycle_bpu_in = RegNext(bpu_in_fire) 600 val last_cycle_bpu_in_ptr = RegNext(bpu_in_resp_ptr) 601 val last_cycle_bpu_in_idx = last_cycle_bpu_in_ptr.value 602 val last_cycle_bpu_target = RegNext(bpu_in_resp.getTarget(3)) 603 val last_cycle_cfiIndex = RegNext(bpu_in_resp.cfiIndex(3)) 604 val last_cycle_bpu_in_stage = RegNext(bpu_in_stage) 605 606 def extra_copyNum_for_commitStateQueue = 2 607 val copied_last_cycle_bpu_in = VecInit(Seq.fill(copyNum+extra_copyNum_for_commitStateQueue)(RegNext(bpu_in_fire))) 608 val copied_last_cycle_bpu_in_ptr_for_ftq = VecInit(Seq.fill(extra_copyNum_for_commitStateQueue)(RegNext(bpu_in_resp_ptr))) 609 610 when (last_cycle_bpu_in) { 611 entry_fetch_status(last_cycle_bpu_in_idx) := f_to_send 612 cfiIndex_vec(last_cycle_bpu_in_idx) := last_cycle_cfiIndex 613 pred_stage(last_cycle_bpu_in_idx) := last_cycle_bpu_in_stage 614 615 update_target(last_cycle_bpu_in_idx) := last_cycle_bpu_target // TODO: remove this 616 newest_entry_target := last_cycle_bpu_target 617 newest_entry_ptr := last_cycle_bpu_in_ptr 618 } 619 620 // reduce fanout by delay write for a cycle 621 when (RegNext(last_cycle_bpu_in)) { 622 mispredict_vec(RegNext(last_cycle_bpu_in_idx)) := WireInit(VecInit(Seq.fill(PredictWidth)(false.B))) 623 } 624 625 // record s1 pred cycles 626 pred_s1_cycle.map(vec => { 627 when (bpu_in_fire && (bpu_in_stage === BP_S1)) { 628 vec(bpu_in_resp_ptr.value) := bpu_in_resp.full_pred(0).predCycle.getOrElse(0.U) 629 } 630 }) 631 632 // reduce fanout using copied last_cycle_bpu_in and copied last_cycle_bpu_in_ptr 633 val copied_last_cycle_bpu_in_for_ftq = copied_last_cycle_bpu_in.takeRight(extra_copyNum_for_commitStateQueue) 634 copied_last_cycle_bpu_in_for_ftq.zip(copied_last_cycle_bpu_in_ptr_for_ftq).zipWithIndex.map { 635 case ((in, ptr), i) => 636 when (in) { 637 val perSetEntries = FtqSize / extra_copyNum_for_commitStateQueue // 32 638 require(FtqSize % extra_copyNum_for_commitStateQueue == 0) 639 for (j <- 0 until perSetEntries) { 640 when (ptr.value === (i*perSetEntries+j).U) { 641 commitStateQueue(i*perSetEntries+j) := VecInit(Seq.fill(PredictWidth)(c_invalid)) 642 } 643 } 644 } 645 } 646 647 // num cycle is fixed 648 io.toBackend.newest_entry_ptr := RegNext(newest_entry_ptr) 649 io.toBackend.newest_entry_target := RegNext(newest_entry_target) 650 651 652 bpuPtr := bpuPtr + enq_fire 653 copied_bpu_ptr.map(_ := bpuPtr + enq_fire) 654 when (io.toIfu.req.fire && allowToIfu) { 655 ifuPtr_write := ifuPtrPlus1 656 ifuPtrPlus1_write := ifuPtrPlus2 657 ifuPtrPlus2_write := 
ifuPtrPlus2 + 1.U 658 } 659 660 // only use ftb result to assign hit status 661 when (bpu_s2_resp.valid(3)) { 662 entry_hit_status(bpu_s2_resp.ftq_idx.value) := Mux(bpu_s2_resp.full_pred(3).hit, h_hit, h_not_hit) 663 } 664 665 666 io.toIfu.flushFromBpu.s2.valid := bpu_s2_redirect 667 io.toIfu.flushFromBpu.s2.bits := bpu_s2_resp.ftq_idx 668 when (bpu_s2_redirect) { 669 bpuPtr := bpu_s2_resp.ftq_idx + 1.U 670 copied_bpu_ptr.map(_ := bpu_s2_resp.ftq_idx + 1.U) 671 // only when ifuPtr runs ahead of bpu s2 resp should we recover it 672 when (!isBefore(ifuPtr, bpu_s2_resp.ftq_idx)) { 673 ifuPtr_write := bpu_s2_resp.ftq_idx 674 ifuPtrPlus1_write := bpu_s2_resp.ftq_idx + 1.U 675 ifuPtrPlus2_write := bpu_s2_resp.ftq_idx + 2.U 676 } 677 } 678 679 io.toIfu.flushFromBpu.s3.valid := bpu_s3_redirect 680 io.toIfu.flushFromBpu.s3.bits := bpu_s3_resp.ftq_idx 681 when (bpu_s3_redirect) { 682 bpuPtr := bpu_s3_resp.ftq_idx + 1.U 683 copied_bpu_ptr.map(_ := bpu_s3_resp.ftq_idx + 1.U) 684 // only when ifuPtr runs ahead of bpu s2 resp should we recover it 685 when (!isBefore(ifuPtr, bpu_s3_resp.ftq_idx)) { 686 ifuPtr_write := bpu_s3_resp.ftq_idx 687 ifuPtrPlus1_write := bpu_s3_resp.ftq_idx + 1.U 688 ifuPtrPlus2_write := bpu_s3_resp.ftq_idx + 2.U 689 } 690 } 691 692 XSError(isBefore(bpuPtr, ifuPtr) && !isFull(bpuPtr, ifuPtr), "\nifuPtr is before bpuPtr!\n") 693 XSError(isBefore(ifuWbPtr, commPtr) && !isFull(ifuWbPtr, commPtr), "\ncommPtr is before ifuWbPtr!\n") 694 695 (0 until copyNum).map{i => 696 XSError(copied_bpu_ptr(i) =/= bpuPtr, "\ncopiedBpuPtr is different from bpuPtr!\n") 697 } 698 699 // **************************************************************** 700 // **************************** to ifu **************************** 701 // **************************************************************** 702 // 0 for ifu, and 1-4 for ICache 703 val bpu_in_bypass_buf = RegEnable(ftq_pc_mem.io.wdata, bpu_in_fire) 704 val copied_bpu_in_bypass_buf = VecInit(Seq.fill(copyNum)(RegEnable(ftq_pc_mem.io.wdata, bpu_in_fire))) 705 val bpu_in_bypass_buf_for_ifu = bpu_in_bypass_buf 706 val bpu_in_bypass_ptr = RegNext(bpu_in_resp_ptr) 707 val last_cycle_to_ifu_fire = RegNext(io.toIfu.req.fire) 708 709 val copied_bpu_in_bypass_ptr = VecInit(Seq.fill(copyNum)(RegNext(bpu_in_resp_ptr))) 710 val copied_last_cycle_to_ifu_fire = VecInit(Seq.fill(copyNum)(RegNext(io.toIfu.req.fire))) 711 712 // read pc and target 713 ftq_pc_mem.io.ifuPtr_w := ifuPtr_write 714 ftq_pc_mem.io.ifuPtrPlus1_w := ifuPtrPlus1_write 715 ftq_pc_mem.io.ifuPtrPlus2_w := ifuPtrPlus2_write 716 ftq_pc_mem.io.commPtr_w := commPtr_write 717 ftq_pc_mem.io.commPtrPlus1_w := commPtrPlus1_write 718 719 720 io.toIfu.req.bits.ftqIdx := ifuPtr 721 722 val toICachePcBundle = Wire(Vec(copyNum,new Ftq_RF_Components)) 723 val toICacheEntryToSend = Wire(Vec(copyNum,Bool())) 724 val toIfuPcBundle = Wire(new Ftq_RF_Components) 725 val entry_is_to_send = WireInit(entry_fetch_status(ifuPtr.value) === f_to_send) 726 val entry_ftq_offset = WireInit(cfiIndex_vec(ifuPtr.value)) 727 val entry_next_addr = Wire(UInt(VAddrBits.W)) 728 729 val pc_mem_ifu_ptr_rdata = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtr_rdata))) 730 val pc_mem_ifu_plus1_rdata = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata))) 731 val diff_entry_next_addr = WireInit(update_target(ifuPtr.value)) //TODO: remove this 732 733 val copied_ifu_plus1_to_send = VecInit(Seq.fill(copyNum)(RegNext(entry_fetch_status(ifuPtrPlus1.value) === f_to_send) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === 
(ifuPtrPlus1)))) 734 val copied_ifu_ptr_to_send = VecInit(Seq.fill(copyNum)(RegNext(entry_fetch_status(ifuPtr.value) === f_to_send) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr))) 735 736 for(i <- 0 until copyNum){ 737 when(copied_last_cycle_bpu_in(i) && copied_bpu_in_bypass_ptr(i) === copied_ifu_ptr(i)){ 738 toICachePcBundle(i) := copied_bpu_in_bypass_buf(i) 739 toICacheEntryToSend(i) := true.B 740 }.elsewhen(copied_last_cycle_to_ifu_fire(i)){ 741 toICachePcBundle(i) := pc_mem_ifu_plus1_rdata(i) 742 toICacheEntryToSend(i) := copied_ifu_plus1_to_send(i) 743 }.otherwise{ 744 toICachePcBundle(i) := pc_mem_ifu_ptr_rdata(i) 745 toICacheEntryToSend(i) := copied_ifu_ptr_to_send(i) 746 } 747 } 748 749 // TODO: reconsider target address bypass logic 750 when (last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) { 751 toIfuPcBundle := bpu_in_bypass_buf_for_ifu 752 entry_is_to_send := true.B 753 entry_next_addr := last_cycle_bpu_target 754 entry_ftq_offset := last_cycle_cfiIndex 755 diff_entry_next_addr := last_cycle_bpu_target // TODO: remove this 756 }.elsewhen (last_cycle_to_ifu_fire) { 757 toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata) 758 entry_is_to_send := RegNext(entry_fetch_status(ifuPtrPlus1.value) === f_to_send) || 759 RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1)) // reduce potential bubbles 760 entry_next_addr := Mux(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1), 761 bpu_in_bypass_buf_for_ifu.startAddr, 762 Mux(ifuPtr === newest_entry_ptr, 763 newest_entry_target, 764 RegNext(ftq_pc_mem.io.ifuPtrPlus2_rdata.startAddr))) // ifuPtr+2 765 }.otherwise { 766 toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtr_rdata) 767 entry_is_to_send := RegNext(entry_fetch_status(ifuPtr.value) === f_to_send) || 768 RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) // reduce potential bubbles 769 entry_next_addr := Mux(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1), 770 bpu_in_bypass_buf_for_ifu.startAddr, 771 Mux(ifuPtr === newest_entry_ptr, 772 newest_entry_target, 773 RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata.startAddr))) // ifuPtr+1 774 } 775 776 io.toIfu.req.valid := entry_is_to_send && ifuPtr =/= bpuPtr 777 io.toIfu.req.bits.nextStartAddr := entry_next_addr 778 io.toIfu.req.bits.ftqOffset := entry_ftq_offset 779 io.toIfu.req.bits.fromFtqPcBundle(toIfuPcBundle) 780 781 io.toICache.req.valid := entry_is_to_send && ifuPtr =/= bpuPtr 782 io.toICache.req.bits.readValid.zipWithIndex.map{case(copy, i) => copy := toICacheEntryToSend(i) && copied_ifu_ptr(i) =/= copied_bpu_ptr(i)} 783 io.toICache.req.bits.pcMemRead.zipWithIndex.map{case(copy,i) => copy.fromFtqPcBundle(toICachePcBundle(i))} 784 // io.toICache.req.bits.bypassSelect := last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr 785 // io.toICache.req.bits.bpuBypassWrite.zipWithIndex.map{case(bypassWrtie, i) => 786 // bypassWrtie.startAddr := bpu_in_bypass_buf.tail(i).startAddr 787 // bypassWrtie.nextlineStart := bpu_in_bypass_buf.tail(i).nextLineAddr 788 // } 789 790 // TODO: remove this 791 XSError(io.toIfu.req.valid && diff_entry_next_addr =/= entry_next_addr, 792 p"\nifu_req_target wrong! 
ifuPtr: ${ifuPtr}, entry_next_addr: ${Hexadecimal(entry_next_addr)} diff_entry_next_addr: ${Hexadecimal(diff_entry_next_addr)}\n") 793 794 // when fall through is smaller in value than start address, there must be a false hit 795 when (toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit) { 796 when (io.toIfu.req.fire && 797 !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) && 798 !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr) 799 ) { 800 entry_hit_status(ifuPtr.value) := h_false_hit 801 // XSError(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallTHru: %x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.nextStartAddr) 802 } 803 XSDebug(true.B, "fallThruError! start:%x, fallThru:%x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.nextStartAddr) 804 } 805 806 XSPerfAccumulate(f"fall_through_error_to_ifu", toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit && 807 io.toIfu.req.fire && !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) && !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)) 808 809 val ifu_req_should_be_flushed = 810 io.toIfu.flushFromBpu.shouldFlushByStage2(io.toIfu.req.bits.ftqIdx) || 811 io.toIfu.flushFromBpu.shouldFlushByStage3(io.toIfu.req.bits.ftqIdx) 812 813 when (io.toIfu.req.fire && !ifu_req_should_be_flushed) { 814 entry_fetch_status(ifuPtr.value) := f_sent 815 } 816 817 // ********************************************************************* 818 // **************************** wb from ifu **************************** 819 // ********************************************************************* 820 val pdWb = io.fromIfu.pdWb 821 val pds = pdWb.bits.pd 822 val ifu_wb_valid = pdWb.valid 823 val ifu_wb_idx = pdWb.bits.ftqIdx.value 824 // read ports: commit update 825 val ftq_pd_mem = Module(new SyncDataModuleTemplate(new Ftq_pd_Entry, FtqSize, 1, 1)) 826 ftq_pd_mem.io.wen(0) := ifu_wb_valid 827 ftq_pd_mem.io.waddr(0) := pdWb.bits.ftqIdx.value 828 ftq_pd_mem.io.wdata(0).fromPdWb(pdWb.bits) 829 830 val hit_pd_valid = entry_hit_status(ifu_wb_idx) === h_hit && ifu_wb_valid 831 val hit_pd_mispred = hit_pd_valid && pdWb.bits.misOffset.valid 832 val hit_pd_mispred_reg = RegNext(hit_pd_mispred, init=false.B) 833 val pd_reg = RegEnable(pds, pdWb.valid) 834 val start_pc_reg = RegEnable(pdWb.bits.pc(0), pdWb.valid) 835 val wb_idx_reg = RegEnable(ifu_wb_idx, pdWb.valid) 836 837 when (ifu_wb_valid) { 838 val comm_stq_wen = VecInit(pds.map(_.valid).zip(pdWb.bits.instrRange).map{ 839 case (v, inRange) => v && inRange 840 }) 841 (commitStateQueue(ifu_wb_idx) zip comm_stq_wen).map{ 842 case (qe, v) => when (v) { qe := c_valid } 843 } 844 } 845 846 when (ifu_wb_valid) { 847 ifuWbPtr_write := ifuWbPtr + 1.U 848 } 849 850 XSError(ifu_wb_valid && isAfter(pdWb.bits.ftqIdx, ifuPtr), "IFU returned a predecode before its req, check IFU") 851 852 ftb_entry_mem.io.raddr.head := ifu_wb_idx 853 val has_false_hit = WireInit(false.B) 854 when (RegNext(hit_pd_valid)) { 855 // check for false hit 856 val pred_ftb_entry = ftb_entry_mem.io.rdata.head 857 val brSlots = pred_ftb_entry.brSlots 858 val tailSlot = pred_ftb_entry.tailSlot 859 // we check cfis that bpu predicted 860 861 // bpu predicted branches but denied by predecode 862 val br_false_hit = 863 brSlots.map{ 864 s => s.valid && !(pd_reg(s.offset).valid && pd_reg(s.offset).isBr) 865 }.reduce(_||_) || 866 (tailSlot.valid && pred_ftb_entry.tailSlot.sharing && 867 !(pd_reg(tailSlot.offset).valid && pd_reg(tailSlot.offset).isBr)) 868 869 val jmpOffset = tailSlot.offset 870 val 
jmp_pd = pd_reg(jmpOffset) 871 val jal_false_hit = pred_ftb_entry.jmpValid && 872 ((pred_ftb_entry.isJal && !(jmp_pd.valid && jmp_pd.isJal)) || 873 (pred_ftb_entry.isJalr && !(jmp_pd.valid && jmp_pd.isJalr)) || 874 (pred_ftb_entry.isCall && !(jmp_pd.valid && jmp_pd.isCall)) || 875 (pred_ftb_entry.isRet && !(jmp_pd.valid && jmp_pd.isRet)) 876 ) 877 878 has_false_hit := br_false_hit || jal_false_hit || hit_pd_mispred_reg 879 XSDebug(has_false_hit, "FTB false hit by br or jal or hit_pd, startAddr: %x\n", pdWb.bits.pc(0)) 880 881 // assert(!has_false_hit) 882 } 883 884 when (has_false_hit) { 885 entry_hit_status(wb_idx_reg) := h_false_hit 886 } 887 888 889 // ********************************************************************** 890 // ***************************** to backend ***************************** 891 // ********************************************************************** 892 // to backend pc mem / target 893 io.toBackend.pc_mem_wen := RegNext(last_cycle_bpu_in) 894 io.toBackend.pc_mem_waddr := RegNext(last_cycle_bpu_in_idx) 895 io.toBackend.pc_mem_wdata := RegNext(bpu_in_bypass_buf_for_ifu) 896 897 // ******************************************************************************* 898 // **************************** redirect from backend **************************** 899 // ******************************************************************************* 900 901 // redirect read cfiInfo, couples to redirectGen s2 902 val ftq_redirect_rdata = Wire(Vec(BackendRedirectNum, new Ftq_Redirect_SRAMEntry)) 903 val ftb_redirect_rdata = Wire(Vec(BackendRedirectNum, new FTBEntry)) 904 for (i <- 0 until BackendRedirectNum) { 905 ftq_redirect_sram.io.ren(i + 1) := io.fromBackend.ftqIdxAhead(i).valid 906 ftq_redirect_sram.io.raddr(i + 1) := io.fromBackend.ftqIdxAhead(i).bits.value 907 ftb_entry_mem.io.raddr(i + 1) := io.fromBackend.ftqIdxAhead(i).bits.value 908 909 ftq_redirect_rdata(i) := ftq_redirect_sram.io.rdata(i + 1) 910 ftb_redirect_rdata(i) := ftb_entry_mem.io.rdata(i + 1) 911 } 912 val stage3CfiInfo = Mux1H(io.fromBackend.ftqIdxSelOH.bits, ftq_redirect_rdata) 913 val fromBackendRedirect = WireInit(backendRedirect) 914 val backendRedirectCfi = fromBackendRedirect.bits.cfiUpdate 915 backendRedirectCfi.fromFtqRedirectSram(stage3CfiInfo) 916 917 918 val r_ftb_entry = Mux1H(io.fromBackend.ftqIdxSelOH.bits, ftb_redirect_rdata) 919 val r_ftqOffset = fromBackendRedirect.bits.ftqOffset 920 921 backendRedirectCfi.br_hit := r_ftb_entry.brIsSaved(r_ftqOffset) 922 backendRedirectCfi.jr_hit := r_ftb_entry.isJalr && r_ftb_entry.tailSlot.offset === r_ftqOffset 923 // FIXME: not portable 924 backendRedirectCfi.sc_hit := backendRedirectCfi.br_hit && Mux(r_ftb_entry.brSlots(0).offset === r_ftqOffset, 925 stage3CfiInfo.sc_disagree(0), stage3CfiInfo.sc_disagree(1)) 926 927 when (entry_hit_status(fromBackendRedirect.bits.ftqIdx.value) === h_hit) { 928 backendRedirectCfi.shift := PopCount(r_ftb_entry.getBrMaskByOffset(r_ftqOffset)) +& 929 (backendRedirectCfi.pd.isBr && !r_ftb_entry.brIsSaved(r_ftqOffset) && 930 !r_ftb_entry.newBrCanNotInsert(r_ftqOffset)) 931 932 backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr && (r_ftb_entry.brIsSaved(r_ftqOffset) || 933 !r_ftb_entry.newBrCanNotInsert(r_ftqOffset)) 934 }.otherwise { 935 backendRedirectCfi.shift := (backendRedirectCfi.pd.isBr && backendRedirectCfi.taken).asUInt 936 backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr.asUInt 937 } 938 939 940 // *************************************************************************** 941 // 
**************************** redirect from ifu **************************** 942 // *************************************************************************** 943 val fromIfuRedirect = WireInit(0.U.asTypeOf(Valid(new BranchPredictionRedirect))) 944 fromIfuRedirect.valid := pdWb.valid && pdWb.bits.misOffset.valid && !backendFlush 945 fromIfuRedirect.bits.ftqIdx := pdWb.bits.ftqIdx 946 fromIfuRedirect.bits.ftqOffset := pdWb.bits.misOffset.bits 947 fromIfuRedirect.bits.level := RedirectLevel.flushAfter 948 fromIfuRedirect.bits.BTBMissBubble := true.B 949 fromIfuRedirect.bits.debugIsMemVio := false.B 950 fromIfuRedirect.bits.debugIsCtrl := false.B 951 952 val ifuRedirectCfiUpdate = fromIfuRedirect.bits.cfiUpdate 953 ifuRedirectCfiUpdate.pc := pdWb.bits.pc(pdWb.bits.misOffset.bits) 954 ifuRedirectCfiUpdate.pd := pdWb.bits.pd(pdWb.bits.misOffset.bits) 955 ifuRedirectCfiUpdate.predTaken := cfiIndex_vec(pdWb.bits.ftqIdx.value).valid 956 ifuRedirectCfiUpdate.target := pdWb.bits.target 957 ifuRedirectCfiUpdate.taken := pdWb.bits.cfiOffset.valid 958 ifuRedirectCfiUpdate.isMisPred := pdWb.bits.misOffset.valid 959 960 val ifuRedirectReg = RegNext(fromIfuRedirect, init=0.U.asTypeOf(Valid(new BranchPredictionRedirect))) 961 val ifuRedirectToBpu = WireInit(ifuRedirectReg) 962 ifuFlush := fromIfuRedirect.valid || ifuRedirectToBpu.valid 963 964 ftq_redirect_sram.io.ren.head := fromIfuRedirect.valid 965 ftq_redirect_sram.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value 966 967 ftb_entry_mem.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value 968 969 val toBpuCfi = ifuRedirectToBpu.bits.cfiUpdate 970 toBpuCfi.fromFtqRedirectSram(ftq_redirect_sram.io.rdata.head) 971 when (ifuRedirectReg.bits.cfiUpdate.pd.isRet && ifuRedirectReg.bits.cfiUpdate.pd.valid) { 972 toBpuCfi.target := toBpuCfi.topAddr 973 } 974 975 when (ifuRedirectReg.valid) { 976 ifuRedirected(ifuRedirectReg.bits.ftqIdx.value) := true.B 977 } .elsewhen(RegNext(pdWb.valid)) { 978 // if pdWb and no redirect, set to false 979 ifuRedirected(last_cycle_bpu_in_ptr.value) := false.B 980 } 981 982 // ********************************************************************* 983 // **************************** wb from exu **************************** 984 // ********************************************************************* 985 986 backendRedirect.valid := io.fromBackend.redirect.valid 987 backendRedirect.bits.connectRedirect(io.fromBackend.redirect.bits) 988 backendRedirect.bits.BTBMissBubble := false.B 989 990 991 def extractRedirectInfo(wb: Valid[Redirect]) = { 992 val ftqPtr = wb.bits.ftqIdx 993 val ftqOffset = wb.bits.ftqOffset 994 val taken = wb.bits.cfiUpdate.taken 995 val mispred = wb.bits.cfiUpdate.isMisPred 996 (wb.valid, ftqPtr, ftqOffset, taken, mispred) 997 } 998 999 // fix mispredict entry 1000 val lastIsMispredict = RegNext( 1001 backendRedirect.valid && backendRedirect.bits.level === RedirectLevel.flushAfter, init = false.B 1002 ) 1003 1004 def updateCfiInfo(redirect: Valid[Redirect], isBackend: Boolean = true) = { 1005 val (r_valid, r_ptr, r_offset, r_taken, r_mispred) = extractRedirectInfo(redirect) 1006 val r_idx = r_ptr.value 1007 val cfiIndex_bits_wen = r_valid && r_taken && r_offset < cfiIndex_vec(r_idx).bits 1008 val cfiIndex_valid_wen = r_valid && r_offset === cfiIndex_vec(r_idx).bits 1009 when (cfiIndex_bits_wen || cfiIndex_valid_wen) { 1010 cfiIndex_vec(r_idx).valid := cfiIndex_bits_wen || cfiIndex_valid_wen && r_taken 1011 } .elsewhen (r_valid && !r_taken && r_offset =/= cfiIndex_vec(r_idx).bits) { 1012 cfiIndex_vec(r_idx).valid 
:=false.B 1013 } 1014 when (cfiIndex_bits_wen) { 1015 cfiIndex_vec(r_idx).bits := r_offset 1016 } 1017 newest_entry_target := redirect.bits.cfiUpdate.target 1018 newest_entry_ptr := r_ptr 1019 update_target(r_idx) := redirect.bits.cfiUpdate.target // TODO: remove this 1020 if (isBackend) { 1021 mispredict_vec(r_idx)(r_offset) := r_mispred 1022 } 1023 } 1024 1025 when(backendRedirect.valid) { 1026 updateCfiInfo(backendRedirect) 1027 }.elsewhen (ifuRedirectToBpu.valid) { 1028 updateCfiInfo(ifuRedirectToBpu, isBackend=false) 1029 } 1030 1031 when (backendRedirect.valid) { 1032 when (backendRedirect.bits.ControlRedirectBubble) { 1033 when (fromBackendRedirect.bits.ControlBTBMissBubble) { 1034 topdown_stage.reasons(TopDownCounters.BTBMissBubble.id) := true.B 1035 io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.BTBMissBubble.id) := true.B 1036 } .elsewhen (fromBackendRedirect.bits.TAGEMissBubble) { 1037 topdown_stage.reasons(TopDownCounters.TAGEMissBubble.id) := true.B 1038 io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.TAGEMissBubble.id) := true.B 1039 } .elsewhen (fromBackendRedirect.bits.SCMissBubble) { 1040 topdown_stage.reasons(TopDownCounters.SCMissBubble.id) := true.B 1041 io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.SCMissBubble.id) := true.B 1042 } .elsewhen (fromBackendRedirect.bits.ITTAGEMissBubble) { 1043 topdown_stage.reasons(TopDownCounters.ITTAGEMissBubble.id) := true.B 1044 io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.ITTAGEMissBubble.id) := true.B 1045 } .elsewhen (fromBackendRedirect.bits.RASMissBubble) { 1046 topdown_stage.reasons(TopDownCounters.RASMissBubble.id) := true.B 1047 io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.RASMissBubble.id) := true.B 1048 } 1049 1050 1051 } .elsewhen (backendRedirect.bits.MemVioRedirectBubble) { 1052 topdown_stage.reasons(TopDownCounters.MemVioRedirectBubble.id) := true.B 1053 io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.MemVioRedirectBubble.id) := true.B 1054 } .otherwise { 1055 topdown_stage.reasons(TopDownCounters.OtherRedirectBubble.id) := true.B 1056 io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.OtherRedirectBubble.id) := true.B 1057 } 1058 } .elsewhen (ifuRedirectReg.valid) { 1059 topdown_stage.reasons(TopDownCounters.BTBMissBubble.id) := true.B 1060 io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.BTBMissBubble.id) := true.B 1061 } 1062 1063 io.ControlBTBMissBubble := fromBackendRedirect.bits.ControlBTBMissBubble 1064 io.TAGEMissBubble := fromBackendRedirect.bits.TAGEMissBubble 1065 io.SCMissBubble := fromBackendRedirect.bits.SCMissBubble 1066 io.ITTAGEMissBubble := fromBackendRedirect.bits.ITTAGEMissBubble 1067 io.RASMissBubble := fromBackendRedirect.bits.RASMissBubble 1068 1069 // *********************************************************************************** 1070 // **************************** flush ptr and state queue **************************** 1071 // *********************************************************************************** 1072 1073 val redirectVec = VecInit(backendRedirect, fromIfuRedirect) 1074 1075 // when redirect, we should reset ptrs and status queues 1076 when(redirectVec.map(r => r.valid).reduce(_||_)){ 1077 val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits))) 1078 val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_) 1079 val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level)) 1080 val next = idx + 1.U 1081 bpuPtr := next 1082 copied_bpu_ptr.map(_ := next) 1083 ifuPtr_write 
:= next 1084 ifuWbPtr_write := next 1085 ifuPtrPlus1_write := idx + 2.U 1086 ifuPtrPlus2_write := idx + 3.U 1087 1088 } 1089 when(RegNext(redirectVec.map(r => r.valid).reduce(_||_))){ 1090 val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits))) 1091 val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_) 1092 val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level)) 1093 when (RegNext(notIfu)) { 1094 commitStateQueue(RegNext(idx.value)).zipWithIndex.foreach({ case (s, i) => 1095 when(i.U > RegNext(offset) || i.U === RegNext(offset) && RegNext(flushItSelf)){ 1096 s := c_invalid 1097 } 1098 }) 1099 } 1100 } 1101 1102 1103 // only the valid bit is actually needed 1104 io.toIfu.redirect.bits := backendRedirect.bits 1105 io.toIfu.redirect.valid := stage2Flush 1106 io.toIfu.topdown_redirect := fromBackendRedirect 1107 1108 // commit 1109 for (c <- io.fromBackend.rob_commits) { 1110 when(c.valid) { 1111 commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset) := c_commited 1112 // TODO: remove this 1113 // For instruction fusions, we also update the next instruction 1114 when (c.bits.commitType === 4.U) { 1115 commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 1.U) := c_commited 1116 }.elsewhen(c.bits.commitType === 5.U) { 1117 commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 2.U) := c_commited 1118 }.elsewhen(c.bits.commitType === 6.U) { 1119 val index = (c.bits.ftqIdx + 1.U).value 1120 commitStateQueue(index)(0) := c_commited 1121 }.elsewhen(c.bits.commitType === 7.U) { 1122 val index = (c.bits.ftqIdx + 1.U).value 1123 commitStateQueue(index)(1) := c_commited 1124 } 1125 } 1126 } 1127 1128 // **************************************************************** 1129 // **************************** to bpu **************************** 1130 // **************************************************************** 1131 1132 io.toBpu.redirect := Mux(fromBackendRedirect.valid, fromBackendRedirect, ifuRedirectToBpu) 1133 val dummy_s1_pred_cycle_vec = VecInit(List.tabulate(FtqSize)(_=>0.U(64.W))) 1134 val redirect_latency = GTimer() - pred_s1_cycle.getOrElse(dummy_s1_pred_cycle_vec)(io.toBpu.redirect.bits.ftqIdx.value) + 1.U 1135 XSPerfHistogram("backend_redirect_latency", redirect_latency, fromBackendRedirect.valid, 0, 60, 1) 1136 XSPerfHistogram("ifu_redirect_latency", redirect_latency, !fromBackendRedirect.valid && ifuRedirectToBpu.valid, 0, 60, 1) 1137 1138 XSError(io.toBpu.redirect.valid && isBefore(io.toBpu.redirect.bits.ftqIdx, commPtr), "Ftq received a redirect after its commit, check backend or replay") 1139 1140 val may_have_stall_from_bpu = Wire(Bool()) 1141 val bpu_ftb_update_stall = RegInit(0.U(2.W)) // 2-cycle stall, so we need 3 states 1142 may_have_stall_from_bpu := bpu_ftb_update_stall =/= 0.U 1143 canCommit := commPtr =/= ifuWbPtr && !may_have_stall_from_bpu && 1144 Cat(commitStateQueue(commPtr.value).map(s => { 1145 s === c_invalid || s === c_commited 1146 })).andR 1147 1148 val mmioReadPtr = io.mmioCommitRead.mmioFtqPtr 1149 val mmioLastCommit = isBefore(commPtr, mmioReadPtr) && (isAfter(ifuPtr,mmioReadPtr) || mmioReadPtr === ifuPtr) && 1150 Cat(commitStateQueue(mmioReadPtr.value).map(s => { s === c_invalid || s === c_commited})).andR 1151 io.mmioCommitRead.mmioLastCommit := RegNext(mmioLastCommit) 1152 1153 // commit reads 1154 val commit_pc_bundle = RegNext(ftq_pc_mem.io.commPtr_rdata) 1155 val commit_target = 1156 Mux(RegNext(commPtr === newest_entry_ptr), 1157 RegNext(newest_entry_target), 1158 
RegNext(ftq_pc_mem.io.commPtrPlus1_rdata.startAddr)) 1159 ftq_pd_mem.io.raddr.last := commPtr.value 1160 val commit_pd = ftq_pd_mem.io.rdata.last 1161 ftq_redirect_sram.io.ren.last := canCommit 1162 ftq_redirect_sram.io.raddr.last := commPtr.value 1163 val commit_spec_meta = ftq_redirect_sram.io.rdata.last 1164 ftq_meta_1r_sram.io.ren(0) := canCommit 1165 ftq_meta_1r_sram.io.raddr(0) := commPtr.value 1166 val commit_meta = ftq_meta_1r_sram.io.rdata(0) 1167 ftb_entry_mem.io.raddr.last := commPtr.value 1168 val commit_ftb_entry = ftb_entry_mem.io.rdata.last 1169 1170 // need one cycle to read mem and srams 1171 val do_commit_ptr = RegNext(commPtr) 1172 val do_commit = RegNext(canCommit, init=false.B) 1173 when (canCommit) { 1174 commPtr_write := commPtrPlus1 1175 commPtrPlus1_write := commPtrPlus1 + 1.U 1176 } 1177 val commit_state = RegNext(commitStateQueue(commPtr.value)) 1178 val can_commit_cfi = WireInit(cfiIndex_vec(commPtr.value)) 1179 val do_commit_cfi = WireInit(cfiIndex_vec(do_commit_ptr.value)) 1180 // 1181 //when (commitStateQueue(commPtr.value)(can_commit_cfi.bits) =/= c_commited) { 1182 // can_commit_cfi.valid := false.B 1183 //} 1184 val commit_cfi = RegNext(can_commit_cfi) 1185 val debug_cfi = commitStateQueue(do_commit_ptr.value)(do_commit_cfi.bits) =/= c_commited && do_commit_cfi.valid 1186 1187 val commit_mispredict : Vec[Bool] = VecInit((RegNext(mispredict_vec(commPtr.value)) zip commit_state).map { 1188 case (mis, state) => mis && state === c_commited 1189 }) 1190 val commit_instCommited: Vec[Bool] = VecInit(commit_state.map(_ === c_commited)) // [PredictWidth] 1191 val can_commit_hit = entry_hit_status(commPtr.value) 1192 val commit_hit = RegNext(can_commit_hit) 1193 val diff_commit_target = RegNext(update_target(commPtr.value)) // TODO: remove this 1194 val commit_stage = RegNext(pred_stage(commPtr.value)) 1195 val commit_valid = commit_hit === h_hit || commit_cfi.valid // hit or taken 1196 1197 val to_bpu_hit = can_commit_hit === h_hit || can_commit_hit === h_false_hit 1198 switch (bpu_ftb_update_stall) { 1199 is (0.U) { 1200 when (can_commit_cfi.valid && !to_bpu_hit && canCommit) { 1201 bpu_ftb_update_stall := 2.U // 2-cycle stall 1202 } 1203 } 1204 is (2.U) { 1205 bpu_ftb_update_stall := 1.U 1206 } 1207 is (1.U) { 1208 bpu_ftb_update_stall := 0.U 1209 } 1210 is (3.U) { 1211 XSError(true.B, "bpu_ftb_update_stall should be 0, 1 or 2") 1212 } 1213 } 1214 1215 // TODO: remove this 1216 XSError(do_commit && diff_commit_target =/= commit_target, "\ncommit target should be the same as update target\n") 1217 1218 // update latency stats 1219 val update_latency = GTimer() - pred_s1_cycle.getOrElse(dummy_s1_pred_cycle_vec)(do_commit_ptr.value) + 1.U 1220 XSPerfHistogram("bpu_update_latency", update_latency, io.toBpu.update.valid, 0, 64, 2) 1221 1222 io.toBpu.update := DontCare 1223 io.toBpu.update.valid := commit_valid && do_commit 1224 val update = io.toBpu.update.bits 1225 update.false_hit := commit_hit === h_false_hit 1226 update.pc := commit_pc_bundle.startAddr 1227 update.meta := commit_meta.meta 1228 update.cfi_idx := commit_cfi 1229 update.full_target := commit_target 1230 update.from_stage := commit_stage 1231 update.spec_info := commit_spec_meta 1232 XSError(commit_valid && do_commit && debug_cfi, "\ncommit cfi can be non c_commited\n") 1233 1234 val commit_real_hit = commit_hit === h_hit 1235 val update_ftb_entry = update.ftb_entry 1236 1237 val ftbEntryGen = Module(new FTBEntryGen).io 1238 ftbEntryGen.start_addr := commit_pc_bundle.startAddr 1239 
ftbEntryGen.old_entry := commit_ftb_entry 1240 ftbEntryGen.pd := commit_pd 1241 ftbEntryGen.cfiIndex := commit_cfi 1242 ftbEntryGen.target := commit_target 1243 ftbEntryGen.hit := commit_real_hit 1244 ftbEntryGen.mispredict_vec := commit_mispredict 1245 1246 update_ftb_entry := ftbEntryGen.new_entry 1247 update.new_br_insert_pos := ftbEntryGen.new_br_insert_pos 1248 update.mispred_mask := ftbEntryGen.mispred_mask 1249 update.old_entry := ftbEntryGen.is_old_entry 1250 update.pred_hit := commit_hit === h_hit || commit_hit === h_false_hit 1251 update.br_taken_mask := ftbEntryGen.taken_mask 1252 update.br_committed := (ftbEntryGen.new_entry.brValids zip ftbEntryGen.new_entry.brOffset) map { 1253 case (valid, offset) => valid && commit_instCommited(offset) 1254 } 1255 update.jmp_taken := ftbEntryGen.jmp_taken 1256 1257 // update.full_pred.fromFtbEntry(ftbEntryGen.new_entry, update.pc) 1258 // update.full_pred.jalr_target := commit_target 1259 // update.full_pred.hit := true.B 1260 // when (update.full_pred.is_jalr) { 1261 // update.full_pred.targets.last := commit_target 1262 // } 1263 1264 // **************************************************************** 1265 // *********************** to prefetch **************************** 1266 // **************************************************************** 1267 /** 1268 ****************************************************************************** 1269 * prefetchPtr control 1270 * - 1. prefetchPtr plus 1 when toPrefetch fire and keep distance from bpuPtr more than 2 1271 * - 2. limit range of prefetchPtr is in [ifuPtr + minRange, ifuPtr + maxRange] 1272 * - 3. flush prefetchPtr when receive redirect from ifu or backend 1273 ****************************************************************************** 1274 */ 1275 val prefetchPtr = RegInit(FtqPtr(false.B, 0.U)) 1276 val nextPrefetchPtr = WireInit(prefetchPtr) 1277 1278 prefetchPtr := nextPrefetchPtr 1279 1280 // TODO: consider req which cross cacheline 1281 when(io.toPrefetch.req.fire) { 1282 when(prefetchPtr < bpuPtr - 2.U) { 1283 nextPrefetchPtr := prefetchPtr + 1.U 1284 } 1285 } 1286 1287 when(prefetchPtr < ifuPtr + minRangeFromIFUptr.U) { 1288 nextPrefetchPtr := ifuPtr + minRangeFromIFUptr.U 1289 }.elsewhen(prefetchPtr > ifuPtr + maxRangeFromIFUptr.U) { 1290 nextPrefetchPtr := ifuPtr + maxRangeFromIFUptr.U 1291 } 1292 1293 when(redirectVec.map(r => r.valid).reduce(_||_)){ 1294 val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits))) 1295 val next = r.ftqIdx + minRangeFromIFUptr.U 1296 nextPrefetchPtr := next 1297 } 1298 1299 // data from ftq_pc_mem has 1 cycle delay 1300 io.toPrefetch.req.valid := RegNext(entry_fetch_status(nextPrefetchPtr.value) === f_to_send) 1301 ftq_pc_mem.io.other_raddrs(0) := nextPrefetchPtr.value 1302 io.toPrefetch.req.bits.target := RegNext(ftq_pc_mem.io.other_rdatas(0).startAddr) 1303 1304 // record position relationship between ifuPtr, pfPtr and bpuPtr 1305 val isWritePrefetchPtrTable = WireInit(Constantin.createRecord("isWritePrefetchPtrTable" + p(XSCoreParamsKey).HartId.toString)) 1306 val prefetchPtrTable = ChiselDB.createTable("PrefetchPtrTable" + p(XSCoreParamsKey).HartId.toString, new PrefetchPtrDB) 1307 val prefetchPtrDumpData = Wire(new PrefetchPtrDB) 1308 prefetchPtrDumpData.fromFtqPtr := distanceBetween(bpuPtr, prefetchPtr) 1309 prefetchPtrDumpData.fromIfuPtr := distanceBetween(prefetchPtr, ifuPtr) 1310 1311 prefetchPtrTable.log( 1312 data = prefetchPtrDumpData, 1313 en = isWritePrefetchPtrTable.orR && io.toPrefetch.req.fire, 1314 site = "FTQ" + 

  // ******************************************************************************
  // **************************** commit perf counters ****************************
  // ******************************************************************************

  val commit_inst_mask        = VecInit(commit_state.map(c => c === c_commited && do_commit)).asUInt
  val commit_mispred_mask     = commit_mispredict.asUInt
  val commit_not_mispred_mask = ~commit_mispred_mask

  val commit_br_mask  = commit_pd.brMask.asUInt
  val commit_jmp_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.jmpInfo.valid.asTypeOf(UInt(1.W)))
  val commit_cfi_mask = (commit_br_mask | commit_jmp_mask)

  val mbpInstrs = commit_inst_mask & commit_cfi_mask

  val mbpRights = mbpInstrs & commit_not_mispred_mask
  val mbpWrongs = mbpInstrs & commit_mispred_mask

  io.bpuInfo.bpRight := PopCount(mbpRights)
  io.bpuInfo.bpWrong := PopCount(mbpWrongs)

  val isWriteFTQTable = WireInit(Constantin.createRecord("isWriteFTQTable" + p(XSCoreParamsKey).HartId.toString))
  val ftqBranchTraceDB = ChiselDB.createTable("FTQTable" + p(XSCoreParamsKey).HartId.toString, new FtqDebugBundle)
  // Cfi Info
  for (i <- 0 until PredictWidth) {
    val pc = commit_pc_bundle.startAddr + (i * instBytes).U
    val v = commit_state(i) === c_commited
    val isBr = commit_pd.brMask(i)
    val isJmp = commit_pd.jmpInfo.valid && commit_pd.jmpOffset === i.U
    val isCfi = isBr || isJmp
    val isTaken = commit_cfi.valid && commit_cfi.bits === i.U
    val misPred = commit_mispredict(i)
    // val ghist = commit_spec_meta.ghist.predHist
    val histPtr = commit_spec_meta.histPtr
    val predCycle = commit_meta.meta(63, 0)
    val target = commit_target

    val brIdx = OHToUInt(Reverse(Cat(update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U})))
    val inFtbEntry = update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U}.reduce(_||_)
    val addIntoHist = ((commit_hit === h_hit) && inFtbEntry) || ((!(commit_hit === h_hit) && i.U === commit_cfi.bits && isBr && commit_cfi.valid))
    XSDebug(v && do_commit && isCfi, p"cfi_update: isBr(${isBr}) pc(${Hexadecimal(pc)}) " +
      p"taken(${isTaken}) mispred(${misPred}) cycle($predCycle) hist(${histPtr.value}) " +
      p"startAddr(${Hexadecimal(commit_pc_bundle.startAddr)}) AddIntoHist(${addIntoHist}) " +
      p"brInEntry(${inFtbEntry}) brIdx(${brIdx}) target(${Hexadecimal(target)})\n")

    val logbundle = Wire(new FtqDebugBundle)
    logbundle.pc := pc
    logbundle.target := target
    logbundle.isBr := isBr
    logbundle.isJmp := isJmp
    logbundle.isCall := isJmp && commit_pd.hasCall
    logbundle.isRet := isJmp && commit_pd.hasRet
    logbundle.misPred := misPred
    logbundle.isTaken := isTaken
    logbundle.predStage := commit_stage

    ftqBranchTraceDB.log(
      data = logbundle /* hardware of type T */,
      en = isWriteFTQTable.orR && v && do_commit && isCfi,
      site = "FTQ" + p(XSCoreParamsKey).HartId.toString,
      clock = clock,
      reset = reset
    )
  }
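  // FTQ occupancy and redirect statistics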
  val enq = io.fromBpu.resp
  val perf_redirect = backendRedirect

  XSPerfAccumulate("entry", validEntries)
  XSPerfAccumulate("bpu_to_ftq_stall", enq.valid && !enq.ready)
  XSPerfAccumulate("mispredictRedirect", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level)
  XSPerfAccumulate("replayRedirect", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level))
  XSPerfAccumulate("predecodeRedirect", fromIfuRedirect.valid)

  XSPerfAccumulate("to_ifu_bubble", io.toIfu.req.ready && !io.toIfu.req.valid)

  XSPerfAccumulate("to_ifu_stall", io.toIfu.req.valid && !io.toIfu.req.ready)
  XSPerfAccumulate("from_bpu_real_bubble", !enq.valid && enq.ready && allowBpuIn)
  XSPerfAccumulate("bpu_to_ifu_bubble", bpuPtr === ifuPtr)
  XSPerfAccumulate("bpu_to_ifu_bubble_when_ftq_full", (bpuPtr === ifuPtr) && isFull(bpuPtr, commPtr) && io.toIfu.req.ready)

  XSPerfAccumulate("redirectAhead_ValidNum", io.fromBackend.ftqIdxAhead.map(_.valid).reduce(_|_))
  XSPerfAccumulate("fromBackendRedirect_ValidNum", io.fromBackend.redirect.valid)
  XSPerfAccumulate("toBpuRedirect_ValidNum", io.toBpu.redirect.valid)

  val from_bpu = io.fromBpu.resp.bits
  val to_ifu = io.toIfu.req.bits

  XSPerfHistogram("commit_num_inst", PopCount(commit_inst_mask), do_commit, 0, PredictWidth+1, 1)

  val commit_jal_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJal.asTypeOf(UInt(1.W)))
  val commit_jalr_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJalr.asTypeOf(UInt(1.W)))
  val commit_call_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasCall.asTypeOf(UInt(1.W)))
  val commit_ret_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasRet.asTypeOf(UInt(1.W)))

  val mbpBRights = mbpRights & commit_br_mask
  val mbpJRights = mbpRights & commit_jal_mask
  val mbpIRights = mbpRights & commit_jalr_mask
  val mbpCRights = mbpRights & commit_call_mask
  val mbpRRights = mbpRights & commit_ret_mask

  val mbpBWrongs = mbpWrongs & commit_br_mask
  val mbpJWrongs = mbpWrongs & commit_jal_mask
  val mbpIWrongs = mbpWrongs & commit_jalr_mask
  val mbpCWrongs = mbpWrongs & commit_call_mask
  val mbpRWrongs = mbpWrongs & commit_ret_mask

  val commit_pred_stage = RegNext(pred_stage(commPtr.value))

  def pred_stage_map(src: UInt, name: String) = {
    (0 until numBpStages).map(i =>
      f"${name}_stage_${i+1}" -> PopCount(src.asBools.map(_ && commit_pred_stage === BP_STAGES(i)))
    ).foldLeft(Map[String, UInt]())(_+_)
  }

  val mispred_stage_map      = pred_stage_map(mbpWrongs,  "mispredict")
  val br_mispred_stage_map   = pred_stage_map(mbpBWrongs, "br_mispredict")
  val jalr_mispred_stage_map = pred_stage_map(mbpIWrongs, "jalr_mispredict")
  val correct_stage_map      = pred_stage_map(mbpRights,  "correct")
  val br_correct_stage_map   = pred_stage_map(mbpBRights, "br_correct")
  val jalr_correct_stage_map = pred_stage_map(mbpIRights, "jalr_correct")

  val update_valid = io.toBpu.update.valid
  def u(cond: Bool) = update_valid && cond
  val ftb_false_hit = u(update.false_hit)
  // assert(!ftb_false_hit)
  val ftb_hit = u(commit_hit === h_hit)
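  // Classify each BPU update by how FTBEntryGen produced the entry: freshly initialized,
  // reused unchanged, or modified (new branch inserted, jalr target corrected, or
  // always-taken bit changed).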
  val ftb_new_entry = u(ftbEntryGen.is_init_entry)
  val ftb_new_entry_only_br = ftb_new_entry && !update_ftb_entry.jmpValid
  val ftb_new_entry_only_jmp = ftb_new_entry && !update_ftb_entry.brValids(0)
  val ftb_new_entry_has_br_and_jmp = ftb_new_entry && update_ftb_entry.brValids(0) && update_ftb_entry.jmpValid

  val ftb_old_entry = u(ftbEntryGen.is_old_entry)

  val ftb_modified_entry = u(ftbEntryGen.is_new_br || ftbEntryGen.is_jalr_target_modified || ftbEntryGen.is_always_taken_modified)
  val ftb_modified_entry_new_br = u(ftbEntryGen.is_new_br)
  val ftb_modified_entry_ifu_redirected = u(ifuRedirected(do_commit_ptr.value))
  val ftb_modified_entry_jalr_target_modified = u(ftbEntryGen.is_jalr_target_modified)
  val ftb_modified_entry_br_full = ftb_modified_entry && ftbEntryGen.is_br_full
  val ftb_modified_entry_always_taken = ftb_modified_entry && ftbEntryGen.is_always_taken_modified

  def getFtbEntryLen(pc: UInt, entry: FTBEntry) = (entry.getFallThrough(pc) - pc) >> instOffsetBits
  val gen_ftb_entry_len = getFtbEntryLen(update.pc, ftbEntryGen.new_entry)
  XSPerfHistogram("ftb_init_entry_len", gen_ftb_entry_len, ftb_new_entry, 0, PredictWidth+1, 1)
  XSPerfHistogram("ftb_modified_entry_len", gen_ftb_entry_len, ftb_modified_entry, 0, PredictWidth+1, 1)
  val s3_ftb_entry_len = getFtbEntryLen(from_bpu.s3.pc(0), from_bpu.last_stage_ftb_entry)
  XSPerfHistogram("s3_ftb_entry_len", s3_ftb_entry_len, from_bpu.s3.valid(0), 0, PredictWidth+1, 1)

  XSPerfHistogram("ftq_has_entry", validEntries, true.B, 0, FtqSize+1, 1)

  val perfCountsMap = Map(
    "BpInstr"  -> PopCount(mbpInstrs),
    "BpBInstr" -> PopCount(mbpBRights | mbpBWrongs),
    "BpRight"  -> PopCount(mbpRights),
    "BpWrong"  -> PopCount(mbpWrongs),
    "BpBRight" -> PopCount(mbpBRights),
    "BpBWrong" -> PopCount(mbpBWrongs),
    "BpJRight" -> PopCount(mbpJRights),
    "BpJWrong" -> PopCount(mbpJWrongs),
    "BpIRight" -> PopCount(mbpIRights),
    "BpIWrong" -> PopCount(mbpIWrongs),
    "BpCRight" -> PopCount(mbpCRights),
    "BpCWrong" -> PopCount(mbpCWrongs),
    "BpRRight" -> PopCount(mbpRRights),
    "BpRWrong" -> PopCount(mbpRWrongs),

    "ftb_false_hit"                   -> PopCount(ftb_false_hit),
    "ftb_hit"                         -> PopCount(ftb_hit),
    "ftb_new_entry"                   -> PopCount(ftb_new_entry),
    "ftb_new_entry_only_br"           -> PopCount(ftb_new_entry_only_br),
    "ftb_new_entry_only_jmp"          -> PopCount(ftb_new_entry_only_jmp),
    "ftb_new_entry_has_br_and_jmp"    -> PopCount(ftb_new_entry_has_br_and_jmp),
    "ftb_old_entry"                   -> PopCount(ftb_old_entry),
    "ftb_modified_entry"              -> PopCount(ftb_modified_entry),
    "ftb_modified_entry_new_br"       -> PopCount(ftb_modified_entry_new_br),
    "ftb_jalr_target_modified"        -> PopCount(ftb_modified_entry_jalr_target_modified),
    "ftb_modified_entry_br_full"      -> PopCount(ftb_modified_entry_br_full),
    "ftb_modified_entry_always_taken" -> PopCount(ftb_modified_entry_always_taken)
  ) ++ mispred_stage_map ++ br_mispred_stage_map ++ jalr_mispred_stage_map ++
    correct_stage_map ++ br_correct_stage_map ++ jalr_correct_stage_map

  for((key, value) <- perfCountsMap) {
    XSPerfAccumulate(key, value)
  }
  // --------------------------- Debug --------------------------------
  // XSDebug(enq_fire, p"enq! " + io.fromBpu.resp.bits.toPrintable)
  XSDebug(io.toIfu.req.fire, p"fire to ifu " + io.toIfu.req.bits.toPrintable)
  XSDebug(do_commit, p"deq! [ptr] $do_commit_ptr\n")
  XSDebug(true.B, p"[bpuPtr] $bpuPtr, [ifuPtr] $ifuPtr, [ifuWbPtr] $ifuWbPtr [commPtr] $commPtr\n")
  XSDebug(true.B, p"[in] v:${io.fromBpu.resp.valid} r:${io.fromBpu.resp.ready} " +
    p"[out] v:${io.toIfu.req.valid} r:${io.toIfu.req.ready}\n")
  XSDebug(do_commit, p"[deq info] cfiIndex: $commit_cfi, $commit_pc_bundle, target: ${Hexadecimal(commit_target)}\n")

  // def ubtbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //   commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //     case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ Mux(ans.hit.asBool,
  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
  //             !taken),
  //           !taken),
  //         false.B)
  //   }
  // }

  // def btbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //   commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //     case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ Mux(ans.hit.asBool,
  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
  //             !taken),
  //           !taken),
  //         false.B)
  //   }
  // }

  // def tageCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //   commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //     case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ (ans.taken.asBool === taken),
  //         false.B)
  //   }
  // }

  // def loopCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //   commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //     case (((valid, pd), ans), taken) =>
  //       Mux(valid && (pd.isBr) && ans.hit.asBool,
  //         isWrong ^ (!taken),
  //         false.B)
  //   }
  // }

  // def rasCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //   commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //     case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isRet.asBool /*&& taken*/ && ans.hit.asBool,
  //         isWrong ^ (ans.target === commitEntry.target),
  //         false.B)
  //   }
  // }

  // val ubtbRights = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), false.B)
  // val ubtbWrongs = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), true.B)
  // // btb and ubtb pred jal and jalr as well
  // val btbRights = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), false.B)
  // val btbWrongs = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), true.B)
  // val tageRights = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), false.B)
  // val tageWrongs = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), true.B)

  // val loopRights = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), false.B)
  // val loopWrongs = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), true.B)

  // val rasRights = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), false.B)
  // val rasWrongs = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), true.B)
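  // Selected counters exported as hardware perf events, registered by generatePerfEvent() below.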
  val perfEvents = Seq(
    ("bpu_s2_redirect        ", bpu_s2_redirect                                                              ),
    ("bpu_s3_redirect        ", bpu_s3_redirect                                                              ),
    ("bpu_to_ftq_stall       ", enq.valid && ~enq.ready                                                      ),
    ("mispredictRedirect     ", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level ),
    ("replayRedirect         ", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level)   ),
    ("predecodeRedirect      ", fromIfuRedirect.valid                                                        ),
    ("to_ifu_bubble          ", io.toIfu.req.ready && !io.toIfu.req.valid                                    ),
    ("from_bpu_real_bubble   ", !enq.valid && enq.ready && allowBpuIn                                        ),
    ("BpInstr                ", PopCount(mbpInstrs)                                                          ),
    ("BpBInstr               ", PopCount(mbpBRights | mbpBWrongs)                                            ),
    ("BpRight                ", PopCount(mbpRights)                                                          ),
    ("BpWrong                ", PopCount(mbpWrongs)                                                          ),
    ("BpBRight               ", PopCount(mbpBRights)                                                         ),
    ("BpBWrong               ", PopCount(mbpBWrongs)                                                         ),
    ("BpJRight               ", PopCount(mbpJRights)                                                         ),
    ("BpJWrong               ", PopCount(mbpJWrongs)                                                         ),
    ("BpIRight               ", PopCount(mbpIRights)                                                         ),
    ("BpIWrong               ", PopCount(mbpIWrongs)                                                         ),
    ("BpCRight               ", PopCount(mbpCRights)                                                         ),
    ("BpCWrong               ", PopCount(mbpCWrongs)                                                         ),
    ("BpRRight               ", PopCount(mbpRRights)                                                         ),
    ("BpRWrong               ", PopCount(mbpRWrongs)                                                         ),
    ("ftb_false_hit          ", PopCount(ftb_false_hit)                                                      ),
    ("ftb_hit                ", PopCount(ftb_hit)                                                            ),
  )
  generatePerfEvent()
}