/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.frontend

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils.{AsyncDataModuleTemplate, CircularQueuePtr, DataModuleTemplate, HasCircularQueuePtrHelper, SRAMTemplate, SyncDataModuleTemplate, XSDebug, XSPerfAccumulate, PerfBundle, PerfEventsBundle, XSError}
import xiangshan._
import utils.{ParallelPriorityMux, ParallelPriorityEncoder}
import xiangshan.backend.{CtrlToFtqIO}
import firrtl.annotations.MemoryLoadFileType

class FtqPtr(implicit p: Parameters) extends CircularQueuePtr[FtqPtr](
  p => p(XSCoreParamsKey).FtqSize
){
  override def cloneType = (new FtqPtr).asInstanceOf[this.type]
}

object FtqPtr {
  def apply(f: Bool, v: UInt)(implicit p: Parameters): FtqPtr = {
    val ptr = Wire(new FtqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
  def inverse(ptr: FtqPtr)(implicit p: Parameters): FtqPtr = {
    apply(!ptr.flag, ptr.value)
  }
}

class FtqNRSRAM[T <: Data](gen: T, numRead: Int)(implicit p: Parameters) extends XSModule {

  val io = IO(new Bundle() {
    val raddr = Input(Vec(numRead, UInt(log2Up(FtqSize).W)))
    val ren = Input(Vec(numRead, Bool()))
    val rdata = Output(Vec(numRead, gen))
    val waddr = Input(UInt(log2Up(FtqSize).W))
    val wen = Input(Bool())
    val wdata = Input(gen)
  })

  // one single-port SRAM copy per read port; writes are broadcast to every copy
  for(i <- 0 until numRead){
    val sram = Module(new SRAMTemplate(gen, FtqSize))
    sram.io.r.req.valid := io.ren(i)
    sram.io.r.req.bits.setIdx := io.raddr(i)
    io.rdata(i) := sram.io.r.resp.data(0)
    sram.io.w.req.valid := io.wen
    sram.io.w.req.bits.setIdx := io.waddr
    sram.io.w.req.bits.data := VecInit(io.wdata)
  }

}

class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils {
  // TODO: move pftAddr, oversize, carry to another mem
  val startAddr = UInt(VAddrBits.W)
  val nextRangeAddr = UInt(VAddrBits.W)
  val pftAddr = UInt((log2Ceil(PredictWidth)+1).W)
  val isNextMask = Vec(PredictWidth, Bool())
  val oversize = Bool()
  val carry = Bool()
  def getPc(offset: UInt) = {
    def getHigher(pc: UInt) = pc(VAddrBits-1, log2Ceil(PredictWidth)+instOffsetBits)
    def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth)+instOffsetBits-1, instOffsetBits)
    Cat(getHigher(Mux(isNextMask(offset), nextRangeAddr, startAddr)),
        getOffset(startAddr)+offset, 0.U(instOffsetBits.W))
  }
  def getFallThrough() = {
    getFallThroughAddr(this.startAddr, this.carry, this.pftAddr)
  }
  def fallThroughError() = {
    !carry && startAddr(instOffsetBits+log2Ceil(PredictWidth), instOffsetBits) > pftAddr
  }
  def fromBranchPrediction(resp: BranchPredictionBundle) = {
    this.startAddr := resp.pc
    this.nextRangeAddr := resp.pc + (FetchWidth * 4).U
    this.pftAddr :=
      Mux(resp.preds.hit, resp.ftb_entry.pftAddr,
        resp.pc(instOffsetBits + log2Ceil(PredictWidth), instOffsetBits) ^ (1 << log2Ceil(PredictWidth)).U)
    this.isNextMask := VecInit((0 until PredictWidth).map(i =>
      (resp.pc(log2Ceil(PredictWidth), 1) +& i.U)(log2Ceil(PredictWidth)).asBool()
    ))
    this.oversize := Mux(resp.preds.hit, resp.ftb_entry.oversize, false.B)
    this.carry := Mux(resp.preds.hit, resp.ftb_entry.carry, resp.pc(instOffsetBits + log2Ceil(PredictWidth)).asBool)
    this
  }
  override def toPrintable: Printable = {
    p"startAddr:${Hexadecimal(startAddr)}, fallThru:${Hexadecimal(getFallThrough())}"
  }
}

class Ftq_pd_Entry(implicit p: Parameters) extends XSBundle {
  val brMask = Vec(PredictWidth, Bool())
  val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
  val jmpOffset = UInt(log2Ceil(PredictWidth).W)
  val jalTarget = UInt(VAddrBits.W)
  val rvcMask = Vec(PredictWidth, Bool())
  def hasJal  = jmpInfo.valid && !jmpInfo.bits(0)
  def hasJalr = jmpInfo.valid && jmpInfo.bits(0)
  def hasCall = jmpInfo.valid && jmpInfo.bits(1)
  def hasRet  = jmpInfo.valid && jmpInfo.bits(2)

  def fromPdWb(pdWb: PredecodeWritebackBundle) = {
    val pds = pdWb.pd
    this.brMask := VecInit(pds.map(pd => pd.isBr && pd.valid))
    this.jmpInfo.valid := VecInit(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)).asUInt.orR
    this.jmpInfo.bits := ParallelPriorityMux(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid),
                                             pds.map(pd => VecInit(pd.isJalr, pd.isCall, pd.isRet)))
    this.jmpOffset := ParallelPriorityEncoder(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid))
    this.rvcMask := VecInit(pds.map(pd => pd.isRVC))
    this.jalTarget := pdWb.jalTarget
  }

  def toPd(offset: UInt) = {
    require(offset.getWidth == log2Ceil(PredictWidth))
    val pd = Wire(new PreDecodeInfo)
    pd.valid := true.B
    pd.isRVC := rvcMask(offset)
    val isBr = brMask(offset)
    val isJalr = offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(0)
    pd.brType := Cat(offset === jmpOffset && jmpInfo.valid, isJalr || isBr)
    pd.isCall := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(1)
    pd.isRet  := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(2)
    pd
  }
}



class Ftq_Redirect_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
  val rasSp = UInt(log2Ceil(RasSize).W)
  val rasEntry = new RASEntry
  val specCnt = Vec(numBr, UInt(10.W))
  // val ghist = new ShiftingGlobalHistory
  val folded_hist = new AllFoldedHistories(foldedGHistInfos)
  val histPtr = new CGHPtr
  val phist = UInt(PathHistoryLength.W)
  val phNewBit = UInt(1.W)

  def fromBranchPrediction(resp: BranchPredictionBundle) = {
    this.rasSp := resp.rasSp
    this.rasEntry := resp.rasTop
    this.specCnt := resp.specCnt
    // this.ghist := resp.ghist
    this.folded_hist := resp.folded_hist
    this.histPtr := resp.histPtr
    this.phist := resp.phist
    this.phNewBit := resp.pc(instOffsetBits)
    this
  }
}

class Ftq_1R_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
  val meta = UInt(MaxMetaLength.W)
}

class Ftq_Pred_Info(implicit p: Parameters) extends XSBundle {
  val target = UInt(VAddrBits.W)
  val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
}

// class FtqEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
//   val startAddr = UInt(VAddrBits.W)
//   val fallThruAddr = UInt(VAddrBits.W)
//   val isNextMask = Vec(PredictWidth, Bool())

//   val meta = UInt(MaxMetaLength.W)

//   val rasSp = UInt(log2Ceil(RasSize).W)
//   val rasEntry = new RASEntry
//   val hist = new ShiftingGlobalHistory
//   val specCnt = Vec(numBr, UInt(10.W))

//   val valids = Vec(PredictWidth, Bool())
//   val brMask = Vec(PredictWidth, Bool())
//   // isJalr, isCall, isRet
//   val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
//   val jmpOffset = UInt(log2Ceil(PredictWidth).W)

//   val mispredVec = Vec(PredictWidth, Bool())
//   val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
//   val target = UInt(VAddrBits.W)
// }

class FtqRead[T <: Data](private val gen: T)(implicit p: Parameters) extends XSBundle {
  val ptr = Output(new FtqPtr)
  val offset = Output(UInt(log2Ceil(PredictWidth).W))
  val data = Input(gen)
  def apply(ptr: FtqPtr, offset: UInt) = {
    this.ptr := ptr
    this.offset := offset
    this.data
  }
  override def cloneType = (new FtqRead(gen)).asInstanceOf[this.type]
}


class FtqToBpuIO(implicit p: Parameters) extends XSBundle {
  val redirect = Valid(new BranchPredictionRedirect)
  val update = Valid(new BranchPredictionUpdate)
  val enq_ptr = Output(new FtqPtr)
}

class FtqToIfuIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
  val req = Decoupled(new FetchRequestBundle)
  val redirect = Valid(new Redirect)
  val flushFromBpu = new Bundle {
    // when the ifu pipeline is not stalled,
    // a packet from bpu s3 can at most have reached f1
    val s2 = Valid(new FtqPtr)
    val s3 = Valid(new FtqPtr)
    def shouldFlushBy(src: Valid[FtqPtr], idx_to_flush: FtqPtr) = {
      src.valid && !isAfter(src.bits, idx_to_flush)
    }
    def shouldFlushByStage2(idx: FtqPtr) = shouldFlushBy(s2, idx)
    def shouldFlushByStage3(idx: FtqPtr) = shouldFlushBy(s3, idx)
  }
}

trait HasBackendRedirectInfo extends HasXSParameter {
  def numRedirect = exuParameters.JmpCnt + exuParameters.AluCnt + 1
  def isLoadReplay(r: Valid[Redirect]) = r.bits.flushItself()
}

class FtqToCtrlIO(implicit p: Parameters) extends XSBundle with HasBackendRedirectInfo {
  // pc read port layout: jumpPc + redirects + memPred + robFlush
  val pc_reads = Vec(1 + numRedirect + 1 + 1, Flipped(new FtqRead(UInt(VAddrBits.W))))
  val target_read = Flipped(new FtqRead(UInt(VAddrBits.W)))
  def getJumpPcRead = pc_reads.head
  def getRedirectPcRead = VecInit(pc_reads.tail.dropRight(2))
  def getMemPredPcRead = pc_reads.init.last
  def getRobFlushPcRead = pc_reads.last
}

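// FTBEntryGen regenerates an FTB entry at commit time: starting from the entry that
// was hit at prediction time (or from an empty entry on a miss), it folds in the
// committed outcome (predecode info, actual cfi position and target) to produce the
// entry written back to the BPU, plus flags describing the kind of modification made.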
class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo with HasBPUParameter {
  val io = IO(new Bundle {
    val start_addr = Input(UInt(VAddrBits.W))
    val old_entry = Input(new FTBEntry)
    val pd = Input(new Ftq_pd_Entry)
    val cfiIndex = Flipped(Valid(UInt(log2Ceil(PredictWidth).W)))
    val target = Input(UInt(VAddrBits.W))
    val hit = Input(Bool())
    val mispredict_vec = Input(Vec(PredictWidth, Bool()))

    val new_entry = Output(new FTBEntry)
    val new_br_insert_pos = Output(Vec(numBr, Bool()))
    val taken_mask = Output(Vec(numBr, Bool()))
    val mispred_mask = Output(Vec(numBr+1, Bool()))

    // for perf counters
    val is_init_entry = Output(Bool())
    val is_old_entry = Output(Bool())
    val is_new_br = Output(Bool())
    val is_jalr_target_modified = Output(Bool())
    val is_always_taken_modified = Output(Bool())
    val is_br_full = Output(Bool())
  })

  // no mispredictions detected at predecode
  val hit = io.hit
  val pd = io.pd

  val init_entry = WireInit(0.U.asTypeOf(new FTBEntry))


  val cfi_is_br = pd.brMask(io.cfiIndex.bits) && io.cfiIndex.valid
  val entry_has_jmp = pd.jmpInfo.valid
  val new_jmp_is_jal  = entry_has_jmp && !pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_jalr = entry_has_jmp &&  pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_call = entry_has_jmp &&  pd.jmpInfo.bits(1) && io.cfiIndex.valid
  val new_jmp_is_ret  = entry_has_jmp &&  pd.jmpInfo.bits(2) && io.cfiIndex.valid
  val last_jmp_rvi = entry_has_jmp && pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask.last
  val last_br_rvi = cfi_is_br && io.cfiIndex.bits === (PredictWidth-1).U && !pd.rvcMask.last

  val cfi_is_jal = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jal
  val cfi_is_jalr = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jalr

  def carryPos = log2Ceil(PredictWidth)+instOffsetBits+1
  def getLower(pc: UInt) = pc(carryPos-1, instOffsetBits)
  // if not hit, establish a new entry
  init_entry.valid := true.B
  // tag is left for ftb to assign

  // case br
  val init_br_slot = init_entry.getSlotForBr(0)
  when (cfi_is_br) {
    init_br_slot.valid := true.B
    init_br_slot.offset := io.cfiIndex.bits
    init_br_slot.setLowerStatByTarget(io.start_addr, io.target, shareTailSlot && numBr == 1)
    init_entry.always_taken(0) := true.B // set to always taken on init
  }
  // init_entry.isBrSharing := shareTailSlot.B && (numBr == 1).B && cfi_is_br

  // case jmp
  when (entry_has_jmp) {
    init_entry.tailSlot.offset := pd.jmpOffset
    init_entry.tailSlot.valid := new_jmp_is_jal || new_jmp_is_jalr
    init_entry.tailSlot.setLowerStatByTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget), isShare=false)
  }

  val jmpPft = getLower(io.start_addr) +& pd.jmpOffset +& Mux(pd.rvcMask(pd.jmpOffset), 1.U, 2.U)
  init_entry.pftAddr := Mux(entry_has_jmp, jmpPft, getLower(io.start_addr) + ((FetchWidth*4)>>instOffsetBits).U + Mux(last_br_rvi, 1.U, 0.U))
  init_entry.carry   := Mux(entry_has_jmp, jmpPft(carryPos-instOffsetBits), io.start_addr(carryPos-1) || (io.start_addr(carryPos-2, instOffsetBits).andR && last_br_rvi))
  init_entry.isJalr := new_jmp_is_jalr
  init_entry.isCall := new_jmp_is_call
  init_entry.isRet  := new_jmp_is_ret
  init_entry.last_is_rvc := Mux(entry_has_jmp, pd.rvcMask(pd.jmpOffset), pd.rvcMask.last)

  init_entry.oversize := last_br_rvi || last_jmp_rvi

  // if hit, check whether a new cfi (only a br is possible) is detected
  val oe = io.old_entry
  val br_recorded_vec = oe.getBrRecordedVec(io.cfiIndex.bits)
  val br_recorded = br_recorded_vec.asUInt.orR
  val is_new_br = cfi_is_br && !br_recorded
  val new_br_offset = io.cfiIndex.bits
  // vec(i) means new br will be inserted BEFORE old br(i)
  val allBrSlotsVec = oe.allSlotsForBr
  val new_br_insert_onehot = VecInit((0 until numBr).map{
    i => i match {
      case 0 =>
        !allBrSlotsVec(0).valid || new_br_offset < allBrSlotsVec(0).offset
      case idx =>
        allBrSlotsVec(idx-1).valid && new_br_offset > allBrSlotsVec(idx-1).offset &&
        (!allBrSlotsVec(idx).valid || new_br_offset < allBrSlotsVec(idx).offset)
    }
  })

  val old_entry_modified = WireInit(io.old_entry)
  for (i <- 0 until numBr) {
    val slot = old_entry_modified.allSlotsForBr(i)
    when (new_br_insert_onehot(i)) {
      slot.valid := true.B
      slot.offset := new_br_offset
      slot.setLowerStatByTarget(io.start_addr, io.target, shareTailSlot && i == numBr-1)
      old_entry_modified.always_taken(i) := true.B
    }.elsewhen (new_br_offset > oe.allSlotsForBr(i).offset) {
      old_entry_modified.always_taken(i) := false.B
      // all other fields remain unchanged
    }.otherwise {
      // case i == 0, remain unchanged
      if (i != 0) {
        val noNeedToMoveFromFormerSlot = (shareTailSlot && i == numBr-1).B && !oe.brSlots.last.valid
        when (!noNeedToMoveFromFormerSlot) {
          slot.fromAnotherSlot(oe.allSlotsForBr(i-1))
          old_entry_modified.always_taken(i) := oe.always_taken(i)
        }
      }
    }
  }

  // two circumstances:
  // 1. oe: | br | j  |, new br should be in front of j, thus addr of j should be new pft
  // 2. oe: | br | br |, new br could be anywhere between, thus new pft is the addr of either
  //    the previous last br or the new br
  val may_have_to_replace = oe.noEmptySlotForNewBr
  val pft_need_to_change = is_new_br && may_have_to_replace
  // it should either be the given last br or the new br
  when (pft_need_to_change) {
    val new_pft_offset =
      Mux(!new_br_insert_onehot.asUInt.orR,
        new_br_offset, oe.allSlotsForBr.last.offset)

    // set jmp to invalid
    if (!shareTailSlot) {
      old_entry_modified.tailSlot.valid := false.B
    }
    old_entry_modified.pftAddr := getLower(io.start_addr) + new_pft_offset
    old_entry_modified.last_is_rvc := pd.rvcMask(new_pft_offset - 1.U) // TODO: fix this
    old_entry_modified.carry := (getLower(io.start_addr) +& new_pft_offset).head(1).asBool
    old_entry_modified.oversize := false.B
    old_entry_modified.isCall := false.B
    old_entry_modified.isRet := false.B
    old_entry_modified.isJalr := false.B
  }

  val old_entry_jmp_target_modified = WireInit(oe)
  val old_target = oe.tailSlot.getTarget(io.start_addr) // may be wrong because we store only 20 lowest bits
  val old_tail_is_jmp = !oe.tailSlot.sharing || !shareTailSlot.B
  val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) && old_tail_is_jmp // TODO: pass full jalr target
  when (jalr_target_modified) {
    old_entry_jmp_target_modified.setByJmpTarget(io.start_addr, io.target)
    old_entry_jmp_target_modified.always_taken := 0.U.asTypeOf(Vec(numBr, Bool()))
  }

  val old_entry_always_taken = WireInit(oe)
  val always_taken_modified_vec = Wire(Vec(numBr, Bool())) // whether modified or not
  for (i <- 0 until numBr) {
    old_entry_always_taken.always_taken(i) :=
      oe.always_taken(i) && io.cfiIndex.valid && oe.brValids(i) && io.cfiIndex.bits === oe.brOffset(i)
    always_taken_modified_vec(i) := oe.always_taken(i) && !old_entry_always_taken.always_taken(i)
  }
  val always_taken_modified = always_taken_modified_vec.reduce(_||_)



  val derived_from_old_entry =
    Mux(is_new_br, old_entry_modified,
      Mux(jalr_target_modified, old_entry_jmp_target_modified, old_entry_always_taken))


  io.new_entry := Mux(!hit, init_entry, derived_from_old_entry)

  io.new_br_insert_pos := new_br_insert_onehot
  io.taken_mask := VecInit((io.new_entry.brOffset zip io.new_entry.brValids).map{
    case (off, v) => io.cfiIndex.bits === off && io.cfiIndex.valid && v
  })
  for (i <- 0 until numBr) {
    io.mispred_mask(i) := io.new_entry.brValids(i) && io.mispredict_vec(io.new_entry.brOffset(i))
  }
  io.mispred_mask.last := io.new_entry.jmpValid && io.mispredict_vec(pd.jmpOffset)

  // for perf counters
  io.is_init_entry := !hit
  io.is_old_entry := hit && !is_new_br && !jalr_target_modified && !always_taken_modified
  io.is_new_br := hit && is_new_br
  io.is_jalr_target_modified := hit && jalr_target_modified
  io.is_always_taken_modified := hit && always_taken_modified
  io.is_br_full := hit && is_new_br && may_have_to_replace
}

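// The Fetch Target Queue buffers fetch targets between the BPU and the IFU:
// predictions are enqueued at bpuPtr, fetch requests are issued at ifuPtr,
// predecode writeback is collected at ifuWbPtr, and entries at commPtr are turned
// into BPU updates once committed. It also serves PC/target reads from the backend.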
class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper
  with HasBackendRedirectInfo with BPUUtils with HasBPUConst {
  val io = IO(new Bundle {
    val fromBpu = Flipped(new BpuToFtqIO)
    val fromIfu = Flipped(new IfuToFtqIO)
    val fromBackend = Flipped(new CtrlToFtqIO)

    val toBpu = new FtqToBpuIO
    val toIfu = new FtqToIfuIO
    val toBackend = new FtqToCtrlIO

    val bpuInfo = new Bundle {
      val bpRight = Output(UInt(XLEN.W))
      val bpWrong = Output(UInt(XLEN.W))
    }
  })
  io.bpuInfo := DontCare

  val robFlush = io.fromBackend.robFlush
  val stage2Redirect = io.fromBackend.stage2Redirect
  val stage3Redirect = io.fromBackend.stage3Redirect

  val stage2Flush = stage2Redirect.valid || robFlush.valid
  val backendFlush = stage2Flush || RegNext(stage2Flush)
  val ifuFlush = Wire(Bool())

  val flush = stage2Flush || RegNext(stage2Flush)

  val allowBpuIn, allowToIfu = WireInit(false.B)
  val flushToIfu = !allowToIfu
  allowBpuIn := !ifuFlush && !robFlush.valid && !stage2Redirect.valid && !stage3Redirect.valid
  allowToIfu := !ifuFlush && !robFlush.valid && !stage2Redirect.valid && !stage3Redirect.valid

  val bpuPtr, ifuPtr, ifuWbPtr, commPtr = RegInit(FtqPtr(false.B, 0.U))
  val validEntries = distanceBetween(bpuPtr, commPtr)

  // **********************************************************************
  // **************************** enq from bpu ****************************
  // **********************************************************************
  val new_entry_ready = validEntries < FtqSize.U
  io.fromBpu.resp.ready := new_entry_ready

  val bpu_s2_resp = io.fromBpu.resp.bits.s2
  val bpu_s3_resp = io.fromBpu.resp.bits.s3
  val bpu_s2_redirect = bpu_s2_resp.valid && bpu_s2_resp.hasRedirect
  val bpu_s3_redirect = bpu_s3_resp.valid && bpu_s3_resp.hasRedirect

  io.toBpu.enq_ptr := bpuPtr
  val enq_fire = io.fromBpu.resp.fire() && allowBpuIn // from bpu s1
  val bpu_in_fire = (io.fromBpu.resp.fire() || bpu_s2_redirect || bpu_s3_redirect) && allowBpuIn

  val bpu_in_resp = WireInit(io.fromBpu.resp.bits.selectedResp)
  val bpu_in_stage = WireInit(io.fromBpu.resp.bits.selectedRespIdx)
  val bpu_in_resp_ptr = Mux(bpu_in_stage === BP_S1, bpuPtr, bpu_in_resp.ftq_idx)
  val bpu_in_resp_idx = bpu_in_resp_ptr.value

  // read ports: jumpPc + redirects + loadPred + robFlush + ifuReq1 + ifuReq2 + commitUpdate
  val ftq_pc_mem = Module(new SyncDataModuleTemplate(new Ftq_RF_Components, FtqSize, 1+numRedirect+2+1+1+1, 1))
  // resp from uBTB
  ftq_pc_mem.io.wen(0) := bpu_in_fire
  ftq_pc_mem.io.waddr(0) := bpu_in_resp_idx
  ftq_pc_mem.io.wdata(0).fromBranchPrediction(bpu_in_resp)

  // ifuRedirect + backendRedirect + commit
  val ftq_redirect_sram = Module(new FtqNRSRAM(new Ftq_Redirect_SRAMEntry, 1+1+1))
  // this info is intended to be enqueued at the last stage of bpu
  ftq_redirect_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
  ftq_redirect_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_redirect_sram.io.wdata.fromBranchPrediction(io.fromBpu.resp.bits.lastStage)

  val ftq_meta_1r_sram = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, 1))
  // this info is intended to be enqueued at the last stage of bpu
  ftq_meta_1r_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
  ftq_meta_1r_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_meta_1r_sram.io.wdata.meta := io.fromBpu.resp.bits.meta
  // ifuRedirect + backendRedirect + commit
  val ftb_entry_mem = Module(new SyncDataModuleTemplate(new FTBEntry, FtqSize, 1+1+1, 1))
  ftb_entry_mem.io.wen(0) := io.fromBpu.resp.bits.lastStage.valid
  ftb_entry_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftb_entry_mem.io.wdata(0) := io.fromBpu.resp.bits.lastStage.ftb_entry


  // multi-write
  val update_target = Reg(Vec(FtqSize, UInt(VAddrBits.W)))
  val cfiIndex_vec = Reg(Vec(FtqSize, ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))))
  val mispredict_vec = Reg(Vec(FtqSize, Vec(PredictWidth, Bool())))
  val pred_stage = Reg(Vec(FtqSize, UInt(2.W)))

  val c_invalid :: c_valid :: c_commited :: Nil = Enum(3)
  val commitStateQueue = RegInit(VecInit(Seq.fill(FtqSize) {
    VecInit(Seq.fill(PredictWidth)(c_invalid))
  }))

  val f_to_send :: f_sent :: Nil = Enum(2)
  val entry_fetch_status = RegInit(VecInit(Seq.fill(FtqSize)(f_sent)))

  val h_not_hit :: h_false_hit :: h_hit :: Nil = Enum(3)
  val entry_hit_status = RegInit(VecInit(Seq.fill(FtqSize)(h_not_hit)))


  when (bpu_in_fire) {
    entry_fetch_status(bpu_in_resp_idx) := f_to_send
    commitStateQueue(bpu_in_resp_idx) := VecInit(Seq.fill(PredictWidth)(c_invalid))
    cfiIndex_vec(bpu_in_resp_idx) := bpu_in_resp.genCfiIndex
    mispredict_vec(bpu_in_resp_idx) := WireInit(VecInit(Seq.fill(PredictWidth)(false.B)))
    update_target(bpu_in_resp_idx) := bpu_in_resp.target
    pred_stage(bpu_in_resp_idx) := bpu_in_stage
  }

  bpuPtr := bpuPtr + enq_fire
  ifuPtr := ifuPtr + io.toIfu.req.fire

  // only use ftb result to assign hit status
  when (bpu_s2_resp.valid) {
    entry_hit_status(bpu_s2_resp.ftq_idx.value) := Mux(bpu_s2_resp.preds.hit, h_hit, h_not_hit)
  }


  io.toIfu.flushFromBpu.s2.valid := bpu_s2_resp.valid && bpu_s2_resp.hasRedirect
  io.toIfu.flushFromBpu.s2.bits := bpu_s2_resp.ftq_idx
  when (bpu_s2_resp.valid && bpu_s2_resp.hasRedirect) {
    bpuPtr := bpu_s2_resp.ftq_idx + 1.U
    // only when ifuPtr runs ahead of bpu s2 resp should we recover it
    when (!isBefore(ifuPtr, bpu_s2_resp.ftq_idx)) {
      ifuPtr := bpu_s2_resp.ftq_idx
    }
  }

  io.toIfu.flushFromBpu.s3.valid := bpu_s3_resp.valid && bpu_s3_resp.hasRedirect
  io.toIfu.flushFromBpu.s3.bits := bpu_s3_resp.ftq_idx
  when (bpu_s3_resp.valid && bpu_s3_resp.hasRedirect) {
    bpuPtr := bpu_s3_resp.ftq_idx + 1.U
    // only when ifuPtr runs ahead of bpu s3 resp should we recover it
    when (!isBefore(ifuPtr, bpu_s3_resp.ftq_idx)) {
      ifuPtr := bpu_s3_resp.ftq_idx
    }
    XSError(true.B, "\ns3_redirect mechanism not implemented!\n")
  }

  XSError(isBefore(bpuPtr, ifuPtr) && !isFull(bpuPtr, ifuPtr), "\nifuPtr is before bpuPtr!\n")

  // ****************************************************************
  // **************************** to ifu ****************************
  // ****************************************************************
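  // An entry written by the BPU last cycle may be requested by the IFU before
  // ftq_pc_mem's synchronous read can return it, so the last written payload is
  // kept in a bypass register and selected when the pointers match.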
  val bpu_in_bypass_buf = RegEnable(ftq_pc_mem.io.wdata(0), enable=bpu_in_fire)
  val bpu_in_bypass_ptr = RegNext(bpu_in_resp_ptr)
  val last_cycle_bpu_in = RegNext(bpu_in_fire)
  val last_cycle_to_ifu_fire = RegNext(io.toIfu.req.fire)

  // read pc and target
  ftq_pc_mem.io.raddr.init.init.last := ifuPtr.value
  ftq_pc_mem.io.raddr.init.last := (ifuPtr+1.U).value

  val toIfuReq = Wire(chiselTypeOf(io.toIfu.req))

  toIfuReq.valid := allowToIfu && entry_fetch_status(ifuPtr.value) === f_to_send && ifuPtr =/= bpuPtr
  toIfuReq.bits.ftqIdx := ifuPtr
  toIfuReq.bits.target := update_target(ifuPtr.value)
  toIfuReq.bits.ftqOffset := cfiIndex_vec(ifuPtr.value)
  toIfuReq.bits.fallThruError := false.B

  when (last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) {
    toIfuReq.bits.fromFtqPcBundle(bpu_in_bypass_buf)
  }.elsewhen (last_cycle_to_ifu_fire) {
    toIfuReq.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.last)
  }.otherwise {
    toIfuReq.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.init.last)
  }

  io.toIfu.req <> toIfuReq

  // when the fall-through address is smaller than the start address, there must be a false hit
  when (toIfuReq.bits.fallThroughError() && entry_hit_status(ifuPtr.value) === h_hit) {
    when (io.toIfu.req.fire &&
      !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) &&
      !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)
    ) {
      entry_hit_status(ifuPtr.value) := h_false_hit
      XSDebug(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallThru: %x\n", toIfuReq.bits.startAddr, toIfuReq.bits.fallThruAddr)
    }
    io.toIfu.req.bits.fallThruAddr := toIfuReq.bits.startAddr + (FetchWidth*4).U
    io.toIfu.req.bits.fallThruError := true.B
    XSDebug(true.B, "fallThruError! start:%x, fallThru:%x\n", toIfuReq.bits.startAddr, toIfuReq.bits.fallThruAddr)
  }

  val ifu_req_should_be_flushed =
    io.toIfu.flushFromBpu.shouldFlushByStage2(toIfuReq.bits.ftqIdx) ||
    io.toIfu.flushFromBpu.shouldFlushByStage3(toIfuReq.bits.ftqIdx)

  when (io.toIfu.req.fire && !ifu_req_should_be_flushed) {
    entry_fetch_status(ifuPtr.value) := f_sent
  }


  // *********************************************************************
  // **************************** wb from ifu ****************************
  // *********************************************************************
  val pdWb = io.fromIfu.pdWb
  val pds = pdWb.bits.pd
  val ifu_wb_valid = pdWb.valid
  val ifu_wb_idx = pdWb.bits.ftqIdx.value
  // read ports: commit update
  val ftq_pd_mem = Module(new SyncDataModuleTemplate(new Ftq_pd_Entry, FtqSize, 1, 1))
  ftq_pd_mem.io.wen(0) := ifu_wb_valid
  ftq_pd_mem.io.waddr(0) := pdWb.bits.ftqIdx.value
  ftq_pd_mem.io.wdata(0).fromPdWb(pdWb.bits)

  val hit_pd_valid = entry_hit_status(ifu_wb_idx) === h_hit && ifu_wb_valid
  val hit_pd_mispred = hit_pd_valid && pdWb.bits.misOffset.valid
  val hit_pd_mispred_reg = RegNext(hit_pd_mispred, init=false.B)
  val pd_reg       = RegEnable(pds,             enable = pdWb.valid)
  val start_pc_reg = RegEnable(pdWb.bits.pc(0), enable = pdWb.valid)
  val wb_idx_reg   = RegEnable(ifu_wb_idx,      enable = pdWb.valid)

  when (ifu_wb_valid) {
    val comm_stq_wen = VecInit(pds.map(_.valid).zip(pdWb.bits.instrRange).map{
      case (v, inRange) => v && inRange
    })
    (commitStateQueue(ifu_wb_idx) zip comm_stq_wen).map{
      case (qe, v) => when (v) { qe := c_valid }
    }
  }

  ifuWbPtr := ifuWbPtr + ifu_wb_valid

  ftb_entry_mem.io.raddr.head := ifu_wb_idx
  val has_false_hit = WireInit(false.B)
  when (RegNext(hit_pd_valid)) {
    // check for false hit
    val pred_ftb_entry = ftb_entry_mem.io.rdata.head
    val brSlots = pred_ftb_entry.brSlots
    val tailSlot = pred_ftb_entry.tailSlot
    // we check cfis that bpu predicted

    // bpu predicted branches but denied by predecode
    val br_false_hit =
      brSlots.map{
        s => s.valid && !(pd_reg(s.offset).valid && pd_reg(s.offset).isBr)
      }.reduce(_||_) ||
      (shareTailSlot.B && tailSlot.valid && pred_ftb_entry.tailSlot.sharing &&
        !(pd_reg(tailSlot.offset).valid && pd_reg(tailSlot.offset).isBr))

    val jmpOffset = tailSlot.offset
    val jmp_pd = pd_reg(jmpOffset)
    val jal_false_hit = pred_ftb_entry.jmpValid &&
      ((pred_ftb_entry.isJal  && !(jmp_pd.valid && jmp_pd.isJal)) ||
       (pred_ftb_entry.isJalr && !(jmp_pd.valid && jmp_pd.isJalr)) ||
       (pred_ftb_entry.isCall && !(jmp_pd.valid && jmp_pd.isCall)) ||
       (pred_ftb_entry.isRet  && !(jmp_pd.valid && jmp_pd.isRet))
      )

    has_false_hit := br_false_hit || jal_false_hit || hit_pd_mispred_reg
    XSDebug(has_false_hit, "FTB false hit by br or jal or hit_pd, startAddr: %x\n", pdWb.bits.pc(0))

    // assert(!has_false_hit)
  }

  when (has_false_hit) {
    entry_hit_status(wb_idx_reg) := h_false_hit
  }


  // **********************************************************************
  // **************************** backend read ****************************
  // **********************************************************************

  // pc reads
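  // ftq_pc_mem is read synchronously (one cycle of latency), so the offset used to
  // reconstruct the PC is delayed by one cycle to line up with the read data.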
  for ((req, i) <- io.toBackend.pc_reads.zipWithIndex) {
    ftq_pc_mem.io.raddr(i) := req.ptr.value
    req.data := ftq_pc_mem.io.rdata(i).getPc(RegNext(req.offset))
  }
  // target read
  io.toBackend.target_read.data := RegNext(update_target(io.toBackend.target_read.ptr.value))

  // *******************************************************************************
  // **************************** redirect from backend ****************************
  // *******************************************************************************

  // redirect reads cfiInfo, coupled to redirectGen s2
  ftq_redirect_sram.io.ren.init.last := io.fromBackend.stage2Redirect.valid
  ftq_redirect_sram.io.raddr.init.last := io.fromBackend.stage2Redirect.bits.ftqIdx.value

  ftb_entry_mem.io.raddr.init.last := io.fromBackend.stage2Redirect.bits.ftqIdx.value

  val stage3CfiInfo = ftq_redirect_sram.io.rdata.init.last
  val fromBackendRedirect = WireInit(io.fromBackend.stage3Redirect)
  val backendRedirectCfi = fromBackendRedirect.bits.cfiUpdate
  backendRedirectCfi.fromFtqRedirectSram(stage3CfiInfo)

  val r_ftb_entry = ftb_entry_mem.io.rdata.init.last
  val r_ftqOffset = fromBackendRedirect.bits.ftqOffset

  when (entry_hit_status(fromBackendRedirect.bits.ftqIdx.value) === h_hit) {
    backendRedirectCfi.shift := PopCount(r_ftb_entry.getBrMaskByOffset(r_ftqOffset)) +&
      (backendRedirectCfi.pd.isBr && !r_ftb_entry.brIsSaved(r_ftqOffset) &&
       !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))

    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr && (r_ftb_entry.brIsSaved(r_ftqOffset) ||
      !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
  }.otherwise {
    backendRedirectCfi.shift := (backendRedirectCfi.pd.isBr && backendRedirectCfi.taken).asUInt
    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr.asUInt
  }


  // ***************************************************************************
  // **************************** redirect from ifu ****************************
  // ***************************************************************************
  val fromIfuRedirect = WireInit(0.U.asTypeOf(Valid(new Redirect)))
  fromIfuRedirect.valid := pdWb.valid && pdWb.bits.misOffset.valid && !backendFlush
  fromIfuRedirect.bits.ftqIdx := pdWb.bits.ftqIdx
  fromIfuRedirect.bits.ftqOffset := pdWb.bits.misOffset.bits
  fromIfuRedirect.bits.level := RedirectLevel.flushAfter

  val ifuRedirectCfiUpdate = fromIfuRedirect.bits.cfiUpdate
  ifuRedirectCfiUpdate.pc := pdWb.bits.pc(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.pd := pdWb.bits.pd(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.predTaken := cfiIndex_vec(pdWb.bits.ftqIdx.value).valid
  ifuRedirectCfiUpdate.target := pdWb.bits.target
  ifuRedirectCfiUpdate.taken := pdWb.bits.cfiOffset.valid
  ifuRedirectCfiUpdate.isMisPred := pdWb.bits.misOffset.valid

  val ifuRedirectReg = RegNext(fromIfuRedirect, init=0.U.asTypeOf(Valid(new Redirect)))
  val ifuRedirectToBpu = WireInit(ifuRedirectReg)
  ifuFlush := fromIfuRedirect.valid || ifuRedirectToBpu.valid

  ftq_redirect_sram.io.ren.head := fromIfuRedirect.valid
  ftq_redirect_sram.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  ftb_entry_mem.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  val toBpuCfi = ifuRedirectToBpu.bits.cfiUpdate
  toBpuCfi.fromFtqRedirectSram(ftq_redirect_sram.io.rdata.head)
  when (ifuRedirectReg.bits.cfiUpdate.pd.isRet) {
    toBpuCfi.target := toBpuCfi.rasEntry.retAddr
  }

  // *********************************************************************
  // **************************** wb from exu ****************************
  // *********************************************************************

  def extractRedirectInfo(wb: Valid[Redirect]) = {
    val ftqIdx = wb.bits.ftqIdx.value
    val ftqOffset = wb.bits.ftqOffset
    val taken = wb.bits.cfiUpdate.taken
    val mispred = wb.bits.cfiUpdate.isMisPred
    (wb.valid, ftqIdx, ftqOffset, taken, mispred)
  }

  // fix mispredict entry
  val lastIsMispredict = RegNext(
    stage2Redirect.valid && stage2Redirect.bits.level === RedirectLevel.flushAfter, init = false.B
  )

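  // On a redirect (a mispredict reported by the backend, or a fault found by
  // predecode), record the corrected cfi position, target and mispredict bit of the
  // affected entry, so the commit-time update sent to the BPU reflects the real outcome.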
  def updateCfiInfo(redirect: Valid[Redirect], isBackend: Boolean = true) = {
    val (r_valid, r_idx, r_offset, r_taken, r_mispred) = extractRedirectInfo(redirect)
    val cfiIndex_bits_wen = r_valid && r_taken && r_offset < cfiIndex_vec(r_idx).bits
    val cfiIndex_valid_wen = r_valid && r_offset === cfiIndex_vec(r_idx).bits
    when (cfiIndex_bits_wen || cfiIndex_valid_wen) {
      cfiIndex_vec(r_idx).valid := cfiIndex_bits_wen || cfiIndex_valid_wen && r_taken
    }
    when (cfiIndex_bits_wen) {
      cfiIndex_vec(r_idx).bits := r_offset
    }
    update_target(r_idx) := redirect.bits.cfiUpdate.target
    if (isBackend) {
      mispredict_vec(r_idx)(r_offset) := r_mispred
    }
  }

  when(stage3Redirect.valid && lastIsMispredict) {
    updateCfiInfo(stage3Redirect)
  }.elsewhen (ifuRedirectToBpu.valid) {
    updateCfiInfo(ifuRedirectToBpu, isBackend=false)
  }

  // ***********************************************************************************
  // **************************** flush ptr and state queue ****************************
  // ***********************************************************************************

  class RedirectInfo extends Bundle {
    val valid = Bool()
    val ftqIdx = new FtqPtr
    val ftqOffset = UInt(log2Ceil(PredictWidth).W)
    val flushItSelf = Bool()
    def apply(redirect: Valid[Redirect]) = {
      this.valid := redirect.valid
      this.ftqIdx := redirect.bits.ftqIdx
      this.ftqOffset := redirect.bits.ftqOffset
      this.flushItSelf := RedirectLevel.flushItself(redirect.bits.level)
      this
    }
  }
  val redirectVec = Wire(Vec(3, new RedirectInfo))
  val robRedirect = robFlush

  redirectVec.zip(Seq(robRedirect, stage2Redirect, fromIfuRedirect)).map {
    case (ve, r) => ve(r)
  }

  // when redirect, we should reset ptrs and status queues
  when(redirectVec.map(r => r.valid).reduce(_||_)){
    val r = PriorityMux(redirectVec.map(r => (r.valid -> r)))
    val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, r.flushItSelf)
    val next = idx + 1.U
    bpuPtr := next
    ifuPtr := next
    ifuWbPtr := next
    when (notIfu) {
      commitStateQueue(idx.value).zipWithIndex.foreach({ case (s, i) =>
        when(i.U > offset || i.U === offset && flushItSelf){
          s := c_invalid
        }
      })
    }
  }

  // only the valid bit is actually needed
  io.toIfu.redirect.bits := Mux(robFlush.valid, robFlush.bits, stage2Redirect.bits)
  io.toIfu.redirect.valid := stage2Flush

  // commit
  for (c <- io.fromBackend.rob_commits) {
    when(c.valid) {
      commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset) := c_commited
      // TODO: remove this
      // For instruction fusions, we also update the next instruction
      when (c.bits.commitType === 4.U) {
        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 1.U) := c_commited
      }.elsewhen(c.bits.commitType === 5.U) {
        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 2.U) := c_commited
      }.elsewhen(c.bits.commitType === 6.U) {
        val index = (c.bits.ftqIdx + 1.U).value
        commitStateQueue(index)(0) := c_commited
      }.elsewhen(c.bits.commitType === 7.U) {
        val index = (c.bits.ftqIdx + 1.U).value
        commitStateQueue(index)(1) := c_commited
      }
    }
  }

  // ****************************************************************
  // **************************** to bpu ****************************
  // ****************************************************************

  io.toBpu.redirect <> Mux(fromBackendRedirect.valid, fromBackendRedirect, ifuRedirectToBpu)

  val may_have_stall_from_bpu = RegInit(false.B)
  val canCommit = commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
    Cat(commitStateQueue(commPtr.value).map(s => {
      s === c_invalid || s === c_commited
    })).andR()

  // commit reads
  ftq_pc_mem.io.raddr.last := commPtr.value
  val commit_pc_bundle = ftq_pc_mem.io.rdata.last
  ftq_pd_mem.io.raddr.last := commPtr.value
  val commit_pd = ftq_pd_mem.io.rdata.last
  ftq_redirect_sram.io.ren.last := canCommit
  ftq_redirect_sram.io.raddr.last := commPtr.value
  val commit_spec_meta = ftq_redirect_sram.io.rdata.last
  ftq_meta_1r_sram.io.ren(0) := canCommit
  ftq_meta_1r_sram.io.raddr(0) := commPtr.value
  val commit_meta = ftq_meta_1r_sram.io.rdata(0)
  ftb_entry_mem.io.raddr.last := commPtr.value
  val commit_ftb_entry = ftb_entry_mem.io.rdata.last

  // need one cycle to read mem and srams
  val do_commit_ptr = RegNext(commPtr)
  val do_commit = RegNext(canCommit, init=false.B)
  when (canCommit) { commPtr := commPtr + 1.U }
  val commit_state = RegNext(commitStateQueue(commPtr.value))
  val can_commit_cfi = WireInit(cfiIndex_vec(commPtr.value))
  when (commitStateQueue(commPtr.value)(can_commit_cfi.bits) =/= c_commited) {
    can_commit_cfi.valid := false.B
  }
  val commit_cfi = RegNext(can_commit_cfi)

  val commit_mispredict = VecInit((RegNext(mispredict_vec(commPtr.value)) zip commit_state).map {
    case (mis, state) => mis && state === c_commited
  })
  val can_commit_hit = entry_hit_status(commPtr.value)
  val commit_hit = RegNext(can_commit_hit)
  val commit_target = RegNext(update_target(commPtr.value))
  val commit_valid = commit_hit === h_hit || commit_cfi.valid // hit or taken

  val to_bpu_hit = can_commit_hit === h_hit || can_commit_hit === h_false_hit
  may_have_stall_from_bpu := can_commit_cfi.valid && !to_bpu_hit && !may_have_stall_from_bpu

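  // A committed entry becomes a BranchPredictionUpdate for the BPU: PC, meta and
  // speculative history are read back from the FTQ memories, and FTBEntryGen below
  // rebuilds the FTB entry from the old entry plus the committed outcome.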
  io.toBpu.update := DontCare
  io.toBpu.update.valid := commit_valid && do_commit
  val update = io.toBpu.update.bits
  update.false_hit := commit_hit === h_false_hit
  update.pc := commit_pc_bundle.startAddr
  update.preds.hit := commit_hit === h_hit || commit_hit === h_false_hit
  update.meta := commit_meta.meta
  update.full_target := commit_target
  update.fromFtqRedirectSram(commit_spec_meta)

  val commit_real_hit = commit_hit === h_hit
  val update_ftb_entry = update.ftb_entry

  val ftbEntryGen = Module(new FTBEntryGen).io
  ftbEntryGen.start_addr     := commit_pc_bundle.startAddr
  ftbEntryGen.old_entry      := commit_ftb_entry
  ftbEntryGen.pd             := commit_pd
  ftbEntryGen.cfiIndex       := commit_cfi
  ftbEntryGen.target         := commit_target
  ftbEntryGen.hit            := commit_real_hit
  ftbEntryGen.mispredict_vec := commit_mispredict

  update_ftb_entry           := ftbEntryGen.new_entry
  update.new_br_insert_pos   := ftbEntryGen.new_br_insert_pos
  update.mispred_mask        := ftbEntryGen.mispred_mask
  update.old_entry           := ftbEntryGen.is_old_entry
  update.preds.br_taken_mask := ftbEntryGen.taken_mask

  // ******************************************************************************
  // **************************** commit perf counters ****************************
  // ******************************************************************************

  val commit_inst_mask    = VecInit(commit_state.map(c => c === c_commited && do_commit)).asUInt
  val commit_mispred_mask = commit_mispredict.asUInt
  val commit_not_mispred_mask = ~commit_mispred_mask

  val commit_br_mask = commit_pd.brMask.asUInt
  val commit_jmp_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.jmpInfo.valid.asTypeOf(UInt(1.W)))
  val commit_cfi_mask = (commit_br_mask | commit_jmp_mask)

  val mbpInstrs = commit_inst_mask & commit_cfi_mask

  val mbpRights = mbpInstrs & commit_not_mispred_mask
  val mbpWrongs = mbpInstrs & commit_mispred_mask

  io.bpuInfo.bpRight := PopCount(mbpRights)
  io.bpuInfo.bpWrong := PopCount(mbpWrongs)

  // Cfi Info
  for (i <- 0 until PredictWidth) {
    val pc = commit_pc_bundle.startAddr + (i * instBytes).U
    val v = commit_state(i) === c_commited
    val isBr = commit_pd.brMask(i)
    val isJmp = commit_pd.jmpInfo.valid && commit_pd.jmpOffset === i.U
    val isCfi = isBr || isJmp
    val isTaken = commit_cfi.valid && commit_cfi.bits === i.U
    val misPred = commit_mispredict(i)
    // val ghist = commit_spec_meta.ghist.predHist
    val histPtr = commit_spec_meta.histPtr
    val predCycle = commit_meta.meta(63, 0)
    val target = commit_target

    val brIdx = OHToUInt(Reverse(Cat(update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U})))
    val inFtbEntry = update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U}.reduce(_||_)
    val addIntoHist = ((commit_hit === h_hit) && inFtbEntry) || ((!(commit_hit === h_hit) && i.U === commit_cfi.bits && isBr && commit_cfi.valid))
    XSDebug(v && do_commit && isCfi, p"cfi_update: isBr(${isBr}) pc(${Hexadecimal(pc)}) " +
      p"taken(${isTaken}) mispred(${misPred}) cycle($predCycle) hist(${histPtr.value}) " +
      p"startAddr(${Hexadecimal(commit_pc_bundle.startAddr)}) AddIntoHist(${addIntoHist}) " +
      p"brInEntry(${inFtbEntry}) brIdx(${brIdx}) target(${Hexadecimal(target)})\n")
  }

  val enq = io.fromBpu.resp
  val perf_redirect = io.fromBackend.stage2Redirect

  XSPerfAccumulate("entry", validEntries)
  XSPerfAccumulate("bpu_to_ftq_stall", enq.valid && !enq.ready)
  XSPerfAccumulate("mispredictRedirect", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level)
  XSPerfAccumulate("replayRedirect", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level))
  XSPerfAccumulate("predecodeRedirect", fromIfuRedirect.valid)

  XSPerfAccumulate("to_ifu_bubble", io.toIfu.req.ready && !io.toIfu.req.valid)

  XSPerfAccumulate("to_ifu_stall", io.toIfu.req.valid && !io.toIfu.req.ready)
  XSPerfAccumulate("from_bpu_real_bubble", !enq.valid && enq.ready && allowBpuIn)
  XSPerfAccumulate("bpu_to_ftq_bubble", bpuPtr === ifuPtr)

  val from_bpu = io.fromBpu.resp.bits
  def in_entry_len_map_gen(resp: BranchPredictionBundle)(stage: String) = {
    val entry_len = (resp.ftb_entry.getFallThrough(resp.pc) - resp.pc) >> instOffsetBits
    val entry_len_recording_vec = (1 to PredictWidth+1).map(i => entry_len === i.U)
    val entry_len_map = (1 to PredictWidth+1).map(i =>
      f"${stage}_ftb_entry_len_$i" -> (entry_len_recording_vec(i-1) && resp.valid)
    ).foldLeft(Map[String, UInt]())(_+_)
    entry_len_map
  }
  val s1_entry_len_map = in_entry_len_map_gen(from_bpu.s1)("s1")
  val s2_entry_len_map = in_entry_len_map_gen(from_bpu.s2)("s2")
  val s3_entry_len_map = in_entry_len_map_gen(from_bpu.s3)("s3")

  val to_ifu = io.toIfu.req.bits
  val to_ifu_entry_len = (to_ifu.fallThruAddr - to_ifu.startAddr) >> instOffsetBits
  val to_ifu_entry_len_recording_vec = (1 to PredictWidth+1).map(i => to_ifu_entry_len === i.U)
  val to_ifu_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"to_ifu_ftb_entry_len_$i" -> (to_ifu_entry_len_recording_vec(i-1) && io.toIfu.req.fire)
  ).foldLeft(Map[String, UInt]())(_+_)



  val commit_num_inst_recording_vec = (1 to PredictWidth).map(i => PopCount(commit_inst_mask) === i.U)
  val commit_num_inst_map = (1 to PredictWidth).map(i =>
    f"commit_num_inst_$i" -> (commit_num_inst_recording_vec(i-1) && do_commit)
  ).foldLeft(Map[String, UInt]())(_+_)



  val commit_jal_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJal.asTypeOf(UInt(1.W)))
  val commit_jalr_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJalr.asTypeOf(UInt(1.W)))
  val commit_call_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasCall.asTypeOf(UInt(1.W)))
  val commit_ret_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasRet.asTypeOf(UInt(1.W)))


  val mbpBRights = mbpRights & commit_br_mask
  val mbpJRights = mbpRights & commit_jal_mask
  val mbpIRights = mbpRights & commit_jalr_mask
  val mbpCRights = mbpRights & commit_call_mask
  val mbpRRights = mbpRights & commit_ret_mask

  val mbpBWrongs = mbpWrongs & commit_br_mask
  val mbpJWrongs = mbpWrongs & commit_jal_mask
  val mbpIWrongs = mbpWrongs & commit_jalr_mask
  val mbpCWrongs = mbpWrongs & commit_call_mask
  val mbpRWrongs = mbpWrongs & commit_ret_mask

  val commit_pred_stage = RegNext(pred_stage(commPtr.value))

  def pred_stage_map(src: UInt, name: String) = {
    (0 until numBpStages).map(i =>
      f"${name}_stage_${i+1}" -> PopCount(src.asBools.map(_ && commit_pred_stage === BP_STAGES(i)))
    ).foldLeft(Map[String, UInt]())(_+_)
  }

  val mispred_stage_map      = pred_stage_map(mbpWrongs,  "mispredict")
  val br_mispred_stage_map   = pred_stage_map(mbpBWrongs, "br_mispredict")
  val jalr_mispred_stage_map = pred_stage_map(mbpIWrongs, "jalr_mispredict")
  val correct_stage_map      = pred_stage_map(mbpRights,  "correct")
  val br_correct_stage_map   = pred_stage_map(mbpBRights, "br_correct")
  val jalr_correct_stage_map = pred_stage_map(mbpIRights, "jalr_correct")

  val update_valid = io.toBpu.update.valid
  def u(cond: Bool) = update_valid && cond
  val ftb_false_hit = u(update.false_hit)
  // assert(!ftb_false_hit)
  val ftb_hit = u(commit_hit === h_hit)

  val ftb_new_entry = u(ftbEntryGen.is_init_entry)
  val ftb_new_entry_only_br = ftb_new_entry && !update.ftb_entry.jmpValid
  val ftb_new_entry_only_jmp = ftb_new_entry && !update.ftb_entry.brValids(0)
  val ftb_new_entry_has_br_and_jmp = ftb_new_entry && update.ftb_entry.brValids(0) && update.ftb_entry.jmpValid

  val ftb_old_entry = u(ftbEntryGen.is_old_entry)

  val ftb_modified_entry = u(ftbEntryGen.is_new_br || ftbEntryGen.is_jalr_target_modified || ftbEntryGen.is_always_taken_modified)
  val ftb_modified_entry_new_br = u(ftbEntryGen.is_new_br)
  val ftb_modified_entry_jalr_target_modified = u(ftbEntryGen.is_jalr_target_modified)
  val ftb_modified_entry_br_full = ftb_modified_entry && ftbEntryGen.is_br_full
  val ftb_modified_entry_always_taken = ftb_modified_entry && ftbEntryGen.is_always_taken_modified

  val ftb_entry_len = (ftbEntryGen.new_entry.getFallThrough(update.pc) - update.pc) >> instOffsetBits
  val ftb_entry_len_recording_vec = (1 to PredictWidth+1).map(i => ftb_entry_len === i.U)
  val ftb_init_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"ftb_init_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_new_entry)
  ).foldLeft(Map[String, UInt]())(_+_)
  val ftb_modified_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"ftb_modified_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_modified_entry)
  ).foldLeft(Map[String, UInt]())(_+_)

  val ftq_occupancy_map = (0 to FtqSize).map(i =>
    f"ftq_has_entry_$i" -> (validEntries === i.U)
  ).foldLeft(Map[String, UInt]())(_+_)

  val perfCountsMap = Map(
    "BpInstr" -> PopCount(mbpInstrs),
    "BpBInstr" -> PopCount(mbpBRights | mbpBWrongs),
    "BpRight" -> PopCount(mbpRights),
    "BpWrong" -> PopCount(mbpWrongs),
    "BpBRight" -> PopCount(mbpBRights),
    "BpBWrong" -> PopCount(mbpBWrongs),
    "BpJRight" -> PopCount(mbpJRights),
    "BpJWrong" -> PopCount(mbpJWrongs),
    "BpIRight" -> PopCount(mbpIRights),
    "BpIWrong" -> PopCount(mbpIWrongs),
    "BpCRight" -> PopCount(mbpCRights),
    "BpCWrong" -> PopCount(mbpCWrongs),
    "BpRRight" -> PopCount(mbpRRights),
    "BpRWrong" -> PopCount(mbpRWrongs),

    "ftb_false_hit" -> PopCount(ftb_false_hit),
    "ftb_hit" -> PopCount(ftb_hit),
    "ftb_new_entry" -> PopCount(ftb_new_entry),
    "ftb_new_entry_only_br" -> PopCount(ftb_new_entry_only_br),
    "ftb_new_entry_only_jmp" -> PopCount(ftb_new_entry_only_jmp),
    "ftb_new_entry_has_br_and_jmp" -> PopCount(ftb_new_entry_has_br_and_jmp),
    "ftb_old_entry" -> PopCount(ftb_old_entry),
    "ftb_modified_entry" -> PopCount(ftb_modified_entry),
    "ftb_modified_entry_new_br" -> PopCount(ftb_modified_entry_new_br),
    "ftb_jalr_target_modified" -> PopCount(ftb_modified_entry_jalr_target_modified),
    "ftb_modified_entry_br_full" -> PopCount(ftb_modified_entry_br_full),
    "ftb_modified_entry_always_taken" -> PopCount(ftb_modified_entry_always_taken)
  ) ++ ftb_init_entry_len_map ++ ftb_modified_entry_len_map ++ s1_entry_len_map ++
  s2_entry_len_map ++ s3_entry_len_map ++
  to_ifu_entry_len_map ++ commit_num_inst_map ++ ftq_occupancy_map ++
  mispred_stage_map ++ br_mispred_stage_map ++ jalr_mispred_stage_map ++
  correct_stage_map ++ br_correct_stage_map ++ jalr_correct_stage_map

  for((key, value) <- perfCountsMap) {
    XSPerfAccumulate(key, value)
  }

  // --------------------------- Debug --------------------------------
  // XSDebug(enq_fire, p"enq! " + io.fromBpu.resp.bits.toPrintable)
  XSDebug(io.toIfu.req.fire, p"fire to ifu " + io.toIfu.req.bits.toPrintable)
  XSDebug(do_commit, p"deq! [ptr] $do_commit_ptr\n")
  XSDebug(true.B, p"[bpuPtr] $bpuPtr, [ifuPtr] $ifuPtr, [ifuWbPtr] $ifuWbPtr [commPtr] $commPtr\n")
  XSDebug(true.B, p"[in] v:${io.fromBpu.resp.valid} r:${io.fromBpu.resp.ready} " +
    p"[out] v:${io.toIfu.req.valid} r:${io.toIfu.req.ready}\n")
  XSDebug(do_commit, p"[deq info] cfiIndex: $commit_cfi, $commit_pc_bundle, target: ${Hexadecimal(commit_target)}\n")

  // def ubtbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //   commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //     case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ Mux(ans.hit.asBool,
  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
  //             !taken),
  //           !taken),
  //         false.B)
  //   }
  // }

  // def btbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //   commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //     case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ Mux(ans.hit.asBool,
  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
  //             !taken),
  //           !taken),
  //         false.B)
  //   }
  // }

  // def tageCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //   commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //     case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ (ans.taken.asBool === taken),
  //         false.B)
  //   }
  // }

  // def loopCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //   commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //     case (((valid, pd), ans), taken) =>
  //       Mux(valid && (pd.isBr) && ans.hit.asBool,
  //         isWrong ^ (!taken),
  //         false.B)
  //   }
  // }

  // def rasCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //   commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //     case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isRet.asBool /*&& taken*/ && ans.hit.asBool,
  //         isWrong ^ (ans.target === commitEntry.target),
  //         false.B)
  //   }
  // }

  // val ubtbRights = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), false.B)
  // val ubtbWrongs = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), true.B)
  // // btb and ubtb pred jal and jalr as well
  // val btbRights = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), false.B)
  // val btbWrongs = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), true.B)
  // val tageRights = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), false.B)
  // val tageWrongs = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), true.B)

  // val loopRights = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), false.B)
  // val loopWrongs = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), true.B)

  // val rasRights = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), false.B)
  // val rasWrongs = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), true.B)

  val perfinfo = IO(new Bundle(){
    val perfEvents = Output(new PerfEventsBundle(22))
  })
  val perfEvents = Seq(
    ("bpu_s2_redirect      ", bpu_s2_redirect),
    ("bpu_s3_redirect      ", bpu_s3_redirect),
    ("bpu_to_ftq_stall     ", enq.valid && ~enq.ready),
    ("mispredictRedirect   ", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level),
    ("replayRedirect       ", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level)),
    ("predecodeRedirect    ", fromIfuRedirect.valid),
    ("to_ifu_bubble        ", io.toIfu.req.ready && !io.toIfu.req.valid),
    ("from_bpu_real_bubble ", !enq.valid && enq.ready && allowBpuIn),
    ("BpInstr              ", PopCount(mbpInstrs)),
    ("BpBInstr             ", PopCount(mbpBRights | mbpBWrongs)),
    ("BpRight              ", PopCount(mbpRights)),
    ("BpWrong              ", PopCount(mbpWrongs)),
    ("BpBRight             ", PopCount(mbpBRights)),
    ("BpBWrong             ", PopCount(mbpBWrongs)),
    ("BpJRight             ", PopCount(mbpJRights)),
    ("BpJWrong             ", PopCount(mbpJWrongs)),
    ("BpIRight             ", PopCount(mbpIRights)),
    ("BpIWrong             ", PopCount(mbpIWrongs)),
    ("BpCRight             ", PopCount(mbpCRights)),
    ("BpCWrong             ", PopCount(mbpCWrongs)),
    ("BpRRight             ", PopCount(mbpRRights)),
    ("BpRWrong             ", PopCount(mbpRWrongs)),
    ("ftb_false_hit        ", PopCount(ftb_false_hit)),
    ("ftb_hit              ", PopCount(ftb_hit)),
  )

  for (((perf_out, (perf_name, perf)), i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
    perf_out.incr_step := RegNext(perf)
  }
}