/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.frontend

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils.{AsyncDataModuleTemplate, CircularQueuePtr, DataModuleTemplate, HasCircularQueuePtrHelper, SRAMTemplate, SyncDataModuleTemplate, XSDebug, XSPerfAccumulate, PerfBundle, PerfEventsBundle, XSError}
import utils.{ParallelPriorityMux, ParallelPriorityEncoder}
import xiangshan._
import xiangshan.backend.{CtrlToFtqIO}

class FtqPtr(implicit p: Parameters) extends CircularQueuePtr[FtqPtr](
  p => p(XSCoreParamsKey).FtqSize
){
  override def cloneType = (new FtqPtr).asInstanceOf[this.type]
}

object FtqPtr {
  def apply(f: Bool, v: UInt)(implicit p: Parameters): FtqPtr = {
    val ptr = Wire(new FtqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
  def inverse(ptr: FtqPtr)(implicit p: Parameters): FtqPtr = {
    apply(!ptr.flag, ptr.value)
  }
}

// An FtqSize-deep memory with numRead read ports and one write port, built by
// instantiating one single-read-port SRAMTemplate per read port and broadcasting
// the write to every copy
class FtqNRSRAM[T <: Data](gen: T, numRead: Int)(implicit p: Parameters) extends XSModule {

  val io = IO(new Bundle() {
    val raddr = Input(Vec(numRead, UInt(log2Up(FtqSize).W)))
    val ren = Input(Vec(numRead, Bool()))
    val rdata = Output(Vec(numRead, gen))
    val waddr = Input(UInt(log2Up(FtqSize).W))
    val wen = Input(Bool())
    val wdata = Input(gen)
  })

  for(i <- 0 until numRead){
    val sram = Module(new SRAMTemplate(gen, FtqSize))
    sram.io.r.req.valid := io.ren(i)
    sram.io.r.req.bits.setIdx := io.raddr(i)
    io.rdata(i) := sram.io.r.resp.data(0)
    sram.io.w.req.valid := io.wen
    sram.io.w.req.bits.setIdx := io.waddr
    sram.io.w.req.bits.data := VecInit(io.wdata)
  }

}

class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils {
  // TODO: move pftAddr, oversize, carry to another mem
  val startAddr = UInt(VAddrBits.W)
  val nextRangeAddr = UInt(VAddrBits.W)
  val pftAddr = UInt((log2Ceil(PredictWidth)+1).W)
  val isNextMask = Vec(PredictWidth, Bool())
  val oversize = Bool()
  val carry = Bool()
  // reconstruct the PC of the instruction at the given offset inside this fetch block
  def getPc(offset: UInt) = {
    def getHigher(pc: UInt) = pc(VAddrBits-1, log2Ceil(PredictWidth)+instOffsetBits+1)
    def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth)+instOffsetBits, instOffsetBits)
    Cat(getHigher(Mux(isNextMask(offset) && startAddr(log2Ceil(PredictWidth)+instOffsetBits), nextRangeAddr, startAddr)),
        getOffset(startAddr)+offset, 0.U(instOffsetBits.W))
  }
  def getFallThrough() = {
    def getHigher(pc: UInt) = pc.head(VAddrBits-log2Ceil(PredictWidth)-instOffsetBits-1)
    val startHigher = getHigher(startAddr)
    val nextHigher = getHigher(nextRangeAddr)
    val higher = Mux(carry, nextHigher, startHigher)
    Cat(higher, 
pftAddr, 0.U(instOffsetBits.W)) 90 } 91 def fallThroughError() = { 92 val startLower = Cat(0.U(1.W), startAddr(instOffsetBits+log2Ceil(PredictWidth), instOffsetBits)) 93 val endLowerwithCarry = Cat(carry, pftAddr) 94 require(startLower.getWidth == log2Ceil(PredictWidth)+2) 95 require(endLowerwithCarry.getWidth == log2Ceil(PredictWidth)+2) 96 startLower >= endLowerwithCarry || (endLowerwithCarry - startLower) > (PredictWidth+1).U 97 } 98 def fromBranchPrediction(resp: BranchPredictionBundle) = { 99 def carryPos(addr: UInt) = addr(instOffsetBits+log2Ceil(PredictWidth)+1) 100 this.startAddr := resp.pc 101 this.nextRangeAddr := resp.pc + (FetchWidth * 4 * 2).U 102 this.pftAddr := 103 Mux(resp.preds.hit, resp.preds.fallThroughAddr(instOffsetBits+log2Ceil(PredictWidth),instOffsetBits), 104 resp.pc(instOffsetBits + log2Ceil(PredictWidth), instOffsetBits) ^ (1 << log2Ceil(PredictWidth)).U) 105 this.isNextMask := VecInit((0 until PredictWidth).map(i => 106 (resp.pc(log2Ceil(PredictWidth), 1) +& i.U)(log2Ceil(PredictWidth)).asBool() 107 )) 108 this.oversize := Mux(resp.preds.hit, resp.preds.oversize, false.B) 109 this.carry := 110 Mux(resp.preds.hit, 111 carryPos(resp.pc) ^ carryPos(resp.preds.fallThroughAddr), 112 resp.pc(instOffsetBits + log2Ceil(PredictWidth)).asBool 113 ) 114 this 115 } 116 override def toPrintable: Printable = { 117 p"startAddr:${Hexadecimal(startAddr)}, fallThru:${Hexadecimal(getFallThrough())}" 118 } 119} 120 121class Ftq_pd_Entry(implicit p: Parameters) extends XSBundle { 122 val brMask = Vec(PredictWidth, Bool()) 123 val jmpInfo = ValidUndirectioned(Vec(3, Bool())) 124 val jmpOffset = UInt(log2Ceil(PredictWidth).W) 125 val jalTarget = UInt(VAddrBits.W) 126 val rvcMask = Vec(PredictWidth, Bool()) 127 def hasJal = jmpInfo.valid && !jmpInfo.bits(0) 128 def hasJalr = jmpInfo.valid && jmpInfo.bits(0) 129 def hasCall = jmpInfo.valid && jmpInfo.bits(1) 130 def hasRet = jmpInfo.valid && jmpInfo.bits(2) 131 132 def fromPdWb(pdWb: PredecodeWritebackBundle) = { 133 val pds = pdWb.pd 134 this.brMask := VecInit(pds.map(pd => pd.isBr && pd.valid)) 135 this.jmpInfo.valid := VecInit(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)).asUInt.orR 136 this.jmpInfo.bits := ParallelPriorityMux(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid), 137 pds.map(pd => VecInit(pd.isJalr, pd.isCall, pd.isRet))) 138 this.jmpOffset := ParallelPriorityEncoder(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)) 139 this.rvcMask := VecInit(pds.map(pd => pd.isRVC)) 140 this.jalTarget := pdWb.jalTarget 141 } 142 143 def toPd(offset: UInt) = { 144 require(offset.getWidth == log2Ceil(PredictWidth)) 145 val pd = Wire(new PreDecodeInfo) 146 pd.valid := true.B 147 pd.isRVC := rvcMask(offset) 148 val isBr = brMask(offset) 149 val isJalr = offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(0) 150 pd.brType := Cat(offset === jmpOffset && jmpInfo.valid, isJalr || isBr) 151 pd.isCall := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(1) 152 pd.isRet := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(2) 153 pd 154 } 155} 156 157 158 159class Ftq_Redirect_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst { 160 val rasSp = UInt(log2Ceil(RasSize).W) 161 val rasEntry = new RASEntry 162 val specCnt = Vec(numBr, UInt(10.W)) 163 // val ghist = new ShiftingGlobalHistory 164 val folded_hist = new AllFoldedHistories(foldedGHistInfos) 165 val histPtr = new CGHPtr 166 val phist = UInt(PathHistoryLength.W) 167 val phNewBit = UInt(1.W) 168 169 def fromBranchPrediction(resp: BranchPredictionBundle) = { 
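    // Copies the speculative predictor state carried by the last-stage prediction
    // (RAS top entry and pointer, speculative counters, folded global history,
    // path history) into this SRAM entry so that a later redirect can restore it.
    // A minimal sketch of the write side, mirroring the wiring that appears later
    // in Ftq (illustrative only):
    //   ftq_redirect_sram.io.wen   := io.fromBpu.resp.bits.lastStage.valid
    //   ftq_redirect_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
    //   ftq_redirect_sram.io.wdata.fromBranchPrediction(io.fromBpu.resp.bits.lastStage)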
170 this.rasSp := resp.rasSp 171 this.rasEntry := resp.rasTop 172 this.specCnt := resp.specCnt 173 // this.ghist := resp.ghist 174 this.folded_hist := resp.folded_hist 175 this.histPtr := resp.histPtr 176 this.phist := resp.phist 177 this.phNewBit := resp.pc(instOffsetBits) 178 this 179 } 180} 181 182class Ftq_1R_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst { 183 val meta = UInt(MaxMetaLength.W) 184} 185 186class Ftq_Pred_Info(implicit p: Parameters) extends XSBundle { 187 val target = UInt(VAddrBits.W) 188 val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W)) 189} 190 191// class FtqEntry(implicit p: Parameters) extends XSBundle with HasBPUConst { 192// val startAddr = UInt(VAddrBits.W) 193// val fallThruAddr = UInt(VAddrBits.W) 194// val isNextMask = Vec(PredictWidth, Bool()) 195 196// val meta = UInt(MaxMetaLength.W) 197 198// val rasSp = UInt(log2Ceil(RasSize).W) 199// val rasEntry = new RASEntry 200// val hist = new ShiftingGlobalHistory 201// val specCnt = Vec(numBr, UInt(10.W)) 202 203// val valids = Vec(PredictWidth, Bool()) 204// val brMask = Vec(PredictWidth, Bool()) 205// // isJalr, isCall, isRet 206// val jmpInfo = ValidUndirectioned(Vec(3, Bool())) 207// val jmpOffset = UInt(log2Ceil(PredictWidth).W) 208 209// val mispredVec = Vec(PredictWidth, Bool()) 210// val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W)) 211// val target = UInt(VAddrBits.W) 212// } 213 214class FtqRead[T <: Data](private val gen: T)(implicit p: Parameters) extends XSBundle { 215 val ptr = Output(new FtqPtr) 216 val offset = Output(UInt(log2Ceil(PredictWidth).W)) 217 val data = Input(gen) 218 def apply(ptr: FtqPtr, offset: UInt) = { 219 this.ptr := ptr 220 this.offset := offset 221 this.data 222 } 223 override def cloneType = (new FtqRead(gen)).asInstanceOf[this.type] 224} 225 226 227class FtqToBpuIO(implicit p: Parameters) extends XSBundle { 228 val redirect = Valid(new BranchPredictionRedirect) 229 val update = Valid(new BranchPredictionUpdate) 230 val enq_ptr = Output(new FtqPtr) 231} 232 233class FtqToIfuIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper { 234 val req = Decoupled(new FetchRequestBundle) 235 val redirect = Valid(new Redirect) 236 val flushFromBpu = new Bundle { 237 // when ifu pipeline is not stalled, 238 // a packet from bpu s3 can reach f1 at most 239 val s2 = Valid(new FtqPtr) 240 // val s3 = Valid(new FtqPtr) 241 def shouldFlushBy(src: Valid[FtqPtr], idx_to_flush: FtqPtr) = { 242 src.valid && !isAfter(src.bits, idx_to_flush) 243 } 244 def shouldFlushByStage2(idx: FtqPtr) = shouldFlushBy(s2, idx) 245 // def shouldFlushByStage3(idx: FtqPtr) = shouldFlushBy(s3, idx) 246 } 247} 248 249trait HasBackendRedirectInfo extends HasXSParameter { 250 def numRedirect = exuParameters.JmpCnt + exuParameters.AluCnt + 1 251 def isLoadReplay(r: Valid[Redirect]) = r.bits.flushItself() 252} 253 254class FtqToCtrlIO(implicit p: Parameters) extends XSBundle with HasBackendRedirectInfo { 255 val pc_reads = Vec(1 + numRedirect + 1 + 1, Flipped(new FtqRead(UInt(VAddrBits.W)))) 256 val target_read = Flipped(new FtqRead(UInt(VAddrBits.W))) 257 def getJumpPcRead = pc_reads.head 258 def getRedirectPcRead = VecInit(pc_reads.tail.dropRight(2)) 259 def getMemPredPcRead = pc_reads.init.last 260 def getRobFlushPcRead = pc_reads.last 261} 262 263 264class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo with HasBPUParameter { 265 val io = IO(new Bundle { 266 val start_addr = Input(UInt(VAddrBits.W)) 267 val 
old_entry = Input(new FTBEntry) 268 val pd = Input(new Ftq_pd_Entry) 269 val cfiIndex = Flipped(Valid(UInt(log2Ceil(PredictWidth).W))) 270 val target = Input(UInt(VAddrBits.W)) 271 val hit = Input(Bool()) 272 val mispredict_vec = Input(Vec(PredictWidth, Bool())) 273 274 val new_entry = Output(new FTBEntry) 275 val new_br_insert_pos = Output(Vec(numBr, Bool())) 276 val taken_mask = Output(Vec(numBr, Bool())) 277 val mispred_mask = Output(Vec(numBr+1, Bool())) 278 279 // for perf counters 280 val is_init_entry = Output(Bool()) 281 val is_old_entry = Output(Bool()) 282 val is_new_br = Output(Bool()) 283 val is_jalr_target_modified = Output(Bool()) 284 val is_always_taken_modified = Output(Bool()) 285 val is_br_full = Output(Bool()) 286 }) 287 288 // no mispredictions detected at predecode 289 val hit = io.hit 290 val pd = io.pd 291 292 val init_entry = WireInit(0.U.asTypeOf(new FTBEntry)) 293 294 295 val cfi_is_br = pd.brMask(io.cfiIndex.bits) && io.cfiIndex.valid 296 val entry_has_jmp = pd.jmpInfo.valid 297 val new_jmp_is_jal = entry_has_jmp && !pd.jmpInfo.bits(0) && io.cfiIndex.valid 298 val new_jmp_is_jalr = entry_has_jmp && pd.jmpInfo.bits(0) && io.cfiIndex.valid 299 val new_jmp_is_call = entry_has_jmp && pd.jmpInfo.bits(1) && io.cfiIndex.valid 300 val new_jmp_is_ret = entry_has_jmp && pd.jmpInfo.bits(2) && io.cfiIndex.valid 301 val last_jmp_rvi = entry_has_jmp && pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask.last 302 val last_br_rvi = cfi_is_br && io.cfiIndex.bits === (PredictWidth-1).U && !pd.rvcMask.last 303 304 val cfi_is_jal = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jal 305 val cfi_is_jalr = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jalr 306 307 def carryPos = log2Ceil(PredictWidth)+instOffsetBits+1 308 def getLower(pc: UInt) = pc(carryPos-1, instOffsetBits) 309 // if not hit, establish a new entry 310 init_entry.valid := true.B 311 // tag is left for ftb to assign 312 313 // case br 314 val init_br_slot = init_entry.getSlotForBr(0) 315 when (cfi_is_br) { 316 init_br_slot.valid := true.B 317 init_br_slot.offset := io.cfiIndex.bits 318 init_br_slot.setLowerStatByTarget(io.start_addr, io.target, shareTailSlot && numBr == 1) 319 init_entry.always_taken(0) := true.B // set to always taken on init 320 } 321 // init_entry.isBrSharing := shareTailSlot.B && (numBr == 1).B && cfi_is_br 322 323 // case jmp 324 when (entry_has_jmp) { 325 init_entry.tailSlot.offset := pd.jmpOffset 326 init_entry.tailSlot.valid := new_jmp_is_jal || new_jmp_is_jalr 327 init_entry.tailSlot.setLowerStatByTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget), isShare=false) 328 } 329 330 val jmpPft = getLower(io.start_addr) +& pd.jmpOffset +& Mux(pd.rvcMask(pd.jmpOffset), 1.U, 2.U) 331 init_entry.pftAddr := Mux(entry_has_jmp, jmpPft, getLower(io.start_addr) + ((FetchWidth*4)>>instOffsetBits).U + Mux(last_br_rvi, 1.U, 0.U)) 332 init_entry.carry := Mux(entry_has_jmp, jmpPft(carryPos-instOffsetBits), io.start_addr(carryPos-1) || (io.start_addr(carryPos-2, instOffsetBits).andR && last_br_rvi)) 333 init_entry.isJalr := new_jmp_is_jalr 334 init_entry.isCall := new_jmp_is_call 335 init_entry.isRet := new_jmp_is_ret 336 init_entry.last_is_rvc := Mux(entry_has_jmp, pd.rvcMask(pd.jmpOffset), pd.rvcMask.last) 337 338 init_entry.oversize := last_br_rvi || last_jmp_rvi 339 340 // if hit, check whether a new cfi(only br is possible) is detected 341 val oe = io.old_entry 342 val br_recorded_vec = oe.getBrRecordedVec(io.cfiIndex.bits) 343 val br_recorded = br_recorded_vec.asUInt.orR 344 val is_new_br = 
cfi_is_br && !br_recorded 345 val new_br_offset = io.cfiIndex.bits 346 // vec(i) means new br will be inserted BEFORE old br(i) 347 val allBrSlotsVec = oe.allSlotsForBr 348 val new_br_insert_onehot = VecInit((0 until numBr).map{ 349 i => i match { 350 case 0 => 351 !allBrSlotsVec(0).valid || new_br_offset < allBrSlotsVec(0).offset 352 case idx => 353 allBrSlotsVec(idx-1).valid && new_br_offset > allBrSlotsVec(idx-1).offset && 354 (!allBrSlotsVec(idx).valid || new_br_offset < allBrSlotsVec(idx).offset) 355 } 356 }) 357 358 val old_entry_modified = WireInit(io.old_entry) 359 for (i <- 0 until numBr) { 360 val slot = old_entry_modified.allSlotsForBr(i) 361 when (new_br_insert_onehot(i)) { 362 slot.valid := true.B 363 slot.offset := new_br_offset 364 slot.setLowerStatByTarget(io.start_addr, io.target, shareTailSlot && i == numBr-1) 365 old_entry_modified.always_taken(i) := true.B 366 }.elsewhen (new_br_offset > oe.allSlotsForBr(i).offset) { 367 old_entry_modified.always_taken(i) := false.B 368 // all other fields remain unchanged 369 }.otherwise { 370 // case i == 0, remain unchanged 371 if (i != 0) { 372 val noNeedToMoveFromFormerSlot = (shareTailSlot && i == numBr-1).B && !oe.brSlots.last.valid 373 when (!noNeedToMoveFromFormerSlot) { 374 slot.fromAnotherSlot(oe.allSlotsForBr(i-1)) 375 old_entry_modified.always_taken(i) := oe.always_taken(i) 376 } 377 } 378 } 379 } 380 381 // two circumstances: 382 // 1. oe: | br | j |, new br should be in front of j, thus addr of j should be new pft 383 // 2. oe: | br | br |, new br could be anywhere between, thus new pft is the addr of either 384 // the previous last br or the new br 385 val may_have_to_replace = oe.noEmptySlotForNewBr 386 val pft_need_to_change = is_new_br && may_have_to_replace 387 // it should either be the given last br or the new br 388 when (pft_need_to_change) { 389 val new_pft_offset = 390 Mux(!new_br_insert_onehot.asUInt.orR, 391 new_br_offset, oe.allSlotsForBr.last.offset) 392 393 // set jmp to invalid 394 if (!shareTailSlot) { 395 old_entry_modified.tailSlot.valid := false.B 396 } 397 old_entry_modified.pftAddr := getLower(io.start_addr) + new_pft_offset 398 old_entry_modified.last_is_rvc := pd.rvcMask(new_pft_offset - 1.U) // TODO: fix this 399 old_entry_modified.carry := (getLower(io.start_addr) +& new_pft_offset).head(1).asBool 400 old_entry_modified.oversize := false.B 401 old_entry_modified.isCall := false.B 402 old_entry_modified.isRet := false.B 403 old_entry_modified.isJalr := false.B 404 } 405 406 val old_entry_jmp_target_modified = WireInit(oe) 407 val old_target = oe.tailSlot.getTarget(io.start_addr) // may be wrong because we store only 20 lowest bits 408 val old_tail_is_jmp = !oe.tailSlot.sharing || !shareTailSlot.B 409 val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) && old_tail_is_jmp // TODO: pass full jalr target 410 when (jalr_target_modified) { 411 old_entry_jmp_target_modified.setByJmpTarget(io.start_addr, io.target) 412 old_entry_jmp_target_modified.always_taken := 0.U.asTypeOf(Vec(numBr, Bool())) 413 } 414 415 val old_entry_always_taken = WireInit(oe) 416 val always_taken_modified_vec = Wire(Vec(numBr, Bool())) // whether modified or not 417 for (i <- 0 until numBr) { 418 old_entry_always_taken.always_taken(i) := 419 oe.always_taken(i) && io.cfiIndex.valid && oe.brValids(i) && io.cfiIndex.bits === oe.brOffset(i) 420 always_taken_modified_vec(i) := oe.always_taken(i) && !old_entry_always_taken.always_taken(i) 421 } 422 val always_taken_modified = always_taken_modified_vec.reduce(_||_) 423 
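  // The three candidate entries above are prioritized when choosing what to write back:
  // inserting a newly found branch outranks fixing a jalr target, which outranks merely
  // clearing stale always_taken bits; a miss bypasses all of them and uses the freshly
  // built init_entry. A condensed view of the selection performed below (illustrative
  // restatement, not additional logic):
  //   new_entry = !hit                 ? init_entry :
  //               is_new_br            ? old_entry_modified :
  //               jalr_target_modified ? old_entry_jmp_target_modified :
  //                                      old_entry_always_taken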
424 425 426 val derived_from_old_entry = 427 Mux(is_new_br, old_entry_modified, 428 Mux(jalr_target_modified, old_entry_jmp_target_modified, old_entry_always_taken)) 429 430 431 io.new_entry := Mux(!hit, init_entry, derived_from_old_entry) 432 433 io.new_br_insert_pos := new_br_insert_onehot 434 io.taken_mask := VecInit((io.new_entry.brOffset zip io.new_entry.brValids).map{ 435 case (off, v) => io.cfiIndex.bits === off && io.cfiIndex.valid && v 436 }) 437 for (i <- 0 until numBr) { 438 io.mispred_mask(i) := io.new_entry.brValids(i) && io.mispredict_vec(io.new_entry.brOffset(i)) 439 } 440 io.mispred_mask.last := io.new_entry.jmpValid && io.mispredict_vec(pd.jmpOffset) 441 442 // for perf counters 443 io.is_init_entry := !hit 444 io.is_old_entry := hit && !is_new_br && !jalr_target_modified && !always_taken_modified 445 io.is_new_br := hit && is_new_br 446 io.is_jalr_target_modified := hit && jalr_target_modified 447 io.is_always_taken_modified := hit && always_taken_modified 448 io.is_br_full := hit && is_new_br && may_have_to_replace 449} 450 451class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper 452 with HasBackendRedirectInfo with BPUUtils with HasBPUConst { 453 val io = IO(new Bundle { 454 val fromBpu = Flipped(new BpuToFtqIO) 455 val fromIfu = Flipped(new IfuToFtqIO) 456 val fromBackend = Flipped(new CtrlToFtqIO) 457 458 val toBpu = new FtqToBpuIO 459 val toIfu = new FtqToIfuIO 460 val toBackend = new FtqToCtrlIO 461 462 val bpuInfo = new Bundle { 463 val bpRight = Output(UInt(XLEN.W)) 464 val bpWrong = Output(UInt(XLEN.W)) 465 } 466 }) 467 io.bpuInfo := DontCare 468 469 val robFlush = io.fromBackend.robFlush 470 val stage2Redirect = io.fromBackend.stage2Redirect 471 val stage3Redirect = io.fromBackend.stage3Redirect 472 473 val stage2Flush = stage2Redirect.valid || robFlush.valid 474 val backendFlush = stage2Flush || RegNext(stage2Flush) 475 val ifuFlush = Wire(Bool()) 476 477 val flush = stage2Flush || RegNext(stage2Flush) 478 479 val allowBpuIn, allowToIfu = WireInit(false.B) 480 val flushToIfu = !allowToIfu 481 allowBpuIn := !ifuFlush && !robFlush.valid && !stage2Redirect.valid && !stage3Redirect.valid 482 allowToIfu := !ifuFlush && !robFlush.valid && !stage2Redirect.valid && !stage3Redirect.valid 483 484 val bpuPtr, ifuPtr, ifuWbPtr, commPtr = RegInit(FtqPtr(false.B, 0.U)) 485 val validEntries = distanceBetween(bpuPtr, commPtr) 486 487 // ********************************************************************** 488 // **************************** enq from bpu **************************** 489 // ********************************************************************** 490 val new_entry_ready = validEntries < FtqSize.U 491 io.fromBpu.resp.ready := new_entry_ready 492 493 val bpu_s2_resp = io.fromBpu.resp.bits.s2 494 // val bpu_s3_resp = io.fromBpu.resp.bits.s3 495 val bpu_s2_redirect = bpu_s2_resp.valid && bpu_s2_resp.hasRedirect 496 // val bpu_s3_redirect = bpu_s3_resp.valid && bpu_s3_resp.hasRedirect 497 498 io.toBpu.enq_ptr := bpuPtr 499 val enq_fire = io.fromBpu.resp.fire() && allowBpuIn // from bpu s1 500 val bpu_in_fire = (io.fromBpu.resp.fire() || bpu_s2_redirect/* || bpu_s3_redirect */) && allowBpuIn 501 502 val bpu_in_resp = WireInit(io.fromBpu.resp.bits.selectedResp) 503 val bpu_in_stage = WireInit(io.fromBpu.resp.bits.selectedRespIdx) 504 val bpu_in_resp_ptr = Mux(bpu_in_stage === BP_S1, bpuPtr, bpu_in_resp.ftq_idx) 505 val bpu_in_resp_idx = bpu_in_resp_ptr.value 506 507 // read ports: jumpPc + redirects + loadPred + robFlush + ifuReq1 + ifuReq2 + 
commitUpdate
  val ftq_pc_mem = Module(new SyncDataModuleTemplate(new Ftq_RF_Components, FtqSize, 1+numRedirect+2+1+1+1, 1))
  // write port 0: the bpu response selected at enqueue
  ftq_pc_mem.io.wen(0) := bpu_in_fire
  ftq_pc_mem.io.waddr(0) := bpu_in_resp_idx
  ftq_pc_mem.io.wdata(0).fromBranchPrediction(bpu_in_resp)

  // read ports: ifuRedirect + backendRedirect + commit
  val ftq_redirect_sram = Module(new FtqNRSRAM(new Ftq_Redirect_SRAMEntry, 1+1+1))
  // this info is intended to be enqueued at the last stage of bpu
  ftq_redirect_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
  ftq_redirect_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_redirect_sram.io.wdata.fromBranchPrediction(io.fromBpu.resp.bits.lastStage)

  val ftq_meta_1r_sram = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, 1))
  // this info is intended to be enqueued at the last stage of bpu
  ftq_meta_1r_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
  ftq_meta_1r_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_meta_1r_sram.io.wdata.meta := io.fromBpu.resp.bits.meta
  // read ports: ifuRedirect + backendRedirect + commit
  val ftb_entry_mem = Module(new SyncDataModuleTemplate(new FTBEntry, FtqSize, 1+1+1, 1))
  ftb_entry_mem.io.wen(0) := io.fromBpu.resp.bits.lastStage.valid
  ftb_entry_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftb_entry_mem.io.wdata(0) := io.fromBpu.resp.bits.lastStage.ftb_entry


  // multi-write
  val update_target = Reg(Vec(FtqSize, UInt(VAddrBits.W)))
  val cfiIndex_vec = Reg(Vec(FtqSize, ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))))
  val mispredict_vec = Reg(Vec(FtqSize, Vec(PredictWidth, Bool())))
  val pred_stage = Reg(Vec(FtqSize, UInt(2.W)))

  // per-instruction commit state of each FTQ entry
  val c_invalid :: c_valid :: c_commited :: Nil = Enum(3)
  val commitStateQueue = RegInit(VecInit(Seq.fill(FtqSize) {
    VecInit(Seq.fill(PredictWidth)(c_invalid))
  }))

  // whether an entry still has to be sent to ifu
  val f_to_send :: f_sent :: Nil = Enum(2)
  val entry_fetch_status = RegInit(VecInit(Seq.fill(FtqSize)(f_sent)))

  // FTB hit status of each entry, consumed by the update path
  val h_not_hit :: h_false_hit :: h_hit :: Nil = Enum(3)
  val entry_hit_status = RegInit(VecInit(Seq.fill(FtqSize)(h_not_hit)))


  when (bpu_in_fire) {
    entry_fetch_status(bpu_in_resp_idx) := f_to_send
    commitStateQueue(bpu_in_resp_idx) := VecInit(Seq.fill(PredictWidth)(c_invalid))
    cfiIndex_vec(bpu_in_resp_idx) := bpu_in_resp.genCfiIndex
    mispredict_vec(bpu_in_resp_idx) := WireInit(VecInit(Seq.fill(PredictWidth)(false.B)))
    update_target(bpu_in_resp_idx) := bpu_in_resp.target
    pred_stage(bpu_in_resp_idx) := bpu_in_stage
  }

  bpuPtr := bpuPtr + enq_fire
  ifuPtr := ifuPtr + io.toIfu.req.fire

  // only use ftb result to assign hit status
  when (bpu_s2_resp.valid) {
    entry_hit_status(bpu_s2_resp.ftq_idx.value) := Mux(bpu_s2_resp.preds.hit, h_hit, h_not_hit)
  }


  io.toIfu.flushFromBpu.s2.valid := bpu_s2_redirect
  io.toIfu.flushFromBpu.s2.bits := bpu_s2_resp.ftq_idx
  when (bpu_s2_resp.valid && bpu_s2_resp.hasRedirect) {
    bpuPtr := bpu_s2_resp.ftq_idx + 1.U
    // only when ifuPtr runs ahead of bpu s2 resp should we recover it
    when (!isBefore(ifuPtr, bpu_s2_resp.ftq_idx)) {
      ifuPtr := bpu_s2_resp.ftq_idx
    }
  }

  // io.toIfu.flushFromBpu.s3.valid := bpu_s3_redirect
  // io.toIfu.flushFromBpu.s3.bits := bpu_s3_resp.ftq_idx
  // when (bpu_s3_resp.valid && bpu_s3_resp.hasRedirect) {
  //   bpuPtr := bpu_s3_resp.ftq_idx + 1.U
  //   // only when ifuPtr runs ahead of bpu s2 resp should we recover it
  //   when (!isBefore(ifuPtr, bpu_s3_resp.ftq_idx)) {
  //     ifuPtr := bpu_s3_resp.ftq_idx
  //   }
  //   XSError(true.B, "\ns3_redirect mechanism not implemented!\n")
  // }

  XSError(isBefore(bpuPtr, ifuPtr) && !isFull(bpuPtr, ifuPtr), "\nifuPtr is before bpuPtr!\n")

  // ****************************************************************
  // **************************** to ifu ****************************
  // ****************************************************************
  val bpu_in_bypass_buf = RegEnable(ftq_pc_mem.io.wdata(0), enable=bpu_in_fire)
  val bpu_in_bypass_ptr = RegNext(bpu_in_resp_ptr)
  val last_cycle_bpu_in = RegNext(bpu_in_fire)
  val last_cycle_to_ifu_fire = RegNext(io.toIfu.req.fire)

  // read pc and target
  ftq_pc_mem.io.raddr.init.init.last := ifuPtr.value
  ftq_pc_mem.io.raddr.init.last := (ifuPtr+1.U).value

  io.toIfu.req.valid := allowToIfu && entry_fetch_status(ifuPtr.value) === f_to_send && ifuPtr =/= bpuPtr
  io.toIfu.req.bits.ftqIdx := ifuPtr
  io.toIfu.req.bits.target := update_target(ifuPtr.value)
  io.toIfu.req.bits.ftqOffset := cfiIndex_vec(ifuPtr.value)

  // select the pc bundle for ifu: bypass the entry written by bpu last cycle if it is
  // the one being fetched; otherwise take the read port that matches the current ifuPtr
  // (the ifuPtr+1 port if a request fired last cycle, the ifuPtr port if not)
  when (last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) {
    io.toIfu.req.bits.fromFtqPcBundle(bpu_in_bypass_buf)
  }.elsewhen (last_cycle_to_ifu_fire) {
    io.toIfu.req.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.last)
  }.otherwise {
    io.toIfu.req.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.init.last)
  }

  // when fall through is smaller in value than start address, there must be a false hit
  when (io.toIfu.req.bits.fallThruError && entry_hit_status(ifuPtr.value) === h_hit) {
    when (io.toIfu.req.fire &&
      !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr)/* &&
      !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr) */
    ) {
      entry_hit_status(ifuPtr.value) := h_false_hit
      XSDebug(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallThru: %x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.fallThruAddr)
    }
    XSDebug(true.B, "fallThruError! 
start:%x, fallThru:%x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.fallThruAddr) 627 } 628 629 val ifu_req_should_be_flushed = 630 io.toIfu.flushFromBpu.shouldFlushByStage2(io.toIfu.req.bits.ftqIdx)/* || 631 io.toIfu.flushFromBpu.shouldFlushByStage3(io.toIfu.req.bits.ftqIdx) */ 632 633 when (io.toIfu.req.fire && !ifu_req_should_be_flushed) { 634 entry_fetch_status(ifuPtr.value) := f_sent 635 } 636 637 638 // ********************************************************************* 639 // **************************** wb from ifu **************************** 640 // ********************************************************************* 641 val pdWb = io.fromIfu.pdWb 642 val pds = pdWb.bits.pd 643 val ifu_wb_valid = pdWb.valid 644 val ifu_wb_idx = pdWb.bits.ftqIdx.value 645 // read ports: commit update 646 val ftq_pd_mem = Module(new SyncDataModuleTemplate(new Ftq_pd_Entry, FtqSize, 1, 1)) 647 ftq_pd_mem.io.wen(0) := ifu_wb_valid 648 ftq_pd_mem.io.waddr(0) := pdWb.bits.ftqIdx.value 649 ftq_pd_mem.io.wdata(0).fromPdWb(pdWb.bits) 650 651 val hit_pd_valid = entry_hit_status(ifu_wb_idx) === h_hit && ifu_wb_valid 652 val hit_pd_mispred = hit_pd_valid && pdWb.bits.misOffset.valid 653 val hit_pd_mispred_reg = RegNext(hit_pd_mispred, init=false.B) 654 val pd_reg = RegEnable(pds, enable = pdWb.valid) 655 val start_pc_reg = RegEnable(pdWb.bits.pc(0), enable = pdWb.valid) 656 val wb_idx_reg = RegEnable(ifu_wb_idx, enable = pdWb.valid) 657 658 when (ifu_wb_valid) { 659 val comm_stq_wen = VecInit(pds.map(_.valid).zip(pdWb.bits.instrRange).map{ 660 case (v, inRange) => v && inRange 661 }) 662 (commitStateQueue(ifu_wb_idx) zip comm_stq_wen).map{ 663 case (qe, v) => when (v) { qe := c_valid } 664 } 665 } 666 667 ifuWbPtr := ifuWbPtr + ifu_wb_valid 668 669 ftb_entry_mem.io.raddr.head := ifu_wb_idx 670 val has_false_hit = WireInit(false.B) 671 when (RegNext(hit_pd_valid)) { 672 // check for false hit 673 val pred_ftb_entry = ftb_entry_mem.io.rdata.head 674 val brSlots = pred_ftb_entry.brSlots 675 val tailSlot = pred_ftb_entry.tailSlot 676 // we check cfis that bpu predicted 677 678 // bpu predicted branches but denied by predecode 679 val br_false_hit = 680 brSlots.map{ 681 s => s.valid && !(pd_reg(s.offset).valid && pd_reg(s.offset).isBr) 682 }.reduce(_||_) || 683 (shareTailSlot.B && tailSlot.valid && pred_ftb_entry.tailSlot.sharing && 684 !(pd_reg(tailSlot.offset).valid && pd_reg(tailSlot.offset).isBr)) 685 686 val jmpOffset = tailSlot.offset 687 val jmp_pd = pd_reg(jmpOffset) 688 val jal_false_hit = pred_ftb_entry.jmpValid && 689 ((pred_ftb_entry.isJal && !(jmp_pd.valid && jmp_pd.isJal)) || 690 (pred_ftb_entry.isJalr && !(jmp_pd.valid && jmp_pd.isJalr)) || 691 (pred_ftb_entry.isCall && !(jmp_pd.valid && jmp_pd.isCall)) || 692 (pred_ftb_entry.isRet && !(jmp_pd.valid && jmp_pd.isRet)) 693 ) 694 695 has_false_hit := br_false_hit || jal_false_hit || hit_pd_mispred_reg 696 XSDebug(has_false_hit, "FTB false hit by br or jal or hit_pd, startAddr: %x\n", pdWb.bits.pc(0)) 697 698 // assert(!has_false_hit) 699 } 700 701 when (has_false_hit) { 702 entry_hit_status(wb_idx_reg) := h_false_hit 703 } 704 705 706 // ********************************************************************** 707 // **************************** backend read **************************** 708 // ********************************************************************** 709 710 // pc reads 711 for ((req, i) <- io.toBackend.pc_reads.zipWithIndex) { 712 ftq_pc_mem.io.raddr(i) := req.ptr.value 713 req.data := 
ftq_pc_mem.io.rdata(i).getPc(RegNext(req.offset)) 714 } 715 // target read 716 io.toBackend.target_read.data := RegNext(update_target(io.toBackend.target_read.ptr.value)) 717 718 // ******************************************************************************* 719 // **************************** redirect from backend **************************** 720 // ******************************************************************************* 721 722 // redirect read cfiInfo, couples to redirectGen s2 723 ftq_redirect_sram.io.ren.init.last := io.fromBackend.stage2Redirect.valid 724 ftq_redirect_sram.io.raddr.init.last := io.fromBackend.stage2Redirect.bits.ftqIdx.value 725 726 ftb_entry_mem.io.raddr.init.last := io.fromBackend.stage2Redirect.bits.ftqIdx.value 727 728 val stage3CfiInfo = ftq_redirect_sram.io.rdata.init.last 729 val fromBackendRedirect = WireInit(io.fromBackend.stage3Redirect) 730 val backendRedirectCfi = fromBackendRedirect.bits.cfiUpdate 731 backendRedirectCfi.fromFtqRedirectSram(stage3CfiInfo) 732 733 val r_ftb_entry = ftb_entry_mem.io.rdata.init.last 734 val r_ftqOffset = fromBackendRedirect.bits.ftqOffset 735 736 when (entry_hit_status(fromBackendRedirect.bits.ftqIdx.value) === h_hit) { 737 backendRedirectCfi.shift := PopCount(r_ftb_entry.getBrMaskByOffset(r_ftqOffset)) +& 738 (backendRedirectCfi.pd.isBr && !r_ftb_entry.brIsSaved(r_ftqOffset) && 739 !r_ftb_entry.newBrCanNotInsert(r_ftqOffset)) 740 741 backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr && (r_ftb_entry.brIsSaved(r_ftqOffset) || 742 !r_ftb_entry.newBrCanNotInsert(r_ftqOffset)) 743 }.otherwise { 744 backendRedirectCfi.shift := (backendRedirectCfi.pd.isBr && backendRedirectCfi.taken).asUInt 745 backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr.asUInt 746 } 747 748 749 // *************************************************************************** 750 // **************************** redirect from ifu **************************** 751 // *************************************************************************** 752 val fromIfuRedirect = WireInit(0.U.asTypeOf(Valid(new Redirect))) 753 fromIfuRedirect.valid := pdWb.valid && pdWb.bits.misOffset.valid && !backendFlush 754 fromIfuRedirect.bits.ftqIdx := pdWb.bits.ftqIdx 755 fromIfuRedirect.bits.ftqOffset := pdWb.bits.misOffset.bits 756 fromIfuRedirect.bits.level := RedirectLevel.flushAfter 757 758 val ifuRedirectCfiUpdate = fromIfuRedirect.bits.cfiUpdate 759 ifuRedirectCfiUpdate.pc := pdWb.bits.pc(pdWb.bits.misOffset.bits) 760 ifuRedirectCfiUpdate.pd := pdWb.bits.pd(pdWb.bits.misOffset.bits) 761 ifuRedirectCfiUpdate.predTaken := cfiIndex_vec(pdWb.bits.ftqIdx.value).valid 762 ifuRedirectCfiUpdate.target := pdWb.bits.target 763 ifuRedirectCfiUpdate.taken := pdWb.bits.cfiOffset.valid 764 ifuRedirectCfiUpdate.isMisPred := pdWb.bits.misOffset.valid 765 766 val ifuRedirectReg = RegNext(fromIfuRedirect, init=0.U.asTypeOf(Valid(new Redirect))) 767 val ifuRedirectToBpu = WireInit(ifuRedirectReg) 768 ifuFlush := fromIfuRedirect.valid || ifuRedirectToBpu.valid 769 770 ftq_redirect_sram.io.ren.head := fromIfuRedirect.valid 771 ftq_redirect_sram.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value 772 773 ftb_entry_mem.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value 774 775 val toBpuCfi = ifuRedirectToBpu.bits.cfiUpdate 776 toBpuCfi.fromFtqRedirectSram(ftq_redirect_sram.io.rdata.head) 777 when (ifuRedirectReg.bits.cfiUpdate.pd.isRet) { 778 toBpuCfi.target := toBpuCfi.rasEntry.retAddr 779 } 780 781 // ********************************************************************* 
782 // **************************** wb from exu **************************** 783 // ********************************************************************* 784 785 def extractRedirectInfo(wb: Valid[Redirect]) = { 786 val ftqIdx = wb.bits.ftqIdx.value 787 val ftqOffset = wb.bits.ftqOffset 788 val taken = wb.bits.cfiUpdate.taken 789 val mispred = wb.bits.cfiUpdate.isMisPred 790 (wb.valid, ftqIdx, ftqOffset, taken, mispred) 791 } 792 793 // fix mispredict entry 794 val lastIsMispredict = RegNext( 795 stage2Redirect.valid && stage2Redirect.bits.level === RedirectLevel.flushAfter, init = false.B 796 ) 797 798 def updateCfiInfo(redirect: Valid[Redirect], isBackend: Boolean = true) = { 799 val (r_valid, r_idx, r_offset, r_taken, r_mispred) = extractRedirectInfo(redirect) 800 val cfiIndex_bits_wen = r_valid && r_taken && r_offset < cfiIndex_vec(r_idx).bits 801 val cfiIndex_valid_wen = r_valid && r_offset === cfiIndex_vec(r_idx).bits 802 when (cfiIndex_bits_wen || cfiIndex_valid_wen) { 803 cfiIndex_vec(r_idx).valid := cfiIndex_bits_wen || cfiIndex_valid_wen && r_taken 804 } 805 when (cfiIndex_bits_wen) { 806 cfiIndex_vec(r_idx).bits := r_offset 807 } 808 update_target(r_idx) := redirect.bits.cfiUpdate.target 809 if (isBackend) { 810 mispredict_vec(r_idx)(r_offset) := r_mispred 811 } 812 } 813 814 when(stage3Redirect.valid && lastIsMispredict) { 815 updateCfiInfo(stage3Redirect) 816 }.elsewhen (ifuRedirectToBpu.valid) { 817 updateCfiInfo(ifuRedirectToBpu, isBackend=false) 818 } 819 820 // *********************************************************************************** 821 // **************************** flush ptr and state queue **************************** 822 // *********************************************************************************** 823 824 val redirectVec = VecInit(robFlush, stage2Redirect, fromIfuRedirect) 825 826 // when redirect, we should reset ptrs and status queues 827 when(redirectVec.map(r => r.valid).reduce(_||_)){ 828 val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits))) 829 val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_) 830 val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level)) 831 val next = idx + 1.U 832 bpuPtr := next 833 ifuPtr := next 834 ifuWbPtr := next 835 when (notIfu) { 836 commitStateQueue(idx.value).zipWithIndex.foreach({ case (s, i) => 837 when(i.U > offset || i.U === offset && flushItSelf){ 838 s := c_invalid 839 } 840 }) 841 } 842 } 843 844 // only the valid bit is actually needed 845 io.toIfu.redirect.bits := Mux(robFlush.valid, robFlush.bits, stage2Redirect.bits) 846 io.toIfu.redirect.valid := stage2Flush 847 848 // commit 849 for (c <- io.fromBackend.rob_commits) { 850 when(c.valid) { 851 commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset) := c_commited 852 // TODO: remove this 853 // For instruction fusions, we also update the next instruction 854 when (c.bits.commitType === 4.U) { 855 commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 1.U) := c_commited 856 }.elsewhen(c.bits.commitType === 5.U) { 857 commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 2.U) := c_commited 858 }.elsewhen(c.bits.commitType === 6.U) { 859 val index = (c.bits.ftqIdx + 1.U).value 860 commitStateQueue(index)(0) := c_commited 861 }.elsewhen(c.bits.commitType === 7.U) { 862 val index = (c.bits.ftqIdx + 1.U).value 863 commitStateQueue(index)(1) := c_commited 864 } 865 } 866 } 867 868 // **************************************************************** 869 // **************************** to bpu 
**************************** 870 // **************************************************************** 871 872 io.toBpu.redirect <> Mux(fromBackendRedirect.valid, fromBackendRedirect, ifuRedirectToBpu) 873 874 val may_have_stall_from_bpu = RegInit(false.B) 875 val canCommit = commPtr =/= ifuWbPtr && !may_have_stall_from_bpu && 876 Cat(commitStateQueue(commPtr.value).map(s => { 877 s === c_invalid || s === c_commited 878 })).andR() 879 880 // commit reads 881 ftq_pc_mem.io.raddr.last := commPtr.value 882 val commit_pc_bundle = ftq_pc_mem.io.rdata.last 883 ftq_pd_mem.io.raddr.last := commPtr.value 884 val commit_pd = ftq_pd_mem.io.rdata.last 885 ftq_redirect_sram.io.ren.last := canCommit 886 ftq_redirect_sram.io.raddr.last := commPtr.value 887 val commit_spec_meta = ftq_redirect_sram.io.rdata.last 888 ftq_meta_1r_sram.io.ren(0) := canCommit 889 ftq_meta_1r_sram.io.raddr(0) := commPtr.value 890 val commit_meta = ftq_meta_1r_sram.io.rdata(0) 891 ftb_entry_mem.io.raddr.last := commPtr.value 892 val commit_ftb_entry = ftb_entry_mem.io.rdata.last 893 894 // need one cycle to read mem and srams 895 val do_commit_ptr = RegNext(commPtr) 896 val do_commit = RegNext(canCommit, init=false.B) 897 when (canCommit) { commPtr := commPtr + 1.U } 898 val commit_state = RegNext(commitStateQueue(commPtr.value)) 899 val can_commit_cfi = WireInit(cfiIndex_vec(commPtr.value)) 900 when (commitStateQueue(commPtr.value)(can_commit_cfi.bits) =/= c_commited) { 901 can_commit_cfi.valid := false.B 902 } 903 val commit_cfi = RegNext(can_commit_cfi) 904 905 val commit_mispredict = VecInit((RegNext(mispredict_vec(commPtr.value)) zip commit_state).map { 906 case (mis, state) => mis && state === c_commited 907 }) 908 val can_commit_hit = entry_hit_status(commPtr.value) 909 val commit_hit = RegNext(can_commit_hit) 910 val commit_target = RegNext(update_target(commPtr.value)) 911 val commit_valid = commit_hit === h_hit || commit_cfi.valid // hit or taken 912 913 val to_bpu_hit = can_commit_hit === h_hit || can_commit_hit === h_false_hit 914 may_have_stall_from_bpu := can_commit_cfi.valid && !to_bpu_hit && !may_have_stall_from_bpu 915 916 io.toBpu.update := DontCare 917 io.toBpu.update.valid := commit_valid && do_commit 918 val update = io.toBpu.update.bits 919 update.false_hit := commit_hit === h_false_hit 920 update.pc := commit_pc_bundle.startAddr 921 update.preds.hit := commit_hit === h_hit || commit_hit === h_false_hit 922 update.meta := commit_meta.meta 923 update.full_target := commit_target 924 update.fromFtqRedirectSram(commit_spec_meta) 925 926 val commit_real_hit = commit_hit === h_hit 927 val update_ftb_entry = update.ftb_entry 928 929 val ftbEntryGen = Module(new FTBEntryGen).io 930 ftbEntryGen.start_addr := commit_pc_bundle.startAddr 931 ftbEntryGen.old_entry := commit_ftb_entry 932 ftbEntryGen.pd := commit_pd 933 ftbEntryGen.cfiIndex := commit_cfi 934 ftbEntryGen.target := commit_target 935 ftbEntryGen.hit := commit_real_hit 936 ftbEntryGen.mispredict_vec := commit_mispredict 937 938 update_ftb_entry := ftbEntryGen.new_entry 939 update.new_br_insert_pos := ftbEntryGen.new_br_insert_pos 940 update.mispred_mask := ftbEntryGen.mispred_mask 941 update.old_entry := ftbEntryGen.is_old_entry 942 update.preds.br_taken_mask := ftbEntryGen.taken_mask 943 944 // ****************************************************************************** 945 // **************************** commit perf counters **************************** 946 // ****************************************************************************** 947 948 val 
commit_inst_mask = VecInit(commit_state.map(c => c === c_commited && do_commit)).asUInt 949 val commit_mispred_mask = commit_mispredict.asUInt 950 val commit_not_mispred_mask = ~commit_mispred_mask 951 952 val commit_br_mask = commit_pd.brMask.asUInt 953 val commit_jmp_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.jmpInfo.valid.asTypeOf(UInt(1.W))) 954 val commit_cfi_mask = (commit_br_mask | commit_jmp_mask) 955 956 val mbpInstrs = commit_inst_mask & commit_cfi_mask 957 958 val mbpRights = mbpInstrs & commit_not_mispred_mask 959 val mbpWrongs = mbpInstrs & commit_mispred_mask 960 961 io.bpuInfo.bpRight := PopCount(mbpRights) 962 io.bpuInfo.bpWrong := PopCount(mbpWrongs) 963 964 // Cfi Info 965 for (i <- 0 until PredictWidth) { 966 val pc = commit_pc_bundle.startAddr + (i * instBytes).U 967 val v = commit_state(i) === c_commited 968 val isBr = commit_pd.brMask(i) 969 val isJmp = commit_pd.jmpInfo.valid && commit_pd.jmpOffset === i.U 970 val isCfi = isBr || isJmp 971 val isTaken = commit_cfi.valid && commit_cfi.bits === i.U 972 val misPred = commit_mispredict(i) 973 // val ghist = commit_spec_meta.ghist.predHist 974 val histPtr = commit_spec_meta.histPtr 975 val predCycle = commit_meta.meta(63, 0) 976 val target = commit_target 977 978 val brIdx = OHToUInt(Reverse(Cat(update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U}))) 979 val inFtbEntry = update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U}.reduce(_||_) 980 val addIntoHist = ((commit_hit === h_hit) && inFtbEntry) || ((!(commit_hit === h_hit) && i.U === commit_cfi.bits && isBr && commit_cfi.valid)) 981 XSDebug(v && do_commit && isCfi, p"cfi_update: isBr(${isBr}) pc(${Hexadecimal(pc)}) " + 982 p"taken(${isTaken}) mispred(${misPred}) cycle($predCycle) hist(${histPtr.value}) " + 983 p"startAddr(${Hexadecimal(commit_pc_bundle.startAddr)}) AddIntoHist(${addIntoHist}) " + 984 p"brInEntry(${inFtbEntry}) brIdx(${brIdx}) target(${Hexadecimal(target)})\n") 985 } 986 987 val enq = io.fromBpu.resp 988 val perf_redirect = io.fromBackend.stage2Redirect 989 990 XSPerfAccumulate("entry", validEntries) 991 XSPerfAccumulate("bpu_to_ftq_stall", enq.valid && !enq.ready) 992 XSPerfAccumulate("mispredictRedirect", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level) 993 XSPerfAccumulate("replayRedirect", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level)) 994 XSPerfAccumulate("predecodeRedirect", fromIfuRedirect.valid) 995 996 XSPerfAccumulate("to_ifu_bubble", io.toIfu.req.ready && !io.toIfu.req.valid) 997 998 XSPerfAccumulate("to_ifu_stall", io.toIfu.req.valid && !io.toIfu.req.ready) 999 XSPerfAccumulate("from_bpu_real_bubble", !enq.valid && enq.ready && allowBpuIn) 1000 XSPerfAccumulate("bpu_to_ftq_bubble", bpuPtr === ifuPtr) 1001 1002 val from_bpu = io.fromBpu.resp.bits 1003 def in_entry_len_map_gen(resp: BranchPredictionBundle)(stage: String) = { 1004 val entry_len = (resp.ftb_entry.getFallThrough(resp.pc) - resp.pc) >> instOffsetBits 1005 val entry_len_recording_vec = (1 to PredictWidth+1).map(i => entry_len === i.U) 1006 val entry_len_map = (1 to PredictWidth+1).map(i => 1007 f"${stage}_ftb_entry_len_$i" -> (entry_len_recording_vec(i-1) && resp.valid) 1008 ).foldLeft(Map[String, UInt]())(_+_) 1009 entry_len_map 1010 } 1011 val s1_entry_len_map = in_entry_len_map_gen(from_bpu.s1)("s1") 1012 val s2_entry_len_map = in_entry_len_map_gen(from_bpu.s2)("s2") 1013 // val s3_entry_len_map = 
in_entry_len_map_gen(from_bpu.s3)("s3") 1014 1015 val to_ifu = io.toIfu.req.bits 1016 val to_ifu_entry_len = (to_ifu.fallThruAddr - to_ifu.startAddr) >> instOffsetBits 1017 val to_ifu_entry_len_recording_vec = (1 to PredictWidth+1).map(i => to_ifu_entry_len === i.U) 1018 val to_ifu_entry_len_map = (1 to PredictWidth+1).map(i => 1019 f"to_ifu_ftb_entry_len_$i" -> (to_ifu_entry_len_recording_vec(i-1) && io.toIfu.req.fire) 1020 ).foldLeft(Map[String, UInt]())(_+_) 1021 1022 1023 1024 val commit_num_inst_recording_vec = (1 to PredictWidth).map(i => PopCount(commit_inst_mask) === i.U) 1025 val commit_num_inst_map = (1 to PredictWidth).map(i => 1026 f"commit_num_inst_$i" -> (commit_num_inst_recording_vec(i-1) && do_commit) 1027 ).foldLeft(Map[String, UInt]())(_+_) 1028 1029 1030 1031 val commit_jal_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJal.asTypeOf(UInt(1.W))) 1032 val commit_jalr_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJalr.asTypeOf(UInt(1.W))) 1033 val commit_call_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasCall.asTypeOf(UInt(1.W))) 1034 val commit_ret_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasRet.asTypeOf(UInt(1.W))) 1035 1036 1037 val mbpBRights = mbpRights & commit_br_mask 1038 val mbpJRights = mbpRights & commit_jal_mask 1039 val mbpIRights = mbpRights & commit_jalr_mask 1040 val mbpCRights = mbpRights & commit_call_mask 1041 val mbpRRights = mbpRights & commit_ret_mask 1042 1043 val mbpBWrongs = mbpWrongs & commit_br_mask 1044 val mbpJWrongs = mbpWrongs & commit_jal_mask 1045 val mbpIWrongs = mbpWrongs & commit_jalr_mask 1046 val mbpCWrongs = mbpWrongs & commit_call_mask 1047 val mbpRWrongs = mbpWrongs & commit_ret_mask 1048 1049 val commit_pred_stage = RegNext(pred_stage(commPtr.value)) 1050 1051 def pred_stage_map(src: UInt, name: String) = { 1052 (0 until numBpStages).map(i => 1053 f"${name}_stage_${i+1}" -> PopCount(src.asBools.map(_ && commit_pred_stage === BP_STAGES(i))) 1054 ).foldLeft(Map[String, UInt]())(_+_) 1055 } 1056 1057 val mispred_stage_map = pred_stage_map(mbpWrongs, "mispredict") 1058 val br_mispred_stage_map = pred_stage_map(mbpBWrongs, "br_mispredict") 1059 val jalr_mispred_stage_map = pred_stage_map(mbpIWrongs, "jalr_mispredict") 1060 val correct_stage_map = pred_stage_map(mbpRights, "correct") 1061 val br_correct_stage_map = pred_stage_map(mbpBRights, "br_correct") 1062 val jalr_correct_stage_map = pred_stage_map(mbpIRights, "jalr_correct") 1063 1064 val update_valid = io.toBpu.update.valid 1065 def u(cond: Bool) = update_valid && cond 1066 val ftb_false_hit = u(update.false_hit) 1067 // assert(!ftb_false_hit) 1068 val ftb_hit = u(commit_hit === h_hit) 1069 1070 val ftb_new_entry = u(ftbEntryGen.is_init_entry) 1071 val ftb_new_entry_only_br = ftb_new_entry && !update.ftb_entry.jmpValid 1072 val ftb_new_entry_only_jmp = ftb_new_entry && !update.ftb_entry.brValids(0) 1073 val ftb_new_entry_has_br_and_jmp = ftb_new_entry && update.ftb_entry.brValids(0) && update.ftb_entry.jmpValid 1074 1075 val ftb_old_entry = u(ftbEntryGen.is_old_entry) 1076 1077 val ftb_modified_entry = u(ftbEntryGen.is_new_br || ftbEntryGen.is_jalr_target_modified || ftbEntryGen.is_always_taken_modified) 1078 val ftb_modified_entry_new_br = u(ftbEntryGen.is_new_br) 1079 val ftb_modified_entry_jalr_target_modified = u(ftbEntryGen.is_jalr_target_modified) 1080 val ftb_modified_entry_br_full = ftb_modified_entry && ftbEntryGen.is_br_full 1081 val ftb_modified_entry_always_taken = 
ftb_modified_entry && ftbEntryGen.is_always_taken_modified 1082 1083 val ftb_entry_len = (ftbEntryGen.new_entry.getFallThrough(update.pc) - update.pc) >> instOffsetBits 1084 val ftb_entry_len_recording_vec = (1 to PredictWidth+1).map(i => ftb_entry_len === i.U) 1085 val ftb_init_entry_len_map = (1 to PredictWidth+1).map(i => 1086 f"ftb_init_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_new_entry) 1087 ).foldLeft(Map[String, UInt]())(_+_) 1088 val ftb_modified_entry_len_map = (1 to PredictWidth+1).map(i => 1089 f"ftb_modified_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_modified_entry) 1090 ).foldLeft(Map[String, UInt]())(_+_) 1091 1092 val ftq_occupancy_map = (0 to FtqSize).map(i => 1093 f"ftq_has_entry_$i" ->( validEntries === i.U) 1094 ).foldLeft(Map[String, UInt]())(_+_) 1095 1096 val perfCountsMap = Map( 1097 "BpInstr" -> PopCount(mbpInstrs), 1098 "BpBInstr" -> PopCount(mbpBRights | mbpBWrongs), 1099 "BpRight" -> PopCount(mbpRights), 1100 "BpWrong" -> PopCount(mbpWrongs), 1101 "BpBRight" -> PopCount(mbpBRights), 1102 "BpBWrong" -> PopCount(mbpBWrongs), 1103 "BpJRight" -> PopCount(mbpJRights), 1104 "BpJWrong" -> PopCount(mbpJWrongs), 1105 "BpIRight" -> PopCount(mbpIRights), 1106 "BpIWrong" -> PopCount(mbpIWrongs), 1107 "BpCRight" -> PopCount(mbpCRights), 1108 "BpCWrong" -> PopCount(mbpCWrongs), 1109 "BpRRight" -> PopCount(mbpRRights), 1110 "BpRWrong" -> PopCount(mbpRWrongs), 1111 1112 "ftb_false_hit" -> PopCount(ftb_false_hit), 1113 "ftb_hit" -> PopCount(ftb_hit), 1114 "ftb_new_entry" -> PopCount(ftb_new_entry), 1115 "ftb_new_entry_only_br" -> PopCount(ftb_new_entry_only_br), 1116 "ftb_new_entry_only_jmp" -> PopCount(ftb_new_entry_only_jmp), 1117 "ftb_new_entry_has_br_and_jmp" -> PopCount(ftb_new_entry_has_br_and_jmp), 1118 "ftb_old_entry" -> PopCount(ftb_old_entry), 1119 "ftb_modified_entry" -> PopCount(ftb_modified_entry), 1120 "ftb_modified_entry_new_br" -> PopCount(ftb_modified_entry_new_br), 1121 "ftb_jalr_target_modified" -> PopCount(ftb_modified_entry_jalr_target_modified), 1122 "ftb_modified_entry_br_full" -> PopCount(ftb_modified_entry_br_full), 1123 "ftb_modified_entry_always_taken" -> PopCount(ftb_modified_entry_always_taken) 1124 ) ++ ftb_init_entry_len_map ++ ftb_modified_entry_len_map ++ s1_entry_len_map ++ 1125 s2_entry_len_map ++ /* s3_entry_len_map ++ */ 1126 to_ifu_entry_len_map ++ commit_num_inst_map ++ ftq_occupancy_map ++ 1127 mispred_stage_map ++ br_mispred_stage_map ++ jalr_mispred_stage_map ++ 1128 correct_stage_map ++ br_correct_stage_map ++ jalr_correct_stage_map 1129 1130 for((key, value) <- perfCountsMap) { 1131 XSPerfAccumulate(key, value) 1132 } 1133 1134 // --------------------------- Debug -------------------------------- 1135 // XSDebug(enq_fire, p"enq! " + io.fromBpu.resp.bits.toPrintable) 1136 XSDebug(io.toIfu.req.fire, p"fire to ifu " + io.toIfu.req.bits.toPrintable) 1137 XSDebug(do_commit, p"deq! 
[ptr] $do_commit_ptr\n") 1138 XSDebug(true.B, p"[bpuPtr] $bpuPtr, [ifuPtr] $ifuPtr, [ifuWbPtr] $ifuWbPtr [commPtr] $commPtr\n") 1139 XSDebug(true.B, p"[in] v:${io.fromBpu.resp.valid} r:${io.fromBpu.resp.ready} " + 1140 p"[out] v:${io.toIfu.req.valid} r:${io.toIfu.req.ready}\n") 1141 XSDebug(do_commit, p"[deq info] cfiIndex: $commit_cfi, $commit_pc_bundle, target: ${Hexadecimal(commit_target)}\n") 1142 1143 // def ubtbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = { 1144 // commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map { 1145 // case (((valid, pd), ans), taken) => 1146 // Mux(valid && pd.isBr, 1147 // isWrong ^ Mux(ans.hit.asBool, 1148 // Mux(ans.taken.asBool, taken && ans.target === commitEntry.target, 1149 // !taken), 1150 // !taken), 1151 // false.B) 1152 // } 1153 // } 1154 1155 // def btbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = { 1156 // commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map { 1157 // case (((valid, pd), ans), taken) => 1158 // Mux(valid && pd.isBr, 1159 // isWrong ^ Mux(ans.hit.asBool, 1160 // Mux(ans.taken.asBool, taken && ans.target === commitEntry.target, 1161 // !taken), 1162 // !taken), 1163 // false.B) 1164 // } 1165 // } 1166 1167 // def tageCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = { 1168 // commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map { 1169 // case (((valid, pd), ans), taken) => 1170 // Mux(valid && pd.isBr, 1171 // isWrong ^ (ans.taken.asBool === taken), 1172 // false.B) 1173 // } 1174 // } 1175 1176 // def loopCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = { 1177 // commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map { 1178 // case (((valid, pd), ans), taken) => 1179 // Mux(valid && (pd.isBr) && ans.hit.asBool, 1180 // isWrong ^ (!taken), 1181 // false.B) 1182 // } 1183 // } 1184 1185 // def rasCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = { 1186 // commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map { 1187 // case (((valid, pd), ans), taken) => 1188 // Mux(valid && pd.isRet.asBool /*&& taken*/ && ans.hit.asBool, 1189 // isWrong ^ (ans.target === commitEntry.target), 1190 // false.B) 1191 // } 1192 // } 1193 1194 // val ubtbRights = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), false.B) 1195 // val ubtbWrongs = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), true.B) 1196 // // btb and ubtb pred jal and jalr as well 1197 // val btbRights = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), false.B) 1198 // val btbWrongs = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), true.B) 1199 // val tageRights = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), false.B) 1200 // val tageWrongs = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), true.B) 1201 1202 // val loopRights = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), false.B) 1203 // val loopWrongs = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), true.B) 1204 1205 // val rasRights = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), false.B) 1206 // val rasWrongs = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), true.B) 1207 val perfinfo = IO(new Bundle(){ 1208 val perfEvents = Output(new PerfEventsBundle(22)) 1209 }) 1210 val perfEvents = Seq( 1211 ("bpu_s2_redirect ", bpu_s2_redirect ), 1212 // ("bpu_s3_redirect ", bpu_s3_redirect ), 1213 ("bpu_to_ftq_stall ", enq.valid && ~enq.ready ), 1214 ("mispredictRedirect ", 
perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level), 1215 ("replayRedirect ", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level) ), 1216 ("predecodeRedirect ", fromIfuRedirect.valid ), 1217 ("to_ifu_bubble ", io.toIfu.req.ready && !io.toIfu.req.valid ), 1218 ("from_bpu_real_bubble ", !enq.valid && enq.ready && allowBpuIn ), 1219 ("BpInstr ", PopCount(mbpInstrs) ), 1220 ("BpBInstr ", PopCount(mbpBRights | mbpBWrongs) ), 1221 ("BpRight ", PopCount(mbpRights) ), 1222 ("BpWrong ", PopCount(mbpWrongs) ), 1223 ("BpBRight ", PopCount(mbpBRights) ), 1224 ("BpBWrong ", PopCount(mbpBWrongs) ), 1225 ("BpJRight ", PopCount(mbpJRights) ), 1226 ("BpJWrong ", PopCount(mbpJWrongs) ), 1227 ("BpIRight ", PopCount(mbpIRights) ), 1228 ("BpIWrong ", PopCount(mbpIWrongs) ), 1229 ("BpCRight ", PopCount(mbpCRights) ), 1230 ("BpCWrong ", PopCount(mbpCWrongs) ), 1231 ("BpRRight ", PopCount(mbpRRights) ), 1232 ("BpRWrong ", PopCount(mbpRWrongs) ), 1233 ("ftb_false_hit ", PopCount(ftb_false_hit) ), 1234 ("ftb_hit ", PopCount(ftb_hit) ), 1235 ) 1236 1237 for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) { 1238 perf_out.incr_step := RegNext(perf) 1239 } 1240} 1241
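
// A minimal elaboration sketch for inspecting this module in isolation
// (illustrative only, not part of the design). It assumes a project-level
// configuration such as DefaultConfig and Chisel's ChiselStage; those names
// live outside this file and may differ in the actual build flow:
//
//   import chisel3.stage.ChiselStage
//
//   object FtqElaborate extends App {
//     implicit val p: Parameters = new DefaultConfig // hypothetical top-level config
//     (new ChiselStage).emitVerilog(new Ftq())
//   }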