package xiangshan.frontend

import chisel3._
import chisel3.util._
import xiangshan._
import utils._

class TableAddr(val idxBits: Int, val banks: Int) extends XSBundle {
  def tagBits = VAddrBits - idxBits - 2

  val tag = UInt(tagBits.W)
  val idx = UInt(idxBits.W)
  val offset = UInt(2.W)

  def fromUInt(x: UInt) = x.asTypeOf(UInt(VAddrBits.W)).asTypeOf(this)
  def getTag(x: UInt) = fromUInt(x).tag
  def getIdx(x: UInt) = fromUInt(x).idx
  def getBank(x: UInt) = getIdx(x)(log2Up(banks) - 1, 0)
  def getBankIdx(x: UInt) = getIdx(x)(idxBits - 1, log2Up(banks))
}

class BPU extends XSModule {
  val io = IO(new Bundle() {
    val flush = Input(Bool())
    val in = new Bundle { val pc = Flipped(Valid(UInt(VAddrBits.W))) }
    // val out = new Bundle { val redirect = Valid(UInt(VAddrBits.W)) }
    val predMask = Output(Vec(FetchWidth, Bool()))
    val predTargets = Output(Vec(FetchWidth, UInt(VAddrBits.W)))
  })

  val flush = BoolStopWatch(io.flush, io.in.pc.valid, startHighPriority = true)

  // The BTB makes quick predictions for branches and direct jumps. It is
  // 4-way set-associative, and each way is divided into 4 banks.
  val btbAddr = new TableAddr(log2Up(BtbSets), BtbBanks)
  def btbEntry() = new Bundle {
    val valid = Bool()
    // TODO: don't need full length of tag and target
    val tag = UInt(btbAddr.tagBits.W)
    val _type = UInt(2.W)
    val target = UInt(VAddrBits.W)
    val pred = UInt(2.W) // 2-bit saturating counter as a quick predictor
  }

  val btb = List.fill(BtbBanks)(List.fill(BtbWays)(
    Module(new SRAMTemplate(btbEntry(), set = BtbSets / BtbBanks, shouldReset = true, holdRead = true, singlePort = true))))

  // val fetchPkgAligned = btbAddr.getBank(io.in.pc.bits) === 0.U
  val HeadBank = btbAddr.getBank(io.in.pc.bits)
  // bank of the last instruction in the fetch package: pc + (FetchWidth - 1) * 4
  val TailBank = btbAddr.getBank(io.in.pc.bits + (FetchWidth.U << 2.U) - 4.U)
  for (b <- 0 until BtbBanks) {
    for (w <- 0 until BtbWays) {
      btb(b)(w).reset := reset.asBool
      btb(b)(w).io.r.req.valid := io.in.pc.valid && Mux(TailBank > HeadBank, b.U >= HeadBank && b.U <= TailBank, b.U >= TailBank || b.U <= HeadBank)
      btb(b)(w).io.r.req.bits.setIdx := btbAddr.getBankIdx(io.in.pc.bits)
    }
  }
  // latch pc for 1 cycle latency when reading SRAM
  val pcLatch = RegEnable(io.in.pc.bits, io.in.pc.valid)
  val btbRead = Wire(Vec(BtbBanks, Vec(BtbWays, btbEntry())))
  val btbHits = Wire(Vec(FetchWidth, Bool()))
  val btbTargets = Wire(Vec(FetchWidth, UInt(VAddrBits.W)))
  val btbTypes = Wire(Vec(FetchWidth, UInt(2.W)))
  // val btbPreds = Wire(Vec(FetchWidth, UInt(2.W)))
  val btbTakens = Wire(Vec(FetchWidth, Bool()))
  for (b <- 0 until BtbBanks) {
    for (w <- 0 until BtbWays) {
      btbRead(b)(w) := btb(b)(w).io.r.resp.data(0)
    }
  }
  for (i <- 0 until FetchWidth) {
    // defaults so the wires are fully initialized; they only matter when btbHits(i) is set
    btbHits(i) := false.B
    btbTargets(i) := DontCare
    btbTypes(i) := DontCare
    btbTakens(i) := false.B
    for (b <- 0 until BtbBanks) {
      when (b.U === btbAddr.getBank(pcLatch)) {
        for (w <- 0 until BtbWays) {
          when (btbRead(b)(w).valid && btbRead(b)(w).tag === btbAddr.getTag(Cat(pcLatch(VAddrBits - 1, 2), 0.U(2.W)) + (i.U << 2))) {
            btbHits(i) := !flush && RegNext(btb(b)(w).io.r.req.fire(), init = false.B)
            btbTargets(i) := btbRead(b)(w).target
            btbTypes(i) := btbRead(b)(w)._type
            // btbPreds(i) := btbRead(b)(w).pred
            btbTakens(i) := (btbRead(b)(w).pred)(1).asBool
          }
        }
      }
    }
  }

  // The JBTAC, divided into 8 banks, makes predictions for indirect jumps (except ret).
  val jbtacAddr = new TableAddr(log2Up(JbtacSize), JbtacBanks)
  def jbtacEntry() = new Bundle {
    val valid = Bool()
    // TODO: don't need full length of tag and target
    val tag = UInt(jbtacAddr.tagBits.W)
    val target = UInt(VAddrBits.W)
  }

  val jbtac = List.fill(JbtacBanks)(Module(new SRAMTemplate(jbtacEntry(), set = JbtacSize / JbtacBanks, shouldReset = true, holdRead = true, singlePort = true)))

  (0 until JbtacBanks).map(i => jbtac(i).reset := reset.asBool)
  (0 until JbtacBanks).map(i => jbtac(i).io.r.req.valid := io.in.pc.valid)
  // bank i is read with the set index of the i-th instruction in the fetch package
  (0 until JbtacBanks).map(i => jbtac(i).io.r.req.bits.setIdx := jbtacAddr.getBankIdx(Cat((io.in.pc.bits)(VAddrBits - 1, 2), 0.U(2.W)) + (i.U << 2)))

  val jbtacRead = Wire(Vec(JbtacBanks, jbtacEntry()))
  (0 until JbtacBanks).map(i => jbtacRead(i) := jbtac(i).io.r.resp.data(0))
  val jbtacHits = Wire(Vec(FetchWidth, Bool()))
  val jbtacTargets = Wire(Vec(FetchWidth, UInt(VAddrBits.W)))
  val jbtacHeadBank = jbtacAddr.getBank(Cat(pcLatch(VAddrBits - 1, 2), 0.U(2.W)))
  for (i <- 0 until FetchWidth) {
    // defaults so the wires are fully initialized; they only matter when jbtacHits(i) is set
    jbtacHits(i) := false.B
    jbtacTargets(i) := DontCare
    for (b <- 0 until JbtacBanks) {
      when (jbtacHeadBank + i.U === b.U) {
        jbtacHits(i) := jbtacRead(b).valid && jbtacRead(b).tag === jbtacAddr.getTag(Cat(pcLatch(VAddrBits - 1, 2), 0.U(2.W)) + (i.U << 2)) &&
          !flush && RegNext(jbtac(b).io.r.req.fire(), init = false.B)
        jbtacTargets(i) := jbtacRead(b).target
      }
    }
  }

  // redirect based on BTB and JBTAC
  /*
  val redirectMask = Wire(Vec(FetchWidth, Bool()))
  val redirectTarget = Wire(Vec(FetchWidth, UInt(VAddrBits.W)))
  (0 until FetchWidth).map(i => redirectMask(i) := btbHits(i) && Mux(btbTypes(i) === BTBtype.B, btbTakens(i), true.B) || jbtacHits(i))
  (0 until FetchWidth).map(i => redirectTarget(i) := Mux(btbHits(i) && !(btbTypes(i) === BTBtype.B && !btbTakens(i)), btbTargets(i), jbtacTargets(i)))
  io.out.redirect.valid := redirectMask.asUInt.orR
  io.out.redirect.bits := PriorityMux(redirectMask, redirectTarget)
  */
  // predict taken if the BTB hits a taken branch or a direct jump, or if the JBTAC hits an indirect jump
  (0 until FetchWidth).map(i => io.predMask(i) := btbHits(i) && Mux(btbTypes(i) === BTBtype.B, btbTakens(i), true.B) || jbtacHits(i))
  (0 until FetchWidth).map(i => io.predTargets(i) := Mux(btbHits(i) && !(btbTypes(i) === BTBtype.B && !btbTakens(i)), btbTargets(i), jbtacTargets(i)))

}
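
// A minimal, software-only sketch (not part of the hardware above) of how TableAddr
// splits an address: the low 2 bits are the instruction offset, the next idxBits
// bits are the set index, and the remaining bits are the tag; the bank is the low
// log2Up(banks) bits of the index (assuming banks is a power of two, as in the
// hardware). The concrete widths used in main (vaddrBits = 39, idxBits = 7,
// banks = 4) are illustrative assumptions, not necessarily the configured parameters.
object TableAddrSketch {
  def split(addr: BigInt, vaddrBits: Int, idxBits: Int, banks: Int): (BigInt, BigInt, BigInt, BigInt) = {
    val offset = addr & 0x3                                  // addr(1, 0)
    val idx    = (addr >> 2) & ((BigInt(1) << idxBits) - 1)  // addr(idxBits + 1, 2)
    val tag    = (addr >> (2 + idxBits)) & ((BigInt(1) << (vaddrBits - idxBits - 2)) - 1)
    val bank   = idx & (banks - 1)                           // getBank: low log2Up(banks) bits of idx
    (tag, idx, bank, offset)
  }

  def main(args: Array[String]): Unit = {
    val (tag, idx, bank, offset) = split(BigInt("80001234", 16), vaddrBits = 39, idxBits = 7, banks = 4)
    println(s"tag=0x${tag.toString(16)} idx=0x${idx.toString(16)} bank=$bank offset=$offset")
  }
}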