// xref: /XiangShan/src/main/scala/xiangshan/frontend/IFU.scala (revision 6e962ad096fc22e07e37e258f79cdf715666e003)
1package xiangshan.frontend
2
3import chisel3._
4import chisel3.util._
5import device.RAMHelper
6import xiangshan._
7import utils._
8
/** Fetch-unit constants and PC helpers, mixed into modules that need them. */
trait HasIFUConst { this: XSModule =>
  // Boot PC of the fetch pipeline. TODO: make the reset vector configurable.
  val resetVector = 0x80000000L
  // log2 of the fetch-group size in bytes (FetchWidth slots of 4 bytes each).
  val groupAlign = log2Up(FetchWidth * 4)
  // Align `pc` down to the start of its fetch group (clear the low groupAlign bits).
  def groupPC(pc: UInt): UInt = pc(VAddrBits - 1, groupAlign) ## 0.U(groupAlign.W)
  // Sequential next PC: `pc` advanced by one fetch-group stride.
  def snpc(pc: UInt): UInt = pc + (1 << groupAlign).U
}
16
/** IO bundle of the instruction fetch unit. */
class IFUIO extends XSBundle
{
  // Fetched instruction packet delivered downstream (to the instruction buffer).
  val fetchPacket = DecoupledIO(new FetchPacket)
  // Backend redirect; when valid, fetch restarts from redirect.bits.target.
  val redirect = Flipped(ValidIO(new Redirect))
  // Branch update feedback arriving out of program order (forwarded to the BPU).
  val outOfOrderBrInfo = Flipped(ValidIO(new BranchUpdateInfo))
  // Branch update feedback arriving in program order (forwarded to the BPU).
  val inOrderBrInfo = Flipped(ValidIO(new BranchUpdateInfo))
  // Request channel to the (fake) ICache; carries the fetch address and a flush flag.
  val icacheReq = DecoupledIO(new FakeIcacheReq)
  // Response channel from the (fake) ICache; carries fetched words and predecode info.
  val icacheResp = Flipped(DecoupledIO(new FakeIcacheResp))
}
26
/** Common IO shell for branch predictors; concrete predictors extend this. */
class BaseBPU extends XSModule {
  val io = IO(new Bundle() {
    // Backend redirect, forwarded from the IFU.
    val redirect = Flipped(ValidIO(new Redirect))
    // Out-of-order branch update feedback.
    val outOfOrderBrInfo = Flipped(ValidIO(new BranchUpdateInfo))
    // In-order branch update feedback.
    val inOrderBrInfo = Flipped(ValidIO(new BranchUpdateInfo))
    // PC to predict for; driven by the IFU when IF1 fires.
    val in = new Bundle { val pc = Flipped(Valid(UInt(VAddrBits.W))) }
    // Fast (BTB) prediction, consumed by the IFU in IF2.
    val btbOut = ValidIO(new BranchPrediction)
    // Slow (TAGE) prediction, consumed by the IFU in IF4.
    val tageOut = Decoupled(new BranchPrediction)
    // Predecode information from the ICache response.
    val predecode = Flipped(ValidIO(new Predecode))
  })
}
38
/** Stub predictor used when the real BPU is disabled: never predicts taken. */
class FakeBPU extends BaseBPU {

  // Tie the BTB output off. Connection order matters (Chisel last-connect):
  // the bulk DontCare is refined afterwards so `redirect` is a definite false.
  io.btbOut.valid := false.B
  io.btbOut.bits <> DontCare
  io.btbOut.bits.redirect := false.B
  io.btbOut.bits.target := DontCare // redundant with the bulk connect above, kept as-is
  // TAGE output likewise never fires.
  io.tageOut.valid := false.B
  io.tageOut.bits <> DontCare
}
48
49
/**
 * Instruction Fetch Unit: a 4-stage fetch pipeline.
 *   IF1 - PC generation / update
 *   IF2 - BTB (fast) prediction response, ICache request
 *   IF3 - ICache hit check
 *   IF4 - ICache response, TAGE (slow) prediction, final target, output to IBuffer
 *
 * NOTE(review): several signals (`if1_npc`, `bpu.io.in.pc.valid`) are driven from
 * multiple places; Chisel last-connect semantics make the *textually last*
 * connection win, so statement order in this module is load-bearing.
 */
class IFU extends XSModule with HasIFUConst
{
    val io = IO(new IFUIO)
//    val bpu = if(EnableBPU) Module(new BPU) else Module(new FakeBPU)
    val bpu = Module(new FakeBPU)

    //-------------------------
    //      IF1  PC update
    //-------------------------
    //local
    // Next fetch PC. Default 0; actually selected by the when-chains below
    // (reset / sequential / BTB redirect / TAGE redirect / backend redirect,
    // in increasing priority because later connections override earlier ones).
    val if1_npc = WireInit(0.U(VAddrBits.W))
    // IF1 is valid whenever the module is out of reset.
    val if1_valid = !reset.asBool
    // Current fetch PC register, starts at the reset vector.
    val if1_pc = RegInit(resetVector.U(VAddrBits.W))
    //next
    val if2_ready = WireInit(false.B)
    val if2_snpc = snpc(if1_pc) //TODO: calculate snpc according to mask of current fetch packet
    // Pipeline flush request (driven near the bottom of this module).
    val needflush = WireInit(false.B)
    // when an RVI instruction is predicted as taken and it crosses over two fetch packets,
    // IFU should not take this branch but fetch the latter half of the instruction sequentially,
    // and take the jump target in the next fetch cycle
    val if2_lateJumpLatch = WireInit(false.B)
    val if2_lateJumpTarget = RegInit(0.U(VAddrBits.W))
    val if4_lateJumpLatch = WireInit(false.B)
    val if4_lateJumpTarget = RegInit(0.U(VAddrBits.W))

    //pipe fire
    // NOTE: `&&` binds tighter than `||`, so this is (if1_valid && if2_ready) || needflush.
    val if1_fire = if1_valid && if2_ready || needflush
    val if1_pcUpdate = if1_fire || needflush

    // Feed the predictor. NOTE(review): `bpu.io.in.pc.valid` is re-driven twice
    // further down; by last-connect only the final assignment takes effect, so
    // this one (and the IF2 one) is effectively dead.
    bpu.io.in.pc.valid := if1_fire
    bpu.io.in.pc.bits := if1_npc
    bpu.io.redirect := io.redirect
    bpu.io.inOrderBrInfo := io.inOrderBrInfo
    bpu.io.outOfOrderBrInfo := io.outOfOrderBrInfo

    XSDebug("[IF1]if1_valid:%d  ||  if1_npc:0x%x  || if1_pcUpdate:%d if1_pc:0x%x  || if2_ready:%d",if1_valid,if1_npc,if1_pcUpdate,if1_pc,if2_ready)
    XSDebug(false,if1_fire,"------IF1->fire!!!")
    XSDebug(false,true.B,"\n")

    //-------------------------
    //      IF2  btb response
    //           icache visit
    //-------------------------
    //local
    val if2_valid = RegEnable(next=if1_valid,init=false.B,enable=if1_fire)
    // IF2 reuses the IF1 PC register directly (if1_pc only advances when IF1 fires).
    val if2_pc = if1_pc
    // BTB predicts taken this cycle.
    val if2_btb_taken = bpu.io.btbOut.valid && bpu.io.btbOut.bits.redirect
    // Set in the IF4 section below (depends on TAGE state).
    val if2_btb_lateJump = WireInit(false.B)
    // Valid-instruction mask: BTB's mask when taken, otherwise all ones.
    val if2_btb_insMask = Mux(if2_btb_taken, bpu.io.btbOut.bits.instrValid.asUInt, Fill(FetchWidth*2, 1.U(1.W))) // TODO: FIX THIS
    // On a late jump, fetch sequentially first; the real target is taken next cycle.
    val if2_btb_target = Mux(if2_btb_lateJump, if2_snpc, bpu.io.btbOut.bits.target)

    // Remember "we are in the cycle after a late jump" until IF1 fires again.
    if2_lateJumpLatch := BoolStopWatch(if2_btb_lateJump, if1_fire, startHighPriority = true)
    // since late jump target should be taken after the latter half of late jump instr is fetched, we need to latch this target
    when (if2_btb_lateJump) {
      if2_lateJumpTarget := bpu.io.btbOut.bits.target
    }

    //next
    val if3_ready = WireInit(false.B)

    //pipe fire
    // IF2 advances only when the ICache accepted the request this cycle.
    val if2_fire = if2_valid && if3_ready && io.icacheReq.fire()
    if2_ready := (if2_fire) || !if2_valid

    io.icacheReq.valid := if2_valid
    io.icacheReq.bits.addr := if2_pc

    // Next-PC selection. These when-blocks drive if1_npc with increasing priority
    // (later blocks override earlier ones via last-connect).
    when(RegNext(reset.asBool) && !reset.asBool){
    //when((GTimer() === 501.U)){ //TODO:this is ugly
      XSDebug("RESET....\n")
      if1_npc := resetVector.U(VAddrBits.W)
    }.elsewhen (if2_fire) {
      // Sequential advance, unless a latched late-jump target is pending.
      if1_npc := Mux(if4_lateJumpLatch, if4_lateJumpTarget, Mux(if2_lateJumpLatch, if2_lateJumpTarget, if2_snpc))
    }.otherwise {
      // Stall: hold the current PC.
      if1_npc := if1_pc
    }

    when(if1_pcUpdate)
    {
      if1_pc := if1_npc
    }

    // when if2 fire and if2 redirects, update npc
    when(if2_fire && if2_btb_taken)
    {
      if1_npc := if2_btb_target
    }

    // Re-drive (overridden again in the IF4 section; see NOTE at the top).
    bpu.io.in.pc.valid := if1_fire && !if2_btb_lateJump

    XSDebug("[IF2]if2_valid:%d  ||  if2_pc:0x%x   || if3_ready:%d                                        ",if2_valid,if2_pc,if3_ready)
    XSDebug(false,if2_fire,"------IF2->fire!!!")
    XSDebug(false,true.B,"\n")
    XSDebug("[IF2-Icache-Req] icache_in_valid:%d  icache_in_ready:%d\n",io.icacheReq.valid,io.icacheReq.ready)
    XSDebug("[IF2-BPU-out]if2_btbTaken:%d || if2_btb_insMask:%b || if2_btb_target:0x%x \n",if2_btb_taken,if2_btb_insMask.asUInt,if2_btb_target)
    //-------------------------
    //      IF3  icache hit check
    //-------------------------
    //local
    val if3_valid = RegEnable(next=if2_valid,init=false.B,enable=if2_fire)
    val if3_pc = RegEnable(if2_pc,if2_fire)
    val if3_npc = RegEnable(if1_npc, if2_fire)
    // If this packet is the latter half of a late jump, carry the latched target;
    // if a late jump is detected *now*, carry the raw BTB target (not the snpc mux).
    val if3_btb_target = RegEnable(Mux(if2_lateJumpLatch, if2_lateJumpTarget, Mux(if2_btb_lateJump, bpu.io.btbOut.bits.target, if2_btb_target)), if2_fire)
    val if3_btb_taken = RegEnable(Mux(if2_lateJumpLatch, true.B, if2_btb_taken), if2_fire)
    // Late-jump continuation fetches only the first slot (mask = 1).
    val if3_btb_insMask = RegEnable(Mux(if2_lateJumpLatch, 1.U((FetchWidth*2).W), if2_btb_insMask), if2_fire)
    val if3_btb_lateJump = RegEnable(if2_btb_lateJump, if2_fire)

    //next
    val if4_ready = WireInit(false.B)

    //pipe fire
    val if3_fire = if3_valid && if4_ready
    if3_ready := if3_fire  || !if3_valid


    XSDebug("[IF3]if3_valid:%d  ||  if3_pc:0x%x   if3_npc:0x%x || if4_ready:%d                    ",if3_valid,if3_pc,if3_npc,if4_ready)
    XSDebug("[IF3]if3_btb_taken:%d if3_btb_insMask:%b if3_btb_lateJump:%d if3_btb_target:0x%x\n",
      if3_btb_taken, if3_btb_insMask, if3_btb_lateJump, if3_btb_target)
    XSDebug(false,if3_fire,"------IF3->fire!!!")
    XSDebug(false,true.B,"\n")

    //-------------------------
    //      IF4  icache response
    //           RAS result
    //           taget generate
    //-------------------------
    val if4_valid = RegEnable(next=if3_valid,init=false.B,enable=if3_fire)
    val if4_pc = RegEnable(if3_pc,if3_fire)
    val if4_npc = RegEnable(if3_npc,if3_fire)
    val if4_btb_target = RegEnable(if3_btb_target,if3_fire)
    val if4_btb_taken = RegEnable(if3_btb_taken,if3_fire)
    val if4_btb_insMask = RegEnable(if3_btb_insMask, if3_fire)
    val if4_btb_lateJump = RegEnable(if3_btb_lateJump, if3_fire)
    // TAGE (slow) prediction arrives in IF4 and overrides the BTB path.
    val if4_tage_taken = bpu.io.tageOut.valid && bpu.io.tageOut.bits.redirect
    val if4_tage_lateJump = if4_tage_taken && bpu.io.tageOut.bits.lateJump && !io.redirect.valid
    val if4_tage_insMask = bpu.io.tageOut.bits.instrValid
    // Sequential next PC after the valid instructions (each mask slot is 2 bytes).
    val if4_snpc = if4_pc + (PopCount(if4_tage_insMask) << 1.U)
    val if4_tage_target = Mux(if4_tage_lateJump, if4_snpc, bpu.io.tageOut.bits.target)

    // A BTB late jump is only honored when TAGE is not already redirecting.
    if2_btb_lateJump := if2_btb_taken && bpu.io.btbOut.bits.lateJump && !io.redirect.valid && !if4_tage_taken

    // Latch the TAGE late-jump condition/target until IF1 fires again.
    if4_lateJumpLatch := BoolStopWatch(if4_tage_lateJump, if1_fire, startHighPriority = true)
    when (if4_tage_lateJump) {
      if4_lateJumpTarget := bpu.io.tageOut.bits.target
    }

    // Final (effective) drive of bpu.io.in.pc.valid — wins by last-connect:
    // don't ask for a new prediction while fetching a late jump's latter half.
    bpu.io.in.pc.valid := if1_fire && !if2_btb_lateJump && !if4_tage_lateJump

    XSDebug("[IF4]if4_valid:%d  ||  if4_pc:0x%x   if4_npc:0x%x\n",if4_valid,if4_pc,if4_npc)
    XSDebug("[IF4]          if4_btb_taken:%d  if4_btb_lateJump:%d  if4_btb_insMask:%b  if4_btb_target:0x%x\n",if4_btb_taken, if4_btb_lateJump, if4_btb_insMask.asUInt, if4_btb_target)
    XSDebug("[IF4-TAGE-out]if4_tage_taken:%d if4_tage_lateJump:%d if4_tage_insMask:%b if4_tage_target:0x%x\n",if4_tage_taken,if4_tage_lateJump,if4_tage_insMask.asUInt,if4_tage_target)
    XSDebug("[IF4-ICACHE-RESP]icacheResp.valid:%d   icacheResp.ready:%d\n",io.icacheResp.valid,io.icacheResp.ready)

    // TAGE redirect: overrides the sequential/BTB npc selections above.
    when(io.icacheResp.fire() && if4_tage_taken &&if4_valid)
    {
      if1_npc := if4_tage_target
    }

    //redirect: miss predict
    // Highest-priority npc source: backend redirect (last connection to if1_npc).
    when(io.redirect.valid){
      if1_npc := io.redirect.bits.target
    }
    XSDebug(io.redirect.valid, "[IFU-REDIRECT] target:0x%x  \n", io.redirect.bits.target)


    //flush pipline
    // if(EnableBPD){needflush := (if4_valid && if4_tage_taken) || io.redirectInfo.flush() }
    // else {needflush := io.redirectInfo.flush()}
    // Flush on a TAGE-taken packet leaving IF4, or on any backend redirect.
    needflush := (if4_valid && if4_tage_taken && io.icacheResp.fire()) || io.redirect.valid
    when(needflush){
      // if2_valid := false.B
      if3_valid := false.B
      if4_valid := false.B
    }
    //flush ICache
    io.icacheReq.bits.flush := needflush

    //Output -> iBuffer
    //io.fetchPacket <> DontCare
    // NOTE(review): `GTimer() > 500.U` holds fetch output back for the first 500
    // cycles — looks like a simulation warm-up workaround; confirm before removal.
    if4_ready := io.fetchPacket.ready && (io.icacheResp.valid || !if4_valid) && (GTimer() > 500.U)
    io.fetchPacket.valid := if4_valid && !io.redirect.valid
    io.fetchPacket.bits.instrs := io.icacheResp.bits.icacheOut
    /*
    if(EnableBPU){
      io.fetchPacket.bits.mask := Mux(if4_tage_taken, Fill(FetchWidth*2, 1.U(1.W)) & if4_tage_insMask.asUInt,
                                  Mux(if4_btb_taken,  Fill(FetchWidth*2, 1.U(1.W)) & if4_btb_insMask.asUInt,
                                                      Fill(FetchWidth*2, 1.U(1.W)))
      )
    }
    else{
      io.fetchPacket.bits.mask := Fill(FetchWidth*2, 1.U(1.W)) //TODO : consider cross cacheline fetch
    }
    */
    // Mask priority: late-jump continuation (single slot) > TAGE mask > BTB mask.
    io.fetchPacket.bits.mask := Mux(if4_lateJumpLatch,  1.U((FetchWidth*2).W),
                                Mux(if4_tage_taken,     Fill(FetchWidth*2, 1.U(1.W)) & if4_tage_insMask.asUInt,
                                                        Fill(FetchWidth*2, 1.U(1.W)) & if4_btb_insMask.asUInt))
    io.fetchPacket.bits.pc := if4_pc

    // NOTE(review): "0x%xn\n" below looks like a typo for "0x%x\n"; the format
    // string is runtime behavior, so it is left untouched here.
    XSDebug(io.fetchPacket.fire,"[IFU-Out-FetchPacket] starPC:0x%x   GroupPC:0x%xn\n",if4_pc.asUInt,groupPC(if4_pc).asUInt)
    XSDebug(io.fetchPacket.fire,"[IFU-Out-FetchPacket] instrmask %b\n",io.fetchPacket.bits.mask.asUInt)
    // Per-slot predicted-next-PC (pnpc). For the slot holding the highest taken
    // branch bit, pnpc is the predicted target; for a 4-byte (RVI) instruction
    // the preceding slot gets the target too (the instruction spans two slots).
    for(i <- 0 until (FetchWidth*2)) {
      when (if4_btb_taken && !if4_tage_taken && i.U === OHToUInt(HighestBit(if4_btb_insMask.asUInt, FetchWidth*2))) {
        io.fetchPacket.bits.pnpc(i) := if4_btb_target
        if (i != 0) {
          when (!io.icacheResp.bits.predecode.isRVC(i) && !if4_btb_lateJump) {
            io.fetchPacket.bits.pnpc(i-1) := if4_btb_target
          }
        }
      }.elsewhen (if4_tage_taken && i.U === OHToUInt(HighestBit(if4_tage_insMask.asUInt, FetchWidth*2))) {
        // Late jump: report the real (latched) target, not the sequential stand-in.
        io.fetchPacket.bits.pnpc(i) := Mux(if4_tage_lateJump, bpu.io.tageOut.bits.target, if4_tage_target)
        if (i != 0) {
          when (!io.icacheResp.bits.predecode.isRVC(i) && !if4_tage_lateJump) {
            io.fetchPacket.bits.pnpc(i-1) := if4_tage_target
          }
        }
      }.otherwise {
        // Not-taken slot: fall through past this instruction (2 bytes if RVC, else 4).
        io.fetchPacket.bits.pnpc(i) := if4_pc + (i.U << 1.U) + Mux(io.icacheResp.bits.predecode.isRVC(i), 2.U, 4.U)
      }
      XSDebug(io.fetchPacket.fire,"[IFU-Out-FetchPacket] instruction %x    pnpc:0x%x\n",
        Mux((i.U)(0), io.fetchPacket.bits.instrs(i>>1)(31,16), io.fetchPacket.bits.instrs(i>>1)(15,0)),
        io.fetchPacket.bits.pnpc(i))
    }
    // Pass predictor metadata through for later branch update/commit.
    io.fetchPacket.bits.hist := bpu.io.tageOut.bits.hist
    // io.fetchPacket.bits.btbVictimWay := bpu.io.tageOut.bits.btbVictimWay
    io.fetchPacket.bits.predCtr := bpu.io.tageOut.bits.predCtr
    io.fetchPacket.bits.btbHit := bpu.io.tageOut.bits.btbHit
    io.fetchPacket.bits.tageMeta := bpu.io.tageOut.bits.tageMeta
    io.fetchPacket.bits.rasSp := bpu.io.tageOut.bits.rasSp
    io.fetchPacket.bits.rasTopCtr := bpu.io.tageOut.bits.rasTopCtr
    bpu.io.tageOut.ready := io.fetchPacket.ready

    //to BPU
    // Hand the ICache's predecode result to the predictor.
    bpu.io.predecode.valid := io.icacheResp.fire() && if4_valid
    bpu.io.predecode.bits <> io.icacheResp.bits.predecode
    //TODO: consider RVC && consider cross cacheline fetch
    bpu.io.predecode.bits.mask := Fill(FetchWidth*2, 1.U(1.W))
    bpu.io.predecode.bits.isRVC := 0.U.asTypeOf(Vec(FetchWidth*2, Bool()))
    // Same GTimer warm-up gating as if4_ready above.
    io.icacheResp.ready := io.fetchPacket.ready && (GTimer() > 500.U)

}
289}