xref: /XiangShan/src/main/scala/xiangshan/frontend/RAS.scala (revision a0c65233389cccd2fdffe58236fb0a7dedf6d54f)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
/*
package xiangshan.frontend

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.experimental.chiselName
import chisel3.util._
import utils._
import utility._
import xiangshan._

import scala.{Tuple2 => &}
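// Aliasing Tuple2 as `&` lets zipped collections be destructured infix-style,
// e.g. `for (a & b & c <- xs zip ys zip zs)`, as in the for-comprehensions below.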


class RASEntry()(implicit p: Parameters) extends XSBundle {
  val retAddr = UInt(VAddrBits.W)
  val ctr     = UInt(8.W) // counts repeated calls to the same return address (recursion), folded into one entry
}
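// Counter compression keeps deep recursion from overflowing the stack: pushing a
// return address equal to the current top just bumps ctr instead of allocating a
// new entry. For example, three recursive calls returning to A occupy one entry:
//   push A -> top = (A, 0); push A -> (A, 1); push A -> (A, 2)
//   pop    -> (A, 1); pop -> (A, 0); pop -> the entry itself is popped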

@chiselName
class RAS(implicit p: Parameters) extends BasePredictor {
  object RASEntry {
    def apply(retAddr: UInt, ctr: UInt): RASEntry = {
      val e = Wire(new RASEntry)
      e.retAddr := retAddr
      e.ctr := ctr
      e
    }
  }

  @chiselName
  class RASStack(val rasSize: Int) extends XSModule {
    val io = IO(new Bundle {
      val push_valid = Input(Bool())
      val pop_valid = Input(Bool())
      val spec_new_addr = Input(UInt(VAddrBits.W))

      val recover_sp = Input(UInt(log2Up(rasSize).W))
      val recover_top = Input(new RASEntry)
      val recover_valid = Input(Bool())
      val recover_push = Input(Bool())
      val recover_pop = Input(Bool())
      val recover_new_addr = Input(UInt(VAddrBits.W))

      val sp = Output(UInt(log2Up(rasSize).W))
      val top = Output(new RASEntry)
    })

    val debugIO = IO(new Bundle {
      val spec_push_entry = Output(new RASEntry)
      val spec_alloc_new = Output(Bool())
      val recover_push_entry = Output(new RASEntry)
      val recover_alloc_new = Output(Bool())
      val sp = Output(UInt(log2Up(rasSize).W))
      val topRegister = Output(new RASEntry)
      val out_mem = Output(Vec(rasSize, new RASEntry))
    })

    val stack = Mem(rasSize, new RASEntry)
    val sp = RegInit(0.U(log2Up(rasSize).W))
    val top = RegInit(0.U.asTypeOf(new RASEntry()))
    val topPtr = RegInit(0.U(log2Up(rasSize).W))
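    // `top` mirrors stack(topPtr) in a register so the current top is available
    // without a Mem read every cycle; topPtr always trails sp by one.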

    val wen = WireInit(false.B)
    val write_bypass_entry = RegInit(0.U.asTypeOf(new RASEntry()))
    val write_bypass_ptr = RegInit(0.U(log2Up(rasSize).W))
    val write_bypass_valid = RegInit(false.B)
    when (wen) {
      write_bypass_valid := true.B
    }.elsewhen (write_bypass_valid) {
      write_bypass_valid := false.B
    }

    when (write_bypass_valid) {
      stack(write_bypass_ptr) := write_bypass_entry
    }
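    // Write bypass: a stack write is first staged in the write_bypass_* registers
    // and only committed to the Mem one cycle later. Any read in the intervening
    // cycle (see the pop path below) must check the bypass registers first, or it
    // would observe stale data.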

    def ptrInc(ptr: UInt) = Mux(ptr === (rasSize-1).U, 0.U, ptr + 1.U)
    def ptrDec(ptr: UInt) = Mux(ptr === 0.U, (rasSize-1).U, ptr - 1.U)
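    // The pointers wrap around, so the stack is circular: overflow silently
    // overwrites the oldest entries and underflow is not detected (see the TODO
    // below).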

    val spec_alloc_new = io.spec_new_addr =/= top.retAddr || top.ctr.andR
    val recover_alloc_new = io.recover_new_addr =/= io.recover_top.retAddr || io.recover_top.ctr.andR
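    // i.e. a push allocates a fresh entry only when the pushed address differs
    // from the current top or the 8-bit counter is saturated (ctr.andR);
    // otherwise the push is folded into the top entry by incrementing ctr.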

    // TODO: fix overflow and underflow bugs
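    // Shared update datapath for the speculative path and the recovery path.
    // `recover` selects where sp/topPtr/top come from: on recovery they are
    // restored from the checkpoint operands even when no push/pop is redone;
    // on the speculative path they already hold the current state, so only the
    // push/pop side effects are applied.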
    def update(recover: Bool)(do_push: Bool, do_pop: Bool, do_alloc_new: Bool,
                              do_sp: UInt, do_top_ptr: UInt, do_new_addr: UInt,
                              do_top: RASEntry) = {
      when (do_push) {
        when (do_alloc_new) {
          sp     := ptrInc(do_sp)
          topPtr := do_sp
          top.retAddr := do_new_addr
          top.ctr := 0.U
          // write bypass
          wen := true.B
          write_bypass_entry := RASEntry(do_new_addr, 0.U)
          write_bypass_ptr := do_sp
        }.otherwise {
          when (recover) {
            sp := do_sp
            topPtr := do_top_ptr
            top.retAddr := do_top.retAddr
          }
          top.ctr := do_top.ctr + 1.U
          // write bypass
          wen := true.B
          write_bypass_entry := RASEntry(do_new_addr, do_top.ctr + 1.U)
          write_bypass_ptr := do_top_ptr
        }
      }.elsewhen (do_pop) {
        when (do_top.ctr === 0.U) {
          sp     := ptrDec(do_sp)
          topPtr := ptrDec(do_top_ptr)
          // read bypass
          top :=
            Mux(ptrDec(do_top_ptr) === write_bypass_ptr && write_bypass_valid,
              write_bypass_entry,
              stack.read(ptrDec(do_top_ptr))
            )
        }.otherwise {
          when (recover) {
            sp := do_sp
            topPtr := do_top_ptr
            top.retAddr := do_top.retAddr
          }
          top.ctr := do_top.ctr - 1.U
          // write bypass
          wen := true.B
          write_bypass_entry := RASEntry(do_top.retAddr, do_top.ctr - 1.U)
          write_bypass_ptr := do_top_ptr
        }
      }.otherwise {
        when (recover) {
          sp := do_sp
          topPtr := do_top_ptr
          top := do_top
          // write bypass
          wen := true.B
          write_bypass_entry := do_top
          write_bypass_ptr := do_top_ptr
        }
      }
    }

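    // A single update port is shared by both paths: when recover_valid is set,
    // every operand is taken from the recovery checkpoint; otherwise from the
    // current speculative state and the s2 inputs.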
    update(io.recover_valid)(
      Mux(io.recover_valid, io.recover_push,     io.push_valid),
      Mux(io.recover_valid, io.recover_pop,      io.pop_valid),
      Mux(io.recover_valid, recover_alloc_new,   spec_alloc_new),
      Mux(io.recover_valid, io.recover_sp,       sp),
      Mux(io.recover_valid, io.recover_sp - 1.U, topPtr),
      Mux(io.recover_valid, io.recover_new_addr, io.spec_new_addr),
      Mux(io.recover_valid, io.recover_top,      top))

    io.sp := sp
    io.top := top

    val resetIdx = RegInit(0.U(log2Ceil(rasSize).W))
    val do_reset = RegInit(true.B)
    when (do_reset) {
      stack.write(resetIdx, RASEntry(0x80000000L.U, 0.U))
    }
    resetIdx := resetIdx + do_reset
    when (resetIdx === (rasSize-1).U) {
      do_reset := false.B
    }
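    // The loop above walks the whole Mem once after reset, initializing every
    // entry to the boot address 0x80000000, one entry per cycle.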

    debugIO.spec_push_entry := RASEntry(io.spec_new_addr, Mux(spec_alloc_new, 1.U, top.ctr + 1.U))
    debugIO.spec_alloc_new := spec_alloc_new
    debugIO.recover_push_entry := RASEntry(io.recover_new_addr, Mux(recover_alloc_new, 1.U, io.recover_top.ctr + 1.U))
    debugIO.recover_alloc_new := recover_alloc_new
    debugIO.sp := sp
    debugIO.topRegister := top
    for (i <- 0 until rasSize) {
      debugIO.out_mem(i) := Mux(i.U === write_bypass_ptr && write_bypass_valid, write_bypass_entry, stack.read(i.U))
    }
  }

  val spec = Module(new RASStack(RasSize))
  val spec_ras = spec.io
  val spec_top_addr = spec_ras.top.retAddr


  val s2_spec_push = WireInit(false.B)
  val s2_spec_pop = WireInit(false.B)
  val s2_full_pred = io.in.bits.resp_in(0).s2.full_pred
  // If the last instruction of the block is an RVI call, the fall-through address
  // points into the middle of that instruction, so add 2 to get the return address.
  val s2_spec_new_addr = s2_full_pred(2).fallThroughAddr + Mux(s2_full_pred(2).last_may_be_rvi_call, 2.U, 0.U)
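  // e.g. for a 4-byte call starting at 0x1000 the return address is 0x1004, but
  // fallThroughAddr may land at 0x1002 (the middle of the call), hence the +2.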
  spec_ras.push_valid := s2_spec_push
  spec_ras.pop_valid  := s2_spec_pop
  spec_ras.spec_new_addr := s2_spec_new_addr

  // only push/pop when the call/ret is actually the taken CFI of the block
  // and s3 has not redirected this prediction
  s2_spec_push := io.s2_fire(2) && s2_full_pred(2).hit_taken_on_call && !io.s3_redirect(2)
  s2_spec_pop  := io.s2_fire(2) && s2_full_pred(2).hit_taken_on_ret  && !io.s3_redirect(2)

  val s2_jalr_target_dup = io.out.s2.full_pred.map(_.jalr_target)
  val s2_last_target_in_dup = s2_full_pred.map(_.targets.last)
  val s2_last_target_out_dup = io.out.s2.full_pred.map(_.targets.last)
  val s2_is_jalr_dup = s2_full_pred.map(_.is_jalr)
  val s2_is_ret_dup = s2_full_pred.map(_.is_ret)
  // assert(is_jalr && is_ret || !is_ret)
  val ras_enable_dup = dup(RegNext(io.ctrl.ras_enable))
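  // The *_dup collections hold duplicated copies of the same logical signal, one
  // per duplicated predictor output (presumably to ease fanout). For predicted
  // returns, the RAS top overrides the jalr target from earlier predictors.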
  for (ras_enable & s2_is_ret & s2_jalr_target <-
    ras_enable_dup zip s2_is_ret_dup zip s2_jalr_target_dup) {
    when (s2_is_ret && ras_enable) {
      s2_jalr_target := spec_top_addr
      // FIXME: should use s1 globally
    }
  }
  for (s2_lto & s2_is_jalr & s2_jalr_target & s2_lti <-
    s2_last_target_out_dup zip s2_is_jalr_dup zip s2_jalr_target_dup zip s2_last_target_in_dup) {
    s2_lto := Mux(s2_is_jalr, s2_jalr_target, s2_lti)
  }

  val s3_top_dup = io.s2_fire.map(f => RegEnable(spec_ras.top, f))
  val s3_sp = RegEnable(spec_ras.sp, io.s2_fire(2))
  val s3_spec_new_addr = RegEnable(s2_spec_new_addr, io.s2_fire(2))
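  // Snapshot the stack state visible in s2 (sampled before the s2 push/pop
  // commits) so that s3 can check the operation and recover from this snapshot
  // if it disagrees; top is latched once per duplicated s2_fire signal.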

  val s3_full_pred = io.in.bits.resp_in(0).s3.full_pred
  val s3_jalr_target_dup = io.out.s3.full_pred.map(_.jalr_target)
  val s3_last_target_in_dup = s3_full_pred.map(_.targets.last)
  val s3_last_target_out_dup = io.out.s3.full_pred.map(_.targets.last)
  val s3_is_jalr_dup = s3_full_pred.map(_.is_jalr)
  val s3_is_ret_dup = s3_full_pred.map(_.is_ret)
  // assert(is_jalr && is_ret || !is_ret)

  for (ras_enable & s3_is_ret & s3_jalr_target & s3_top <-
    ras_enable_dup zip s3_is_ret_dup zip s3_jalr_target_dup zip s3_top_dup) {
    when (s3_is_ret && ras_enable) {
      s3_jalr_target := s3_top.retAddr
      // FIXME: should use s1 globally
    }
  }
  for (s3_lto & s3_is_jalr & s3_jalr_target & s3_lti <-
    s3_last_target_out_dup zip s3_is_jalr_dup zip s3_jalr_target_dup zip s3_last_target_in_dup) {
    s3_lto := Mux(s3_is_jalr, s3_jalr_target, s3_lti)
  }

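  // Compare what s2 actually did with what the (more accurate) s3 prediction says
  // should have happened; on mismatch, raise an internal recovery (s3_recover)
  // that restores the s2 snapshot and redoes the correct push/pop.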
  val s3_pushed_in_s2 = RegEnable(s2_spec_push, io.s2_fire(2))
  val s3_popped_in_s2 = RegEnable(s2_spec_pop,  io.s2_fire(2))
  val s3_push = io.in.bits.resp_in(0).s3.full_pred(2).hit_taken_on_call
  val s3_pop  = io.in.bits.resp_in(0).s3.full_pred(2).hit_taken_on_ret

  val s3_recover = io.s3_fire(2) && (s3_pushed_in_s2 =/= s3_push || s3_popped_in_s2 =/= s3_pop)
  io.out.last_stage_spec_info.rasSp  := s3_sp
  io.out.last_stage_spec_info.rasTop := s3_top_dup(2)


  val redirect = RegNext(io.redirect)
  val do_recover = redirect.valid || s3_recover
  val recover_cfi = redirect.bits.cfiUpdate

  val retMissPred  = do_recover && redirect.bits.level === 0.U && recover_cfi.pd.isRet
  val callMissPred = do_recover && redirect.bits.level === 0.U && recover_cfi.pd.isCall
  // when we mispredict a call, we must redo the push operation;
  // similarly, when we mispredict a return, we must redo the pop
  spec_ras.recover_valid := do_recover
  spec_ras.recover_push := Mux(redirect.valid, callMissPred, s3_push)
  spec_ras.recover_pop  := Mux(redirect.valid, retMissPred, s3_pop)

  spec_ras.recover_sp  := Mux(redirect.valid, recover_cfi.rasSp, s3_sp)
  spec_ras.recover_top := Mux(redirect.valid, recover_cfi.rasEntry, s3_top_dup(2))
  spec_ras.recover_new_addr := Mux(redirect.valid, recover_cfi.pc + Mux(recover_cfi.pd.isRVC, 2.U, 4.U), s3_spec_new_addr)
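  // Two recovery sources, with redirect taking priority: a backend redirect
  // restores the checkpoint carried in cfiUpdate (recomputing the return address
  // as the call's pc plus 2 or 4 depending on RVC), while an s3 disagreement
  // restores the s2 snapshot registered above.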


  XSPerfAccumulate("ras_s3_recover", s3_recover)
  XSPerfAccumulate("ras_redirect_recover", redirect.valid)
  XSPerfAccumulate("ras_s3_and_redirect_recover_at_the_same_time", s3_recover && redirect.valid)
  // TODO: back-up stack for ras
  // use checkpoint to recover RAS

  val spec_debug = spec.debugIO
  XSDebug("----------------RAS----------------\n")
  XSDebug(" TopRegister: 0x%x   %d \n", spec_debug.topRegister.retAddr, spec_debug.topRegister.ctr)
  XSDebug("  index       addr           ctr \n")
  for (i <- 0 until RasSize) {
    XSDebug("  (%d)   0x%x      %d", i.U, spec_debug.out_mem(i).retAddr, spec_debug.out_mem(i).ctr)
    when (i.U === spec_debug.sp) { XSDebug(false, true.B, "   <----sp") }
    XSDebug(false, true.B, "\n")
  }
  XSDebug(s2_spec_push, "s2_spec_push  inAddr: 0x%x  inCtr: %d |  allocNewEntry:%d |   sp:%d \n",
    s2_spec_new_addr, spec_debug.spec_push_entry.ctr, spec_debug.spec_alloc_new, spec_debug.sp.asUInt)
  XSDebug(s2_spec_pop, "s2_spec_pop  outAddr: 0x%x \n", io.out.s2.getTarget(2))
  val s3_recover_entry = spec_debug.recover_push_entry
  XSDebug(s3_recover && s3_push, "s3_recover_push  inAddr: 0x%x  inCtr: %d |  allocNewEntry:%d |   sp:%d \n",
    s3_recover_entry.retAddr, s3_recover_entry.ctr, spec_debug.recover_alloc_new, s3_sp.asUInt)
  XSDebug(s3_recover && s3_pop, "s3_recover_pop  outAddr: 0x%x \n", io.out.s3.getTarget(2))
  val redirectUpdate = redirect.bits.cfiUpdate
  XSDebug(do_recover && callMissPred, "redirect_recover_push\n")
  XSDebug(do_recover && retMissPred, "redirect_recover_pop\n")
  XSDebug(do_recover, "redirect_recover(SP:%d retAddr:%x ctr:%d) \n",
    redirectUpdate.rasSp, redirectUpdate.rasEntry.retAddr, redirectUpdate.rasEntry.ctr)

  generatePerfEvent()
}
*/