/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.frontend

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.experimental.chiselName
import chisel3.util._
import utils._
import xiangshan._

/** One return-address-stack entry.
  *
  * `ctr` counts how many additional times the same `retAddr` has been pushed
  * on top of itself: a push of an address equal to the current top increments
  * `ctr` instead of allocating a new slot (see `RASStack.update`).
  */
class RASEntry()(implicit p: Parameters) extends XSBundle {
  val retAddr = UInt(VAddrBits.W)
  val ctr = UInt(8.W) // layer of nested call functions
}

/** Return address stack predictor.
  *
  * Pushes/pops speculatively at stage 2, re-checks the decision at stage 3 and
  * restores the stack from the saved (sp, top) pair on either an s3 mismatch
  * or a registered redirect.
  */
@chiselName
class RAS(implicit p: Parameters) extends BasePredictor {
  /** Builds a `RASEntry` wire from its two fields. */
  object RASEntry {
    def apply(retAddr: UInt, ctr: UInt): RASEntry = {
      val e = Wire(new RASEntry)
      e.retAddr := retAddr
      e.ctr := ctr
      e
    }
  }

  /** Circular stack of `rasSize` entries with a registered copy of the top entry.
    *
    * @param rasSize number of entries in the backing memory
    */
  @chiselName
  class RASStack(val rasSize: Int) extends XSModule {
    val io = IO(new Bundle {
      val push_valid = Input(Bool())
      val pop_valid = Input(Bool())
      val spec_new_addr = Input(UInt(VAddrBits.W))

      val recover_sp = Input(UInt(log2Up(rasSize).W))
      val recover_top = Input(new RASEntry)
      val recover_valid = Input(Bool())
      val recover_push = Input(Bool())
      val recover_pop = Input(Bool())
      val recover_new_addr = Input(UInt(VAddrBits.W))

      val sp = Output(UInt(log2Up(rasSize).W))
      val top = Output(new RASEntry)
    })

    val debugIO = IO(new Bundle{
      val spec_push_entry = Output(new RASEntry)
      val spec_alloc_new = Output(Bool())
      val recover_push_entry = Output(new RASEntry)
      val recover_alloc_new = Output(Bool())
      val sp = Output(UInt(log2Up(rasSize).W))
      val topRegister = Output(new RASEntry)
      // Sized by this module's own depth rather than the enclosing RasSize.
      val out_mem = Output(Vec(rasSize, new RASEntry))
    })

    val stack = Mem(rasSize, new RASEntry)
    val sp = RegInit(0.U(log2Up(rasSize).W))
    val top = Reg(new RASEntry())
    val topPtr = RegInit(0.U(log2Up(rasSize).W))

    // Write bypass: `update` latches the entry/pointer to be written and raises
    // `wen`; the memory write itself is performed one cycle later, while
    // `write_bypass_valid` is set. Readers consult the bypass registers so they
    // observe the pending write.
    val wen = WireInit(false.B)
    val write_bypass_entry = Reg(new RASEntry())
    val write_bypass_ptr = RegInit(0.U(log2Up(rasSize).W))
    val write_bypass_valid = RegInit(false.B)
    when (wen) {
      write_bypass_valid := true.B
    }.elsewhen (write_bypass_valid) {
      write_bypass_valid := false.B
    }

    when (write_bypass_valid) {
      stack(write_bypass_ptr) := write_bypass_entry
    }

    /** Circular increment / decrement of a stack pointer within [0, rasSize). */
    def ptrInc(ptr: UInt) = Mux(ptr === (rasSize-1).U, 0.U, ptr + 1.U)
    def ptrDec(ptr: UInt) = Mux(ptr === 0.U, (rasSize-1).U, ptr - 1.U)

    // A push allocates a new slot when the address differs from the current top
    // or when the top counter is saturated (all ones).
    val spec_alloc_new = io.spec_new_addr =/= top.retAddr || top.ctr.andR
    val recover_alloc_new = io.recover_new_addr =/= io.recover_top.retAddr || io.recover_top.ctr.andR

    // TODO: fix overflow and underflow bugs
    /** Applies one push / pop / restore step to sp, topPtr, top and the write bypass.
      *
      * @param recover      true when the `do_*` operands come from the recovery inputs;
      *                     in that case sp/topPtr/top are also restored from them
      * @param do_push      perform a push (takes priority over `do_pop`)
      * @param do_pop       perform a pop
      * @param do_alloc_new on push, allocate a new slot instead of bumping the top counter
      * @param do_sp        stack pointer to operate from
      * @param do_top_ptr   index of the entry mirrored by `do_top`
      * @param do_new_addr  address to push
      * @param do_top       top entry to operate from
      */
    def update(recover: Bool)(do_push: Bool, do_pop: Bool, do_alloc_new: Bool,
                              do_sp: UInt, do_top_ptr: UInt, do_new_addr: UInt,
                              do_top: RASEntry) = {
      when (do_push) {
        when (do_alloc_new) {
          sp := ptrInc(do_sp)
          topPtr := do_sp
          top.retAddr := do_new_addr
          top.ctr := 0.U
          // write bypass
          wen := true.B
          write_bypass_entry := RASEntry(do_new_addr, 0.U)
          write_bypass_ptr := do_sp
        }.otherwise {
          when (recover) {
            sp := do_sp
            topPtr := do_top_ptr
            top.retAddr := do_top.retAddr
          }
          top.ctr := do_top.ctr + 1.U
          // write bypass
          wen := true.B
          write_bypass_entry := RASEntry(do_new_addr, do_top.ctr + 1.U)
          write_bypass_ptr := do_top_ptr
        }
      }.elsewhen (do_pop) {
        when (do_top.ctr === 0.U) {
          sp := ptrDec(do_sp)
          topPtr := ptrDec(do_top_ptr)
          // read bypass: prefer the pending write if it targets the entry being exposed
          top :=
            Mux(ptrDec(do_top_ptr) === write_bypass_ptr && write_bypass_valid,
              write_bypass_entry,
              stack.read(ptrDec(do_top_ptr))
            )
        }.otherwise {
          when (recover) {
            sp := do_sp
            topPtr := do_top_ptr
            top.retAddr := do_top.retAddr
          }
          top.ctr := do_top.ctr - 1.U
          // write bypass
          wen := true.B
          write_bypass_entry := RASEntry(do_top.retAddr, do_top.ctr - 1.U)
          write_bypass_ptr := do_top_ptr
        }
      }.otherwise {
        when (recover) {
          sp := do_sp
          topPtr := do_top_ptr
          top := do_top
          // write bypass
          wen := true.B
          write_bypass_entry := do_top
          write_bypass_ptr := do_top_ptr
        }
      }
    }


    update(io.recover_valid)(
      Mux(io.recover_valid, io.recover_push,     io.push_valid),
      Mux(io.recover_valid, io.recover_pop,      io.pop_valid),
      Mux(io.recover_valid, recover_alloc_new,   spec_alloc_new),
      Mux(io.recover_valid, io.recover_sp,       sp),
      // Use the circular decrement so the recovered top pointer stays in range
      // for any rasSize (a plain `- 1.U` only wraps correctly for powers of two).
      Mux(io.recover_valid, ptrDec(io.recover_sp), topPtr),
      Mux(io.recover_valid, io.recover_new_addr, io.spec_new_addr),
      Mux(io.recover_valid, io.recover_top,      top))

    io.sp := sp
    io.top := top

    // After reset, walk every slot once and fill it with a fixed initial entry.
    val resetIdx = RegInit(0.U(log2Up(rasSize).W))
    val do_reset = RegInit(true.B)
    when (do_reset) {
      stack.write(resetIdx, RASEntry(0x80000000L.U, 0.U))
    }
    resetIdx := resetIdx + do_reset
    when (resetIdx === (rasSize-1).U) {
      do_reset := false.B
    }

    // Debug view of the entry a push would produce; a newly allocated entry
    // starts at ctr = 0, matching what `update` actually writes.
    debugIO.spec_push_entry := RASEntry(io.spec_new_addr, Mux(spec_alloc_new, 0.U, top.ctr + 1.U))
    debugIO.spec_alloc_new := spec_alloc_new
    debugIO.recover_push_entry := RASEntry(io.recover_new_addr, Mux(recover_alloc_new, 0.U, io.recover_top.ctr + 1.U))
    debugIO.recover_alloc_new := recover_alloc_new
    debugIO.sp := sp
    debugIO.topRegister := top
    for (i <- 0 until rasSize) {
      debugIO.out_mem(i) := Mux(i.U === write_bypass_ptr && write_bypass_valid, write_bypass_entry, stack.read(i.U))
    }
  }

  val spec = Module(new RASStack(RasSize))
  val spec_ras = spec.io
  val spec_top_addr = spec_ras.top.retAddr


  val s2_spec_push = WireInit(false.B)
  val s2_spec_pop  = WireInit(false.B)
  val s2_full_pred = io.in.bits.resp_in(0).s2.full_pred
  // when last inst is an rvi call, fall through address would be set to the middle of it, so an addition is needed
  val s2_spec_new_addr = s2_full_pred.fallThroughAddr + Mux(s2_full_pred.last_may_be_rvi_call, 2.U, 0.U)
  spec_ras.push_valid := s2_spec_push
  spec_ras.pop_valid  := s2_spec_pop
  spec_ras.spec_new_addr := s2_spec_new_addr

  // confirm that the call/ret is the taken cfi
  s2_spec_push := io.s2_fire && s2_full_pred.hit_taken_on_call && !io.s3_redirect
  s2_spec_pop  := io.s2_fire && s2_full_pred.hit_taken_on_ret  && !io.s3_redirect

  val s2_jalr_target = io.out.s2.full_pred.jalr_target
  val s2_last_target_in = s2_full_pred.targets.last
  val s2_last_target_out = io.out.s2.full_pred.targets.last
  val s2_is_jalr = s2_full_pred.is_jalr
  val s2_is_ret = s2_full_pred.is_ret
  // assert(is_jalr && is_ret || !is_ret)
  when(s2_is_ret && io.ctrl.ras_enable) {
    s2_jalr_target := spec_top_addr
    // FIXME: should use s1 globally
  }
  s2_last_target_out := Mux(s2_is_jalr, s2_jalr_target, s2_last_target_in)

  // Snapshot of the stack state seen by s2, carried to s3 for prediction and recovery.
  val s3_top = RegEnable(spec_ras.top, io.s2_fire)
  val s3_sp = RegEnable(spec_ras.sp, io.s2_fire)
  val s3_spec_new_addr = RegEnable(s2_spec_new_addr, io.s2_fire)

  val s3_jalr_target = io.out.s3.full_pred.jalr_target
  val s3_last_target_in = io.in.bits.resp_in(0).s3.full_pred.targets.last
  val s3_last_target_out = io.out.s3.full_pred.targets.last
  val s3_is_jalr = io.in.bits.resp_in(0).s3.full_pred.is_jalr
  val s3_is_ret = io.in.bits.resp_in(0).s3.full_pred.is_ret
  // assert(is_jalr && is_ret || !is_ret)
  when(s3_is_ret && io.ctrl.ras_enable) {
    s3_jalr_target := s3_top.retAddr
    // FIXME: should use s1 globally
  }
  s3_last_target_out := Mux(s3_is_jalr, s3_jalr_target, s3_last_target_in)

  val s3_pushed_in_s2 = RegEnable(s2_spec_push, io.s2_fire)
  val s3_popped_in_s2 = RegEnable(s2_spec_pop,  io.s2_fire)
  val s3_push = io.in.bits.resp_in(0).s3.full_pred.hit_taken_on_call
  val s3_pop  = io.in.bits.resp_in(0).s3.full_pred.hit_taken_on_ret

  // s3 disagrees with what s2 did to the stack: redo the operation from the s3 snapshot.
  val s3_recover = io.s3_fire && (s3_pushed_in_s2 =/= s3_push || s3_popped_in_s2 =/= s3_pop)
  io.out.last_stage_spec_info.rasSp  := s3_sp
  io.out.last_stage_spec_info.rasTop := s3_top


  val redirect = RegNext(io.redirect)
  val do_recover = redirect.valid || s3_recover
  val recover_cfi = redirect.bits.cfiUpdate

  // Gated by redirect.valid (not do_recover): the redirect payload is only
  // meaningful for a redirect, and both signals are only consumed under
  // Mux(redirect.valid, ...) below, so the hardware is unchanged while the
  // debug prints no longer fire on an s3-only recovery.
  val retMissPred  = redirect.valid && redirect.bits.level === 0.U && recover_cfi.pd.isRet
  val callMissPred = redirect.valid && redirect.bits.level === 0.U && recover_cfi.pd.isCall
  // when we mispredict a call, we must redo a push operation
  // similarly, when we mispredict a return, we should redo a pop
  spec_ras.recover_valid := do_recover
  spec_ras.recover_push := Mux(redirect.valid, callMissPred, s3_push)
  spec_ras.recover_pop  := Mux(redirect.valid, retMissPred, s3_pop)

  spec_ras.recover_sp  := Mux(redirect.valid, recover_cfi.rasSp, s3_sp)
  spec_ras.recover_top := Mux(redirect.valid, recover_cfi.rasEntry, s3_top)
  spec_ras.recover_new_addr := Mux(redirect.valid, recover_cfi.pc + Mux(recover_cfi.pd.isRVC, 2.U, 4.U), s3_spec_new_addr)


  XSPerfAccumulate("ras_s3_recover", s3_recover)
  XSPerfAccumulate("ras_redirect_recover", redirect.valid)
  XSPerfAccumulate("ras_s3_and_redirect_recover_at_the_same_time", s3_recover && redirect.valid)
  // TODO: back-up stack for ras
  // use checkpoint to recover RAS

  val spec_debug = spec.debugIO
  XSDebug("----------------RAS----------------\n")
  XSDebug(" TopRegister: 0x%x %d \n",spec_debug.topRegister.retAddr,spec_debug.topRegister.ctr)
  XSDebug(" index addr ctr \n")
  for(i <- 0 until RasSize){
    XSDebug(" (%d) 0x%x %d",i.U,spec_debug.out_mem(i).retAddr,spec_debug.out_mem(i).ctr)
    when(i.U === spec_debug.sp){XSDebug(false,true.B," <----sp")}
    XSDebug(false,true.B,"\n")
  }
  XSDebug(s2_spec_push, "s2_spec_push inAddr: 0x%x inCtr: %d | allocNewEntry:%d | sp:%d \n",
  s2_spec_new_addr,spec_debug.spec_push_entry.ctr,spec_debug.spec_alloc_new,spec_debug.sp.asUInt)
  XSDebug(s2_spec_pop, "s2_spec_pop outAddr: 0x%x \n",io.out.s2.getTarget)
  val s3_recover_entry = spec_debug.recover_push_entry
  XSDebug(s3_recover && s3_push, "s3_recover_push inAddr: 0x%x inCtr: %d | allocNewEntry:%d | sp:%d \n",
    s3_recover_entry.retAddr, s3_recover_entry.ctr, spec_debug.recover_alloc_new, s3_sp.asUInt)
  XSDebug(s3_recover && s3_pop, "s3_recover_pop outAddr: 0x%x \n",io.out.s3.getTarget)
  XSDebug(do_recover && callMissPred, "redirect_recover_push\n")
  XSDebug(do_recover && retMissPred, "redirect_recover_pop\n")
  XSDebug(do_recover, "redirect_recover(SP:%d retAddr:%x ctr:%d) \n",
      recover_cfi.rasSp,recover_cfi.rasEntry.retAddr,recover_cfi.rasEntry.ctr)

  generatePerfEvent()
}