/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem.mdp

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import xiangshan._
import utils._
import utility._
import xiangshan.backend.rob.RobPtr

// store set load violation predictor
// See "Memory Dependence Prediction using Store Sets" for details

/** Store Set Identifier Table entry as seen by the rename stage. */
class SSITEntry(implicit p: Parameters) extends XSBundle {
  val valid = Bool()
  val ssid = UInt(SSIDWidth.W) // store set identifier
  val strict = Bool() // strict load wait is needed
}

/**
  * Payload part of a Store Set Identifier Table entry.
  *
  * This is what `SSIT.data_array` stores; the valid bit is kept in the
  * separate `SSIT.valid_array`.
  */
class SSITDataEntry(implicit p: Parameters) extends XSBundle {
  val ssid = UInt(SSIDWidth.W) // store set identifier
  val strict = Bool() // strict load wait is needed
}

/**
  * Store Set Identifier Table.
  *
  * Indexed by a hashed pc. Read addresses are supplied in decode and the
  * results are delivered to rename. On a memory-dependence violation update
  * (`io.update`), the table is read for both the load pc and the store pc and
  * then rewritten following the four store-set assignment rules quoted below.
  * The valid bits are periodically flushed, controlled by `resetCounter` and
  * `io.csrCtrl.lvpred_timeout`.
  */
class SSIT(implicit p: Parameters) extends XSModule {
  val io = IO(new Bundle {
    // to decode
    val raddr = Vec(DecodeWidth, Input(UInt(MemPredPCWidth.W))) // xor hashed decode pc(VaddrBits-1, 1)
    // to rename
    val rdata = Vec(RenameWidth, Output(new SSITEntry))
    // misc
    val update = Input(new MemPredUpdateReq) // RegNext should be added outside
    val csrCtrl = Input(new CustomCSRCtrlIO)
  })

  // raddrs are sent to ssit in decode
  // rdata will be sent to rename
  require(DecodeWidth == RenameWidth)

  // data sram read port allocate
  //
  // SSIT update logic will reuse decode ssit read port.
  // If io.update.valid, a redirect will be sent to frontend,
  // then decode will not need to read SSIT
  val SSIT_DECODE_READ_PORT_BASE = 0
  val SSIT_UPDATE_LOAD_READ_PORT = 0
  val SSIT_UPDATE_STORE_READ_PORT = 1
  val SSIT_READ_PORT_NUM = DecodeWidth

  // data sram write port allocate
  // load update and flush use the same write port
  val SSIT_MISC_WRITE_PORT = 0
  val SSIT_UPDATE_LOAD_WRITE_PORT = 0
  val SSIT_UPDATE_STORE_WRITE_PORT = 1
  val SSIT_WRITE_PORT_NUM = 2

  // valid bits, one per SSIT entry
  val valid_array = Module(new SyncDataModuleTemplate(
    Bool(),
    SSITSize,
    SSIT_READ_PORT_NUM,
    SSIT_WRITE_PORT_NUM
  ))

  // ssid / strict payload, one per SSIT entry
  val data_array = Module(new SyncDataModuleTemplate(
    new SSITDataEntry,
    SSITSize,
    SSIT_READ_PORT_NUM,
    SSIT_WRITE_PORT_NUM
  ))

  // TODO: use SRAM or not?

  // Default: no write on any port. The flush / update logic below overrides
  // these defaults through Chisel last-connect semantics.
  for (i <- 0 until SSIT_WRITE_PORT_NUM) {
    valid_array.io.wen(i) := false.B
    valid_array.io.waddr(i) := 0.U
    valid_array.io.wdata(i) := false.B
    data_array.io.wen(i) := false.B
    data_array.io.waddr(i) := 0.U
    data_array.io.wdata(i) := 0.U.asTypeOf(new SSITDataEntry)
  }

  // Register mirrors of the table contents, kept only for debugging
  val debug_valid = RegInit(VecInit(Seq.fill(SSITSize)(false.B)))
  val debug_ssid = Reg(Vec(SSITSize, UInt(SSIDWidth.W)))
  val debug_strict = Reg(Vec(SSITSize, Bool()))
  if (!env.FPGAPlatform) {
    dontTouch(debug_valid)
    dontTouch(debug_ssid)
    dontTouch(debug_strict)
  }

  // Free-running counter that times the periodic flush
  val resetCounter = RegInit(0.U(ResetTimeMax2Pow.W))
  resetCounter := resetCounter + 1.U

  for (i <- 0 until DecodeWidth) {
    // io.rdata(i).valid := RegNext(valid(io.raddr(i)))
    // io.rdata(i).ssid := RegNext(ssid(io.raddr(i)))
    // io.rdata(i).strict := RegNext(strict(io.raddr(i)) && valid(io.raddr(i)))

    // read SSIT in decode stage
    valid_array.io.raddr(i) := io.raddr(i)
    data_array.io.raddr(i) := io.raddr(i)

    // gen result in rename stage
    io.rdata(i).valid := valid_array.io.rdata(i)
    io.rdata(i).ssid := data_array.io.rdata(i).ssid
    io.rdata(i).strict := data_array.io.rdata(i).strict
  }

  // flush SSIT
  // reset period: ResetTimeMax2Pow
  val resetStepCounter = RegInit(0.U(log2Up(SSITSize + 1).W))
  val s_idle :: s_flush :: Nil = Enum(2)
  val state = RegInit(s_flush) // the table is flushed once right after reset

  switch (state) {
    is(s_idle) {
      // io.csrCtrl.lvpred_timeout selects which bit of
      // resetCounter(ResetTimeMax2Pow - 1, ResetTimeMin2Pow) triggers a flush
      when(resetCounter(ResetTimeMax2Pow - 1, ResetTimeMin2Pow)(RegNext(io.csrCtrl.lvpred_timeout))) {
        state := s_flush
        resetCounter := 0.U
      }
    }
    is(s_flush) {
      // invalidate one entry per cycle until every entry has been visited
      when(resetStepCounter === (SSITSize - 1).U) {
        state := s_idle // reset finished
        resetStepCounter := 0.U
      }.otherwise {
        resetStepCounter := resetStepCounter + 1.U
      }
      // NOTE(review): the load update below drives the same write port and is
      // connected later, so an update landing during a flush cycle appears to
      // replace that cycle's invalidation while resetStepCounter still
      // advances -- confirm whether skipping that entry is acceptable.
      valid_array.io.wen(SSIT_MISC_WRITE_PORT) := true.B
      valid_array.io.waddr(SSIT_MISC_WRITE_PORT) := resetStepCounter
      valid_array.io.wdata(SSIT_MISC_WRITE_PORT) := false.B
      debug_valid(resetStepCounter) := false.B
    }
  }
  XSPerfAccumulate("reset_timeout", state === s_flush && resetCounter === 0.U)

  // update SSIT if load violation redirect is detected

  // update stage 0: read ssit
  val s1_mempred_update_req_valid = RegNext(io.update.valid)
  val s1_mempred_update_req = RegEnable(io.update, io.update.valid)

  // when io.update.valid, take over ssit read port
  when (io.update.valid) {
    valid_array.io.raddr(SSIT_UPDATE_LOAD_READ_PORT) := io.update.ldpc
    valid_array.io.raddr(SSIT_UPDATE_STORE_READ_PORT) := io.update.stpc
    data_array.io.raddr(SSIT_UPDATE_LOAD_READ_PORT) := io.update.ldpc
    data_array.io.raddr(SSIT_UPDATE_STORE_READ_PORT) := io.update.stpc
  }

  // update stage 1: get ssit read result

  // Read result
  // load has already been assigned with a store set
  val s1_loadAssigned = valid_array.io.rdata(SSIT_UPDATE_LOAD_READ_PORT)
  val s1_loadOldSSID = data_array.io.rdata(SSIT_UPDATE_LOAD_READ_PORT).ssid
  val s1_loadStrict = data_array.io.rdata(SSIT_UPDATE_LOAD_READ_PORT).strict
  // store has already been assigned with a store set
  val s1_storeAssigned = valid_array.io.rdata(SSIT_UPDATE_STORE_READ_PORT)
  val s1_storeOldSSID = data_array.io.rdata(SSIT_UPDATE_STORE_READ_PORT).ssid
  val s1_storeStrict = data_array.io.rdata(SSIT_UPDATE_STORE_READ_PORT).strict
  // val s1_ssidIsSame = s1_loadOldSSID === s1_storeOldSSID

  // update stage 2, update ssit data_array
  val s2_mempred_update_req_valid = RegNext(s1_mempred_update_req_valid)
  val s2_mempred_update_req = RegEnable(s1_mempred_update_req, s1_mempred_update_req_valid)
  val s2_loadAssigned = RegEnable(s1_loadAssigned, s1_mempred_update_req_valid)
  val s2_storeAssigned = RegEnable(s1_storeAssigned, s1_mempred_update_req_valid)
  val s2_loadOldSSID = RegEnable(s1_loadOldSSID, s1_mempred_update_req_valid)
  val s2_storeOldSSID = RegEnable(s1_storeOldSSID, s1_mempred_update_req_valid)
  val s2_loadStrict = RegEnable(s1_loadStrict, s1_mempred_update_req_valid)

  val s2_ssidIsSame = s2_loadOldSSID === s2_storeOldSSID
  // a newly allocated store set id is derived from the pc via XORFold
  val s2_ldSsidAllocate = XORFold(s2_mempred_update_req.ldpc, SSIDWidth)
  val s2_stSsidAllocate = XORFold(s2_mempred_update_req.stpc, SSIDWidth)
  // both the load and the store have already been assigned store sets:
  // the store set with the smaller ID is the winner
  val s2_winnerSSID = Mux(s2_loadOldSSID < s2_storeOldSSID, s2_loadOldSSID, s2_storeOldSSID)

  /** Write the entry indexed by `pc` through the load-update write port (and mirror it to the debug regs). */
  def update_ld_ssit_entry(pc: UInt, valid: Bool, ssid: UInt, strict: Bool) = {
    valid_array.io.wen(SSIT_UPDATE_LOAD_WRITE_PORT) := true.B
    valid_array.io.waddr(SSIT_UPDATE_LOAD_WRITE_PORT) := pc
    valid_array.io.wdata(SSIT_UPDATE_LOAD_WRITE_PORT) := valid
    data_array.io.wen(SSIT_UPDATE_LOAD_WRITE_PORT) := true.B
    data_array.io.waddr(SSIT_UPDATE_LOAD_WRITE_PORT) := pc
    data_array.io.wdata(SSIT_UPDATE_LOAD_WRITE_PORT).ssid := ssid
    data_array.io.wdata(SSIT_UPDATE_LOAD_WRITE_PORT).strict := strict
    debug_valid(pc) := valid
    debug_ssid(pc) := ssid
    debug_strict(pc) := strict
  }

  /** Write the entry indexed by `pc` through the store-update write port (and mirror it to the debug regs). */
  def update_st_ssit_entry(pc: UInt, valid: Bool, ssid: UInt, strict: Bool) = {
    valid_array.io.wen(SSIT_UPDATE_STORE_WRITE_PORT) := true.B
    valid_array.io.waddr(SSIT_UPDATE_STORE_WRITE_PORT) := pc
    valid_array.io.wdata(SSIT_UPDATE_STORE_WRITE_PORT) := valid
    data_array.io.wen(SSIT_UPDATE_STORE_WRITE_PORT) := true.B
    data_array.io.waddr(SSIT_UPDATE_STORE_WRITE_PORT) := pc
    data_array.io.wdata(SSIT_UPDATE_STORE_WRITE_PORT).ssid := ssid
    data_array.io.wdata(SSIT_UPDATE_STORE_WRITE_PORT).strict := strict
    debug_valid(pc) := valid
    debug_ssid(pc) := ssid
    debug_strict(pc) := strict
  }

  when(s2_mempred_update_req_valid) {
    switch (Cat(s2_loadAssigned, s2_storeAssigned)) {
      // 1. "If neither the load nor the store has been assigned a store set,
      // two are allocated and assigned to each instruction."
      is ("b00".U(2.W)) {
        update_ld_ssit_entry(
          pc = s2_mempred_update_req.ldpc,
          valid = true.B,
          ssid = s2_ldSsidAllocate,
          strict = false.B
        )
        update_st_ssit_entry(
          pc = s2_mempred_update_req.stpc,
          valid = true.B,
          ssid = s2_stSsidAllocate,
          strict = false.B
        )
      }
      // 2. "If the load has been assigned a store set, but the store has not,
      // one is allocated and assigned to the store instructions."
      is ("b10".U(2.W)) {
        update_st_ssit_entry(
          pc = s2_mempred_update_req.stpc,
          valid = true.B,
          ssid = s2_stSsidAllocate,
          strict = false.B
        )
      }
      // 3. "If the store has been assigned a store set, but the load has not,
      // one is allocated and assigned to the load instructions."
      is ("b01".U(2.W)) {
        update_ld_ssit_entry(
          pc = s2_mempred_update_req.ldpc,
          valid = true.B,
          ssid = s2_ldSsidAllocate,
          strict = false.B
        )
      }
      // 4. "If both the load and the store have already been assigned store sets,
      // one of the two store sets is declared the "winner".
      // The instruction belonging to the loser's store set is assigned the winner's store set."
      is ("b11".U(2.W)) {
        update_ld_ssit_entry(
          pc = s2_mempred_update_req.ldpc,
          valid = true.B,
          ssid = s2_winnerSSID,
          strict = false.B
        )
        update_st_ssit_entry(
          pc = s2_mempred_update_req.stpc,
          valid = true.B,
          ssid = s2_winnerSSID,
          strict = false.B
        )
        // Load and store were already in the same store set and a violation
        // was still reported: mark the load entry as strict.
        when(s2_ssidIsSame) {
          // index the write data with the WRITE port constant (the original
          // used the read port constant, which only happened to be equal)
          data_array.io.wdata(SSIT_UPDATE_LOAD_WRITE_PORT).strict := true.B
          debug_strict(s2_mempred_update_req.ldpc) := true.B
        }
      }
    }
  }

  // make SyncDataModuleTemplate happy:
  // never let both write ports write the same address in the same cycle.
  // The store write is dropped only when the load/misc port is really writing;
  // an idle port keeps its default address 0, which must not block a store
  // update that targets entry 0.
  when(
    valid_array.io.wen(SSIT_UPDATE_LOAD_WRITE_PORT) &&
    valid_array.io.waddr(SSIT_UPDATE_LOAD_WRITE_PORT) === valid_array.io.waddr(SSIT_UPDATE_STORE_WRITE_PORT)
  ) {
    valid_array.io.wen(SSIT_UPDATE_STORE_WRITE_PORT) := false.B
  }

  when(
    data_array.io.wen(SSIT_UPDATE_LOAD_WRITE_PORT) &&
    data_array.io.waddr(SSIT_UPDATE_LOAD_WRITE_PORT) === data_array.io.waddr(SSIT_UPDATE_STORE_WRITE_PORT)
  ) {
    data_array.io.wen(SSIT_UPDATE_STORE_WRITE_PORT) := false.B
  }

  XSPerfAccumulate("ssit_update_lxsx", s2_mempred_update_req_valid && !s2_loadAssigned && !s2_storeAssigned)
  XSPerfAccumulate("ssit_update_lysx", s2_mempred_update_req_valid && s2_loadAssigned && !s2_storeAssigned)
  XSPerfAccumulate("ssit_update_lxsy", s2_mempred_update_req_valid && !s2_loadAssigned && s2_storeAssigned)
  XSPerfAccumulate("ssit_update_lysy", s2_mempred_update_req_valid && s2_loadAssigned && s2_storeAssigned)
  XSPerfAccumulate("ssit_update_should_strict", s2_mempred_update_req_valid && s2_ssidIsSame && s2_loadAssigned && s2_storeAssigned)
  XSPerfAccumulate("ssit_update_strict_failed",
    s2_mempred_update_req_valid && s2_ssidIsSame && s2_loadStrict && s2_loadAssigned && s2_storeAssigned
  ) // should be zero

  // debug
  // Gate on the pipelined valid bit: the `valid` field inside
  // s2_mempred_update_req is only captured while a request is valid, so it
  // stays high after the first update and cannot be used as a qualifier.
  when (s2_mempred_update_req_valid) {
    XSDebug("%d: SSIT update: load pc %x store pc %x\n", GTimer(), s2_mempred_update_req.ldpc, s2_mempred_update_req.stpc)
    XSDebug("%d: SSIT update: load valid %b ssid %x store valid %b ssid %x\n", GTimer(), s2_loadAssigned, s2_loadOldSSID, s2_storeAssigned, s2_storeOldSSID)
  }
}
/** Last Fetched Store Table entry. */
class LFSTEntry(implicit p: Parameters) extends XSBundle {
  val valid = Bool()
  val robIdx = new RobPtr
}

/** LFST lookup / allocation request issued for one instruction slot. */
class LFSTReq(implicit p: Parameters) extends XSBundle {
  val isstore = Bool()
  val ssid = UInt(SSIDWidth.W) // use ssid to lookup LFST
  val robIdx = new RobPtr
}

/** LFST lookup result: whether the instruction should wait, and for which robIdx. */
class LFSTResp(implicit p: Parameters) extends XSBundle {
  val shouldWait = Bool()
  val robIdx = new RobPtr
}

/** Request / response bundle between the requesting stage and the LFST, one lane per rename slot. */
class DispatchLFSTIO(implicit p: Parameters) extends XSBundle {
  val req = Vec(RenameWidth, Valid(new LFSTReq))
  val resp = Vec(RenameWidth, Flipped(Valid(new LFSTResp)))
}

/**
  * Last Fetched Store Table.
  *
  * For every store set (indexed by ssid) it keeps up to `LFSTWidth` in-flight
  * stores (valid bit + robIdx). A request whose store set has a valid store,
  * or that follows a store of the same set within the same bundle, is told to
  * wait. Entries are set when a store is dispatched and cleared when the store
  * issues or is flushed by a redirect.
  */
class LFST(implicit p: Parameters) extends XSModule {
  val io = IO(new Bundle {
    // when redirect, mark canceled store as invalid
    val redirect = Input(Valid(new Redirect))
    val dispatch = Flipped(new DispatchLFSTIO)
    // when store issued, mark store as invalid
    val storeIssue = Vec(exuParameters.StuCnt, Flipped(Valid(new ExuInput)))
    val csrCtrl = Input(new CustomCSRCtrlIO)
  })

  // per store set: LFSTWidth slots of (valid, robIdx) and the next slot to allocate
  val validVec = RegInit(VecInit(Seq.fill(LFSTSize)(VecInit(Seq.fill(LFSTWidth)(false.B)))))
  val robIdxVec = Reg(Vec(LFSTSize, Vec(LFSTWidth, new RobPtr)))
  val allocPtr = RegInit(VecInit(Seq.fill(LFSTSize)(0.U(log2Up(LFSTWidth).W))))
  // a store set is valid when any of its slots is valid
  val valid = Wire(Vec(LFSTSize, Bool()))
  for (i <- 0 until LFSTSize) {
    valid(i) := validVec(i).asUInt.orR
  }

  // read LFST in rename stage
  for (i <- 0 until RenameWidth) {
    io.dispatch.resp(i).valid := io.dispatch.req(i).valid

    // If store-load pair is in the same dispatch bundle, loadWaitBit should also be set for load
    val hitInDispatchBundleVec = if (i > 0) {
      WireInit(VecInit((0 until i).map(j =>
        io.dispatch.req(j).valid &&
        io.dispatch.req(j).bits.isstore &&
        io.dispatch.req(j).bits.ssid === io.dispatch.req(i).bits.ssid
      )))
    } else {
      WireInit(VecInit(Seq(false.B))) // DontCare
    }
    val hitInDispatchBundle = hitInDispatchBundleVec.asUInt.orR
    // Check if store set is valid in LFST
    // no_spec_load forces shouldWait regardless of the lookup result
    io.dispatch.resp(i).bits.shouldWait := (
      (valid(io.dispatch.req(i).bits.ssid) || hitInDispatchBundle) &&
      io.dispatch.req(i).valid &&
      (!io.dispatch.req(i).bits.isstore || io.csrCtrl.storeset_wait_store)
    ) && !io.csrCtrl.lvpred_disable || io.csrCtrl.no_spec_load
    // default: the slot just before allocPtr, i.e. the most recently allocated store of the set
    io.dispatch.resp(i).bits.robIdx := robIdxVec(io.dispatch.req(i).bits.ssid)(allocPtr(io.dispatch.req(i).bits.ssid) - 1.U)
    // a matching store earlier in the same bundle takes precedence
    // (last connect wins, so the highest matching j is selected)
    for (j <- 0 until i) {
      when(hitInDispatchBundleVec(j)) {
        io.dispatch.resp(i).bits.robIdx := io.dispatch.req(j).bits.robIdx
      }
    }
  }

  // when store is issued, mark it as invalid
  for (i <- 0 until exuParameters.StuCnt) {
    // TODO: opt timing
    for (j <- 0 until LFSTWidth) {
      when(io.storeIssue(i).valid && io.storeIssue(i).bits.uop.cf.storeSetHit && io.storeIssue(i).bits.uop.robIdx.value === robIdxVec(io.storeIssue(i).bits.uop.cf.ssid)(j).value) {
        validVec(io.storeIssue(i).bits.uop.cf.ssid)(j) := false.B
      }
    }
  }

  // when store is dispatched, mark it as valid
  for (i <- 0 until RenameWidth) {
    when(io.dispatch.req(i).valid && io.dispatch.req(i).bits.isstore) {
      val waddr = io.dispatch.req(i).bits.ssid
      val wptr = allocPtr(waddr)
      allocPtr(waddr) := allocPtr(waddr) + 1.U
      validVec(waddr)(wptr) := true.B
      robIdxVec(waddr)(wptr) := io.dispatch.req(i).bits.robIdx
    }
  }

  // when redirect, cancel store influenced
  for (i <- 0 until LFSTSize; j <- 0 until LFSTWidth) {
    when(validVec(i)(j) && robIdxVec(i)(j).needFlush(io.redirect)) {
      validVec(i)(j) := false.B
    }
  }

  // recover robIdx after squash
  // behavior model, to be refactored later
  when(RegNext(io.redirect.fire)) {
    for (i <- 0 until LFSTSize; j <- 0 until LFSTWidth) {
      // `+` takes the width of the wider operand, and (j+1).U can be wider than
      // allocPtr; truncate so the probed slot wraps around exactly like
      // allocPtr does instead of indexing past the end of the Vec.
      val check_position = WireInit((allocPtr(i) + (j + 1).U)(log2Up(LFSTWidth) - 1, 0))
      // last connect wins: the largest j whose probed slot is free decides allocPtr
      when(!validVec(i)(check_position)) {
        allocPtr(i) := check_position
      }
    }
  }
}