/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.fu.FuConfig.LduCfg
import xiangshan.backend.rob.RobLsqIO
import xiangshan.cache._
import xiangshan.frontend.FtqPtr
import xiangshan.ExceptionNO._
import xiangshan.cache.dcache.ReplayCarry
import xiangshan.mem.mdp._
import xiangshan.backend.Bundles.{DynInst, MemExuOutput, MemMicroOpRbExt}
import xiangshan.backend.rob.RobPtr

class LqPtr(implicit p: Parameters) extends CircularQueuePtr[LqPtr](
  p => p(XSCoreParamsKey).VirtualLoadQueueSize
){
}

object LqPtr {
  def apply(f: Bool, v: UInt)(implicit p: Parameters): LqPtr = {
    val ptr = Wire(new LqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
}

trait HasLoadHelper { this: XSModule =>
  def rdataHelper(uop: DynInst, rdata: UInt): UInt = {
    val fpWen = uop.fpWen
    LookupTree(uop.fuOpType, List(
      LSUOpType.lb  -> SignExt(rdata(7, 0) , XLEN),
      LSUOpType.lh  -> SignExt(rdata(15, 0), XLEN),
      /*
        riscv-spec-20191213: 12.2 NaN Boxing of Narrower Values
        Any operation that writes a narrower result to an f register must write
        all 1s to the uppermost FLEN−n bits to yield a legal NaN-boxed value.
      */
      LSUOpType.lw  -> Mux(fpWen, FPU.box(rdata, FPU.S), SignExt(rdata(31, 0), XLEN)),
      LSUOpType.ld  -> Mux(fpWen, FPU.box(rdata, FPU.D), SignExt(rdata(63, 0), XLEN)),
      LSUOpType.lbu -> ZeroExt(rdata(7, 0) , XLEN),
      LSUOpType.lhu -> ZeroExt(rdata(15, 0), XLEN),
      LSUOpType.lwu -> ZeroExt(rdata(31, 0), XLEN)
    ))
  }
}
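
// Illustrative only: how rdataHelper extends a memory word, assuming XLEN = 64
// and the loaded bytes already shifted down to the LSBs of rdata:
//
//   lh  with rdata(15, 0) = 0x8000 -> 0xFFFF_FFFF_FFFF_8000  (SignExt)
//   lhu with rdata(15, 0) = 0x8000 -> 0x0000_0000_0000_8000  (ZeroExt)
//   lw  with fpWen set             -> FPU.box(rdata, FPU.S): the uppermost
//                                     FLEN-32 bits are forced to all 1s so the
//                                     f register holds a legal NaN-boxed value.
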
class LqEnqIO(implicit p: Parameters) extends XSBundle {
  private val LsExuCnt = backendParams.StaCnt + backendParams.LduCnt
  val canAccept = Output(Bool())
  val sqCanAccept = Input(Bool())
  val needAlloc = Vec(LsExuCnt, Input(Bool()))
  val req = Vec(LsExuCnt, Flipped(ValidIO(new DynInst)))
  val resp = Vec(LsExuCnt, Output(new LqPtr))
}

class LqTriggerIO(implicit p: Parameters) extends XSBundle {
  val hitLoadAddrTriggerHitVec = Input(Vec(3, Bool()))
  val lqLoadAddrTriggerHitVec = Output(Vec(3, Bool()))
}

class LqExceptionBuffer(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper {
  val io = IO(new Bundle() {
    val redirect = Flipped(Valid(new Redirect))
    val req = Vec(LoadPipelineWidth, Flipped(Valid(new LqWriteBundle)))
    val exceptionAddr = new ExceptionAddrIO
  })

  val req_valid = RegInit(false.B)
  val req = Reg(new LqWriteBundle)

  // enqueue
  // s1: latch incoming requests
  val s1_req = VecInit(io.req.map(_.bits))
  val s1_valid = VecInit(io.req.map(x => x.valid))

  // s2: delay 1 cycle; a request survives only if it was flushed by neither
  // last cycle's redirect nor this cycle's
  val s2_req = RegNext(s1_req)
  val s2_valid = (0 until LoadPipelineWidth).map(i =>
    RegNext(s1_valid(i)) &&
    !s2_req(i).uop.robIdx.needFlush(RegNext(io.redirect)) &&
    !s2_req(i).uop.robIdx.needFlush(io.redirect)
  )
  val s2_has_exception = s2_req.map(x => ExceptionNO.selectByFu(x.uop.exceptionVec, LduCfg).asUInt.orR)

  val s2_enqueue = Wire(Vec(LoadPipelineWidth, Bool()))
  for (w <- 0 until LoadPipelineWidth) {
    s2_enqueue(w) := s2_valid(w) && s2_has_exception(w)
  }

  when (req.uop.robIdx.needFlush(io.redirect)) {
    req_valid := false.B
  } .elsewhen (s2_enqueue.asUInt.orR) {
    req_valid := true.B
  }

  // select the oldest request (smallest robIdx, wrap-aware) by pairwise
  // divide-and-conquer reduction
  def selectOldest[T <: LqWriteBundle](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
      }
      val oldest = Mux(valid(0) && valid(1),
        Mux(isAfter(bits(0).uop.robIdx, bits(1).uop.robIdx), res(1), res(0)),
        Mux(valid(0) && !valid(1), res(0), res(1)))
      (Seq(oldest.valid), Seq(oldest.bits))
    } else {
      val left = selectOldest(valid.take(valid.length / 2), bits.take(bits.length / 2))
      val right = selectOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)))
      selectOldest(left._1 ++ right._1, left._2 ++ right._2)
    }
  }

  val reqSel = selectOldest(s2_enqueue, s2_req)

  // keep the buffered request only while it remains the oldest candidate
  when (req_valid) {
    req := Mux(reqSel._1(0) && isAfter(req.uop.robIdx, reqSel._2(0).uop.robIdx), reqSel._2(0), req)
  } .elsewhen (s2_enqueue.asUInt.orR) {
    req := reqSel._2(0)
  }

  io.exceptionAddr.vaddr := req.vaddr
  // count rising edges of req_valid, i.e. newly captured exceptions
  XSPerfAccumulate("exception", !RegNext(req_valid) && req_valid)

  // end
}
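
// Illustrative reduction trace for selectOldest (no robIdx wrap-around):
// given four valid requests at robIdx 12, 7, 9 and 3, the halves reduce
// pairwise via isAfter -- (12, 7) -> 7 and (9, 3) -> 3 -- and the recursive
// call then picks 3, so the buffer always tracks the oldest faulting load.
// isAfter compares CircularQueuePtrs, so the result stays correct when the
// queue pointers wrap around.
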
class LoadQueue(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    val redirect = Flipped(Valid(new Redirect))
    val enq = new LqEnqIO
    val ldu = new Bundle() {
      val storeLoadViolationQuery = Vec(LoadPipelineWidth, Flipped(new LoadViolationQueryIO)) // from load_s2
      val loadLoadViolationQuery = Vec(LoadPipelineWidth, Flipped(new LoadViolationQueryIO)) // from load_s2
      val loadIn = Vec(LoadPipelineWidth, Flipped(Decoupled(new LqWriteBundle))) // from load_s3
    }
    val sta = new Bundle() {
      val storeAddrIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) // from store_s1
    }
    val std = new Bundle() {
      val storeDataIn = Vec(StorePipelineWidth, Flipped(Valid(new MemExuOutput))) // from store_s0: store data, sent to sq from rs
    }
    val sq = new Bundle() {
      val stAddrReadySqPtr = Input(new SqPtr)
      val stAddrReadyVec = Input(Vec(StoreQueueSize, Bool()))
      val stDataReadySqPtr = Input(new SqPtr)
      val stDataReadyVec = Input(Vec(StoreQueueSize, Bool()))
      val stIssuePtr = Input(new SqPtr)
      val sqEmpty = Input(Bool())
    }
    val loadOut = Vec(LoadPipelineWidth, DecoupledIO(new MemExuOutput))
    val ldRawDataOut = Vec(LoadPipelineWidth, Output(new LoadDataFromLQBundle))
    val replay = Vec(LoadPipelineWidth, Decoupled(new LsPipelineBundle))
    val refill = Flipped(ValidIO(new Refill))
    val release = Flipped(Valid(new Release))
    val rollback = Output(Valid(new Redirect))
    val rob = Flipped(new RobLsqIO)
    val uncache = new UncacheWordIO
    val trigger = Vec(LoadPipelineWidth, new LqTriggerIO)
    val exceptionAddr = new ExceptionAddrIO
    val lqFull = Output(Bool())
    val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
    val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize + 1).W))
    val lqReplayFull = Output(Bool())
    val tlbReplayDelayCycleCtrl = Vec(4, Input(UInt(ReSelectLen.W)))
  })

  val loadQueueRAR = Module(new LoadQueueRAR) // read-after-read violation
  val loadQueueRAW = Module(new LoadQueueRAW) // read-after-write violation
  val loadQueueReplay = Module(new LoadQueueReplay) // enqueue if need replay
  val virtualLoadQueue = Module(new VirtualLoadQueue) // control state
  val exceptionBuffer = Module(new LqExceptionBuffer) // exception buffer
  val uncacheBuffer = Module(new UncacheBuffer) // uncache buffer

  /**
   * LoadQueueRAR
   */
  loadQueueRAR.io.redirect <> io.redirect
  loadQueueRAR.io.release <> io.release
  loadQueueRAR.io.ldWbPtr <> virtualLoadQueue.io.ldWbPtr
  for (w <- 0 until LoadPipelineWidth) {
    loadQueueRAR.io.query(w).req <> io.ldu.loadLoadViolationQuery(w).req // from load_s1
    loadQueueRAR.io.query(w).resp <> io.ldu.loadLoadViolationQuery(w).resp // to load_s2
    loadQueueRAR.io.query(w).preReq := io.ldu.loadLoadViolationQuery(w).preReq // from load_s1
    loadQueueRAR.io.query(w).release := io.ldu.loadLoadViolationQuery(w).release // from load_s3
  }

  /**
   * LoadQueueRAW
   */
  loadQueueRAW.io.redirect <> io.redirect
  loadQueueRAW.io.storeIn <> io.sta.storeAddrIn
  loadQueueRAW.io.stAddrReadySqPtr <> io.sq.stAddrReadySqPtr
  loadQueueRAW.io.stIssuePtr <> io.sq.stIssuePtr
  for (w <- 0 until LoadPipelineWidth) {
    loadQueueRAW.io.query(w).req <> io.ldu.storeLoadViolationQuery(w).req // from load_s1
    loadQueueRAW.io.query(w).resp <> io.ldu.storeLoadViolationQuery(w).resp // to load_s2
    loadQueueRAW.io.query(w).preReq := io.ldu.storeLoadViolationQuery(w).preReq // from load_s1
    loadQueueRAW.io.query(w).release := io.ldu.storeLoadViolationQuery(w).release // from load_s3
  }
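
  // Per-port query lifecycle shared by both violation queues (summarizing the
  // stage annotations above; illustrative only):
  //   load_s1: preReq/req present the query to the queue;
  //   load_s2: resp returns the violation check result;
  //   load_s3: release frees the query entry once the load leaves the pipeline.
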
  /**
   * VirtualLoadQueue
   */
  virtualLoadQueue.io.redirect <> io.redirect
  virtualLoadQueue.io.enq <> io.enq
  virtualLoadQueue.io.loadIn <> io.ldu.loadIn // from load_s3
  virtualLoadQueue.io.lqFull <> io.lqFull
  virtualLoadQueue.io.lqDeq <> io.lqDeq
  virtualLoadQueue.io.lqCancelCnt <> io.lqCancelCnt

  /**
   * Load queue exception buffer
   */
  exceptionBuffer.io.redirect <> io.redirect
  for ((buff, w) <- exceptionBuffer.io.req.zipWithIndex) {
    buff.valid := io.ldu.loadIn(w).valid // from load_s3
    buff.bits := io.ldu.loadIn(w).bits
  }
  io.exceptionAddr <> exceptionBuffer.io.exceptionAddr

  /**
   * Load uncache buffer
   */
  uncacheBuffer.io.redirect <> io.redirect
  uncacheBuffer.io.loadOut <> io.loadOut
  uncacheBuffer.io.loadRawDataOut <> io.ldRawDataOut
  uncacheBuffer.io.rob <> io.rob
  uncacheBuffer.io.uncache <> io.uncache
  uncacheBuffer.io.trigger <> io.trigger
  for ((buff, w) <- uncacheBuffer.io.req.zipWithIndex) {
    buff.valid := io.ldu.loadIn(w).valid // from load_s3
    buff.bits := io.ldu.loadIn(w).bits // from load_s3
  }

  // rollback: select the oldest redirect (smallest robIdx, wrap-aware) by
  // pairwise divide-and-conquer reduction, mirroring LqExceptionBuffer.selectOldest
  def selectOldest[T <: Redirect](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
      }
      val oldest = Mux(valid(0) && valid(1),
        Mux(isAfter(bits(0).robIdx, bits(1).robIdx), res(1), res(0)),
        Mux(valid(0) && !valid(1), res(0), res(1)))
      (Seq(oldest.valid), Seq(oldest.bits))
    } else {
      val left = selectOldest(valid.take(valid.length / 2), bits.take(bits.length / 2))
      val right = selectOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)))
      selectOldest(left._1 ++ right._1, left._2 ++ right._2)
    }
  }

  val (rollbackSelV, rollbackSelBits) = selectOldest(
    Seq(loadQueueRAW.io.rollback.valid, uncacheBuffer.io.rollback.valid),
    Seq(loadQueueRAW.io.rollback.bits, uncacheBuffer.io.rollback.bits)
  )
  io.rollback.valid := rollbackSelV.head
  io.rollback.bits := rollbackSelBits.head
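
  // Rollback arbitration (illustrative): if the RAW queue and the uncache
  // buffer both raise a rollback in the same cycle, the redirect with the
  // older robIdx wins. Flushing from the oldest violating load re-fetches
  // everything younger than it, so a single redirect also covers the other
  // violation.
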
  /* <------- DANGEROUS: Don't change the sequence here! -------> */

  /**
   * LoadQueueReplay
   */
  loadQueueReplay.io.redirect <> io.redirect
  loadQueueReplay.io.enq <> io.ldu.loadIn // from load_s3
  loadQueueReplay.io.storeAddrIn <> io.sta.storeAddrIn // from store_s1
  loadQueueReplay.io.storeDataIn <> io.std.storeDataIn // from store_s0
  loadQueueReplay.io.replay <> io.replay
  loadQueueReplay.io.refill <> io.refill
  loadQueueReplay.io.stAddrReadySqPtr <> io.sq.stAddrReadySqPtr
  loadQueueReplay.io.stAddrReadyVec <> io.sq.stAddrReadyVec
  loadQueueReplay.io.stDataReadySqPtr <> io.sq.stDataReadySqPtr
  loadQueueReplay.io.stDataReadyVec <> io.sq.stDataReadyVec
  loadQueueReplay.io.sqEmpty <> io.sq.sqEmpty
  loadQueueReplay.io.lqFull <> io.lqReplayFull
  loadQueueReplay.io.tlbReplayDelayCycleCtrl <> io.tlbReplayDelayCycleCtrl
  loadQueueReplay.io.ldWbPtr := virtualLoadQueue.io.ldWbPtr
  loadQueueReplay.io.rarFull := loadQueueRAR.io.lqFull
  loadQueueReplay.io.rawFull := loadQueueRAW.io.lqFull

  // full_mask decodes as Cat(RAR full, RAW full, Replay full); e.g.
  // full_mask_101 counts cycles where the RAR and replay queues are full
  // while the RAW queue still has space
  val full_mask = Cat(loadQueueRAR.io.lqFull, loadQueueRAW.io.lqFull, loadQueueReplay.io.lqFull)
  XSPerfAccumulate("full_mask_000", full_mask === 0.U)
  XSPerfAccumulate("full_mask_001", full_mask === 1.U)
  XSPerfAccumulate("full_mask_010", full_mask === 2.U)
  XSPerfAccumulate("full_mask_011", full_mask === 3.U)
  XSPerfAccumulate("full_mask_100", full_mask === 4.U)
  XSPerfAccumulate("full_mask_101", full_mask === 5.U)
  XSPerfAccumulate("full_mask_110", full_mask === 6.U)
  XSPerfAccumulate("full_mask_111", full_mask === 7.U)
  XSPerfAccumulate("rollback", io.rollback.valid)

  // perf cnt
  val perfEvents = Seq(virtualLoadQueue, loadQueueRAR, loadQueueRAW, loadQueueReplay).flatMap(_.getPerfEvents) ++
    Seq(
      ("full_mask_000", full_mask === 0.U),
      ("full_mask_001", full_mask === 1.U),
      ("full_mask_010", full_mask === 2.U),
      ("full_mask_011", full_mask === 3.U),
      ("full_mask_100", full_mask === 4.U),
      ("full_mask_101", full_mask === 5.U),
      ("full_mask_110", full_mask === 6.U),
      ("full_mask_111", full_mask === 7.U),
      ("rollback", io.rollback.valid)
    )
  generatePerfEvent()
  // end
}