/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config._
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.frontend.FtqPtr
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles.DynInst
import xiangshan.mem.mdp._
import xiangshan.mem.Bundles._
import xiangshan.cache._

class LoadQueueRAW(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    // control
    val redirect = Flipped(ValidIO(new Redirect))

    // violation query
    val query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO))

    // from store unit s1
    val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle)))

    // global rollback flush
    val rollback = Vec(StorePipelineWidth, Output(Valid(new Redirect)))

    // to LoadQueueReplay
    val stAddrReadySqPtr = Input(new SqPtr)
    val stIssuePtr = Input(new SqPtr)
    val lqFull = Output(Bool())
  })

  private def PartialPAddrWidth: Int = 24
  private def paddrOffset: Int = DCacheVWordOffset
  private def genPartialPAddr(paddr: UInt) = {
    paddr(DCacheVWordOffset + PartialPAddrWidth - 1, paddrOffset)
  }

  println("LoadQueueRAW: size " + LoadQueueRAWSize)
  //  LoadQueueRAW field
  //  +-------+--------+-------+-------+-----------+
  //  | Valid |  uop   | PAddr | Mask  | Datavalid |
  //  +-------+--------+-------+-------+-----------+
  //
  //  Field descriptions:
  //  Allocated   : entry has been allocated already
  //  MicroOp     : inst's microOp
  //  PAddr       : physical address.
  //  Mask        : data mask
  //  Datavalid   : data valid
  //
  val allocated = RegInit(VecInit(List.fill(LoadQueueRAWSize)(false.B))) // The control signals need to explicitly indicate the initial value
  val uop = Reg(Vec(LoadQueueRAWSize, new DynInst))
  val paddrModule = Module(new LqPAddrModule(
    gen = UInt(PartialPAddrWidth.W),
    numEntries = LoadQueueRAWSize,
    numRead = LoadPipelineWidth,
    numWrite = LoadPipelineWidth,
    numWBank = LoadQueueNWriteBanks,
    numWDelay = 2,
    numCamPort = StorePipelineWidth,
    enableCacheLineCheck = true,
    paddrOffset = paddrOffset
  ))
  paddrModule.io := DontCare
  val maskModule = Module(new LqMaskModule(
    gen = UInt((VLEN/8).W),
    numEntries = LoadQueueRAWSize,
    numRead = LoadPipelineWidth,
    numWrite = LoadPipelineWidth,
    numWBank = LoadQueueNWriteBanks,
    numWDelay = 2,
    numCamPort = StorePipelineWidth
  ))
  maskModule.io := DontCare
  val datavalid = RegInit(VecInit(List.fill(LoadQueueRAWSize)(false.B)))

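  // Note on the partial physical address (added explanation, not original to the design
  // comments): genPartialPAddr keeps only a PartialPAddrWidth-bit slice of the paddr above
  // the VWord offset for the CAM compare. The in-VWord byte position is expected to be
  // covered by the byte-mask compare in maskModule, while truncating the upper address bits
  // can at worst alias two different addresses onto the same tag, i.e. it may produce a
  // spurious (but functionally safe) rollback rather than a missed violation.
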
  //  freelist: stores the indexes of free (unallocated) entries.
  //  +---+---+--------------+-----+-----+
  //  | 0 | 1 |    ......    | n-2 | n-1 |
  //  +---+---+--------------+-----+-----+
  val freeList = Module(new FreeList(
    size = LoadQueueRAWSize,
    allocWidth = LoadPipelineWidth,
    freeWidth = 4,
    enablePreAlloc = true,
    moduleName = "LoadQueueRAW freelist"
  ))
  freeList.io := DontCare

  // LoadQueueRAW enqueue
  val canEnqueue = io.query.map(_.req.valid)
  val cancelEnqueue = io.query.map(_.req.bits.uop.robIdx.needFlush(io.redirect))
  val allAddrCheck = io.stIssuePtr === io.stAddrReadySqPtr
  val hasAddrInvalidStore = io.query.map(_.req.bits.uop.sqIdx).map(sqIdx => {
    Mux(!allAddrCheck, isBefore(io.stAddrReadySqPtr, sqIdx), false.B)
  })
  val needEnqueue = canEnqueue.zip(hasAddrInvalidStore).zip(cancelEnqueue).map { case ((v, r), c) => v && r && !c }

  // Allocate logic
  val acceptedVec = Wire(Vec(LoadPipelineWidth, Bool()))
  val enqIndexVec = Wire(Vec(LoadPipelineWidth, UInt(log2Up(LoadQueueRAWSize).W)))

  // Enqueue
  for ((enq, w) <- io.query.map(_.req).zipWithIndex) {
    acceptedVec(w) := false.B
    paddrModule.io.wen(w) := false.B
    maskModule.io.wen(w) := false.B
    freeList.io.doAllocate(w) := false.B

    freeList.io.allocateReq(w) := true.B

    // Allocate ready
    val offset = PopCount(needEnqueue.take(w))
    val canAccept = freeList.io.canAllocate(offset)
    val enqIndex = freeList.io.allocateSlot(offset)
    enq.ready := Mux(needEnqueue(w), canAccept, true.B)

    enqIndexVec(w) := enqIndex
    when (needEnqueue(w) && enq.ready) {
      acceptedVec(w) := true.B

      freeList.io.doAllocate(w) := true.B

      // Allocate new entry
      allocated(enqIndex) := true.B

      // Write paddr
      paddrModule.io.wen(w) := true.B
      paddrModule.io.waddr(w) := enqIndex
      paddrModule.io.wdata(w) := genPartialPAddr(enq.bits.paddr)

      // Write mask
      maskModule.io.wen(w) := true.B
      maskModule.io.waddr(w) := enqIndex
      maskModule.io.wdata(w) := enq.bits.mask

      // Fill info
      uop(enqIndex) := enq.bits.uop
      datavalid(enqIndex) := enq.bits.data_valid
    }
    val debug_robIdx = enq.bits.uop.robIdx.asUInt
    XSError(needEnqueue(w) && enq.ready && allocated(enqIndex), p"LoadQueueRAW: You can not write a valid entry! check: ldu $w, robIdx $debug_robIdx")
  }

  for ((query, w) <- io.query.map(_.resp).zipWithIndex) {
    query.valid := RegNext(io.query(w).req.valid)
    query.bits.rep_frm_fetch := RegNext(false.B)
  }

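  // Illustrative enqueue example (hypothetical pointer values, not taken from the design):
  // with stAddrReadySqPtr = 4 and stIssuePtr = 9, a load with sqIdx = 7 still has
  // address-unknown older stores (sq entries 4..6), so needEnqueue holds and the load is
  // recorded here until those store addresses become ready; a load with sqIdx = 3 has no
  // such stores and is not enqueued (its enq.ready is tied to true.B).
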
  // LoadQueueRAW deallocate
  val freeMaskVec = Wire(Vec(LoadQueueRAWSize, Bool()))

  // init
  freeMaskVec.map(e => e := false.B)

  // when all stores older than the current load have their addresses ready,
  // the current load can be released.
  for (i <- 0 until LoadQueueRAWSize) {
    val deqNotBlock = Mux(!allAddrCheck, !isBefore(io.stAddrReadySqPtr, uop(i).sqIdx), true.B)
    val needCancel = uop(i).robIdx.needFlush(io.redirect)

    when (allocated(i) && (deqNotBlock || needCancel)) {
      allocated(i) := false.B
      freeMaskVec(i) := true.B
    }
  }

  // if the load needs to be replayed, deallocate the entry allocated last cycle
  val lastCanAccept = GatedValidRegNext(acceptedVec)
  val lastAllocIndex = GatedRegNext(enqIndexVec)

  for ((revoke, w) <- io.query.map(_.revoke).zipWithIndex) {
    val revokeValid = revoke && lastCanAccept(w)
    val revokeIndex = lastAllocIndex(w)

    when (allocated(revokeIndex) && revokeValid) {
      allocated(revokeIndex) := false.B
      freeMaskVec(revokeIndex) := true.B
    }
  }
  freeList.io.free := freeMaskVec.asUInt

  io.lqFull := freeList.io.empty

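  // Summary of the release paths coded above (added explanation):
  //   1. all stores older than the load have their addresses ready
  //      (stAddrReadySqPtr has passed the load's sqIdx), so any RAW violation
  //      should already have been caught by the store-side CAM search;
  //   2. the load is flushed by a redirect;
  //   3. the issuing load unit revokes the entry it allocated last cycle,
  //      e.g. because the load needs to be replayed.
  // lqFull is asserted when the free list has no free entries left.
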
  /**
    * Store-Load Memory violation detection
    * Scheme 1 (current scheme): flush the pipeline, then re-fetch from the load instruction (like the old load queue).
    * Scheme 2                 : re-fetch instructions from the first instruction after the store instruction.
    *
    * When a store writes back, it searches the LoadQueue for younger load instructions
    * with the same physical address. They loaded wrong data and need re-execution.
    *
    * Cycle 0: Store Writeback
    *   Generate match vector for store address with rangeMask(stPtr, enqPtr).
    * Cycle 1: Select oldest load from select group.
    * Cycle x: Redirect Fire
    *   Choose the oldest load from LoadPipelineWidth oldest loads.
    *   Prepare redirect request according to the detected violation.
    *   Fire redirect request (if valid)
    */
  //              SelectGroup 0         SelectGroup 1          SelectGroup y
  // stage 0:       lq  lq  lq  ......    lq  lq  lq  .......    lq  lq  lq
  //                |   |   |             |   |   |              |   |   |
  // stage 1:       lq  lq  lq  ......    lq  lq  lq  .......    lq  lq  lq
  //                 \  |  /    ......     \  |  /    .......     \  |  /
  // stage 2:           lq                    lq                     lq
  //                     \  |  /   .......  \  |  /   ........   \  |  /
  // stage 3:               lq                 lq                   lq
  //                                           ...
  //                                           ...
  //                                            |
  // stage x:                                   lq
  //                                            |
  //                                       rollback req

  // select logic
  val SelectGroupSize = RollbackGroupSize
  val lgSelectGroupSize = log2Ceil(SelectGroupSize)
  val TotalSelectCycles = scala.math.ceil(log2Ceil(LoadQueueRAWSize).toFloat / lgSelectGroupSize).toInt + 1

  def selectPartialOldest[T <: XSBundleWithMicroOp](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
      }
      val oldest = Mux(valid(0) && valid(1), Mux(isAfter(bits(0).uop.robIdx, bits(1).uop.robIdx), res(1), res(0)), Mux(valid(0) && !valid(1), res(0), res(1)))
      (Seq(oldest.valid), Seq(oldest.bits))
    } else {
      val left = selectPartialOldest(valid.take(valid.length / 2), bits.take(bits.length / 2))
      val right = selectPartialOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)))
      selectPartialOldest(left._1 ++ right._1, left._2 ++ right._2)
    }
  }

  def selectOldest[T <: XSBundleWithMicroOp](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    val numSelectGroups = scala.math.ceil(valid.length.toFloat / SelectGroupSize).toInt

    // group info
    val selectValidGroups = valid.grouped(SelectGroupSize).toList
    val selectBitsGroups = bits.grouped(SelectGroupSize).toList
    // select logic
    if (valid.length <= SelectGroupSize) {
      val (selValid, selBits) = selectPartialOldest(valid, bits)
      val selValidNext = GatedValidRegNext(selValid(0))
      val selBitsNext = RegEnable(selBits(0), selValid(0))
      (Seq(selValidNext && !selBitsNext.uop.robIdx.needFlush(RegNext(io.redirect))), Seq(selBitsNext))
    } else {
      val select = (0 until numSelectGroups).map(g => {
        val (selValid, selBits) = selectPartialOldest(selectValidGroups(g), selectBitsGroups(g))
        val selValidNext = RegNext(selValid(0))
        val selBitsNext = RegEnable(selBits(0), selValid(0))
        (selValidNext && !selBitsNext.uop.robIdx.needFlush(io.redirect) && !selBitsNext.uop.robIdx.needFlush(RegNext(io.redirect)), selBitsNext)
      })
      selectOldest(select.map(_._1), select.map(_._2))
    }
  }

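  // Worked example of the select-tree depth (hypothetical parameter values, for
  // illustration only): with LoadQueueRAWSize = 32 and RollbackGroupSize = 8,
  // log2Ceil(32) = 5 and lgSelectGroupSize = 3, so TotalSelectCycles = ceil(5/3) + 1 = 3.
  // The recursive selectOldest tree registers its result once per group level (two
  // register stages in this example); the extra "+1" presumably accounts for the
  // registered CAM-match stage in detectRollback before the tree starts.
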
  val storeIn = io.storeIn

  def detectRollback(i: Int) = {
    paddrModule.io.violationMdata(i) := genPartialPAddr(RegEnable(storeIn(i).bits.paddr, storeIn(i).valid))
    paddrModule.io.violationCheckLine.get(i) := storeIn(i).bits.wlineflag
    maskModule.io.violationMdata(i) := RegEnable(storeIn(i).bits.mask, storeIn(i).valid)

    val addrMaskMatch = paddrModule.io.violationMmask(i).asUInt & maskModule.io.violationMmask(i).asUInt
    val entryNeedCheck = GatedValidRegNext(VecInit((0 until LoadQueueRAWSize).map(j => {
      allocated(j) && storeIn(i).valid && isAfter(uop(j).robIdx, storeIn(i).bits.uop.robIdx) && datavalid(j) && !uop(j).robIdx.needFlush(io.redirect)
    })))
    val lqViolationSelVec = VecInit((0 until LoadQueueRAWSize).map(j => {
      addrMaskMatch(j) && entryNeedCheck(j)
    }))

    val lqViolationSelUopExts = uop.map(uop => {
      val wrapper = Wire(new XSBundleWithMicroOp)
      wrapper.uop := uop
      wrapper
    })

    // select logic
    val lqSelect: (Seq[Bool], Seq[XSBundleWithMicroOp]) = selectOldest(lqViolationSelVec, lqViolationSelUopExts)

    // select one inst
    val lqViolation = lqSelect._1(0)
    val lqViolationUop = lqSelect._2(0).uop

    XSDebug(
      lqViolation,
      "need rollback (ld wb before store) pc %x robidx %d target %x\n",
      storeIn(i).bits.uop.pc, storeIn(i).bits.uop.robIdx.asUInt, lqViolationUop.robIdx.asUInt
    )

    (lqViolation, lqViolationUop)
  }

  // select rollback (part1) and generate rollback request
  // rollback check
  // Lq rollback seq check is done in s3 (next stage), as getting the rollbackLq MicroOp is slow
  val rollbackLqWb = Wire(Vec(StorePipelineWidth, Valid(new DynInst)))
  val stFtqIdx = Wire(Vec(StorePipelineWidth, new FtqPtr))
  val stFtqOffset = Wire(Vec(StorePipelineWidth, UInt(log2Up(PredictWidth).W)))
  for (w <- 0 until StorePipelineWidth) {
    val detectedRollback = detectRollback(w)
    rollbackLqWb(w).valid := detectedRollback._1 && DelayN(storeIn(w).valid && !storeIn(w).bits.miss, TotalSelectCycles)
    rollbackLqWb(w).bits := detectedRollback._2
    stFtqIdx(w) := DelayNWithValid(storeIn(w).bits.uop.ftqPtr, storeIn(w).valid, TotalSelectCycles)._2
    stFtqOffset(w) := DelayNWithValid(storeIn(w).bits.uop.ftqOffset, storeIn(w).valid, TotalSelectCycles)._2
  }

  // select rollback (part2), generate rollback request, then fire rollback request
  // Note that we use robIdx - 1.U to flush the load instruction itself.
  // Thus, here if last cycle's robIdx equals to this cycle's robIdx, it still triggers the redirect.

  // select uop in parallel

  val allRedirect = (0 until StorePipelineWidth).map(i => {
    val redirect = Wire(Valid(new Redirect))
    redirect.valid := rollbackLqWb(i).valid
    redirect.bits := DontCare
    redirect.bits.isRVC := rollbackLqWb(i).bits.preDecodeInfo.isRVC
    redirect.bits.robIdx := rollbackLqWb(i).bits.robIdx
    redirect.bits.ftqIdx := rollbackLqWb(i).bits.ftqPtr
    redirect.bits.ftqOffset := rollbackLqWb(i).bits.ftqOffset
    redirect.bits.stFtqIdx := stFtqIdx(i)
    redirect.bits.stFtqOffset := stFtqOffset(i)
    redirect.bits.level := RedirectLevel.flush
    redirect.bits.cfiUpdate.target := rollbackLqWb(i).bits.pc
    redirect.bits.debug_runahead_checkpoint_id := rollbackLqWb(i).bits.debugInfo.runahead_checkpoint_id
    redirect
  })
  io.rollback := allRedirect

  // perf cnt
  val canEnqCount = PopCount(io.query.map(_.req.fire))
  val validCount = freeList.io.validCount
  val allowEnqueue = validCount <= (LoadQueueRAWSize - LoadPipelineWidth).U
  val rollbackValid = io.rollback.map(_.valid).reduce(_ || _).asUInt

  QueuePerf(LoadQueueRAWSize, validCount, !allowEnqueue)
  XSPerfAccumulate("enqs", canEnqCount)
  XSPerfAccumulate("stld_rollback", rollbackValid)
  val perfEvents: Seq[(String, UInt)] = Seq(
    ("enq          ", canEnqCount),
    ("stld_rollback", rollbackValid),
  )
  generatePerfEvent()
  // end
}
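
// The object below is a software-only sketch, not part of the synthesized design.
// It illustrates, with plain Scala integers standing in for the RobPtr age comparison
// (isAfter), the tournament reduction that selectPartialOldest/selectOldest perform in
// hardware: pairwise keep the older of two valid candidates. The object name and its
// parameters are illustrative assumptions, not existing XiangShan APIs.
private[mem] object LoadQueueRAWSelectModel {
  /** Return the index of the oldest valid entry, or None if no entry is valid.
    * `age` is a stand-in for the ROB index: a smaller value means an older entry.
    */
  def selectOldest(valid: Seq[Boolean], age: Seq[Int]): Option[Int] = {
    require(valid.length == age.length)
    valid.zip(age).zipWithIndex
      .collect { case ((true, a), idx) => (a, idx) }          // keep only valid candidates
      .reduceOption { (x, y) => if (x._1 <= y._1) x else y }   // pairwise keep the older one
      .map(_._2)
  }
}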