/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import chisel3._
import chisel3.util._
import org.chipsalliance.cde.config._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.cache._
import xiangshan.frontend.FtqPtr
import xiangshan.mem.mdp._
import utils._
import utility._
import xiangshan.backend.Bundles.DynInst

class LoadQueueRAW(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    // control
    val redirect = Flipped(ValidIO(new Redirect))

    // violation query
    val query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO))

    // from store unit s1
    val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle)))

    // global rollback flush
    val rollback = Output(Valid(new Redirect))

    // to LoadQueueReplay
    val stAddrReadySqPtr = Input(new SqPtr)
    val stIssuePtr = Input(new SqPtr)
    val lqFull = Output(Bool())
  })

  println("LoadQueueRAW: size " + LoadQueueRAWSize)
  //  LoadQueueRAW fields
  //  +-------+--------+-------+-------+-----------+
  //  | Valid |  uop   | PAddr | Mask  | Datavalid |
  //  +-------+--------+-------+-------+-----------+
  //
  //  Field descriptions:
  //  Allocated : the entry has already been allocated
  //  MicroOp   : the instruction's micro-op
  //  PAddr     : physical address
  //  Mask      : data mask
  //  Datavalid : data valid
  //
  val allocated = RegInit(VecInit(List.fill(LoadQueueRAWSize)(false.B))) // The control signals need to explicitly indicate the initial value
  val uop = Reg(Vec(LoadQueueRAWSize, new DynInst))
  val paddrModule = Module(new LqPAddrModule(
    gen = UInt(PAddrBits.W),
    numEntries = LoadQueueRAWSize,
    numRead = LoadPipelineWidth,
    numWrite = LoadPipelineWidth,
    numWBank = LoadQueueNWriteBanks,
    numWDelay = 2,
    numCamPort = StorePipelineWidth
  ))
  paddrModule.io := DontCare
  val maskModule = Module(new LqMaskModule(
    gen = UInt((VLEN/8).W),
    numEntries = LoadQueueRAWSize,
    numRead = LoadPipelineWidth,
    numWrite = LoadPipelineWidth,
    numWBank = LoadQueueNWriteBanks,
    numWDelay = 2,
    numCamPort = StorePipelineWidth
  ))
  maskModule.io := DontCare
  val datavalid = RegInit(VecInit(List.fill(LoadQueueRAWSize)(false.B)))

  //  freeList: stores the indexes of free entries.
  //  +---+---+--------------+-----+-----+
  //  | 0 | 1 |    ......    | n-2 | n-1 |
  //  +---+---+--------------+-----+-----+
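  //  The free list hands entry indexes to the enqueuing load ports and gets indexes
  //  back through freeMaskVec when entries are deallocated or revoked.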
  val freeList = Module(new FreeList(
    size = LoadQueueRAWSize,
    allocWidth = LoadPipelineWidth,
    freeWidth = 4,
    enablePreAlloc = true,
    moduleName = "LoadQueueRAW freelist"
  ))
  freeList.io := DontCare

  //  LoadQueueRAW enqueue
  val canEnqueue = io.query.map(_.req.valid)
  val cancelEnqueue = io.query.map(_.req.bits.uop.robIdx.needFlush(io.redirect))
  val allAddrCheck = io.stIssuePtr === io.stAddrReadySqPtr
  val hasAddrInvalidStore = io.query.map(_.req.bits.uop.sqIdx).map(sqIdx => {
    Mux(!allAddrCheck, isBefore(io.stAddrReadySqPtr, sqIdx), false.B)
  })
  val needEnqueue = canEnqueue.zip(hasAddrInvalidStore).zip(cancelEnqueue).map { case ((v, r), c) => v && r && !c }
  val bypassPAddr = Reg(Vec(LoadPipelineWidth, UInt(PAddrBits.W)))
  val bypassMask = Reg(Vec(LoadPipelineWidth, UInt((VLEN/8).W)))

  //  Allocate logic
  val acceptedVec = Wire(Vec(LoadPipelineWidth, Bool()))
  val enqIndexVec = Wire(Vec(LoadPipelineWidth, UInt()))

  //  Enqueue
  for ((enq, w) <- io.query.map(_.req).zipWithIndex) {
    acceptedVec(w) := false.B
    paddrModule.io.wen(w) := false.B
    maskModule.io.wen(w) := false.B
    freeList.io.doAllocate(w) := false.B

    freeList.io.allocateReq(w) := true.B

    //  Allocate ready
    val offset = PopCount(needEnqueue.take(w))
    val canAccept = freeList.io.canAllocate(offset)
    val enqIndex = freeList.io.allocateSlot(offset)
    enq.ready := Mux(needEnqueue(w), canAccept, true.B)

    enqIndexVec(w) := enqIndex
    when (needEnqueue(w) && enq.ready) {
      acceptedVec(w) := true.B

      val debug_robIdx = enq.bits.uop.robIdx.asUInt
      XSError(allocated(enqIndex), p"LoadQueueRAW: You can not write a valid entry! check: ldu $w, robIdx $debug_robIdx")

      freeList.io.doAllocate(w) := true.B

      //  Allocate a new entry
      allocated(enqIndex) := true.B

      //  Write paddr
      paddrModule.io.wen(w) := true.B
      paddrModule.io.waddr(w) := enqIndex
      paddrModule.io.wdata(w) := enq.bits.paddr
      bypassPAddr(w) := enq.bits.paddr

      //  Write mask
      maskModule.io.wen(w) := true.B
      maskModule.io.waddr(w) := enqIndex
      maskModule.io.wdata(w) := enq.bits.mask
      bypassMask(w) := enq.bits.mask

      //  Fill info
      uop(enqIndex) := enq.bits.uop
      datavalid(enqIndex) := enq.bits.data_valid
    }
  }

  for ((query, w) <- io.query.map(_.resp).zipWithIndex) {
    query.valid := RegNext(io.query(w).req.valid)
    query.bits.rep_frm_fetch := RegNext(false.B)
  }

  //  LoadQueueRAW deallocate
  val freeMaskVec = Wire(Vec(LoadQueueRAWSize, Bool()))

  //  init
  freeMaskVec.map(e => e := false.B)

  //  Once all stores older than the current load have their addresses ready,
  //  the current load entry can be released.
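  //  allAddrCheck means every issued store already has its address ready, so no load
  //  needs to keep waiting in this queue.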
  for (i <- 0 until LoadQueueRAWSize) {
    val deqNotBlock = Mux(!allAddrCheck, !isBefore(io.stAddrReadySqPtr, uop(i).sqIdx), true.B)
    val needCancel = uop(i).robIdx.needFlush(io.redirect)

    when (allocated(i) && (deqNotBlock || needCancel)) {
      allocated(i) := false.B
      freeMaskVec(i) := true.B
    }
  }

  //  If the load needs to be replayed, deallocate its entry
  val lastCanAccept = RegNext(acceptedVec)
  val lastAllocIndex = RegNext(enqIndexVec)

  for ((revoke, w) <- io.query.map(_.revoke).zipWithIndex) {
    val revokeValid = revoke && lastCanAccept(w)
    val revokeIndex = lastAllocIndex(w)

    when (allocated(revokeIndex) && revokeValid) {
      allocated(revokeIndex) := false.B
      freeMaskVec(revokeIndex) := true.B
    }
  }
  freeList.io.free := freeMaskVec.asUInt

  io.lqFull := freeList.io.empty

  /**
    * Store-Load Memory violation detection
    * Scheme 1 (current scheme): flush the pipeline and re-fetch from the load instruction (like the old load queue).
    * Scheme 2                 : re-fetch instructions from the first instruction after the store instruction.
    *
    * When a store writes back, it searches the LoadQueue for younger load instructions
    * with the same physical address. They have loaded wrong data and need re-execution.
    *
    * Cycle 0: Store Writeback
    *   Generate the match vector for the store address with rangeMask(stPtr, enqPtr).
    * Cycle 1: Select the oldest load from each select group.
    * Cycle x: Redirect Fire
    *   Choose the oldest load among the LoadPipelineWidth oldest loads.
    *   Prepare the redirect request according to the detected violation.
    *   Fire the redirect request (if valid)
    */
  //              SelectGroup 0         SelectGroup 1          SelectGroup y
  // stage 0:       lq  lq  lq  ......    lq  lq  lq  .......    lq  lq  lq
  //                |   |   |             |   |   |              |   |   |
  // stage 1:       lq  lq  lq  ......    lq  lq  lq  .......    lq  lq  lq
  //                 \  |  /    ......     \  |  /    .......     \  |  /
  // stage 2:           lq                    lq                     lq
  //                     \  |  /   .......  \  |  /   ........  \  |  /
  // stage 3:               lq                 lq                  lq
  //                                           ...
  //                                           ...
  //                                            |
  // stage x:                                   lq
  //                                            |
  //                                       rollback req
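
  // Each select stage compares at most RollbackGroupSize candidates and registers the
  // winner, so the whole selection takes TotalSelectCycles cycles; the store-side valid
  // and ftq information are delayed by the same number of cycles (DelayN) to stay aligned.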
  // select logic
  val SelectGroupSize = RollbackGroupSize
  val lgSelectGroupSize = log2Ceil(SelectGroupSize)
  val TotalSelectCycles = scala.math.ceil(log2Ceil(LoadQueueRAWSize).toFloat / lgSelectGroupSize).toInt + 1

  def selectPartialOldest[T <: XSBundleWithMicroOp](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
      }
      val oldest = Mux(valid(0) && valid(1),
        Mux(isAfter(bits(0).uop.robIdx, bits(1).uop.robIdx), res(1), res(0)),
        Mux(valid(0) && !valid(1), res(0), res(1)))
      (Seq(oldest.valid), Seq(oldest.bits))
    } else {
      val left = selectPartialOldest(valid.take(valid.length / 2), bits.take(bits.length / 2))
      val right = selectPartialOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)))
      selectPartialOldest(left._1 ++ right._1, left._2 ++ right._2)
    }
  }

  def selectOldest[T <: XSBundleWithMicroOp](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    val numSelectGroups = scala.math.ceil(valid.length.toFloat / SelectGroupSize).toInt

    // group info
    val selectValidGroups =
      if (valid.length <= SelectGroupSize) {
        Seq(valid)
      } else {
        (0 until numSelectGroups).map(g => {
          if (valid.length < (g + 1) * SelectGroupSize) {
            valid.takeRight(valid.length - g * SelectGroupSize)
          } else {
            (0 until SelectGroupSize).map(j => valid(g * SelectGroupSize + j))
          }
        })
      }
    val selectBitsGroups =
      if (bits.length <= SelectGroupSize) {
        Seq(bits)
      } else {
        (0 until numSelectGroups).map(g => {
          if (bits.length < (g + 1) * SelectGroupSize) {
            bits.takeRight(bits.length - g * SelectGroupSize)
          } else {
            (0 until SelectGroupSize).map(j => bits(g * SelectGroupSize + j))
          }
        })
      }

    // select logic
    if (valid.length <= SelectGroupSize) {
      val (selValid, selBits) = selectPartialOldest(valid, bits)
      val selValidNext = RegNext(selValid(0))
      val selBitsNext = RegNext(selBits(0))
      (Seq(selValidNext && !selBitsNext.uop.robIdx.needFlush(io.redirect) && !selBitsNext.uop.robIdx.needFlush(RegNext(io.redirect))), Seq(selBitsNext))
    } else {
      val select = (0 until numSelectGroups).map(g => {
        val (selValid, selBits) = selectPartialOldest(selectValidGroups(g), selectBitsGroups(g))
        val selValidNext = RegNext(selValid(0))
        val selBitsNext = RegNext(selBits(0))
        (selValidNext && !selBitsNext.uop.robIdx.needFlush(io.redirect) && !selBitsNext.uop.robIdx.needFlush(RegNext(io.redirect)), selBitsNext)
      })
      selectOldest(select.map(_._1), select.map(_._2))
    }
  }

  def detectRollback(i: Int) = {
    paddrModule.io.violationMdata(i) := io.storeIn(i).bits.paddr
    maskModule.io.violationMdata(i) := io.storeIn(i).bits.mask

    val bypassPaddrMask = RegNext(VecInit((0 until LoadPipelineWidth).map(j => bypassPAddr(j)(PAddrBits-1, DCacheVWordOffset) === io.storeIn(i).bits.paddr(PAddrBits-1, DCacheVWordOffset))))
    val bypassMMask = RegNext(VecInit((0 until LoadPipelineWidth).map(j => (bypassMask(j) & io.storeIn(i).bits.mask).orR)))
    val bypassMaskUInt = (0 until LoadPipelineWidth).map(j =>
      Fill(LoadQueueRAWSize, RegNext(RegNext(io.query(j).req.fire))) & Mux(bypassPaddrMask(j) && bypassMMask(j), UIntToOH(RegNext(RegNext(enqIndexVec(j)))), 0.U(LoadQueueRAWSize.W))
    ).reduce(_|_)

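    // Entries enqueued in the last few cycles may not be visible in the CAM yet (the
    // paddr/mask modules are written with numWDelay = 2), which is presumably why the
    // bypass registers above are checked and OR-ed into the match vector below.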
    val addrMaskMatch = RegNext(paddrModule.io.violationMmask(i).asUInt & maskModule.io.violationMmask(i).asUInt) | bypassMaskUInt
    val entryNeedCheck = RegNext(VecInit((0 until LoadQueueRAWSize).map(j => {
      allocated(j) && isAfter(uop(j).robIdx, io.storeIn(i).bits.uop.robIdx) && datavalid(j) && !uop(j).robIdx.needFlush(io.redirect)
    })))
    val lqViolationSelVec = VecInit((0 until LoadQueueRAWSize).map(j => {
      addrMaskMatch(j) && entryNeedCheck(j)
    }))

    val lqViolationSelUopExts = uop.map(uop => {
      val wrapper = Wire(new XSBundleWithMicroOp)
      wrapper.uop := uop
      wrapper
    })

    // select logic
    val lqSelect = selectOldest(lqViolationSelVec, lqViolationSelUopExts)

    // select one inst
    val lqViolation = lqSelect._1(0)
    val lqViolationUop = lqSelect._2(0).uop

    XSDebug(
      lqViolation,
      "need rollback (ld wb before store) pc %x robidx %d target %x\n",
      io.storeIn(i).bits.uop.pc, io.storeIn(i).bits.uop.robIdx.asUInt, lqViolationUop.robIdx.asUInt
    )

    (lqViolation, lqViolationUop)
  }

  // select rollback (part1) and generate rollback request
  // rollback check
  // Lq rollback seq check is done in s3 (next stage), as getting rollbackLq MicroOp is slow
  val rollbackLqWb = Wire(Vec(StorePipelineWidth, Valid(new MicroOpRbExt)))
  val stFtqIdx = Wire(Vec(StorePipelineWidth, new FtqPtr))
  val stFtqOffset = Wire(Vec(StorePipelineWidth, UInt(log2Up(PredictWidth).W)))
  for (w <- 0 until StorePipelineWidth) {
    val detectedRollback = detectRollback(w)
    rollbackLqWb(w).valid := detectedRollback._1 && DelayN(io.storeIn(w).valid && !io.storeIn(w).bits.miss, TotalSelectCycles)
    rollbackLqWb(w).bits.uop := detectedRollback._2
    rollbackLqWb(w).bits.flag := w.U
    stFtqIdx(w) := DelayN(io.storeIn(w).bits.uop.ftqPtr, TotalSelectCycles)
    stFtqOffset(w) := DelayN(io.storeIn(w).bits.uop.ftqOffset, TotalSelectCycles)
  }

  val rollbackLqWbValid = rollbackLqWb.map(x => x.valid && !x.bits.uop.robIdx.needFlush(io.redirect))
  val rollbackLqWbBits = rollbackLqWb.map(x => x.bits)

  // select rollback (part2), generate rollback request, then fire rollback request
  // Note that we use robIdx - 1.U to flush the load instruction itself.
  // Thus, here if last cycle's robIdx equals to this cycle's robIdx, it still triggers the redirect.
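
  // Among the per-store-port rollback candidates, the oldest one (by robIdx) is picked
  // to build the final redirect request.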
  // select uop in parallel
  val lqs = selectPartialOldest(rollbackLqWbValid, rollbackLqWbBits)
  val rollbackUopExt = lqs._2(0)
  val rollbackUop = rollbackUopExt.uop
  val rollbackStFtqIdx = stFtqIdx(rollbackUopExt.flag)
  val rollbackStFtqOffset = stFtqOffset(rollbackUopExt.flag)

  // check if rollback request is still valid in parallel
  io.rollback.bits := DontCare
  io.rollback.bits.robIdx := rollbackUop.robIdx
  io.rollback.bits.ftqIdx := rollbackUop.ftqPtr
  io.rollback.bits.stFtqIdx := rollbackStFtqIdx
  io.rollback.bits.ftqOffset := rollbackUop.ftqOffset
  io.rollback.bits.stFtqOffset := rollbackStFtqOffset
  io.rollback.bits.level := RedirectLevel.flush
  io.rollback.bits.interrupt := DontCare
  io.rollback.bits.cfiUpdate := DontCare
  io.rollback.bits.cfiUpdate.target := rollbackUop.pc
  io.rollback.bits.debug_runahead_checkpoint_id := rollbackUop.debugInfo.runahead_checkpoint_id
  // io.rollback.bits.pc := DontCare

  io.rollback.valid := VecInit(rollbackLqWbValid).asUInt.orR

  // perf cnt
  val canEnqCount = PopCount(io.query.map(_.req.fire))
  val validCount = freeList.io.validCount
  val allowEnqueue = validCount <= (LoadQueueRAWSize - LoadPipelineWidth).U

  QueuePerf(LoadQueueRAWSize, validCount, !allowEnqueue)
  XSPerfAccumulate("enqs", canEnqCount)
  XSPerfAccumulate("stld_rollback", io.rollback.valid)
  val perfEvents: Seq[(String, UInt)] = Seq(
    ("enq ", canEnqCount),
    ("stld_rollback", io.rollback.valid),
  )
  generatePerfEvent()
  // end
}