/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import chisel3._
import chisel3.util._
import chipsalliance.rocketchip.config._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.cache._
import xiangshan.frontend.FtqPtr
import xiangshan.mem.mdp._
import utils._
import utility._
import xiangshan.backend.Bundles.DynInst

class LoadQueueRAW(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    val redirect = Flipped(ValidIO(new Redirect))
    val query = Vec(LoadPipelineWidth, Flipped(new LoadViolationQueryIO))
    val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle)))
    val rollback = Output(Valid(new Redirect))
    val stAddrReadySqPtr = Input(new SqPtr)
    val stIssuePtr = Input(new SqPtr)
    val lqFull = Output(Bool())
  })

  println("LoadQueueRAW: size " + LoadQueueRAWSize)
  //  LoadQueueRAW field
  //  +-----------+-------+-------+------+-----------+
  //  | Allocated |  uop  | PAddr | Mask | Datavalid |
  //  +-----------+-------+-------+------+-----------+
  //
  //  Field descriptions:
  //  Allocated  : entry has been allocated already
  //  uop        : inst's micro-op
  //  PAddr      : physical address
  //  Mask       : data mask
  //  Datavalid  : data valid
  //
  val allocated = RegInit(VecInit(List.fill(LoadQueueRAWSize)(false.B))) // The control signals need to explicitly indicate the initial value
  val uop = Reg(Vec(LoadQueueRAWSize, new DynInst))
  val paddrModule = Module(new LqPAddrModule(
    gen = UInt(PAddrBits.W),
    numEntries = LoadQueueRAWSize,
    numRead = LoadPipelineWidth,
    numWrite = LoadPipelineWidth,
    numWBank = LoadQueueNWriteBanks,
    numWDelay = 2,
    numCamPort = StorePipelineWidth
  ))
  paddrModule.io := DontCare
  val maskModule = Module(new LqMaskModule(
    gen = UInt(8.W),
    numEntries = LoadQueueRAWSize,
    numRead = LoadPipelineWidth,
    numWrite = LoadPipelineWidth,
    numWBank = LoadQueueNWriteBanks,
    numWDelay = 2,
    numCamPort = StorePipelineWidth
  ))
  maskModule.io := DontCare
  val datavalid = RegInit(VecInit(List.fill(LoadQueueRAWSize)(false.B)))
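
  // Note on the CAM modules above (interpretation, not guaranteed by this file alone):
  // paddrModule/maskModule use banked writes (numWBank) with a delayed write path
  // (numWDelay = 2), so a just-enqueued load's paddr/mask may not yet be visible to the
  // store-side CAM lookup for a cycle or two. The bypassPAddr/bypassMask registers kept by
  // the enqueue logic below appear to cover exactly that window: detectRollback ORs a
  // bypass match into the CAM match vector.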

  // freelist: stores the indices of free entries.
  //  +---+---+--------------+-----+-----+
  //  | 0 | 1 |    ......    | n-2 | n-1 |
  //  +---+---+--------------+-----+-----+
  val freeList = Module(new FreeList(
    size = LoadQueueRAWSize,
    allocWidth = LoadPipelineWidth,
    freeWidth = 4,
    moduleName = "LoadQueueRAW freelist"
  ))
  freeList.io := DontCare

  // LoadQueueRAW enqueue
  val canEnqueue = io.query.map(_.req.valid)
  val cancelEnqueue = io.query.map(_.req.bits.uop.robIdx.needFlush(io.redirect))
  val allAddrCheck = io.stIssuePtr === io.stAddrReadySqPtr
  val hasAddrInvalidStore = io.query.map(_.req.bits.uop.sqIdx).map(sqIdx => {
    Mux(!allAddrCheck, isBefore(io.stAddrReadySqPtr, sqIdx), false.B)
  })
  val needEnqueue = canEnqueue.zip(hasAddrInvalidStore).zip(cancelEnqueue).map { case ((v, r), c) => v && r && !c }
  val bypassPAddr = Reg(Vec(LoadPipelineWidth, UInt(PAddrBits.W)))
  val bypassMask = Reg(Vec(LoadPipelineWidth, UInt(8.W)))

  // Allocate logic
  val enqValidVec = Wire(Vec(LoadPipelineWidth, Bool()))
  val enqIndexVec = Wire(Vec(LoadPipelineWidth, UInt()))
  val enqOffset = Wire(Vec(LoadPipelineWidth, UInt()))

  // Enqueue
  for ((enq, w) <- io.query.map(_.req).zipWithIndex) {
    paddrModule.io.wen(w) := false.B
    maskModule.io.wen(w) := false.B
    freeList.io.doAllocate(w) := false.B

    enqOffset(w) := PopCount(needEnqueue.take(w))
    freeList.io.allocateReq(w) := needEnqueue(w)

    // Allocate ready
    enqValidVec(w) := freeList.io.canAllocate(enqOffset(w))
    enqIndexVec(w) := freeList.io.allocateSlot(enqOffset(w))
    enq.ready := Mux(needEnqueue(w), enqValidVec(w), true.B)

    val enqIndex = enqIndexVec(w)
    when (needEnqueue(w) && enq.ready) {
      val debug_robIdx = enq.bits.uop.robIdx.asUInt
      XSError(allocated(enqIndex), p"LoadQueueRAW: You can not write a valid entry! check: ldu $w, robIdx $debug_robIdx")

      freeList.io.doAllocate(w) := true.B

      // Allocate new entry
      allocated(enqIndex) := true.B

      // Write paddr
      paddrModule.io.wen(w) := true.B
      paddrModule.io.waddr(w) := enqIndex
      paddrModule.io.wdata(w) := enq.bits.paddr
      bypassPAddr(w) := enq.bits.paddr

      // Write mask
      maskModule.io.wen(w) := true.B
      maskModule.io.waddr(w) := enqIndex
      maskModule.io.wdata(w) := enq.bits.mask
      bypassMask(w) := enq.bits.mask

      // Fill info
      uop(enqIndex) := enq.bits.uop
      datavalid(enqIndex) := enq.bits.datavalid
    }
  }
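
  // Enqueue offset example for the loop above: enqOffset(w) = PopCount(needEnqueue.take(w))
  // counts how many earlier channels also enqueue this cycle, so channel w uses the
  // enqOffset(w)-th granted free slot. For example (assuming LoadPipelineWidth = 3):
  // needEnqueue = (1, 0, 1) gives enqOffset = (0, 1, 1); channel 0 takes allocateSlot(0)
  // and channel 2 takes allocateSlot(1). A query that does not need to enqueue is always ready.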

  for ((query, w) <- io.query.map(_.resp).zipWithIndex) {
    query.valid := RegNext(io.query(w).req.valid)
    query.bits.replayFromFetch := RegNext(false.B)
  }

  // LoadQueueRAW deallocate
  val freeMaskVec = Wire(Vec(LoadQueueRAWSize, Bool()))

  // init
  freeMaskVec.map(e => e := false.B)

  // When all stores older than a load have their addresses ready (or the load is flushed),
  // the load's entry can be released.
  for (i <- 0 until LoadQueueRAWSize) {
    val deqNotBlock = Mux(!allAddrCheck, !isBefore(io.stAddrReadySqPtr, uop(i).sqIdx), true.B)
    val needCancel = uop(i).robIdx.needFlush(io.redirect)

    when (allocated(i) && (deqNotBlock || needCancel)) {
      allocated(i) := false.B
      freeMaskVec(i) := true.B
    }
  }

  // if need replay deallocate entry
  val lastCanAccept = RegNext(VecInit(needEnqueue.zip(enqValidVec).map(x => x._1 && x._2)))
  val lastAllocIndex = RegNext(enqIndexVec)

  for ((release, w) <- io.query.map(_.release).zipWithIndex) {
    val releaseValid = release && lastCanAccept(w)
    val releaseIndex = lastAllocIndex(w)

    when (allocated(releaseIndex) && releaseValid) {
      allocated(releaseIndex) := false.B
      freeMaskVec(releaseIndex) := true.B
    }
  }
  freeList.io.free := freeMaskVec.asUInt

  io.lqFull := freeList.io.empty
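
  // Two ways an allocated entry is freed (summary of the two loops above):
  //   1. Natural release: once all stores older than the load have computed their addresses
  //      (deqNotBlock), every older store has already run its CAM check against this entry,
  //      so the entry should no longer be needed. A redirect that flushes the load
  //      (needCancel) frees it as well.
  //   2. Query release: io.query(w).release appears to withdraw last cycle's allocation
  //      (e.g. when the load is replayed in the pipeline), using the lastCanAccept /
  //      lastAllocIndex bookkeeping recorded at enqueue time.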

  /**
    * Store-Load Memory violation detection
    * Scheme 1 (current scheme): flush the pipeline, then re-fetch from the load instruction (like the old load queue).
    * Scheme 2                 : re-fetch instructions from the first instruction after the store instruction.
    *
    * When a store writes back, it searches the LoadQueue for younger load instructions
    * with the same physical address. They loaded wrong data and need re-execution.
    *
    * Cycle 0: Store Writeback
    *   Generate match vector for store address with rangeMask(stPtr, enqPtr).
    * Cycle 1: Select oldest load from select group.
    * Cycle x: Redirect Fire
    *   Choose the oldest load from LoadPipelineWidth oldest loads.
    *   Prepare redirect request according to the detected violation.
    *   Fire redirect request (if valid)
    */
  //              SelectGroup 0          SelectGroup 1           SelectGroup y
  // stage 0:      lq  lq  lq  ......     lq  lq  lq  .......     lq  lq  lq
  //                |   |   |              |   |   |               |   |   |
  // stage 1:      lq  lq  lq  ......     lq  lq  lq  .......     lq  lq  lq
  //                 \  |  /    ......      \  |  /    .......      \  |  /
  // stage 2:          lq                     lq                      lq
  //                    \   |   /  .......  \   |   /  ........  \   |   /
  // stage 3:               lq                  lq                   lq
  //                                            ...
  //                                            ...
  //                                             |
  // stage x:                                    lq
  //                                             |
  //                                        rollback req

  // select logic
  val SelectGroupSize = RollbackGroupSize
  val lgSelectGroupSize = log2Ceil(SelectGroupSize)
  val TotalSelectCycles = scala.math.ceil(log2Ceil(LoadQueueRAWSize).toFloat / lgSelectGroupSize).toInt + 1

  def selectPartialOldest[T <: XSBundleWithMicroOp](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
      }
      val oldest = Mux(valid(0) && valid(1),
        Mux(isAfter(bits(0).uop.robIdx, bits(1).uop.robIdx), res(1), res(0)),
        Mux(valid(0) && !valid(1), res(0), res(1)))
      (Seq(oldest.valid), Seq(oldest.bits))
    } else {
      val left = selectPartialOldest(valid.take(valid.length / 2), bits.take(bits.length / 2))
      val right = selectPartialOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)))
      selectPartialOldest(left._1 ++ right._1, left._2 ++ right._2)
    }
  }

  def selectOldest[T <: XSBundleWithMicroOp](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    val numSelectGroups = scala.math.ceil(valid.length.toFloat / SelectGroupSize).toInt

    // group info
    val selectValidGroups =
      if (valid.length <= SelectGroupSize) {
        Seq(valid)
      } else {
        (0 until numSelectGroups).map(g => {
          if (valid.length < (g + 1) * SelectGroupSize) {
            valid.takeRight(valid.length - g * SelectGroupSize)
          } else {
            (0 until SelectGroupSize).map(j => valid(g * SelectGroupSize + j))
          }
        })
      }
    val selectBitsGroups =
      if (bits.length <= SelectGroupSize) {
        Seq(bits)
      } else {
        (0 until numSelectGroups).map(g => {
          if (bits.length < (g + 1) * SelectGroupSize) {
            bits.takeRight(bits.length - g * SelectGroupSize)
          } else {
            (0 until SelectGroupSize).map(j => bits(g * SelectGroupSize + j))
          }
        })
      }

    // select logic
    if (valid.length <= SelectGroupSize) {
      val (selValid, selBits) = selectPartialOldest(valid, bits)
      (Seq(RegNext(selValid(0) && !selBits(0).uop.robIdx.needFlush(io.redirect))), Seq(RegNext(selBits(0))))
    } else {
      val select = (0 until numSelectGroups).map(g => {
        val (selValid, selBits) = selectPartialOldest(selectValidGroups(g), selectBitsGroups(g))
        (RegNext(selValid(0) && !selBits(0).uop.robIdx.needFlush(io.redirect)), RegNext(selBits(0)))
      })
      selectOldest(select.map(_._1), select.map(_._2))
    }
  }
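
  // Worked example (actual values depend on the configuration): with LoadQueueRAWSize = 64
  // and RollbackGroupSize = 8, lgSelectGroupSize = 3 and
  // TotalSelectCycles = ceil(log2Ceil(64) / 3) + 1 = ceil(6 / 3) + 1 = 3.
  // Each selectOldest level resolves one group with the purely combinational
  // selectPartialOldest tournament and then registers the per-group winner (RegNext), so every
  // level of the group tree costs one cycle; the extra "+ 1" lines up with the RegNext on
  // addrMaskMatch/entryNeedCheck in detectRollback below.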

  def detectRollback(i: Int) = {
    paddrModule.io.violationMdata(i) := io.storeIn(i).bits.paddr
    maskModule.io.violationMdata(i) := io.storeIn(i).bits.mask

    val bypassPaddrMask = RegNext(VecInit((0 until LoadPipelineWidth).map(j => bypassPAddr(j)(PAddrBits-1, 3) === io.storeIn(i).bits.paddr(PAddrBits-1, 3))))
    val bypassMMask = RegNext(VecInit((0 until LoadPipelineWidth).map(j => (bypassMask(j) & io.storeIn(i).bits.mask).orR)))
    val bypassMaskUInt = (0 until LoadPipelineWidth).map(j =>
      Fill(LoadQueueRAWSize, RegNext(RegNext(io.query(j).req.fire))) &
        Mux(bypassPaddrMask(j) && bypassMMask(j), UIntToOH(RegNext(RegNext(enqIndexVec(j)))), 0.U(LoadQueueRAWSize.W))
    ).reduce(_|_)

    val addrMaskMatch = RegNext(paddrModule.io.violationMmask(i).asUInt & maskModule.io.violationMmask(i).asUInt) | bypassMaskUInt
    val entryNeedCheck = RegNext(VecInit((0 until LoadQueueRAWSize).map(j => {
      allocated(j) && isAfter(uop(j).robIdx, io.storeIn(i).bits.uop.robIdx) && datavalid(j) && !uop(j).robIdx.needFlush(io.redirect)
    })))
    val lqViolationSelVec = VecInit((0 until LoadQueueRAWSize).map(j => {
      addrMaskMatch(j) && entryNeedCheck(j)
    }))

    val lqViolationSelUopExts = uop.map(uop => {
      val wrapper = Wire(new XSBundleWithMicroOp)
      wrapper.uop := uop
      wrapper
    })

    // select logic
    val lqSelect = selectOldest(lqViolationSelVec, lqViolationSelUopExts)

    // select one inst
    val lqViolation = lqSelect._1(0)
    val lqViolationUop = lqSelect._2(0).uop

    XSDebug(
      lqViolation,
      "need rollback (ld wb before store) pc %x robidx %d target %x\n",
      io.storeIn(i).bits.uop.pc, io.storeIn(i).bits.uop.robIdx.asUInt, lqViolationUop.robIdx.asUInt
    )

    (lqViolation, lqViolationUop)
  }

  // select rollback (part1) and generate rollback request
  // rollback check
  // Lq rollback seq check is done in s3 (next stage), as getting rollbackLq MicroOp is slow
  val rollbackLqWb = Wire(Vec(StorePipelineWidth, Valid(new MicroOpRbExt)))
  val stFtqIdx = Wire(Vec(StorePipelineWidth, new FtqPtr))
  val stFtqOffset = Wire(Vec(StorePipelineWidth, UInt(log2Up(PredictWidth).W)))
  for (w <- 0 until StorePipelineWidth) {
    val detectedRollback = detectRollback(w)
    rollbackLqWb(w).valid := detectedRollback._1 && DelayN(io.storeIn(w).valid && !io.storeIn(w).bits.miss, TotalSelectCycles)
    rollbackLqWb(w).bits.uop := detectedRollback._2
    rollbackLqWb(w).bits.flag := w.U
    stFtqIdx(w) := DelayN(io.storeIn(w).bits.uop.ftqPtr, TotalSelectCycles)
    stFtqOffset(w) := DelayN(io.storeIn(w).bits.uop.ftqOffset, TotalSelectCycles)
  }
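
  // Timing note: addrMaskMatch/entryNeedCheck are registered once after the store's CAM lookup,
  // and each selectOldest level adds another register stage, so the violation flag from
  // detectRollback arrives TotalSelectCycles after store writeback. That is why
  // rollbackLqWb(w).valid is qualified with, and stFtqIdx/stFtqOffset are delayed by,
  // DelayN(..., TotalSelectCycles): it keeps the store-side information aligned with the
  // pipelined load-side selection result.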

  val rollbackLqWbValid = rollbackLqWb.map(x => x.valid && !x.bits.uop.robIdx.needFlush(io.redirect))
  val rollbackLqWbBits = rollbackLqWb.map(x => x.bits)

  // select rollback (part2), generate rollback request, then fire rollback request
  // Note that we use robIdx - 1.U to flush the load instruction itself.
  // Thus, here if last cycle's robIdx equals to this cycle's robIdx, it still triggers the redirect.

  // select uop in parallel
  val lqs = selectPartialOldest(rollbackLqWbValid, rollbackLqWbBits)
  val rollbackUopExt = lqs._2(0)
  val rollbackUop = rollbackUopExt.uop
  val rollbackStFtqIdx = stFtqIdx(rollbackUopExt.flag)
  val rollbackStFtqOffset = stFtqOffset(rollbackUopExt.flag)

  // check if rollback request is still valid in parallel
  io.rollback.bits.robIdx := rollbackUop.robIdx
  io.rollback.bits.ftqIdx := rollbackUop.ftqPtr
  io.rollback.bits.stFtqIdx := rollbackStFtqIdx
  io.rollback.bits.ftqOffset := rollbackUop.ftqOffset
  io.rollback.bits.stFtqOffset := rollbackStFtqOffset
  io.rollback.bits.level := RedirectLevel.flush
  io.rollback.bits.interrupt := DontCare
  io.rollback.bits.cfiUpdate := DontCare
  io.rollback.bits.cfiUpdate.target := rollbackUop.pc
  io.rollback.bits.debug_runahead_checkpoint_id := rollbackUop.debugInfo.runahead_checkpoint_id
  // io.rollback.bits.pc := DontCare

  io.rollback.valid := VecInit(rollbackLqWbValid).asUInt.orR

  // perf cnt
  val canEnqCount = PopCount(io.query.map(_.req.fire))
  val validCount = freeList.io.validCount
  val allowEnqueue = validCount <= (LoadQueueRAWSize - LoadPipelineWidth).U

  QueuePerf(LoadQueueRAWSize, validCount, !allowEnqueue)
  XSPerfAccumulate("enqs", canEnqCount)
  XSPerfAccumulate("stld_rollback", io.rollback.valid)
  val perfEvents: Seq[(String, UInt)] = Seq(
    ("enq ", canEnqCount),
    ("stld_rollback", io.rollback.valid),
  )
  generatePerfEvent()
  // end
}