/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import chisel3._
import chisel3.util._
import org.chipsalliance.cde.config._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.cache._
import xiangshan.frontend.FtqPtr
import xiangshan.mem.mdp._
import utils._
import utility._

class LoadQueueRAW(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    // control
    val redirect = Flipped(ValidIO(new Redirect))

    // violation query
    val query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO))

    // from store unit s1
    val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle)))

    // global rollback flush
    val rollback = Output(Valid(new Redirect))

    // to LoadQueueReplay
    val stAddrReadySqPtr = Input(new SqPtr)
    val stIssuePtr = Input(new SqPtr)
    val lqFull = Output(Bool())
  })

  println("LoadQueueRAW: size " + LoadQueueRAWSize)
  //  LoadQueueRAW field
  //  +-------+--------+-------+-------+-----------+
  //  | Valid |  uop   | PAddr | Mask  | Datavalid |
  //  +-------+--------+-------+-------+-----------+
  //
  //  Field descriptions:
  //  Allocated : the entry has been allocated already
  //  MicroOp   : the inst's microOp
  //  PAddr     : physical address
  //  Mask      : data mask
  //  Datavalid : data valid
  //
  val allocated = RegInit(VecInit(List.fill(LoadQueueRAWSize)(false.B))) // The control signals need to explicitly indicate the initial value
  val uop = Reg(Vec(LoadQueueRAWSize, new MicroOp))
  val paddrModule = Module(new LqPAddrModule(
    gen = UInt(PAddrBits.W),
    numEntries = LoadQueueRAWSize,
    numRead = LoadPipelineWidth,
    numWrite = LoadPipelineWidth,
    numWBank = LoadQueueNWriteBanks,
    numWDelay = 2,
    numCamPort = StorePipelineWidth
  ))
  paddrModule.io := DontCare
  val maskModule = Module(new LqMaskModule(
    gen = UInt((VLEN/8).W),
    numEntries = LoadQueueRAWSize,
    numRead = LoadPipelineWidth,
    numWrite = LoadPipelineWidth,
    numWBank = LoadQueueNWriteBanks,
    numWDelay = 2,
    numCamPort = StorePipelineWidth
  ))
  maskModule.io := DontCare
  val datavalid = RegInit(VecInit(List.fill(LoadQueueRAWSize)(false.B)))

  //  freelist: stores valid entry indices.
  //  +---+---+--------------+-----+-----+
  //  | 0 | 1 |    ......    | n-2 | n-1 |
  //  +---+---+--------------+-----+-----+
  val freeList = Module(new FreeList(
    size = LoadQueueRAWSize,
    allocWidth = LoadPipelineWidth,
    freeWidth = 4,
    enablePreAlloc = true,
    moduleName = "LoadQueueRAW freelist"
  ))
  freeList.io := DontCare

  //  LoadQueueRAW enqueue
  val canEnqueue = io.query.map(_.req.valid)
  val cancelEnqueue = io.query.map(_.req.bits.uop.robIdx.needFlush(io.redirect))
  val allAddrCheck = io.stIssuePtr === io.stAddrReadySqPtr
  val hasAddrInvalidStore = io.query.map(_.req.bits.uop.sqIdx).map(sqIdx => {
    Mux(!allAddrCheck, isBefore(io.stAddrReadySqPtr, sqIdx), false.B)
  })
  val needEnqueue = canEnqueue.zip(hasAddrInvalidStore).zip(cancelEnqueue).map { case ((v, r), c) => v && r && !c }
  val bypassPAddr = Reg(Vec(LoadPipelineWidth, UInt(PAddrBits.W)))
  val bypassMask = Reg(Vec(LoadPipelineWidth, UInt((VLEN/8).W)))
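
  // A load only needs a RAW entry while at least one older store still has an unresolved
  // address, which is what isBefore(stAddrReadySqPtr, sqIdx) captures. With illustrative
  // values, say stAddrReadySqPtr = 4 and a load whose sqIdx = 7, the stores at SQ entries
  // 4..6 are older than the load but may still resolve to an overlapping address, so the
  // load is tracked here; once allAddrCheck holds, every older store address is known and
  // nothing is enqueued.
  // bypassPAddr / bypassMask mirror the most recently written paddr/mask per load pipeline,
  // presumably because paddrModule/maskModule writes land with a delay (numWDelay = 2) and a
  // store CAM lookup in those cycles would otherwise miss the just-enqueued load (see
  // bypassMaskUInt in detectRollback below).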

  //  Allocate logic
  val acceptedVec = Wire(Vec(LoadPipelineWidth, Bool()))
  val enqIndexVec = Wire(Vec(LoadPipelineWidth, UInt()))

  // Enqueue
  for ((enq, w) <- io.query.map(_.req).zipWithIndex) {
    acceptedVec(w) := false.B
    paddrModule.io.wen(w) := false.B
    maskModule.io.wen(w) := false.B
    freeList.io.doAllocate(w) := false.B

    freeList.io.allocateReq(w) := true.B

    // Allocate ready
    val offset = PopCount(needEnqueue.take(w))
    val canAccept = freeList.io.canAllocate(offset)
    val enqIndex = freeList.io.allocateSlot(offset)
    enq.ready := Mux(needEnqueue(w), canAccept, true.B)

    enqIndexVec(w) := enqIndex
    when (needEnqueue(w) && enq.ready) {
      acceptedVec(w) := true.B

      val debug_robIdx = enq.bits.uop.robIdx.asUInt
      XSError(allocated(enqIndex), p"LoadQueueRAW: You can not write a valid entry! check: ldu $w, robIdx $debug_robIdx")

      freeList.io.doAllocate(w) := true.B

      // Allocate new entry
      allocated(enqIndex) := true.B

      // Write paddr
      paddrModule.io.wen(w) := true.B
      paddrModule.io.waddr(w) := enqIndex
      paddrModule.io.wdata(w) := enq.bits.paddr
      bypassPAddr(w) := enq.bits.paddr

      // Write mask
      maskModule.io.wen(w) := true.B
      maskModule.io.waddr(w) := enqIndex
      maskModule.io.wdata(w) := enq.bits.mask
      bypassMask(w) := enq.bits.mask

      // Fill info
      uop(enqIndex) := enq.bits.uop
      datavalid(enqIndex) := enq.bits.data_valid
    }
  }

  for ((query, w) <- io.query.map(_.resp).zipWithIndex) {
    query.valid := RegNext(io.query(w).req.valid)
    query.bits.rep_frm_fetch := RegNext(false.B)
  }

  // LoadQueueRAW deallocate
  val freeMaskVec = Wire(Vec(LoadQueueRAWSize, Bool()))

  // init
  freeMaskVec.map(e => e := false.B)

  // When the stores older than the current load all have their addresses ready,
  // the load entry can be released.
  for (i <- 0 until LoadQueueRAWSize) {
    val deqNotBlock = Mux(!allAddrCheck, !isBefore(io.stAddrReadySqPtr, uop(i).sqIdx), true.B)
    val needCancel = uop(i).robIdx.needFlush(io.redirect)

    when (allocated(i) && (deqNotBlock || needCancel)) {
      allocated(i) := false.B
      freeMaskVec(i) := true.B
    }
  }
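
  // Besides the release above, an entry can be withdrawn one cycle after allocation: a load
  // that turns out to need replay asserts revoke on its query channel, and the entry it
  // allocated in the previous cycle is freed again. lastCanAccept / lastAllocIndex below
  // remember last cycle's allocation so the revoked entry can be located.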

  // if need replay deallocate entry
  val lastCanAccept = RegNext(acceptedVec)
  val lastAllocIndex = RegNext(enqIndexVec)

  for ((revoke, w) <- io.query.map(_.revoke).zipWithIndex) {
    val revokeValid = revoke && lastCanAccept(w)
    val revokeIndex = lastAllocIndex(w)

    when (allocated(revokeIndex) && revokeValid) {
      allocated(revokeIndex) := false.B
      freeMaskVec(revokeIndex) := true.B
    }
  }
  freeList.io.free := freeMaskVec.asUInt

  io.lqFull := freeList.io.empty

  /**
    * Store-Load Memory violation detection
    * Scheme 1 (current scheme): flush the pipeline, then re-fetch from the load instruction (like the old load queue).
    * Scheme 2                 : re-fetch instructions starting from the first instruction after the store instruction.
    *
    * When a store writes back, it searches the LoadQueue for younger load instructions
    * with the same physical address. Those loads got the wrong data and need re-execution.
    *
    * Cycle 0: Store Writeback
    *   Generate the match vector for the store address by CAM-matching paddr and mask against all valid entries.
    * Cycle 1: Select the oldest load in each select group.
    * Cycle x: Redirect Fire
    *   Choose the oldest load among the StorePipelineWidth per-store-pipeline candidates.
    *   Prepare the redirect request according to the detected violation.
    *   Fire the redirect request (if valid).
    */
  //            SelectGroup 0         SelectGroup 1          SelectGroup y
  // stage 0:     lq  lq  lq  ......    lq  lq  lq  .......    lq  lq  lq
  //              |   |   |             |   |   |              |   |   |
  // stage 1:     lq  lq  lq  ......    lq  lq  lq  .......    lq  lq  lq
  //               \  |  /    ......     \  |  /    .......     \  |  /
  // stage 2:         lq                    lq                     lq
  //                   \  |  /  .......  \  |  /   ........  \  |  /
  // stage 3:             lq                lq                  lq
  //                                        ...
  //                                        ...
  //                                         |
  // stage x:                                lq
  //                                         |
  //                                    rollback req

  // select logic
  val SelectGroupSize = RollbackGroupSize
  val lgSelectGroupSize = log2Ceil(SelectGroupSize)
  val TotalSelectCycles = scala.math.ceil(log2Ceil(LoadQueueRAWSize).toFloat / lgSelectGroupSize).toInt + 1
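
  // Illustrative elaboration-time print (a minimal sketch, not part of the original selection
  // logic), mirroring the size print above. With example parameters LoadQueueRAWSize = 32 and
  // RollbackGroupSize = 8: log2Ceil(32) = 5, lgSelectGroupSize = 3, so
  // TotalSelectCycles = ceil(5 / 3) + 1 = 3; the extra "+ 1" appears to account for the
  // registered CAM-match stage at the start of detectRollback below.
  println("LoadQueueRAW: TotalSelectCycles " + TotalSelectCycles)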

  def selectPartialOldest[T <: XSBundleWithMicroOp](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
      }
      val oldest = Mux(valid(0) && valid(1),
        Mux(isAfter(bits(0).uop.robIdx, bits(1).uop.robIdx), res(1), res(0)),
        Mux(valid(0) && !valid(1), res(0), res(1)))
      (Seq(oldest.valid), Seq(oldest.bits))
    } else {
      val left = selectPartialOldest(valid.take(valid.length / 2), bits.take(bits.length / 2))
      val right = selectPartialOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)))
      selectPartialOldest(left._1 ++ right._1, left._2 ++ right._2)
    }
  }

  def selectOldest[T <: XSBundleWithMicroOp](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    val numSelectGroups = scala.math.ceil(valid.length.toFloat / SelectGroupSize).toInt

    // group info
    val selectValidGroups =
      if (valid.length <= SelectGroupSize) {
        Seq(valid)
      } else {
        (0 until numSelectGroups).map(g => {
          if (valid.length < (g + 1) * SelectGroupSize) {
            valid.takeRight(valid.length - g * SelectGroupSize)
          } else {
            (0 until SelectGroupSize).map(j => valid(g * SelectGroupSize + j))
          }
        })
      }
    val selectBitsGroups =
      if (bits.length <= SelectGroupSize) {
        Seq(bits)
      } else {
        (0 until numSelectGroups).map(g => {
          if (bits.length < (g + 1) * SelectGroupSize) {
            bits.takeRight(bits.length - g * SelectGroupSize)
          } else {
            (0 until SelectGroupSize).map(j => bits(g * SelectGroupSize + j))
          }
        })
      }

    // select logic
    if (valid.length <= SelectGroupSize) {
      val (selValid, selBits) = selectPartialOldest(valid, bits)
      val selValidNext = RegNext(selValid(0))
      val selBitsNext = RegNext(selBits(0))
      (Seq(selValidNext && !selBitsNext.uop.robIdx.needFlush(io.redirect) && !selBitsNext.uop.robIdx.needFlush(RegNext(io.redirect))), Seq(selBitsNext))
    } else {
      val select = (0 until numSelectGroups).map(g => {
        val (selValid, selBits) = selectPartialOldest(selectValidGroups(g), selectBitsGroups(g))
        val selValidNext = RegNext(selValid(0))
        val selBitsNext = RegNext(selBits(0))
        (selValidNext && !selBitsNext.uop.robIdx.needFlush(io.redirect) && !selBitsNext.uop.robIdx.needFlush(RegNext(io.redirect)), selBitsNext)
      })
      selectOldest(select.map(_._1), select.map(_._2))
    }
  }
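
  // How the two helpers above combine (illustrative trace, assuming LoadQueueRAWSize = 32 and
  // SelectGroupSize = 8): selectPartialOldest is a purely combinational binary tournament on
  // robIdx, while selectOldest splits the candidates into groups of SelectGroupSize, registers
  // each group winner, and recurses. 32 candidates thus become 4 registered group winners after
  // one cycle and a single registered winner one cycle later; every registered stage also drops
  // a winner that has been flushed in the meantime (the needFlush checks against io.redirect
  // and RegNext(io.redirect)).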

  def detectRollback(i: Int) = {
    paddrModule.io.violationMdata(i) := io.storeIn(i).bits.paddr
    maskModule.io.violationMdata(i) := io.storeIn(i).bits.mask

    val bypassPaddrMask = RegNext(VecInit((0 until LoadPipelineWidth).map(j => bypassPAddr(j)(PAddrBits-1, DCacheVWordOffset) === io.storeIn(i).bits.paddr(PAddrBits-1, DCacheVWordOffset))))
    val bypassMMask = RegNext(VecInit((0 until LoadPipelineWidth).map(j => (bypassMask(j) & io.storeIn(i).bits.mask).orR)))
    val bypassMaskUInt = (0 until LoadPipelineWidth).map(j =>
      Fill(LoadQueueRAWSize, RegNext(RegNext(io.query(j).req.fire))) & Mux(bypassPaddrMask(j) && bypassMMask(j), UIntToOH(RegNext(RegNext(enqIndexVec(j)))), 0.U(LoadQueueRAWSize.W))
    ).reduce(_|_)

    val addrMaskMatch = RegNext(paddrModule.io.violationMmask(i).asUInt & maskModule.io.violationMmask(i).asUInt) | bypassMaskUInt
    val entryNeedCheck = RegNext(VecInit((0 until LoadQueueRAWSize).map(j => {
      allocated(j) && isAfter(uop(j).robIdx, io.storeIn(i).bits.uop.robIdx) && datavalid(j) && !uop(j).robIdx.needFlush(io.redirect)
    })))
    val lqViolationSelVec = VecInit((0 until LoadQueueRAWSize).map(j => {
      addrMaskMatch(j) && entryNeedCheck(j)
    }))

    val lqViolationSelUopExts = uop.map(uop => {
      val wrapper = Wire(new XSBundleWithMicroOp)
      wrapper.uop := uop
      wrapper
    })

    // select logic
    val lqSelect = selectOldest(lqViolationSelVec, lqViolationSelUopExts)

    // select one inst
    val lqViolation = lqSelect._1(0)
    val lqViolationUop = lqSelect._2(0).uop

    XSDebug(
      lqViolation,
      "need rollback (ld wb before store) pc %x robidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.robIdx.asUInt, lqViolationUop.robIdx.asUInt
    )

    (lqViolation, lqViolationUop)
  }

  // select rollback (part1) and generate rollback request
  // rollback check
  // Lq rollback seq check is done in s3 (next stage), as getting rollbackLq MicroOp is slow
  val rollbackLqWb = Wire(Vec(StorePipelineWidth, Valid(new MicroOp)))
  val stFtqIdx = Wire(Vec(StorePipelineWidth, new FtqPtr))
  val stFtqOffset = Wire(Vec(StorePipelineWidth, UInt(log2Up(PredictWidth).W)))
  for (w <- 0 until StorePipelineWidth) {
    val detectedRollback = detectRollback(w)
    rollbackLqWb(w).valid := detectedRollback._1 && DelayN(io.storeIn(w).valid && !io.storeIn(w).bits.miss, TotalSelectCycles)
    rollbackLqWb(w).bits := detectedRollback._2
    stFtqIdx(w) := DelayN(io.storeIn(w).bits.uop.cf.ftqPtr, TotalSelectCycles)
    stFtqOffset(w) := DelayN(io.storeIn(w).bits.uop.cf.ftqOffset, TotalSelectCycles)
  }

  // select rollback (part2), generate rollback request, then fire rollback request
  // Note that the redirect level is RedirectLevel.flush, so the violating load instruction
  // itself is flushed and re-fetched; even if last cycle's rollback targeted the same robIdx,
  // the redirect is still issued.
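
  // selectOldestRedirect builds a pairwise "is after" matrix over the per-store-pipeline
  // rollback candidates and returns a one-hot vector marking the valid candidate with the
  // oldest robIdx; if two valid candidates carry the same robIdx, the lower pipeline index
  // wins, so at most one bit is ever set.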
  // select uop in parallel
  def selectOldestRedirect(xs: Seq[Valid[Redirect]]): Vec[Bool] = {
    val compareVec = (0 until xs.length).map(i => (0 until i).map(j => isAfter(xs(j).bits.robIdx, xs(i).bits.robIdx)))
    val resultOnehot = VecInit((0 until xs.length).map(i => Cat((0 until xs.length).map(j =>
      (if (j < i) !xs(j).valid || compareVec(i)(j)
      else if (j == i) xs(i).valid
      else !xs(j).valid || !compareVec(j)(i))
    )).andR))
    resultOnehot
  }
  val allRedirect = (0 until StorePipelineWidth).map(i => {
    val redirect = Wire(Valid(new Redirect))
    redirect.valid := rollbackLqWb(i).valid
    redirect.bits := DontCare
    redirect.bits.isRVC := rollbackLqWb(i).bits.cf.pd.isRVC
    redirect.bits.robIdx := rollbackLqWb(i).bits.robIdx
    redirect.bits.ftqIdx := rollbackLqWb(i).bits.cf.ftqPtr
    redirect.bits.ftqOffset := rollbackLqWb(i).bits.cf.ftqOffset
    redirect.bits.stFtqIdx := stFtqIdx(i)
    redirect.bits.stFtqOffset := stFtqOffset(i)
    redirect.bits.level := RedirectLevel.flush
    redirect.bits.cfiUpdate.target := rollbackLqWb(i).bits.cf.pc
    redirect.bits.debug_runahead_checkpoint_id := rollbackLqWb(i).bits.debugInfo.runahead_checkpoint_id
    redirect
  })
  val oldestOneHot = selectOldestRedirect(allRedirect)
  val oldestRedirect = Mux1H(oldestOneHot, allRedirect)
  io.rollback := oldestRedirect

  // perf cnt
  val canEnqCount = PopCount(io.query.map(_.req.fire))
  val validCount = freeList.io.validCount
  val allowEnqueue = validCount <= (LoadQueueRAWSize - LoadPipelineWidth).U

  QueuePerf(LoadQueueRAWSize, validCount, !allowEnqueue)
  XSPerfAccumulate("enqs", canEnqCount)
  XSPerfAccumulate("stld_rollback", io.rollback.valid)
  val perfEvents: Seq[(String, UInt)] = Seq(
    ("enq ", canEnqCount),
    ("stld_rollback", io.rollback.valid),
  )
  generatePerfEvent()
  // end
}