/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import chisel3._
import chisel3.util._
import org.chipsalliance.cde.config._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.cache._
import xiangshan.frontend.FtqPtr
import xiangshan.mem.mdp._
import utils._
import utility._
import xiangshan.backend.Bundles.DynInst

class LoadQueueRAW(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    // control
    val redirect = Flipped(ValidIO(new Redirect))
    val vecFeedback = Vec(VecLoadPipelineWidth, Flipped(ValidIO(new FeedbackToLsqIO)))

    // violation query
    val query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO))

    // from store unit s1
    val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle)))

    // global rollback flush
    val rollback = Output(Valid(new Redirect))

    // to LoadQueueReplay
    val stAddrReadySqPtr = Input(new SqPtr)
    val stIssuePtr = Input(new SqPtr)
    val lqFull = Output(Bool())
  })

  println("LoadQueueRAW: size " + LoadQueueRAWSize)
  //  LoadQueueRAW fields:
  //  +-----------+---------+-------+------+-----------+
  //  | Allocated | MicroOp | PAddr | Mask | Datavalid |
  //  +-----------+---------+-------+------+-----------+
  //
  //  Field descriptions:
  //  Allocated : the entry has been allocated
  //  MicroOp   : the instruction's micro-op
  //  PAddr     : physical address
  //  Mask      : data mask
  //  Datavalid : data is valid
  //
  val allocated = RegInit(VecInit(List.fill(LoadQueueRAWSize)(false.B))) // control signals need an explicit initial value
  val uop = Reg(Vec(LoadQueueRAWSize, new DynInst))
  val paddrModule = Module(new LqPAddrModule(
    gen = UInt(PAddrBits.W),
    numEntries = LoadQueueRAWSize,
    numRead = LoadPipelineWidth,
    numWrite = LoadPipelineWidth,
    numWBank = LoadQueueNWriteBanks,
    numWDelay = 2,
    numCamPort = StorePipelineWidth
  ))
  paddrModule.io := DontCare
  val maskModule = Module(new LqMaskModule(
    gen = UInt((VLEN/8).W),
    numEntries = LoadQueueRAWSize,
    numRead = LoadPipelineWidth,
    numWrite = LoadPipelineWidth,
    numWBank = LoadQueueNWriteBanks,
    numWDelay = 2,
    numCamPort = StorePipelineWidth
  ))
  maskModule.io := DontCare
  val datavalid = RegInit(VecInit(List.fill(LoadQueueRAWSize)(false.B)))

  // freelist: stores the indices of free entries
  //  +---+---+--------------+-----+-----+
  //  | 0 | 1 |    ......    | n-2 | n-1 |
  //  +---+---+--------------+-----+-----+
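  // The free list hands out entry indices for this queue: enqueuing loads request a
  // slot through allocateReq/canAllocate/allocateSlot, and released entries are
  // returned through io.free (driven by freeMaskVec in the deallocate logic below).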
  val freeList = Module(new FreeList(
    size = LoadQueueRAWSize,
    allocWidth = LoadPipelineWidth,
    freeWidth = 4,
    enablePreAlloc = true,
    moduleName = "LoadQueueRAW freelist"
  ))
  freeList.io := DontCare

  //  LoadQueueRAW enqueue
  val canEnqueue = io.query.map(_.req.valid)
  val cancelEnqueue = io.query.map(_.req.bits.uop.robIdx.needFlush(io.redirect))
  val allAddrCheck = io.stIssuePtr === io.stAddrReadySqPtr
  val hasAddrInvalidStore = io.query.map(_.req.bits.uop.sqIdx).map(sqIdx => {
    Mux(!allAddrCheck, isBefore(io.stAddrReadySqPtr, sqIdx), false.B)
  })
  val needEnqueue = canEnqueue.zip(hasAddrInvalidStore).zip(cancelEnqueue).map { case ((v, r), c) => v && r && !c }
  val bypassPAddr = Reg(Vec(LoadPipelineWidth, UInt(PAddrBits.W)))
  val bypassMask = Reg(Vec(LoadPipelineWidth, UInt((VLEN/8).W)))

  // Allocate logic
  val acceptedVec = Wire(Vec(LoadPipelineWidth, Bool()))
  val enqIndexVec = Wire(Vec(LoadPipelineWidth, UInt()))

  // Enqueue
  for ((enq, w) <- io.query.map(_.req).zipWithIndex) {
    acceptedVec(w) := false.B
    paddrModule.io.wen(w) := false.B
    maskModule.io.wen(w) := false.B
    freeList.io.doAllocate(w) := false.B

    freeList.io.allocateReq(w) := true.B

    // Allocate ready
    val offset = PopCount(needEnqueue.take(w))
    val canAccept = freeList.io.canAllocate(offset)
    val enqIndex = freeList.io.allocateSlot(offset)
    enq.ready := Mux(needEnqueue(w), canAccept, true.B)

    enqIndexVec(w) := enqIndex
    when (needEnqueue(w) && enq.ready) {
      acceptedVec(w) := true.B

      val debug_robIdx = enq.bits.uop.robIdx.asUInt
      XSError(allocated(enqIndex), p"LoadQueueRAW: You can not write a valid entry! check: ldu $w, robIdx $debug_robIdx")

      freeList.io.doAllocate(w) := true.B

      // Allocate new entry
      allocated(enqIndex) := true.B

      // Write paddr
      paddrModule.io.wen(w) := true.B
      paddrModule.io.waddr(w) := enqIndex
      paddrModule.io.wdata(w) := enq.bits.paddr
      bypassPAddr(w) := enq.bits.paddr

      // Write mask
      maskModule.io.wen(w) := true.B
      maskModule.io.waddr(w) := enqIndex
      maskModule.io.wdata(w) := enq.bits.mask
      bypassMask(w) := enq.bits.mask

      // Fill info
      uop(enqIndex) := enq.bits.uop
      datavalid(enqIndex) := enq.bits.data_valid
    }
  }

  for ((query, w) <- io.query.map(_.resp).zipWithIndex) {
    query.valid := RegNext(io.query(w).req.valid)
    query.bits.rep_frm_fetch := RegNext(false.B)
  }

  //  LoadQueueRAW deallocate
  val freeMaskVec = Wire(Vec(LoadQueueRAWSize, Bool()))

  // init
  freeMaskVec.foreach(e => e := false.B)

  // When all stores older than the current load have their addresses ready,
  // the current load entry can be released.
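  // An allocated entry i is freed when any of the following holds (see the loop below):
  //   * deqNotBlock   : every store older than the load already has its address ready
  //                     (stAddrReadySqPtr has passed uop(i).sqIdx), so no further
  //                     store-to-load check can hit this entry;
  //   * needCancel    : the load is flushed by io.redirect;
  //   * vecLdCancel(i): a vector-load feedback flush matches this entry's robIdx/uopIdx.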
  val vecLdCanceltmp = Wire(Vec(LoadQueueRAWSize, Vec(VecLoadPipelineWidth, Bool())))
  val vecLdCancel = Wire(Vec(LoadQueueRAWSize, Bool()))
  for (i <- 0 until LoadQueueRAWSize) {
    val deqNotBlock = Mux(!allAddrCheck, !isBefore(io.stAddrReadySqPtr, uop(i).sqIdx), true.B)
    val needCancel = uop(i).robIdx.needFlush(io.redirect)
    val fbk = io.vecFeedback
    for (j <- 0 until VecLoadPipelineWidth) {
      vecLdCanceltmp(i)(j) := fbk(j).valid && fbk(j).bits.isFlush && uop(i).robIdx === fbk(j).bits.robidx && uop(i).uopIdx === fbk(j).bits.uopidx
    }
    vecLdCancel(i) := vecLdCanceltmp(i).reduce(_ || _)

    when (allocated(i) && (deqNotBlock || needCancel || vecLdCancel(i))) {
      allocated(i) := false.B
      freeMaskVec(i) := true.B
    }
  }

  // deallocate the entry if the load query was revoked (the load needs replay)
  val lastCanAccept = RegNext(acceptedVec)
  val lastAllocIndex = RegNext(enqIndexVec)

  for ((revoke, w) <- io.query.map(_.revoke).zipWithIndex) {
    val revokeValid = revoke && lastCanAccept(w)
    val revokeIndex = lastAllocIndex(w)

    when (allocated(revokeIndex) && revokeValid) {
      allocated(revokeIndex) := false.B
      freeMaskVec(revokeIndex) := true.B
    }
  }
  freeList.io.free := freeMaskVec.asUInt

  // freeList.io.empty is asserted when no free entry remains, i.e. the queue is full
  io.lqFull := freeList.io.empty

  /**
    * Store-Load memory violation detection
    * Scheme 1 (current scheme): flush the pipeline and re-fetch from the load instruction (like the old load queue).
    * Scheme 2                 : re-fetch instructions starting from the first instruction after the store instruction.
    *
    * When a store writes back, it searches the LoadQueue for younger loads
    * to the same physical address. Those loads got wrong data and need re-execution.
    *
    * Cycle 0: Store Writeback
    *   Generate the match vector for the store address with rangeMask(stPtr, enqPtr).
    * Cycle 1: Select the oldest load within each select group.
    * Cycle x: Redirect Fire
    *   Choose the oldest load among the candidates selected for each store port.
    *   Prepare the redirect request according to the detected violation.
    *   Fire the redirect request (if valid).
    */
  //              SelectGroup 0         SelectGroup 1          SelectGroup y
  // stage 0:       lq  lq  lq  ......    lq  lq  lq  .......    lq  lq  lq
  //                 |   |   |             |   |   |              |   |   |
  // stage 1:       lq  lq  lq  ......    lq  lq  lq  .......    lq  lq  lq
  //                  \  |  /    ......     \  |  /    .......     \  |  /
  // stage 2:            lq                    lq                     lq
  //                      \  |  /  .......  \  |  /   ........   \  |  /
  // stage 3:                lq                lq                   lq
  //                                           ...
  //                                           ...
  //                                            |
  // stage x:                                   lq
  //                                            |
  //                                        rollback req

  // select logic
  val SelectGroupSize = RollbackGroupSize
  val lgSelectGroupSize = log2Ceil(SelectGroupSize)
  val TotalSelectCycles = scala.math.ceil(log2Ceil(LoadQueueRAWSize).toFloat / lgSelectGroupSize).toInt + 1
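  // A worked example of the selection latency, assuming LoadQueueRAWSize = 32 and
  // RollbackGroupSize = 8 (both are configuration parameters, not fixed here):
  //   lgSelectGroupSize = log2Ceil(8) = 3
  //   TotalSelectCycles = ceil(log2Ceil(32) / 3) + 1 = ceil(5 / 3) + 1 = 3
  // This is also the number of cycles the store-side valid/ftqIdx/ftqOffset are
  // delayed by DelayN() below, so that they line up with the selected load.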
  def selectPartialOldest[T <: XSBundleWithMicroOp](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
      }
      val oldest = Mux(valid(0) && valid(1), Mux(isAfter(bits(0).uop.robIdx, bits(1).uop.robIdx), res(1), res(0)), Mux(valid(0) && !valid(1), res(0), res(1)))
      (Seq(oldest.valid), Seq(oldest.bits))
    } else {
      val left = selectPartialOldest(valid.take(valid.length / 2), bits.take(bits.length / 2))
      val right = selectPartialOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)))
      selectPartialOldest(left._1 ++ right._1, left._2 ++ right._2)
    }
  }

  def selectOldest[T <: XSBundleWithMicroOp](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    val numSelectGroups = scala.math.ceil(valid.length.toFloat / SelectGroupSize).toInt

    // group info
    val selectValidGroups =
      if (valid.length <= SelectGroupSize) {
        Seq(valid)
      } else {
        (0 until numSelectGroups).map(g => {
          if (valid.length < (g + 1) * SelectGroupSize) {
            valid.takeRight(valid.length - g * SelectGroupSize)
          } else {
            (0 until SelectGroupSize).map(j => valid(g * SelectGroupSize + j))
          }
        })
      }
    val selectBitsGroups =
      if (bits.length <= SelectGroupSize) {
        Seq(bits)
      } else {
        (0 until numSelectGroups).map(g => {
          if (bits.length < (g + 1) * SelectGroupSize) {
            bits.takeRight(bits.length - g * SelectGroupSize)
          } else {
            (0 until SelectGroupSize).map(j => bits(g * SelectGroupSize + j))
          }
        })
      }

    // select logic
    if (valid.length <= SelectGroupSize) {
      val (selValid, selBits) = selectPartialOldest(valid, bits)
      val selValidNext = RegNext(selValid(0))
      val selBitsNext = RegNext(selBits(0))
      (Seq(selValidNext && !selBitsNext.uop.robIdx.needFlush(io.redirect) && !selBitsNext.uop.robIdx.needFlush(RegNext(io.redirect))), Seq(selBitsNext))
    } else {
      val select = (0 until numSelectGroups).map(g => {
        val (selValid, selBits) = selectPartialOldest(selectValidGroups(g), selectBitsGroups(g))
        val selValidNext = RegNext(selValid(0))
        val selBitsNext = RegNext(selBits(0))
        (selValidNext && !selBitsNext.uop.robIdx.needFlush(io.redirect) && !selBitsNext.uop.robIdx.needFlush(RegNext(io.redirect)), selBitsNext)
      })
      selectOldest(select.map(_._1), select.map(_._2))
    }
  }

  val storeIn = io.storeIn

  def detectRollback(i: Int) = {
    paddrModule.io.violationMdata(i) := storeIn(i).bits.paddr
    maskModule.io.violationMdata(i) := storeIn(i).bits.mask

    val bypassPaddrMask = RegNext(VecInit((0 until LoadPipelineWidth).map(j => bypassPAddr(j)(PAddrBits-1, DCacheVWordOffset) === storeIn(i).bits.paddr(PAddrBits-1, DCacheVWordOffset))))
    val bypassMMask = RegNext(VecInit((0 until LoadPipelineWidth).map(j => (bypassMask(j) & storeIn(i).bits.mask).orR)))
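    // Loads enqueued in the last couple of cycles are not yet visible in the CAM
    // search above, presumably because paddrModule/maskModule are written with
    // numWDelay = 2. The bypass registers written at enqueue are therefore compared
    // against the store here as well, and folded into addrMaskMatch below.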
    val bypassMaskUInt = (0 until LoadPipelineWidth).map(j =>
      Fill(LoadQueueRAWSize, RegNext(RegNext(io.query(j).req.fire))) & Mux(bypassPaddrMask(j) && bypassMMask(j), UIntToOH(RegNext(RegNext(enqIndexVec(j)))), 0.U(LoadQueueRAWSize.W))
    ).reduce(_|_)

    val addrMaskMatch = RegNext(paddrModule.io.violationMmask(i).asUInt & maskModule.io.violationMmask(i).asUInt) | bypassMaskUInt
    val entryNeedCheck = RegNext(VecInit((0 until LoadQueueRAWSize).map(j => {
      allocated(j) && isAfter(uop(j).robIdx, storeIn(i).bits.uop.robIdx) && datavalid(j) && !uop(j).robIdx.needFlush(io.redirect)
    })))
    val lqViolationSelVec = VecInit((0 until LoadQueueRAWSize).map(j => {
      addrMaskMatch(j) && entryNeedCheck(j)
    }))

    val lqViolationSelUopExts = uop.map(uop => {
      val wrapper = Wire(new XSBundleWithMicroOp)
      wrapper.uop := uop
      wrapper
    })

    // select logic
    val lqSelect = selectOldest(lqViolationSelVec, lqViolationSelUopExts)

    // select one inst
    val lqViolation = lqSelect._1(0)
    val lqViolationUop = lqSelect._2(0).uop

    XSDebug(
      lqViolation,
      "need rollback (ld wb before store) pc %x robidx %d target %x\n",
      storeIn(i).bits.uop.pc, storeIn(i).bits.uop.robIdx.asUInt, lqViolationUop.robIdx.asUInt
    )

    (lqViolation, lqViolationUop)
  }

  // select rollback (part 1) and generate the rollback request
  // rollback check
  // The LQ rollback sequence check is done in s3 (the next stage), as getting the rollbackLq MicroOp is slow
  val rollbackLqWb = Wire(Vec(StorePipelineWidth, Valid(new DynInst)))
  val stFtqIdx = Wire(Vec(StorePipelineWidth, new FtqPtr))
  val stFtqOffset = Wire(Vec(StorePipelineWidth, UInt(log2Up(PredictWidth).W)))
  for (w <- 0 until StorePipelineWidth) {
    val detectedRollback = detectRollback(w)
    rollbackLqWb(w).valid := detectedRollback._1 && DelayN(storeIn(w).valid && !storeIn(w).bits.miss, TotalSelectCycles)
    rollbackLqWb(w).bits := detectedRollback._2
    stFtqIdx(w) := DelayN(storeIn(w).bits.uop.ftqPtr, TotalSelectCycles)
    stFtqOffset(w) := DelayN(storeIn(w).bits.uop.ftqOffset, TotalSelectCycles)
  }

  // select rollback (part 2), generate the rollback request, then fire the rollback request
  // Note that we use robIdx - 1.U to flush the load instruction itself.
  // Thus, if the last cycle's robIdx equals this cycle's robIdx, the redirect is still triggered.
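  // selectOldestRedirect below returns a one-hot vector marking the candidate with
  // the oldest robIdx. A small example, assuming StorePipelineWidth = 2 (the actual
  // width is a configuration parameter):
  //   * only candidate 0 valid                          -> oldestOneHot = b01
  //   * both valid, candidate 1 older than candidate 0  -> oldestOneHot = b10
  //   * both valid with the same robIdx                 -> oldestOneHot = b01
  //     (ties are broken toward the lower-indexed store port)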
  // select uop in parallel
  def selectOldestRedirect(xs: Seq[Valid[Redirect]]): Vec[Bool] = {
    val compareVec = (0 until xs.length).map(i => (0 until i).map(j => isAfter(xs(j).bits.robIdx, xs(i).bits.robIdx)))
    val resultOnehot = VecInit((0 until xs.length).map(i => Cat((0 until xs.length).map(j =>
      (if (j < i) !xs(j).valid || compareVec(i)(j)
      else if (j == i) xs(i).valid
      else !xs(j).valid || !compareVec(j)(i))
    )).andR))
    resultOnehot
  }
  val allRedirect = (0 until StorePipelineWidth).map(i => {
    val redirect = Wire(Valid(new Redirect))
    redirect.valid := rollbackLqWb(i).valid
    redirect.bits := DontCare
    redirect.bits.isRVC := rollbackLqWb(i).bits.preDecodeInfo.isRVC
    redirect.bits.robIdx := rollbackLqWb(i).bits.robIdx
    redirect.bits.ftqIdx := rollbackLqWb(i).bits.ftqPtr
    redirect.bits.ftqOffset := rollbackLqWb(i).bits.ftqOffset
    redirect.bits.stFtqIdx := stFtqIdx(i)
    redirect.bits.stFtqOffset := stFtqOffset(i)
    redirect.bits.level := RedirectLevel.flush
    redirect.bits.cfiUpdate.target := rollbackLqWb(i).bits.pc
    redirect.bits.debug_runahead_checkpoint_id := rollbackLqWb(i).bits.debugInfo.runahead_checkpoint_id
    redirect
  })
  val oldestOneHot = selectOldestRedirect(allRedirect)
  val oldestRedirect = Mux1H(oldestOneHot, allRedirect)
  io.rollback := oldestRedirect

  // perf cnt
  val canEnqCount = PopCount(io.query.map(_.req.fire))
  val validCount = freeList.io.validCount
  val allowEnqueue = validCount <= (LoadQueueRAWSize - LoadPipelineWidth).U

  QueuePerf(LoadQueueRAWSize, validCount, !allowEnqueue)
  XSPerfAccumulate("enqs", canEnqCount)
  XSPerfAccumulate("stld_rollback", io.rollback.valid)
  val perfEvents: Seq[(String, UInt)] = Seq(
    ("enq ", canEnqCount),
    ("stld_rollback", io.rollback.valid),
  )
  generatePerfEvent()
  // end
}