/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import chisel3._
import chisel3.util._
import chipsalliance.rocketchip.config._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.cache._
import xiangshan.frontend.FtqPtr
import xiangshan.mem.mdp._
import utils._
import utility._

class LoadQueueRAW(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    val redirect = Flipped(ValidIO(new Redirect))
    val query = Vec(LoadPipelineWidth, Flipped(new LoadViolationQueryIO))
    val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle)))
    val rollback = Output(Valid(new Redirect))
    val stAddrReadySqPtr = Input(new SqPtr)
    val stIssuePtr = Input(new SqPtr)
    val lqFull = Output(Bool())
  })
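
  // Query handshake as exercised in this module (a summary of the usage below, not
  // a full spec of LoadViolationQueryIO):
  //   req     - a load asks for an entry when some older store still has an
  //             unresolved address (see needEnqueue below)
  //   resp    - answered one cycle after req; replayFromFetch is tied low here,
  //             since violations are reported through io.rollback instead
  //   release - the load unit frees the entry it was granted last cycle when the
  //             load itself replays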

  println("LoadQueueRAW: size " + LoadQueueRAWSize)
  //  LoadQueueRAW field
  //  +-----------+---------+-------+------+-----------+
  //  | Allocated | MicroOp | PAddr | Mask | Datavalid |
  //  +-----------+---------+-------+------+-----------+
  //
  //  Field descriptions:
  //  Allocated   : entry has been allocated already
  //  MicroOp     : inst's microOp
  //  PAddr       : physical address
  //  Mask        : data mask
  //  Datavalid   : data valid
  //
  val allocated = RegInit(VecInit(List.fill(LoadQueueRAWSize)(false.B))) // the control signals need to explicitly indicate the initial value
  val uop = Reg(Vec(LoadQueueRAWSize, new MicroOp))
  val paddrModule = Module(new LqPAddrModule(
    gen = UInt(PAddrBits.W),
    numEntries = LoadQueueRAWSize,
    numRead = LoadPipelineWidth,
    numWrite = LoadPipelineWidth,
    numWBank = LoadQueueNWriteBanks,
    numWDelay = 2,
    numCamPort = StorePipelineWidth
  ))
  paddrModule.io := DontCare
  val maskModule = Module(new LqMaskModule(
    gen = UInt(8.W),
    numEntries = LoadQueueRAWSize,
    numRead = LoadPipelineWidth,
    numWrite = LoadPipelineWidth,
    numWBank = LoadQueueNWriteBanks,
    numWDelay = 2,
    numCamPort = StorePipelineWidth
  ))
  maskModule.io := DontCare
  val datavalid = RegInit(VecInit(List.fill(LoadQueueRAWSize)(false.B)))

  //  free list: stores the indices of available (free) entries.
  //  +---+---+--------------+-----+-----+
  //  | 0 | 1 |    ......    | n-2 | n-1 |
  //  +---+---+--------------+-----+-----+
  val freeList = Module(new FreeList(
    size = LoadQueueRAWSize,
    allocWidth = LoadPipelineWidth,
    freeWidth = 4,
    moduleName = "LoadQueueRAW freelist"
  ))
  freeList.io := DontCare

  //  LoadQueueRAW enqueue
  val canEnqueue = io.query.map(_.req.valid)
  val cancelEnqueue = io.query.map(_.req.bits.uop.robIdx.needFlush(io.redirect))
  val allAddrCheck = io.stIssuePtr === io.stAddrReadySqPtr
  val hasAddrInvalidStore = io.query.map(_.req.bits.uop.sqIdx).map(sqIdx => {
    Mux(!allAddrCheck, isBefore(io.stAddrReadySqPtr, sqIdx), false.B)
  })
  val needEnqueue = canEnqueue.zip(hasAddrInvalidStore).zip(cancelEnqueue).map { case ((v, r), c) => v && r && !c }
  // per-port copies of the most recently enqueued paddr/mask (see the bypass match in detectRollback)
  val bypassPAddr = Reg(Vec(LoadPipelineWidth, UInt(PAddrBits.W)))
  val bypassMask = Reg(Vec(LoadPipelineWidth, UInt(8.W)))

  //  Allocate logic
  val enqValidVec = Wire(Vec(LoadPipelineWidth, Bool()))
  val enqIndexVec = Wire(Vec(LoadPipelineWidth, UInt()))
  val enqOffset = Wire(Vec(LoadPipelineWidth, UInt()))
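
  // Allocation requests are compacted toward the free list: enqOffset(w) counts how
  // many lower-numbered ports also enqueue this cycle, so port w claims the
  // enqOffset(w)-th slot the free list offers. In a hypothetical three-port cycle
  // with needEnqueue = (1, 0, 1), port 0 takes allocateSlot(0) and port 2 takes
  // allocateSlot(1); canAllocate is checked at the same offsets.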

  //  Enqueue
  for ((enq, w) <- io.query.map(_.req).zipWithIndex) {
    paddrModule.io.wen(w) := false.B
    maskModule.io.wen(w) := false.B
    freeList.io.doAllocate(w) := false.B

    enqOffset(w) := PopCount(needEnqueue.take(w))
    freeList.io.allocateReq(w) := needEnqueue(w)

    //  Allocate ready
    enqValidVec(w) := freeList.io.canAllocate(enqOffset(w))
    enqIndexVec(w) := freeList.io.allocateSlot(enqOffset(w))
    enq.ready := Mux(needEnqueue(w), enqValidVec(w), true.B)

    val enqIndex = enqIndexVec(w)
    when (needEnqueue(w) && enq.ready) {
      val debug_robIdx = enq.bits.uop.robIdx.asUInt
      XSError(allocated(enqIndex), p"LoadQueueRAW: You can not write a valid entry! check: ldu $w, robIdx $debug_robIdx")

      freeList.io.doAllocate(w) := true.B

      //  Allocate new entry
      allocated(enqIndex) := true.B

      //  Write paddr
      paddrModule.io.wen(w) := true.B
      paddrModule.io.waddr(w) := enqIndex
      paddrModule.io.wdata(w) := enq.bits.paddr
      bypassPAddr(w) := enq.bits.paddr

      //  Write mask
      maskModule.io.wen(w) := true.B
      maskModule.io.waddr(w) := enqIndex
      maskModule.io.wdata(w) := enq.bits.mask
      bypassMask(w) := enq.bits.mask

      //  Fill info
      uop(enqIndex) := enq.bits.uop
      datavalid(enqIndex) := enq.bits.datavalid
    }
  }

  for ((query, w) <- io.query.map(_.resp).zipWithIndex) {
    query.valid := RegNext(io.query(w).req.valid)
    query.bits.replayFromFetch := RegNext(false.B)
  }

  //  LoadQueueRAW deallocate
  val freeMaskVec = Wire(Vec(LoadQueueRAWSize, Bool()))

  // init
  freeMaskVec.map(e => e := false.B)
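
  // Entries are freed along two paths: the per-entry loop below releases a load once
  // every older store address is known (or the load is flushed), and the release
  // loop further down lets a replaying load hand back the entry it was granted last
  // cycle. freeMaskVec is a full bitmap because several entries may free at once.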

  // When the addresses of all stores older than a load are ready, that load can be
  // released.
  for (i <- 0 until LoadQueueRAWSize) {
    val deqNotBlock = Mux(!allAddrCheck, !isBefore(io.stAddrReadySqPtr, uop(i).sqIdx), true.B)
    val needCancel = uop(i).robIdx.needFlush(io.redirect)

    when (allocated(i) && (deqNotBlock || needCancel)) {
      allocated(i) := false.B
      freeMaskVec(i) := true.B
    }
  }

  // if the load needs replay, deallocate its entry
  val lastCanAccept = RegNext(VecInit(needEnqueue.zip(enqValidVec).map(x => x._1 && x._2)))
  val lastAllocIndex = RegNext(enqIndexVec)

  for ((release, w) <- io.query.map(_.release).zipWithIndex) {
    val releaseValid = release && lastCanAccept(w)
    val releaseIndex = lastAllocIndex(w)

    when (allocated(releaseIndex) && releaseValid) {
      allocated(releaseIndex) := false.B
      freeMaskVec(releaseIndex) := true.B
    }
  }
  freeList.io.free := freeMaskVec.asUInt

  io.lqFull := freeList.io.empty
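
  // Note: there is no enq/deq pointer pair in this queue, so "full" simply means the
  // free list has run out of entries.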

  /**
    * Store-Load Memory violation detection
    * Scheme 1 (current scheme): flush the pipeline, then re-fetch from the load instruction (like the old load queue).
    * Scheme 2                 : re-fetch from the first instruction after the store instruction.
    *
    * When a store writes back, it searches the LoadQueue for younger load instructions
    * with the same physical address. They loaded wrong data and need re-execution.
    *
    * Cycle 0: Store Writeback
    *   Generate the match vector for the store address with rangeMask(stPtr, enqPtr).
    * Cycle 1: Select the oldest load from each select group.
    * Cycle x: Redirect Fire
    *   Choose the oldest load among the StorePipelineWidth oldest loads.
    *   Prepare the redirect request according to the detected violation.
    *   Fire the redirect request (if valid).
    */
  //              SelectGroup 0         SelectGroup 1          SelectGroup y
  // stage 0:       lq  lq  lq  ......    lq  lq  lq  .......    lq  lq  lq
  //                |   |   |             |   |   |              |   |   |
  // stage 1:       lq  lq  lq  ......    lq  lq  lq  .......    lq  lq  lq
  //                 \  |  /    ......     \  |  /    .......     \  |  /
  // stage 2:           lq                    lq                     lq
  //                     \  |  /  .......  \  |  /   ........  \  |  /
  // stage 3:               lq                lq                  lq
  //                                          ...
  //                                          ...
  //                                           |
  // stage x:                                  lq
  //                                           |
  //                                       rollback req

  // select logic
  val SelectGroupSize = RollbackGroupSize
  val lgSelectGroupSize = log2Ceil(SelectGroupSize)
  val TotalSelectCycles = scala.math.ceil(log2Ceil(LoadQueueRAWSize).toFloat / lgSelectGroupSize).toInt + 1
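
  // Latency sketch: each tree level picks the oldest of one RollbackGroupSize-wide
  // group and registers the result, so levels are pipelined one cycle apart. With
  // hypothetical parameters LoadQueueRAWSize = 64 and RollbackGroupSize = 8,
  // TotalSelectCycles = ceil(log2(64) / log2(8)) + 1 = ceil(6 / 3) + 1 = 3.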

  def selectPartialOldest[T <: XSBundleWithMicroOp](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
      }
      val oldest = Mux(valid(0) && valid(1),
        Mux(isAfter(bits(0).uop.robIdx, bits(1).uop.robIdx), res(1), res(0)),
        Mux(valid(0) && !valid(1), res(0), res(1)))
      (Seq(oldest.valid), Seq(oldest.bits))
    } else {
      val left = selectPartialOldest(valid.take(valid.length / 2), bits.take(bits.length / 2))
      val right = selectPartialOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)))
      selectPartialOldest(left._1 ++ right._1, left._2 ++ right._2)
    }
  }

  def selectOldest[T <: XSBundleWithMicroOp](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    val numSelectGroups = scala.math.ceil(valid.length.toFloat / SelectGroupSize).toInt

    // group info
    val selectValidGroups =
      if (valid.length <= SelectGroupSize) {
        Seq(valid)
      } else {
        (0 until numSelectGroups).map(g => {
          if (valid.length < (g + 1) * SelectGroupSize) {
            valid.takeRight(valid.length - g * SelectGroupSize)
          } else {
            (0 until SelectGroupSize).map(j => valid(g * SelectGroupSize + j))
          }
        })
      }
    val selectBitsGroups =
      if (bits.length <= SelectGroupSize) {
        Seq(bits)
      } else {
        (0 until numSelectGroups).map(g => {
          if (bits.length < (g + 1) * SelectGroupSize) {
            bits.takeRight(bits.length - g * SelectGroupSize)
          } else {
            (0 until SelectGroupSize).map(j => bits(g * SelectGroupSize + j))
          }
        })
      }

    // select logic
    if (valid.length <= SelectGroupSize) {
      val (selValid, selBits) = selectPartialOldest(valid, bits)
      (Seq(RegNext(selValid(0) && !selBits(0).uop.robIdx.needFlush(io.redirect))), Seq(RegNext(selBits(0))))
    } else {
      val select = (0 until numSelectGroups).map(g => {
        val (selValid, selBits) = selectPartialOldest(selectValidGroups(g), selectBitsGroups(g))
        (RegNext(selValid(0) && !selBits(0).uop.robIdx.needFlush(io.redirect)), RegNext(selBits(0)))
      })
      selectOldest(select.map(_._1), select.map(_._2))
    }
  }
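
  // detectRollback timing, as read from the code below: the store's paddr/mask drive
  // the CAM ports in cycle 0; the registered CAM match, the age check (isAfter on
  // robIdx) and the datavalid filter form the candidate vector in cycle 1. Since the
  // paddr/mask arrays are written with numWDelay = 2, loads enqueued in the last two
  // cycles are not yet visible to the CAM; the bypassPAddr/bypassMask compares (the
  // double-RegNext terms) cover exactly that window.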

  def detectRollback(i: Int) = {
    paddrModule.io.violationMdata(i) := io.storeIn(i).bits.paddr
    maskModule.io.violationMdata(i) := io.storeIn(i).bits.mask

    val bypassPaddrMask = RegNext(VecInit((0 until LoadPipelineWidth).map(j => bypassPAddr(j)(PAddrBits-1, 3) === io.storeIn(i).bits.paddr(PAddrBits-1, 3))))
    val bypassMMask = RegNext(VecInit((0 until LoadPipelineWidth).map(j => (bypassMask(j) & io.storeIn(i).bits.mask).orR)))
    val bypassMaskUInt = (0 until LoadPipelineWidth).map(j =>
      Fill(LoadQueueRAWSize, RegNext(RegNext(io.query(j).req.fire))) & Mux(bypassPaddrMask(j) && bypassMMask(j), UIntToOH(RegNext(RegNext(enqIndexVec(j)))), 0.U(LoadQueueRAWSize.W))
    ).reduce(_|_)

    val addrMaskMatch = RegNext(paddrModule.io.violationMmask(i).asUInt & maskModule.io.violationMmask(i).asUInt) | bypassMaskUInt
    val entryNeedCheck = RegNext(VecInit((0 until LoadQueueRAWSize).map(j => {
      allocated(j) && isAfter(uop(j).robIdx, io.storeIn(i).bits.uop.robIdx) && datavalid(j) && !uop(j).robIdx.needFlush(io.redirect)
    })))
    val lqViolationSelVec = VecInit((0 until LoadQueueRAWSize).map(j => {
      addrMaskMatch(j) && entryNeedCheck(j)
    }))

    val lqViolationSelUopExts = uop.map(uop => {
      val wrapper = Wire(new XSBundleWithMicroOp)
      wrapper.uop := uop
      wrapper
    })

    // select logic
    val lqSelect = selectOldest(lqViolationSelVec, lqViolationSelUopExts)

    // select one inst
    val lqViolation = lqSelect._1(0)
    val lqViolationUop = lqSelect._2(0).uop

    XSDebug(
      lqViolation,
      "need rollback (ld wb before store) pc %x robidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.robIdx.asUInt, lqViolationUop.robIdx.asUInt
    )

    (lqViolation, lqViolationUop)
  }

  // select rollback (part1) and generate rollback request
  // rollback check
  // Lq rollback seq check is done in s3 (next stage), as getting the rollbackLq MicroOp is slow
  val rollbackLqWb = Wire(Vec(StorePipelineWidth, Valid(new MicroOpRbExt)))
  val stFtqIdx = Wire(Vec(StorePipelineWidth, new FtqPtr))
  val stFtqOffset = Wire(Vec(StorePipelineWidth, UInt(log2Up(PredictWidth).W)))
  for (w <- 0 until StorePipelineWidth) {
    val detectedRollback = detectRollback(w)
    rollbackLqWb(w).valid := detectedRollback._1 && DelayN(io.storeIn(w).valid && !io.storeIn(w).bits.miss, TotalSelectCycles)
    rollbackLqWb(w).bits.uop := detectedRollback._2
    rollbackLqWb(w).bits.flag := w.U
    stFtqIdx(w) := DelayN(io.storeIn(w).bits.uop.cf.ftqPtr, TotalSelectCycles)
    stFtqOffset(w) := DelayN(io.storeIn(w).bits.uop.cf.ftqOffset, TotalSelectCycles)
  }

  val rollbackLqWbValid = rollbackLqWb.map(x => x.valid && !x.bits.uop.robIdx.needFlush(io.redirect))
  val rollbackLqWbBits = rollbackLqWb.map(x => x.bits)
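
  // Each store pipeline reports at most one violating load; a final oldest-first
  // select over those candidates picks the rollback target. The flag field of
  // MicroOpRbExt records which store pipeline detected the violation, so the
  // matching store's ftqPtr/ftqOffset can be attached to the redirect.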

  // select rollback (part2), generate rollback request, then fire rollback request
  // Note that we use robIdx - 1.U to flush the load instruction itself.
  // Thus, here if last cycle's robIdx equals this cycle's robIdx, it still triggers the redirect.

  // select uop in parallel
  val lqs = selectPartialOldest(rollbackLqWbValid, rollbackLqWbBits)
  val rollbackUopExt = lqs._2(0)
  val rollbackUop = rollbackUopExt.uop
  val rollbackStFtqIdx = stFtqIdx(rollbackUopExt.flag)
  val rollbackStFtqOffset = stFtqOffset(rollbackUopExt.flag)

  // check if rollback request is still valid in parallel
  io.rollback.bits.robIdx := rollbackUop.robIdx
  io.rollback.bits.ftqIdx := rollbackUop.cf.ftqPtr
  io.rollback.bits.stFtqIdx := rollbackStFtqIdx
  io.rollback.bits.ftqOffset := rollbackUop.cf.ftqOffset
  io.rollback.bits.stFtqOffset := rollbackStFtqOffset
  io.rollback.bits.level := RedirectLevel.flush
  io.rollback.bits.interrupt := DontCare
  io.rollback.bits.cfiUpdate := DontCare
  io.rollback.bits.cfiUpdate.target := rollbackUop.cf.pc
  io.rollback.bits.debug_runahead_checkpoint_id := rollbackUop.debugInfo.runahead_checkpoint_id
  // io.rollback.bits.pc := DontCare

  io.rollback.valid := VecInit(rollbackLqWbValid).asUInt.orR

  // perf cnt
  val canEnqCount = PopCount(io.query.map(_.req.fire))
  val validCount = freeList.io.validCount
  val allowEnqueue = validCount <= (LoadQueueRAWSize - LoadPipelineWidth).U

  QueuePerf(LoadQueueRAWSize, validCount, !allowEnqueue)
  XSPerfAccumulate("enqs", canEnqCount)
  XSPerfAccumulate("stld_rollback", io.rollback.valid)
  val perfEvents: Seq[(String, UInt)] = Seq(
    ("enq ", canEnqCount),
    ("stld_rollback", io.rollback.valid),
  )
  generatePerfEvent()
  // end
}