/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
package xiangshan.mem

import chisel3._
import chisel3.util._
import chipsalliance.rocketchip.config._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.cache._
import utils._
import utility._

class LoadQueueRAR(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    val redirect = Flipped(Valid(new Redirect))
    val query = Vec(LoadPipelineWidth, Flipped(new LoadViolationQueryIO))
    val release = Flipped(Valid(new Release))
    val ldWbPtr = Input(new LqPtr)
    val lqFull = Output(Bool())
  })

  println("LoadQueueRAR: size: " + LoadQueueRARSize)
  //  LoadQueueRAR fields:
  //  +-------+-------+-------+----------+
  //  | Valid |  Uop  | PAddr | Released |
  //  +-------+-------+-------+----------+
  //
  //  Field descriptions:
  //  Valid    : the entry is allocated (backed by the `allocated` vector below).
  //  Uop      : micro-op of the enqueued load.
  //  PAddr    : physical address of the load (kept in `paddrModule`).
  //  Released : the DCache has released the cacheline this load read from.
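  //
  //  Background (a sketch of the intent, inferred from the query logic below):
  //  LoadQueueRAR records loads that already have their data while older loads
  //  have not written back yet. If the DCache releases the cacheline such a
  //  load read from, an older load to the same address that executes later may
  //  observe newer data, breaking same-address load-load ordering; the query
  //  logic below detects this case and requests a replay from fetch.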
  val allocated = RegInit(VecInit(List.fill(LoadQueueRARSize)(false.B))) // control signals need an explicit initial value
  val uop = Reg(Vec(LoadQueueRARSize, new MicroOp))
  val paddrModule = Module(new LqPAddrModule(
    gen = UInt(PAddrBits.W),
    numEntries = LoadQueueRARSize,
    numRead = LoadPipelineWidth,
    numWrite = LoadPipelineWidth,
    numWBank = LoadQueueNWriteBanks,
    numWDelay = 2,
    numCamPort = LoadPipelineWidth
  ))
  paddrModule.io := DontCare
  val released = RegInit(VecInit(List.fill(LoadQueueRARSize)(false.B)))

  //  freeList: stores the indexes of free (unallocated) entries.
  //  +---+---+--------------+-----+-----+
  //  | 0 | 1 |    ......    | n-2 | n-1 |
  //  +---+---+--------------+-----+-----+
  val freeList = Module(new FreeList(
    size = LoadQueueRARSize,
    allocWidth = LoadPipelineWidth,
    freeWidth = 4,
    moduleName = "LoadQueueRAR freelist"
  ))
  freeList.io := DontCare

  //  Real allocation happens in load_s2.
  //  The PAddr write takes 2 cycles, so the release signal is kept for an
  //  extra cycle, letting an enqueuing load catch a release that fires while
  //  its address is still being written.
  val release1Cycle = io.release
  val release2Cycle = RegNext(io.release)
  val release2Cycle_dup_lsu = RegNext(io.release)

  //  LoadQueueRAR enqueue condition:
  //  there are still uncompleted load instructions older than the current
  //  load ("completed" means the load has got its data or an exception,
  //  i.e. it has written back and io.ldWbPtr has passed it).
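  //
  //  A worked example with hypothetical pointer values (flag bits of the
  //  circular pointers ignored): suppose io.ldWbPtr = 5. A querying load with
  //  lqIdx = 4 is at or behind the writeback pointer, so every load older
  //  than it has completed and no entry is needed; a querying load with
  //  lqIdx = 7 still has uncompleted older loads in front of it, so it must
  //  be recorded here until ldWbPtr passes it.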
  val canEnqueue = io.query.map(_.req.valid)
  val cancelEnqueue = io.query.map(_.req.bits.uop.robIdx.needFlush(io.redirect))
  val hasNotWritebackedLoad = io.query.map(_.req.bits.uop.lqIdx).map(lqIdx => isAfter(lqIdx, io.ldWbPtr))
  val needEnqueue = canEnqueue.zip(hasNotWritebackedLoad).zip(cancelEnqueue).map { case ((v, r), c) => v && r && !c }

  //  Allocate logic
  val enqValidVec = Wire(Vec(LoadPipelineWidth, Bool()))
  val enqIndexVec = Wire(Vec(LoadPipelineWidth, UInt()))
  val enqOffset = Wire(Vec(LoadPipelineWidth, UInt()))

  for ((enq, w) <- io.query.map(_.req).zipWithIndex) {
    paddrModule.io.wen(w) := false.B
    freeList.io.doAllocate(w) := false.B

    //  Compact the allocation requests: channel w asks for the
    //  PopCount(needEnqueue.take(w))-th free slot offered this cycle.
    enqOffset(w) := PopCount(needEnqueue.take(w))
    freeList.io.allocateReq(w) := needEnqueue(w)

    //  Allocate ready
    enqValidVec(w) := freeList.io.canAllocate(enqOffset(w))
    enqIndexVec(w) := freeList.io.allocateSlot(enqOffset(w))
    enq.ready := Mux(needEnqueue(w), enqValidVec(w), true.B)

    val enqIndex = enqIndexVec(w)
    when (needEnqueue(w) && enq.ready) {
      val debug_robIdx = enq.bits.uop.robIdx.asUInt
      XSError(allocated(enqIndex), p"LoadQueueRAR: You can not write a valid entry! check: ldu $w, robIdx $debug_robIdx")

      freeList.io.doAllocate(w) := true.B

      //  Allocate a new entry
      allocated(enqIndex) := true.B

      //  Write paddr
      paddrModule.io.wen(w) := true.B
      paddrModule.io.waddr(w) := enqIndex
      paddrModule.io.wdata(w) := enq.bits.paddr

      //  Fill entry info. `released` is initialized true if the load already
      //  has its data and a release in flight matches this load's cacheline.
      uop(enqIndex) := enq.bits.uop
      released(enqIndex) :=
        (enq.bits.datavalid &&
         release2Cycle.valid &&
         enq.bits.paddr(PAddrBits-1, DCacheLineOffset) === release2Cycle.bits.paddr(PAddrBits-1, DCacheLineOffset)) ||
        (release1Cycle.valid &&
         enq.bits.paddr(PAddrBits-1, DCacheLineOffset) === release1Cycle.bits.paddr(PAddrBits-1, DCacheLineOffset))
    }
  }

  //  LoadQueueRAR deallocate
  val freeMaskVec = Wire(Vec(LoadQueueRARSize, Bool()))

  //  init
  freeMaskVec.map(e => e := false.B)

  //  When every load older than entry i has written back (io.ldWbPtr has
  //  caught up with its lqIdx), the entry can be freed.
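  //  For example (a sketch, using hypothetical pointers as before): an entry
  //  holding a load with lqIdx = 3 is freed once io.ldWbPtr reaches 3, since
  //  from that point a released cacheline can no longer reorder it against an
  //  older load. An entry whose load is flushed by a redirect is freed
  //  immediately.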
  for (i <- 0 until LoadQueueRARSize) {
    val deqNotBlock = !isBefore(io.ldWbPtr, uop(i).lqIdx)
    val needFlush = uop(i).robIdx.needFlush(io.redirect)

    when (allocated(i) && (deqNotBlock || needFlush)) {
      allocated(i) := false.B
      freeMaskVec(i) := true.B
    }
  }

  //  If a query needs replay, free the entry that was allocated for it in the
  //  previous cycle.
  val lastCanAccept = RegNext(VecInit(needEnqueue.zip(enqValidVec).map(x => x._1 && x._2)))
  val lastAllocIndex = RegNext(enqIndexVec)

  for ((release, w) <- io.query.map(_.release).zipWithIndex) {
    val releaseValid = release && lastCanAccept(w)
    val releaseIndex = lastAllocIndex(w)

    when (allocated(releaseIndex) && releaseValid) {
      allocated(releaseIndex) := false.B
      freeMaskVec(releaseIndex) := true.B
    }
  }

  freeList.io.free := freeMaskVec.asUInt

  //  LoadQueueRAR query.
  //  A load-to-load violation is reported when an entry:
  //  1. matches the query's physical address through the CAM port,
  //  2. has its `released` flag set, and
  //  3. is younger than the querying load instruction.
  val ldLdViolation = Wire(Vec(LoadPipelineWidth, Bool()))
  val allocatedUInt = RegNext(allocated.asUInt)
  for ((query, w) <- io.query.zipWithIndex) {
    ldLdViolation(w) := false.B
    paddrModule.io.releaseViolationMdata(w) := query.req.bits.paddr

    query.resp.valid := RegNext(query.req.valid)
    //  Generate the real violation mask: allocated entries whose paddr
    //  matched last cycle and which are younger (robIdx after the query's).
    val robIdxMask = VecInit(uop.map(_.robIdx).map(isAfter(_, query.req.bits.uop.robIdx)))
    val matchMask = allocatedUInt &
                    RegNext(paddrModule.io.releaseViolationMmask(w).asUInt) &
                    RegNext(robIdxMask.asUInt)
    //  Load-to-load violation check result
    val ldLdViolationMask = WireInit(matchMask & RegNext(released.asUInt))
    ldLdViolationMask.suggestName("ldLdViolationMask_" + w)
    query.resp.bits.replayFromFetch := ldLdViolationMask.orR || RegNext(ldLdViolation(w))
  }

  //  A release whose cacheline matches the querying load's address while the
  //  query is in flight also counts as a violation.
  (0 until LoadPipelineWidth).map(w => {
    ldLdViolation(w) := (release1Cycle.valid && io.query(w).req.bits.paddr(PAddrBits-1, DCacheLineOffset) === release1Cycle.bits.paddr(PAddrBits-1, DCacheLineOffset)) ||
                        (release2Cycle.valid && io.query(w).req.bits.paddr(PAddrBits-1, DCacheLineOffset) === release2Cycle.bits.paddr(PAddrBits-1, DCacheLineOffset))
  })

  //  When io.release.valid (release1Cycle.valid) fires, the last ld-ld paddr
  //  CAM port is reused to update the `released` flags within 1 cycle.
  val releaseVioMask = Reg(Vec(LoadQueueRARSize, Bool()))
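  //  Release update timing (a sketch): in cycle T a Release fires and its
  //  line address is driven into that CAM port; in cycle T+1 the registered
  //  match mask sets `released` for every allocated entry whose paddr falls
  //  in the released line. A load enqueuing in cycle T or T+1 is instead
  //  covered by the release1Cycle/release2Cycle match in the enqueue logic
  //  above.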
  when (release1Cycle.valid) {
    paddrModule.io.releaseMdata.takeRight(1)(0) := release1Cycle.bits.paddr
  }

  (0 until LoadQueueRARSize).map(i => {
    when (RegNext(paddrModule.io.releaseMmask.takeRight(1)(0)(i) && allocated(i) && release1Cycle.valid)) {
      //  Note: if a load has missed in dcache and is waiting for a refill in
      //  the load queue, its released flag still needs to be set as true if
      //  the address matches.
      released(i) := true.B
    }
  })

  //  The queue is full when the free list has no free entries left.
  io.lqFull := freeList.io.empty

  //  perf cnt
  val canEnqCount = PopCount(io.query.map(_.req.fire))
  val validCount = freeList.io.validCount
  val allowEnqueue = validCount <= (LoadQueueRARSize - LoadPipelineWidth).U
  val ldLdViolationCount = PopCount(io.query.map(_.resp).map(resp => resp.valid && resp.bits.replayFromFetch))

  QueuePerf(LoadQueueRARSize, validCount, !allowEnqueue)
  XSPerfAccumulate("enq", canEnqCount)
  XSPerfAccumulate("ld_ld_violation", ldLdViolationCount)
  val perfEvents: Seq[(String, UInt)] = Seq(
    ("enq", canEnqCount),
    ("ld_ld_violation", ldLdViolationCount)
  )
  generatePerfEvent()
  //  End
}
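//  Query handshake, as wired above (a summary sketch): in cycle T, load_s2
//  drives query.req with the load's uop and paddr; req.ready reports whether
//  an entry could be allocated (always true when no entry is needed). In
//  cycle T+1, query.resp.valid fires and resp.bits.replayFromFetch tells the
//  pipeline whether a younger, released, address-matching load was found,
//  i.e. whether this load must be re-executed from fetch. query.release may
//  be raised in cycle T+1 to return the just-allocated entry when the load
//  is replayed for other reasons.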