/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
package xiangshan.mem

import chisel3._
import chisel3.util._
import chipsalliance.rocketchip.config._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.cache._
import utils._
import utility._
import xiangshan.backend.Bundles.DynInst

class LoadQueueRAR(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    val redirect = Flipped(Valid(new Redirect))
    val query = Vec(LoadPipelineWidth, Flipped(new LoadViolationQueryIO))
    val release = Flipped(Valid(new Release))
    val ldWbPtr = Input(new LqPtr)
    val lqFull = Output(Bool())
  })

  println("LoadQueueRAR: size: " + LoadQueueRARSize)
  //  LoadQueueRAR fields
  //  +-----------+-----+-------+----------+
  //  | Allocated | Uop | PAddr | Released |
  //  +-----------+-----+-------+----------+
  //
  //  Field descriptions:
  //  Allocated : the entry is valid.
  //  Uop       : micro-op of the load.
  //  PAddr     : physical address of the load.
  //  Released  : the DCache line covering this load has been released.
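  //
  //  The RAR queue tracks in-flight loads so that, when the DCache releases a
  //  cacheline (e.g. on a probe or an eviction), younger loads that already
  //  took data from that line can be found by a paddr CAM match and replayed
  //  from fetch, keeping load-to-load ordering observable by other cores.
  //
  //  A minimal sketch of how a load pipeline stage might drive one query port
  //  (names follow LoadViolationQueryIO above; the real hookup lives in the
  //  load unit, and the load_s2_* signals here are hypothetical):
  //
  //    io.query(w).req.valid      := load_s2_valid  // req.ready must be checked too
  //    io.query(w).req.bits.uop   := load_s2_uop
  //    io.query(w).req.bits.paddr := load_s2_paddr
  //    // one cycle later:
  //    val needReplay = io.query(w).resp.valid && io.query(w).resp.bits.replayFromFetch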
  //
  val allocated = RegInit(VecInit(List.fill(LoadQueueRARSize)(false.B))) // control signals need an explicit initial value
  val uop = Reg(Vec(LoadQueueRARSize, new DynInst))
  val paddrModule = Module(new LqPAddrModule(
    gen = UInt(PAddrBits.W),
    numEntries = LoadQueueRARSize,
    numRead = LoadPipelineWidth,
    numWrite = LoadPipelineWidth,
    numWBank = LoadQueueNWriteBanks,
    numWDelay = 2,
    numCamPort = LoadPipelineWidth
  ))
  paddrModule.io := DontCare
  val released = RegInit(VecInit(List.fill(LoadQueueRARSize)(false.B)))

  // freelist: stores the indices of the free (unallocated) entries.
  //  +---+---+--------------+-----+-----+
  //  | 0 | 1 |    ......    | n-2 | n-1 |
  //  +---+---+--------------+-----+-----+
  val freeList = Module(new FreeList(
    size = LoadQueueRARSize,
    allocWidth = LoadPipelineWidth,
    freeWidth = 4,
    moduleName = "LoadQueueRAR freelist"
  ))
  freeList.io := DontCare

  // Real allocation: load_s2
  // The PAddr write takes 2 cycles, so the release signal is also delayed by
  // 1 cycle so that an enqueuing load can still catch the release.
  val release1Cycle = io.release
  val release2Cycle = RegNext(io.release)
  val release2Cycle_dup_lsu = RegNext(io.release)

  // LoadQueueRAR enqueue condition:
  // There are still uncompleted load instructions older than the current load
  // (a load is "completed" once it has received its data or an exception).
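  // Example: if ldWbPtr sits at lqIdx 5 and the incoming load has lqIdx 8,
  // loads 5..7 are still outstanding, so isAfter(lqIdx, ldWbPtr) holds and
  // the load must be tracked for possible RAR violations.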
  val canEnqueue = io.query.map(_.req.valid)
  val cancelEnqueue = io.query.map(_.req.bits.uop.robIdx.needFlush(io.redirect))
  val hasNotWritebackedLoad = io.query.map(_.req.bits.uop.lqIdx).map(lqIdx => isAfter(lqIdx, io.ldWbPtr))
  val needEnqueue = canEnqueue.zip(hasNotWritebackedLoad).zip(cancelEnqueue).map { case ((v, r), c) => v && r && !c }

  // Allocate logic
  val enqValidVec = Wire(Vec(LoadPipelineWidth, Bool()))
  val enqIndexVec = Wire(Vec(LoadPipelineWidth, UInt()))
  val enqOffset = Wire(Vec(LoadPipelineWidth, UInt()))

  for ((enq, w) <- io.query.map(_.req).zipWithIndex) {
    paddrModule.io.wen(w) := false.B
    freeList.io.doAllocate(w) := false.B

    enqOffset(w) := PopCount(needEnqueue.take(w))
    freeList.io.allocateReq(w) := needEnqueue(w)

    // Allocate ready
    enqValidVec(w) := freeList.io.canAllocate(enqOffset(w))
    enqIndexVec(w) := freeList.io.allocateSlot(enqOffset(w))
    enq.ready := Mux(needEnqueue(w), enqValidVec(w), true.B)

    val enqIndex = enqIndexVec(w)
    when (needEnqueue(w) && enq.ready) {
      val debug_robIdx = enq.bits.uop.robIdx.asUInt
      XSError(allocated(enqIndex), p"LoadQueueRAR: You can not write a valid entry! check: ldu $w, robIdx $debug_robIdx")

      freeList.io.doAllocate(w) := true.B

      // Allocate new entry
      allocated(enqIndex) := true.B

      // Write paddr
      paddrModule.io.wen(w) := true.B
      paddrModule.io.waddr(w) := enqIndex
      paddrModule.io.wdata(w) := enq.bits.paddr

      // Fill info. Note the grouping (parenthesized here for clarity; && binds
      // tighter than ||): datavalid only guards the release2Cycle term, while
      // the release1Cycle match applies unconditionally.
      released(enqIndex) :=
        (enq.bits.datavalid &&
         release2Cycle.valid &&
         enq.bits.paddr(PAddrBits-1, DCacheLineOffset) === release2Cycle.bits.paddr(PAddrBits-1, DCacheLineOffset)) ||
        (release1Cycle.valid &&
         enq.bits.paddr(PAddrBits-1, DCacheLineOffset) === release1Cycle.bits.paddr(PAddrBits-1, DCacheLineOffset))
    }
  }

  // LoadQueueRAR deallocate
  val freeMaskVec = Wire(Vec(LoadQueueRARSize, Bool()))

  // init
  freeMaskVec.map(e => e := false.B)

  // When all loads older than the current load have been written back, the
  // current load's entry can be freed.
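  // Once ldWbPtr has caught up with an entry's lqIdx, every older load has
  // already written back and will never query this entry again, so it is safe
  // to free. Entries whose uop is flushed by a redirect are freed as well.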
  for (i <- 0 until LoadQueueRARSize) {
    val deqNotBlock = !isBefore(io.ldWbPtr, uop(i).lqIdx)
    val needFlush = uop(i).robIdx.needFlush(io.redirect)

    when (allocated(i) && (deqNotBlock || needFlush)) {
      allocated(i) := false.B
      freeMaskVec(i) := true.B
    }
  }

  // If a load needs replay, release the entry it allocated last cycle.
  val lastCanAccept = RegNext(VecInit(needEnqueue.zip(enqValidVec).map(x => x._1 && x._2)))
  val lastAllocIndex = RegNext(enqIndexVec)

  for ((release, w) <- io.query.map(_.release).zipWithIndex) {
    val releaseValid = release && lastCanAccept(w)
    val releaseIndex = lastAllocIndex(w)

    when (allocated(releaseIndex) && releaseValid) {
      allocated(releaseIndex) := false.B
      freeMaskVec(releaseIndex) := true.B
    }
  }

  freeList.io.free := freeMaskVec.asUInt

  // LoadQueueRAR query
  // A load-to-load violation is reported when an entry:
  // 1. matches the physical address via the CAM port,
  // 2. has its released flag set, and
  // 3. is younger than the querying load instruction.
  val ldLdViolation = Wire(Vec(LoadPipelineWidth, Bool()))
  val allocatedUInt = RegNext(allocated.asUInt)
  for ((query, w) <- io.query.zipWithIndex) {
    ldLdViolation(w) := false.B
    paddrModule.io.releaseViolationMdata(w) := query.req.bits.paddr

    query.resp.valid := RegNext(query.req.valid)
    // Generate the real violation mask
    val robIdxMask = VecInit(uop.map(_.robIdx).map(isAfter(_, query.req.bits.uop.robIdx)))
    val matchMask = allocatedUInt &
                    RegNext(paddrModule.io.releaseViolationMmask(w).asUInt) &
                    RegNext(robIdxMask.asUInt)
    // Load-to-load violation check result
    val ldLdViolationMask = WireInit(matchMask & RegNext(released.asUInt))
    ldLdViolationMask.suggestName("ldLdViolationMask_" + w)
    query.resp.bits.replayFromFetch := ldLdViolationMask.orR || RegNext(ldLdViolation(w))
  }

  (0 until LoadPipelineWidth).map(w => {
    ldLdViolation(w) := (release1Cycle.valid && io.query(w).req.bits.paddr(PAddrBits-1, DCacheLineOffset) === release1Cycle.bits.paddr(PAddrBits-1, DCacheLineOffset)) ||
                        (release2Cycle.valid && io.query(w).req.bits.paddr(PAddrBits-1, DCacheLineOffset) === release2Cycle.bits.paddr(PAddrBits-1, DCacheLineOffset))
  })

  // When io.release.valid (release1Cycle.valid), use the last ld-ld paddr CAM
  // port to update the released flags within 1 cycle.
  val releaseVioMask = Reg(Vec(LoadQueueRARSize, Bool()))
  when (release1Cycle.valid) {
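    // Reuse the last CAM port (takeRight(1)) to search every entry's paddr
    // against the released cacheline; the RegNext'd releaseMmask below then
    // sets the matching released flags one cycle later.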
    paddrModule.io.releaseMdata.takeRight(1)(0) := release1Cycle.bits.paddr
  }

  (0 until LoadQueueRARSize).map(i => {
    when (RegNext(paddrModule.io.releaseMmask.takeRight(1)(0)(i) && allocated(i) && release1Cycle.valid)) {
      // Note: even if a load has missed in the DCache and is waiting for a
      // refill in the load queue, its released flag must still be set to true
      // on an address match.
      released(i) := true.B
    }
  })

  io.lqFull := freeList.io.empty

  // perf cnt
  val canEnqCount = PopCount(io.query.map(_.req.fire))
  val validCount = freeList.io.validCount
  val allowEnqueue = validCount <= (LoadQueueRARSize - LoadPipelineWidth).U
  val ldLdViolationCount = PopCount(io.query.map(_.resp).map(resp => resp.valid && resp.bits.replayFromFetch))

  QueuePerf(LoadQueueRARSize, validCount, !allowEnqueue)
  XSPerfAccumulate("enq", canEnqCount)
  XSPerfAccumulate("ld_ld_violation", ldLdViolationCount)
  val perfEvents: Seq[(String, UInt)] = Seq(
    ("enq", canEnqCount),
    ("ld_ld_violation", ldLdViolationCount)
  )
  generatePerfEvent()
  // End
}