/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.cache._
import xiangshan.cache.{DCacheWordIO, DCacheLineIO, MemoryOpConstants}
import xiangshan.mem._
import xiangshan.backend.rob.RobPtr


// Data module define
// These data modules are like SyncDataModuleTemplate, but support cam-like ops

/**
  * Store-queue address storage with a content-addressed (CAM) lookup port.
  *
  * Each entry keeps an address, a byte mask and a "line flag". An entry whose line flag is set
  * matches any lookup that falls in the same cache line, regardless of the offset inside it.
  *
  * @param dataWidth  width of a stored address in bits
  * @param numEntries number of entries
  * @param numRead    number of synchronous read ports
  * @param numWrite   number of write ports
  * @param numForward number of CAM lookup ports
  */
class SQAddrModule(dataWidth: Int, numEntries: Int, numRead: Int, numWrite: Int, numForward: Int)(implicit p: Parameters) extends XSModule with HasDCacheParameters {
  val io = IO(new Bundle {
    // sync read
    val raddr = Input(Vec(numRead, UInt(log2Up(numEntries).W)))
    val rdata = Output(Vec(numRead, UInt(dataWidth.W))) // rdata: store addr
    val rlineflag = Output(Vec(numRead, Bool())) // rdata: line op flag
    // write
    val wen = Input(Vec(numWrite, Bool()))
    val waddr = Input(Vec(numWrite, UInt(log2Up(numEntries).W)))
    val wdata = Input(Vec(numWrite, UInt(dataWidth.W))) // wdata: store addr
    val wmask = Input(Vec(numWrite, UInt((VLEN/8).W)))
    val wlineflag = Input(Vec(numWrite, Bool())) // wdata: line op flag
    // forward addr cam
    val forwardMdata = Input(Vec(numForward, UInt(dataWidth.W))) // addr
    val forwardDataMask = Input(Vec(numForward, UInt((VLEN/8).W))) // forward mask
    val forwardMmask = Output(Vec(numForward, Vec(numEntries, Bool()))) // cam result mask
    // debug
    val debug_data = Output(Vec(numEntries, UInt(dataWidth.W)))
  })

  val data = Reg(Vec(numEntries, UInt(dataWidth.W)))
  val mask = Reg(Vec(numEntries, UInt((VLEN/8).W)))
  val lineflag = Reg(Vec(numEntries, Bool())) // cache line match flag
  // if lineflag == true, this address points to a whole cacheline
  io.debug_data := data

  // read ports: the read address is registered, so the result appears one cycle after raddr
  for (i <- 0 until numRead) {
    io.rdata(i) := data(RegNext(io.raddr(i)))
    io.rlineflag(i) := lineflag(RegNext(io.raddr(i)))
  }

  // write ports: single-cycle write. Under Chisel last-connect semantics a higher-numbered
  // port would win on an address clash; the assertion at the bottom forbids such clashes.
  for (i <- 0 until numWrite) {
    when (io.wen(i)) {
      data(io.waddr(i)) := io.wdata(i)
      mask(io.waddr(i)) := io.wmask(i)
      lineflag(io.waddr(i)) := io.wlineflag(i)
    }
  }

  // content addressed match
  for (i <- 0 until numForward) {
    for (j <- 0 until numEntries) {
      // io.forwardMmask(i)(j) := io.forwardMdata(i)(dataWidth-1, 3) === data(j)(dataWidth-1, 3)
      // Same cache line: compare all address bits above the line offset.
      val linehit = io.forwardMdata(i)(dataWidth-1, DCacheLineOffset) === data(j)(dataWidth-1, DCacheLineOffset)
      // Same vector word inside the line; when StoreQueueForwardWithMask is enabled the
      // byte masks of the query and the entry must also overlap.
      val hit128bit = (io.forwardMdata(i)(DCacheLineOffset-1, DCacheVWordOffset) === data(j)(DCacheLineOffset-1, DCacheVWordOffset)) &&
        (!StoreQueueForwardWithMask.B || (mask(j) & io.forwardDataMask(i)).orR)
      // A whole-line entry (lineflag) matches on the line address alone.
      io.forwardMmask(i)(j) := linehit && (hit128bit || lineflag(j))
    }
  }

  // DataModuleTemplate should not be used when there're any write conflicts
  for (i <- 0 until numWrite) {
    for (j <- i+1 until numWrite) {
      assert(!(io.wen(i) && io.wen(j) && io.waddr(i) === io.waddr(j)))
    }
  }
}

/** One byte lane of a store-queue data entry: the byte value plus its valid bit. */
class SQData8Entry(implicit p: Parameters) extends XSBundle {
  val valid = Bool() // this byte is valid
  // NOTE(review): used as a single byte (forwardData is UInt(8.W)); XLEN/8 equals 8 only
  // when XLEN is 64 -- confirm whether 8.W is the intended width.
  val data = UInt((XLEN/8).W)
}

/**
  * Byte-wide slice of the store-queue data array.
  *
  * Holds one [[SQData8Entry]] per store-queue entry, with banked two-stage write ports,
  * synchronous read ports and a store-to-load forwarding selector.
  *
  * @param numEntries number of entries; expected to be a multiple of StoreQueueNWriteBanks,
  *                   otherwise the trailing entries have no write path
  * @param numRead    number of synchronous read ports
  * @param numWrite   number of write ports (data and valid-mask are written independently)
  * @param numForward number of forwarding ports
  */
class SQData8Module(numEntries: Int, numRead: Int, numWrite: Int, numForward: Int)(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
{
  val io = IO(new Bundle() {
    // sync read port
    val raddr = Vec(numRead, Input(UInt(log2Up(numEntries).W)))
    val rdata = Vec(numRead, Output(new SQData8Entry))
    // data write port
    val data = new Bundle() {
      val wen   = Vec(numWrite, Input(Bool()))
      val waddr = Vec(numWrite, Input(UInt(log2Up(numEntries).W)))
      val wdata = Vec(numWrite, Input(UInt((XLEN/8).W)))
    }
    // mask (data valid) write port
    val mask = new Bundle() {
      val wen   = Vec(numWrite, Input(Bool()))
      val waddr = Vec(numWrite, Input(UInt(log2Up(numEntries).W)))
      val wdata = Vec(numWrite, Input(Bool()))
    }

    // st-ld forward addr cam result input, used to select forward data
    val needForward = Input(Vec(numForward, Vec(2, UInt(numEntries.W))))
    // forward result valid bit generated in current cycle
    val forwardValidFast = Vec(numForward, Output(Bool()))
    // forward result generated in the next cycle
    val forwardValid = Vec(numForward, Output(Bool())) // forwardValid = RegNext(forwardValidFast)
    val forwardData = Vec(numForward, Output(UInt(8.W)))
  })

  io := DontCare

  val data = Reg(Vec(numEntries, new SQData8Entry))

  require(isPow2(StoreQueueNWriteBanks))
  require(StoreQueueNWriteBanks > 1)
  // An entry index is split as {index inside bank, bank id}; the bank id is the low bits.
  def get_bank(in: UInt): UInt = in(log2Up(StoreQueueNWriteBanks) -1, 0)
  def get_bank_index(in: UInt): UInt = in >> log2Up(StoreQueueNWriteBanks)
  def get_vec_index(index: Int, bank: Int): Int = {
    (index << log2Up(StoreQueueNWriteBanks)) + bank
  }

  // Entries per write bank. Derived from this module's own numEntries (which sizes `data`)
  // rather than the global StoreQueueSize, so the write decoder always covers exactly the
  // entries that exist.
  private val numRegsPerBank = numEntries / StoreQueueNWriteBanks

  /**
    * Builds the banked two-stage write pipeline shared by the data and mask write ports.
    *
    * Stage 0 decodes the target bank; enable, payload and in-bank index are registered, and
    * stage 1 updates the selected entry through `commit`.
    *
    * @param tag    prefix used for the suggested signal names ("data" or "mask")
    * @param wen    per-port write enables
    * @param waddr  per-port entry indices
    * @param wdata  per-port payloads
    * @param commit connects the registered payload to the field of entry `idx` being written
    */
  private def bankedWrite[T <: Data](tag: String, wen: Vec[Bool], waddr: Vec[UInt], wdata: Vec[T])
                                    (commit: (Int, T) => Unit): Unit = {
    for (i <- 0 until numWrite; bank <- 0 until StoreQueueNWriteBanks) {
      val s0_wen = wen(i) && get_bank(waddr(i)) === bank.U
      val s1_wen = RegNext(s0_wen)
      val s1_wdata = RegEnable(wdata(i), s0_wen)
      val s1_waddr = RegEnable(get_bank_index(waddr(i)), s0_wen)
      for (index <- 0 until numRegsPerBank) {
        when(s1_wen && s1_waddr === index.U){
          commit(get_vec_index(index, bank), s1_wdata)
        }
      }
      s0_wen.suggestName(s"${tag}_s0_wen_${i}_bank_${bank}")
      s1_wen.suggestName(s"${tag}_s1_wen_${i}_bank_${bank}")
      s1_wdata.suggestName(s"${tag}_s1_wdata_${i}_bank_${bank}")
      s1_waddr.suggestName(s"${tag}_s1_waddr_${i}_bank_${bank}")
    }
  }

  // writeback to sq
  // store queue data write takes 2 cycles
  bankedWrite("data", io.data.wen, io.data.waddr, io.data.wdata) { (idx, wdata) =>
    data(idx).data := wdata
  }
  // the valid (mask) bit uses the same 2-cycle banked write path
  bankedWrite("mask", io.mask.wen, io.mask.waddr, io.mask.wdata) { (idx, wdata) =>
    data(idx).valid := wdata
  }

  // store queue dequeue read: the address is registered, data appears one cycle after raddr
  for (i <- 0 until numRead) {
    io.rdata(i) := data(RegNext(io.raddr(i)))
  }

  // DataModuleTemplate should not be used when there're any write conflicts
  for (i <- 0 until numWrite) {
    for (j <- i+1 until numWrite) {
      assert(!(io.data.wen(i) && io.data.wen(j) && io.data.waddr(i) === io.data.waddr(j)))
    }
  }
  for (i <- 0 until numWrite) {
    for (j <- i+1 until numWrite) {
      assert(!(io.mask.wen(i) && io.mask.wen(j) && io.mask.waddr(i) === io.mask.waddr(j)))
    }
  }

  // forwarding
  // Compare ringBufferTail (deqPtr) and forward.sqIdx, we have two cases:
  // (1) if they have the same flag, we need to check range(tail, sqIdx)
  // (2) if they have different flags, we need to check range(tail, end of queue) and range(0, sqIdx)
  // Forward1: Mux(same_flag, range(tail, sqIdx), range(tail, end of queue))
  // Forward2: Mux(same_flag, 0.U,                range(0, sqIdx)          )
  // i.e. forward1 is the target entries with the same flag bits and forward2 otherwise
  // (the two masks arrive precomputed as io.needForward(i)(0) and io.needForward(i)(1))

  // an entry with a larger index should have higher priority since its data is younger

  for (i <- 0 until numForward) {
    // parallel fwd logic
    // Candidates for mask 0 occupy [0, numEntries) and candidates for mask 1 occupy
    // [numEntries, 2 * numEntries).
    val matchResultVec = Wire(Vec(numEntries * 2, new FwdEntry))

    // Pairwise reduction: valid flags are OR-ed, and the right-hand operand's data wins
    // whenever it is valid (presumably the higher-indexed candidate, assuming
    // ParallelOperation keeps element order -- confirm against its definition).
    def parallelFwd(xs: Seq[Data]): Data = {
      ParallelOperation(xs, (a: Data, b: Data) => {
        val l = a.asTypeOf(new FwdEntry)
        val r = b.asTypeOf(new FwdEntry)
        val res = Wire(new FwdEntry)
        res.validFast := l.validFast || r.validFast
        res.valid := l.valid || r.valid
        // res.valid := RegNext(res.validFast)
        res.data := Mux(r.valid, r.data, l.data)
        res
      })
    }

    for (j <- 0 until numEntries) {
      val needCheck0 = io.needForward(i)(0)(j)
      val needCheck1 = io.needForward(i)(1)(j)
      val needCheck0Reg = RegNext(needCheck0)
      val needCheck1Reg = RegNext(needCheck1)

      matchResultVec(j).validFast := needCheck0 && data(j).valid
      matchResultVec(j).valid := needCheck0Reg && data(j).valid
      matchResultVec(j).data := data(j).data
      matchResultVec(numEntries + j).validFast := needCheck1 && data(j).valid
      matchResultVec(numEntries + j).valid := needCheck1Reg && data(j).valid
      matchResultVec(numEntries + j).data := data(j).data
    }

    val parallelFwdResult = parallelFwd(matchResultVec).asTypeOf(new FwdEntry)

    // validFast is generated the same cycle with query
    io.forwardValidFast(i) := parallelFwdResult.validFast
    // valid is generated 1 cycle after query request
    io.forwardValid(i) := parallelFwdResult.valid
    // data is generated 1 cycle after query request
    io.forwardData(i) := parallelFwdResult.data
  }
}

/** Full-width store-queue data entry: VLEN bits of data plus one valid bit per byte. */
class SQDataEntry(implicit p: Parameters) extends XSBundle {
  val mask = UInt((VLEN/8).W)
  val data = UInt(VLEN.W)
}

// SQDataModule is a wrapper of SQData8Modules
/**
  * Full-width store-queue data array assembled from one [[SQData8Module]] per byte lane.
  *
  * All lanes share the read/write addresses, write enables and forwarding masks; byte `j` of
  * the write data and bit `j` of the write mask go to lane `j`.
  *
  * @param numEntries number of entries
  * @param numRead    number of synchronous read ports
  * @param numWrite   number of write ports
  * @param numForward number of forwarding ports
  */
class SQDataModule(numEntries: Int, numRead: Int, numWrite: Int, numForward: Int)(implicit p: Parameters) extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper {
  val io = IO(new Bundle() {
    // sync read port
    val raddr = Vec(numRead, Input(UInt(log2Up(numEntries).W)))
    val rdata = Vec(numRead, Output(new SQDataEntry))
    // data write port
    val data = new Bundle() {
      val wen   = Vec(numWrite, Input(Bool()))
      val waddr = Vec(numWrite, Input(UInt(log2Up(numEntries).W)))
      val wdata = Vec(numWrite, Input(UInt(VLEN.W)))
    }
    // mask (data valid) write port
    val mask = new Bundle() {
      val wen   = Vec(numWrite, Input(Bool()))
      val waddr = Vec(numWrite, Input(UInt(log2Up(numEntries).W)))
      val wdata = Vec(numWrite, Input(UInt((VLEN/8).W)))
    }

    // st-ld forward addr cam result input, used to select forward data
    val needForward = Input(Vec(numForward, Vec(2, UInt(numEntries.W))))
    // forward result valid bit generated in current cycle
    val forwardMaskFast = Vec(numForward, Output(Vec((VLEN/8), Bool())))
    // forward result generated in the next cycle
    val forwardMask = Vec(numForward, Output(Vec((VLEN/8), Bool()))) // forwardMask = RegNext(forwardMaskFast)
    val forwardData = Vec(numForward, Output(Vec((VLEN/8), UInt(8.W))))
  })

  // One byte-wide module per byte of a VLEN-bit entry. The count is derived from VLEN so it
  // always agrees with the VLEN/8-wide ports above (16 lanes when VLEN is 128, hence the name).
  private val numByteLanes = VLEN / 8
  val data16 = Seq.fill(numByteLanes)(Module(new SQData8Module(numEntries, numRead, numWrite, numForward)))

  // writeback to lq/sq
  for (i <- 0 until numWrite) {
    // write to data16: lane j receives mask bit j and data byte j
    for (j <- 0 until numByteLanes) {
      data16(j).io.mask.waddr(i) := io.mask.waddr(i)
      data16(j).io.mask.wdata(i) := io.mask.wdata(i)(j)
      data16(j).io.mask.wen(i)   := io.mask.wen(i)
      data16(j).io.data.waddr(i) := io.data.waddr(i)
      data16(j).io.data.wdata(i) := io.data.wdata(i)(8*(j+1)-1, 8*j)
      data16(j).io.data.wen(i)   := io.data.wen(i)
    }
  }

  // store queue dequeue read: gather the per-lane results, lane 0 in the least significant position
  for (i <- 0 until numRead) {
    data16.foreach { lane => lane.io.raddr(i) := io.raddr(i) }
    io.rdata(i).mask := VecInit(data16.map(_.io.rdata(i).valid)).asUInt
    io.rdata(i).data := VecInit(data16.map(_.io.rdata(i).data)).asUInt
  }

  // DataModuleTemplate should not be used when there're any write conflicts
  for (i <- 0 until numWrite) {
    for (j <- i+1 until numWrite) {
      assert(!(io.data.wen(i) && io.data.wen(j) && io.data.waddr(i) === io.data.waddr(j)))
    }
  }
  for (i <- 0 until numWrite) {
    for (j <- i+1 until numWrite) {
      assert(!(io.mask.wen(i) && io.mask.wen(j) && io.mask.waddr(i) === io.mask.waddr(j)))
    }
  }

  for (i <- 0 until numForward) {
    // parallel fwd logic: every lane receives the same forwarding masks ...
    data16.foreach { lane => lane.io.needForward(i) <> io.needForward(i) }
    // ... and the per-lane results are collected once per port (previously these three
    // connections were re-issued inside the per-lane loop with identical right-hand sides).
    io.forwardMaskFast(i) := VecInit(data16.map(_.io.forwardValidFast(i)))
    io.forwardMask(i) := VecInit(data16.map(_.io.forwardValid(i)))
    io.forwardData(i) := VecInit(data16.map(_.io.forwardData(i)))
  }
}