/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.cache._
import xiangshan.cache.{DCacheWordIO, DCacheLineIO, MemoryOpConstants}
import xiangshan.mem._
import xiangshan.backend.rob.RobPtr


// Data module define
// These data modules are like SyncDataModuleTemplate, but support cam-like ops

/**
  * Store-queue address storage with a content-addressed (CAM) match port.
  *
  * Each entry holds an address, a byte mask and a "line" flag. Reads are synchronous: the read
  * address is registered, so `rdata`/`rlineflag` correspond to the `raddr` of the previous cycle.
  * Writes take effect at the next clock edge.
  *
  * @param dataWidth  width of one stored address in bits
  * @param numEntries number of entries
  * @param numRead    number of synchronous read ports
  * @param numWrite   number of write ports
  * @param numForward number of CAM (forward query) ports
  */
class SQAddrModule(dataWidth: Int, numEntries: Int, numRead: Int, numWrite: Int, numForward: Int)(implicit p: Parameters) extends XSModule with HasDCacheParameters {
  val io = IO(new Bundle {
    // sync read
    val raddr = Input(Vec(numRead, UInt(log2Up(numEntries).W)))
    val rdata = Output(Vec(numRead, UInt(dataWidth.W))) // rdata: store addr
    val rlineflag = Output(Vec(numRead, Bool())) // rdata: line op flag
    // write
    val wen   = Input(Vec(numWrite, Bool()))
    val waddr = Input(Vec(numWrite, UInt(log2Up(numEntries).W)))
    val wdata = Input(Vec(numWrite, UInt(dataWidth.W))) // wdata: store addr
    val wmask = Input(Vec(numWrite, UInt((VLEN/8).W)))
    val wlineflag = Input(Vec(numWrite, Bool())) // wdata: line op flag
    // forward addr cam
    val forwardMdata = Input(Vec(numForward, UInt(dataWidth.W))) // addr
    val forwardDataMask = Input(Vec(numForward, UInt((VLEN/8).W))) // forward mask
    val forwardMmask = Output(Vec(numForward, Vec(numEntries, Bool()))) // cam result mask
    // debug
    val debug_data = Output(Vec(numEntries, UInt(dataWidth.W)))
  })

  val data = Reg(Vec(numEntries, UInt(dataWidth.W)))
  val mask = Reg(Vec(numEntries, UInt((VLEN/8).W)))
  // cache line match flag: if lineflag == true, this address points to a whole cacheline
  val lineflag = Reg(Vec(numEntries, Bool()))
  io.debug_data := data

  // read ports: the address is registered, the array itself is read combinationally
  for (i <- 0 until numRead) {
    io.rdata(i) := data(RegNext(io.raddr(i)))
    io.rlineflag(i) := lineflag(RegNext(io.raddr(i)))
  }

  // write ports: on a same-address collision the higher-numbered port would win (last connect),
  // but such collisions are forbidden by the assertion at the end of this module
  for (i <- 0 until numWrite) {
    when (io.wen(i)) {
      data(io.waddr(i)) := io.wdata(i)
      mask(io.waddr(i)) := io.wmask(i)
      lineflag(io.waddr(i)) := io.wlineflag(i)
    }
  }

  // content addressed match
  for (i <- 0 until numForward) {
    for (j <- 0 until numEntries) {
      // io.forwardMmask(i)(j) := io.forwardMdata(i)(dataWidth-1, 3) === data(j)(dataWidth-1, 3)
      // linehit: the query and the entry address agree on all bits above the cache line offset
      val linehit = io.forwardMdata(i)(dataWidth-1, DCacheLineOffset) === data(j)(dataWidth-1, DCacheLineOffset)
      // hit128bit: the bits between the vector-word offset and the line offset also agree and,
      // when StoreQueueForwardWithMask is set, the two byte masks overlap
      val hit128bit = (io.forwardMdata(i)(DCacheLineOffset-1, DCacheVWordOffset) === data(j)(DCacheLineOffset-1, DCacheVWordOffset)) &&
                      (!StoreQueueForwardWithMask.B || (mask(j) & io.forwardDataMask(i)).orR)
      // an entry flagged as a whole-line op matches on the line address alone
      io.forwardMmask(i)(j) := linehit && (hit128bit || lineflag(j))
    }
  }

  // DataModuleTemplate should not be used when there're any write conflicts
  for (i <- 0 until numWrite) {
    for (j <- i+1 until numWrite) {
      assert(!(io.wen(i) && io.wen(j) && io.waddr(i) === io.waddr(j)))
    }
  }
}

/** One byte lane of a store queue entry: a valid bit plus the byte itself. */
class SQData8Entry(implicit p: Parameters) extends XSBundle {
  val valid = Bool() // this byte is valid
  // One byte. This was spelled `XLEN/8`, which is 8 only when XLEN == 64; the lane is always
  // sliced and forwarded as 8 bits by the surrounding modules.
  val data = UInt(8.W)
}

/**
  * Storage for a single byte lane of every store queue entry, with banked two-stage writes and
  * a store-to-load forwarding selection network.
  *
  * @param numEntries number of entries
  * @param numRead    number of synchronous read ports
  * @param numWrite   number of write ports (data and mask each have this many)
  * @param numForward number of forwarding query ports
  */
class SQData8Module(numEntries: Int, numRead: Int, numWrite: Int, numForward: Int)(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
{
  val io = IO(new Bundle() {
    // sync read port
    val raddr = Vec(numRead, Input(UInt(log2Up(numEntries).W)))
    val rdata = Vec(numRead, Output(new SQData8Entry))
    // data write port
    val data = new Bundle() {
      val wen   = Vec(numWrite, Input(Bool()))
      val waddr = Vec(numWrite, Input(UInt(log2Up(numEntries).W)))
      val wdata = Vec(numWrite, Input(UInt(8.W))) // one byte (was XLEN/8, equal only for XLEN == 64)
    }
    // mask (data valid) write port
    val mask = new Bundle() {
      val wen   = Vec(numWrite, Input(Bool()))
      val waddr = Vec(numWrite, Input(UInt(log2Up(numEntries).W)))
      val wdata = Vec(numWrite, Input(Bool()))
    }

    // st-ld forward addr cam result input, used to select forward data
    val needForward = Input(Vec(numForward, Vec(2, UInt(numEntries.W))))
    // forward result valid bit generated in current cycle
    val forwardValidFast = Vec(numForward, Output(Bool()))
    // forward result generated in the next cycle
    val forwardValid = Vec(numForward, Output(Bool())) // forwardValid = RegNext(forwardValidFast)
    val forwardData = Vec(numForward, Output(UInt(8.W)))
  })

  // default value; every output is driven explicitly below
  io := DontCare

  val data = Reg(Vec(numEntries, new SQData8Entry))

  require(isPow2(StoreQueueNWriteBanks))
  require(StoreQueueNWriteBanks > 1)
  // An entry address is split as {index within bank, bank}: the low bits select the bank.
  def get_bank(in: UInt): UInt = in(log2Up(StoreQueueNWriteBanks) -1, 0)
  def get_bank_index(in: UInt): UInt = in >> log2Up(StoreQueueNWriteBanks)
  def get_vec_index(index: Int, bank: Int): Int = {
    (index << log2Up(StoreQueueNWriteBanks)) + bank
  }

  // Entries per write bank, derived from this module's own size (rounded up). The bound check in
  // bankedWrite skips the slots of a partially filled last row.
  private val numRegsPerBank = (numEntries + StoreQueueNWriteBanks - 1) / StoreQueueNWriteBanks

  /**
    * Builds the two-stage banked write path shared by the data and mask ports.
    *
    * s0: decode which bank the write targets; s1: registered enable/address/data drive the
    * per-entry update. The write therefore reaches the array two cycles after `wen`.
    *
    * @param prefix name prefix for the generated pipeline signals ("data" or "mask")
    * @param wen    per-port write enables
    * @param waddr  per-port entry addresses
    * @param wdata  per-port write values
    * @param update connects the registered write value to the selected field of entry `entry`
    */
  private def bankedWrite[T <: Data](
    prefix: String,
    wen: Vec[Bool],
    waddr: Vec[UInt],
    wdata: Vec[T]
  )(update: (Int, T) => Unit): Unit = {
    for (i <- 0 until numWrite) {
      for (bank <- 0 until StoreQueueNWriteBanks) {
        val s0_wen = wen(i) && get_bank(waddr(i)) === bank.U
        val s1_wen = RegNext(s0_wen)
        val s1_wdata = RegEnable(wdata(i), s0_wen)
        val s1_waddr = RegEnable(get_bank_index(waddr(i)), s0_wen)
        for (index <- 0 until numRegsPerBank) {
          val entry = get_vec_index(index, bank)
          if (entry < numEntries) {
            when (s1_wen && s1_waddr === index.U) {
              update(entry, s1_wdata)
            }
          }
        }
        s0_wen.suggestName(prefix + "_s0_wen_" + i +"_bank_" + bank)
        s1_wen.suggestName(prefix + "_s1_wen_" + i +"_bank_" + bank)
        s1_wdata.suggestName(prefix + "_s1_wdata_" + i +"_bank_" + bank)
        s1_waddr.suggestName(prefix + "_s1_waddr_" + i +"_bank_" + bank)
      }
    }
  }

  // writeback to sq
  // store queue data write takes 2 cycles
  bankedWrite("data", io.data.wen, io.data.waddr, io.data.wdata) { (entry, value) =>
    data(entry).data := value
  }

  // the valid bit of each byte is written through an identical pipeline
  bankedWrite("mask", io.mask.wen, io.mask.waddr, io.mask.wdata) { (entry, value) =>
    data(entry).valid := value
  }

  // sync read ports: the address is registered, the array itself is read combinationally
  for (i <- 0 until numRead) {
    io.rdata(i) := data(RegNext(io.raddr(i)))
  }

  // DataModuleTemplate should not be used when there're any write conflicts
  for (i <- 0 until numWrite) {
    for (j <- i+1 until numWrite) {
      assert(!(io.data.wen(i) && io.data.wen(j) && io.data.waddr(i) === io.data.waddr(j)))
    }
  }
  for (i <- 0 until numWrite) {
    for (j <- i+1 until numWrite) {
      assert(!(io.mask.wen(i) && io.mask.wen(j) && io.mask.waddr(i) === io.mask.waddr(j)))
    }
  }

  // forwarding
  // Compare ringBufferTail (deqPtr) and forward.sqIdx, we have two cases:
  // (1) if they have the same flag, we need to check range(tail, sqIdx)
  // (2) if they have different flags, we need to check range(tail, VirtualLoadQueueSize) and range(0, sqIdx)
  // Forward1: Mux(same_flag, range(tail, sqIdx), range(tail, VirtualLoadQueueSize))
  // Forward2: Mux(same_flag, 0.U,                   range(0, sqIdx)    )
  // i.e. forward1 is the target entries with the same flag bits and forward2 otherwise
  // NOTE(review): the upper bound above is written as VirtualLoadQueueSize; for a store queue this
  // is presumably StoreQueueSize. The masks are computed outside this module -- TODO confirm.

  // entry with larger index should have higher priority since its data is younger

  for (i <- 0 until numForward) {
    // parallel fwd logic
    // Layout: slot j holds the candidate from needForward(i)(0), slot numEntries + j the candidate
    // from needForward(i)(1), both for entry j.
    val matchResultVec = Wire(Vec(numEntries * 2, new FwdEntry))

    // Pairwise merge: valid bits are OR-ed and the right-hand operand's data wins when it is valid.
    // Presumably ParallelOperation keeps operand order, which gives later slots priority.
    def parallelFwd(xs: Seq[Data]): Data = {
      ParallelOperation(xs, (a: Data, b: Data) => {
        val l = a.asTypeOf(new FwdEntry)
        val r = b.asTypeOf(new FwdEntry)
        val res = Wire(new FwdEntry)
        res.validFast := l.validFast || r.validFast
        res.valid := l.valid || r.valid
        // res.valid := RegNext(res.validFast)
        res.data := Mux(r.valid, r.data, l.data)
        res
      })
    }

    for (j <- 0 until numEntries) {
      val needCheck0 = io.needForward(i)(0)(j)
      val needCheck1 = io.needForward(i)(1)(j)
      val needCheck0Reg = RegNext(needCheck0)
      val needCheck1Reg = RegNext(needCheck1)
      // validFast uses this cycle's request; valid uses the registered request together with the
      // entry state of the following cycle, matching the timing of data
      matchResultVec(j).validFast := needCheck0 && data(j).valid
      matchResultVec(j).valid := needCheck0Reg && data(j).valid
      matchResultVec(j).data := data(j).data
      matchResultVec(numEntries + j).validFast := needCheck1 && data(j).valid
      matchResultVec(numEntries + j).valid := needCheck1Reg && data(j).valid
      matchResultVec(numEntries + j).data := data(j).data
    }

    val parallelFwdResult = parallelFwd(matchResultVec).asTypeOf(new FwdEntry)

    // validFast is generated the same cycle with query
    io.forwardValidFast(i) := parallelFwdResult.validFast
    // valid is generated 1 cycle after query request
    io.forwardValid(i) := parallelFwdResult.valid
    // data is generated 1 cycle after query request
    io.forwardData(i) := parallelFwdResult.data
  }
}

/** One full store queue data entry as seen by readers: per-byte valid mask plus VLEN bits of data. */
class SQDataEntry(implicit p: Parameters) extends XSBundle {
  val mask = UInt((VLEN/8).W)
  val data = UInt(VLEN.W)
}

/**
  * SQDataModule is a wrapper of SQData8Modules: one SQData8Module per byte lane of a VLEN-bit
  * entry. Addresses, enables and forward requests are broadcast to every lane; data and mask are
  * split per byte on the way in and concatenated on the way out (lane 0 is the least significant
  * byte).
  *
  * @param numEntries number of entries
  * @param numRead    number of synchronous read ports
  * @param numWrite   number of write ports (data and mask each have this many)
  * @param numForward number of forwarding query ports
  */
class SQDataModule(numEntries: Int, numRead: Int, numWrite: Int, numForward: Int)(implicit p: Parameters) extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper {
  val io = IO(new Bundle() {
    // sync read port
    val raddr = Vec(numRead, Input(UInt(log2Up(numEntries).W)))
    val rdata = Vec(numRead, Output(new SQDataEntry))
    // data write port
    val data = new Bundle() {
      val wen   = Vec(numWrite, Input(Bool()))
      val waddr = Vec(numWrite, Input(UInt(log2Up(numEntries).W)))
      val wdata = Vec(numWrite, Input(UInt(VLEN.W)))
    }
    // mask (data valid) write port
    val mask = new Bundle() {
      val wen   = Vec(numWrite, Input(Bool()))
      val waddr = Vec(numWrite, Input(UInt(log2Up(numEntries).W)))
      val wdata = Vec(numWrite, Input(UInt((VLEN/8).W)))
    }

    // st-ld forward addr cam result input, used to select forward data
    val needForward = Input(Vec(numForward, Vec(2, UInt(numEntries.W))))
    // forward result valid bit generated in current cycle
    val forwardMaskFast = Vec(numForward, Output(Vec((VLEN/8), Bool())))
    // forward result generated in the next cycle
    val forwardMask = Vec(numForward, Output(Vec((VLEN/8), Bool()))) // forwardMask = RegNext(forwardMaskFast)
    val forwardData = Vec(numForward, Output(Vec((VLEN/8), UInt(8.W))))
  })

  // Number of byte lanes. This used to be the literal 16, which only agrees with the VLEN/8-wide
  // ports above when VLEN == 128.
  private val numBytes = VLEN / 8

  // One byte-lane module per byte; the val keeps its historical name so that the generated
  // instance names do not change.
  val data16 = Seq.fill(numBytes)(Module(new SQData8Module(numEntries, numRead, numWrite, numForward)))

  // writeback to lq/sq
  for (i <- 0 until numWrite) {
    // write to data16: address/enable are broadcast, mask bit j and data byte j go to lane j
    for (j <- 0 until numBytes) {
      data16(j).io.mask.waddr(i) := io.mask.waddr(i)
      data16(j).io.mask.wdata(i) := io.mask.wdata(i)(j)
      data16(j).io.mask.wen(i)   := io.mask.wen(i)
      data16(j).io.data.waddr(i) := io.data.waddr(i)
      data16(j).io.data.wdata(i) := io.data.wdata(i)(8*(j+1)-1, 8*j)
      data16(j).io.data.wen(i)   := io.data.wen(i)
    }
  }

  // sync read ports: broadcast the address, then gather the per-lane results
  for (i <- 0 until numRead) {
    data16.foreach(lane => lane.io.raddr(i) := io.raddr(i))
    io.rdata(i).mask := VecInit(data16.map(lane => lane.io.rdata(i).valid)).asUInt
    io.rdata(i).data := VecInit(data16.map(lane => lane.io.rdata(i).data)).asUInt
  }

  // DataModuleTemplate should not be used when there're any write conflicts
  for (i <- 0 until numWrite) {
    for (j <- i+1 until numWrite) {
      assert(!(io.data.wen(i) && io.data.wen(j) && io.data.waddr(i) === io.data.waddr(j)))
    }
  }
  for (i <- 0 until numWrite) {
    for (j <- i+1 until numWrite) {
      assert(!(io.mask.wen(i) && io.mask.wen(j) && io.mask.waddr(i) === io.mask.waddr(j)))
    }
  }

  // forwarding: every lane receives the same request; results are gathered once per port
  for (i <- 0 until numForward) {
    data16.foreach(lane => lane.io.needForward(i) := io.needForward(i))
    io.forwardMaskFast(i) := VecInit(data16.map(lane => lane.io.forwardValidFast(i)))
    io.forwardMask(i) := VecInit(data16.map(lane => lane.io.forwardValid(i)))
    io.forwardData(i) := VecInit(data16.map(lane => lane.io.forwardData(i)))
  }
}