/***************************************************************************************
* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
*
*
* Acknowledgement
*
* This implementation is inspired by several key papers:
* [1] Gurindar S. Sohi, and Manoj Franklin. "[High-bandwidth data memory systems for superscalar processors.]
* (https://doi.org/10.1145/106972.106980)" 4th International Conference on Architectural Support for Programming
* Languages and Operating Systems (ASPLOS). 1991.
***************************************************************************************/

package xiangshan.cache

import org.chipsalliance.cde.config.Parameters
import chisel3._
import utils._
import utility._
import chisel3.util._
import freechips.rocketchip.tilelink.{ClientMetadata, TLClientParameters, TLEdgeOut}
import xiangshan.mem.LqPtr
import xiangshan.{L1CacheErrorInfo, XSCoreParamsKey}

import scala.math.max

// Record layout written to the "BankConflict" ChiselDB table by SramedDataArray.
class BankConflictDB(implicit p: Parameters) extends DCacheBundle{
  // physical address presented on each load-pipeline read port
  val addr = Vec(LoadPipelineWidth, Bits(PAddrBits.W))
  // set address of each load-pipeline read port
  val set_index = Vec(LoadPipelineWidth, UInt((DCacheAboveIndexOffset - DCacheSetOffset).W))
  // bank address of every SRAM row touched by port 0 (VLEN/DCacheSRAMRowBits rows per access)
  val bank_index = Vec(VLEN/DCacheSRAMRowBits, UInt((DCacheSetOffset - DCacheBankOffset).W))
  // binary-encoded way of port 0
  val way_index = UInt(wayBits.W)
  // set when the two conflicting ports carry the same set and div address
  val fake_rr_bank_conflict = Bool()
}

// Base read request: a way-enable vector plus a physical address.
class L1BankedDataReadReq(implicit p: Parameters) extends DCacheBundle
{
  // one bit per way
  val way_en = Bits(DCacheWays.W)
  val addr = Bits(PAddrBits.W)
}

// Load-pipeline read request. It does not extend L1BankedDataReadReq; it repeats
// way_en/addr and adds the fields used for bank-conflict arbitration.
class L1BankedDataReadReqWithMask(implicit p: Parameters) extends DCacheBundle
{
  val way_en = Bits(DCacheWays.W)
  val addr = Bits(PAddrBits.W)
  // one bit per bank; two ports conflict only if their masks overlap
  val bankMask = Bits(DCacheBanks.W)
  // NOTE(review): not read by any code visible in this file - confirm where it is consumed
  val kill = Bool()
  // load-queue pointer, used to pick which port wins a read-read bank conflict
  val lqIdx = new LqPtr
}

// Whole-line read request from the main pipeline.
class L1BankedDataReadLineReq(implicit p: Parameters) extends L1BankedDataReadReq
{
  // one bit per bank
  val rmask = Bits(DCacheBanks.W)
}

// Now, we can write a cache-block in a single cycle
class L1BankedDataWriteReq(implicit p: Parameters) extends L1BankedDataReadReq
{
  // one bit per bank: which banks are written
  val wmask = Bits(DCacheBanks.W)
  // one SRAM row of data per bank
  val data = Vec(DCacheBanks, Bits(DCacheSRAMRowBits.W))
}

// cache-block write request without data
class L1BankedDataWriteReqCtrl(implicit p: Parameters) extends L1BankedDataReadReq

// Result of reading one SRAM row: raw data, its ECC bits and a delayed error flag.
class L1BankedDataReadResult(implicit p: Parameters) extends DCacheBundle
{
  // you can choose which bank to read to save power
  val ecc = Bits(dataECCBits.W)
  val raw_data = Bits(DCacheSRAMRowBits.W)
  val error_delayed = Bool() // 1 cycle later than data resp

  // Encoded word as stored in the SRAM: ECC bits in the upper part, raw data in the lower part.
  def asECCData() = {
    Cat(ecc, raw_data)
  }
}

// Write port of DataSRAMBank.
class DataSRAMBankWriteReq(implicit p: Parameters) extends DCacheBundle {
  val en = Bool()
  // set address; declared without an explicit width, so the width is inferred
  val addr = UInt()
  // one bit per way; DataSRAMBank asserts that at most one bit is set on a write
  val way_en = UInt(DCacheWays.W)
  // encoded (data + ECC) word
  val data = UInt(encDataBits.W)
}

// wrap a sram
// One single-port SRAMTemplate holding a single way of a single bank
// (DCacheSets / DCacheSetDiv entries of encDataBits each).
// bankIdx / wayIdx are only used to label the debug prints.
class DataSRAM(bankIdx: Int, wayIdx: Int)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle() {
    // write port
    val w = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Input(UInt(encDataBits.W))
    }

    // read port
    val r = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Output(UInt(encDataBits.W))
    }
  })

  // data sram
  val data_sram = Module(new SRAMTemplate(
    Bits(encDataBits.W),
    set = DCacheSets / DCacheSetDiv,
    way = 1,
    shouldReset = false,
    holdRead = false,
    singlePort = true
  ))

  data_sram.io.w.req.valid := io.w.en
  data_sram.io.w.req.bits.apply(
    setIdx = io.w.addr,
    data = io.w.data,
    waymask = 1.U
  )
  data_sram.io.r.req.valid := io.r.en
  data_sram.io.r.req.bits.apply(setIdx = io.r.addr)
  // the template is built with way = 1, so entry 0 is the only read result
  io.r.data := data_sram.io.r.resp.data(0)
  XSPerfAccumulate("part_data_read_counter", data_sram.io.r.req.valid)

  // Debug print of a read: fires the cycle after io.r.en, with the address captured at enable time.
  // NOTE(review): presumably io.r.data is valid one cycle after the request - confirm against SRAMTemplate.
  def dump_r() = {
    when(RegNext(io.r.en)) {
      XSDebug("bank read set %x bank %x way %x data %x\n",
        RegEnable(io.r.addr, io.r.en),
        bankIdx.U,
        wayIdx.U,
        io.r.data
      )
    }
  }

  // Debug print of a write, in the cycle the write is issued.
  def dump_w() = {
    when(io.w.en) {
      XSDebug("bank write set %x bank %x way %x data %x\n",
        io.w.addr,
        bankIdx.U,
        wayIdx.U,
        io.w.data
      )
    }
  }

  // Emits both the write and the read debug prints.
  def dump() = {
    dump_w()
    dump_r()
  }
}

// wrap data rows of 8 ways
// One bank made of DCacheWays single-port SRAMTemplates (one per way). All ways are read
// together with the same set address; a write targets the way selected by io.w.way_en.
// The `index` constructor parameter is not referenced in the body.
class DataSRAMBank(index: Int)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle() {
    val w = Input(new DataSRAMBankWriteReq)

    val r = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      // one encoded word per way
      val data = Output(Vec(DCacheWays, UInt(encDataBits.W)))
    }
  })

  // a write may enable at most one way (checked one cycle after the write)
  assert(RegNext(!io.w.en || PopCount(io.w.way_en) <= 1.U))

  // external controls do not read and write at the same time
  val w_info = io.w
  // val rw_bypass = RegNext(io.w.addr === io.r.addr && io.w.way_en === io.r.way_en && io.w.en)

  // multiway data bank
  val data_bank = Seq.fill(DCacheWays) {
    Module(new SRAMTemplate(
      Bits(encDataBits.W),
      set = DCacheSets / DCacheSetDiv,
      way = 1,
      shouldReset = false,
      holdRead = false,
      singlePort = true
    ))
  }

  for (w <- 0 until DCacheWays) {
    // only the way selected by way_en is written
    val wen = w_info.en && w_info.way_en(w)
    data_bank(w).io.w.req.valid := wen
    data_bank(w).io.w.req.bits.apply(
      setIdx = w_info.addr,
      data = w_info.data,
      waymask = 1.U
    )
    // every way is read on a read request
    data_bank(w).io.r.req.valid := io.r.en
    data_bank(w).io.r.req.bits.apply(setIdx = io.r.addr)
    // the way's clock is enabled only while it is being read or written
    // NOTE(review): first ClockGate argument is presumably the test-enable input - confirm
    data_bank(w).clock := ClockGate(false.B, io.r.en | (io.w.en & io.w.way_en(w)), clock)
  }
  XSPerfAccumulate("part_data_read_counter", PopCount(Cat(data_bank.map(_.io.r.req.valid))))

  io.r.data := data_bank.map(_.io.r.resp.data(0))

  // Debug print of a read: fires the cycle after io.r.en, with the address captured at enable time.
  def dump_r() = {
    when(RegNext(io.r.en)) {
      XSDebug("bank read addr %x data %x\n",
        RegEnable(io.r.addr, io.r.en),
        io.r.data.asUInt
      )
    }
  }

  // Debug print of a write, in the cycle the write is issued.
  def dump_w() = {
    when(io.w.en) {
      XSDebug("bank write addr %x way_en %x data %x\n",
        io.w.addr,
        io.w.way_en,
        io.w.data
      )
    }
  }

  // Emits both the write and the read debug prints.
  def dump() = {
    dump_w()
    dump_r()
  }
}

// Marker object wrapped in an Option by AbstractBankedDataArray.DataEccParam when data ECC is enabled.
case object HasDataEccParam

// Banked DCache Data
// -----------------------------------------------------------------
// | Bank0 | Bank1 | Bank2 | Bank3 | Bank4 | Bank5 | Bank6 | Bank7 |
// -----------------------------------------------------------------
// | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  |
// | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  |
// | ....  | ....  | ....  | ....  | ....
// | ....  | ....  | ....  |
// -----------------------------------------------------------------
// Common IO and helpers shared by the data-array implementations in this file.
abstract class AbstractBankedDataArray(implicit p: Parameters) extends DCacheModule
{
  val DataEccParam = if(EnableDataEcc) Some(HasDataEccParam) else None
  val ReadlinePortErrorIndex = LoadPipelineWidth
  val io = IO(new DCacheBundle {
    // load pipeline read word req
    val read = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new L1BankedDataReadReqWithMask)))
    // per load port: the access also reads the next bank (bank_addr + 1)
    val is128Req = Input(Vec(LoadPipelineWidth, Bool()))
    // main pipeline read / write line req
    val readline_intend = Input(Bool())
    val readline = Flipped(DecoupledIO(new L1BankedDataReadLineReq))
    val write = Flipped(DecoupledIO(new L1BankedDataWriteReq))
    // per-bank duplicated copies of the write control (way_en / addr / valid)
    val write_dup = Vec(DCacheBanks, Flipped(Decoupled(new L1BankedDataWriteReqCtrl)))
    // data for readline and loadpipe
    val readline_resp = Output(Vec(DCacheBanks, new L1BankedDataReadResult()))
    val readline_error_delayed = Output(Bool())
    val read_resp = Output(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, new L1BankedDataReadResult())))
    val read_error_delayed = Output(Vec(LoadPipelineWidth,Vec(VLEN/DCacheSRAMRowBits, Bool())))
    // val nacks = Output(Vec(LoadPipelineWidth, Bool()))
    // val errors = Output(Vec(LoadPipelineWidth + 1, ValidIO(new L1CacheErrorInfo))) // read ports + readline port
    // when bank_conflict, read (1) port should be ignored
    val bank_conflict_slow = Output(Vec(LoadPipelineWidth, Bool()))
    val disable_ld_fast_wakeup = Output(Vec(LoadPipelineWidth, Bool()))
    // customized cache op port
    val cacheOp = Flipped(new L1CacheInnerOpIO)
    val cacheOp_req_dup = Vec(DCacheDupNum, Flipped(Valid(new CacheCtrlReqInfo)))
    val cacheOp_req_bits_opCode_dup = Input(Vec(DCacheDupNum, UInt(XLEN.W)))
  })

  // Builds a Vec with one element per load pipeline from `f(pipelineIndex)`.
  def pipeMap[T <: Data](f: Int => T) = VecInit((0 until LoadPipelineWidth).map(f))

  // Extracts the ECC bits (everything above the raw data row) from an encoded SRAM word.
  // When data ECC is disabled the result is the constant 0.U.
  // The width check runs at elaboration time.
  def getECCFromEncWord(encWord: UInt) = {
    if (EnableDataEcc) {
      // report the offending width; the previous message printed encDataBits on both sides
      require(encWord.getWidth == encDataBits, s"encWord.getWidth=${encWord.getWidth} != encDataBits=$encDataBits!")
      encWord(encDataBits-1, DCacheSRAMRowBits)
    } else {
      0.U
    }
  }

  // Extracts the raw data row (low DCacheSRAMRowBits bits) from an encoded SRAM word.
  def getDataFromEncWord(encWord: UInt) = {
    encWord(DCacheSRAMRowBits-1, 0)
  }

  // Builds the word stored in the SRAM: {ecc, data} when data ECC is enabled, otherwise just data.
  def asECCData(ecc: UInt, data: UInt) = {
    if (EnableDataEcc) {
      Cat(ecc, data)
    } else {
      data
    }
  }

  // Debug print of every valid load-pipeline read request and of a valid readline request.
  def dumpRead = {
    (0 until LoadPipelineWidth) foreach { w =>
      when(io.read(w).valid) {
        XSDebug(s"DataArray Read channel: $w valid way_en: %x addr: %x\n",
          io.read(w).bits.way_en, io.read(w).bits.addr)
      }
    }
    when(io.readline.valid) {
      XSDebug(s"DataArray Read Line, valid way_en: %x addr: %x rmask %x\n",
        io.readline.bits.way_en, io.readline.bits.addr, io.readline.bits.rmask)
    }
  }

  // Debug print of a valid write request, one line per bank.
  def dumpWrite = {
    when(io.write.valid) {
      XSDebug(s"DataArray Write valid way_en: %x addr: %x\n",
        io.write.bits.way_en, io.write.bits.addr)

      (0 until DCacheBanks) foreach { r =>
        XSDebug(s"cycle: $r data: %x wmask: %x\n",
          io.write.bits.data(r), io.write.bits.wmask(r))
      }
    }
  }

  // Debug print of the load-pipeline read responses. For a 128-bit request rows 1 and 0 are
  // concatenated; this indexes rows 0 and 1 directly, so it relies on
  // VLEN/DCacheSRAMRowBits being at least 2.
  def dumpResp = {
    XSDebug(s"DataArray ReadeResp channel:\n")
    (0 until LoadPipelineWidth) map { r =>
      XSDebug(s"cycle: $r data: %x\n", Mux(io.is128Req(r),
        Cat(io.read_resp(r)(1).raw_data,io.read_resp(r)(0).raw_data),
        io.read_resp(r)(0).raw_data))
    }
  }

  // Emits all debug prints.
  def dump() = {
    dumpRead
    dumpWrite
    dumpResp
  }

  // Pairwise reduction over (valid, lqIdx, index) triples that returns
  // ((any input valid, selected lqIdx), selected index).
  // For each pair: if both are valid, b wins when a's pointer compares greater than b's
  // (presumably a greater LqPtr means a younger load - confirm against LqPtr's ordering);
  // if only a is valid, a wins; otherwise b is passed through.
  // All three sequences must have the same length.
  // (The misspelled name is kept because subclasses call it.)
  def selcetOldestPort(valid: Seq[Bool], bits: Seq[LqPtr], index: Seq[UInt]):((Bool, LqPtr), UInt) = {
    require(valid.length == bits.length && bits.length == index.length, s"length must eq, valid:${valid.length}, bits:${bits.length}, index:${index.length}")
    ParallelOperation(valid zip bits zip index,
      (a: ((Bool, LqPtr), UInt), b: ((Bool, LqPtr), UInt)) => {
        val au = a._1._2
        val bu = b._1._2
        val aValid = a._1._1
        val bValid = b._1._1
        val bSel = au > bu
        val bits = Mux(
          aValid && bValid,
          Mux(bSel, b._1._2, a._1._2),
          Mux(aValid && !bValid, a._1._2, b._1._2)
        )
        val idx = Mux(
          aValid && bValid,
          Mux(bSel, b._2, a._2),
          Mux(aValid && !bValid, a._2, b._2)
        )
        ((aValid || bValid, bits), idx)
      }
    )
  }

}

// the smallest access unit is sram
// Data array built from one DataSRAM per (set-div, bank, way). A load only enables the SRAM of
// the way it selects, while a readline enables every way of its set-div.
class SramedDataArray(implicit p: Parameters) extends AbstractBankedDataArray {
  println(" DCacheType: SramedDataArray")
  val ReduceReadlineConflict = false

  // writes are always accepted; they are registered and performed one cycle later
  io.write.ready := true.B
  io.write_dup.foreach(_.ready := true.B)

  // data_banks(div)(bank)(way)
  val data_banks = List.tabulate(DCacheSetDiv)( k => List.tabulate(DCacheBanks)(i => List.tabulate(DCacheWays)(j => Module(new DataSRAM(i,j)))))
  data_banks.foreach(_.foreach(_.foreach(_.dump())))

  val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
  val set_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val div_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val bank_addrs = Wire(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, UInt())))

  val line_set_addr = addr_to_dcache_div_set(io.readline.bits.addr)
  val line_div_addr = addr_to_dcache_div(io.readline.bits.addr)
  // when WPU is enabled, line_way_en is all enabled when read data
  val line_way_en = Fill(DCacheWays, 1.U) // val line_way_en = io.readline.bits.way_en
  // NOTE(review): not referenced anywhere else in this class
  val line_way_en_reg = RegEnable(io.readline.bits.way_en, 0.U(DCacheWays.W),io.readline.valid)

  // write request registered for one cycle; the SRAM write happens from these registers
  val write_bank_mask_reg = RegEnable(io.write.bits.wmask, 0.U(DCacheBanks.W), io.write.valid)
  val write_data_reg = RegEnable(io.write.bits.data, io.write.valid)
  val write_valid_reg = RegNext(io.write.valid)
  val write_valid_dup_reg = io.write_dup.map(x => RegNext(x.valid))
  val write_wayen_dup_reg = io.write_dup.map(x => RegEnable(x.bits.way_en, 0.U(DCacheWays.W), x.valid))
  val write_set_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div_set(x.bits.addr), x.valid))
  val write_div_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div(x.bits.addr), x.valid))

  // read data_banks and ecc_banks
  // for single port SRAM, do not allow read and write in the same cycle
  val rrhazard = false.B // io.readline.valid
  (0 until LoadPipelineWidth).foreach(rport_index => {
    div_addrs(rport_index) := addr_to_dcache_div(io.read(rport_index).bits.addr)
    set_addrs(rport_index) := addr_to_dcache_div_set(io.read(rport_index).bits.addr)
    bank_addrs(rport_index)(0) := addr_to_dcache_bank(io.read(rport_index).bits.addr)
    // second row of a 128-bit access lives in the next bank
    bank_addrs(rport_index)(1) := bank_addrs(rport_index)(0) + 1.U

    // use way_en to select a way after data read out
    assert(!(RegNext(io.read(rport_index).fire && PopCount(io.read(rport_index).bits.way_en) > 1.U)))
    way_en(rport_index) := io.read(rport_index).bits.way_en
  })

  // read conflict
  // rr_bank_conflict(x)(y): ports x and y are both valid and need the same SRAM
  // (same div, overlapping bank mask, same way) but at different set addresses
  val rr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x => Seq.tabulate(LoadPipelineWidth)(y => {
    if (x == y) {
      false.B
    } else {
      io.read(x).valid && io.read(y).valid &&
        div_addrs(x) === div_addrs(y) &&
        (io.read(x).bits.bankMask & io.read(y).bits.bankMask) =/= 0.U &&
        io.read(x).bits.way_en === io.read(y).bits.way_en &&
        set_addrs(x) =/= set_addrs(y)
    }
  }))
  val load_req_with_bank_conflict = rr_bank_conflict.map(_.reduce(_ || _))
  val load_req_valid = io.read.map(_.valid)
  val load_req_lqIdx = io.read.map(_.bits.lqIdx)
  val load_req_index = (0 until LoadPipelineWidth).map(_.asUInt)


  // the port chosen here keeps its read; every other conflicting port loses
  val load_req_bank_conflict_selcet = selcetOldestPort(load_req_valid, load_req_lqIdx, load_req_index)
  val load_req_bank_select_port = UIntToOH(load_req_bank_conflict_selcet._2).asBools

  // rr_bank_conflict_oldest(i): port i has a read-read conflict and is not the selected port
  val rr_bank_conflict_oldest = (0 until LoadPipelineWidth).map(i =>
    !load_req_bank_select_port(i) && load_req_with_bank_conflict(i)
  )

  // load vs. readline conflicts (against the actual readline and the intended one)
  val rrl_bank_conflict = Wire(Vec(LoadPipelineWidth, Bool()))
  val rrl_bank_conflict_intend = Wire(Vec(LoadPipelineWidth, Bool()))
  (0 until LoadPipelineWidth).foreach { i =>
    val judge = if (ReduceReadlineConflict) io.read(i).valid && (io.readline.bits.rmask & io.read(i).bits.bankMask) =/= 0.U && line_div_addr === div_addrs(i) && line_set_addr =/= set_addrs(i)
                else io.read(i).valid && line_div_addr === div_addrs(i) && line_set_addr =/= set_addrs(i)
    rrl_bank_conflict(i) := judge && io.readline.valid
    rrl_bank_conflict_intend(i) := judge && io.readline_intend
  }
  // load vs. the write registered in the previous cycle: same div, same way, and a written bank
  // is one the load reads
  val wr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x =>
    io.read(x).valid && write_valid_reg &&
    div_addrs(x) === write_div_addr_dup_reg.head &&
    way_en(x) === write_wayen_dup_reg.head &&
    (write_bank_mask_reg(bank_addrs(x)(0)) || write_bank_mask_reg(bank_addrs(x)(1)) && io.is128Req(x))
  )
  val wrl_bank_conflict = io.readline.valid && write_valid_reg && line_div_addr === write_div_addr_dup_reg.head
  // ready
  io.readline.ready := !(wrl_bank_conflict)
  io.read.zipWithIndex.foreach { case (x, i) => x.ready := !(wr_bank_conflict(i) || rrhazard) }

  val perf_multi_read = PopCount(io.read.map(_.valid)) >= 2.U
  val bank_conflict_fast = Wire(Vec(LoadPipelineWidth, Bool()))
  (0 until LoadPipelineWidth).foreach(i => {
    bank_conflict_fast(i) := wr_bank_conflict(i) || rrl_bank_conflict(i) ||
      rr_bank_conflict_oldest(i)
    // the conflict is reported one cycle after the request
    io.bank_conflict_slow(i) := RegNext(bank_conflict_fast(i))
    // fast wakeup is disabled on any conflict with a lower-numbered port, regardless of age
    io.disable_ld_fast_wakeup(i) := wr_bank_conflict(i) || rrl_bank_conflict_intend(i) ||
      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
  })
  XSPerfAccumulate("data_array_multi_read", perf_multi_read)
  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
    XSPerfAccumulate(s"data_array_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y))
  ))
  (0 until LoadPipelineWidth).foreach(i => {
    XSPerfAccumulate(s"data_array_rrl_bank_conflict_${i}", rrl_bank_conflict(i))
    XSPerfAccumulate(s"data_array_rw_bank_conflict_${i}", wr_bank_conflict(i))
    XSPerfAccumulate(s"data_array_read_${i}", io.read(i).valid)
  })
  XSPerfAccumulate("data_array_access_total", PopCount(io.read.map(_.valid)))
  XSPerfAccumulate("data_array_read_line", io.readline.valid)
  XSPerfAccumulate("data_array_write", io.write.valid)

  val read_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays,new L1BankedDataReadResult()))))
  // NOTE(review): driven below but not read anywhere else in this class
  val read_result_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays,new L1BankedDataReadResult()))))
  val read_error_delayed_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, Bool()))))

  for (div_index <- 0 until DCacheSetDiv){
    for (bank_index <- 0 until DCacheBanks) {
      for (way_index <- 0 until DCacheWays) {
        //     Set Addr & Read Way Mask
        //
        //    Pipe 0   ....  Pipe (n-1)
        //      +      ....     +
        //      |      ....     |
        // +----+---------------+-----+
        //  X                        X
        //   X                      +------+ Bank Addr Match
        //    +---------+----------+
        //              |
        //     +--------+--------+
        //     |    Data Bank    |
        //     +-----------------+
        // load port i reads this SRAM if it targets this div/bank/way and did not lose arbitration
        val loadpipe_en = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
          io.read(i).valid && div_addrs(i) === div_index.U && (bank_addrs(i)(0) === bank_index.U || bank_addrs(i)(1) === bank_index.U && io.is128Req(i)) &&
          way_en(i)(way_index) &&
          !rr_bank_conflict_oldest(i)
        })))
        val readline_en = Wire(Bool())
        if (ReduceReadlineConflict) {
          readline_en := io.readline.valid && io.readline.bits.rmask(bank_index) && line_way_en(way_index) && div_index.U === line_div_addr
        } else {
          readline_en := io.readline.valid && line_way_en(way_index) && div_index.U === line_div_addr
        }
        // readline has priority; otherwise the lowest-numbered enabled load port supplies the address
        val sram_set_addr = Mux(readline_en,
          addr_to_dcache_div_set(io.readline.bits.addr),
          PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => loadpipe_en(i) -> set_addrs(i)))
        )
        val read_en = loadpipe_en.asUInt.orR || readline_en
        // read raw data
        val data_bank = data_banks(div_index)(bank_index)(way_index)
        data_bank.io.r.en := read_en
        data_bank.io.r.addr := sram_set_addr

        read_result(div_index)(bank_index)(way_index).ecc := getECCFromEncWord(data_bank.io.r.data)
        read_result(div_index)(bank_index)(way_index).raw_data := getDataFromEncWord(data_bank.io.r.data)

        if (EnableDataEcc) {
          // the encoded word is registered the cycle after the read request and decoded from the register
          val ecc_data = read_result(div_index)(bank_index)(way_index).asECCData()
          val ecc_data_delayed = RegEnable(ecc_data, RegNext(read_en))
          read_result(div_index)(bank_index)(way_index).error_delayed := dcacheParameters.dataCode.decode(ecc_data_delayed).error
          read_error_delayed_result(div_index)(bank_index)(way_index) := read_result(div_index)(bank_index)(way_index).error_delayed
        } else {
          read_result(div_index)(bank_index)(way_index).error_delayed := false.B
          read_error_delayed_result(div_index)(bank_index)(way_index) := false.B
        }

        read_result_delayed(div_index)(bank_index)(way_index) := RegEnable(read_result(div_index)(bank_index)(way_index), RegNext(read_en))
      }
    }
  }

  // one bit per SRAM: set while that SRAM is being read (only used by the perf counter below)
  val data_read_oh = WireInit(VecInit(Seq.fill(DCacheSetDiv * DCacheBanks * DCacheWays)(0.U(1.W))))
  for(div_index <- 0 until DCacheSetDiv){
    for (bank_index <- 0 until DCacheBanks) {
      for (way_index <- 0 until DCacheWays) {
        data_read_oh(div_index * DCacheBanks * DCacheWays + bank_index * DCacheWays + way_index) := data_banks(div_index)(bank_index)(way_index).io.r.en
      }
    }
  }
  XSPerfAccumulate("data_read_counter", PopCount(Cat(data_read_oh)))

  // read result: expose banked read result
  // TODO: clock gate
  (0 until LoadPipelineWidth).foreach(i => {
    // io.read_resp(i) := read_result(RegNext(bank_addrs(i)))(RegNext(OHToUInt(way_en(i))))
    // r_*: one cycle after the request, rr_*: two cycles after the request
    val r_read_fire = RegNext(io.read(i).fire)
    val r_div_addr = RegEnable(div_addrs(i), io.read(i).fire)
    val r_bank_addr = RegEnable(bank_addrs(i), io.read(i).fire)
    // NOTE(review): this is a free-running RegNext while r_div_addr / r_bank_addr / rr_way_addr
    // are RegEnable'd on fire; the values agree in the cycle after a fire - confirm whether the
    // difference is intentional
    val r_way_addr = RegNext(OHToUInt(way_en(i)))
    val rr_read_fire = RegNext(RegNext(io.read(i).fire))
    val rr_div_addr = RegEnable(RegEnable(div_addrs(i), io.read(i).fire), r_read_fire)
    val rr_bank_addr = RegEnable(RegEnable(bank_addrs(i), io.read(i).fire), r_read_fire)
    val rr_way_addr = RegEnable(RegEnable(OHToUInt(way_en(i)), io.read(i).fire), r_read_fire)
    (0 until VLEN/DCacheSRAMRowBits).foreach( j =>{
      io.read_resp(i)(j) := read_result(r_div_addr)(r_bank_addr(j))(r_way_addr)
      // error detection
      // normal read ports
      io.read_error_delayed(i)(j) := rr_read_fire && read_error_delayed_result(rr_div_addr)(rr_bank_addr(j))(rr_way_addr) && !RegNext(io.bank_conflict_slow(i))
    })
  })

  // readline port
  val readline_error_delayed = Wire(Vec(DCacheBanks, Bool()))
  val readline_r_way_addr = RegEnable(OHToUInt(io.readline.bits.way_en), io.readline.valid)
  val readline_rr_way_addr = RegEnable(readline_r_way_addr, RegNext(io.readline.valid))
  val readline_r_div_addr = RegEnable(line_div_addr, io.readline.valid)
  val readline_rr_div_addr = RegEnable(readline_r_div_addr, RegNext(io.readline.valid))
  (0 until DCacheBanks).foreach(i => {
    io.readline_resp(i) := read_result(readline_r_div_addr)(i)(readline_r_way_addr)
    readline_error_delayed(i) := read_result(readline_rr_div_addr)(i)(readline_rr_way_addr).error_delayed
  })
  io.readline_error_delayed := RegNext(RegNext(io.readline.fire)) && readline_error_delayed.asUInt.orR

  // write data_banks & ecc_banks
  for (div_index <- 0 until DCacheSetDiv) {
    for (bank_index <- 0 until DCacheBanks) {
      for (way_index <- 0 until DCacheWays) {
        // data write
        // uses the per-bank duplicated control registers for this bank
        val wen_reg = write_bank_mask_reg(bank_index) &&
          write_valid_dup_reg(bank_index) &&
          write_div_addr_dup_reg(bank_index) === div_index.U &&
          write_wayen_dup_reg(bank_index)(way_index)
        // ECC bits are computed from the incoming data and registered alongside it
        val write_ecc_reg = RegEnable(getECCFromEncWord(cacheParams.dataCode.encode(io.write.bits.data(bank_index))), io.write.valid)
        val data_bank = data_banks(div_index)(bank_index)(way_index)
        data_bank.io.w.en := wen_reg
        data_bank.io.w.addr := write_set_addr_dup_reg(bank_index)
        data_bank.io.w.data := asECCData(write_ecc_reg, write_data_reg(bank_index))
      }
    }
  }

  // the cache-op port never responds in this implementation
  io.cacheOp.resp.valid := false.B
  io.cacheOp.resp.bits := DontCare

  // ChiselDB logging of read-read bank conflicts between ports 0 and 1
  val tableName = "BankConflict" + p(XSCoreParamsKey).HartId.toString
  val siteName = "BankedDataArray" + p(XSCoreParamsKey).HartId.toString
  val bankConflictTable = ChiselDB.createTable(tableName, new BankConflictDB)
  val bankConflictData = Wire(new BankConflictDB)
  for (i <- 0 until LoadPipelineWidth) {
    bankConflictData.set_index(i) := set_addrs(i)
    bankConflictData.addr(i) := io.read(i).bits.addr
  }

  // FIXME: rr_bank_conflict(0)(1) no generalization
  when(rr_bank_conflict(0)(1)) {
    (0 until (VLEN/DCacheSRAMRowBits)).foreach(i => {
      bankConflictData.bank_index(i) := bank_addrs(0)(i)
    })
    bankConflictData.way_index := OHToUInt(way_en(0))
    // NOTE(review): rr_bank_conflict(0)(1) already requires set_addrs(0) =/= set_addrs(1),
    // so this expression cannot be true inside this branch
    bankConflictData.fake_rr_bank_conflict := set_addrs(0) === set_addrs(1) && div_addrs(0) === div_addrs(1)
  }.otherwise {
    (0 until (VLEN/DCacheSRAMRowBits)).foreach(i => {
      bankConflictData.bank_index(i) := 0.U
    })
    bankConflictData.way_index := 0.U
    bankConflictData.fake_rr_bank_conflict := false.B
  }

  val isWriteBankConflictTable = Constantin.createRecord(s"isWriteBankConflictTable${p(XSCoreParamsKey).HartId}")
  bankConflictTable.log(
    data = bankConflictData,
    en = isWriteBankConflictTable.orR && rr_bank_conflict(0)(1),
    site = siteName,
    clock = clock,
    reset = reset
  )

  // NOTE(review): for the same reason as above (rr_bank_conflict requires differing set
  // addresses) these counters cannot increment as written
  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
    XSPerfAccumulate(s"data_array_fake_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y) && set_addrs(x)===set_addrs(y) && div_addrs(x) === div_addrs(y))
  ))

  if (backendParams.debugEn){
    load_req_with_bank_conflict.foreach(dontTouch(_))
    dontTouch(read_result)
    dontTouch(read_error_delayed_result)
  }
}

// the smallest
access unit is bank 637class BankedDataArray(implicit p: Parameters) extends AbstractBankedDataArray { 638 println(" DCacheType: BankedDataArray") 639 val ReduceReadlineConflict = false 640 641 io.write.ready := true.B 642 io.write_dup.foreach(_.ready := true.B) 643 644 val data_banks = List.fill(DCacheSetDiv)(List.tabulate(DCacheBanks)(i => Module(new DataSRAMBank(i)))) 645 data_banks.map(_.map(_.dump())) 646 647 val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType)) 648 val set_addrs = Wire(Vec(LoadPipelineWidth, UInt())) 649 val div_addrs = Wire(Vec(LoadPipelineWidth, UInt())) 650 val bank_addrs = Wire(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, UInt()))) 651 val way_en_reg = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType)) 652 val set_addrs_reg = Wire(Vec(LoadPipelineWidth, UInt())) 653 654 val line_set_addr = addr_to_dcache_div_set(io.readline.bits.addr) 655 val line_div_addr = addr_to_dcache_div(io.readline.bits.addr) 656 val line_way_en = io.readline.bits.way_en 657 658 val write_bank_mask_reg = RegEnable(io.write.bits.wmask, io.write.valid) 659 val write_data_reg = RegEnable(io.write.bits.data, io.write.valid) 660 val write_valid_reg = RegNext(io.write.valid) 661 val write_valid_dup_reg = io.write_dup.map(x => RegNext(x.valid)) 662 val write_wayen_dup_reg = io.write_dup.map(x => RegEnable(x.bits.way_en, x.valid)) 663 val write_set_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div_set(x.bits.addr), x.valid)) 664 val write_div_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div(x.bits.addr), x.valid)) 665 666 // read data_banks and ecc_banks 667 // for single port SRAM, do not allow read and write in the same cycle 668 val rwhazard = RegNext(io.write.valid) 669 val rrhazard = false.B // io.readline.valid 670 (0 until LoadPipelineWidth).map(rport_index => { 671 div_addrs(rport_index) := addr_to_dcache_div(io.read(rport_index).bits.addr) 672 bank_addrs(rport_index)(0) := 
addr_to_dcache_bank(io.read(rport_index).bits.addr) 673 bank_addrs(rport_index)(1) := Mux(io.is128Req(rport_index), bank_addrs(rport_index)(0) + 1.U, bank_addrs(rport_index)(0)) 674 set_addrs(rport_index) := addr_to_dcache_div_set(io.read(rport_index).bits.addr) 675 set_addrs_reg(rport_index) := RegEnable(addr_to_dcache_div_set(io.read(rport_index).bits.addr), io.read(rport_index).valid) 676 677 // use way_en to select a way after data read out 678 assert(!(RegNext(io.read(rport_index).fire && PopCount(io.read(rport_index).bits.way_en) > 1.U))) 679 way_en(rport_index) := io.read(rport_index).bits.way_en 680 way_en_reg(rport_index) := RegEnable(io.read(rport_index).bits.way_en, io.read(rport_index).valid) 681 }) 682 683 // read each bank, get bank result 684 val rr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x => Seq.tabulate(LoadPipelineWidth)(y => { 685 if (x == y) { 686 false.B 687 } else { 688 io.read(x).valid && io.read(y).valid && 689 div_addrs(x) === div_addrs(y) && 690 (io.read(x).bits.bankMask & io.read(y).bits.bankMask) =/= 0.U && 691 set_addrs(x) =/= set_addrs(y) 692 } 693 } 694 )) 695 696 val load_req_with_bank_conflict = rr_bank_conflict.map(_.reduce(_ || _)) 697 val load_req_valid = io.read.map(_.valid) 698 val load_req_lqIdx = io.read.map(_.bits.lqIdx) 699 val load_req_index = (0 until LoadPipelineWidth).map(_.asUInt) 700 701 val load_req_bank_conflict_selcet = selcetOldestPort(load_req_valid, load_req_lqIdx, load_req_index) 702 val load_req_bank_select_port = UIntToOH(load_req_bank_conflict_selcet._2).asBools 703 704 val rr_bank_conflict_oldest = (0 until LoadPipelineWidth).map(i => 705 !load_req_bank_select_port(i) && load_req_with_bank_conflict(i) 706 ) 707 708 val rrl_bank_conflict = Wire(Vec(LoadPipelineWidth, Bool())) 709 val rrl_bank_conflict_intend = Wire(Vec(LoadPipelineWidth, Bool())) 710 (0 until LoadPipelineWidth).foreach { i => 711 val judge = if (ReduceReadlineConflict) io.read(i).valid && (io.readline.bits.rmask & 
io.read(i).bits.bankMask) =/= 0.U && div_addrs(i) === line_div_addr 712 else io.read(i).valid && div_addrs(i)===line_div_addr 713 rrl_bank_conflict(i) := judge && io.readline.valid 714 rrl_bank_conflict_intend(i) := judge && io.readline_intend 715 } 716 val wr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x => 717 io.read(x).valid && 718 write_valid_reg && 719 div_addrs(x) === write_div_addr_dup_reg.head && 720 (write_bank_mask_reg(bank_addrs(x)(0)) || write_bank_mask_reg(bank_addrs(x)(1)) && io.is128Req(x)) 721 ) 722 val wrl_bank_conflict = io.readline.valid && write_valid_reg && line_div_addr === write_div_addr_dup_reg.head 723 // ready 724 io.readline.ready := !(wrl_bank_conflict) 725 io.read.zipWithIndex.map{case(x, i) => x.ready := !(wr_bank_conflict(i) || rrhazard)} 726 727 val perf_multi_read = PopCount(io.read.map(_.valid)) >= 2.U 728 (0 until LoadPipelineWidth).foreach(i => { 729 // remove fake rr_bank_conflict situation in s2 730 val real_other_bank_conflict_reg = RegNext(wr_bank_conflict(i) || rrl_bank_conflict(i)) 731 val real_rr_bank_conflict_reg = RegNext(rr_bank_conflict_oldest(i)) 732 io.bank_conflict_slow(i) := real_other_bank_conflict_reg || real_rr_bank_conflict_reg 733 734 // get result in s1 735 io.disable_ld_fast_wakeup(i) := wr_bank_conflict(i) || rrl_bank_conflict_intend(i) || 736 (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _)) 737 }) 738 XSPerfAccumulate("data_array_multi_read", perf_multi_read) 739 (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x => 740 XSPerfAccumulate(s"data_array_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y)) 741 )) 742 (0 until LoadPipelineWidth).foreach(i => { 743 XSPerfAccumulate(s"data_array_rrl_bank_conflict_${i}", rrl_bank_conflict(i)) 744 XSPerfAccumulate(s"data_array_rw_bank_conflict_${i}", wr_bank_conflict(i)) 745 XSPerfAccumulate(s"data_array_read_${i}", io.read(i).valid) 746 }) 747 XSPerfAccumulate("data_array_access_total", PopCount(io.read.map(_.valid))) 
// Perf events keyed on the valid signals of the line-read and write request ports.
XSPerfAccumulate("data_array_read_line", io.readline.valid)
XSPerfAccumulate("data_array_write", io.write.valid)

// Read results indexed as (set-div)(bank)(way):
//  - bank_result:             ecc / raw_data split out of each bank's read port, plus the delayed ECC error flag
//  - bank_result_delayed:     registered copy of bank_result (capture enable is RegNext(read_enable))
//  - read_bank_error_delayed: mirror of bank_result(...).error_delayed (constant false when ECC is disabled)
val bank_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, new L1BankedDataReadResult()))))
val bank_result_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, new L1BankedDataReadResult()))))
val read_bank_error_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, Bool()))))

// Drive the read port of every (div, bank) data bank and collect its per-way results.
for (div_index <- 0 until DCacheSetDiv) {
  for (bank_index <- 0 until DCacheBanks) {
    // Set Addr & Read Way Mask
    //
    //    Pipe 0   ....  Pipe (n-1)
    //      +      ....     +
    //      |      ....     |
    // +----+---------------+-----+
    //  X                        X
    //   X                      +------+ Bank Addr Match
    //    +---------+----------+
    //              |
    //     +--------+--------+
    //     |    Data Bank    |
    //     +-----------------+
    //
    // Load pipe i selects this bank when its request is valid, targets this div, and one of its
    // two bank addresses equals this bank (the second address only counts for 128-bit requests).
    // Pipes flagged in rr_bank_conflict_oldest are excluded; that signal is defined outside this
    // chunk -- presumably it marks pipes that lost read-read conflict arbitration (TODO confirm).
    val bank_addr_matchs = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
      io.read(i).valid && div_addrs(i) === div_index.U && (bank_addrs(i)(0) === bank_index.U || bank_addrs(i)(1) === bank_index.U && io.is128Req(i)) &&
      !rr_bank_conflict_oldest(i)
    })))
    // A line read selects this bank when it targets this div; with ReduceReadlineConflict set,
    // the bank's bit in rmask must also be set.
    val readline_match = Wire(Bool())
    if (ReduceReadlineConflict) {
      readline_match := io.readline.valid && io.readline.bits.rmask(bank_index) && line_div_addr === div_index.U
    } else {
      readline_match := io.readline.valid && line_div_addr === div_index.U
    }

    // The line read takes precedence over the load pipes; among the pipes, PriorityMux picks the
    // lowest-indexed match.
    val bank_set_addr = Mux(readline_match,
      line_set_addr,
      PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => bank_addr_matchs(i) -> set_addrs(i)))
    )
    // The bank is read whenever any load pipe or the line read selects it.
    val read_enable = bank_addr_matchs.asUInt.orR || readline_match

    // read raw data
    val data_bank = data_banks(div_index)(bank_index)
    data_bank.io.r.en := read_enable
    data_bank.io.r.addr := bank_set_addr
    for (way_index <- 0 until DCacheWays) {
      // Split each way's encoded read word into its ECC field and its data field.
      bank_result(div_index)(bank_index)(way_index).ecc := getECCFromEncWord(data_bank.io.r.data(way_index))
      bank_result(div_index)(bank_index)(way_index).raw_data := getDataFromEncWord(data_bank.io.r.data(way_index))

      if (EnableDataEcc) {
        // The ECC check runs on a registered copy of the encoded word. The capture enable is
        // RegNext(read_enable), i.e. the cycle after the read was issued -- presumably the cycle
        // in which the bank's read data is valid (TODO confirm against the bank implementation).
        val ecc_data = bank_result(div_index)(bank_index)(way_index).asECCData()
        val ecc_data_delayed = RegEnable(ecc_data, RegNext(read_enable))
        bank_result(div_index)(bank_index)(way_index).error_delayed := dcacheParameters.dataCode.decode(ecc_data_delayed).error
        read_bank_error_delayed(div_index)(bank_index)(way_index) := bank_result(div_index)(bank_index)(way_index).error_delayed
      } else {
        // ECC disabled: no error is ever reported.
        bank_result(div_index)(bank_index)(way_index).error_delayed := false.B
        read_bank_error_delayed(div_index)(bank_index)(way_index) := false.B
      }
      // Registered copy of the whole result bundle, captured with the same enable as the ECC word.
      bank_result_delayed(div_index)(bank_index)(way_index) := RegEnable(bank_result(div_index)(bank_index)(way_index), RegNext(read_enable))
    }
  }
}

// Read-activity counter: each bank contributes DCacheWays when its read enable is set
// (PopCount of the enable bit replicated DCacheWays times) and 0 otherwise. Contributions are
// summed per div, then across divs, and reported as the "data_read_counter" perf event.
val data_read_oh = WireInit(VecInit(Seq.fill(DCacheSetDiv)(0.U(XLEN.W))))
for (div_index <- 0 until DCacheSetDiv){
  val temp = WireInit(VecInit(Seq.fill(DCacheBanks)(0.U(XLEN.W))))
  for (bank_index <- 0 until DCacheBanks) {
    temp(bank_index) := PopCount(Fill(DCacheWays, data_banks(div_index)(bank_index).io.r.en.asUInt))
  }
  data_read_oh(div_index) := temp.reduce(_ + _)
}
XSPerfAccumulate("data_read_counter", data_read_oh.foldLeft(0.U)(_ + _))

// Per load pipe: select the read response and the delayed ECC error for each of the
// VLEN/DCacheSRAMRowBits banks the request may touch.
(0 until LoadPipelineWidth).map(i => {
  // 1 cycle after read fire(load s2)
  val r_read_fire = RegNext(io.read(i).fire)
  val r_div_addr = RegEnable(div_addrs(i), io.read(i).fire)
  val r_bank_addr = RegEnable(bank_addrs(i), io.read(i).fire)
  val r_way_addr = RegEnable(OHToUInt(way_en(i)), io.read(i).fire)
  // 2 cycles after read fire(load s3)
  // NOTE(review): the inner RegEnable of each rr_* value is built from the same source and enable
  // as the matching r_* register above, so these chains duplicate the r_* registers.
  val rr_read_fire = RegNext(r_read_fire)
  val rr_div_addr = RegEnable(RegEnable(div_addrs(i), io.read(i).fire), r_read_fire)
  val rr_bank_addr = RegEnable(RegEnable(bank_addrs(i), io.read(i).fire), r_read_fire)
  val rr_way_addr = RegEnable(RegEnable(OHToUInt(way_en(i)), io.read(i).fire), r_read_fire)
  (0 until VLEN/DCacheSRAMRowBits).map( j =>{
    // The response is indexed with the one-cycle-delayed div / bank / way of this pipe's request.
    io.read_resp(i)(j) := bank_result(r_div_addr)(r_bank_addr(j))(r_way_addr)
    // error detection
    // Reported two cycles after fire, and suppressed when the registered bank_conflict_slow flag
    // of this pipe is set.
    io.read_error_delayed(i)(j) := rr_read_fire && read_bank_error_delayed(rr_div_addr)(rr_bank_addr(j))(rr_way_addr) && !RegNext(io.bank_conflict_slow(i))
  })
})

// read result: expose banked read result
// Line-read way / div selectors are captured on io.readline.valid (r_*) and captured again one
// cycle later (rr_*).
// NOTE(review): the selectors are captured on `valid`, while the error qualifier below uses
// `fire` delayed by two cycles -- confirm the valid-vs-fire difference is intended.
val readline_error_delayed = Wire(Vec(DCacheBanks, Bool()))
val readline_r_way_addr = RegEnable(OHToUInt(io.readline.bits.way_en), io.readline.valid)
val readline_rr_way_addr = RegEnable(readline_r_way_addr, RegNext(io.readline.valid))
val readline_r_div_addr = RegEnable(line_div_addr, io.readline.valid)
val readline_rr_div_addr = RegEnable(readline_r_div_addr, RegNext(io.readline.valid))
(0 until DCacheBanks).map(i => {
  io.readline_resp(i) := bank_result(readline_r_div_addr)(i)(readline_r_way_addr)
  readline_error_delayed(i) := bank_result(readline_rr_div_addr)(i)(readline_rr_way_addr).error_delayed
})
// A line-read error is flagged when any bank of the selected div / way reports a delayed error.
io.readline_error_delayed := RegNext(RegNext(io.readline.fire)) && readline_error_delayed.asUInt.orR

// write data_banks & ecc_banks
// The write_*_reg signals used below are defined outside this chunk; by their names they are
// per-bank registered copies of the write request (TODO confirm).
for (div_index <- 0 until DCacheSetDiv) {
  for (bank_index <- 0 until DCacheBanks) {
    // data write
    // The bank is written when its mask bit and registered valid are set, the registered div
    // address selects this div, and io.write.valid was high in the previous cycle.
    val wen_reg = write_bank_mask_reg(bank_index) &&
      write_valid_dup_reg(bank_index) &&
      write_div_addr_dup_reg(bank_index) === div_index.U && RegNext(io.write.valid)
    // ECC bits of the encoded write data, captured while io.write.valid is high.
    // NOTE(review): declared inside the div loop, so one such register exists per (div, bank).
    val write_ecc_reg = RegEnable(getECCFromEncWord(cacheParams.dataCode.encode(io.write.bits.data(bank_index))), io.write.valid)
    val data_bank = data_banks(div_index)(bank_index)
    data_bank.io.w.en := wen_reg
    data_bank.io.w.way_en := write_wayen_dup_reg(bank_index)
    data_bank.io.w.addr := write_set_addr_dup_reg(bank_index)
    data_bank.io.w.data := asECCData(write_ecc_reg, write_data_reg(bank_index))
  }
}

// The cacheOp response is tied off here: never valid, payload left as DontCare.
io.cacheOp.resp.valid := false.B
io.cacheOp.resp.bits := DontCare

// ChiselDB logging of bank conflicts; table and site names carry the hart id.
val tableName = "BankConflict" + p(XSCoreParamsKey).HartId.toString
val siteName = "BankedDataArray" + p(XSCoreParamsKey).HartId.toString
val bankConflictTable = ChiselDB.createTable(tableName, new BankConflictDB)
val bankConflictData = Wire(new BankConflictDB)
// Set index and request address are recorded for every load pipe.
for (i <- 0 until LoadPipelineWidth) {
  bankConflictData.set_index(i) := set_addrs(i)
  bankConflictData.addr(i) := io.read(i).bits.addr
}

// FIXME: rr_bank_conflict(0)(1) no generalization
// Only the conflict between pipes 0 and 1 is recorded: bank and way indices come from pipe 0, and
// the conflict is tagged "fake" when both pipes share the same set address and div address.
// NOTE(review): indexing pipe 1 here requires LoadPipelineWidth >= 2.
when(rr_bank_conflict(0)(1)) {
  (0 until (VLEN/DCacheSRAMRowBits)).map(i => {
    bankConflictData.bank_index(i) := bank_addrs(0)(i)
  })
  bankConflictData.way_index := OHToUInt(way_en(0))
  bankConflictData.fake_rr_bank_conflict := set_addrs(0) === set_addrs(1) && div_addrs(0) === div_addrs(1)
}.otherwise {
  // No conflict between pipes 0 and 1: drive zeros.
  (0 until (VLEN/DCacheSRAMRowBits)).map(i => {
    bankConflictData.bank_index(i) := 0.U
  })
  bankConflictData.way_index := 0.U
  bankConflictData.fake_rr_bank_conflict := false.B
}

// A row is logged only when the Constantin record is non-zero and pipes 0 and 1 conflict.
val isWriteBankConflictTable = Constantin.createRecord(s"isWriteBankConflictTable${p(XSCoreParamsKey).HartId}")
bankConflictTable.log(
  data = bankConflictData,
  en = isWriteBankConflictTable.orR && rr_bank_conflict(0)(1),
  site = siteName,
  clock = clock,
  reset = reset
)

// One perf event per pipe pair (x < y): conflicts in which both pipes share set and div address.
(1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
  XSPerfAccumulate(s"data_array_fake_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y) && set_addrs(x) === set_addrs(y) && div_addrs(x) === div_addrs(y))
))

// With debug enabled, mark these signals dontTouch.
if (backendParams.debugEn){
  load_req_with_bank_conflict.map(dontTouch(_))
  dontTouch(bank_result)
  dontTouch(read_bank_error_delayed)
}
}