/***************************************************************************************
* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
*
*
* Acknowledgement
*
* This implementation is inspired by several key papers:
* [1] Gurindar S. Sohi, and Manoj Franklin. "[High-bandwidth data memory systems for superscalar processors.]
* (https://doi.org/10.1145/106972.106980)" 4th International Conference on Architectural Support for Programming
* Languages and Operating Systems (ASPLOS). 1991.
***************************************************************************************/

package xiangshan.cache

import org.chipsalliance.cde.config.Parameters
import chisel3._
import utils._
import utility._
import chisel3.util._
import freechips.rocketchip.tilelink.{ClientMetadata, TLClientParameters, TLEdgeOut}
import xiangshan.{L1CacheErrorInfo, XSCoreParamsKey}

import scala.math.max

/** Record layout logged to the ChiselDB "BankConflict" table by the data arrays below.
  *
  * `addr` and `set_index` carry one entry per load pipeline; `bank_index`, `way_index` and
  * `fake_rr_bank_conflict` are filled from load pipeline 0 when `rr_bank_conflict(0)(1)` is set.
  */
class BankConflictDB(implicit p: Parameters) extends DCacheBundle{
  val addr = Vec(LoadPipelineWidth, Bits(PAddrBits.W))
  val set_index = Vec(LoadPipelineWidth, UInt((DCacheAboveIndexOffset - DCacheSetOffset).W))
  val bank_index = Vec(VLEN/DCacheSRAMRowBits, UInt((DCacheSetOffset - DCacheBankOffset).W))
  val way_index  = UInt(wayBits.W)
  val fake_rr_bank_conflict = Bool()
}

/** Basic data array request: a way-enable vector plus a physical address. */
class L1BankedDataReadReq(implicit p: Parameters) extends DCacheBundle
{
  val way_en = Bits(DCacheWays.W)
  val addr = Bits(PAddrBits.W)
}

/** Load pipeline read request: way enable, address, a per-bank mask and a `kill` flag.
  *
  * `bankMask` is what the data arrays in this file use to detect read/read bank conflicts.
  * `kill` is not consumed anywhere in this file.
  */
class L1BankedDataReadReqWithMask(implicit p: Parameters) extends DCacheBundle
{
  val way_en = Bits(DCacheWays.W)
  val addr = Bits(PAddrBits.W)
  val bankMask = Bits(DCacheBanks.W)
  val kill = Bool()
}

/** Whole-line read request; `rmask` selects banks (only honoured when `ReduceReadlineConflict` is set). */
class L1BankedDataReadLineReq(implicit p: Parameters) extends L1BankedDataReadReq
{
  val rmask = Bits(DCacheBanks.W)
}

// Now, we can write a cache-block in a single cycle
/** Whole-line write request: one data row per bank plus a per-bank write mask. */
class L1BankedDataWriteReq(implicit p: Parameters) extends L1BankedDataReadReq
{
  val wmask = Bits(DCacheBanks.W)
  val data = Vec(DCacheBanks, Bits(DCacheSRAMRowBits.W))
}

// cache-block write request without data
class L1BankedDataWriteReqCtrl(implicit p: Parameters) extends L1BankedDataReadReq

/** Result of reading one SRAM row: raw data, its ECC bits and a delayed error flag. */
class L1BankedDataReadResult(implicit p: Parameters) extends DCacheBundle
{
  // you can choose which bank to read to save power
  val ecc = Bits(dataECCBits.W)
  val raw_data = Bits(DCacheSRAMRowBits.W)
  val error_delayed = Bool() // 1 cycle later than data resp

  /** Concatenates `ecc` (high bits) and `raw_data` (low bits) into one encoded word. */
  def asECCData() = {
    Cat(ecc, raw_data)
  }
}

/** Write port of a [[DataSRAMBank]]: enable, set address, way select and encoded data. */
class DataSRAMBankWriteReq(implicit p: Parameters) extends DCacheBundle {
  val en = Bool()
  val addr = UInt()
  val way_en = UInt(DCacheWays.W)
  val data = UInt(encDataBits.W)
}

// wrap a sram
/** Wraps one single-ported, single-way `SRAMTemplate` holding `DCacheSets / DCacheSetDiv` encoded rows.
  *
  * @param bankIdx bank index, used only in debug printing
  * @param wayIdx  way index, used only in debug printing
  */
class DataSRAM(bankIdx: Int, wayIdx: Int)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle() {
    val w = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Input(UInt(encDataBits.W))
    }

    val r = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Output(UInt(encDataBits.W))
    }
  })

  // data sram
  val data_sram = Module(new SRAMTemplate(
    Bits(encDataBits.W),
    set = DCacheSets / DCacheSetDiv,
    way = 1,
    shouldReset = false,
    holdRead = false,
    singlePort = true
  ))

  data_sram.io.w.req.valid := io.w.en
  data_sram.io.w.req.bits.apply(
    setIdx = io.w.addr,
    data = io.w.data,
    waymask = 1.U
  )
  data_sram.io.r.req.valid := io.r.en
  data_sram.io.r.req.bits.apply(setIdx = io.r.addr)
  io.r.data := data_sram.io.r.resp.data(0)
  XSPerfAccumulate("part_data_read_counter", data_sram.io.r.req.valid)

  /** Debug-prints the read data one cycle after a read enable, together with the registered address. */
  def dump_r() = {
    when(RegNext(io.r.en)) {
      XSDebug("bank read set %x bank %x way %x data %x\n",
        RegEnable(io.r.addr, io.r.en),
        bankIdx.U,
        wayIdx.U,
        io.r.data
      )
    }
  }

  /** Debug-prints the write address and data in the cycle the write is enabled. */
  def dump_w() = {
    when(io.w.en) {
      XSDebug("bank write set %x bank %x way %x data %x\n",
        io.w.addr,
        bankIdx.U,
        wayIdx.U,
        io.w.data
      )
    }
  }

  def dump() = {
    dump_w()
    dump_r()
  }
}

// wrap the data rows of all DCacheWays ways of one bank
/** One bank made of `DCacheWays` single-ported SRAMs sharing a read address.
  *
  * A read enables every way and returns one encoded row per way; a write targets the way(s)
  * selected by `io.w.way_en` (asserted to be at most one). Each SRAM's clock is driven through
  * `ClockGate`, enabled by a read or by a write to that way.
  *
  * @param index bank index; not referenced inside this module
  */
class DataSRAMBank(index: Int)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle() {
    val w = Input(new DataSRAMBankWriteReq)

    val r = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Output(Vec(DCacheWays, UInt(encDataBits.W)))
    }
  })

  // at most one way may be written per request
  assert(RegNext(!io.w.en || PopCount(io.w.way_en) <= 1.U))

  // external controls do not read and write at the same time
  val w_info = io.w
  // val rw_bypass = RegNext(io.w.addr === io.r.addr && io.w.way_en === io.r.way_en && io.w.en)

  // multiway data bank
  val data_bank = Seq.fill(DCacheWays) {
    Module(new SRAMTemplate(
      Bits(encDataBits.W),
      set = DCacheSets / DCacheSetDiv,
      way = 1,
      shouldReset = false,
      holdRead = false,
      singlePort = true
    ))
  }

  for (w <- 0 until DCacheWays) {
    val wen = w_info.en && w_info.way_en(w)
    data_bank(w).io.w.req.valid := wen
    data_bank(w).io.w.req.bits.apply(
      setIdx = w_info.addr,
      data = w_info.data,
      waymask = 1.U
    )
    data_bank(w).io.r.req.valid := io.r.en
    data_bank(w).io.r.req.bits.apply(setIdx = io.r.addr)
    // clock this way's SRAM only when it is read or written
    data_bank(w).clock := ClockGate(false.B, io.r.en | (io.w.en & io.w.way_en(w)), clock)
  }
  XSPerfAccumulate("part_data_read_counter", PopCount(Cat(data_bank.map(_.io.r.req.valid))))

  io.r.data := data_bank.map(_.io.r.resp.data(0))

  /** Debug-prints all ways' read data one cycle after a read enable. */
  def dump_r() = {
    when(RegNext(io.r.en)) {
      XSDebug("bank read addr %x data %x\n",
        RegEnable(io.r.addr, io.r.en),
        io.r.data.asUInt
      )
    }
  }

  /** Debug-prints the write address, way select and data in the cycle the write is enabled. */
  def dump_w() = {
    when(io.w.en) {
      XSDebug("bank write addr %x way_en %x data %x\n",
        io.w.addr,
        io.w.way_en,
        io.w.data
      )
    }
  }

  def dump() = {
    dump_w()
    dump_r()
  }
}

/** Marker stored in `AbstractBankedDataArray.DataEccParam` when `EnableDataEcc` is set. */
case object HasDataEccParam

//                     Banked DCache Data
// -----------------------------------------------------------------
// | Bank0 | Bank1 | Bank2 | Bank3 | Bank4 | Bank5 | Bank6 | Bank7 |
// -----------------------------------------------------------------
// | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  |
// | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  |
// | ....  | ....  | ....  | ....  | ....  | ....  | ....  | ....  |
// -----------------------------------------------------------------
/** Common IO and helpers shared by [[SramedDataArray]] and [[BankedDataArray]].
  *
  * Ports: `LoadPipelineWidth` word-read ports (`read`), one line-read port (`readline`), one
  * line-write port (`write`, with per-bank duplicated control in `write_dup`), the corresponding
  * responses and delayed error flags, and bank-conflict indications back to the load pipelines.
  */
abstract class AbstractBankedDataArray(implicit p: Parameters) extends DCacheModule
{
  val DataEccParam = if(EnableDataEcc) Some(HasDataEccParam) else None
  val ReadlinePortErrorIndex = LoadPipelineWidth
  val io = IO(new DCacheBundle {
    // load pipeline read word req
    val read = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new L1BankedDataReadReqWithMask)))
    val is128Req = Input(Vec(LoadPipelineWidth, Bool()))
    // main pipeline read / write line req
    val readline_intend = Input(Bool())
    val readline = Flipped(DecoupledIO(new L1BankedDataReadLineReq))
    val write = Flipped(DecoupledIO(new L1BankedDataWriteReq))
    val write_dup = Vec(DCacheBanks, Flipped(Decoupled(new L1BankedDataWriteReqCtrl)))
    // data for readline and loadpipe
    val readline_resp = Output(Vec(DCacheBanks, new L1BankedDataReadResult()))
    val readline_error_delayed = Output(Bool())
    val read_resp          = Output(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, new L1BankedDataReadResult())))
    val read_error_delayed = Output(Vec(LoadPipelineWidth,Vec(VLEN/DCacheSRAMRowBits, Bool())))
    // val nacks = Output(Vec(LoadPipelineWidth, Bool()))
    // val errors = Output(Vec(LoadPipelineWidth + 1, ValidIO(new L1CacheErrorInfo))) // read ports + readline port
    // when bank_conflict, read (1) port should be ignored
    val bank_conflict_slow = Output(Vec(LoadPipelineWidth, Bool()))
    val disable_ld_fast_wakeup = Output(Vec(LoadPipelineWidth, Bool()))
    // customized cache op port
    val cacheOp = Flipped(new L1CacheInnerOpIO)
    val cacheOp_req_dup = Vec(DCacheDupNum, Flipped(Valid(new CacheCtrlReqInfo)))
    val cacheOp_req_bits_opCode_dup = Input(Vec(DCacheDupNum, UInt(XLEN.W)))
  })

  /** Builds a `Vec` with one element per load pipeline by applying `f` to each pipeline index. */
  def pipeMap[T <: Data](f: Int => T) = VecInit((0 until LoadPipelineWidth).map(f))

  /** Extracts the ECC bits (above `DCacheSRAMRowBits`) from an encoded SRAM word.
    *
    * Returns `0.U` when ECC is disabled. When ECC is enabled, elaboration fails unless
    * `encWord` is exactly `encDataBits` wide.
    */
  def getECCFromEncWord(encWord: UInt) = {
    if (EnableDataEcc) {
      // report the actual offending width, not encDataBits twice
      require(encWord.getWidth == encDataBits, s"encWord.getWidth=${encWord.getWidth} != encDataBits=$encDataBits!")
      encWord(encDataBits-1, DCacheSRAMRowBits)
    } else {
      0.U
    }
  }

  /** Extracts the raw data (low `DCacheSRAMRowBits` bits) from an encoded SRAM word. */
  def getDataFromEncWord(encWord: UInt) = {
    encWord(DCacheSRAMRowBits-1, 0)
  }

  /** Builds the word stored in the SRAM: `Cat(ecc, data)` with ECC enabled, otherwise `data` alone. */
  def asECCData(ecc: UInt, data: UInt) = {
    if (EnableDataEcc) {
      Cat(ecc, data)
    } else {
      data
    }
  }

  /** Debug-prints every valid word-read request and the line-read request. */
  def dumpRead = {
    (0 until LoadPipelineWidth) foreach { w =>
      when(io.read(w).valid) {
        XSDebug(s"DataArray Read channel: $w valid way_en: %x addr: %x\n",
          io.read(w).bits.way_en, io.read(w).bits.addr)
      }
    }
    when(io.readline.valid) {
      XSDebug(s"DataArray Read Line, valid way_en: %x addr: %x rmask %x\n",
        io.readline.bits.way_en, io.readline.bits.addr, io.readline.bits.rmask)
    }
  }

  /** Debug-prints a valid write request, one line per bank. */
  def dumpWrite = {
    when(io.write.valid) {
      XSDebug(s"DataArray Write valid way_en: %x addr: %x\n",
        io.write.bits.way_en, io.write.bits.addr)

      (0 until DCacheBanks) foreach { r =>
        XSDebug(s"cycle: $r data: %x wmask: %x\n",
          io.write.bits.data(r), io.write.bits.wmask(r))
      }
    }
  }

  /** Debug-prints the read response of every load pipeline (two rows for 128-bit requests). */
  def dumpResp = {
    XSDebug(s"DataArray ReadeResp channel:\n")
    // kept as `map` so the inferred result type of dumpResp/dump is unchanged
    (0 until LoadPipelineWidth) map { r =>
      XSDebug(s"cycle: $r data: %x\n", Mux(io.is128Req(r),
        Cat(io.read_resp(r)(1).raw_data,io.read_resp(r)(0).raw_data),
        io.read_resp(r)(0).raw_data))
    }
  }

  def dump() = {
    dumpRead
    dumpWrite
    dumpResp
  }
}

// the smallest access unit is sram
/** Data array built from one [[DataSRAM]] per (set-div, bank, way).
  *
  * Writes are registered for one cycle before reaching the SRAMs. Two word reads conflict only
  * when they hit the same div, overlapping banks and the same way but different sets; a word
  * read conflicts with a line read when div matches and sets differ.
  */
class SramedDataArray(implicit p: Parameters) extends AbstractBankedDataArray {
  println(" DCacheType: SramedDataArray")
  val ReduceReadlineConflict = false

  // writes are always accepted; they are registered below
  io.write.ready := true.B
  io.write_dup.foreach(_.ready := true.B)

  val data_banks = List.tabulate(DCacheSetDiv)( k => List.tabulate(DCacheBanks)(i => List.tabulate(DCacheWays)(j => Module(new DataSRAM(i,j)))))
  data_banks.foreach(_.foreach(_.foreach(_.dump())))

  val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
  val set_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val div_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val bank_addrs = Wire(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, UInt())))

  val line_set_addr = addr_to_dcache_div_set(io.readline.bits.addr)
  val line_div_addr = addr_to_dcache_div(io.readline.bits.addr)
  // when WPU is enabled, line_way_en is all enabled when read data
  val line_way_en = Fill(DCacheWays, 1.U) // val line_way_en = io.readline.bits.way_en
  val line_way_en_reg = RegEnable(io.readline.bits.way_en, 0.U(DCacheWays.W),io.readline.valid)

  // write request, registered for one cycle
  val write_bank_mask_reg = RegEnable(io.write.bits.wmask, 0.U(DCacheBanks.W), io.write.valid)
  val write_data_reg = RegEnable(io.write.bits.data, io.write.valid)
  val write_valid_reg = RegNext(io.write.valid)
  val write_valid_dup_reg = io.write_dup.map(x => RegNext(x.valid))
  val write_wayen_dup_reg = io.write_dup.map(x => RegEnable(x.bits.way_en, 0.U(DCacheWays.W), x.valid))
  val write_set_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div_set(x.bits.addr), x.valid))
  val write_div_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div(x.bits.addr), x.valid))

  // read data_banks and ecc_banks
  // for single port SRAM, do not allow read and write in the same cycle
  val rrhazard = false.B // io.readline.valid
  (0 until LoadPipelineWidth).foreach(rport_index => {
    div_addrs(rport_index) := addr_to_dcache_div(io.read(rport_index).bits.addr)
    set_addrs(rport_index) := addr_to_dcache_div_set(io.read(rport_index).bits.addr)
    bank_addrs(rport_index)(0) := addr_to_dcache_bank(io.read(rport_index).bits.addr)
    bank_addrs(rport_index)(1) := bank_addrs(rport_index)(0) + 1.U

    // use way_en to select a way after data read out
    assert(!(RegNext(io.read(rport_index).fire && PopCount(io.read(rport_index).bits.way_en) > 1.U)))
    way_en(rport_index) := io.read(rport_index).bits.way_en
  })

  // read conflict
  val rr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x => Seq.tabulate(LoadPipelineWidth)(y =>
    io.read(x).valid && io.read(y).valid &&
    div_addrs(x) === div_addrs(y) &&
    (io.read(x).bits.bankMask & io.read(y).bits.bankMask) =/= 0.U &&
    io.read(x).bits.way_en === io.read(y).bits.way_en &&
    set_addrs(x) =/= set_addrs(y)
  ))
  val rrl_bank_conflict = Wire(Vec(LoadPipelineWidth, Bool()))
  val rrl_bank_conflict_intend = Wire(Vec(LoadPipelineWidth, Bool()))
  (0 until LoadPipelineWidth).foreach { i =>
    val judge = if (ReduceReadlineConflict) io.read(i).valid && (io.readline.bits.rmask & io.read(i).bits.bankMask) =/= 0.U && line_div_addr === div_addrs(i) && line_set_addr =/= set_addrs(i)
                else io.read(i).valid && line_div_addr === div_addrs(i) && line_set_addr =/= set_addrs(i)
    rrl_bank_conflict(i) := judge && io.readline.valid
    rrl_bank_conflict_intend(i) := judge && io.readline_intend
  }
  // a word read conflicts with the registered write when div, way and bank all match
  val wr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x =>
    io.read(x).valid && write_valid_reg &&
    div_addrs(x) === write_div_addr_dup_reg.head &&
    way_en(x) === write_wayen_dup_reg.head &&
    (write_bank_mask_reg(bank_addrs(x)(0)) || write_bank_mask_reg(bank_addrs(x)(1)) && io.is128Req(x))
  )
  val wrl_bank_conflict = io.readline.valid && write_valid_reg && line_div_addr === write_div_addr_dup_reg.head
  // ready
  io.readline.ready := !(wrl_bank_conflict)
  io.read.zipWithIndex.foreach { case (x, i) => x.ready := !(wr_bank_conflict(i) || rrhazard) }

  val perf_multi_read = PopCount(io.read.map(_.valid)) >= 2.U
  val bank_conflict_fast = Wire(Vec(LoadPipelineWidth, Bool()))
  (0 until LoadPipelineWidth).foreach(i => {
    // pipeline i loses against any lower-indexed pipeline it conflicts with
    bank_conflict_fast(i) := wr_bank_conflict(i) || rrl_bank_conflict(i) ||
      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
    io.bank_conflict_slow(i) := RegNext(bank_conflict_fast(i))
    io.disable_ld_fast_wakeup(i) := wr_bank_conflict(i) || rrl_bank_conflict_intend(i) ||
      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
  })
  XSPerfAccumulate("data_array_multi_read", perf_multi_read)
  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
    XSPerfAccumulate(s"data_array_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y))
  ))
  (0 until LoadPipelineWidth).foreach(i => {
    XSPerfAccumulate(s"data_array_rrl_bank_conflict_${i}", rrl_bank_conflict(i))
    XSPerfAccumulate(s"data_array_rw_bank_conflict_${i}", wr_bank_conflict(i))
    XSPerfAccumulate(s"data_array_read_${i}", io.read(i).valid)
  })
  XSPerfAccumulate("data_array_access_total", PopCount(io.read.map(_.valid)))
  XSPerfAccumulate("data_array_read_line", io.readline.valid)
  XSPerfAccumulate("data_array_write", io.write.valid)

  val read_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays,new L1BankedDataReadResult()))))
  val read_result_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays,new L1BankedDataReadResult()))))
  val read_error_delayed_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, Bool()))))
  dontTouch(read_result)
  dontTouch(read_error_delayed_result)
  for (div_index <- 0 until DCacheSetDiv){
    for (bank_index <- 0 until DCacheBanks) {
      for (way_index <- 0 until DCacheWays) {
        // Set Addr & Read Way Mask
        //
        //    Pipe 0   ....  Pipe (n-1)
        //      +      ....     +
        //      |      ....     |
        // +----+---------------+-----+
        //  X                        X
        //   X                      +------+ Bank Addr Match
        //    +---------+----------+
        //              |
        //     +--------+--------+
        //     |    Data Bank    |
        //     +-----------------+
        val loadpipe_en = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
          io.read(i).valid && div_addrs(i) === div_index.U && (bank_addrs(i)(0) === bank_index.U || bank_addrs(i)(1) === bank_index.U && io.is128Req(i)) && way_en(i)(way_index)
        })))
        val readline_en = Wire(Bool())
        if (ReduceReadlineConflict) {
          readline_en := io.readline.valid && io.readline.bits.rmask(bank_index) && line_way_en(way_index) && div_index.U === line_div_addr
        } else {
          readline_en := io.readline.valid && line_way_en(way_index) && div_index.U === line_div_addr
        }
        // the line read takes priority, then the lowest-indexed matching load pipeline
        val sram_set_addr = Mux(readline_en,
          addr_to_dcache_div_set(io.readline.bits.addr),
          PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => loadpipe_en(i) -> set_addrs(i)))
        )
        val read_en = loadpipe_en.asUInt.orR || readline_en
        // read raw data
        val data_bank = data_banks(div_index)(bank_index)(way_index)
        data_bank.io.r.en := read_en
        data_bank.io.r.addr := sram_set_addr

        read_result(div_index)(bank_index)(way_index).ecc := getECCFromEncWord(data_bank.io.r.data)
        read_result(div_index)(bank_index)(way_index).raw_data := getDataFromEncWord(data_bank.io.r.data)

        if (EnableDataEcc) {
          // decode on a registered copy of the encoded word, hence the "delayed" error
          val ecc_data = read_result(div_index)(bank_index)(way_index).asECCData()
          val ecc_data_delayed = RegEnable(ecc_data, RegNext(read_en))
          read_result(div_index)(bank_index)(way_index).error_delayed := dcacheParameters.dataCode.decode(ecc_data_delayed).error
          read_error_delayed_result(div_index)(bank_index)(way_index) := read_result(div_index)(bank_index)(way_index).error_delayed
        } else {
          read_result(div_index)(bank_index)(way_index).error_delayed := false.B
          read_error_delayed_result(div_index)(bank_index)(way_index) := false.B
        }

        read_result_delayed(div_index)(bank_index)(way_index) := RegEnable(read_result(div_index)(bank_index)(way_index), RegNext(read_en))
      }
    }
  }

  val data_read_oh = WireInit(VecInit(Seq.fill(DCacheSetDiv * DCacheBanks * DCacheWays)(0.U(1.W))))
  for(div_index <- 0 until DCacheSetDiv){
    for (bank_index <- 0 until DCacheBanks) {
      for (way_index <- 0 until DCacheWays) {
        data_read_oh(div_index *  DCacheBanks * DCacheWays + bank_index * DCacheWays + way_index) := data_banks(div_index)(bank_index)(way_index).io.r.en
      }
    }
  }
  XSPerfAccumulate("data_read_counter", PopCount(Cat(data_read_oh)))

  // read result: expose banked read result
  // TODO: clock gate
  (0 until LoadPipelineWidth).foreach(i => {
    // io.read_resp(i) := read_result(RegNext(bank_addrs(i)))(RegNext(OHToUInt(way_en(i))))
    val r_read_fire = RegNext(io.read(i).fire)
    val r_div_addr  = RegEnable(div_addrs(i), io.read(i).fire)
    val r_bank_addr = RegEnable(bank_addrs(i), io.read(i).fire)
    val r_way_addr  = RegNext(OHToUInt(way_en(i)))
    val rr_read_fire = RegNext(RegNext(io.read(i).fire))
    val rr_div_addr  = RegEnable(RegEnable(div_addrs(i), io.read(i).fire), r_read_fire)
    val rr_bank_addr = RegEnable(RegEnable(bank_addrs(i), io.read(i).fire), r_read_fire)
    val rr_way_addr  = RegEnable(RegEnable(OHToUInt(way_en(i)), io.read(i).fire), r_read_fire)
    (0 until VLEN/DCacheSRAMRowBits).foreach( j =>{
      io.read_resp(i)(j) := read_result(r_div_addr)(r_bank_addr(j))(r_way_addr)
      // error detection
      // normal read ports
      io.read_error_delayed(i)(j) := rr_read_fire && read_error_delayed_result(rr_div_addr)(rr_bank_addr(j))(rr_way_addr) && !RegNext(io.bank_conflict_slow(i))
    })
  })

  // readline port
  (0 until DCacheBanks).foreach(i => {
    io.readline_resp(i) := read_result(RegEnable(line_div_addr, io.readline.valid))(i)(RegEnable(OHToUInt(io.readline.bits.way_en),io.readline.valid))
  })
  io.readline_error_delayed := RegNext(RegNext(io.readline.fire)) &&
    VecInit((0 until DCacheBanks).map(i => io.readline_resp(i).error_delayed)).asUInt.orR

  // write data_banks & ecc_banks
  for (div_index <- 0 until DCacheSetDiv) {
    for (bank_index <- 0 until DCacheBanks) {
      for (way_index <- 0 until DCacheWays) {
        // data write
        val wen_reg = write_bank_mask_reg(bank_index) &&
          write_valid_dup_reg(bank_index) &&
          write_div_addr_dup_reg(bank_index) === div_index.U &&
          write_wayen_dup_reg(bank_index)(way_index)
        val write_ecc_reg = RegEnable(getECCFromEncWord(cacheParams.dataCode.encode(io.write.bits.data(bank_index))), io.write.valid)
        val data_bank = data_banks(div_index)(bank_index)(way_index)
        data_bank.io.w.en := wen_reg
        data_bank.io.w.addr := write_set_addr_dup_reg(bank_index)
        data_bank.io.w.data := asECCData(write_ecc_reg, write_data_reg(bank_index))
      }
    }
  }

  // the cache op port is not served by this module
  io.cacheOp.resp.valid := false.B
  io.cacheOp.resp.bits := DontCare

  val tableName =  "BankConflict" + p(XSCoreParamsKey).HartId.toString
  val siteName = "BankedDataArray" + p(XSCoreParamsKey).HartId.toString
  val bankConflictTable = ChiselDB.createTable(tableName, new BankConflictDB)
  val bankConflictData = Wire(new BankConflictDB)
  for (i <- 0 until LoadPipelineWidth) {
    bankConflictData.set_index(i) := set_addrs(i)
    bankConflictData.addr(i) := io.read(i).bits.addr
  }

  // FIXME: rr_bank_conflict(0)(1) no generalization
  when(rr_bank_conflict(0)(1)) {
    (0 until (VLEN/DCacheSRAMRowBits)).foreach(i => {
      bankConflictData.bank_index(i) := bank_addrs(0)(i)
    })
    bankConflictData.way_index  := OHToUInt(way_en(0))
    bankConflictData.fake_rr_bank_conflict := set_addrs(0) === set_addrs(1) && div_addrs(0) === div_addrs(1)
  }.otherwise {
    (0 until (VLEN/DCacheSRAMRowBits)).foreach(i => {
      bankConflictData.bank_index(i) := 0.U
    })
    bankConflictData.way_index := 0.U
    bankConflictData.fake_rr_bank_conflict := false.B
  }

  val isWriteBankConflictTable = Constantin.createRecord(s"isWriteBankConflictTable${p(XSCoreParamsKey).HartId}")
  bankConflictTable.log(
    data = bankConflictData,
    en = isWriteBankConflictTable.orR && rr_bank_conflict(0)(1),
    site = siteName,
    clock = clock,
    reset = reset
  )

  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
    XSPerfAccumulate(s"data_array_fake_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y) && set_addrs(x)===set_addrs(y) && div_addrs(x) === div_addrs(y))
  ))

}

// the smallest access unit is bank
/** Data array built from one [[DataSRAMBank]] per (set-div, bank); every read returns all ways.
  *
  * Writes are registered for one cycle before reaching the banks. The s1 read/read conflict
  * check ignores the set address; `io.bank_conflict_slow` later drops conflicts whose registered
  * set addresses are equal ("fake" conflicts).
  */
class BankedDataArray(implicit p: Parameters) extends AbstractBankedDataArray {
  println(" DCacheType: BankedDataArray")
  val ReduceReadlineConflict = false

  // writes are always accepted; they are registered below
  io.write.ready := true.B
  io.write_dup.foreach(_.ready := true.B)

  val data_banks = List.fill(DCacheSetDiv)(List.tabulate(DCacheBanks)(i => Module(new DataSRAMBank(i))))
  data_banks.foreach(_.foreach(_.dump()))

  val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
  val set_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val div_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val bank_addrs = Wire(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, UInt())))
  val way_en_reg = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
  val set_addrs_reg = Wire(Vec(LoadPipelineWidth, UInt()))

  val line_set_addr = addr_to_dcache_div_set(io.readline.bits.addr)
  val line_div_addr = addr_to_dcache_div(io.readline.bits.addr)
  val line_way_en = io.readline.bits.way_en

  // write request, registered for one cycle
  val write_bank_mask_reg = RegEnable(io.write.bits.wmask, io.write.valid)
  val write_data_reg = RegEnable(io.write.bits.data, io.write.valid)
  val write_valid_reg = RegNext(io.write.valid)
  val write_valid_dup_reg = io.write_dup.map(x => RegNext(x.valid))
  val write_wayen_dup_reg = io.write_dup.map(x => RegEnable(x.bits.way_en, x.valid))
  val write_set_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div_set(x.bits.addr), x.valid))
  val write_div_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div(x.bits.addr), x.valid))

  // read data_banks and ecc_banks
  // for single port SRAM, do not allow read and write in the same cycle
  val rwhazard = RegNext(io.write.valid)
  val rrhazard = false.B // io.readline.valid
  (0 until LoadPipelineWidth).foreach(rport_index => {
    div_addrs(rport_index) := addr_to_dcache_div(io.read(rport_index).bits.addr)
    bank_addrs(rport_index)(0) := addr_to_dcache_bank(io.read(rport_index).bits.addr)
    bank_addrs(rport_index)(1) := Mux(io.is128Req(rport_index), bank_addrs(rport_index)(0) + 1.U, bank_addrs(rport_index)(0))
    set_addrs(rport_index) := addr_to_dcache_div_set(io.read(rport_index).bits.addr)
    set_addrs_reg(rport_index) := RegEnable(addr_to_dcache_div_set(io.read(rport_index).bits.addr), io.read(rport_index).valid)

    // use way_en to select a way after data read out
    assert(!(RegNext(io.read(rport_index).fire && PopCount(io.read(rport_index).bits.way_en) > 1.U)))
    way_en(rport_index) := io.read(rport_index).bits.way_en
    way_en_reg(rport_index) := RegEnable(io.read(rport_index).bits.way_en, io.read(rport_index).valid)
  })

  // read each bank, get bank result
  val rr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x => Seq.tabulate(LoadPipelineWidth)(y =>
    io.read(x).valid && io.read(y).valid &&
    div_addrs(x) === div_addrs(y) &&
    (io.read(x).bits.bankMask & io.read(y).bits.bankMask) =/= 0.U
  ))
  val rrl_bank_conflict = Wire(Vec(LoadPipelineWidth, Bool()))
  val rrl_bank_conflict_intend = Wire(Vec(LoadPipelineWidth, Bool()))
  (0 until LoadPipelineWidth).foreach { i =>
    val judge = if (ReduceReadlineConflict) io.read(i).valid && (io.readline.bits.rmask & io.read(i).bits.bankMask) =/= 0.U && div_addrs(i) === line_div_addr
                else io.read(i).valid && div_addrs(i)===line_div_addr
    rrl_bank_conflict(i) := judge && io.readline.valid
    rrl_bank_conflict_intend(i) := judge && io.readline_intend
  }
  // a word read conflicts with the registered write when div and bank match (way is not compared here)
  val wr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x =>
    io.read(x).valid &&
    write_valid_reg &&
    div_addrs(x) === write_div_addr_dup_reg.head &&
    (write_bank_mask_reg(bank_addrs(x)(0)) || write_bank_mask_reg(bank_addrs(x)(1)) && io.is128Req(x))
  )
  val wrl_bank_conflict = io.readline.valid && write_valid_reg && line_div_addr === write_div_addr_dup_reg.head
  // ready
  io.readline.ready := !(wrl_bank_conflict)
  io.read.zipWithIndex.foreach{case(x, i) => x.ready := !(wr_bank_conflict(i) || rrhazard)}

  val perf_multi_read = PopCount(io.read.map(_.valid)) >= 2.U
  (0 until LoadPipelineWidth).foreach(i => {
    // remove fake rr_bank_conflict situation in s2
    val real_other_bank_conflict_reg = RegNext(wr_bank_conflict(i) || rrl_bank_conflict(i))
    val real_rr_bank_conflict_reg = (if (i == 0) 0.B else (0 until i).map{ j =>
      RegNext(rr_bank_conflict(j)(i)) && (set_addrs_reg(j) =/= set_addrs_reg(i))
    }.reduce(_ || _))
    io.bank_conflict_slow(i) := real_other_bank_conflict_reg || real_rr_bank_conflict_reg

    // get result in s1
    io.disable_ld_fast_wakeup(i) := wr_bank_conflict(i) || rrl_bank_conflict_intend(i) ||
      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
  })
  XSPerfAccumulate("data_array_multi_read", perf_multi_read)
  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
    XSPerfAccumulate(s"data_array_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y))
  ))
  (0 until LoadPipelineWidth).foreach(i => {
    XSPerfAccumulate(s"data_array_rrl_bank_conflict_${i}", rrl_bank_conflict(i))
    XSPerfAccumulate(s"data_array_rw_bank_conflict_${i}", wr_bank_conflict(i))
    XSPerfAccumulate(s"data_array_read_${i}", io.read(i).valid)
  })
  XSPerfAccumulate("data_array_access_total", PopCount(io.read.map(_.valid)))
  XSPerfAccumulate("data_array_read_line", io.readline.valid)
  XSPerfAccumulate("data_array_write", io.write.valid)

  val bank_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, new L1BankedDataReadResult()))))
  val bank_result_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, new L1BankedDataReadResult()))))
  val read_bank_error_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, Bool()))))
  dontTouch(bank_result)
  dontTouch(read_bank_error_delayed)
  for (div_index <- 0 until DCacheSetDiv) {
    for (bank_index <- 0 until DCacheBanks) {
      // Set Addr & Read Way Mask
      //
      //    Pipe 0   ....  Pipe (n-1)
      //      +      ....     +
      //      |      ....     |
      // +----+---------------+-----+
      //  X                        X
      //   X                      +------+ Bank Addr Match
      //    +---------+----------+
      //              |
      //     +--------+--------+
      //     |    Data Bank    |
      //     +-----------------+
      val bank_addr_matchs = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
        io.read(i).valid && div_addrs(i) === div_index.U && (bank_addrs(i)(0) === bank_index.U || bank_addrs(i)(1) === bank_index.U && io.is128Req(i))
      })))
      val readline_match = Wire(Bool())
      if (ReduceReadlineConflict) {
        readline_match := io.readline.valid && io.readline.bits.rmask(bank_index) && line_div_addr === div_index.U
      } else {
        readline_match := io.readline.valid && line_div_addr === div_index.U
      }

      // the line read takes priority, then the lowest-indexed matching load pipeline
      val bank_set_addr = Mux(readline_match,
        line_set_addr,
        PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => bank_addr_matchs(i) -> set_addrs(i)))
      )
      val read_enable = bank_addr_matchs.asUInt.orR || readline_match

      // read raw data
      val data_bank = data_banks(div_index)(bank_index)
      data_bank.io.r.en := read_enable
      data_bank.io.r.addr := bank_set_addr
      for (way_index <- 0 until DCacheWays) {
        bank_result(div_index)(bank_index)(way_index).ecc := getECCFromEncWord(data_bank.io.r.data(way_index))
        bank_result(div_index)(bank_index)(way_index).raw_data := getDataFromEncWord(data_bank.io.r.data(way_index))

        if (EnableDataEcc) {
          // decode on a registered copy of the encoded word, hence the "delayed" error
          val ecc_data = bank_result(div_index)(bank_index)(way_index).asECCData()
          val ecc_data_delayed = RegEnable(ecc_data, RegNext(read_enable))
          bank_result(div_index)(bank_index)(way_index).error_delayed := dcacheParameters.dataCode.decode(ecc_data_delayed).error
          read_bank_error_delayed(div_index)(bank_index)(way_index) := bank_result(div_index)(bank_index)(way_index).error_delayed
        } else {
          bank_result(div_index)(bank_index)(way_index).error_delayed := false.B
          read_bank_error_delayed(div_index)(bank_index)(way_index) := false.B
        }
        bank_result_delayed(div_index)(bank_index)(way_index) := RegEnable(bank_result(div_index)(bank_index)(way_index), RegNext(read_enable))
      }
    }
  }

  // a bank read enables all of its ways, so each read counts DCacheWays SRAM accesses
  val data_read_oh = WireInit(VecInit(Seq.fill(DCacheSetDiv)(0.U(XLEN.W))))
  for (div_index <- 0 until DCacheSetDiv){
    val temp = WireInit(VecInit(Seq.fill(DCacheBanks)(0.U(XLEN.W))))
    for (bank_index <- 0 until DCacheBanks) {
      temp(bank_index) := PopCount(Fill(DCacheWays, data_banks(div_index)(bank_index).io.r.en.asUInt))
    }
    data_read_oh(div_index) := temp.reduce(_ + _)
  }
  XSPerfAccumulate("data_read_counter", data_read_oh.foldLeft(0.U)(_ + _))

  (0 until LoadPipelineWidth).foreach(i => {
    // 1 cycle after read fire(load s2)
    val r_read_fire = RegNext(io.read(i).fire)
    val r_div_addr  = RegEnable(div_addrs(i), io.read(i).fire)
    val r_bank_addr = RegEnable(bank_addrs(i), io.read(i).fire)
    val r_way_addr  = RegEnable(OHToUInt(way_en(i)), io.read(i).fire)
    // 2 cycles after read fire(load s3)
    val rr_read_fire = RegNext(r_read_fire)
    val rr_div_addr  = RegEnable(RegEnable(div_addrs(i), io.read(i).fire), r_read_fire)
    val rr_bank_addr = RegEnable(RegEnable(bank_addrs(i), io.read(i).fire), r_read_fire)
    val rr_way_addr  = RegEnable(RegEnable(OHToUInt(way_en(i)), io.read(i).fire), r_read_fire)
    (0 until VLEN/DCacheSRAMRowBits).foreach( j =>{
      io.read_resp(i)(j) := bank_result(r_div_addr)(r_bank_addr(j))(r_way_addr)
      // error detection
      io.read_error_delayed(i)(j) := rr_read_fire && read_bank_error_delayed(rr_div_addr)(rr_bank_addr(j))(rr_way_addr) && !RegNext(io.bank_conflict_slow(i))
    })
  })

  // read result: expose banked read result
  (0 until DCacheBanks).foreach(i => {
    io.readline_resp(i) := bank_result(RegEnable(line_div_addr, io.readline.valid))(i)(RegEnable(OHToUInt(io.readline.bits.way_en), io.readline.valid))
  })
  io.readline_error_delayed := RegNext(RegNext(io.readline.fire)) &&
    VecInit((0 until DCacheBanks).map(i => io.readline_resp(i).error_delayed)).asUInt.orR

  // write data_banks & ecc_banks
  for (div_index <- 0 until DCacheSetDiv) {
    for (bank_index <- 0 until DCacheBanks) {
      // data write
      val wen_reg = write_bank_mask_reg(bank_index) &&
        write_valid_dup_reg(bank_index) &&
        write_div_addr_dup_reg(bank_index) === div_index.U && RegNext(io.write.valid)
      val write_ecc_reg = RegEnable(getECCFromEncWord(cacheParams.dataCode.encode(io.write.bits.data(bank_index))), io.write.valid)
      val data_bank = data_banks(div_index)(bank_index)
      data_bank.io.w.en := wen_reg
      data_bank.io.w.way_en := write_wayen_dup_reg(bank_index)
      data_bank.io.w.addr := write_set_addr_dup_reg(bank_index)
      data_bank.io.w.data := asECCData(write_ecc_reg, write_data_reg(bank_index))
    }
  }

  // the cache op port is not served by this module
  io.cacheOp.resp.valid := false.B
  io.cacheOp.resp.bits := DontCare

  val tableName =  "BankConflict" + p(XSCoreParamsKey).HartId.toString
  val siteName = "BankedDataArray" + p(XSCoreParamsKey).HartId.toString
  val bankConflictTable = ChiselDB.createTable(tableName, new BankConflictDB)
  val bankConflictData = Wire(new BankConflictDB)
  for (i <- 0 until LoadPipelineWidth) {
    bankConflictData.set_index(i) := set_addrs(i)
    bankConflictData.addr(i) := io.read(i).bits.addr
  }

  // FIXME: rr_bank_conflict(0)(1) no generalization
  when(rr_bank_conflict(0)(1)) {
    (0 until (VLEN/DCacheSRAMRowBits)).foreach(i => {
      bankConflictData.bank_index(i) := bank_addrs(0)(i)
    })
    bankConflictData.way_index  := OHToUInt(way_en(0))
    bankConflictData.fake_rr_bank_conflict := set_addrs(0) === set_addrs(1) && div_addrs(0) === div_addrs(1)
  }.otherwise {
    (0 until (VLEN/DCacheSRAMRowBits)).foreach(i => {
      bankConflictData.bank_index(i) := 0.U
    })
    bankConflictData.way_index := 0.U
    bankConflictData.fake_rr_bank_conflict := false.B
  }

  val isWriteBankConflictTable = Constantin.createRecord(s"isWriteBankConflictTable${p(XSCoreParamsKey).HartId}")
  bankConflictTable.log(
    data = bankConflictData,
    en = isWriteBankConflictTable.orR && rr_bank_conflict(0)(1),
    site = siteName,
    clock = clock,
    reset = reset
  )

  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
    XSPerfAccumulate(s"data_array_fake_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y) && set_addrs(x) === set_addrs(y) && div_addrs(x) === div_addrs(y))
  ))

}