/***************************************************************************************
* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
*
*
* Acknowledgement
*
* This implementation is inspired by several key papers:
* [1] Gurindar S. Sohi, and Manoj Franklin. "[High-bandwidth data memory systems for superscalar processors.]
* (https://doi.org/10.1145/106972.106980)" 4th International Conference on Architectural Support for Programming
* Languages and Operating Systems (ASPLOS). 1991.
***************************************************************************************/

package xiangshan.cache

import org.chipsalliance.cde.config.Parameters
import chisel3._
import utils._
import utility._
import chisel3.util._
import freechips.rocketchip.tilelink.{ClientMetadata, TLClientParameters, TLEdgeOut}
import xiangshan.{L1CacheErrorInfo, XSCoreParamsKey}

import scala.math.max

// Record written to the "BankConflict" ChiselDB table by the data array.
// `addr` and `set_index` hold one entry per load pipeline; `bank_index` holds
// one entry per SRAM row covered by a VLEN-wide access.
class BankConflictDB(implicit p: Parameters) extends DCacheBundle{
  val addr = Vec(LoadPipelineWidth, Bits(PAddrBits.W))
  val set_index = Vec(LoadPipelineWidth, UInt((DCacheAboveIndexOffset - DCacheSetOffset).W))
  val bank_index = Vec(VLEN/DCacheSRAMRowBits, UInt((DCacheSetOffset - DCacheBankOffset).W))
  val way_index = UInt(wayBits.W)
  // set when the two conflicting reads target the same set and set-div
  val fake_rr_bank_conflict = Bool()
}

// Basic read request: a way-enable mask (one bit per way) and a physical address.
class L1BankedDataReadReq(implicit p: Parameters) extends DCacheBundle
{
  val way_en = Bits(DCacheWays.W)
  val addr = Bits(PAddrBits.W)
}

// Load-pipeline read request: like L1BankedDataReadReq, plus a per-bank mask
// (used for read/read conflict detection) and a kill flag.
class L1BankedDataReadReqWithMask(implicit p: Parameters) extends DCacheBundle
{
  val way_en = Bits(DCacheWays.W)
  val addr = Bits(PAddrBits.W)
  val bankMask = Bits(DCacheBanks.W)
  val kill = Bool()
}

// Whole-line read request; `rmask` has one bit per bank.
class L1BankedDataReadLineReq(implicit p: Parameters) extends L1BankedDataReadReq
{
  val rmask = Bits(DCacheBanks.W)
}

// Now, we can write a cache-block in a single cycle
// `wmask` has one bit per bank; `data` carries one SRAM row per bank.
class L1BankedDataWriteReq(implicit p: Parameters) extends L1BankedDataReadReq
{
  val wmask = Bits(DCacheBanks.W)
  val data = Vec(DCacheBanks, Bits(DCacheSRAMRowBits.W))
}

// cache-block write request without data
class L1BankedDataWriteReqCtrl(implicit p: Parameters) extends L1BankedDataReadReq

// Read result of one SRAM row: raw data, its ECC bits and a delayed error flag.
class L1BankedDataReadResult(implicit p: Parameters) extends DCacheBundle
{
  // you can choose which bank to read to save power
  val ecc = Bits(eccBits.W)
  val raw_data = Bits(DCacheSRAMRowBits.W)
  val error_delayed = Bool() // 1 cycle later than data resp

  // Concatenates the ECC bits (upper) with the raw data (lower).
  def asECCData() = {
    Cat(ecc, raw_data)
  }
}

// Write port of a DataSRAMBank: enable, set address, way select and one row of data.
class DataSRAMBankWriteReq(implicit p: Parameters) extends DCacheBundle {
  val en = Bool()
  val addr = UInt()
  val way_en = UInt(DCacheWays.W)
  val data = UInt(DCacheSRAMRowBits.W)
}

// wrap a sram
// One single-way, single-port SRAM holding one row per set of a set-div.
// `bankIdx` and `wayIdx` are only used in the debug printouts below.
class DataSRAM(bankIdx: Int, wayIdx: Int)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle() {
    val w = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Input(UInt(DCacheSRAMRowBits.W))
    }

    val r = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Output(UInt(DCacheSRAMRowBits.W))
    }
  })

  // data sram
  val data_sram = Module(new SRAMTemplate(
    Bits(DCacheSRAMRowBits.W),
    set = DCacheSets / DCacheSetDiv,
    way = 1,
    shouldReset = false,
    holdRead = false,
    singlePort = true
  ))

  data_sram.io.w.req.valid := io.w.en
  data_sram.io.w.req.bits.apply(
    setIdx = io.w.addr,
    data = io.w.data,
    waymask = 1.U
  )
  data_sram.io.r.req.valid := io.r.en
  data_sram.io.r.req.bits.apply(setIdx = io.r.addr)
  // the template is instantiated with a single way, so only entry 0 exists
  io.r.data := data_sram.io.r.resp.data(0)
  XSPerfAccumulate("part_data_read_counter", data_sram.io.r.req.valid)

  // Debug print of a read, issued the cycle after the read enable together
  // with the address captured when the read was requested.
  def dump_r() = {
    when(RegNext(io.r.en)) {
      XSDebug("bank read set %x bank %x way %x data %x\n",
        RegEnable(io.r.addr, io.r.en),
        bankIdx.U,
        wayIdx.U,
        io.r.data
      )
    }
  }

  // Debug print of a write, issued in the cycle the write enable is high.
  def dump_w() = {
    when(io.w.en) {
      XSDebug("bank write set %x bank %x way %x data %x\n",
        io.w.addr,
        bankIdx.U,
        wayIdx.U,
        io.w.data
      )
    }
  }

  def dump() = {
    dump_w()
    dump_r()
  }
}

// wrap data rows of all ways of one bank (DCacheWays single-way SRAMs)
// NOTE(review): the constructor parameter `index` is not referenced in this class.
class DataSRAMBank(index: Int)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle() {
    val w = Input(new DataSRAMBankWriteReq)

    val r = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Output(Vec(DCacheWays, UInt(DCacheSRAMRowBits.W)))
    }
  })

  // a write may select at most one way
  assert(RegNext(!io.w.en || PopCount(io.w.way_en) <= 1.U))

  // external controls do not read and write at the same time
  val w_info = io.w
  // val rw_bypass = RegNext(io.w.addr === io.r.addr && io.w.way_en === io.r.way_en && io.w.en)

  // multiway data bank
  val data_bank = Seq.fill(DCacheWays) {
    Module(new SRAMTemplate(
      Bits(DCacheSRAMRowBits.W),
      set = DCacheSets / DCacheSetDiv,
      way = 1,
      shouldReset = false,
      holdRead = false,
      singlePort = true
    ))
  }

  for (w <- 0 until DCacheWays) {
    // only the way selected by way_en is written
    val wen = w_info.en && w_info.way_en(w)
    data_bank(w).io.w.req.valid := wen
    data_bank(w).io.w.req.bits.apply(
      setIdx = w_info.addr,
      data = w_info.data,
      waymask = 1.U
    )
    // all ways are read together with the same set address
    data_bank(w).io.r.req.valid := io.r.en
    data_bank(w).io.r.req.bits.apply(setIdx = io.r.addr)
    // per-way clock, driven from a read or a write to this way
    // NOTE(review): presumably ClockGate(testEnable, enable, clock) — confirm against its definition
    data_bank(w).clock := ClockGate(false.B, io.r.en | (io.w.en & io.w.way_en(w)), clock)
  }
  XSPerfAccumulate("part_data_read_counter", PopCount(Cat(data_bank.map(_.io.r.req.valid))))

  io.r.data := data_bank.map(_.io.r.resp.data(0))

  // Debug print of a read, issued the cycle after the read enable.
  def dump_r() = {
    when(RegNext(io.r.en)) {
      XSDebug("bank read addr %x data %x\n",
        RegEnable(io.r.addr, io.r.en),
        io.r.data.asUInt
      )
    }
  }

  // Debug print of a write, issued in the cycle the write enable is high.
  def dump_w() = {
    when(io.w.en) {
      XSDebug("bank write addr %x way_en %x data %x\n",
        io.w.addr,
        io.w.way_en,
        io.w.data
      )
    }
  }

  def dump() = {
    dump_w()
    dump_r()
  }
}

// Marker wrapped in an Option by the data arrays to select ECC storage.
case object HasDataEccParam

//                     Banked DCache Data
// -----------------------------------------------------------------
// | Bank0 | Bank1 | Bank2 | Bank3 | Bank4 | Bank5 | Bank6 | Bank7 |
// -----------------------------------------------------------------
// | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  |
// | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  |
// | ....  | ....  | ....  | ....  | ....  | ....  | ....
// | ....  |
// -----------------------------------------------------------------
// Common IO and debug helpers shared by the data array implementations.
// Subclasses provide the storage and drive every output declared here.
abstract class AbstractBankedDataArray(implicit p: Parameters) extends DCacheModule
{
  // Some(marker) when data ECC is enabled, None otherwise
  val DataEccParam = if(EnableDataEcc) Some(HasDataEccParam) else None
  val ReadlinePortErrorIndex = LoadPipelineWidth
  val io = IO(new DCacheBundle {
    // load pipeline read word req
    val read = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new L1BankedDataReadReqWithMask)))
    val is128Req = Input(Vec(LoadPipelineWidth, Bool()))
    // main pipeline read / write line req
    val readline_intend = Input(Bool())
    val readline = Flipped(DecoupledIO(new L1BankedDataReadLineReq))
    val write = Flipped(DecoupledIO(new L1BankedDataWriteReq))
    val write_dup = Vec(DCacheBanks, Flipped(Decoupled(new L1BankedDataWriteReqCtrl)))
    // data for readline and loadpipe
    val readline_resp = Output(Vec(DCacheBanks, new L1BankedDataReadResult()))
    val readline_error_delayed = Output(Bool())
    val read_resp = Output(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, new L1BankedDataReadResult())))
    val read_error_delayed = Output(Vec(LoadPipelineWidth,Vec(VLEN/DCacheSRAMRowBits, Bool())))
    // val nacks = Output(Vec(LoadPipelineWidth, Bool()))
    // val errors = Output(Vec(LoadPipelineWidth + 1, ValidIO(new L1CacheErrorInfo))) // read ports + readline port
    // when bank_conflict, read (1) port should be ignored
    val bank_conflict_slow = Output(Vec(LoadPipelineWidth, Bool()))
    val disable_ld_fast_wakeup = Output(Vec(LoadPipelineWidth, Bool()))
    // customized cache op port
    val cacheOp = Flipped(new L1CacheInnerOpIO)
    val cacheOp_req_dup = Vec(DCacheDupNum, Flipped(Valid(new CacheCtrlReqInfo)))
    val cacheOp_req_bits_opCode_dup = Input(Vec(DCacheDupNum, UInt(XLEN.W)))
  })

  // Builds a Vec with one element per load pipeline from `f(pipelineIndex)`.
  def pipeMap[T <: Data](f: Int => T) = VecInit((0 until LoadPipelineWidth).map(f))

  // Extracts the ECC bits (the bits above the data word) from an encoded word.
  // Fails elaboration if the argument is not exactly encWordBits wide.
  def getECCFromEncWord(encWord: UInt) = {
    require(encWord.getWidth == encWordBits)
    encWord(encWordBits - 1, wordBits)
  }

  // Debug print of every valid load-pipeline read and of a valid line read.
  def dumpRead = {
    (0 until LoadPipelineWidth) map { w =>
      when(io.read(w).valid) {
        XSDebug(s"DataArray Read channel: $w valid way_en: %x addr: %x\n",
          io.read(w).bits.way_en, io.read(w).bits.addr)
      }
    }
    when(io.readline.valid) {
      XSDebug(s"DataArray Read Line, valid way_en: %x addr: %x rmask %x\n",
        io.readline.bits.way_en, io.readline.bits.addr, io.readline.bits.rmask)
    }
  }

  // Debug print of a valid write: header line plus one line per bank.
  def dumpWrite = {
    when(io.write.valid) {
      XSDebug(s"DataArray Write valid way_en: %x addr: %x\n",
        io.write.bits.way_en, io.write.bits.addr)

      (0 until DCacheBanks) map { r =>
        XSDebug(s"cycle: $r data: %x wmask: %x\n",
          io.write.bits.data(r), io.write.bits.wmask(r))
      }
    }
  }

  // Debug print of the read responses; 128-bit requests print both rows.
  def dumpResp = {
    XSDebug(s"DataArray ReadeResp channel:\n")
    (0 until LoadPipelineWidth) map { r =>
      XSDebug(s"cycle: $r data: %x\n", Mux(io.is128Req(r),
        Cat(io.read_resp(r)(1).raw_data,io.read_resp(r)(0).raw_data),
        io.read_resp(r)(0).raw_data))
    }
  }

  def dump() = {
    dumpRead
    dumpWrite
    dumpResp
  }
}

// the smallest access unit is sram
// Data array with one DataSRAM (and, with ECC, one ECC SRAM) per
// (set-div, bank, way). Writes are registered for one cycle before they are
// applied to the SRAMs; reads are issued in the request cycle.
class SramedDataArray(implicit p: Parameters) extends AbstractBankedDataArray {
  println(" DCacheType: SramedDataArray")
  val ReduceReadlineConflict = false

  io.write.ready := true.B
  io.write_dup.foreach(_.ready := true.B)

  // indexed as data_banks(div)(bank)(way)
  val data_banks = List.tabulate(DCacheSetDiv)( k => List.tabulate(DCacheBanks)(i => List.tabulate(DCacheWays)(j => Module(new DataSRAM(i,j)))))
  // ecc_banks mirrors data_banks and is indexed as banks(div)(bank)(way)
  // everywhere below, so it must be built in that same dimension order.
  // (It was previously built as [div][way][bank], which only worked when
  // DCacheWays == DCacheBanks and otherwise indexed past the end of a List.)
  val ecc_banks = DataEccParam.map {
    case _ =>
      val ecc = List.tabulate(DCacheSetDiv)( k =>
        List.tabulate(DCacheBanks)(i =>
          List.tabulate(DCacheWays)(j =>
            Module(new SRAMTemplate(
                Bits(eccBits.W),
                set = DCacheSets / DCacheSetDiv,
                way = 1,
                shouldReset = false,
                holdRead = false,
                singlePort = true
            ))
        )))
      ecc
  }

  data_banks.map(_.map(_.map(_.dump())))

  val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
  val set_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val div_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val bank_addrs = Wire(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, UInt())))

  val line_set_addr = addr_to_dcache_div_set(io.readline.bits.addr)
  val line_div_addr = addr_to_dcache_div(io.readline.bits.addr)
  // when WPU is enabled, line_way_en is all enabled when read data
  val line_way_en = Fill(DCacheWays, 1.U) // val line_way_en = io.readline.bits.way_en
  val line_way_en_reg = RegEnable(io.readline.bits.way_en, 0.U(DCacheWays.W),io.readline.valid)

  // write request captured for one cycle; the SRAM write happens from these registers
  val write_bank_mask_reg = RegEnable(io.write.bits.wmask, 0.U(DCacheBanks.W), io.write.valid)
  val write_data_reg = RegEnable(io.write.bits.data, io.write.valid)
  val write_valid_reg = RegNext(io.write.valid)
  val write_valid_dup_reg = io.write_dup.map(x => RegNext(x.valid))
  val write_wayen_dup_reg = io.write_dup.map(x => RegEnable(x.bits.way_en, 0.U(DCacheWays.W), x.valid))
  val write_set_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div_set(x.bits.addr), x.valid))
  val write_div_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div(x.bits.addr), x.valid))

  // read data_banks and ecc_banks
  // for single port SRAM, do not allow read and write in the same cycle
  val rrhazard = false.B // io.readline.valid
  (0 until LoadPipelineWidth).map(rport_index => {
    div_addrs(rport_index) := addr_to_dcache_div(io.read(rport_index).bits.addr)
    set_addrs(rport_index) := addr_to_dcache_div_set(io.read(rport_index).bits.addr)
    bank_addrs(rport_index)(0) := addr_to_dcache_bank(io.read(rport_index).bits.addr)
    // second row of a 128-bit request is the next bank
    bank_addrs(rport_index)(1) := bank_addrs(rport_index)(0) + 1.U

    // use way_en to select a way after data read out
    assert(!(RegNext(io.read(rport_index).fire && PopCount(io.read(rport_index).bits.way_en) > 1.U)))
    way_en(rport_index) := io.read(rport_index).bits.way_en
  })

  // read conflict
  // two loads conflict when they hit the same set-div, overlapping banks and
  // the same way, but different sets (same-set reads can share the SRAM read)
  val rr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x => Seq.tabulate(LoadPipelineWidth)(y =>
    io.read(x).valid && io.read(y).valid &&
    div_addrs(x) === div_addrs(y) &&
    (io.read(x).bits.bankMask & io.read(y).bits.bankMask) =/= 0.U &&
    io.read(x).bits.way_en === io.read(y).bits.way_en &&
    set_addrs(x) =/= set_addrs(y)
  ))
  // load vs. line read conflict (actual request and announced intent)
  val rrl_bank_conflict = Wire(Vec(LoadPipelineWidth, Bool()))
  val rrl_bank_conflict_intend = Wire(Vec(LoadPipelineWidth, Bool()))
  (0 until LoadPipelineWidth).foreach { i =>
    val judge = if (ReduceReadlineConflict) io.read(i).valid && (io.readline.bits.rmask & io.read(i).bits.bankMask) =/= 0.U && line_div_addr === div_addrs(i) && line_set_addr =/= set_addrs(i)
                else io.read(i).valid && line_div_addr === div_addrs(i) && line_set_addr =/= set_addrs(i)
    rrl_bank_conflict(i) := judge && io.readline.valid
    rrl_bank_conflict_intend(i) := judge && io.readline_intend
  }
  // load vs. the registered write of the previous cycle
  val wr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x =>
    io.read(x).valid && write_valid_reg &&
    div_addrs(x) === write_div_addr_dup_reg.head &&
    way_en(x) === write_wayen_dup_reg.head &&
    (write_bank_mask_reg(bank_addrs(x)(0)) || write_bank_mask_reg(bank_addrs(x)(1)) && io.is128Req(x))
  )
  val wrl_bank_conflict = io.readline.valid && write_valid_reg && line_div_addr === write_div_addr_dup_reg.head
  // ready
  io.readline.ready := !(wrl_bank_conflict)
  io.read.zipWithIndex.map { case (x, i) => x.ready := !(wr_bank_conflict(i) || rrhazard) }

  val perf_multi_read = PopCount(io.read.map(_.valid)) >= 2.U
  val bank_conflict_fast = Wire(Vec(LoadPipelineWidth, Bool()))
  (0 until LoadPipelineWidth).foreach(i => {
    // a load loses a read/read conflict only against lower-numbered pipelines
    bank_conflict_fast(i) := wr_bank_conflict(i) || rrl_bank_conflict(i) ||
      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
    io.bank_conflict_slow(i) := RegNext(bank_conflict_fast(i))
    io.disable_ld_fast_wakeup(i) := wr_bank_conflict(i) || rrl_bank_conflict_intend(i) ||
      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
  })
  XSPerfAccumulate("data_array_multi_read", perf_multi_read)
  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
    XSPerfAccumulate(s"data_array_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y))
  ))
  (0 until LoadPipelineWidth).foreach(i => {
    XSPerfAccumulate(s"data_array_rrl_bank_conflict_${i}", rrl_bank_conflict(i))
    XSPerfAccumulate(s"data_array_rw_bank_conflict_${i}", wr_bank_conflict(i))
    XSPerfAccumulate(s"data_array_read_${i}", io.read(i).valid)
  })
  XSPerfAccumulate("data_array_access_total", PopCount(io.read.map(_.valid)))
  XSPerfAccumulate("data_array_read_line", io.readline.valid)
  XSPerfAccumulate("data_array_write", io.write.valid)

  val read_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays,new L1BankedDataReadResult()))))
  val read_result_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays,new L1BankedDataReadResult()))))
  val read_error_delayed_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, Bool()))))
  dontTouch(read_result)
  dontTouch(read_error_delayed_result)
  for (div_index <- 0 until DCacheSetDiv){
    for (bank_index <- 0 until DCacheBanks) {
      for (way_index <- 0 until DCacheWays) {
        // Set Addr & Read Way Mask
        //
        //    Pipe 0   ....  Pipe (n-1)
        //      +      ....     +
        //      |      ....     |
        // +----+---------------+-----+
        //  X                        X
        //   X                      +------+ Bank Addr Match
        //    +---------+----------+
        //              |
        //     +--------+--------+
        //     |    Data Bank    |
        //     +-----------------+
        val loadpipe_en = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
          io.read(i).valid && div_addrs(i) === div_index.U && (bank_addrs(i)(0) === bank_index.U || bank_addrs(i)(1) === bank_index.U && io.is128Req(i)) && way_en(i)(way_index)
        })))
        val readline_en = Wire(Bool())
        if (ReduceReadlineConflict) {
          readline_en := io.readline.valid && io.readline.bits.rmask(bank_index) && line_way_en(way_index) && div_index.U === line_div_addr
        } else {
          readline_en := io.readline.valid && line_way_en(way_index) && div_index.U === line_div_addr
        }
        // a line read takes the SRAM address; otherwise the lowest-numbered matching load does
        val sram_set_addr = Mux(readline_en,
          addr_to_dcache_div_set(io.readline.bits.addr),
          PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => loadpipe_en(i) -> set_addrs(i)))
        )
        val read_en = loadpipe_en.asUInt.orR || readline_en
        // read raw data
        val data_bank = data_banks(div_index)(bank_index)(way_index)
        data_bank.io.r.en := read_en
        data_bank.io.r.addr := sram_set_addr
        ecc_banks match {
          case Some(banks) =>
            val ecc_bank = banks(div_index)(bank_index)(way_index)
            ecc_bank.io.r.req.valid := read_en
            ecc_bank.io.r.req.bits.apply(setIdx = sram_set_addr)
            read_result(div_index)(bank_index)(way_index).ecc := ecc_bank.io.r.resp.data(0)
          case None =>
            read_result(div_index)(bank_index)(way_index).ecc := 0.U
        }

        read_result(div_index)(bank_index)(way_index).raw_data := data_bank.io.r.data
        read_result_delayed(div_index)(bank_index)(way_index) := RegEnable(read_result(div_index)(bank_index)(way_index), RegNext(read_en))

        // use ECC to check error
        // the decode runs on a registered copy of the read data, hence "delayed"
        ecc_banks match {
          case Some(_) =>
            val ecc_data = read_result(div_index)(bank_index)(way_index).asECCData()
            val ecc_data_delayed = RegEnable(ecc_data, RegNext(read_en))
            read_result(div_index)(bank_index)(way_index).error_delayed := dcacheParameters.dataCode.decode(ecc_data_delayed).error
            read_error_delayed_result(div_index)(bank_index)(way_index) := read_result(div_index)(bank_index)(way_index).error_delayed
          case None =>
            read_result(div_index)(bank_index)(way_index).error_delayed := false.B
            read_error_delayed_result(div_index)(bank_index)(way_index) := false.B
        }
      }
    }
  }

  // flatten every SRAM read enable into one vector for the perf counter
  val data_read_oh = WireInit(VecInit(Seq.fill(DCacheSetDiv * DCacheBanks * DCacheWays)(0.U(1.W))))
  for(div_index <- 0 until DCacheSetDiv){
    for (bank_index <- 0 until DCacheBanks) {
      for (way_index <- 0 until DCacheWays) {
        data_read_oh(div_index * DCacheBanks * DCacheWays + bank_index * DCacheWays + way_index) := data_banks(div_index)(bank_index)(way_index).io.r.en
      }
    }
  }
  XSPerfAccumulate("data_read_counter", PopCount(Cat(data_read_oh)))

  // read result: expose banked read result
  // TODO: clock gate
  (0 until LoadPipelineWidth).map(i => {
    // io.read_resp(i) := read_result(RegNext(bank_addrs(i)))(RegNext(OHToUInt(way_en(i))))
    // r_*  : request info one cycle after the request (selects the data response)
    // rr_* : request info two cycles after the request (selects the delayed error)
    val r_read_fire = RegNext(io.read(i).fire)
    val r_div_addr = RegEnable(div_addrs(i), io.read(i).fire)
    val r_bank_addr = RegEnable(bank_addrs(i), io.read(i).fire)
    val r_way_addr = RegNext(OHToUInt(way_en(i)))
    val rr_read_fire = RegNext(RegNext(io.read(i).fire))
    val rr_div_addr = RegEnable(RegEnable(div_addrs(i), io.read(i).fire), r_read_fire)
    val rr_bank_addr = RegEnable(RegEnable(bank_addrs(i), io.read(i).fire), r_read_fire)
    val rr_way_addr = RegEnable(RegEnable(OHToUInt(way_en(i)), io.read(i).fire), r_read_fire)
    (0 until VLEN/DCacheSRAMRowBits).map( j =>{
      io.read_resp(i)(j) := read_result(r_div_addr)(r_bank_addr(j))(r_way_addr)
      // error detection
      // normal read ports
      io.read_error_delayed(i)(j) := rr_read_fire && read_error_delayed_result(rr_div_addr)(rr_bank_addr(j))(rr_way_addr) && !RegNext(io.bank_conflict_slow(i))
    })
  })

  // readline port
  (0 until DCacheBanks).map(i => {
    io.readline_resp(i) := read_result(RegEnable(line_div_addr, io.readline.valid))(i)(RegEnable(OHToUInt(io.readline.bits.way_en),io.readline.valid))
  })
  io.readline_error_delayed := RegNext(RegNext(io.readline.fire)) &&
    VecInit((0 until DCacheBanks).map(i => io.readline_resp(i).error_delayed)).asUInt.orR

  // write data_banks & ecc_banks
  for (div_index <- 0 until DCacheSetDiv) {
    for (bank_index <- 0 until DCacheBanks) {
      for (way_index <- 0 until DCacheWays) {
        // data write
        // each bank uses its own duplicated copy of the registered control signals
        val wen_reg = write_bank_mask_reg(bank_index) &&
          write_valid_dup_reg(bank_index) &&
          write_div_addr_dup_reg(bank_index) === div_index.U &&
          write_wayen_dup_reg(bank_index)(way_index)
        val data_bank = data_banks(div_index)(bank_index)(way_index)
        data_bank.io.w.en := wen_reg

        data_bank.io.w.addr := write_set_addr_dup_reg(bank_index)
        data_bank.io.w.data := write_data_reg(bank_index)
        // ecc write
        ecc_banks match {
          case Some(banks) =>
            val ecc_bank = banks(div_index)(bank_index)(way_index)
            ecc_bank.io.w.req.valid := wen_reg
            ecc_bank.io.w.req.bits.apply(
              setIdx = write_set_addr_dup_reg(bank_index),
              data = RegEnable(getECCFromEncWord(cacheParams.dataCode.encode((io.write.bits.data(bank_index)))), io.write.valid),
              waymask = 1.U
            )
            when(ecc_bank.io.w.req.valid) {
              XSDebug("write in ecc sram: bank %x set %x data %x waymask %x\n",
                bank_index.U,
                addr_to_dcache_div_set(io.write.bits.addr),
                getECCFromEncWord(cacheParams.dataCode.encode((io.write.bits.data(bank_index)))),
                io.write.bits.way_en
              )
            }
          case None => None
        }
      }
    }
  }

  // cacheOp.wayNum is sliced to 5 bits below
  require(nWays <= 32)
  io.cacheOp.resp.bits := DontCare
  val cacheOpShouldResp = WireInit(false.B)
  val eccReadResult = Wire(Vec(DCacheBanks, UInt(eccBits.W)))
  // DCacheDupNum is 16
  // vec: the dupIdx for every bank and every group
  // NOTE(review): each Seq has 8 entries and its position is used as the bank
  // index, so this presumably assumes DCacheBanks == 8 — confirm.
  val rdata_dup_vec = Seq(0,0,1,1,2,2,3,3)
  val rdataEcc_dup_vec = Seq(4,4,5,5,6,6,7,7)
  val wdata_dup_vec = Seq(8,8,9,9,10,10,11,11)
  val wdataEcc_dup_vec = Seq(12,12,13,13,14,14,15,15)
  val cacheOpDivAddr = set_to_dcache_div(io.cacheOp.req.bits.index)
  val cacheOpSetAddr = set_to_dcache_div_set(io.cacheOp.req.bits.index)
  val cacheOpWayNum = io.cacheOp.req.bits.wayNum(4, 0)
  // The `when` blocks below come after the normal read/write connections, so
  // under Chisel's last-connect rule they take over the SRAM ports while a
  // cache op of the matching kind is valid.
  rdata_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) =>
    for (divIdx <- 0 until DCacheSetDiv){
      for (wayIdx <- 0 until DCacheWays) {
        when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isReadData(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
          val data_bank = data_banks(divIdx)(bankIdx)(wayIdx)
          data_bank.io.r.en := UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))(wayIdx) && cacheOpDivAddr === divIdx.U
          data_bank.io.r.addr := cacheOpSetAddr
          cacheOpShouldResp := true.B
        }
      }
    }
  }
  rdataEcc_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) =>
    for (divIdx <- 0 until DCacheSetDiv) {
      for (wayIdx <- 0 until DCacheWays) {
        when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isReadDataECC(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
          ecc_banks match {
            case Some(banks) =>
              val ecc_bank = banks(divIdx)(bankIdx)(wayIdx)
              ecc_bank.io.r.req.valid := true.B
              ecc_bank.io.r.req.bits.setIdx := cacheOpSetAddr
              cacheOpShouldResp := true.B
            case None =>
              cacheOpShouldResp := true.B
          }
        }
      }
    }
  }
  wdata_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) =>
    for (divIdx <- 0 until DCacheSetDiv) {
      for (wayIdx <- 0 until DCacheWays) {
        when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isWriteData(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
          val data_bank = data_banks(divIdx)(bankIdx)(wayIdx)
          data_bank.io.w.en := UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))(wayIdx) && cacheOpDivAddr === divIdx.U
          data_bank.io.w.addr := cacheOpSetAddr
          data_bank.io.w.data := io.cacheOp.req.bits.write_data_vec(bankIdx)
          cacheOpShouldResp := true.B
        }
      }
    }
  }
  wdataEcc_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) =>
    for (divIdx <- 0 until DCacheSetDiv) {
      for (wayIdx <- 0 until DCacheWays) {
        when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isWriteDataECC(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
          ecc_banks match {
            case Some(banks) =>
              val ecc_bank = banks(divIdx)(bankIdx)(wayIdx)
              ecc_bank.io.w.req.valid := UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))(wayIdx) && cacheOpDivAddr === divIdx.U
              ecc_bank.io.w.req.bits.apply(
                setIdx = cacheOpSetAddr,
                data = io.cacheOp.req.bits.write_data_ecc,
                waymask = 1.U
              )
              cacheOpShouldResp := true.B
            case None =>
              cacheOpShouldResp := true.B
          }
        }
      }
    }
  }
  // the response is produced the cycle after a handled request
  io.cacheOp.resp.valid := RegNext(io.cacheOp.req.valid && cacheOpShouldResp)
  for (bank_index <- 0 until DCacheBanks) {
    val cacheOpDivAddrReg = RegEnable(cacheOpDivAddr, io.cacheOp.req.valid)
    val cacheOpWayNumDivAddrReg = RegEnable(cacheOpWayNum, io.cacheOp.req.valid)
    io.cacheOp.resp.bits.read_data_vec(bank_index) := read_result(cacheOpDivAddrReg)(bank_index)(cacheOpWayNumDivAddrReg).raw_data
    eccReadResult(bank_index) := read_result(cacheOpDivAddrReg)(bank_index)(cacheOpWayNumDivAddrReg).ecc
  }

  io.cacheOp.resp.bits.read_data_ecc := Mux(io.cacheOp.resp.valid,
    eccReadResult(RegEnable(io.cacheOp.req.bits.bank_num, io.cacheOp.req.valid)),
    0.U
  )

  // ChiselDB logging of read/read bank conflicts between load pipelines 0 and 1
  val tableName = "BankConflict" + p(XSCoreParamsKey).HartId.toString
  val siteName = "BankedDataArray" + p(XSCoreParamsKey).HartId.toString
  val bankConflictTable = ChiselDB.createTable(tableName, new BankConflictDB)
  val bankConflictData = Wire(new BankConflictDB)
  for (i <- 0 until LoadPipelineWidth) {
    bankConflictData.set_index(i) := set_addrs(i)
    bankConflictData.addr(i) := io.read(i).bits.addr
  }

  // FIXME: rr_bank_conflict(0)(1) no generalization
  when(rr_bank_conflict(0)(1)) {
    (0 until (VLEN/DCacheSRAMRowBits)).map(i => {
      bankConflictData.bank_index(i) := bank_addrs(0)(i)
    })
    bankConflictData.way_index := OHToUInt(way_en(0))
    bankConflictData.fake_rr_bank_conflict := set_addrs(0) === set_addrs(1) && div_addrs(0) === div_addrs(1)
  }.otherwise {
    (0 until (VLEN/DCacheSRAMRowBits)).map(i => {
      bankConflictData.bank_index(i) := 0.U
    })
    bankConflictData.way_index := 0.U
    bankConflictData.fake_rr_bank_conflict := false.B
  }

  // run-time switch for the table logging
  val isWriteBankConflictTable = Constantin.createRecord(s"isWriteBankConflictTable${p(XSCoreParamsKey).HartId}")
  bankConflictTable.log(
    data = bankConflictData,
    en = isWriteBankConflictTable.orR && rr_bank_conflict(0)(1),
    site = siteName,
    clock = clock,
    reset = reset
  )

  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
    XSPerfAccumulate(s"data_array_fake_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y) && set_addrs(x)===set_addrs(y) && div_addrs(x) === div_addrs(y))
  ))

}

// the smallest access unit is bank
// Data array with one DataSRAMBank (all ways) per (set-div, bank); with ECC,
// one DCacheWays-wide ECC SRAM per (set-div, bank).
class BankedDataArray(implicit p: Parameters) extends AbstractBankedDataArray {
  println(" DCacheType: BankedDataArray")
  val ReduceReadlineConflict = false

  io.write.ready := true.B
  io.write_dup.foreach(_.ready := true.B)

  // indexed as data_banks(div)(bank)
  val data_banks = List.fill(DCacheSetDiv)(List.tabulate(DCacheBanks)(i => Module(new DataSRAMBank(i))))
  // indexed as banks(div)(bank); the ways live inside each SRAM
  val ecc_banks = DataEccParam.map {
    case _ =>
      val ecc = List.fill(DCacheSetDiv)(List.fill(DCacheBanks)(
        Module(new SRAMTemplate(
          Bits(eccBits.W),
          set = DCacheSets / DCacheSetDiv,
          way = DCacheWays,
          shouldReset = false,
          holdRead = false,
          singlePort = true
        ))
      ))
      ecc
  }

  data_banks.map(_.map(_.dump()))

  val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
  val set_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val div_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val bank_addrs = Wire(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, UInt())))
  val way_en_reg = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
  val set_addrs_reg = Wire(Vec(LoadPipelineWidth, UInt()))

  val line_set_addr = addr_to_dcache_div_set(io.readline.bits.addr)
  val line_div_addr = addr_to_dcache_div(io.readline.bits.addr)
  val line_way_en = io.readline.bits.way_en

  // write request captured for one cycle
  val write_bank_mask_reg = RegEnable(io.write.bits.wmask, io.write.valid)
  val write_data_reg = RegEnable(io.write.bits.data, io.write.valid)
  val write_valid_reg = RegNext(io.write.valid)
  val write_valid_dup_reg = io.write_dup.map(x => RegNext(x.valid))
  val write_wayen_dup_reg = io.write_dup.map(x => RegEnable(x.bits.way_en, x.valid))
  val write_set_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div_set(x.bits.addr), x.valid))
  val write_div_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div(x.bits.addr), x.valid))

  // read data_banks and ecc_banks
  // for single port SRAM, do not allow read and write in the same cycle
  val rwhazard = RegNext(io.write.valid)
  val rrhazard = false.B // io.readline.valid
  (0 until LoadPipelineWidth).map(rport_index => {
    div_addrs(rport_index) := addr_to_dcache_div(io.read(rport_index).bits.addr)
    bank_addrs(rport_index)(0) := addr_to_dcache_bank(io.read(rport_index).bits.addr)
    // the second row is the next bank only for 128-bit requests
    bank_addrs(rport_index)(1) := Mux(io.is128Req(rport_index), bank_addrs(rport_index)(0) + 1.U, bank_addrs(rport_index)(0))
    set_addrs(rport_index) := addr_to_dcache_div_set(io.read(rport_index).bits.addr)
    set_addrs_reg(rport_index) := RegEnable(addr_to_dcache_div_set(io.read(rport_index).bits.addr), io.read(rport_index).valid)

    // use way_en to select a way after data read out
    assert(!(RegNext(io.read(rport_index).fire && PopCount(io.read(rport_index).bits.way_en) > 1.U)))
    way_en(rport_index) := io.read(rport_index).bits.way_en
    way_en_reg(rport_index) := RegEnable(io.read(rport_index).bits.way_en, io.read(rport_index).valid)
  })

  // read each bank, get bank result
  val rr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x => Seq.tabulate(LoadPipelineWidth)(y =>
    io.read(x).valid && io.read(y).valid &&
    div_addrs(x) === div_addrs(y) &&
    (io.read(x).bits.bankMask & io.read(y).bits.bankMask) =/= 0.U
  ))
  val rrl_bank_conflict = Wire(Vec(LoadPipelineWidth, Bool()))
  val rrl_bank_conflict_intend = Wire(Vec(LoadPipelineWidth, Bool()))
  (0 until LoadPipelineWidth).foreach { i =>
    val judge = if (ReduceReadlineConflict) io.read(i).valid && (io.readline.bits.rmask & io.read(i).bits.bankMask) =/= 0.U && div_addrs(i) === line_div_addr
                else io.read(i).valid && div_addrs(i)===line_div_addr
    rrl_bank_conflict(i) := judge && io.readline.valid
    rrl_bank_conflict_intend(i) := judge && io.readline_intend
  }
  val wr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x =>
    io.read(x).valid &&
    write_valid_reg &&
    div_addrs(x) === write_div_addr_dup_reg.head &&
    (write_bank_mask_reg(bank_addrs(x)(0)) || write_bank_mask_reg(bank_addrs(x)(1)) && io.is128Req(x))
  )
  val wrl_bank_conflict = io.readline.valid && write_valid_reg && line_div_addr === write_div_addr_dup_reg.head
  // ready
  io.readline.ready := !(wrl_bank_conflict)
  io.read.zipWithIndex.map{case(x, i) => x.ready := !(wr_bank_conflict(i) || rrhazard)}

  val perf_multi_read = PopCount(io.read.map(_.valid)) >= 2.U
  (0 until LoadPipelineWidth).foreach(i => {
    // remove fake rr_bank_conflict situation in s2
    val real_other_bank_conflict_reg = RegNext(wr_bank_conflict(i) || rrl_bank_conflict(i))
    val real_rr_bank_conflict_reg = (if (i == 0) 0.B else (0 until i).map{ j =>
      RegNext(rr_bank_conflict(j)(i)) && (set_addrs_reg(j) =/= set_addrs_reg(i))
    }.reduce(_ || _))
    io.bank_conflict_slow(i) := real_other_bank_conflict_reg || real_rr_bank_conflict_reg

    // get result in s1
    io.disable_ld_fast_wakeup(i) := wr_bank_conflict(i) || rrl_bank_conflict_intend(i) ||
      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
  })
  XSPerfAccumulate("data_array_multi_read", perf_multi_read)
  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
    XSPerfAccumulate(s"data_array_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y))
  ))
  (0 until LoadPipelineWidth).foreach(i => {
    XSPerfAccumulate(s"data_array_rrl_bank_conflict_${i}", rrl_bank_conflict(i))
    XSPerfAccumulate(s"data_array_rw_bank_conflict_${i}", wr_bank_conflict(i))
    XSPerfAccumulate(s"data_array_read_${i}", io.read(i).valid)
  })
  XSPerfAccumulate("data_array_access_total", PopCount(io.read.map(_.valid)))
  XSPerfAccumulate("data_array_read_line", io.readline.valid)
  XSPerfAccumulate("data_array_write", io.write.valid)

  val bank_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, new L1BankedDataReadResult()))))
  val bank_result_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, new L1BankedDataReadResult()))))
  val ecc_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, UInt(eccBits.W)))))
  val read_bank_error_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, Bool()))))
  dontTouch(bank_result)
  dontTouch(read_bank_error_delayed)
  for (div_index <- 0 until DCacheSetDiv) {
    for (bank_index <- 0 until DCacheBanks) {
      // Set Addr & Read Way Mask
      //
      //    Pipe 0   ....  Pipe (n-1)
      //      +      ....     +
      //      |      ....     |
      // +----+---------------+-----+
      //  X                        X
      //   X                      +------+ Bank Addr Match
      //    +---------+----------+
      //              |
      //     +--------+--------+
      //     |    Data Bank    |
      //     +-----------------+
      val bank_addr_matchs = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
        io.read(i).valid && div_addrs(i) === div_index.U && (bank_addrs(i)(0) === bank_index.U || bank_addrs(i)(1) === bank_index.U && io.is128Req(i))
      })))
      val readline_match = Wire(Bool())
      if (ReduceReadlineConflict) {
        readline_match := io.readline.valid && io.readline.bits.rmask(bank_index) && line_div_addr === div_index.U
      } else {
        readline_match := io.readline.valid && line_div_addr === div_index.U
      }

      // a line read takes the bank address; otherwise the lowest-numbered matching load does
      val bank_set_addr = Mux(readline_match,
        line_set_addr,
        PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => bank_addr_matchs(i) -> set_addrs(i)))
      )
      val read_enable = bank_addr_matchs.asUInt.orR || readline_match

      // read raw data
      val data_bank = data_banks(div_index)(bank_index)
      data_bank.io.r.en := read_enable
      data_bank.io.r.addr := bank_set_addr
      for (way_index <- 0 until DCacheWays) {
        bank_result(div_index)(bank_index)(way_index).raw_data := data_bank.io.r.data(way_index)
        bank_result_delayed(div_index)(bank_index)(way_index) := RegEnable(bank_result(div_index)(bank_index)(way_index), RegNext(read_enable))
      }

      // read ECC
      ecc_banks match {
        case Some(banks) =>
          val ecc_bank = banks(div_index)(bank_index)
          ecc_bank.io.r.req.valid := read_enable
          ecc_bank.io.r.req.bits.apply(setIdx = bank_set_addr)
          ecc_result(div_index)(bank_index) := ecc_bank.io.r.resp.data
          for (way_index <- 0 until DCacheWays) {
            bank_result(div_index)(bank_index)(way_index).ecc := ecc_bank.io.r.resp.data(way_index)
          }
        case None =>
          ecc_result(div_index)(bank_index) := DontCare
          for (way_index <- 0 until DCacheWays) {
            bank_result(div_index)(bank_index)(way_index).ecc := DontCare
          }
      }

874 // use ECC to check error 875 ecc_banks match { 876 case Some(_) => 877 for (way_index <- 0 until DCacheWays) { 878 val ecc_data = bank_result(div_index)(bank_index)(way_index).asECCData() 879 val ecc_data_delayed = RegEnable(ecc_data, RegNext(read_enable)) 880 bank_result(div_index)(bank_index)(way_index).error_delayed := dcacheParameters.dataCode.decode(ecc_data_delayed).error 881 read_bank_error_delayed(div_index)(bank_index)(way_index) := bank_result(div_index)(bank_index)(way_index).error_delayed 882 } 883 case None => 884 for (way_index <- 0 until DCacheWays) { 885 bank_result(div_index)(bank_index)(way_index).error_delayed := false.B 886 read_bank_error_delayed(div_index)(bank_index)(way_index) := false.B 887 } 888 } 889 } 890 } 891 892 val data_read_oh = WireInit(VecInit(Seq.fill(DCacheSetDiv)(0.U(XLEN.W)))) 893 for (div_index <- 0 until DCacheSetDiv){ 894 val temp = WireInit(VecInit(Seq.fill(DCacheBanks)(0.U(XLEN.W)))) 895 for (bank_index <- 0 until DCacheBanks) { 896 temp(bank_index) := PopCount(Fill(DCacheWays, data_banks(div_index)(bank_index).io.r.en.asUInt)) 897 } 898 data_read_oh(div_index) := temp.reduce(_ + _) 899 } 900 XSPerfAccumulate("data_read_counter", data_read_oh.foldLeft(0.U)(_ + _)) 901 902 (0 until LoadPipelineWidth).map(i => { 903 // 1 cycle after read fire(load s2) 904 val r_read_fire = RegNext(io.read(i).fire) 905 val r_div_addr = RegEnable(div_addrs(i), io.read(i).fire) 906 val r_bank_addr = RegEnable(bank_addrs(i), io.read(i).fire) 907 val r_way_addr = RegEnable(OHToUInt(way_en(i)), io.read(i).fire) 908 // 2 cycles after read fire(load s3) 909 val rr_read_fire = RegNext(r_read_fire) 910 val rr_div_addr = RegEnable(RegEnable(div_addrs(i), io.read(i).fire), r_read_fire) 911 val rr_bank_addr = RegEnable(RegEnable(bank_addrs(i), io.read(i).fire), r_read_fire) 912 val rr_way_addr = RegEnable(RegEnable(OHToUInt(way_en(i)), io.read(i).fire), r_read_fire) 913 (0 until VLEN/DCacheSRAMRowBits).map( j =>{ 914 io.read_resp(i)(j) := 
bank_result(r_div_addr)(r_bank_addr(j))(r_way_addr) 915 // error detection 916 io.read_error_delayed(i)(j) := rr_read_fire && read_bank_error_delayed(rr_div_addr)(rr_bank_addr(j))(rr_way_addr) && !RegNext(io.bank_conflict_slow(i)) 917 }) 918 }) 919 920 // read result: expose banked read result 921 (0 until DCacheBanks).map(i => { 922 io.readline_resp(i) := bank_result(RegEnable(line_div_addr, io.readline.valid))(i)(RegEnable(OHToUInt(io.readline.bits.way_en), io.readline.valid)) 923 }) 924 io.readline_error_delayed := RegNext(RegNext(io.readline.fire)) && 925 VecInit((0 until DCacheBanks).map(i => io.readline_resp(i).error_delayed)).asUInt.orR 926 927 // write data_banks & ecc_banks 928 for (div_index <- 0 until DCacheSetDiv) { 929 for (bank_index <- 0 until DCacheBanks) { 930 // data write 931 val wen_reg = write_bank_mask_reg(bank_index) && 932 write_valid_dup_reg(bank_index) && 933 write_div_addr_dup_reg(bank_index) === div_index.U && RegNext(io.write.valid) 934 val data_bank = data_banks(div_index)(bank_index) 935 data_bank.io.w.en := wen_reg 936 data_bank.io.w.way_en := write_wayen_dup_reg(bank_index) 937 data_bank.io.w.addr := write_set_addr_dup_reg(bank_index) 938 data_bank.io.w.data := write_data_reg(bank_index) 939 940 // ecc write 941 ecc_banks match { 942 case Some(banks) => 943 val ecc_bank = banks(div_index)(bank_index) 944 ecc_bank.io.w.req.valid := wen_reg 945 ecc_bank.io.w.req.bits.apply( 946 setIdx = write_set_addr_dup_reg(bank_index), 947 data = RegEnable(getECCFromEncWord(cacheParams.dataCode.encode((io.write.bits.data(bank_index)))), io.write.valid), 948 waymask = write_wayen_dup_reg(bank_index) 949 ) 950 when(ecc_bank.io.w.req.valid) { 951 XSDebug("write in ecc sram: bank %x set %x data %x waymask %x\n", 952 bank_index.U, 953 addr_to_dcache_div_set(io.write.bits.addr), 954 getECCFromEncWord(cacheParams.dataCode.encode((io.write.bits.data(bank_index)))), 955 io.write.bits.way_en 956 ) 957 } 958 case None => None 959 } 960 } 961 } 962 963 // deal 
with customized cache op 964 require(nWays <= 32) 965 io.cacheOp.resp.bits := DontCare 966 val cacheOpShouldResp = WireInit(false.B) 967 val eccReadResult = Wire(Vec(DCacheBanks, UInt(eccBits.W))) 968 // DCacheDupNum is 16 969 // vec: the dupIdx for every bank and every group 970 val rdata_dup_vec = Seq(0, 0, 1, 1, 2, 2, 3, 3) 971 val rdataEcc_dup_vec = Seq(4, 4, 5, 5, 6, 6, 7, 7) 972 val wdata_dup_vec = Seq(8, 8, 9, 9, 10, 10, 11, 11) 973 val wdataEcc_dup_vec = Seq(12, 12, 13, 13, 14, 14, 15, 15) 974 val cacheOpDivAddr = set_to_dcache_div(io.cacheOp.req.bits.index) 975 val cacheOpSetAddr = set_to_dcache_div_set(io.cacheOp.req.bits.index) 976 val cacheOpWayMask = UIntToOH(io.cacheOp.req.bits.wayNum(4, 0)) 977 rdata_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) => 978 for (divIdx <- 0 until DCacheSetDiv) { 979 when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isReadData(io.cacheOp_req_bits_opCode_dup(dupIdx))) { 980 val data_bank = data_banks(divIdx)(bankIdx) 981 data_bank.io.r.en := true.B 982 data_bank.io.r.addr := cacheOpSetAddr 983 cacheOpShouldResp := true.B 984 } 985 } 986 } 987 rdataEcc_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) => 988 for (divIdx <- 0 until DCacheSetDiv) { 989 when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isReadDataECC(io.cacheOp_req_bits_opCode_dup(dupIdx))) { 990 ecc_banks match { 991 case Some(banks) => 992 val ecc_bank = banks(divIdx)(bankIdx) 993 ecc_bank.io.r.req.valid := true.B 994 ecc_bank.io.r.req.bits.setIdx := cacheOpSetAddr 995 cacheOpShouldResp := true.B 996 case None => 997 cacheOpShouldResp := true.B 998 } 999 } 1000 } 1001 } 1002 wdata_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) => 1003 for (divIdx <- 0 until DCacheSetDiv) { 1004 when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isWriteData(io.cacheOp_req_bits_opCode_dup(dupIdx))) { 1005 val data_bank = data_banks(divIdx)(bankIdx) 1006 data_bank.io.w.en := cacheOpDivAddr === divIdx.U 1007 data_bank.io.w.way_en := cacheOpWayMask 1008 
data_bank.io.w.addr := cacheOpSetAddr 1009 data_bank.io.w.data := io.cacheOp.req.bits.write_data_vec(bankIdx) 1010 cacheOpShouldResp := true.B 1011 } 1012 } 1013 } 1014 wdataEcc_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) => 1015 for (divIdx <- 0 until DCacheSetDiv) { 1016 when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isWriteDataECC(io.cacheOp_req_bits_opCode_dup(dupIdx))) { 1017 ecc_banks match { 1018 case Some(banks) => 1019 val ecc_bank = banks(divIdx)(bankIdx) 1020 ecc_bank.io.w.req.valid := cacheOpDivAddr === divIdx.U 1021 ecc_bank.io.w.req.bits.apply( 1022 setIdx = cacheOpSetAddr, 1023 data = io.cacheOp.req.bits.write_data_ecc, 1024 waymask = cacheOpWayMask 1025 ) 1026 cacheOpShouldResp := true.B 1027 case None => 1028 cacheOpShouldResp := true.B 1029 } 1030 } 1031 } 1032 } 1033 1034 io.cacheOp.resp.valid := RegNext(io.cacheOp.req.valid && cacheOpShouldResp) 1035 for (bank_index <- 0 until DCacheBanks) { 1036 val cacheOpDivAddrReg = RegEnable(cacheOpDivAddr, io.cacheOp.req.valid) 1037 val cacheOpWayMaskReg = RegEnable(cacheOpWayMask, io.cacheOp.req.valid) 1038 io.cacheOp.resp.bits.read_data_vec(bank_index) := bank_result(cacheOpDivAddrReg)(bank_index)(cacheOpWayMaskReg).raw_data 1039 eccReadResult(bank_index) := Mux1H(cacheOpWayMaskReg, ecc_result(cacheOpDivAddrReg)(bank_index)) 1040 } 1041 1042 io.cacheOp.resp.bits.read_data_ecc := Mux(io.cacheOp.resp.valid, 1043 eccReadResult(RegEnable(io.cacheOp.req.bits.bank_num, io.cacheOp.req.valid)), 1044 0.U 1045 ) 1046 1047 val tableName = "BankConflict" + p(XSCoreParamsKey).HartId.toString 1048 val siteName = "BankedDataArray" + p(XSCoreParamsKey).HartId.toString 1049 val bankConflictTable = ChiselDB.createTable(tableName, new BankConflictDB) 1050 val bankConflictData = Wire(new BankConflictDB) 1051 for (i <- 0 until LoadPipelineWidth) { 1052 bankConflictData.set_index(i) := set_addrs(i) 1053 bankConflictData.addr(i) := io.read(i).bits.addr 1054 } 1055 1056 // FIXME: rr_bank_conflict(0)(1) no 
generalization 1057 when(rr_bank_conflict(0)(1)) { 1058 (0 until (VLEN/DCacheSRAMRowBits)).map(i => { 1059 bankConflictData.bank_index(i) := bank_addrs(0)(i) 1060 }) 1061 bankConflictData.way_index := OHToUInt(way_en(0)) 1062 bankConflictData.fake_rr_bank_conflict := set_addrs(0) === set_addrs(1) && div_addrs(0) === div_addrs(1) 1063 }.otherwise { 1064 (0 until (VLEN/DCacheSRAMRowBits)).map(i => { 1065 bankConflictData.bank_index(i) := 0.U 1066 }) 1067 bankConflictData.way_index := 0.U 1068 bankConflictData.fake_rr_bank_conflict := false.B 1069 } 1070 1071 val isWriteBankConflictTable = Constantin.createRecord(s"isWriteBankConflictTable${p(XSCoreParamsKey).HartId}") 1072 bankConflictTable.log( 1073 data = bankConflictData, 1074 en = isWriteBankConflictTable.orR && rr_bank_conflict(0)(1), 1075 site = siteName, 1076 clock = clock, 1077 reset = reset 1078 ) 1079 1080 (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x => 1081 XSPerfAccumulate(s"data_array_fake_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y) && set_addrs(x) === set_addrs(y) && div_addrs(x) === div_addrs(y)) 1082 )) 1083 1084} 1085