/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import utils._
import utility._
import chisel3.util._
import freechips.rocketchip.tilelink.{ClientMetadata, TLClientParameters, TLEdgeOut}
import xiangshan.{L1CacheErrorInfo, XSCoreParamsKey}

import scala.math.max

/**
 * Record type of the ChiselDB table created by [[SramedDataArray]] to log
 * read-read bank conflicts between load pipelines.
 *
 * `addr` / `set_index` carry one entry per load pipeline; `bank_index`,
 * `way_index` and `fake_rr_bank_conflict` describe the conflicting access.
 */
class BankConflictDB(implicit p: Parameters) extends DCacheBundle{
  val addr = Vec(LoadPipelineWidth, Bits(PAddrBits.W))
  val set_index = Vec(LoadPipelineWidth, UInt((DCacheAboveIndexOffset - DCacheSetOffset).W))
  val bank_index = UInt((DCacheSetOffset - DCacheBankOffset).W)
  val way_index = UInt(wayBits.W)
  val fake_rr_bank_conflict = Bool()
}

/** Word read request: a way-enable mask plus the physical address to read. */
class L1BankedDataReadReq(implicit p: Parameters) extends DCacheBundle
{
  val way_en = Bits(DCacheWays.W)
  val addr = Bits(PAddrBits.W)
}

/** Line read request: a read request plus a per-bank read mask (`rmask`). */
class L1BankedDataReadLineReq(implicit p: Parameters) extends L1BankedDataReadReq
{
  val rmask = Bits(DCacheBanks.W)
}

// Now, we can write a cache-block in a single cycle
/** Line write request: per-bank write mask and one SRAM row of data per bank. */
class L1BankedDataWriteReq(implicit p: Parameters) extends L1BankedDataReadReq
{
  val wmask = Bits(DCacheBanks.W)
  val data = Vec(DCacheBanks, Bits(DCacheSRAMRowBits.W))
}

// cache-block write request without data
class L1BankedDataWriteReqCtrl(implicit p: Parameters) extends L1BankedDataReadReq

/** Result of reading one bank: raw row data, its ECC bits and a delayed error flag. */
class L1BankedDataReadResult(implicit p: Parameters) extends DCacheBundle
{
  // you can choose which bank to read to save power
  val ecc = Bits(eccBits.W)
  val raw_data = Bits(DCacheSRAMRowBits.W)
  val error_delayed = Bool() // 1 cycle later than data resp

  /** ECC bits concatenated above the raw data, i.e. `Cat(ecc, raw_data)`. */
  def asECCData() = {
    Cat(ecc, raw_data)
  }
}

/** Write port bundle of a [[DataSRAMBank]]: enable, set address, way mask and row data. */
class DataSRAMBankWriteReq(implicit p: Parameters) extends DCacheBundle {
  val en = Bool()
  val addr = UInt()
  val way_en = UInt(DCacheWays.W)
  val data = UInt(DCacheSRAMRowBits.W)
}

// wrap a sram
/**
 * A single-way, single-port data SRAM holding `DCacheSets / DCacheSetDiv` rows of
 * `DCacheSRAMRowBits` bits.
 *
 * @param bankIdx bank index, only used in debug prints
 * @param wayIdx  way index, only used in debug prints
 */
class DataSRAM(bankIdx: Int, wayIdx: Int)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle() {
    val w = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Input(UInt(DCacheSRAMRowBits.W))
    }

    val r = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Output(UInt(DCacheSRAMRowBits.W))
    }
  })

  // data sram
  val data_sram = Module(new SRAMTemplate(
    Bits(DCacheSRAMRowBits.W),
    set = DCacheSets / DCacheSetDiv,
    way = 1,
    shouldReset = false,
    holdRead = false,
    singlePort = true
  ))

  data_sram.io.w.req.valid := io.w.en
  data_sram.io.w.req.bits.apply(
    setIdx = io.w.addr,
    data = io.w.data,
    waymask = 1.U
  )
  data_sram.io.r.req.valid := io.r.en
  data_sram.io.r.req.bits.apply(setIdx = io.r.addr)
  io.r.data := data_sram.io.r.resp.data(0)
  XSPerfAccumulate("data_sram_read_counter", data_sram.io.r.req.valid)

  /** Debug-print the read issued in the previous cycle together with the data now visible. */
  def dump_r() = {
    when(RegNext(io.r.en)) {
      XSDebug("bank read set %x bank %x way %x data %x\n",
        RegNext(io.r.addr),
        bankIdx.U,
        wayIdx.U,
        io.r.data
      )
    }
  }

  /** Debug-print the write issued in the current cycle. */
  def dump_w() = {
    when(io.w.en) {
      XSDebug("bank write set %x bank %x way %x data %x\n",
        io.w.addr,
        bankIdx.U,
        wayIdx.U,
        io.w.data
      )
    }
  }

  def dump() = {
    dump_w()
    dump_r()
  }
}

// wrap data rows of all ways
/**
 * One data bank made of `DCacheWays` single-way, single-port SRAMs.
 *
 * A read enables every way's SRAM and the registered `way_en` mask selects the
 * returned row one cycle later. At most one bit of `way_en` may be set for a
 * read or a write (checked by the assertions below).
 *
 * @param index bank index (not used inside this module)
 */
class DataSRAMBank(index: Int)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle() {
    val w = Input(new DataSRAMBankWriteReq)

    val r = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val way_en = Input(UInt(DCacheWays.W))
      val data = Output(UInt(DCacheSRAMRowBits.W))
    }
  })

  assert(RegNext(!io.w.en || PopCount(io.w.way_en) <= 1.U))
  assert(RegNext(!io.r.en || PopCount(io.r.way_en) <= 1.U))

  val r_way_en_reg = RegNext(io.r.way_en)

  // external controls do not read and write at the same time
  val w_info = io.w
  // val rw_bypass = RegNext(io.w.addr === io.r.addr && io.w.way_en === io.r.way_en && io.w.en)

  // multiway data bank
  val data_bank = Array.fill(DCacheWays) {
    Module(new SRAMTemplate(
      Bits(DCacheSRAMRowBits.W),
      set = DCacheSets / DCacheSetDiv,
      way = 1,
      shouldReset = false,
      holdRead = false,
      singlePort = true
    ))
  }

  for (w <- 0 until DCacheWays) {
    val wen = w_info.en && w_info.way_en(w)
    data_bank(w).io.w.req.valid := wen
    data_bank(w).io.w.req.bits.apply(
      setIdx = w_info.addr,
      data = w_info.data,
      waymask = 1.U
    )
    data_bank(w).io.r.req.valid := io.r.en
    data_bank(w).io.r.req.bits.apply(setIdx = io.r.addr)
  }
  XSPerfAccumulate("data_read_counter", PopCount(Cat(data_bank.map(_.io.r.req.valid))))

  // Two-level way select: the low ways are chosen with the low bits of the
  // registered way mask (tail drops the `half` top bits), the high ways with
  // its top `half` bits (head), then one of the two halves is picked.
  val half = nWays / 2
  val data_read = data_bank.map(_.io.r.resp.data(0))
  val data_left = Mux1H(r_way_en_reg.tail(half), data_read.take(half))
  val data_right = Mux1H(r_way_en_reg.head(half), data_read.drop(half))

  val sel_low = r_way_en_reg.tail(half).orR()
  val row_data = Mux(sel_low, data_left, data_right)

  io.r.data := row_data

  /** Debug-print the read issued in the previous cycle together with the data now visible. */
  def dump_r() = {
    when(RegNext(io.r.en)) {
      XSDebug("bank read addr %x way_en %x data %x\n",
        RegNext(io.r.addr),
        RegNext(io.r.way_en),
        io.r.data
      )
    }
  }

  /** Debug-print the write issued in the current cycle. */
  def dump_w() = {
    when(io.w.en) {
      XSDebug("bank write addr %x way_en %x data %x\n",
        io.w.addr,
        io.w.way_en,
        io.w.data
      )
    }
  }

  def dump() = {
    dump_w()
    dump_r()
  }
}

// Banked DCache Data
// -----------------------------------------------------------------
// | Bank0 | Bank1 | Bank2 | Bank3 | Bank4 | Bank5 | Bank6 | Bank7 |
// -----------------------------------------------------------------
// | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  |
// | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  |
// | ....  | ....  | ....  | ....  | ....  | ....  | ....  | ....  |
// -----------------------------------------------------------------
/**
 * Common IO and debug helpers shared by the banked data array implementations
 * ([[SramedDataArray]] and [[BankedDataArray]]).
 */
abstract class AbstractBankedDataArray(implicit p: Parameters) extends DCacheModule
{
  val ReadlinePortErrorIndex = LoadPipelineWidth
  val io = IO(new DCacheBundle {
    // load pipeline read word req
    val read = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new L1BankedDataReadReq)))
    // main pipeline read / write line req
    val readline_intend = Input(Bool())
    val readline = Flipped(DecoupledIO(new L1BankedDataReadLineReq))
    val write = Flipped(DecoupledIO(new L1BankedDataWriteReq))
    val write_dup = Vec(DCacheBanks, Flipped(Decoupled(new L1BankedDataWriteReqCtrl)))
    // data for readline and loadpipe
    val readline_resp = Output(Vec(DCacheBanks, new L1BankedDataReadResult()))
    val readline_error_delayed = Output(Bool())
    val read_resp_delayed = Output(Vec(LoadPipelineWidth, new L1BankedDataReadResult()))
    val read_error_delayed = Output(Vec(LoadPipelineWidth, Bool()))
    // val nacks = Output(Vec(LoadPipelineWidth, Bool()))
    // val errors = Output(Vec(LoadPipelineWidth + 1, new L1CacheErrorInfo)) // read ports + readline port
    // when bank_conflict, read (1) port should be ignored
    val bank_conflict_slow = Output(Vec(LoadPipelineWidth, Bool()))
    val bank_conflict_fast = Output(Vec(LoadPipelineWidth, Bool()))
    val disable_ld_fast_wakeup = Output(Vec(LoadPipelineWidth, Bool()))
    // customized cache op port
    val cacheOp = Flipped(new L1CacheInnerOpIO)
    val cacheOp_req_dup = Vec(DCacheDupNum, Flipped(Valid(new CacheCtrlReqInfo)))
    val cacheOp_req_bits_opCode_dup = Input(Vec(DCacheDupNum, UInt(XLEN.W)))
  })

  /** Build a Vec with one element per load pipeline from `f(pipelineIndex)`. */
  def pipeMap[T <: Data](f: Int => T) = VecInit((0 until LoadPipelineWidth).map(f))

  /** Extract the ECC bits (everything above `wordBits`) from an encoded word of `encWordBits` bits. */
  def getECCFromEncWord(encWord: UInt) = {
    require(encWord.getWidth == encWordBits)
    encWord(encWordBits - 1, wordBits)
  }

  def dumpRead() = {
    (0 until LoadPipelineWidth) map { w =>
      when(io.read(w).valid) {
        XSDebug(s"DataArray Read channel: $w valid way_en: %x addr: %x\n",
          io.read(w).bits.way_en, io.read(w).bits.addr)
      }
    }
    when(io.readline.valid) {
      XSDebug(s"DataArray Read Line, valid way_en: %x addr: %x rmask %x\n",
        io.readline.bits.way_en, io.readline.bits.addr, io.readline.bits.rmask)
    }
  }

  def dumpWrite() = {
    when(io.write.valid) {
      XSDebug(s"DataArray Write valid way_en: %x addr: %x\n",
        io.write.bits.way_en, io.write.bits.addr)

      (0 until DCacheBanks) map { r =>
        XSDebug(s"cycle: $r data: %x wmask: %x\n",
          io.write.bits.data(r), io.write.bits.wmask(r))
      }
    }
  }

  def dumpResp() = {
    XSDebug(s"DataArray ReadeResp channel:\n")
    (0 until LoadPipelineWidth) map { r =>
      XSDebug(s"cycle: $r data: %x\n", io.read_resp_delayed(r).raw_data)
    }
  }

  def dump() = {
    // the helpers are declared with (), so call them with ()
    dumpRead()
    dumpWrite()
    dumpResp()
  }
}

// the smallest access unit is sram
/**
 * Data array built from one [[DataSRAM]] (plus one ECC SRAM) per
 * (set division, bank, way).
 *
 * Two load-pipeline reads only conflict when they target the same division,
 * bank and way but a different set; a load read conflicts with a line read
 * when division and way match but the set differs, and with a registered
 * write when division and way match.
 */
class SramedDataArray(implicit p: Parameters) extends AbstractBankedDataArray {
  println(" DCacheType: SramedDataArray")
  val ReduceReadlineConflict = false

  io.write.ready := true.B
  io.write_dup.foreach(_.ready := true.B)

  val data_banks = List.tabulate(DCacheSetDiv)( k => List.tabulate(DCacheBanks)(i => List.tabulate(DCacheWays)(j => Module(new DataSRAM(i,j)))))
  // ecc_banks is built in the same [div][bank][way] order as data_banks, because
  // every access below indexes it as ecc_banks(div)(bank)(way). (Building it as
  // [div][way][bank] only works by accident when DCacheWays == DCacheBanks.)
  val ecc_banks = List.tabulate(DCacheSetDiv)( k =>
    List.tabulate(DCacheBanks)(i =>
      List.tabulate(DCacheWays)(j =>
        Module(new SRAMTemplate(
          Bits(eccBits.W),
          set = DCacheSets / DCacheSetDiv,
          way = 1,
          shouldReset = false,
          holdRead = false,
          singlePort = true
        ))
  )))

  data_banks.map(_.map(_.map(_.dump())))

  val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
  val set_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val div_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val bank_addrs = Wire(Vec(LoadPipelineWidth, UInt()))

  val line_set_addr = addr_to_dcache_div_set(io.readline.bits.addr)
  val line_div_addr = addr_to_dcache_div(io.readline.bits.addr)
  val line_way_en = io.readline.bits.way_en

  // the write request is registered once; the SRAMs are written one cycle after io.write
  val write_bank_mask_reg = RegNext(io.write.bits.wmask)
  val write_data_reg = RegNext(io.write.bits.data)
  val write_valid_reg = RegNext(io.write.valid)
  val write_valid_dup_reg = io.write_dup.map(x => RegNext(x.valid))
  val write_wayen_dup_reg = io.write_dup.map(x => RegNext(x.bits.way_en))
  val write_set_addr_dup_reg = io.write_dup.map(x => RegNext(addr_to_dcache_div_set(x.bits.addr)))
  val write_div_addr_dup_reg = io.write_dup.map(x => RegNext(addr_to_dcache_div(x.bits.addr)))

  // read data_banks and ecc_banks
  // for single port SRAM, do not allow read and write in the same cycle
  val rrhazard = false.B // io.readline.valid
  (0 until LoadPipelineWidth).map(rport_index => {
    div_addrs(rport_index) := addr_to_dcache_div(io.read(rport_index).bits.addr)
    set_addrs(rport_index) := addr_to_dcache_div_set(io.read(rport_index).bits.addr)
    bank_addrs(rport_index) := addr_to_dcache_bank(io.read(rport_index).bits.addr)

    // use way_en to select a way after data read out
    assert(!(RegNext(io.read(rport_index).fire() && PopCount(io.read(rport_index).bits.way_en) > 1.U)))
    way_en(rport_index) := io.read(rport_index).bits.way_en
  })

  // read conflict
  val rr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x => Seq.tabulate(LoadPipelineWidth)(y =>
    io.read(x).valid && io.read(y).valid &&
    div_addrs(x) === div_addrs(y) &&
    bank_addrs(x) === bank_addrs(y) &&
    io.read(x).bits.way_en === io.read(y).bits.way_en &&
    set_addrs(x) =/= set_addrs(y)
  ))
  val rrl_bank_conflict = Wire(Vec(LoadPipelineWidth, Bool()))
  val rrl_bank_conflict_intend = Wire(Vec(LoadPipelineWidth, Bool()))
  (0 until LoadPipelineWidth).foreach { i =>
    val judge = if (ReduceReadlineConflict) io.read(i).valid && io.readline.bits.rmask(bank_addrs(i)) && line_div_addr === div_addrs(i) && io.readline.bits.way_en === way_en(i) && line_set_addr =/= set_addrs(i)
                else io.read(i).valid && line_div_addr === div_addrs(i) && io.readline.bits.way_en === way_en(i) && line_set_addr =/= set_addrs(i)
    rrl_bank_conflict(i) := judge && io.readline.valid
    rrl_bank_conflict_intend(i) := judge && io.readline_intend
  }
  val wr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x =>
    io.read(x).valid && write_valid_reg &&
    div_addrs(x) === write_div_addr_dup_reg.head &&
    way_en(x) === write_wayen_dup_reg.head
  )
  val wrl_bank_conflict = io.readline.valid && write_valid_reg && line_div_addr === write_div_addr_dup_reg.head && line_way_en === write_wayen_dup_reg.head
  // ready
  io.readline.ready := !(wrl_bank_conflict)
  io.read.zipWithIndex.map { case (x, i) => x.ready := !(wr_bank_conflict(i) || rrhazard) }

  val perf_multi_read = PopCount(io.read.map(_.valid)) >= 2.U
  (0 until LoadPipelineWidth).foreach(i => {
    // a pipeline only loses a read-read conflict against lower-numbered pipelines
    io.bank_conflict_fast(i) := wr_bank_conflict(i) || rrl_bank_conflict(i) ||
      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
    io.bank_conflict_slow(i) := RegNext(io.bank_conflict_fast(i))
    io.disable_ld_fast_wakeup(i) := wr_bank_conflict(i) || rrl_bank_conflict_intend(i) ||
      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
  })
  XSPerfAccumulate("data_array_multi_read", perf_multi_read)
  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
    XSPerfAccumulate(s"data_array_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y))
  ))
  (0 until LoadPipelineWidth).foreach(i => {
    XSPerfAccumulate(s"data_array_rrl_bank_conflict_${i}", rrl_bank_conflict(i))
    XSPerfAccumulate(s"data_array_rw_bank_conflict_${i}", wr_bank_conflict(i))
    XSPerfAccumulate(s"data_array_read_${i}", io.read(i).valid)
  })
  XSPerfAccumulate("data_array_access_total", PopCount(io.read.map(_.valid)))
  XSPerfAccumulate("data_array_read_line", io.readline.valid)
  XSPerfAccumulate("data_array_write", io.write.valid)

  val read_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays,new L1BankedDataReadResult()))))
  val read_error_delayed_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, Bool()))))
  dontTouch(read_result)
  dontTouch(read_error_delayed_result)
  for (div_index <- 0 until DCacheSetDiv){
    for (bank_index <- 0 until DCacheBanks) {
      for (way_index <- 0 until DCacheWays) {
        // Set Addr & Read Way Mask
        //
        //    Pipe 0   ....  Pipe (n-1)
        //      +      ....     +
        //      |      ....     |
        // +----+---------------+-----+
        //  X                        X
        //   X                      +------+ Bank Addr Match
        //    +---------+----------+
        //              |
        //     +--------+--------+
        //     |    Data Bank    |
        //     +-----------------+
        val loadpipe_en = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
          io.read(i).valid && div_addrs(i) === div_index.U && bank_addrs(i) === bank_index.U && way_en(i)(way_index)
        })))
        val readline_en = Wire(Bool())
        if (ReduceReadlineConflict) {
          readline_en := io.readline.valid && io.readline.bits.rmask(bank_index) && io.readline.bits.way_en(way_index) && div_index.U === line_div_addr
        } else {
          readline_en := io.readline.valid && io.readline.bits.way_en(way_index) && div_index.U === line_div_addr
        }
        // a line read takes priority over the load pipelines for the set address
        val sram_set_addr = Mux(readline_en,
          addr_to_dcache_div_set(io.readline.bits.addr),
          PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => loadpipe_en(i) -> set_addrs(i)))
        )
        val read_en = loadpipe_en.asUInt.orR || readline_en
        // read raw data
        val data_bank = data_banks(div_index)(bank_index)(way_index)
        data_bank.io.r.en := read_en
        data_bank.io.r.addr := sram_set_addr
        val ecc_bank = ecc_banks(div_index)(bank_index)(way_index)
        ecc_bank.io.r.req.valid := read_en
        ecc_bank.io.r.req.bits.apply(setIdx = sram_set_addr)

        read_result(div_index)(bank_index)(way_index).raw_data := data_bank.io.r.data
        read_result(div_index)(bank_index)(way_index).ecc := ecc_bank.io.r.resp.data(0)

        // use ECC to check error
        val ecc_data = read_result(div_index)(bank_index)(way_index).asECCData()
        val ecc_data_delayed = RegEnable(ecc_data, RegNext(read_en))
        read_result(div_index)(bank_index)(way_index).error_delayed := dcacheParameters.dataCode.decode(ecc_data_delayed).error
        read_error_delayed_result(div_index)(bank_index)(way_index) := read_result(div_index)(bank_index)(way_index).error_delayed
      }
    }
  }

  // read result: expose banked read result
  val read_result_delayed = RegNext(read_result)
  (0 until LoadPipelineWidth).map(i => {
    // io.read_resp(i) := read_result(RegNext(bank_addrs(i)))(RegNext(OHToUInt(way_en(i))))
    val rr_read_fire = RegNext(RegNext(io.read(i).fire))
    val rr_div_addr = RegNext(RegNext(div_addrs(i)))
    val rr_bank_addr = RegNext(RegNext(bank_addrs(i)))
    val rr_way_addr = RegNext(RegNext(OHToUInt(way_en(i))))
    io.read_resp_delayed(i) := read_result_delayed(rr_div_addr)(rr_bank_addr)(rr_way_addr)
    // error detection
    // normal read ports
    io.read_error_delayed(i) := rr_read_fire && read_error_delayed_result(rr_div_addr)(rr_bank_addr)(rr_way_addr) && !RegNext(io.bank_conflict_slow(i))
  })

  // readline port
  (0 until DCacheBanks).map(i => {
    io.readline_resp(i) := read_result(RegNext(line_div_addr))(i)(RegNext(OHToUInt(io.readline.bits.way_en)))
  })
  io.readline_error_delayed := RegNext(RegNext(io.readline.fire())) &&
    VecInit((0 until DCacheBanks).map(i => io.readline_resp(i).error_delayed)).asUInt().orR

  // write data_banks & ecc_banks
  for (div_index <- 0 until DCacheSetDiv) {
    for (bank_index <- 0 until DCacheBanks) {
      for (way_index <- 0 until DCacheWays) {
        // data write
        val wen_reg = write_bank_mask_reg(bank_index) &&
          write_valid_dup_reg(bank_index) &&
          write_div_addr_dup_reg(bank_index) === div_index.U &&
          write_wayen_dup_reg(bank_index)(way_index)
        val data_bank = data_banks(div_index)(bank_index)(way_index)
        data_bank.io.w.en := wen_reg

        data_bank.io.w.addr := write_set_addr_dup_reg(bank_index)
        data_bank.io.w.data := write_data_reg(bank_index)
        // ecc write
        val ecc_bank = ecc_banks(div_index)(bank_index)(way_index)
        ecc_bank.io.w.req.valid := wen_reg
        ecc_bank.io.w.req.bits.apply(
          setIdx = write_set_addr_dup_reg(bank_index),
          data = RegNext(getECCFromEncWord(cacheParams.dataCode.encode((io.write.bits.data(bank_index))))),
          waymask = 1.U
        )
        when(ecc_bank.io.w.req.valid) {
          XSDebug("write in ecc sram: bank %x set %x data %x waymask %x\n",
            bank_index.U,
            addr_to_dcache_div_set(io.write.bits.addr),
            getECCFromEncWord(cacheParams.dataCode.encode((io.write.bits.data(bank_index)))),
            io.write.bits.way_en
          );
        }
      }
    }
  }

  // deal with customized cache op
  // These connections come after the normal read/write connections above, so
  // while a cache op of the matching kind is valid they override them.
  require(nWays <= 32)
  io.cacheOp.resp.bits := DontCare
  val cacheOpShouldResp = WireInit(false.B)
  val eccReadResult = Wire(Vec(DCacheBanks, UInt(eccBits.W)))
  // DCacheDupNum is 16
  // vec: the dupIdx for every bank and every group
  val rdata_dup_vec = Seq(0,0,1,1,2,2,3,3)
  val rdataEcc_dup_vec = Seq(4,4,5,5,6,6,7,7)
  val wdata_dup_vec = Seq(8,8,9,9,10,10,11,11)
  val wdataEcc_dup_vec = Seq(12,12,13,13,14,14,15,15)
  val cacheOpDivAddr = set_to_dcache_div(io.cacheOp.req.bits.index)
  val cacheOpSetAddr = set_to_dcache_div_set(io.cacheOp.req.bits.index)
  val cacheOpWayNum = io.cacheOp.req.bits.wayNum(4, 0)
  rdata_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) =>
    for (divIdx <- 0 until DCacheSetDiv){
      for (wayIdx <- 0 until DCacheWays) {
        when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isReadData(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
          val data_bank = data_banks(divIdx)(bankIdx)(wayIdx)
          data_bank.io.r.en := UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))(wayIdx) && cacheOpDivAddr === divIdx.U
          data_bank.io.r.addr := cacheOpSetAddr
          cacheOpShouldResp := true.B
        }
      }
    }
  }
  rdataEcc_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) =>
    for (divIdx <- 0 until DCacheSetDiv) {
      for (wayIdx <- 0 until DCacheWays) {
        when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isReadDataECC(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
          val ecc_bank = ecc_banks(divIdx)(bankIdx)(wayIdx)
          ecc_bank.io.r.req.valid := true.B
          ecc_bank.io.r.req.bits.setIdx := cacheOpSetAddr
          cacheOpShouldResp := true.B
        }
      }
    }
  }
  wdata_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) =>
    for (divIdx <- 0 until DCacheSetDiv) {
      for (wayIdx <- 0 until DCacheWays) {
        when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isWriteData(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
          val data_bank = data_banks(divIdx)(bankIdx)(wayIdx)
          data_bank.io.w.en := UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))(wayIdx) && cacheOpDivAddr === divIdx.U
          data_bank.io.w.addr := cacheOpSetAddr
          data_bank.io.w.data := io.cacheOp.req.bits.write_data_vec(bankIdx)
          cacheOpShouldResp := true.B
        }
      }
    }
  }
  wdataEcc_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) =>
    for (divIdx <- 0 until DCacheSetDiv) {
      for (wayIdx <- 0 until DCacheWays) {
        when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isWriteDataECC(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
          val ecc_bank = ecc_banks(divIdx)(bankIdx)(wayIdx)
          ecc_bank.io.w.req.valid := UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))(wayIdx) && cacheOpDivAddr === divIdx.U
          ecc_bank.io.w.req.bits.apply(
            setIdx = cacheOpSetAddr,
            data = io.cacheOp.req.bits.write_data_ecc,
            waymask = 1.U
          )
          cacheOpShouldResp := true.B
        }
      }
    }
  }

  io.cacheOp.resp.valid := RegNext(io.cacheOp.req.valid && cacheOpShouldResp)
  for (bank_index <- 0 until DCacheBanks) {
    io.cacheOp.resp.bits.read_data_vec(bank_index) := read_result(RegNext(cacheOpDivAddr))(bank_index)(RegNext(cacheOpWayNum)).raw_data
    // The response is produced one cycle after the request (resp.valid is RegNext'ed),
    // so the division select has to be delayed exactly like the way select and like
    // read_data_vec above; using the un-registered division picks the wrong SRAM.
    eccReadResult(bank_index) := read_result(RegNext(cacheOpDivAddr))(bank_index)(RegNext(cacheOpWayNum)).ecc
  }

  io.cacheOp.resp.bits.read_data_ecc := Mux(io.cacheOp.resp.valid,
    eccReadResult(RegNext(io.cacheOp.req.bits.bank_num)),
    0.U
  )

  // log read-read bank conflicts between load pipelines 0 and 1 into a ChiselDB table
  val tableName = "BankConflict" + p(XSCoreParamsKey).HartId.toString
  val siteName = "BankedDataArray" + p(XSCoreParamsKey).HartId.toString
  val bankConflictTable = ChiselDB.createTable(tableName, new BankConflictDB)
  val bankConflictData = Wire(new BankConflictDB)
  for (i <- 0 until LoadPipelineWidth) {
    bankConflictData.set_index(i) := set_addrs(i)
    bankConflictData.addr(i) := io.read(i).bits.addr
  }

  // FIXME: rr_bank_conflict(0)(1) no generalization
  when(rr_bank_conflict(0)(1)) {
    bankConflictData.bank_index := bank_addrs(0)
    bankConflictData.way_index := OHToUInt(way_en(0))
    bankConflictData.fake_rr_bank_conflict := set_addrs(0) === set_addrs(1)
  }.otherwise {
    bankConflictData.bank_index := 0.U
    bankConflictData.way_index := 0.U
    bankConflictData.fake_rr_bank_conflict := false.B
  }

  val isWriteBankConflictTable = WireInit(Constantin.createRecord("isWriteBankConflictTable" + p(XSCoreParamsKey).HartId.toString))
  bankConflictTable.log(
    data = bankConflictData,
    en = isWriteBankConflictTable.orR && rr_bank_conflict(0)(1),
    site = siteName,
    clock = clock,
    reset = reset
  )

  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
    XSPerfAccumulate(s"data_array_fake_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y) && set_addrs(x)===set_addrs(y))
  ))

}

// the smallest access unit is bank
/**
 * Data array built from one [[DataSRAMBank]] (all ways) plus one multi-way ECC
 * SRAM per (set division, bank).
 *
 * Here two load-pipeline reads conflict whenever they target the same division
 * and bank, and a load read conflicts with a line read or a registered write
 * whenever the division matches.
 */
class BankedDataArray(implicit p: Parameters) extends AbstractBankedDataArray {
  println(" DCacheType: BankedDataArray")
  val ReduceReadlineConflict = false

  io.write.ready := true.B
  io.write_dup.foreach(_.ready := true.B)

  val data_banks = List.fill(DCacheSetDiv)(List.tabulate(DCacheBanks)(i => Module(new DataSRAMBank(i))))
  val ecc_banks = List.fill(DCacheSetDiv)(List.fill(DCacheBanks)(Module(new SRAMTemplate(
    Bits(eccBits.W),
    set = DCacheSets / DCacheSetDiv,
    way = DCacheWays,
    shouldReset = false,
    holdRead = false,
    singlePort = true
  ))))

  data_banks.map(_.map(_.dump()))

  val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
  val set_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val div_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val bank_addrs = Wire(Vec(LoadPipelineWidth, UInt()))

  val line_set_addr = addr_to_dcache_div_set(io.readline.bits.addr)
  val line_div_addr = addr_to_dcache_div(io.readline.bits.addr)
  val line_way_en = io.readline.bits.way_en

  val write_bank_mask_reg = RegNext(io.write.bits.wmask)
  val write_data_reg = RegNext(io.write.bits.data)
  val write_valid_reg = RegNext(io.write.valid)
  val write_valid_dup_reg = io.write_dup.map(x => RegNext(x.valid))
  val write_wayen_dup_reg = io.write_dup.map(x => RegNext(x.bits.way_en))
  val write_set_addr_dup_reg = io.write_dup.map(x => RegNext(addr_to_dcache_div_set(x.bits.addr)))
  val write_div_addr_dup_reg = io.write_dup.map(x => RegNext(addr_to_dcache_div(x.bits.addr)))

  // read data_banks and ecc_banks
  // for single port SRAM, do not allow read and write in the same cycle
  val rwhazard = RegNext(io.write.valid)
  val rrhazard = false.B // io.readline.valid
  (0 until LoadPipelineWidth).map(rport_index => {
    div_addrs(rport_index) := addr_to_dcache_div(io.read(rport_index).bits.addr)
    set_addrs(rport_index) := addr_to_dcache_div_set(io.read(rport_index).bits.addr)
    bank_addrs(rport_index) := addr_to_dcache_bank(io.read(rport_index).bits.addr)

    // use way_en to select a way after data read out
    assert(!(RegNext(io.read(rport_index).fire() && PopCount(io.read(rport_index).bits.way_en) > 1.U)))
    way_en(rport_index) := io.read(rport_index).bits.way_en
  })

  // read each bank, get bank result
  val rr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x => Seq.tabulate(LoadPipelineWidth)(y =>
    io.read(x).valid && io.read(y).valid &&
    div_addrs(x)===div_addrs(y) &&
    bank_addrs(x) === bank_addrs(y)
  ))
  val rrl_bank_conflict = Wire(Vec(LoadPipelineWidth, Bool()))
  val rrl_bank_conflict_intend = Wire(Vec(LoadPipelineWidth, Bool()))
  (0 until LoadPipelineWidth).foreach { i =>
    val judge = if (ReduceReadlineConflict) io.read(i).valid && io.readline.bits.rmask(bank_addrs(i)) && div_addrs(i)===line_div_addr
                else io.read(i).valid && div_addrs(i)===line_div_addr
    rrl_bank_conflict(i) := judge && io.readline.valid
    rrl_bank_conflict_intend(i) := judge && io.readline_intend
  }
  val wr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x =>
    io.read(x).valid && write_valid_reg && div_addrs(x) === write_div_addr_dup_reg.head
  )
  val wrl_bank_conflict = io.readline.valid && write_valid_reg && line_div_addr === write_div_addr_dup_reg.head
  // ready
  io.readline.ready := !(wrl_bank_conflict)
  io.read.zipWithIndex.map{case(x, i) => x.ready := !(wr_bank_conflict(i) || rrhazard)}

  val perf_multi_read = PopCount(io.read.map(_.valid)) >= 2.U
  (0 until LoadPipelineWidth).foreach(i => {
    io.bank_conflict_fast(i) := wr_bank_conflict(i) || rrl_bank_conflict(i) ||
      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
    io.bank_conflict_slow(i) := RegNext(io.bank_conflict_fast(i))
    io.disable_ld_fast_wakeup(i) := wr_bank_conflict(i) || rrl_bank_conflict_intend(i) ||
      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
  })
  XSPerfAccumulate("data_array_multi_read", perf_multi_read)
  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
    XSPerfAccumulate(s"data_array_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y))
  ))
  (0 until LoadPipelineWidth).foreach(i => {
    XSPerfAccumulate(s"data_array_rrl_bank_conflict_${i}", rrl_bank_conflict(i))
    XSPerfAccumulate(s"data_array_rw_bank_conflict_${i}", wr_bank_conflict(i))
    XSPerfAccumulate(s"data_array_read_${i}", io.read(i).valid)
  })
  XSPerfAccumulate("data_array_access_total", PopCount(io.read.map(_.valid)))
  XSPerfAccumulate("data_array_read_line", io.readline.valid)
  XSPerfAccumulate("data_array_write", io.write.valid)

  val bank_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, new L1BankedDataReadResult())))
  val read_bank_error_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Bool())))
  dontTouch(bank_result)
  dontTouch(read_bank_error_delayed)
  for (div_index <- 0 until DCacheSetDiv) {
    for (bank_index <- 0 until DCacheBanks) {
      // Set Addr & Read Way Mask
      //
      //    Pipe 0   ....  Pipe (n-1)
      //      +      ....     +
      //      |      ....     |
      // +----+---------------+-----+
      //  X                        X
      //   X                      +------+ Bank Addr Match
      //    +---------+----------+
      //              |
      //     +--------+--------+
      //     |    Data Bank    |
      //     +-----------------+
      val bank_addr_matchs = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
        io.read(i).valid && div_addrs(i) === div_index.U && bank_addrs(i) === bank_index.U
      })))
      val readline_match = Wire(Bool())
      if (ReduceReadlineConflict) {
        readline_match := io.readline.valid && io.readline.bits.rmask(bank_index) && line_div_addr === div_index.U
      } else {
        readline_match := io.readline.valid && line_div_addr === div_index.U
      }
      val bank_way_en = Mux(readline_match,
        io.readline.bits.way_en,
        PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => bank_addr_matchs(i) -> way_en(i)))
      )
      val bank_set_addr = Mux(readline_match,
        line_set_addr,
        PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => bank_addr_matchs(i) -> set_addrs(i)))
      )

      val read_enable = bank_addr_matchs.asUInt.orR || readline_match

      // read raw data
      val data_bank = data_banks(div_index)(bank_index)
      data_bank.io.r.en := read_enable
      data_bank.io.r.way_en := bank_way_en
      data_bank.io.r.addr := bank_set_addr
      bank_result(div_index)(bank_index).raw_data := data_bank.io.r.data

      // read ECC
      val ecc_bank = ecc_banks(div_index)(bank_index)
      ecc_bank.io.r.req.valid := read_enable
      ecc_bank.io.r.req.bits.apply(setIdx = bank_set_addr)
      bank_result(div_index)(bank_index).ecc := Mux1H(RegNext(bank_way_en), ecc_bank.io.r.resp.data)

      // use ECC to check error
      val ecc_data = bank_result(div_index)(bank_index).asECCData()
      val ecc_data_delayed = RegEnable(ecc_data, RegNext(read_enable))
      bank_result(div_index)(bank_index).error_delayed := dcacheParameters.dataCode.decode(ecc_data_delayed).error
      read_bank_error_delayed(div_index)(bank_index) := bank_result(div_index)(bank_index).error_delayed
    }
  }

  val bank_result_delayed = RegNext(bank_result)
  (0 until LoadPipelineWidth).map(i => {
    val rr_read_fire = RegNext(RegNext(io.read(i).fire))
    val rr_div_addr = RegNext(RegNext(div_addrs(i)))
    val rr_bank_addr = RegNext(RegNext(bank_addrs(i)))
    val rr_way_addr = RegNext(RegNext(OHToUInt(way_en(i))))
    io.read_resp_delayed(i) := bank_result_delayed(rr_div_addr)(rr_bank_addr)
    // error detection
    io.read_error_delayed(i) := rr_read_fire && read_bank_error_delayed(rr_div_addr)(rr_bank_addr) && !RegNext(io.bank_conflict_slow(i))

  })

  // read result: expose banked read result
  io.readline_resp := bank_result(RegNext(line_div_addr))
  io.readline_error_delayed := RegNext(RegNext(io.readline.fire())) &&
    VecInit((0 until DCacheBanks).map(i => io.readline_resp(i).error_delayed)).asUInt().orR

  // write data_banks & ecc_banks
  for (div_index <- 0 until DCacheSetDiv) {
    for (bank_index <- 0 until DCacheBanks) {
      // data write
      val wen_reg = write_bank_mask_reg(bank_index) &&
        write_valid_dup_reg(bank_index) &&
        write_div_addr_dup_reg(bank_index) === div_index.U
      val data_bank = data_banks(div_index)(bank_index)
      data_bank.io.w.en := wen_reg
      data_bank.io.w.way_en := write_wayen_dup_reg(bank_index)
      data_bank.io.w.addr := write_set_addr_dup_reg(bank_index)
      data_bank.io.w.data := write_data_reg(bank_index)

      // ecc write
      val ecc_bank = ecc_banks(div_index)(bank_index)
      ecc_bank.io.w.req.valid := wen_reg
      ecc_bank.io.w.req.bits.apply(
        setIdx = write_set_addr_dup_reg(bank_index),
        data = RegNext(getECCFromEncWord(cacheParams.dataCode.encode((io.write.bits.data(bank_index))))),
        waymask = write_wayen_dup_reg(bank_index)
      )
      when(ecc_bank.io.w.req.valid) {
        XSDebug("write in ecc sram: bank %x set %x data %x waymask %x\n",
          bank_index.U,
          addr_to_dcache_div_set(io.write.bits.addr),
          getECCFromEncWord(cacheParams.dataCode.encode((io.write.bits.data(bank_index)))),
          io.write.bits.way_en
        );
      }
    }
  }

  // deal with customized cache op
  require(nWays <= 32)
  io.cacheOp.resp.bits := DontCare
  val cacheOpShouldResp = WireInit(false.B)
  val eccReadResult = Wire(Vec(DCacheBanks, UInt(eccBits.W)))
  // DCacheDupNum is 16
  // vec: the dupIdx for every bank and every group
  val rdata_dup_vec = Seq(0, 0, 1, 1, 2, 2, 3, 3)
  val rdataEcc_dup_vec = Seq(4, 4, 5, 5, 6, 6, 7, 7)
  val wdata_dup_vec = Seq(8, 8, 9, 9, 10, 10, 11, 11)
  val wdataEcc_dup_vec = Seq(12, 12, 13, 13, 14, 14, 15, 15)
  val cacheOpDivAddr = set_to_dcache_div(io.cacheOp.req.bits.index)
  val cacheOpSetAddr = set_to_dcache_div_set(io.cacheOp.req.bits.index)
  val cacheOpWayMask = UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))
  rdata_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) =>
    for (divIdx <- 0 until DCacheSetDiv) {
      when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isReadData(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
        val data_bank = data_banks(divIdx)(bankIdx)
        data_bank.io.r.en := true.B
        data_bank.io.r.way_en := cacheOpWayMask
        data_bank.io.r.addr := cacheOpSetAddr
        cacheOpShouldResp := true.B
      }
    }
  }
  rdataEcc_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) =>
    for (divIdx <- 0 until DCacheSetDiv) {
      when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isReadDataECC(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
        val ecc_bank = ecc_banks(divIdx)(bankIdx)
        ecc_bank.io.r.req.valid := true.B
        ecc_bank.io.r.req.bits.setIdx := cacheOpSetAddr
        cacheOpShouldResp
:= true.B 866 } 867 } 868 } 869 wdata_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) => 870 for (divIdx <- 0 until DCacheSetDiv) { 871 when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isWriteData(io.cacheOp_req_bits_opCode_dup(dupIdx))) { 872 val data_bank = data_banks(divIdx)(bankIdx) 873 data_bank.io.w.en := cacheOpDivAddr === divIdx.U 874 data_bank.io.w.way_en := cacheOpWayMask 875 data_bank.io.w.addr := cacheOpSetAddr 876 data_bank.io.w.data := io.cacheOp.req.bits.write_data_vec(bankIdx) 877 cacheOpShouldResp := true.B 878 } 879 } 880 } 881 wdataEcc_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) => 882 for (divIdx <- 0 until DCacheSetDiv) { 883 when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isWriteDataECC(io.cacheOp_req_bits_opCode_dup(dupIdx))) { 884 val ecc_bank = ecc_banks(divIdx)(bankIdx) 885 ecc_bank.io.w.req.valid := cacheOpDivAddr === divIdx.U 886 ecc_bank.io.w.req.bits.apply( 887 setIdx = cacheOpSetAddr, 888 data = io.cacheOp.req.bits.write_data_ecc, 889 waymask = cacheOpWayMask 890 ) 891 cacheOpShouldResp := true.B 892 } 893 } 894 } 895 896 io.cacheOp.resp.valid := RegNext(io.cacheOp.req.valid && cacheOpShouldResp) 897 for (bank_index <- 0 until DCacheBanks) { 898 io.cacheOp.resp.bits.read_data_vec(bank_index) := bank_result(RegNext(cacheOpDivAddr))(bank_index).raw_data 899 eccReadResult(bank_index) := bank_result(RegNext(cacheOpDivAddr))(bank_index).ecc 900 } 901 902 io.cacheOp.resp.bits.read_data_ecc := Mux(io.cacheOp.resp.valid, 903 eccReadResult(RegNext(io.cacheOp.req.bits.bank_num)), 904 0.U 905 ) 906 907 val tableName = "BankConflict" + p(XSCoreParamsKey).HartId.toString 908 val siteName = "BankedDataArray" + p(XSCoreParamsKey).HartId.toString 909 val bankConflictTable = ChiselDB.createTable(tableName, new BankConflictDB) 910 val bankConflictData = Wire(new BankConflictDB) 911 for (i <- 0 until LoadPipelineWidth) { 912 bankConflictData.set_index(i) := set_addrs(i) 913 bankConflictData.addr(i) := io.read(i).bits.addr 914 } 915 
// FIXME: rr_bank_conflict(0)(1) no generalization
  // Remaining fields of the bank-conflict record: they default to zero and are overridden with
  // pipe 0's bank/way (plus the same-set flag) only while pipes 0 and 1 conflict.
  bankConflictData.bank_index := 0.U
  bankConflictData.way_index := 0.U
  bankConflictData.fake_rr_bank_conflict := false.B
  when(rr_bank_conflict(0)(1)) {
    bankConflictData.bank_index := bank_addrs(0)
    bankConflictData.way_index := OHToUInt(way_en(0))
    bankConflictData.fake_rr_bank_conflict := set_addrs(0) === set_addrs(1)
  }

  // Per-hart Constantin record; logging is enabled while any bit of it is set.
  val isWriteBankConflictTable = WireInit(
    Constantin.createRecord(s"isWriteBankConflictTable${p(XSCoreParamsKey).HartId}")
  )
  // One row is logged for each cycle in which pipes 0 and 1 conflict and logging is enabled.
  bankConflictTable.log(
    data = bankConflictData,
    en = isWriteBankConflictTable.orR && rr_bank_conflict(0)(1),
    site = siteName,
    clock = clock,
    reset = reset
  )

  // One counter per pipe pair (x < y): counts conflicts whose two requests have equal set addresses.
  for (y <- 1 until LoadPipelineWidth; x <- 0 until y) {
    XSPerfAccumulate(
      s"data_array_fake_rr_bank_conflict_${x}_${y}",
      rr_bank_conflict(x)(y) && set_addrs(x) === set_addrs(y)
    )
  }

}