xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/data/BankedDataArray.scala (revision 6639e9a467468f4e1b05a25a5de4500772aedeb1)
1/***************************************************************************************
2* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
3* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
4* Copyright (c) 2020-2021 Peng Cheng Laboratory
5*
6* XiangShan is licensed under Mulan PSL v2.
7* You can use this software according to the terms and conditions of the Mulan PSL v2.
8* You may obtain a copy of Mulan PSL v2 at:
9*          http://license.coscl.org.cn/MulanPSL2
10*
11* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
12* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
13* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
14*
15* See the Mulan PSL v2 for more details.
16*
17*
18* Acknowledgement
19*
20* This implementation is inspired by several key papers:
21* [1] Gurindar S. Sohi, and Manoj Franklin. "[High-bandwidth data memory systems for superscalar processors.]
22* (https://doi.org/10.1145/106972.106980)" 4th International Conference on Architectural Support for Programming
23* Languages and Operating Systems (ASPLOS). 1991.
24***************************************************************************************/
25
26package xiangshan.cache
27
28import org.chipsalliance.cde.config.Parameters
29import chisel3._
30import utils._
31import utility._
32import chisel3.util._
33import freechips.rocketchip.tilelink.{ClientMetadata, TLClientParameters, TLEdgeOut}
34import xiangshan.{L1CacheErrorInfo, XSCoreParamsKey}
35
36import scala.math.max
37
/** Record layout written to the "BankConflict" ChiselDB table by the data array.
  *
  * One record captures the addresses seen on every load read port together with the
  * bank / way information of the access that was involved in a read-read bank conflict.
  */
class BankConflictDB(implicit p: Parameters) extends DCacheBundle {
  // address presented on each load read port
  val addr = Vec(LoadPipelineWidth, Bits(PAddrBits.W))
  // set index of each load read port
  val set_index = {
    val setIdxBits = DCacheAboveIndexOffset - DCacheSetOffset
    Vec(LoadPipelineWidth, UInt(setIdxBits.W))
  }
  // bank index of every SRAM row touched by one access (VLEN / DCacheSRAMRowBits rows)
  val bank_index = {
    val bankIdxBits = DCacheSetOffset - DCacheBankOffset
    Vec(VLEN / DCacheSRAMRowBits, UInt(bankIdxBits.W))
  }
  // binary-encoded way of the access
  val way_index = UInt(wayBits.W)
  // set when the flagged conflict is between two accesses with identical set and div address
  val fake_rr_bank_conflict = Bool()
}
45
/** Basic data-array request: which way(s) to access and at which address.
  *
  * Serves as the base class of the read-line and write request bundles.
  */
class L1BankedDataReadReq(implicit p: Parameters) extends DCacheBundle {
  // way enable mask, one bit per DCache way
  val way_en = Bits(DCacheWays.W)
  // request address (PAddrBits wide)
  val addr = Bits(PAddrBits.W)
}
51
/** Load-pipeline read request: way enable and address plus a per-bank mask.
  *
  * The bank mask is what the data array compares between read ports to detect
  * read-read bank conflicts.
  */
class L1BankedDataReadReqWithMask(implicit p: Parameters) extends DCacheBundle {
  // way enable mask, one bit per DCache way
  val way_en = Bits(DCacheWays.W)
  // request address (PAddrBits wide)
  val addr = Bits(PAddrBits.W)
  // banks touched by this request, one bit per bank
  val bankMask = Bits(DCacheBanks.W)
  // NOTE(review): presumably cancels the request; not consumed in the code visible here
  val kill = Bool()
}
59
/** Main-pipeline whole-line read request: a basic read request plus a per-bank read mask. */
class L1BankedDataReadLineReq(implicit p: Parameters) extends L1BankedDataReadReq {
  // banks to read, one bit per bank (only honoured when ReduceReadlineConflict is enabled)
  val rmask = Bits(DCacheBanks.W)
}
64
/** Whole-line write request; a complete cache block can be written in a single request.
  *
  * Carries one SRAM row of data per bank together with a per-bank write mask.
  */
class L1BankedDataWriteReq(implicit p: Parameters) extends L1BankedDataReadReq {
  // banks to write, one bit per bank
  val wmask = Bits(DCacheBanks.W)
  // write data, one SRAM row per bank
  val data = Vec(DCacheBanks, Bits(DCacheSRAMRowBits.W))
}
71
// Control-only view of a cache-block write request: way enable and address, without
// write data or write mask. Used for the per-bank duplicated write ports (write_dup).
class L1BankedDataWriteReqCtrl(implicit p: Parameters) extends L1BankedDataReadReq
74
/** Result of reading one SRAM row: raw data, its ECC check bits and a late error flag. */
class L1BankedDataReadResult(implicit p: Parameters) extends DCacheBundle {
  // ECC check bits belonging to raw_data
  val ecc = Bits(eccBits.W)
  // data row as read from the SRAM
  val raw_data = Bits(DCacheSRAMRowBits.W)
  // ECC error indication; arrives 1 cycle later than the data response
  val error_delayed = Bool()

  /** Encoded word handed to the ECC decoder: check bits in the upper bits, data below. */
  def asECCData() = Cat(ecc, raw_data)
}
86
/** Write request accepted by a [[DataSRAMBank]]: one data row written into the selected way. */
class DataSRAMBankWriteReq(implicit p: Parameters) extends DCacheBundle {
  // write enable
  val en = Bool()
  // SRAM set index; width is left to be inferred from the driver
  val addr = UInt()
  // way select mask (the bank asserts that at most one bit is set when en is high)
  val way_en = UInt(DCacheWays.W)
  // data row to write
  val data = UInt(DCacheSRAMRowBits.W)
}
93
/** Thin wrapper around one single-way SRAM holding the data rows of one (bank, way) pair.
  *
  * @param bankIdx bank number, used only in debug printouts
  * @param wayIdx  way number, used only in debug printouts
  */
class DataSRAM(bankIdx: Int, wayIdx: Int)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle() {
    // write port
    val w = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Input(UInt(DCacheSRAMRowBits.W))
    }

    // read port
    val r = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Output(UInt(DCacheSRAMRowBits.W))
    }
  })

  // backing storage: DCacheSets / DCacheSetDiv rows, one way, configured as single-port
  val data_sram = Module(new SRAMTemplate(
    Bits(DCacheSRAMRowBits.W),
    set = DCacheSets / DCacheSetDiv,
    way = 1,
    shouldReset = false,
    holdRead = false,
    singlePort = true
  ))

  // write side
  data_sram.io.w.req.valid := io.w.en
  data_sram.io.w.req.bits.apply(setIdx = io.w.addr, data = io.w.data, waymask = 1.U)

  // read side
  data_sram.io.r.req.valid := io.r.en
  data_sram.io.r.req.bits.apply(setIdx = io.r.addr)
  io.r.data := data_sram.io.r.resp.data(0)

  XSPerfAccumulate("part_data_read_counter", data_sram.io.r.req.valid)

  /** Debug print of a read, emitted in the cycle after the read enable. */
  def dump_r() = {
    when(RegNext(io.r.en)) {
      XSDebug("bank read set %x bank %x way %x data %x\n",
        RegEnable(io.r.addr, io.r.en), bankIdx.U, wayIdx.U, io.r.data)
    }
  }

  /** Debug print of a write, emitted in the cycle the write enable is high. */
  def dump_w() = {
    when(io.w.en) {
      XSDebug("bank write set %x bank %x way %x data %x\n",
        io.w.addr, bankIdx.U, wayIdx.U, io.w.data)
    }
  }

  /** Emits both the write and the read debug printouts. */
  def dump() = {
    dump_w()
    dump_r()
  }
}
158
/** One data bank spanning all ways: wraps DCacheWays single-way SRAMs.
  *
  * A read enables every way and returns one row per way; a write goes only to the
  * way(s) selected by `io.w.way_en`. Each SRAM's clock is gated so that it only toggles
  * when that SRAM is read or written.
  *
  * @param index bank number (not referenced inside the module body)
  */
class DataSRAMBank(index: Int)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle() {
    val w = Input(new DataSRAMBankWriteReq)

    val r = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Output(Vec(DCacheWays, UInt(DCacheSRAMRowBits.W)))
    }
  })

  // a write must select at most one way (checked one cycle after the request)
  assert(RegNext(!io.w.en || PopCount(io.w.way_en) <= 1.U))

  // external controls do not read and write at the same time
  val w_info = io.w

  // one single-way SRAM per way
  val data_bank = Seq.fill(DCacheWays) {
    Module(new SRAMTemplate(
      Bits(DCacheSRAMRowBits.W),
      set = DCacheSets / DCacheSetDiv,
      way = 1,
      shouldReset = false,
      holdRead = false,
      singlePort = true
    ))
  }

  data_bank.zipWithIndex.foreach { case (sram, w) =>
    // write only the selected way
    val wen = w_info.en && w_info.way_en(w)
    sram.io.w.req.valid := wen
    sram.io.w.req.bits.apply(setIdx = w_info.addr, data = w_info.data, waymask = 1.U)
    // reads are broadcast to every way
    sram.io.r.req.valid := io.r.en
    sram.io.r.req.bits.apply(setIdx = io.r.addr)
    // clock this way only when it is actually read or written
    sram.clock := ClockGate(false.B, io.r.en | (io.w.en & io.w.way_en(w)), clock)
  }
  XSPerfAccumulate("part_data_read_counter", PopCount(Cat(data_bank.map(_.io.r.req.valid))))

  io.r.data := data_bank.map(_.io.r.resp.data(0))

  /** Debug print of a read, emitted in the cycle after the read enable. */
  def dump_r() = {
    when(RegNext(io.r.en)) {
      XSDebug("bank read addr %x data %x\n", RegEnable(io.r.addr, io.r.en), io.r.data.asUInt)
    }
  }

  /** Debug print of a write, emitted in the cycle the write enable is high. */
  def dump_w() = {
    when(io.w.en) {
      XSDebug("bank write addr %x way_en %x data %x\n", io.w.addr, io.w.way_en, io.w.data)
    }
  }

  /** Emits both the write and the read debug printouts. */
  def dump() = {
    dump_w()
    dump_r()
  }
}
229
// Marker object: AbstractBankedDataArray wraps it in Some(...) when EnableDataEcc is set,
// and the concrete data arrays map over that Option to decide whether to build ECC SRAMs.
case object HasDataEccParam
231
232//                     Banked DCache Data
233// -----------------------------------------------------------------
234// | Bank0 | Bank1 | Bank2 | Bank3 | Bank4 | Bank5 | Bank6 | Bank7 |
235// -----------------------------------------------------------------
236// | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  |
237// | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  |
238// | ....  | ....  | ....  | ....  | ....  | ....  | ....  | ....  |
239// -----------------------------------------------------------------
/** Common interface and debug helpers shared by the banked DCache data array implementations.
  *
  * Declares the IO (load-pipeline word reads, main-pipeline line read / write, cache-op port)
  * and a set of XSDebug dump routines; the actual storage is provided by subclasses.
  */
abstract class AbstractBankedDataArray(implicit p: Parameters) extends DCacheModule {
  // Some(marker) when data ECC is enabled, None otherwise
  val DataEccParam = Option.when(EnableDataEcc)(HasDataEccParam)
  val ReadlinePortErrorIndex = LoadPipelineWidth
  val io = IO(new DCacheBundle {
    // load pipeline read word req
    val read = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new L1BankedDataReadReqWithMask)))
    val is128Req = Input(Vec(LoadPipelineWidth, Bool()))
    // main pipeline read / write line req
    val readline_intend = Input(Bool())
    val readline = Flipped(DecoupledIO(new L1BankedDataReadLineReq))
    val write = Flipped(DecoupledIO(new L1BankedDataWriteReq))
    val write_dup = Vec(DCacheBanks, Flipped(Decoupled(new L1BankedDataWriteReqCtrl)))
    // data for readline and loadpipe
    val readline_resp = Output(Vec(DCacheBanks, new L1BankedDataReadResult()))
    val readline_error_delayed = Output(Bool())
    val read_resp          = Output(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, new L1BankedDataReadResult())))
    val read_error_delayed = Output(Vec(LoadPipelineWidth,Vec(VLEN/DCacheSRAMRowBits, Bool())))
    // val nacks = Output(Vec(LoadPipelineWidth, Bool()))
    // val errors = Output(Vec(LoadPipelineWidth + 1, ValidIO(new L1CacheErrorInfo))) // read ports + readline port
    // when bank_conflict, read (1) port should be ignored
    val bank_conflict_slow = Output(Vec(LoadPipelineWidth, Bool()))
    val disable_ld_fast_wakeup = Output(Vec(LoadPipelineWidth, Bool()))
    // customized cache op port
    val cacheOp = Flipped(new L1CacheInnerOpIO)
    val cacheOp_req_dup = Vec(DCacheDupNum, Flipped(Valid(new CacheCtrlReqInfo)))
    val cacheOp_req_bits_opCode_dup = Input(Vec(DCacheDupNum, UInt(XLEN.W)))
  })

  /** Builds a Vec with one element per load pipeline by applying `f` to the pipeline index. */
  def pipeMap[T <: Data](f: Int => T) = VecInit(Seq.tabulate(LoadPipelineWidth)(f))

  /** Extracts the ECC check bits (the bits above `wordBits`) from an encoded word.
    *
    * @param encWord encoded word; must be exactly `encWordBits` wide
    */
  def getECCFromEncWord(encWord: UInt) = {
    require(encWord.getWidth == encWordBits)
    encWord(encWordBits - 1, wordBits)
  }

  /** Debug print of every valid load read request and of a valid line read request. */
  def dumpRead = {
    for (w <- 0 until LoadPipelineWidth) {
      when(io.read(w).valid) {
        XSDebug(s"DataArray Read channel: $w valid way_en: %x addr: %x\n",
          io.read(w).bits.way_en, io.read(w).bits.addr)
      }
    }
    when(io.readline.valid) {
      XSDebug(s"DataArray Read Line, valid way_en: %x addr: %x rmask %x\n",
        io.readline.bits.way_en, io.readline.bits.addr, io.readline.bits.rmask)
    }
  }

  /** Debug print of a valid write request: header line plus data / mask bit of every bank. */
  def dumpWrite = {
    when(io.write.valid) {
      XSDebug(s"DataArray Write valid way_en: %x addr: %x\n",
        io.write.bits.way_en, io.write.bits.addr)

      // the label says "cycle", but the index printed here is the bank number
      for (r <- 0 until DCacheBanks) {
        XSDebug(s"cycle: $r data: %x wmask: %x\n",
          io.write.bits.data(r), io.write.bits.wmask(r))
      }
    }
  }

  /** Debug print of the load read responses; 128-bit requests print both rows concatenated. */
  def dumpResp = {
    XSDebug(s"DataArray ReadeResp channel:\n")
    for (r <- 0 until LoadPipelineWidth) yield {
      XSDebug(s"cycle: $r data: %x\n", Mux(io.is128Req(r),
        Cat(io.read_resp(r)(1).raw_data,io.read_resp(r)(0).raw_data),
        io.read_resp(r)(0).raw_data))
    }
  }

  /** Emits all read, write and response debug printouts. */
  def dump() = {
    dumpRead
    dumpWrite
    dumpResp
  }
}
316
/** Data array whose smallest access unit is a single SRAM.
  *
  * Every (set-division, bank, way) triple owns one [[DataSRAM]] and, when data ECC is enabled,
  * one ECC SRAM. Load-pipeline reads enable only the SRAMs of the requested bank(s) and way;
  * line reads enable every way of the addressed set division. Writes are registered for one
  * cycle before they reach the SRAMs, so reads are checked against the registered write to
  * avoid reading and writing a single-port SRAM in the same cycle.
  *
  * Both `data_banks` and `ecc_banks` are indexed as (div)(bank)(way).
  */
class SramedDataArray(implicit p: Parameters) extends AbstractBankedDataArray {
  println("  DCacheType: SramedDataArray")
  val ReduceReadlineConflict = false

  // writes are always accepted; conflicts are resolved by back-pressuring the read side
  io.write.ready := true.B
  io.write_dup.foreach(_.ready := true.B)

  val data_banks = List.tabulate(DCacheSetDiv)( k => List.tabulate(DCacheBanks)(i => List.tabulate(DCacheWays)(j => Module(new DataSRAM(i,j)))))
  // ecc_banks must use the same (div)(bank)(way) nesting as data_banks, because every access
  // below indexes it as banks(div)(bank)(way). Building it way-major would index out of
  // bounds at elaboration whenever DCacheWays != DCacheBanks.
  val ecc_banks = DataEccParam.map {
    case _ =>
      val ecc = List.tabulate(DCacheSetDiv)( k =>
        List.tabulate(DCacheBanks)(i =>
          List.tabulate(DCacheWays)(j =>
            Module(new SRAMTemplate(
                Bits(eccBits.W),
                set = DCacheSets / DCacheSetDiv,
                way = 1,
                shouldReset = false,
                holdRead = false,
                singlePort = true
            ))
      )))
      ecc
  }

  data_banks.map(_.map(_.map(_.dump())))

  // per-read-port decoded request fields
  val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
  val set_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val div_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val bank_addrs = Wire(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, UInt())))

  val line_set_addr = addr_to_dcache_div_set(io.readline.bits.addr)
  val line_div_addr = addr_to_dcache_div(io.readline.bits.addr)
  // when WPU is enabled, line_way_en is all enabled when read data
  val line_way_en = Fill(DCacheWays, 1.U) // val line_way_en = io.readline.bits.way_en
  val line_way_en_reg = RegEnable(io.readline.bits.way_en, 0.U(DCacheWays.W),io.readline.valid)

  // write request registered for one cycle; the SRAM write happens from these registers
  val write_bank_mask_reg = RegEnable(io.write.bits.wmask, 0.U(DCacheBanks.W), io.write.valid)
  val write_data_reg = RegEnable(io.write.bits.data, io.write.valid)
  val write_valid_reg = RegNext(io.write.valid)
  val write_valid_dup_reg = io.write_dup.map(x => RegNext(x.valid))
  val write_wayen_dup_reg = io.write_dup.map(x => RegEnable(x.bits.way_en, 0.U(DCacheWays.W), x.valid))
  val write_set_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div_set(x.bits.addr), x.valid))
  val write_div_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div(x.bits.addr), x.valid))

  // read data_banks and ecc_banks
  // for single port SRAM, do not allow read and write in the same cycle
  val rrhazard = false.B // io.readline.valid
  (0 until LoadPipelineWidth).map(rport_index => {
    div_addrs(rport_index) := addr_to_dcache_div(io.read(rport_index).bits.addr)
    set_addrs(rport_index) := addr_to_dcache_div_set(io.read(rport_index).bits.addr)
    bank_addrs(rport_index)(0) := addr_to_dcache_bank(io.read(rport_index).bits.addr)
    // second row of a 128-bit request lives in the next bank
    bank_addrs(rport_index)(1) := bank_addrs(rport_index)(0) + 1.U

    // use way_en to select a way after data read out
    assert(!(RegNext(io.read(rport_index).fire && PopCount(io.read(rport_index).bits.way_en) > 1.U)))
    way_en(rport_index) := io.read(rport_index).bits.way_en
  })

  // read conflict
  // two read ports conflict when they hit the same div, overlapping banks and the same way,
  // but need different sets (same-set accesses can share one SRAM read)
  val rr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x => Seq.tabulate(LoadPipelineWidth)(y =>
    io.read(x).valid && io.read(y).valid &&
    div_addrs(x) === div_addrs(y) &&
    (io.read(x).bits.bankMask & io.read(y).bits.bankMask) =/= 0.U &&
    io.read(x).bits.way_en === io.read(y).bits.way_en &&
    set_addrs(x) =/= set_addrs(y)
  ))
  // read port vs. line read conflict (actual request, and the early "intend" hint)
  val rrl_bank_conflict = Wire(Vec(LoadPipelineWidth, Bool()))
  val rrl_bank_conflict_intend = Wire(Vec(LoadPipelineWidth, Bool()))
  (0 until LoadPipelineWidth).foreach { i =>
    val judge = if (ReduceReadlineConflict) io.read(i).valid && (io.readline.bits.rmask & io.read(i).bits.bankMask) =/= 0.U && line_div_addr === div_addrs(i) && line_set_addr =/= set_addrs(i)
                else io.read(i).valid && line_div_addr === div_addrs(i) && line_set_addr =/= set_addrs(i)
    rrl_bank_conflict(i) := judge && io.readline.valid
    rrl_bank_conflict_intend(i) := judge && io.readline_intend
  }
  // read port vs. registered write: same div, same way, and the write touches a bank being read
  val wr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x =>
    io.read(x).valid && write_valid_reg &&
    div_addrs(x) === write_div_addr_dup_reg.head &&
    way_en(x) === write_wayen_dup_reg.head &&
    (write_bank_mask_reg(bank_addrs(x)(0)) || write_bank_mask_reg(bank_addrs(x)(1)) && io.is128Req(x))
  )
  // line read vs. registered write: any write to the same div blocks the line read
  val wrl_bank_conflict = io.readline.valid && write_valid_reg && line_div_addr === write_div_addr_dup_reg.head
  // ready
  io.readline.ready := !(wrl_bank_conflict)
  io.read.zipWithIndex.map { case (x, i) => x.ready := !(wr_bank_conflict(i) || rrhazard) }

  val perf_multi_read = PopCount(io.read.map(_.valid)) >= 2.U
  val bank_conflict_fast = Wire(Vec(LoadPipelineWidth, Bool()))
  (0 until LoadPipelineWidth).foreach(i => {
    // a port loses against the write, the line read, and every lower-numbered read port
    bank_conflict_fast(i) := wr_bank_conflict(i) || rrl_bank_conflict(i) ||
      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
    io.bank_conflict_slow(i) := RegNext(bank_conflict_fast(i))
    io.disable_ld_fast_wakeup(i) := wr_bank_conflict(i) || rrl_bank_conflict_intend(i) ||
      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
  })
  XSPerfAccumulate("data_array_multi_read", perf_multi_read)
  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
    XSPerfAccumulate(s"data_array_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y))
  ))
  (0 until LoadPipelineWidth).foreach(i => {
    XSPerfAccumulate(s"data_array_rrl_bank_conflict_${i}", rrl_bank_conflict(i))
    XSPerfAccumulate(s"data_array_rw_bank_conflict_${i}", wr_bank_conflict(i))
    XSPerfAccumulate(s"data_array_read_${i}", io.read(i).valid)
  })
  XSPerfAccumulate("data_array_access_total", PopCount(io.read.map(_.valid)))
  XSPerfAccumulate("data_array_read_line", io.readline.valid)
  XSPerfAccumulate("data_array_write", io.write.valid)

  // raw read results of every SRAM, indexed (div)(bank)(way)
  val read_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays,new L1BankedDataReadResult()))))
  val read_result_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays,new L1BankedDataReadResult()))))
  val read_error_delayed_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, Bool()))))
  dontTouch(read_result)
  dontTouch(read_error_delayed_result)
  for (div_index <- 0 until DCacheSetDiv){
    for (bank_index <- 0 until DCacheBanks) {
      for (way_index <- 0 until DCacheWays) {
        //     Set Addr & Read Way Mask
        //
        //    Pipe 0   ....  Pipe (n-1)
        //      +      ....     +
        //      |      ....     |
        // +----+---------------+-----+
        //  X                        X
        //   X                      +------+ Bank Addr Match
        //    +---------+----------+
        //              |
        //     +--------+--------+
        //     |    Data Bank    |
        //     +-----------------+
        // which load pipes want to read this particular SRAM
        val loadpipe_en = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
          io.read(i).valid && div_addrs(i) === div_index.U && (bank_addrs(i)(0) === bank_index.U || bank_addrs(i)(1) === bank_index.U && io.is128Req(i)) && way_en(i)(way_index)
        })))
        val readline_en = Wire(Bool())
        if (ReduceReadlineConflict) {
          readline_en := io.readline.valid && io.readline.bits.rmask(bank_index) && line_way_en(way_index) && div_index.U === line_div_addr
        } else {
          readline_en := io.readline.valid && line_way_en(way_index) && div_index.U === line_div_addr
        }
        // the line read has priority; otherwise the lowest-numbered requesting pipe wins
        val sram_set_addr = Mux(readline_en,
          addr_to_dcache_div_set(io.readline.bits.addr),
          PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => loadpipe_en(i) -> set_addrs(i)))
        )
        val read_en = loadpipe_en.asUInt.orR || readline_en
        // read raw data
        val data_bank = data_banks(div_index)(bank_index)(way_index)
        data_bank.io.r.en := read_en
        data_bank.io.r.addr := sram_set_addr
        ecc_banks match {
          case Some(banks) =>
            val ecc_bank = banks(div_index)(bank_index)(way_index)
            ecc_bank.io.r.req.valid := read_en
            ecc_bank.io.r.req.bits.apply(setIdx = sram_set_addr)
            read_result(div_index)(bank_index)(way_index).ecc := ecc_bank.io.r.resp.data(0)
          case None =>
            read_result(div_index)(bank_index)(way_index).ecc := 0.U
        }

        read_result(div_index)(bank_index)(way_index).raw_data := data_bank.io.r.data
        read_result_delayed(div_index)(bank_index)(way_index) := RegEnable(read_result(div_index)(bank_index)(way_index), RegNext(read_en))

        // use ECC to check error
        ecc_banks match {
          case Some(_) =>
            // decode one cycle after the data is available, hence "error_delayed"
            val ecc_data = read_result(div_index)(bank_index)(way_index).asECCData()
            val ecc_data_delayed = RegEnable(ecc_data, RegNext(read_en))
            read_result(div_index)(bank_index)(way_index).error_delayed := dcacheParameters.dataCode.decode(ecc_data_delayed).error
            read_error_delayed_result(div_index)(bank_index)(way_index) := read_result(div_index)(bank_index)(way_index).error_delayed
          case None =>
            read_result(div_index)(bank_index)(way_index).error_delayed := false.B
            read_error_delayed_result(div_index)(bank_index)(way_index) := false.B
        }
      }
    }
  }

  // flat vector of all SRAM read enables, only used for the perf counter below
  val data_read_oh = WireInit(VecInit(Seq.fill(DCacheSetDiv * DCacheBanks * DCacheWays)(0.U(1.W))))
  for(div_index <- 0 until DCacheSetDiv){
    for (bank_index <- 0 until DCacheBanks) {
      for (way_index <- 0 until DCacheWays) {
        data_read_oh(div_index *  DCacheBanks * DCacheWays + bank_index * DCacheWays + way_index) := data_banks(div_index)(bank_index)(way_index).io.r.en
      }
    }
  }
  XSPerfAccumulate("data_read_counter", PopCount(Cat(data_read_oh)))

  // read result: expose banked read result
  // TODO: clock gate
  (0 until LoadPipelineWidth).map(i => {
    // io.read_resp(i) := read_result(RegNext(bank_addrs(i)))(RegNext(OHToUInt(way_en(i))))
    // r_*  : request fields one cycle after the request (data response stage)
    // rr_* : request fields two cycles after the request (delayed error stage)
    val r_read_fire = RegNext(io.read(i).fire)
    val r_div_addr  = RegEnable(div_addrs(i), io.read(i).fire)
    val r_bank_addr = RegEnable(bank_addrs(i), io.read(i).fire)
    // note: unlike its neighbours, this register is not gated by fire
    val r_way_addr  = RegNext(OHToUInt(way_en(i)))
    val rr_read_fire = RegNext(RegNext(io.read(i).fire))
    val rr_div_addr = RegEnable(RegEnable(div_addrs(i), io.read(i).fire), r_read_fire)
    val rr_bank_addr = RegEnable(RegEnable(bank_addrs(i), io.read(i).fire), r_read_fire)
    val rr_way_addr = RegEnable(RegEnable(OHToUInt(way_en(i)), io.read(i).fire), r_read_fire)
    (0 until VLEN/DCacheSRAMRowBits).map( j =>{
      io.read_resp(i)(j) := read_result(r_div_addr)(r_bank_addr(j))(r_way_addr)
      // error detection
      // normal read ports
      io.read_error_delayed(i)(j) := rr_read_fire && read_error_delayed_result(rr_div_addr)(rr_bank_addr(j))(rr_way_addr) && !RegNext(io.bank_conflict_slow(i))
    })
  })

  // readline port
  // all ways were read; pick the requested way of every bank from the registered request
  (0 until DCacheBanks).map(i => {
    io.readline_resp(i) := read_result(RegEnable(line_div_addr, io.readline.valid))(i)(RegEnable(OHToUInt(io.readline.bits.way_en),io.readline.valid))
  })
  io.readline_error_delayed := RegNext(RegNext(io.readline.fire)) &&
    VecInit((0 until DCacheBanks).map(i => io.readline_resp(i).error_delayed)).asUInt.orR

  // write data_banks & ecc_banks
  for (div_index <- 0 until DCacheSetDiv) {
    for (bank_index <- 0 until DCacheBanks) {
      for (way_index <- 0 until DCacheWays) {
        // data write
        // driven from the registered write request, using this bank's duplicated control regs
        val wen_reg = write_bank_mask_reg(bank_index) &&
          write_valid_dup_reg(bank_index) &&
          write_div_addr_dup_reg(bank_index) === div_index.U &&
          write_wayen_dup_reg(bank_index)(way_index)
        val data_bank = data_banks(div_index)(bank_index)(way_index)
        data_bank.io.w.en := wen_reg

        data_bank.io.w.addr := write_set_addr_dup_reg(bank_index)
        data_bank.io.w.data := write_data_reg(bank_index)
        // ecc write
        ecc_banks match {
          case Some(banks) =>
            val ecc_bank = banks(div_index)(bank_index)(way_index)
            ecc_bank.io.w.req.valid := wen_reg
            ecc_bank.io.w.req.bits.apply(
              setIdx = write_set_addr_dup_reg(bank_index),
              data = RegEnable(getECCFromEncWord(cacheParams.dataCode.encode((io.write.bits.data(bank_index)))), io.write.valid),
              waymask = 1.U
            )
            // NOTE(review): the printout below uses the current-cycle io.write fields although
            // the write enable comes from the registered request
            when(ecc_bank.io.w.req.valid) {
              XSDebug("write in ecc sram: bank %x set %x data %x waymask %x\n",
                bank_index.U,
                addr_to_dcache_div_set(io.write.bits.addr),
                getECCFromEncWord(cacheParams.dataCode.encode((io.write.bits.data(bank_index)))),
                io.write.bits.way_en
              )
            }
          case None =>
        }
      }
    }
  }

  // cacheOpWayNum below is taken from wayNum(4, 0), which can address at most 32 ways
  require(nWays <= 32)
  io.cacheOp.resp.bits := DontCare
  val cacheOpShouldResp = WireInit(false.B)
  val eccReadResult = Wire(Vec(DCacheBanks, UInt(eccBits.W)))
  // DCacheDupNum is 16
  // vec: the dupIdx for every bank and every group
  // (position in each Seq is the bank index, the value is the duplicated request port to use)
  val rdata_dup_vec = Seq(0,0,1,1,2,2,3,3)
  val rdataEcc_dup_vec = Seq(4,4,5,5,6,6,7,7)
  val wdata_dup_vec = Seq(8,8,9,9,10,10,11,11)
  val wdataEcc_dup_vec = Seq(12,12,13,13,14,14,15,15)
  val cacheOpDivAddr = set_to_dcache_div(io.cacheOp.req.bits.index)
  val cacheOpSetAddr = set_to_dcache_div_set(io.cacheOp.req.bits.index)
  val cacheOpWayNum = io.cacheOp.req.bits.wayNum(4, 0)
  // cache-op data read: overrides the normal read port connections (last connect wins)
  rdata_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) =>
    for (divIdx <- 0 until DCacheSetDiv){
      for (wayIdx <- 0 until DCacheWays) {
        when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isReadData(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
          val data_bank = data_banks(divIdx)(bankIdx)(wayIdx)
          data_bank.io.r.en := UIntToOH(cacheOpWayNum)(wayIdx) && cacheOpDivAddr === divIdx.U
          data_bank.io.r.addr := cacheOpSetAddr
          cacheOpShouldResp := true.B
        }
      }
    }
  }
  // cache-op ECC read
  // note: the ECC read enable is asserted for every div / way, unlike the data read above
  rdataEcc_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) =>
    for (divIdx <- 0 until DCacheSetDiv) {
      for (wayIdx <- 0 until DCacheWays) {
        when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isReadDataECC(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
          ecc_banks match {
            case Some(banks) =>
              val ecc_bank = banks(divIdx)(bankIdx)(wayIdx)
              ecc_bank.io.r.req.valid := true.B
              ecc_bank.io.r.req.bits.setIdx := cacheOpSetAddr
              cacheOpShouldResp := true.B
            case None =>
              cacheOpShouldResp := true.B
          }
        }
      }
    }
  }
  // cache-op data write: overrides the normal write port connections
  wdata_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) =>
    for (divIdx <- 0 until DCacheSetDiv) {
      for (wayIdx <- 0 until DCacheWays) {
        when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isWriteData(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
          val data_bank = data_banks(divIdx)(bankIdx)(wayIdx)
          data_bank.io.w.en := UIntToOH(cacheOpWayNum)(wayIdx) && cacheOpDivAddr === divIdx.U
          data_bank.io.w.addr := cacheOpSetAddr
          data_bank.io.w.data := io.cacheOp.req.bits.write_data_vec(bankIdx)
          cacheOpShouldResp := true.B
        }
      }
    }
  }
  // cache-op ECC write
  wdataEcc_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) =>
    for (divIdx <- 0 until DCacheSetDiv) {
      for (wayIdx <- 0 until DCacheWays) {
        when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isWriteDataECC(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
          ecc_banks match {
            case Some(banks) =>
              val ecc_bank = banks(divIdx)(bankIdx)(wayIdx)
              ecc_bank.io.w.req.valid := UIntToOH(cacheOpWayNum)(wayIdx) && cacheOpDivAddr === divIdx.U
              ecc_bank.io.w.req.bits.apply(
                setIdx = cacheOpSetAddr,
                data = io.cacheOp.req.bits.write_data_ecc,
                waymask = 1.U
              )
              cacheOpShouldResp := true.B
            case None =>
              cacheOpShouldResp := true.B
          }
        }
      }
    }
  }
  // respond one cycle after a recognised cache-op request
  io.cacheOp.resp.valid := RegNext(io.cacheOp.req.valid && cacheOpShouldResp)
  for (bank_index <- 0 until DCacheBanks) {
    val cacheOpDivAddrReg = RegEnable(cacheOpDivAddr, io.cacheOp.req.valid)
    val cacheOpWayNumDivAddrReg = RegEnable(cacheOpWayNum, io.cacheOp.req.valid)
    io.cacheOp.resp.bits.read_data_vec(bank_index) := read_result(cacheOpDivAddrReg)(bank_index)(cacheOpWayNumDivAddrReg).raw_data
    eccReadResult(bank_index) := read_result(cacheOpDivAddrReg)(bank_index)(cacheOpWayNumDivAddrReg).ecc
  }

  io.cacheOp.resp.bits.read_data_ecc := Mux(io.cacheOp.resp.valid,
    eccReadResult(RegEnable(io.cacheOp.req.bits.bank_num, io.cacheOp.req.valid)),
    0.U
  )

  // ChiselDB logging of read-read bank conflicts between read ports 0 and 1
  val tableName =  "BankConflict" + p(XSCoreParamsKey).HartId.toString
  val siteName = "BankedDataArray" + p(XSCoreParamsKey).HartId.toString
  val bankConflictTable = ChiselDB.createTable(tableName, new BankConflictDB)
  val bankConflictData = Wire(new BankConflictDB)
  for (i <- 0 until LoadPipelineWidth) {
    bankConflictData.set_index(i) := set_addrs(i)
    bankConflictData.addr(i) := io.read(i).bits.addr
  }

  // FIXME: rr_bank_conflict(0)(1) no generalization
  when(rr_bank_conflict(0)(1)) {
    (0 until (VLEN/DCacheSRAMRowBits)).map(i => {
      bankConflictData.bank_index(i) := bank_addrs(0)(i)
    })
    bankConflictData.way_index  := OHToUInt(way_en(0))
    bankConflictData.fake_rr_bank_conflict := set_addrs(0) === set_addrs(1) && div_addrs(0) === div_addrs(1)
  }.otherwise {
    (0 until (VLEN/DCacheSRAMRowBits)).map(i => {
      bankConflictData.bank_index(i) := 0.U
    })
    bankConflictData.way_index := 0.U
    bankConflictData.fake_rr_bank_conflict := false.B
  }

  // logging is switched on at run time through a Constantin record
  val isWriteBankConflictTable = Constantin.createRecord(s"isWriteBankConflictTable${p(XSCoreParamsKey).HartId}")
  bankConflictTable.log(
    data = bankConflictData,
    en = isWriteBankConflictTable.orR && rr_bank_conflict(0)(1),
    site = siteName,
    clock = clock,
    reset = reset
  )

  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
    XSPerfAccumulate(s"data_array_fake_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y) && set_addrs(x)===set_addrs(y) && div_addrs(x) === div_addrs(y))
  ))

}
697
698// the smallest access unit is bank
/**
  * Banked implementation of the DCache data array.
  *
  * Storage is organised as `DCacheSetDiv` x `DCacheBanks` instances of `DataSRAMBank`, with an
  * optional, identically shaped grid of ECC SRAMs (present only when `DataEccParam` is defined).
  *
  * Ports served by this module (all declared on `io` of the parent class):
  *  - `io.read(i)`: `LoadPipelineWidth` load read ports, each touching one bank (or two adjacent
  *    banks when `io.is128Req(i)` is set).
  *  - `io.readline`: reads every bank of one set division; it takes priority over load reads
  *    when selecting the bank address.
  *  - `io.write` / `io.write_dup`: the request is registered and the SRAM write is issued one
  *    cycle after `io.write.valid`.
  *  - `io.cacheOp`: customized cache operations that read/write data or ECC directly.
  *
  * Bank conflicts between these ports are detected here and reported through
  * `io.bank_conflict_slow` and `io.disable_ld_fast_wakeup`.
  */
class BankedDataArray(implicit p: Parameters) extends AbstractBankedDataArray {
  println("  DCacheType: BankedDataArray")
  // When true, a readline only conflicts with / occupies the banks selected by its rmask.
  val ReduceReadlineConflict = false

  // Writes are always accepted; conflicting reads are the ones that get back-pressured.
  io.write.ready := true.B
  io.write_dup.foreach(_.ready := true.B)

  // data_banks(div)(bank) / ecc_banks(div)(bank)
  val data_banks = List.fill(DCacheSetDiv)(List.tabulate(DCacheBanks)(i => Module(new DataSRAMBank(i))))
  val ecc_banks = DataEccParam.map {
    case _ =>
      val ecc = List.fill(DCacheSetDiv)(List.fill(DCacheBanks)(
        Module(new SRAMTemplate(
          Bits(eccBits.W),
          set = DCacheSets / DCacheSetDiv,
          way = DCacheWays,
          shouldReset = false,
          holdRead = false,
          singlePort = true
        ))
      ))
      ecc
  }

  data_banks.foreach(_.foreach(_.dump()))

  // Per-load-port decoded request fields (combinational) and their registered copies.
  val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
  val set_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val div_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val bank_addrs = Wire(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, UInt())))
  val way_en_reg = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
  val set_addrs_reg = Wire(Vec(LoadPipelineWidth, UInt()))

  // Decoded readline request fields.
  val line_set_addr = addr_to_dcache_div_set(io.readline.bits.addr)
  val line_div_addr = addr_to_dcache_div(io.readline.bits.addr)
  val line_way_en = io.readline.bits.way_en

  // Registered write request: the SRAM write is performed from these registers,
  // i.e. one cycle after io.write.valid.
  val write_bank_mask_reg = RegEnable(io.write.bits.wmask, io.write.valid)
  val write_data_reg = RegEnable(io.write.bits.data, io.write.valid)
  val write_valid_reg = RegNext(io.write.valid)
  val write_valid_dup_reg = io.write_dup.map(x => RegNext(x.valid))
  val write_wayen_dup_reg = io.write_dup.map(x => RegEnable(x.bits.way_en, x.valid))
  val write_set_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div_set(x.bits.addr), x.valid))
  val write_div_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div(x.bits.addr), x.valid))

  // read data_banks and ecc_banks
  // for single port SRAM, do not allow read and write in the same cycle
  val rwhazard = RegNext(io.write.valid)
  val rrhazard = false.B // io.readline.valid
  (0 until LoadPipelineWidth).foreach(rport_index => {
    div_addrs(rport_index) := addr_to_dcache_div(io.read(rport_index).bits.addr)
    bank_addrs(rport_index)(0) := addr_to_dcache_bank(io.read(rport_index).bits.addr)
    // a 128-bit request additionally reads the next bank
    bank_addrs(rport_index)(1) := Mux(io.is128Req(rport_index), bank_addrs(rport_index)(0) + 1.U, bank_addrs(rport_index)(0))
    set_addrs(rport_index) := addr_to_dcache_div_set(io.read(rport_index).bits.addr)
    set_addrs_reg(rport_index) := RegEnable(addr_to_dcache_div_set(io.read(rport_index).bits.addr), io.read(rport_index).valid)

    // use way_en to select a way after data read out
    assert(!(RegNext(io.read(rport_index).fire && PopCount(io.read(rport_index).bits.way_en) > 1.U)))
    way_en(rport_index) := io.read(rport_index).bits.way_en
    way_en_reg(rport_index) := RegEnable(io.read(rport_index).bits.way_en, io.read(rport_index).valid)
  })

  // read each bank, get bank result
  // rr_bank_conflict(x)(y): load ports x and y are both valid, target the same set division
  // and have overlapping bank masks.
  val rr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x => Seq.tabulate(LoadPipelineWidth)(y =>
    io.read(x).valid && io.read(y).valid &&
    div_addrs(x) === div_addrs(y) &&
    (io.read(x).bits.bankMask & io.read(y).bits.bankMask) =/= 0.U
  ))
  // Load port i versus readline (actual request / announced intent).
  val rrl_bank_conflict = Wire(Vec(LoadPipelineWidth, Bool()))
  val rrl_bank_conflict_intend = Wire(Vec(LoadPipelineWidth, Bool()))
  (0 until LoadPipelineWidth).foreach { i =>
    val judge = if (ReduceReadlineConflict) io.read(i).valid && (io.readline.bits.rmask & io.read(i).bits.bankMask) =/= 0.U && div_addrs(i) === line_div_addr
                else io.read(i).valid && div_addrs(i)===line_div_addr
    rrl_bank_conflict(i) := judge && io.readline.valid
    rrl_bank_conflict_intend(i) := judge && io.readline_intend
  }
  // Load port x versus the registered write: same division and the write touches one of the
  // banks the load reads.
  val wr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x =>
    io.read(x).valid &&
    write_valid_reg &&
    div_addrs(x) === write_div_addr_dup_reg.head &&
    (write_bank_mask_reg(bank_addrs(x)(0)) || write_bank_mask_reg(bank_addrs(x)(1)) && io.is128Req(x))
  )
  // Readline versus the registered write: same division.
  val wrl_bank_conflict = io.readline.valid && write_valid_reg && line_div_addr === write_div_addr_dup_reg.head
  // ready
  io.readline.ready := !(wrl_bank_conflict)
  io.read.zipWithIndex.foreach{case(x, i) => x.ready := !(wr_bank_conflict(i) || rrhazard)}

  val perf_multi_read = PopCount(io.read.map(_.valid)) >= 2.U
  (0 until LoadPipelineWidth).foreach(i => {
    // remove fake rr_bank_conflict situation in s2
    // (two loads hitting the same bank with the same set address read the same SRAM row,
    // so only differing set addresses are reported as a conflict; lower-indexed port wins)
    val real_other_bank_conflict_reg = RegNext(wr_bank_conflict(i) || rrl_bank_conflict(i))
    val real_rr_bank_conflict_reg = (if (i == 0) 0.B else (0 until i).map{ j =>
      RegNext(rr_bank_conflict(j)(i)) && (set_addrs_reg(j) =/= set_addrs_reg(i))
    }.reduce(_ || _))
    io.bank_conflict_slow(i) := real_other_bank_conflict_reg || real_rr_bank_conflict_reg

    // get result in s1
    io.disable_ld_fast_wakeup(i) := wr_bank_conflict(i) || rrl_bank_conflict_intend(i) ||
      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
  })
  XSPerfAccumulate("data_array_multi_read", perf_multi_read)
  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
    XSPerfAccumulate(s"data_array_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y))
  ))
  (0 until LoadPipelineWidth).foreach(i => {
    XSPerfAccumulate(s"data_array_rrl_bank_conflict_${i}", rrl_bank_conflict(i))
    XSPerfAccumulate(s"data_array_rw_bank_conflict_${i}", wr_bank_conflict(i))
    XSPerfAccumulate(s"data_array_read_${i}", io.read(i).valid)
  })
  XSPerfAccumulate("data_array_access_total", PopCount(io.read.map(_.valid)))
  XSPerfAccumulate("data_array_read_line", io.readline.valid)
  XSPerfAccumulate("data_array_write", io.write.valid)

  // Raw per-(div, bank, way) read results, plus registered / error views of them.
  val bank_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, new L1BankedDataReadResult()))))
  val bank_result_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, new L1BankedDataReadResult()))))
  val ecc_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, UInt(eccBits.W)))))
  val read_bank_error_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, Bool()))))
  dontTouch(bank_result)
  dontTouch(read_bank_error_delayed)
  for (div_index <- 0 until DCacheSetDiv) {
    for (bank_index <- 0 until DCacheBanks) {
      //     Set Addr & Read Way Mask
      //
      //    Pipe 0   ....  Pipe (n-1)
      //      +      ....     +
      //      |      ....     |
      // +----+---------------+-----+
      //  X                        X
      //   X                      +------+ Bank Addr Match
      //    +---------+----------+
      //              |
      //     +--------+--------+
      //     |    Data Bank    |
      //     +-----------------+
      val bank_addr_matchs = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
        io.read(i).valid && div_addrs(i) === div_index.U && (bank_addrs(i)(0) === bank_index.U || bank_addrs(i)(1) === bank_index.U && io.is128Req(i))
      })))
      val readline_match = Wire(Bool())
      if (ReduceReadlineConflict) {
        readline_match := io.readline.valid && io.readline.bits.rmask(bank_index) && line_div_addr === div_index.U
      } else {
        readline_match := io.readline.valid && line_div_addr === div_index.U
      }

      // readline has priority; otherwise the lowest-indexed matching load port drives the address
      val bank_set_addr = Mux(readline_match,
        line_set_addr,
        PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => bank_addr_matchs(i) -> set_addrs(i)))
      )
      val read_enable = bank_addr_matchs.asUInt.orR || readline_match

      // read raw data
      val data_bank = data_banks(div_index)(bank_index)
      data_bank.io.r.en := read_enable
      data_bank.io.r.addr := bank_set_addr
      for (way_index <- 0 until DCacheWays) {
        bank_result(div_index)(bank_index)(way_index).raw_data := data_bank.io.r.data(way_index)
        bank_result_delayed(div_index)(bank_index)(way_index) := RegEnable(bank_result(div_index)(bank_index)(way_index), RegNext(read_enable))
      }

      // read ECC
      ecc_banks match {
        case Some(banks) =>
          val ecc_bank = banks(div_index)(bank_index)
          ecc_bank.io.r.req.valid := read_enable
          ecc_bank.io.r.req.bits.apply(setIdx = bank_set_addr)
          ecc_result(div_index)(bank_index) := ecc_bank.io.r.resp.data
          for (way_index <- 0 until DCacheWays) {
            bank_result(div_index)(bank_index)(way_index).ecc := ecc_bank.io.r.resp.data(way_index)
          }
        case None =>
          ecc_result(div_index)(bank_index) := DontCare
          for (way_index <- 0 until DCacheWays) {
            bank_result(div_index)(bank_index)(way_index).ecc := DontCare
          }
      }

      // use ECC to check error
      // (the data+ECC word is registered first, so the error flag is one cycle behind the data)
      ecc_banks match {
        case Some(_) =>
          for (way_index <- 0 until DCacheWays) {
            val ecc_data = bank_result(div_index)(bank_index)(way_index).asECCData()
            val ecc_data_delayed = RegEnable(ecc_data, RegNext(read_enable))
            bank_result(div_index)(bank_index)(way_index).error_delayed := dcacheParameters.dataCode.decode(ecc_data_delayed).error
            read_bank_error_delayed(div_index)(bank_index)(way_index) := bank_result(div_index)(bank_index)(way_index).error_delayed
          }
        case None =>
          for (way_index <- 0 until DCacheWays) {
            bank_result(div_index)(bank_index)(way_index).error_delayed := false.B
            read_bank_error_delayed(div_index)(bank_index)(way_index) := false.B
          }
      }
    }
  }

  // Perf: per cycle, count DCacheWays for every data bank whose read enable is asserted.
  val data_read_oh = WireInit(VecInit(Seq.fill(DCacheSetDiv)(0.U(XLEN.W))))
  for (div_index <- 0 until DCacheSetDiv){
    val temp = WireInit(VecInit(Seq.fill(DCacheBanks)(0.U(XLEN.W))))
    for (bank_index <- 0 until DCacheBanks) {
      temp(bank_index) := PopCount(Fill(DCacheWays, data_banks(div_index)(bank_index).io.r.en.asUInt))
    }
    data_read_oh(div_index) := temp.reduce(_ + _)
  }
  XSPerfAccumulate("data_read_counter", data_read_oh.foldLeft(0.U)(_ + _))

  (0 until LoadPipelineWidth).foreach(i => {
    // 1 cycle after read fire(load s2)
    val r_read_fire = RegNext(io.read(i).fire)
    val r_div_addr = RegEnable(div_addrs(i), io.read(i).fire)
    val r_bank_addr = RegEnable(bank_addrs(i), io.read(i).fire)
    val r_way_addr = RegEnable(OHToUInt(way_en(i)), io.read(i).fire)
    // 2 cycles after read fire(load s3)
    val rr_read_fire = RegNext(r_read_fire)
    val rr_div_addr = RegEnable(RegEnable(div_addrs(i), io.read(i).fire), r_read_fire)
    val rr_bank_addr = RegEnable(RegEnable(bank_addrs(i), io.read(i).fire), r_read_fire)
    val rr_way_addr = RegEnable(RegEnable(OHToUInt(way_en(i)), io.read(i).fire), r_read_fire)
    (0 until VLEN/DCacheSRAMRowBits).foreach( j =>{
      io.read_resp(i)(j)          := bank_result(r_div_addr)(r_bank_addr(j))(r_way_addr)
      // error detection
      // (suppressed when the load was reported as bank-conflicted one cycle earlier)
      io.read_error_delayed(i)(j) := rr_read_fire && read_bank_error_delayed(rr_div_addr)(rr_bank_addr(j))(rr_way_addr) && !RegNext(io.bank_conflict_slow(i))
    })
  })

  // read result: expose banked read result
  (0 until DCacheBanks).foreach(i => {
    io.readline_resp(i) := bank_result(RegEnable(line_div_addr, io.readline.valid))(i)(RegEnable(OHToUInt(io.readline.bits.way_en), io.readline.valid))
  })
  io.readline_error_delayed := RegNext(RegNext(io.readline.fire)) &&
    VecInit((0 until DCacheBanks).map(i => io.readline_resp(i).error_delayed)).asUInt.orR

  // write data_banks & ecc_banks
  for (div_index <- 0 until DCacheSetDiv) {
    for (bank_index <- 0 until DCacheBanks) {
      // data write
      val wen_reg = write_bank_mask_reg(bank_index) &&
        write_valid_dup_reg(bank_index) &&
        write_div_addr_dup_reg(bank_index) === div_index.U && RegNext(io.write.valid)
      val data_bank = data_banks(div_index)(bank_index)
      data_bank.io.w.en := wen_reg
      data_bank.io.w.way_en := write_wayen_dup_reg(bank_index)
      data_bank.io.w.addr := write_set_addr_dup_reg(bank_index)
      data_bank.io.w.data := write_data_reg(bank_index)

      // ecc write
      ecc_banks match {
        case Some(banks) =>
          val ecc_bank = banks(div_index)(bank_index)
          ecc_bank.io.w.req.valid := wen_reg
          ecc_bank.io.w.req.bits.apply(
            setIdx = write_set_addr_dup_reg(bank_index),
            data = RegEnable(getECCFromEncWord(cacheParams.dataCode.encode((io.write.bits.data(bank_index)))), io.write.valid),
            waymask = write_wayen_dup_reg(bank_index)
          )
          when(ecc_bank.io.w.req.valid) {
            XSDebug("write in ecc sram: bank %x set %x data %x waymask %x\n",
              bank_index.U,
              addr_to_dcache_div_set(io.write.bits.addr),
              getECCFromEncWord(cacheParams.dataCode.encode((io.write.bits.data(bank_index)))),
              io.write.bits.way_en
            )
          }
        case None =>
      }
    }
  }

  // deal with customized cache op
  // The connections below are made after the normal read/write wiring above, so inside their
  // `when` blocks they override it (Chisel last-connect semantics).
  require(nWays <= 32)
  io.cacheOp.resp.bits := DontCare
  val cacheOpShouldResp = WireInit(false.B)
  val eccReadResult = Wire(Vec(DCacheBanks, UInt(eccBits.W)))
  // DCacheDupNum is 16
  // vec: the dupIdx for every bank and every group
  val rdata_dup_vec = Seq(0, 0, 1, 1, 2, 2, 3, 3)
  val rdataEcc_dup_vec = Seq(4, 4, 5, 5, 6, 6, 7, 7)
  val wdata_dup_vec = Seq(8, 8, 9, 9, 10, 10, 11, 11)
  val wdataEcc_dup_vec = Seq(12, 12, 13, 13, 14, 14, 15, 15)
  val cacheOpDivAddr = set_to_dcache_div(io.cacheOp.req.bits.index)
  val cacheOpSetAddr = set_to_dcache_div_set(io.cacheOp.req.bits.index)
  // one-hot way mask (NOT a way index)
  val cacheOpWayMask = UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))
  rdata_dup_vec.zipWithIndex.foreach{ case(dupIdx, bankIdx) =>
    for (divIdx <- 0 until DCacheSetDiv) {
      when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isReadData(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
        val data_bank = data_banks(divIdx)(bankIdx)
        data_bank.io.r.en := true.B
        data_bank.io.r.addr := cacheOpSetAddr
        cacheOpShouldResp := true.B
      }
    }
  }
  rdataEcc_dup_vec.zipWithIndex.foreach{ case(dupIdx, bankIdx) =>
    for (divIdx <- 0 until DCacheSetDiv) {
      when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isReadDataECC(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
        ecc_banks match {
          case Some(banks) =>
            val ecc_bank = banks(divIdx)(bankIdx)
            ecc_bank.io.r.req.valid := true.B
            ecc_bank.io.r.req.bits.setIdx := cacheOpSetAddr
            cacheOpShouldResp := true.B
          case None =>
            cacheOpShouldResp := true.B
        }
      }
    }
  }
  wdata_dup_vec.zipWithIndex.foreach{ case(dupIdx, bankIdx) =>
    for (divIdx <- 0 until DCacheSetDiv) {
      when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isWriteData(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
        val data_bank = data_banks(divIdx)(bankIdx)
        data_bank.io.w.en := cacheOpDivAddr === divIdx.U
        data_bank.io.w.way_en := cacheOpWayMask
        data_bank.io.w.addr := cacheOpSetAddr
        data_bank.io.w.data := io.cacheOp.req.bits.write_data_vec(bankIdx)
        cacheOpShouldResp := true.B
      }
    }
  }
  wdataEcc_dup_vec.zipWithIndex.foreach{ case(dupIdx, bankIdx) =>
    for (divIdx <- 0 until DCacheSetDiv) {
      when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isWriteDataECC(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
        ecc_banks match {
          case Some(banks) =>
            val ecc_bank = banks(divIdx)(bankIdx)
            ecc_bank.io.w.req.valid := cacheOpDivAddr === divIdx.U
            ecc_bank.io.w.req.bits.apply(
              setIdx = cacheOpSetAddr,
              data = io.cacheOp.req.bits.write_data_ecc,
              waymask = cacheOpWayMask
            )
            cacheOpShouldResp := true.B
          case None =>
            cacheOpShouldResp := true.B
        }
      }
    }
  }

  io.cacheOp.resp.valid := RegNext(io.cacheOp.req.valid && cacheOpShouldResp)
  for (bank_index <- 0 until DCacheBanks) {
    val cacheOpDivAddrReg = RegEnable(cacheOpDivAddr, io.cacheOp.req.valid)
    val cacheOpWayMaskReg = RegEnable(cacheOpWayMask, io.cacheOp.req.valid)
    // cacheOpWayMaskReg is a one-hot mask, so the way must be selected with Mux1H (as done for
    // the ECC below); using the mask directly as a Vec index would pick the wrong way.
    io.cacheOp.resp.bits.read_data_vec(bank_index) := Mux1H(
      cacheOpWayMaskReg,
      bank_result(cacheOpDivAddrReg)(bank_index).map(_.raw_data)
    )
    eccReadResult(bank_index) := Mux1H(cacheOpWayMaskReg, ecc_result(cacheOpDivAddrReg)(bank_index))
  }

  io.cacheOp.resp.bits.read_data_ecc := Mux(io.cacheOp.resp.valid,
    eccReadResult(RegEnable(io.cacheOp.req.bits.bank_num, io.cacheOp.req.valid)),
    0.U
  )

  // ChiselDB logging of read-read bank conflicts between load ports 0 and 1.
  val tableName = "BankConflict" + p(XSCoreParamsKey).HartId.toString
  val siteName = "BankedDataArray" + p(XSCoreParamsKey).HartId.toString
  val bankConflictTable = ChiselDB.createTable(tableName, new BankConflictDB)
  val bankConflictData = Wire(new BankConflictDB)
  for (i <- 0 until LoadPipelineWidth) {
    bankConflictData.set_index(i) := set_addrs(i)
    bankConflictData.addr(i) := io.read(i).bits.addr
  }

  // FIXME: rr_bank_conflict(0)(1) no generalization
  when(rr_bank_conflict(0)(1)) {
    (0 until (VLEN/DCacheSRAMRowBits)).foreach(i => {
      bankConflictData.bank_index(i) := bank_addrs(0)(i)
    })
    bankConflictData.way_index := OHToUInt(way_en(0))
    bankConflictData.fake_rr_bank_conflict := set_addrs(0) === set_addrs(1) && div_addrs(0) === div_addrs(1)
  }.otherwise {
    (0 until (VLEN/DCacheSRAMRowBits)).foreach(i => {
      bankConflictData.bank_index(i) := 0.U
    })
    bankConflictData.way_index := 0.U
    bankConflictData.fake_rr_bank_conflict := false.B
  }

  // Logging is additionally gated by a Constantin record.
  val isWriteBankConflictTable = Constantin.createRecord(s"isWriteBankConflictTable${p(XSCoreParamsKey).HartId}")
  bankConflictTable.log(
    data = bankConflictData,
    en = isWriteBankConflictTable.orR && rr_bank_conflict(0)(1),
    site = siteName,
    clock = clock,
    reset = reset
  )

  // "fake" conflict: same bank, same division and same set address
  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
    XSPerfAccumulate(s"data_array_fake_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y) && set_addrs(x) === set_addrs(y) && div_addrs(x) === div_addrs(y))
  ))

}
1085