xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/data/BankedDataArray.scala (revision 881e32f5b63c435bafbaf5dc1d792ffcc9ea103e)
1/***************************************************************************************
2* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
3* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
4* Copyright (c) 2020-2021 Peng Cheng Laboratory
5*
6* XiangShan is licensed under Mulan PSL v2.
7* You can use this software according to the terms and conditions of the Mulan PSL v2.
8* You may obtain a copy of Mulan PSL v2 at:
9*          http://license.coscl.org.cn/MulanPSL2
10*
11* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
12* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
13* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
14*
15* See the Mulan PSL v2 for more details.
16*
17*
18* Acknowledgement
19*
20* This implementation is inspired by several key papers:
21* [1] Gurindar S. Sohi, and Manoj Franklin. "[High-bandwidth data memory systems for superscalar processors.]
22* (https://doi.org/10.1145/106972.106980)" 4th International Conference on Architectural Support for Programming
23* Languages and Operating Systems (ASPLOS). 1991.
24***************************************************************************************/
25
26package xiangshan.cache
27
28import org.chipsalliance.cde.config.Parameters
29import chisel3._
30import utils._
31import utility._
32import chisel3.util._
33import freechips.rocketchip.tilelink.{ClientMetadata, TLClientParameters, TLEdgeOut}
34import xiangshan.mem.LqPtr
35import xiangshan.{L1CacheErrorInfo, XSCoreParamsKey}
36
37import scala.math.max
38
/** Record describing a read-read bank conflict between load pipeline read ports.
  *
  * This bundle is the entry type of the ChiselDB table created in SramedDataArray
  * (`ChiselDB.createTable(tableName, new BankConflictDB)`).
  */
class BankConflictDB(implicit p: Parameters) extends DCacheBundle{
  // request address of every load pipeline read port
  val addr = Vec(LoadPipelineWidth, Bits(PAddrBits.W))
  // set index decoded from the address of every load pipeline read port
  val set_index = Vec(LoadPipelineWidth, UInt((DCacheAboveIndexOffset - DCacheSetOffset).W))
  // bank index of each SRAM row a request touches (VLEN/DCacheSRAMRowBits rows per request)
  val bank_index = Vec(VLEN/DCacheSRAMRowBits, UInt((DCacheSetOffset - DCacheBankOffset).W))
  // binary-encoded way index (the logger fills it with OHToUInt of the one-hot way enable)
  val way_index = UInt(wayBits.W)
  // set by the logger when the two compared requests have equal set and set-division addresses
  val fake_rr_bank_conflict = Bool()
}
46
/** Basic data array request: a way-enable mask plus an address.
  *
  * Also serves as the base class of the read-line and write request bundles in this file.
  */
class L1BankedDataReadReq(implicit p: Parameters) extends DCacheBundle
{
  // way enable mask, one bit per DCache way
  val way_en = Bits(DCacheWays.W)
  // request address, PAddrBits wide
  val addr = Bits(PAddrBits.W)
}
52
/** Load pipeline read request (element type of `io.read` in AbstractBankedDataArray). */
class L1BankedDataReadReqWithMask(implicit p: Parameters) extends DCacheBundle
{
  // way enable mask; the data arrays assert that a fired request sets at most one bit
  val way_en = Bits(DCacheWays.W)
  // request address, PAddrBits wide
  val addr = Bits(PAddrBits.W)
  // second copy of the address; BankedDataArray decodes it separately into its *_dup signals
  val addr_dup = Bits(PAddrBits.W)
  // one bit per bank; overlapping masks of two ports are part of the read-read conflict check
  val bankMask = Bits(DCacheBanks.W)
  // NOTE(review): not referenced in the visible part of this file - presumably cancels the
  // request; confirm against the load pipeline
  val kill = Bool()
  // load queue pointer; selcetOldestPort compares these to choose between conflicting ports
  val lqIdx = new LqPtr
}
62
/** Whole-line read request (payload of `io.readline`): way enable and address plus a bank mask. */
class L1BankedDataReadLineReq(implicit p: Parameters) extends L1BankedDataReadReq
{
  // one bit per bank; only consulted for conflict detection / bank enabling when the data
  // array's ReduceReadlineConflict switch is true, and for pseudo error hit detection
  val rmask = Bits(DCacheBanks.W)
}
67
// Now, we can write a cache-block in a single cycle
/** Whole-line write request (payload of `io.write`): one data row per bank plus a bank mask. */
class L1BankedDataWriteReq(implicit p: Parameters) extends L1BankedDataReadReq
{
  // per-bank write mask, one bit per bank
  val wmask = Bits(DCacheBanks.W)
  // write data, one DCacheSRAMRowBits-wide row per bank (ECC bits are not part of the request)
  val data = Vec(DCacheBanks, Bits(DCacheSRAMRowBits.W))
}
74
// cache-block write request without data
// Carries only the inherited way_en and addr fields; it is the payload type of the per-bank
// `io.write_dup` ports of AbstractBankedDataArray.
class L1BankedDataWriteReqCtrl(implicit p: Parameters) extends L1BankedDataReadReq
77
/** Read result of a single bank row: the data row, its ECC bits and a delayed error flag. */
class L1BankedDataReadResult(implicit p: Parameters) extends DCacheBundle {
  // you can choose which bank to read to save power

  // ECC check bits belonging to raw_data
  val ecc = Bits(dataECCBits.W)
  // data row as stored, without check bits
  val raw_data = Bits(DCacheSRAMRowBits.W)
  // error flag; arrives 1 cycle later than the data response
  val error_delayed = Bool()

  /** Encoded word with the ECC bits in the upper positions and the raw data in the lower ones. */
  def asECCData(): UInt = Cat(ecc, raw_data)
}
89
/** Write port payload of DataSRAMBank. */
class DataSRAMBankWriteReq(implicit p: Parameters) extends DCacheBundle {
  // write enable
  val en = Bool()
  // SRAM set index; width is left unspecified here and inferred by Chisel
  val addr = UInt()
  // way select mask; DataSRAMBank asserts that at most one bit is set while en is high
  val way_en = UInt(DCacheWays.W)
  // encoded write word, encDataBits wide
  val data = UInt(encDataBits.W)
}
96
97// wrap a sram
/** Wrapper around one single-ported SRAM that stores the encoded rows of one way of one bank.
  *
  * @param bankIdx bank number, only used to label the debug output
  * @param wayIdx  way number, only used to label the debug output
  */
class DataSRAM(bankIdx: Int, wayIdx: Int)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle() {
    // write port
    val w = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Input(UInt(encDataBits.W))
    }

    // read port
    val r = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Output(UInt(encDataBits.W))
    }
  })

  // backing storage: one encoded word per set of this set-division
  val data_sram = Module(new SRAMTemplate(
    Bits(encDataBits.W),
    set = DCacheSets / DCacheSetDiv,
    way = 1,
    singlePort = true,
    holdRead = false,
    shouldReset = false
  ))

  // read side
  data_sram.io.r.req.valid := io.r.en
  data_sram.io.r.req.bits.apply(setIdx = io.r.addr)
  io.r.data := data_sram.io.r.resp.data.head

  // write side (the macro has a single way, hence the constant way mask)
  data_sram.io.w.req.valid := io.w.en
  data_sram.io.w.req.bits.apply(
    setIdx = io.w.addr,
    data = io.w.data,
    waymask = 1.U
  )

  XSPerfAccumulate("part_data_read_counter", data_sram.io.r.req.valid)

  /** Debug print of a read, issued in the cycle after the read enable. */
  def dump_r() = {
    val readIssued = RegNext(io.r.en)
    val readSet = RegEnable(io.r.addr, io.r.en)
    XSDebug(readIssued,
      "bank read set %x bank %x way %x data %x\n",
      readSet, bankIdx.U, wayIdx.U, io.r.data)
  }

  /** Debug print of a write, issued in the cycle of the write enable. */
  def dump_w() = {
    XSDebug(io.w.en,
      "bank write set %x bank %x way %x data %x\n",
      io.w.addr, bankIdx.U, wayIdx.U, io.w.data)
  }

  /** Emits both debug prints, write first. */
  def dump() = {
    dump_w()
    dump_r()
  }
}
159
// wrap the data rows of all DCacheWays ways of one bank
class DataSRAMBank(index: Int)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle() {
    // write port: one way (selected by way_en) is written per request
    val w = Input(new DataSRAMBankWriteReq)

    // read port: every way is read at the same set, the caller picks the way afterwards
    val r = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Output(Vec(DCacheWays, UInt(encDataBits.W)))
    }
  })

  // a write must never select more than one way
  assert(RegNext(!io.w.en || PopCount(io.w.way_en) <= 1.U))

  // external controls do not read and write at the same time
  val w_info = io.w

  // multiway data bank: one single-ported, clock-gated SRAM per way
  val data_bank = Seq.fill(DCacheWays) {
    Module(new SRAMTemplate(
      Bits(encDataBits.W),
      set = DCacheSets / DCacheSetDiv,
      way = 1,
      singlePort = true,
      holdRead = false,
      shouldReset = false,
      withClockGate = true
    ))
  }

  data_bank.zipWithIndex.foreach { case (sram, way) =>
    // only the way selected by way_en takes the write
    sram.io.w.req.valid := w_info.en && w_info.way_en(way)
    sram.io.w.req.bits.apply(
      setIdx = w_info.addr,
      data = w_info.data,
      waymask = 1.U
    )
    // all ways are read together
    sram.io.r.req.valid := io.r.en
    sram.io.r.req.bits.apply(setIdx = io.r.addr)
  }
  XSPerfAccumulate("part_data_read_counter", PopCount(Cat(data_bank.map(_.io.r.req.valid))))

  io.r.data.zip(data_bank).foreach { case (out, sram) =>
    out := sram.io.r.resp.data(0)
  }

  /** Debug print of a read, issued in the cycle after the read enable. */
  def dump_r() = {
    val readIssued = RegNext(io.r.en)
    val readAddr = RegEnable(io.r.addr, io.r.en)
    XSDebug(readIssued,
      "bank read addr %x data %x\n",
      readAddr, io.r.data.asUInt)
  }

  /** Debug print of a write, issued in the cycle of the write enable. */
  def dump_w() = {
    XSDebug(io.w.en,
      "bank write addr %x way_en %x data %x\n",
      io.w.addr, io.w.way_en, io.w.data)
  }

  /** Emits both debug prints, write first. */
  def dump() = {
    dump_w()
    dump_r()
  }
}
228
// Marker object: AbstractBankedDataArray wraps it in Some(...) when EnableDataEcc is set
// and uses None otherwise (see DataEccParam).
case object HasDataEccParam
230
231//                     Banked DCache Data
232// -----------------------------------------------------------------
233// | Bank0 | Bank1 | Bank2 | Bank3 | Bank4 | Bank5 | Bank6 | Bank7 |
234// -----------------------------------------------------------------
235// | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  |
236// | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  |
237// | ....  | ....  | ....  | ....  | ....  | ....  | ....  | ....  |
238// -----------------------------------------------------------------
/** Common interface and helpers of the banked DCache data arrays.
  *
  * Declares the IO shared by the concrete implementations, helpers for splitting / joining
  * encoded (ECC + data) words, debug dump routines and the port arbitration helper
  * [[selcetOldestPort]].
  */
abstract class AbstractBankedDataArray(implicit p: Parameters) extends DCacheModule
{
  // Some(marker) when data ECC is enabled, None otherwise
  val DataEccParam = if(EnableDataEcc) Some(HasDataEccParam) else None
  // index of the readline port when error ports are numbered after the load read ports
  val ReadlinePortErrorIndex = LoadPipelineWidth
  val io = IO(new DCacheBundle {
    // load pipeline read word req
    val read = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new L1BankedDataReadReqWithMask)))
    val is128Req = Input(Vec(LoadPipelineWidth, Bool()))
    // main pipeline read / write line req
    val readline_intend = Input(Bool())
    val readline = Flipped(DecoupledIO(new L1BankedDataReadLineReq))
    val write = Flipped(DecoupledIO(new L1BankedDataWriteReq))
    val write_dup = Vec(DCacheBanks, Flipped(Decoupled(new L1BankedDataWriteReqCtrl)))
    // data for readline and loadpipe
    val readline_resp = Output(Vec(DCacheBanks, new L1BankedDataReadResult()))
    val readline_error_delayed = Output(Bool())
    val read_resp          = Output(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, new L1BankedDataReadResult())))
    val read_error_delayed = Output(Vec(LoadPipelineWidth,Vec(VLEN/DCacheSRAMRowBits, Bool())))
    // val nacks = Output(Vec(LoadPipelineWidth, Bool()))
    // val errors = Output(Vec(LoadPipelineWidth + 1, ValidIO(new L1CacheErrorInfo))) // read ports + readline port
    // when bank_conflict, read (1) port should be ignored
    val bank_conflict_slow = Output(Vec(LoadPipelineWidth, Bool()))
    val disable_ld_fast_wakeup = Output(Vec(LoadPipelineWidth, Bool()))
    val pseudo_error = Flipped(DecoupledIO(Vec(DCacheBanks, new CtrlUnitSignalingBundle)))
  })

  // bank (0, 1, 2, 3) each way use duplicate addr
  def DuplicatedQueryBankSeq = Seq(0, 1, 2, 3)

  /** Builds a Vec with one element per load pipeline port from `f(portIndex)`. */
  def pipeMap[T <: Data](f: Int => T) = VecInit((0 until LoadPipelineWidth).map(f))

  /** Extracts the ECC bits (the bits above the data row) from an encoded word.
    *
    * Returns the constant 0 when data ECC is disabled.
    */
  def getECCFromEncWord(encWord: UInt) = {
    if (EnableDataEcc) {
      // report the offending width, not encDataBits twice
      require(encWord.getWidth == encDataBits, s"encWord width=${encWord.getWidth} != encDataBits=$encDataBits!")
      encWord(encDataBits-1, DCacheSRAMRowBits)
    } else {
      0.U
    }
  }

  /** Extracts the data row (the low DCacheSRAMRowBits bits) from an encoded word. */
  def getDataFromEncWord(encWord: UInt) = {
    encWord(DCacheSRAMRowBits-1, 0)
  }

  /** Joins ECC bits and a data row into an encoded word; yields just the data when ECC is disabled. */
  def asECCData(ecc: UInt, data: UInt) = {
    if (EnableDataEcc) {
      Cat(ecc, data)
    } else {
      data
    }
  }

  /** Debug print of every valid load read request and of a valid readline request. */
  def dumpRead = {
    (0 until LoadPipelineWidth) map { w =>
      XSDebug(io.read(w).valid,
        s"DataArray Read channel: $w valid way_en: %x addr: %x\n",
        io.read(w).bits.way_en, io.read(w).bits.addr)
    }
    XSDebug(io.readline.valid,
      s"DataArray Read Line, valid way_en: %x addr: %x rmask %x\n",
      io.readline.bits.way_en, io.readline.bits.addr, io.readline.bits.rmask)
  }

  /** Debug print of a valid write request: header line plus one line per bank. */
  def dumpWrite = {
    XSDebug(io.write.valid,
      s"DataArray Write valid way_en: %x addr: %x\n",
      io.write.bits.way_en, io.write.bits.addr)

    (0 until DCacheBanks) map { r =>
      XSDebug(io.write.valid,
        s"cycle: $r data: %x wmask: %x\n",
        io.write.bits.data(r), io.write.bits.wmask(r))
    }
  }

  /** Debug print of the read response of every load port (two rows are joined for 128-bit requests). */
  def dumpResp = {
    XSDebug(s"DataArray ReadeResp channel:\n")
    (0 until LoadPipelineWidth) map { r =>
      XSDebug(s"cycle: $r data: %x\n", Mux(io.is128Req(r),
        Cat(io.read_resp(r)(1).raw_data,io.read_resp(r)(0).raw_data),
        io.read_resp(r)(0).raw_data))
    }
  }

  /** Emits all debug prints: read requests, write request, read responses. */
  def dump() = {
    dumpRead
    dumpWrite
    dumpResp
  }

  /** Reduces a set of candidate ports to a single one.
    *
    * Candidates are merged pairwise with `ParallelOperation`. For a pair (a, b):
    *  - if both are valid, b wins when `a.ptr > b.ptr`, otherwise a wins
    *  - if only a is valid, a wins; in every remaining case b's pointer and index are forwarded
    *  - the merged valid is the OR of both valids
    *
    * NOTE(review): going by the method name the winner is meant to be the oldest request; that
    * relies on the ordering `LqPtr` defines for `>`, which is not visible in this file.
    *
    * @param valid per-port valid flag
    * @param bits  per-port load queue pointer used for the comparison
    * @param index per-port identifier that is returned for the winner
    * @return ((any candidate valid, winning pointer), winning index)
    */
  def selcetOldestPort(valid: Seq[Bool], bits: Seq[LqPtr], index: Seq[UInt]):((Bool, LqPtr), UInt) = {
    require(valid.length == bits.length &&  bits.length == index.length, s"length must eq, valid:${valid.length}, bits:${bits.length}, index:${index.length}")
    ParallelOperation(valid zip bits zip index,
      (a: ((Bool, LqPtr), UInt), b: ((Bool, LqPtr), UInt)) => {
        val ((aValid, aPtr), aIdx) = a
        val ((bValid, bPtr), bIdx) = b
        // with both candidates valid, b is preferred when a's pointer compares greater
        val bSel = aPtr > bPtr
        val selPtr = Mux(
          aValid && bValid,
          Mux(bSel, bPtr, aPtr),
          Mux(aValid && !bValid, aPtr, bPtr)
        )
        val selIdx = Mux(
          aValid && bValid,
          Mux(bSel, bIdx, aIdx),
          Mux(aValid && !bValid, aIdx, bIdx)
        )
        ((aValid || bValid, selPtr), selIdx)
      }
    )
  }

}
354
355// the smallest access unit is sram
/** Data array variant that instantiates one DataSRAM per (set-division, bank, way).
  *
  * Timing as visible in this class:
  *  - a read request drives the SRAM read ports in the cycle it is presented; the response is
  *    picked from the SRAM outputs with selectors registered from the request
  *  - ECC error flags are decoded from a registered copy of the read data and reported on
  *    io.read_error_delayed / io.readline_error_delayed
  *  - a write request is registered first; the SRAM write ports are driven from these registers
  */
class SramedDataArray(implicit p: Parameters) extends AbstractBankedDataArray {
  println("  DCacheType: SramedDataArray")
  // elaboration-time switch: when true, readline conflicts / enables also take rmask into account
  val ReduceReadlineConflict = false

  // writes are always accepted
  io.write.ready := true.B
  io.write_dup.foreach(_.ready := true.B)

  // data_banks(div)(bank)(way)
  val data_banks = List.tabulate(DCacheSetDiv)( k => List.tabulate(DCacheBanks)(i => List.tabulate(DCacheWays)(j => Module(new DataSRAM(i,j)))))
  data_banks.map(_.map(_.map(_.dump())))

  // per load port: way enable and the address fields decoded from the request address
  val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
  val set_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val div_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val bank_addrs = Wire(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, UInt())))

  val line_set_addr = addr_to_dcache_div_set(io.readline.bits.addr)
  val line_div_addr = addr_to_dcache_div(io.readline.bits.addr)
  // when WPU is enabled, line_way_en is all enabled when read data
  val line_way_en = Fill(DCacheWays, 1.U) // val line_way_en = io.readline.bits.way_en
  // NOTE(review): not referenced anywhere else in this class
  val line_way_en_reg = RegEnable(io.readline.bits.way_en, 0.U(DCacheWays.W),io.readline.valid)

  // registered copy of the write request; the SRAM write happens from these registers
  val write_bank_mask_reg = RegEnable(io.write.bits.wmask, 0.U(DCacheBanks.W), io.write.valid)
  val write_data_reg = RegEnable(io.write.bits.data, io.write.valid)
  val write_valid_reg = RegNext(io.write.valid)
  // one registered copy of the write control per bank, taken from the io.write_dup ports
  val write_valid_dup_reg = io.write_dup.map(x => RegNext(x.valid))
  val write_wayen_dup_reg = io.write_dup.map(x => RegEnable(x.bits.way_en, 0.U(DCacheWays.W), x.valid))
  val write_set_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div_set(x.bits.addr), x.valid))
  val write_div_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div(x.bits.addr), x.valid))

  // read data_banks and ecc_banks
  // for single port SRAM, do not allow read and write in the same cycle
  val rrhazard = false.B // io.readline.valid
  (0 until LoadPipelineWidth).map(rport_index => {
    div_addrs(rport_index) := addr_to_dcache_div(io.read(rport_index).bits.addr)
    set_addrs(rport_index) := addr_to_dcache_div_set(io.read(rport_index).bits.addr)
    bank_addrs(rport_index)(0) := addr_to_dcache_bank(io.read(rport_index).bits.addr)
    // second row of a request is the next bank (only consulted together with is128Req)
    bank_addrs(rport_index)(1) := bank_addrs(rport_index)(0) + 1.U

    // use way_en to select a way after data read out
    assert(!(RegNext(io.read(rport_index).fire && PopCount(io.read(rport_index).bits.way_en) > 1.U)))
    way_en(rport_index) := io.read(rport_index).bits.way_en
  })

  // read conflict
  // ports x and y conflict when both are valid and target the same set-division, overlapping
  // banks and the same way, but different sets (i.e. the same SRAM at different addresses)
  val rr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x => Seq.tabulate(LoadPipelineWidth)(y => {
    if (x == y) {
      false.B
    } else {
      io.read(x).valid && io.read(y).valid &&
        div_addrs(x) === div_addrs(y) &&
        (io.read(x).bits.bankMask & io.read(y).bits.bankMask) =/= 0.U &&
        io.read(x).bits.way_en === io.read(y).bits.way_en &&
        set_addrs(x) =/= set_addrs(y)
    }
  }))
  // per port: conflicts with at least one other port
  val load_req_with_bank_conflict = rr_bank_conflict.map(_.reduce(_ || _))
  // NOTE(review): load_req_valid is not referenced anywhere else in this class
  val load_req_valid = io.read.map(_.valid)
  val load_req_lqIdx = io.read.map(_.bits.lqIdx)
  val load_req_index = (0 until LoadPipelineWidth).map(_.asUInt)


  // pick one of the conflicting ports; that port keeps its SRAM access
  val load_req_bank_conflict_selcet = selcetOldestPort(load_req_with_bank_conflict, load_req_lqIdx, load_req_index)
  val load_req_bank_select_port  = UIntToOH(load_req_bank_conflict_selcet._2).asBools

  // per port: has a read-read conflict and is not the selected port, i.e. loses the arbitration
  val rr_bank_conflict_oldest = (0 until LoadPipelineWidth).map(i =>
    !load_req_bank_select_port(i) && load_req_with_bank_conflict(i)
  )

  // conflict between a load port and the readline port (actual request / announced intent)
  val rrl_bank_conflict = Wire(Vec(LoadPipelineWidth, Bool()))
  val rrl_bank_conflict_intend = Wire(Vec(LoadPipelineWidth, Bool()))
  (0 until LoadPipelineWidth).foreach { i =>
    val judge = if (ReduceReadlineConflict) io.read(i).valid && (io.readline.bits.rmask & io.read(i).bits.bankMask) =/= 0.U && line_div_addr === div_addrs(i) && line_set_addr =/= set_addrs(i)
                else io.read(i).valid && line_div_addr === div_addrs(i) && line_set_addr =/= set_addrs(i)
    rrl_bank_conflict(i) := judge && io.readline.valid
    rrl_bank_conflict_intend(i) := judge && io.readline_intend
  }
  // conflict between a load read and the registered write: same set-division and way, and the
  // write mask covers a bank the load reads
  val wr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x =>
    io.read(x).valid && write_valid_reg &&
    div_addrs(x) === write_div_addr_dup_reg.head &&
    way_en(x) === write_wayen_dup_reg.head &&
    (write_bank_mask_reg(bank_addrs(x)(0)) || write_bank_mask_reg(bank_addrs(x)(1)) && io.is128Req(x))
  )
  // conflict between the readline and the registered write: same set-division
  val wrl_bank_conflict = io.readline.valid && write_valid_reg && line_div_addr === write_div_addr_dup_reg.head
  // ready
  io.readline.ready := !(wrl_bank_conflict)
  io.read.zipWithIndex.map { case (x, i) => x.ready := !(wr_bank_conflict(i) || rrhazard) }

  val perf_multi_read = PopCount(io.read.map(_.valid)) >= 2.U
  val bank_conflict_fast = Wire(Vec(LoadPipelineWidth, Bool()))
  (0 until LoadPipelineWidth).foreach(i => {
    bank_conflict_fast(i) := wr_bank_conflict(i) || rrl_bank_conflict(i) ||
    rr_bank_conflict_oldest(i)
    // the reported conflict is the registered (one cycle later) version of the fast one
    io.bank_conflict_slow(i) := RegNext(bank_conflict_fast(i))
    // fast wakeup is disabled on a write conflict, an intended readline conflict, or a
    // read-read conflict with any lower-numbered port
    io.disable_ld_fast_wakeup(i) := wr_bank_conflict(i) || rrl_bank_conflict_intend(i) ||
      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
  })
  XSPerfAccumulate("data_array_multi_read", perf_multi_read)
  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
    XSPerfAccumulate(s"data_array_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y))
  ))
  (0 until LoadPipelineWidth).foreach(i => {
    XSPerfAccumulate(s"data_array_rrl_bank_conflict_${i}", rrl_bank_conflict(i))
    XSPerfAccumulate(s"data_array_rw_bank_conflict_${i}", wr_bank_conflict(i))
    XSPerfAccumulate(s"data_array_read_${i}", io.read(i).valid)
  })
  XSPerfAccumulate("data_array_access_total", PopCount(io.read.map(_.valid)))
  XSPerfAccumulate("data_array_read_line", io.readline.valid)
  XSPerfAccumulate("data_array_write", io.write.valid)

  // raw SRAM outputs split into data / ecc, indexed (div)(bank)(way)
  val read_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays,new L1BankedDataReadResult()))))
  // NOTE(review): read_result_delayed is driven below but not read anywhere in this class
  val read_result_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays,new L1BankedDataReadResult()))))
  val read_error_delayed_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, Bool()))))
  dontTouch(read_result)
  dontTouch(read_error_delayed_result)

  // pseudo error injection: per-bank mask that is XORed onto the read data while the
  // pseudo_error request is valid for that bank
  val pseudo_data_toggle_mask = io.pseudo_error.bits.map {
    case bank =>
      Mux(io.pseudo_error.valid && bank.valid, bank.mask, 0.U)
  }
  // a fired readline touches at least one bank marked valid in the pseudo error request
  val readline_hit = io.readline.fire &&
                     (io.readline.bits.rmask & VecInit(io.pseudo_error.bits.map(_.valid)).asUInt).orR
  // a fired load read touches a bank marked valid in the pseudo error request
  val readbank_hit = io.read.zip(bank_addrs.zip(io.is128Req)).zipWithIndex.map {
                          case ((read, (bank_addr, is128Req)), i) =>
                            val error_bank0 = io.pseudo_error.bits(bank_addr(0))
                            val error_bank1 = io.pseudo_error.bits(bank_addr(1))
                            read.fire && (error_bank0.valid || error_bank1.valid && is128Req) && !io.bank_conflict_slow(i)
                      }.reduce(_|_)
  // the pseudo error request is accepted one cycle after a read hit one of its banks
  io.pseudo_error.ready := RegNext(readline_hit || readbank_hit)

  for (div_index <- 0 until DCacheSetDiv){
    for (bank_index <- 0 until DCacheBanks) {
      for (way_index <- 0 until DCacheWays) {
        //     Set Addr & Read Way Mask
        //
        //    Pipe 0   ....  Pipe (n-1)
        //      +      ....     +
        //      |      ....     |
        // +----+---------------+-----+
        //  X                        X
        //   X                      +------+ Bank Addr Match
        //    +---------+----------+
        //              |
        //     +--------+--------+
        //     |    Data Bank    |
        //     +-----------------+
        // per load port: this port reads exactly this SRAM and did not lose the read-read arbitration
        val loadpipe_en = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
          io.read(i).valid && div_addrs(i) === div_index.U && (bank_addrs(i)(0) === bank_index.U || bank_addrs(i)(1) === bank_index.U && io.is128Req(i)) &&
          way_en(i)(way_index) &&
          !rr_bank_conflict_oldest(i)
        })))
        val readline_en = Wire(Bool())
        if (ReduceReadlineConflict) {
          readline_en := io.readline.valid && io.readline.bits.rmask(bank_index) && line_way_en(way_index) && div_index.U === line_div_addr
        } else {
          readline_en := io.readline.valid && line_way_en(way_index) && div_index.U === line_div_addr
        }
        // the readline address has priority; otherwise the lowest-numbered enabled load port is used
        val sram_set_addr = Mux(readline_en,
          addr_to_dcache_div_set(io.readline.bits.addr),
          PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => loadpipe_en(i) -> set_addrs(i)))
        )
        val read_en = loadpipe_en.asUInt.orR || readline_en
        // read raw data
        val data_bank = data_banks(div_index)(bank_index)(way_index)
        data_bank.io.r.en := read_en
        data_bank.io.r.addr := sram_set_addr

        read_result(div_index)(bank_index)(way_index).ecc := getECCFromEncWord(data_bank.io.r.data)
        // the pseudo error mask flips data bits only; the ecc bits are left untouched
        read_result(div_index)(bank_index)(way_index).raw_data := getDataFromEncWord(data_bank.io.r.data) ^ pseudo_data_toggle_mask(bank_index)

        if (EnableDataEcc) {
          // decode a registered copy of the encoded word, captured the cycle after read_en
          val ecc_data = read_result(div_index)(bank_index)(way_index).asECCData()
          val ecc_data_delayed = RegEnable(ecc_data, RegNext(read_en))
          read_result(div_index)(bank_index)(way_index).error_delayed := dcacheParameters.dataCode.decode(ecc_data_delayed).error
          read_error_delayed_result(div_index)(bank_index)(way_index) := read_result(div_index)(bank_index)(way_index).error_delayed
        } else {
          read_result(div_index)(bank_index)(way_index).error_delayed := false.B
          read_error_delayed_result(div_index)(bank_index)(way_index) := false.B
        }

        read_result_delayed(div_index)(bank_index)(way_index) := RegEnable(read_result(div_index)(bank_index)(way_index), RegNext(read_en))
      }
    }
  }

  // flattened read enables of all SRAMs, only used for the performance counter below
  val data_read_oh = WireInit(VecInit(Seq.fill(DCacheSetDiv * DCacheBanks * DCacheWays)(0.U(1.W))))
  for(div_index <- 0 until DCacheSetDiv){
    for (bank_index <- 0 until DCacheBanks) {
      for (way_index <- 0 until DCacheWays) {
        data_read_oh(div_index *  DCacheBanks * DCacheWays + bank_index * DCacheWays + way_index) := data_banks(div_index)(bank_index)(way_index).io.r.en
      }
    }
  }
  XSPerfAccumulate("data_read_counter", PopCount(Cat(data_read_oh)))

  // read result: expose banked read result
  // TODO: clock gate
  (0 until LoadPipelineWidth).map(i => {
    // io.read_resp(i) := read_result(RegNext(bank_addrs(i)))(RegNext(OHToUInt(way_en(i))))
    // r_*: request fields one cycle after the request, used to select the data response
    val r_read_fire = RegNext(io.read(i).fire)
    val r_div_addr  = RegEnable(div_addrs(i), io.read(i).fire)
    val r_bank_addr = RegEnable(bank_addrs(i), io.read(i).fire)
    // note: plain RegNext, not gated by fire like r_div_addr / r_bank_addr
    val r_way_addr  = RegNext(OHToUInt(way_en(i)))
    // rr_*: request fields two cycles after the request, used to select the delayed error flag
    val rr_read_fire = RegNext(RegNext(io.read(i).fire))
    val rr_div_addr = RegEnable(RegEnable(div_addrs(i), io.read(i).fire), r_read_fire)
    val rr_bank_addr = RegEnable(RegEnable(bank_addrs(i), io.read(i).fire), r_read_fire)
    val rr_way_addr = RegEnable(RegEnable(OHToUInt(way_en(i)), io.read(i).fire), r_read_fire)
    (0 until VLEN/DCacheSRAMRowBits).map( j =>{
      io.read_resp(i)(j) := read_result(r_div_addr)(r_bank_addr(j))(r_way_addr)
      // error detection
      // normal read ports
      // an error is only reported for a read that fired and was not flagged as a bank conflict
      io.read_error_delayed(i)(j) := rr_read_fire && read_error_delayed_result(rr_div_addr)(rr_bank_addr(j))(rr_way_addr) && !RegNext(io.bank_conflict_slow(i))
    })
  })

  // readline port
  // r_* / rr_*: readline way and set-division registered once / twice, as for the load ports
  val readline_error_delayed = Wire(Vec(DCacheBanks, Bool()))
  val readline_r_way_addr = RegEnable(OHToUInt(io.readline.bits.way_en), io.readline.valid)
  val readline_rr_way_addr = RegEnable(readline_r_way_addr, RegNext(io.readline.valid))
  val readline_r_div_addr = RegEnable(line_div_addr, io.readline.valid)
  val readline_rr_div_addr = RegEnable(readline_r_div_addr, RegNext(io.readline.valid))
  (0 until DCacheBanks).map(i => {
    io.readline_resp(i) := read_result(readline_r_div_addr)(i)(readline_r_way_addr)
    readline_error_delayed(i) := read_result(readline_rr_div_addr)(i)(readline_rr_way_addr).error_delayed
  })
  // any bank of the line reporting an error flags the whole readline, two cycles after it fired
  io.readline_error_delayed := RegNext(RegNext(io.readline.fire)) && readline_error_delayed.asUInt.orR

  // write data_banks & ecc_banks
  for (div_index <- 0 until DCacheSetDiv) {
    for (bank_index <- 0 until DCacheBanks) {
      for (way_index <- 0 until DCacheWays) {
        // data write
        // this SRAM is written when its bank is in the registered mask and the registered
        // per-bank control selects this set-division and way
        val wen_reg = write_bank_mask_reg(bank_index) &&
          write_valid_dup_reg(bank_index) &&
          write_div_addr_dup_reg(bank_index) === div_index.U &&
          write_wayen_dup_reg(bank_index)(way_index)
        // registered ECC bits of the encoded write data of this bank
        // (one register is created per loop iteration, i.e. per SRAM)
        val write_ecc_reg = RegEnable(getECCFromEncWord(cacheParams.dataCode.encode(io.write.bits.data(bank_index))), io.write.valid)
        val data_bank = data_banks(div_index)(bank_index)(way_index)
        data_bank.io.w.en := wen_reg
        data_bank.io.w.addr := write_set_addr_dup_reg(bank_index)
        data_bank.io.w.data := asECCData(write_ecc_reg, write_data_reg(bank_index))
      }
    }
  }

  // ChiselDB logging of read-read bank conflicts; table and site names carry the hart id
  val tableName =  "BankConflict" + p(XSCoreParamsKey).HartId.toString
  val siteName = "BankedDataArray" + p(XSCoreParamsKey).HartId.toString
  val bankConflictTable = ChiselDB.createTable(tableName, new BankConflictDB)
  val bankConflictData = Wire(new BankConflictDB)
  for (i <- 0 until LoadPipelineWidth) {
    bankConflictData.set_index(i) := set_addrs(i)
    bankConflictData.addr(i) := io.read(i).bits.addr
  }

  // FIXME: rr_bank_conflict(0)(1) no generalization
  // only a conflict between ports 0 and 1 is recorded; bank and way are taken from port 0
  when(rr_bank_conflict(0)(1)) {
    (0 until (VLEN/DCacheSRAMRowBits)).map(i => {
      bankConflictData.bank_index(i) := bank_addrs(0)(i)
    })
    bankConflictData.way_index  := OHToUInt(way_en(0))
    bankConflictData.fake_rr_bank_conflict := set_addrs(0) === set_addrs(1) && div_addrs(0) === div_addrs(1)
  }.otherwise {
    (0 until (VLEN/DCacheSRAMRowBits)).map(i => {
      bankConflictData.bank_index(i) := 0.U
    })
    bankConflictData.way_index := 0.U
    bankConflictData.fake_rr_bank_conflict := false.B
  }

  // logging is additionally gated by a Constantin record
  val isWriteBankConflictTable = Constantin.createRecord(s"isWriteBankConflictTable${p(XSCoreParamsKey).HartId}")
  bankConflictTable.log(
    data = bankConflictData,
    en = isWriteBankConflictTable.orR && rr_bank_conflict(0)(1),
    site = siteName,
    clock = clock,
    reset = reset
  )

  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
    XSPerfAccumulate(s"data_array_fake_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y) && set_addrs(x)===set_addrs(y) && div_addrs(x) === div_addrs(y))
  ))

  // keep the debug-relevant signals from being optimized away when backend debug is enabled
  if (backendParams.debugEn){
    load_req_with_bank_conflict.map(dontTouch(_))
    dontTouch(read_result)
    dontTouch(read_error_delayed_result)
  }
}
643
644// the smallest access unit is bank
645class BankedDataArray(implicit p: Parameters) extends AbstractBankedDataArray {
646  println("  DCacheType: BankedDataArray")
647  val ReduceReadlineConflict = false
648
649  io.write.ready := true.B
650  io.write_dup.foreach(_.ready := true.B)
651
652  val data_banks = List.fill(DCacheSetDiv)(List.tabulate(DCacheBanks)(i => Module(new DataSRAMBank(i))))
653  data_banks.map(_.map(_.dump()))
654
655  val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
656  val set_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
657  val set_addrs_dup = Wire(Vec(LoadPipelineWidth, UInt()))
658  val div_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
659  val div_addrs_dup = Wire(Vec(LoadPipelineWidth, UInt()))
660  val bank_addrs = Wire(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, UInt())))
661  val bank_addrs_dup = Wire(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, UInt())))
662  val way_en_reg = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
663  val set_addrs_reg = Wire(Vec(LoadPipelineWidth, UInt()))
664  val set_addrs_dup_reg = Wire(Vec(LoadPipelineWidth, UInt()))
665
666  val line_set_addr = addr_to_dcache_div_set(io.readline.bits.addr)
667  val line_div_addr = addr_to_dcache_div(io.readline.bits.addr)
668  val line_way_en = io.readline.bits.way_en
669
670  val write_bank_mask_reg = RegEnable(io.write.bits.wmask, io.write.valid)
671  val write_data_reg = RegEnable(io.write.bits.data, io.write.valid)
672  val write_valid_reg = RegNext(io.write.valid)
673  val write_valid_dup_reg = io.write_dup.map(x => RegNext(x.valid))
674  val write_wayen_dup_reg = io.write_dup.map(x => RegEnable(x.bits.way_en, x.valid))
675  val write_set_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div_set(x.bits.addr), x.valid))
676  val write_div_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div(x.bits.addr), x.valid))
677
678  // read data_banks and ecc_banks
679  // for single port SRAM, do not allow read and write in the same cycle
680  val rwhazard = RegNext(io.write.valid)
681  val rrhazard = false.B // io.readline.valid
682  (0 until LoadPipelineWidth).map(rport_index => {
683    div_addrs(rport_index) := addr_to_dcache_div(io.read(rport_index).bits.addr)
684    div_addrs_dup(rport_index) := addr_to_dcache_div(io.read(rport_index).bits.addr_dup)
685    bank_addrs(rport_index)(0) := addr_to_dcache_bank(io.read(rport_index).bits.addr)
686    bank_addrs(rport_index)(1) := Mux(io.is128Req(rport_index), bank_addrs(rport_index)(0) + 1.U, bank_addrs(rport_index)(0))
687    bank_addrs_dup(rport_index)(0) := addr_to_dcache_bank(io.read(rport_index).bits.addr_dup)
688    bank_addrs_dup(rport_index)(1) := Mux(io.is128Req(rport_index), bank_addrs_dup(rport_index)(0) + 1.U, bank_addrs_dup(rport_index)(0))
689    set_addrs(rport_index) := addr_to_dcache_div_set(io.read(rport_index).bits.addr)
690    set_addrs_dup(rport_index) := addr_to_dcache_div_set(io.read(rport_index).bits.addr_dup)
691    set_addrs_reg(rport_index) := RegEnable(addr_to_dcache_div_set(io.read(rport_index).bits.addr), io.read(rport_index).valid)
692    set_addrs_dup_reg(rport_index) := RegEnable(addr_to_dcache_div_set(io.read(rport_index).bits.addr_dup), io.read(rport_index).valid)
693
694    // use way_en to select a way after data read out
695    assert(!(RegNext(io.read(rport_index).fire && PopCount(io.read(rport_index).bits.way_en) > 1.U)))
696    way_en(rport_index) := io.read(rport_index).bits.way_en
697    way_en_reg(rport_index) := RegEnable(io.read(rport_index).bits.way_en, io.read(rport_index).valid)
698  })
699
700  // read each bank, get bank result
     //
     // rr_bank_conflict(x)(y) is the pairwise read-vs-read conflict between load ports x and y
     // in the current cycle. A pair conflicts only when both requests are valid, target the
     // same div, overlap in bankMask, and use different set addresses; two requests to the
     // same set are not counted as a conflict. The diagonal (x == y) is constant false.
701  val rr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x => Seq.tabulate(LoadPipelineWidth)(y => {
702    if (x == y) {
703      false.B
704    } else {
705      io.read(x).valid && io.read(y).valid &&
706      div_addrs(x) === div_addrs(y) &&
707      (io.read(x).bits.bankMask & io.read(y).bits.bankMask) =/= 0.U &&
708      set_addrs(x) =/= set_addrs(y)
709    }
710  }
711  ))
712
     // Per-port view of the matrix: port i conflicts with at least one other port.
713  val load_req_with_bank_conflict = rr_bank_conflict.map(_.reduce(_ || _))
     // NOTE(review): load_req_valid is not referenced in the visible part of this file section.
714  val load_req_valid = io.read.map(_.valid)
715  val load_req_lqIdx = io.read.map(_.bits.lqIdx)
716  val load_req_index = (0 until LoadPipelineWidth).map(_.asUInt)
717
     // Choose one winner among the conflicting ports. selcetOldestPort is defined outside this
     // section; presumably it orders the candidates by lqIdx, oldest first -- TODO confirm.
     // Only the second tuple element is used here, as the index of the selected port, which
     // is then expanded to a one-hot vector of Bools.
718  val load_req_bank_conflict_selcet = selcetOldestPort(load_req_with_bank_conflict, load_req_lqIdx, load_req_index)
719  val load_req_bank_select_port  = UIntToOH(load_req_bank_conflict_selcet._2).asBools
720
     // Despite the name, entry i is high for a conflicting port that was NOT selected above,
     // i.e. a port that loses the read-vs-read selection in this cycle.
721  val rr_bank_conflict_oldest = (0 until LoadPipelineWidth).map(i =>
722    !load_req_bank_select_port(i) && load_req_with_bank_conflict(i)
723  )
724
     // Read-vs-readline conflict per load port, in two flavours that share the same address
     // check (`judge`): rrl_bank_conflict is qualified by io.readline.valid, while
     // rrl_bank_conflict_intend is qualified by io.readline_intend.
725  val rrl_bank_conflict = Wire(Vec(LoadPipelineWidth, Bool()))
726  val rrl_bank_conflict_intend = Wire(Vec(LoadPipelineWidth, Bool()))
727  (0 until LoadPipelineWidth).foreach { i =>
       // Elaboration-time choice: with ReduceReadlineConflict the port must also overlap the
       // readline's rmask in bankMask; otherwise hitting the same div is enough.
728    val judge = if (ReduceReadlineConflict) io.read(i).valid && (io.readline.bits.rmask & io.read(i).bits.bankMask) =/= 0.U && div_addrs(i) === line_div_addr
729                else io.read(i).valid && div_addrs(i)===line_div_addr
730    rrl_bank_conflict(i) := judge && io.readline.valid
731    rrl_bank_conflict_intend(i) := judge && io.readline_intend
732  }
     // Read-vs-write conflict per load port: a valid read hits the div of the pending write
     // (write_valid_reg / write_div_addr_dup_reg / write_bank_mask_reg are defined outside this
     // section), and the write mask covers the read's first bank, or its second bank when the
     // request is a 128-bit one. Note that && binds tighter than ||, so is128Req only
     // qualifies the second-bank term.
733  val wr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x =>
734    io.read(x).valid &&
735    write_valid_reg &&
736    div_addrs(x) === write_div_addr_dup_reg.head &&
737    (write_bank_mask_reg(bank_addrs(x)(0)) || write_bank_mask_reg(bank_addrs(x)(1)) && io.is128Req(x))
738  )
     // Readline-vs-write conflict: compared at div granularity only (no bank mask check).
739  val wrl_bank_conflict = io.readline.valid && write_valid_reg && line_div_addr === write_div_addr_dup_reg.head
740  // ready
     // readline is back-pressured only by a pending write to the same div; a load port is
     // back-pressured by a write conflict or by rrhazard (defined outside this section).
     // Read-vs-read and read-vs-readline conflicts do not affect ready.
741  io.readline.ready := !(wrl_bank_conflict)
742  io.read.zipWithIndex.map{case(x, i) => x.ready := !(wr_bank_conflict(i) || rrhazard)}
743
     // High when at least two load read ports carry a valid request in the same cycle.
744  val perf_multi_read = PopCount(io.read.map(_.valid)) >= 2.U
745  (0 until LoadPipelineWidth).foreach(i => {
746    // remove fake rr_bank_conflict situation in s2
       // Registered conflict report (one cycle after the request): the port hit a write or
       // readline conflict, or it lost the read-vs-read selection (rr_bank_conflict_oldest).
747    val real_other_bank_conflict_reg = RegNext(wr_bank_conflict(i) || rrl_bank_conflict(i))
748    val real_rr_bank_conflict_reg = RegNext(rr_bank_conflict_oldest(i))
749    io.bank_conflict_slow(i) := real_other_bank_conflict_reg || real_rr_bank_conflict_reg
750
751    // get result in s1
       // Unregistered early hint. It differs from the slow report in two ways: the readline
       // term uses rrl_bank_conflict_intend (io.readline_intend), and the read-vs-read term
       // only looks at conflicts with lower-indexed ports, so port 0 never has that term.
752    io.disable_ld_fast_wakeup(i) := wr_bank_conflict(i) || rrl_bank_conflict_intend(i) ||
753      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
754  })
     // Performance counters. Read-vs-read conflicts are counted once per unordered port pair
     // (x < y); the remaining counters are per port or per request type.
755  XSPerfAccumulate("data_array_multi_read", perf_multi_read)
756  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
757    XSPerfAccumulate(s"data_array_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y))
758  ))
759  (0 until LoadPipelineWidth).foreach(i => {
760    XSPerfAccumulate(s"data_array_rrl_bank_conflict_${i}", rrl_bank_conflict(i))
761    XSPerfAccumulate(s"data_array_rw_bank_conflict_${i}", wr_bank_conflict(i))
762    XSPerfAccumulate(s"data_array_read_${i}", io.read(i).valid)
763  })
764  XSPerfAccumulate("data_array_access_total", PopCount(io.read.map(_.valid)))
765  XSPerfAccumulate("data_array_read_line", io.readline.valid)
766  XSPerfAccumulate("data_array_write", io.write.valid)
767
     // Read results indexed by [div][bank][way]:
     //   bank_result             - data/ECC unpacked from the SRAM read port
     //   bank_result_delayed     - registered copy of bank_result
     //   read_bank_error_delayed - ECC error flag per entry (constant false without ECC)
     // All three are driven inside the per-bank loop that follows.
768  val bank_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, new L1BankedDataReadResult()))))
769  val bank_result_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, new L1BankedDataReadResult()))))
770  val read_bank_error_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, Bool()))))
771
     // Pseudo-error injection: per bank, the bit mask to XOR into the read data. It is the
     // bank's configured mask while io.pseudo_error.valid and that bank's valid bit are set,
     // and zero (no effect on the data) otherwise.
772  val pseudo_data_toggle_mask = io.pseudo_error.bits.map {
773    case bank =>
774      Mux(io.pseudo_error.valid && bank.valid, bank.mask, 0.U)
775  }
     // A readline fires and its rmask selects at least one bank with a pseudo error armed.
776  val readline_hit = io.readline.fire &&
777                     (io.readline.bits.rmask & VecInit(io.pseudo_error.bits.map(_.valid)).asUInt).orR
     // Some load port fires on a bank with a pseudo error armed (the second bank counts only
     // for a 128-bit request) while its bank_conflict_slow is low.
     // NOTE(review): io.bank_conflict_slow(i) is driven from RegNext'ed conflict signals, whereas
     // read.fire is sampled in the current cycle -- confirm this cycle alignment is intended.
778  val readbank_hit = io.read.zip(bank_addrs.zip(io.is128Req)).zipWithIndex.map {
779                          case ((read, (bank_addr, is128Req)), i) =>
780                            val error_bank0 = io.pseudo_error.bits(bank_addr(0))
781                            val error_bank1 = io.pseudo_error.bits(bank_addr(1))
782                            read.fire && (error_bank0.valid || error_bank1.valid && is128Req) && !io.bank_conflict_slow(i)
783                      }.reduce(_|_)
     // The injection request is acknowledged one cycle after a read that touched an armed bank.
784  io.pseudo_error.ready := RegNext(readline_hit || readbank_hit)
785
     // Per-bank read path. For every (div, bank) SRAM: decide which requester reads it and with
     // which set address, drive the SRAM read port, then unpack the returned word per way into
     // data and ECC and compute the delayed ECC error flag.
786  for (div_index <- 0 until DCacheSetDiv) {
787    for (bank_index <- 0 until DCacheBanks) {
788      //     Set Addr & Read Way Mask
789      //
790      //    Pipe 0   ....  Pipe (n-1)
791      //      +      ....     +
792      //      |      ....     |
793      // +----+---------------+-----+
794      //  X                        X
795      //   X                      +------+ Bank Addr Match
796      //    +---------+----------+
797      //              |
798      //     +--------+--------+
799      //     |    Data Bank    |
800      //     +-----------------+
         // bank_addr_matchs(i): load port i reads this bank in this cycle. The request is valid,
         // targets this div, this bank is its first bank (or its second bank for a 128-bit
         // request), and the port did not lose the read-vs-read selection.
801      val bank_addr_matchs = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
802        io.read(i).valid && div_addrs(i) === div_index.U && (bank_addrs(i)(0) === bank_index.U || bank_addrs(i)(1) === bank_index.U && io.is128Req(i)) &&
803          !rr_bank_conflict_oldest(i)
804      })))
         // Same match, computed from the *_dup copies of the div/bank address; it is only used
         // to select bank_set_addr_dup below.
805      val bank_addr_matchs_dup = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
806        io.read(i).valid && div_addrs_dup(i) === div_index.U && (bank_addrs_dup(i)(0) === bank_index.U || bank_addrs_dup(i)(1) === bank_index.U && io.is128Req(i)) &&
807          !rr_bank_conflict_oldest(i)
808      })))
         // A valid readline targets this div; with ReduceReadlineConflict it must also select
         // this bank in rmask.
809      val readline_match = Wire(Bool())
810      if (ReduceReadlineConflict) {
811        readline_match := io.readline.valid && io.readline.bits.rmask(bank_index) && line_div_addr === div_index.U
812      } else {
813        readline_match := io.readline.valid && line_div_addr === div_index.U
814      }
815
         // Set address selection: readline takes precedence over the load ports; among load
         // ports PriorityMux picks the set address of the lowest-indexed matching port.
816      val bank_set_addr = Mux(readline_match,
817        line_set_addr,
818        PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => bank_addr_matchs(i) -> set_addrs(i)))
819      )
820      val bank_set_addr_dup = Mux(readline_match,
821        line_set_addr,
822        PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => bank_addr_matchs_dup(i) -> set_addrs_dup(i)))
823      )
         // The read enable is derived from the non-dup matches only.
824      val read_enable = bank_addr_matchs.asUInt.orR || readline_match
825
826      // read raw data
827      val data_bank = data_banks(div_index)(bank_index)
828      data_bank.io.r.en := read_enable
829
         // Banks listed in DuplicatedQueryBankSeq take their set address from the dup copy;
         // presumably this splits the address fan-out for timing -- TODO confirm.
830      if (DuplicatedQueryBankSeq.contains(bank_index)) {
831        data_bank.io.r.addr := bank_set_addr_dup
832      } else {
833        data_bank.io.r.addr := bank_set_addr
834      }
835      for (way_index <- 0 until DCacheWays) {
           // Split the way's encoded word into ECC bits and data. The data is XORed with this
           // bank's pseudo-error toggle mask, which is zero unless an error is being injected.
836        bank_result(div_index)(bank_index)(way_index).ecc := getECCFromEncWord(data_bank.io.r.data(way_index))
837        bank_result(div_index)(bank_index)(way_index).raw_data := getDataFromEncWord(data_bank.io.r.data(way_index)) ^ pseudo_data_toggle_mask(bank_index)
838
           // EnableDataEcc is an elaboration-time switch: without it the error flags are tied low.
839        if (EnableDataEcc) {
             // Capture the ECC-encoded word in the cycle after read_enable was asserted
             // (presumably when the SRAM output is valid -- TODO confirm) and decode from that
             // register, so the error flag lags the raw data.
840          val ecc_data = bank_result(div_index)(bank_index)(way_index).asECCData()
841          val ecc_data_delayed = RegEnable(ecc_data, RegNext(read_enable))
842          bank_result(div_index)(bank_index)(way_index).error_delayed := dcacheParameters.dataCode.decode(ecc_data_delayed).error
843          read_bank_error_delayed(div_index)(bank_index)(way_index) := bank_result(div_index)(bank_index)(way_index).error_delayed
844        } else {
845          bank_result(div_index)(bank_index)(way_index).error_delayed := false.B
846          read_bank_error_delayed(div_index)(bank_index)(way_index) := false.B
847        }
           // Registered copy of the full result entry, captured with the same delayed enable.
848        bank_result_delayed(div_index)(bank_index)(way_index) := RegEnable(bank_result(div_index)(bank_index)(way_index), RegNext(read_enable))
849      }
850    }
851  }
852
     // Read activity counter for performance monitoring. For each bank the term
     // PopCount(Fill(DCacheWays, en)) evaluates to DCacheWays while the bank's read enable is
     // high and to 0 otherwise; terms are summed per div and then over all divs.
     // Despite the `_oh` suffix these values are counts, not one-hot vectors.
853  val data_read_oh = WireInit(VecInit(Seq.fill(DCacheSetDiv)(0.U(XLEN.W))))
854  for (div_index <- 0 until DCacheSetDiv){
855    val temp = WireInit(VecInit(Seq.fill(DCacheBanks)(0.U(XLEN.W))))
856    for (bank_index <- 0 until DCacheBanks) {
857      temp(bank_index) := PopCount(Fill(DCacheWays, data_banks(div_index)(bank_index).io.r.en.asUInt))
858    }
859    data_read_oh(div_index) := temp.reduce(_ + _)
860  }
861  XSPerfAccumulate("data_read_counter", data_read_oh.foldLeft(0.U)(_ + _))
862
     // Per load port: return the data of the selected bank(s) and the delayed ECC error flag.
     // The request's div / bank / way are registered so they line up with the SRAM output.
863  (0 until LoadPipelineWidth).map(i => {
864    // 1 cycle after read fire(load s2)
865    val r_read_fire = RegNext(io.read(i).fire)
866    val r_div_addr = RegEnable(div_addrs(i), io.read(i).fire)
867    val r_bank_addr = RegEnable(bank_addrs(i), io.read(i).fire)
868    val r_way_addr = RegEnable(OHToUInt(way_en(i)), io.read(i).fire)
869    // 2 cycles after read fire(load s3)
       // NOTE(review): each rr_* value builds its own first-stage RegEnable with the same input
       // and enable as the matching r_* register above instead of reusing it -- confirm whether
       // the duplication is intentional.
870    val rr_read_fire = RegNext(r_read_fire)
871    val rr_div_addr = RegEnable(RegEnable(div_addrs(i), io.read(i).fire), r_read_fire)
872    val rr_bank_addr = RegEnable(RegEnable(bank_addrs(i), io.read(i).fire), r_read_fire)
873    val rr_way_addr = RegEnable(RegEnable(OHToUInt(way_en(i)), io.read(i).fire), r_read_fire)
       // One response slot per SRAM row of a VLEN-wide access (slot j uses the j-th bank address).
874    (0 until VLEN/DCacheSRAMRowBits).map( j =>{
         // Data response: bank_result selected by the one-cycle-delayed div / bank / way.
875      io.read_resp(i)(j)          := bank_result(r_div_addr)(r_bank_addr(j))(r_way_addr)
876      // error detection
         // Reported only for a request that fired two cycles earlier, using the two-cycle-delayed
         // address, and suppressed when bank_conflict_slow (delayed one more cycle) was set.
877      io.read_error_delayed(i)(j) := rr_read_fire && read_bank_error_delayed(rr_div_addr)(rr_bank_addr(j))(rr_way_addr) && !RegNext(io.bank_conflict_slow(i))
878    })
879  })
880
881  // read result: expose banked read result
     // Readline response path. The way and div of the readline request are registered twice;
     // note that both stages are enabled by io.readline.valid (current / delayed by one cycle),
     // not by io.readline.fire.
882  val readline_error_delayed = Wire(Vec(DCacheBanks, Bool()))
883  val readline_r_way_addr = RegEnable(OHToUInt(io.readline.bits.way_en), io.readline.valid)
884  val readline_rr_way_addr = RegEnable(readline_r_way_addr, RegNext(io.readline.valid))
885  val readline_r_div_addr = RegEnable(line_div_addr, io.readline.valid)
886  val readline_rr_div_addr = RegEnable(readline_r_div_addr, RegNext(io.readline.valid))
887  (0 until DCacheBanks).map(i => {
       // Data uses the first-stage registers; the error flag uses the second-stage registers.
888    io.readline_resp(i) := bank_result(readline_r_div_addr)(i)(readline_r_way_addr)
889    readline_error_delayed(i) := bank_result(readline_rr_div_addr)(i)(readline_rr_way_addr).error_delayed
890  })
     // Any bank reporting an error flags the whole line, but only for a readline that actually
     // fired two cycles earlier.
891  io.readline_error_delayed := RegNext(RegNext(io.readline.fire)) && readline_error_delayed.asUInt.orR
892
893  // write data_banks & ecc_banks
     // Drives the write port of every (div, bank) SRAM. The write_*_reg / write_*_dup_reg
     // signals are defined outside this section; presumably they are registered copies of
     // the io.write request -- TODO confirm.
894  for (div_index <- 0 until DCacheSetDiv) {
895    for (bank_index <- 0 until DCacheBanks) {
896      // data write
         // Write this bank when it is selected by the bank mask, the per-bank valid copy is set,
         // the write targets this div, and io.write.valid was high in the previous cycle.
897      val wen_reg = write_bank_mask_reg(bank_index) &&
898        write_valid_dup_reg(bank_index) &&
899        write_div_addr_dup_reg(bank_index) === div_index.U && RegNext(io.write.valid)
         // ECC bits of the encoded write data, captured while io.write.valid is high.
         // This val sits inside both loops, so one register is created per (div, bank).
900      val write_ecc_reg = RegEnable(getECCFromEncWord(cacheParams.dataCode.encode(io.write.bits.data(bank_index))), io.write.valid)
901      val data_bank = data_banks(div_index)(bank_index)
902      data_bank.io.w.en := wen_reg
903      data_bank.io.w.way_en := write_wayen_dup_reg(bank_index)
904      data_bank.io.w.addr := write_set_addr_dup_reg(bank_index)
         // The stored word combines the registered ECC bits with the registered data.
905      data_bank.io.w.data := asECCData(write_ecc_reg, write_data_reg(bank_index))
906    }
907  }
908
     // ChiselDB logging of read-vs-read bank conflicts. Table and site names are suffixed with
     // the hart id taken from the core parameters.
909  val tableName = "BankConflict" + p(XSCoreParamsKey).HartId.toString
910  val siteName = "BankedDataArray" + p(XSCoreParamsKey).HartId.toString
911  val bankConflictTable = ChiselDB.createTable(tableName, new BankConflictDB)
912  val bankConflictData = Wire(new BankConflictDB)
     // Address and set index are recorded for every load port.
913  for (i <- 0 until LoadPipelineWidth) {
914    bankConflictData.set_index(i) := set_addrs(i)
915    bankConflictData.addr(i) := io.read(i).bits.addr
916  }
917
918  // FIXME: rr_bank_conflict(0)(1) no generalization
     // Only the port 0 / port 1 pair is observed; bank_index and way_index are taken from
     // port 0, and all three fields are zeroed when that pair does not conflict.
     // NOTE(review): rr_bank_conflict(0)(1) already requires set_addrs(0) =/= set_addrs(1), so
     // the fake_rr_bank_conflict expression below cannot be true while this branch is taken.
919  when(rr_bank_conflict(0)(1)) {
920    (0 until (VLEN/DCacheSRAMRowBits)).map(i => {
921      bankConflictData.bank_index(i) := bank_addrs(0)(i)
922    })
923    bankConflictData.way_index := OHToUInt(way_en(0))
924    bankConflictData.fake_rr_bank_conflict := set_addrs(0) === set_addrs(1) && div_addrs(0) === div_addrs(1)
925  }.otherwise {
926    (0 until (VLEN/DCacheSRAMRowBits)).map(i => {
927      bankConflictData.bank_index(i) := 0.U
928    })
929    bankConflictData.way_index := 0.U
930    bankConflictData.fake_rr_bank_conflict := false.B
931  }
932
     // A record is logged when the Constantin value is non-zero and ports 0 and 1 conflict.
     // Presumably the Constantin record is a run-time adjustable switch -- TODO confirm.
933  val isWriteBankConflictTable = Constantin.createRecord(s"isWriteBankConflictTable${p(XSCoreParamsKey).HartId}")
934  bankConflictTable.log(
935    data = bankConflictData,
936    en = isWriteBankConflictTable.orR && rr_bank_conflict(0)(1),
937    site = siteName,
938    clock = clock,
939    reset = reset
940  )
941
     // One counter per unordered port pair (x < y) for "fake" read-vs-read conflicts, i.e. a
     // reported conflict between two requests that target the same set and div.
     // NOTE(review): rr_bank_conflict(x)(y) is defined with set_addrs(x) =/= set_addrs(y), so
     // the condition below can never be true and these counters stay at zero -- confirm
     // whether they are still wanted.
942  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
943    XSPerfAccumulate(s"data_array_fake_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y) && set_addrs(x) === set_addrs(y) && div_addrs(x) === div_addrs(y))
944  ))
945
     // Debug builds only: mark the per-port conflict flags, the bank read results and the ECC
     // error flags with dontTouch so they are kept in the generated design for inspection.
946  if (backendParams.debugEn){
947    load_req_with_bank_conflict.map(dontTouch(_))
948    dontTouch(bank_result)
949    dontTouch(read_bank_error_delayed)
950  }
951}
952