xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/data/BankedDataArray.scala (revision 2caa7ef23d5d6566d68f5f98a59dc7ee9066b96a)
1/***************************************************************************************
2* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
3* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
4* Copyright (c) 2020-2021 Peng Cheng Laboratory
5*
6* XiangShan is licensed under Mulan PSL v2.
7* You can use this software according to the terms and conditions of the Mulan PSL v2.
8* You may obtain a copy of Mulan PSL v2 at:
9*          http://license.coscl.org.cn/MulanPSL2
10*
11* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
12* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
13* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
14*
15* See the Mulan PSL v2 for more details.
16*
17*
18* Acknowledgement
19*
20* This implementation is inspired by several key papers:
21* [1] Gurindar S. Sohi, and Manoj Franklin. "[High-bandwidth data memory systems for superscalar processors.]
22* (https://doi.org/10.1145/106972.106980)" 4th International Conference on Architectural Support for Programming
23* Languages and Operating Systems (ASPLOS). 1991.
24***************************************************************************************/
25
26package xiangshan.cache
27
28import org.chipsalliance.cde.config.Parameters
29import chisel3._
30import utils._
31import utility._
32import chisel3.util._
33import utility.mbist.MbistPipeline
34import xiangshan.mem.LqPtr
35import xiangshan.{L1CacheErrorInfo, XSCoreParamsKey}
36
37import scala.math.max
38
/**
  * Record type of the bank-conflict ChiselDB table.
  *
  * One entry carries the request addresses and set indices of all load pipelines
  * plus the bank / way indices and a "fake conflict" flag.
  */
class BankConflictDB(implicit p: Parameters) extends DCacheBundle {
  // address of the read request on each load pipeline
  val addr = Vec(LoadPipelineWidth, Bits(PAddrBits.W))
  // set index of the read request on each load pipeline
  val set_index = Vec(
    LoadPipelineWidth,
    UInt((DCacheAboveIndexOffset - DCacheSetOffset).W)
  )
  // one bank index per SRAM row needed to cover a VLEN-wide access
  val bank_index = Vec(
    VLEN / DCacheSRAMRowBits,
    UInt((DCacheSetOffset - DCacheBankOffset).W)
  )
  // binary-encoded way index
  val way_index = UInt(wayBits.W)
  // set when the recorded read-read conflict is flagged as "fake"
  val fake_rr_bank_conflict = Bool()
}
46
/** Basic data-array request: a way-enable mask and an address. */
class L1BankedDataReadReq(implicit p: Parameters) extends DCacheBundle {
  // way enable mask, one bit per DCache way
  val way_en = Bits(DCacheWays.W)
  // request address (PAddrBits wide)
  val addr = Bits(PAddrBits.W)
}
52
/**
  * Load-pipeline word read request: way-enable mask, address (plus a duplicated
  * copy of it), bank mask, kill flag and the load-queue pointer of the request.
  */
class L1BankedDataReadReqWithMask(implicit p: Parameters) extends DCacheBundle {
  // way enable mask, one bit per DCache way
  val way_en = Bits(DCacheWays.W)
  // request address
  val addr = Bits(PAddrBits.W)
  // second copy of the request address
  val addr_dup = Bits(PAddrBits.W)
  // one bit per bank touched by the request
  val bankMask = Bits(DCacheBanks.W)
  // kill flag carried with the request
  val kill = Bool()
  // load queue pointer of the requesting load
  val lqIdx = new LqPtr
}
62
/** Line read request: the basic read request extended with a per-bank read mask. */
class L1BankedDataReadLineReq(implicit p: Parameters) extends L1BankedDataReadReq {
  // per-bank read mask
  val rmask = Bits(DCacheBanks.W)
}
67
68// Now, we can write a cache-block in a single cycle
/**
  * Line write request: way / address from the basic request plus a per-bank
  * write mask and one SRAM row of data for every bank.
  */
class L1BankedDataWriteReq(implicit p: Parameters) extends L1BankedDataReadReq {
  // per-bank write mask
  val wmask = Bits(DCacheBanks.W)
  // write data, one SRAM row per bank
  val data = Vec(DCacheBanks, Bits(DCacheSRAMRowBits.W))
}
74
75// cache-block write request without data
/** Control-only write request: carries just the way-enable mask and address inherited from [[L1BankedDataReadReq]]. */
class L1BankedDataWriteReqCtrl(implicit p: Parameters) extends L1BankedDataReadReq
77
/** Result of reading one SRAM row: raw data, its ECC bits and a delayed error flag. */
class L1BankedDataReadResult(implicit p: Parameters) extends DCacheBundle {
  // ECC bits read together with the data
  val ecc = Bits(dataECCBits.W)
  // data bits of the row
  val raw_data = Bits(DCacheSRAMRowBits.W)
  // error flag, 1 cycle later than data resp
  val error_delayed = Bool()

  /** Concatenates the ECC bits (upper part) with the raw data (lower part). */
  def asECCData() = Cat(ecc, raw_data)
}
89
/** Write request presented to a [[DataSRAMBank]]. */
class DataSRAMBankWriteReq(implicit p: Parameters) extends DCacheBundle {
  // write enable
  val en = Bool()
  // SRAM set address; width is left unspecified here
  val addr = UInt()
  // way enable mask, one bit per DCache way
  val way_en = UInt(DCacheWays.W)
  // encoded write data (encDataBits wide)
  val data = UInt(encDataBits.W)
}
96
97// wrap a sram
/**
  * Wrapper around a single one-way, single-port `SRAMTemplate` holding encoded data words.
  *
  * @param bankIdx bank index of this SRAM, only used in debug output
  * @param wayIdx  way index of this SRAM, only used in debug output
  */
class DataSRAM(bankIdx: Int, wayIdx: Int)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle() {
    // write port
    val w = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Input(UInt(encDataBits.W))
    }

    // read port
    val r = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Output(UInt(encDataBits.W))
    }
  })

  // backing storage: DCacheSets / DCacheSetDiv entries of one encoded word each
  val data_sram = Module(new SRAMTemplate(
    Bits(encDataBits.W),
    set = DCacheSets / DCacheSetDiv,
    way = 1,
    shouldReset = false,
    holdRead = false,
    singlePort = true,
    hasMbist = hasMbist
  ))

  // write side: forward the request, the single way is always selected
  data_sram.io.w.req.valid := io.w.en
  data_sram.io.w.req.bits.apply(setIdx = io.w.addr, data = io.w.data, waymask = 1.U)

  // read side: forward the request and return way 0 of the response
  data_sram.io.r.req.valid := io.r.en
  data_sram.io.r.req.bits.apply(setIdx = io.r.addr)
  io.r.data := data_sram.io.r.resp.data(0)

  XSPerfAccumulate("part_data_read_counter", data_sram.io.r.req.valid)

  /** Debug print of a read: fires the cycle after `io.r.en`, using the registered address. */
  def dump_r() = {
    val readFired = RegNext(io.r.en)
    val readSet = RegEnable(io.r.addr, io.r.en)
    XSDebug(readFired,
      "bank read set %x bank %x way %x data %x\n",
      readSet, bankIdx.U, wayIdx.U, io.r.data)
  }

  /** Debug print of a write in the cycle `io.w.en` is asserted. */
  def dump_w() = {
    XSDebug(io.w.en,
      "bank write set %x bank %x way %x data %x\n",
      io.w.addr, bankIdx.U, wayIdx.U, io.w.data)
  }

  /** Emits both the write and the read debug prints. */
  def dump() = {
    dump_w()
    dump_r()
  }
}
160
// wrap the data rows of all DCacheWays ways of one bank
/**
  * One data bank built from `DCacheWays` one-way, single-port SRAMs.
  *
  * A write goes to the ways selected by `io.w.way_en` (asserted to be at most one);
  * a read is issued to every way and all ways' data are returned.
  *
  * @param index bank index given by the instantiating module
  */
class DataSRAMBank(index: Int)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle() {
    // write request
    val w = Input(new DataSRAMBankWriteReq)

    // read port, returns one encoded word per way
    val r = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Output(Vec(DCacheWays, UInt(encDataBits.W)))
    }
  })

  // a write may target at most one way
  assert(RegNext(!io.w.en || PopCount(io.w.way_en) <= 1.U))

  // external controls do not read and write at the same time
  val w_info = io.w
  // val rw_bypass = RegNext(io.w.addr === io.r.addr && io.w.way_en === io.r.way_en && io.w.en)

  // multiway data bank: one clock-gated SRAM per way
  val data_bank = Seq.fill(DCacheWays) {
    Module(new SRAMTemplate(
      Bits(encDataBits.W),
      set = DCacheSets / DCacheSetDiv,
      way = 1,
      shouldReset = false,
      holdRead = false,
      singlePort = true,
      withClockGate = true,
      hasMbist = hasMbist
    ))
  }

  data_bank.zipWithIndex.foreach { case (sram, w) =>
    // write only the way selected by way_en
    val wen = w_info.en && w_info.way_en(w)
    sram.io.w.req.valid := wen
    sram.io.w.req.bits.apply(setIdx = w_info.addr, data = w_info.data, waymask = 1.U)
    // every way is read with the same enable and address
    sram.io.r.req.valid := io.r.en
    sram.io.r.req.bits.apply(setIdx = io.r.addr)
  }
  XSPerfAccumulate("part_data_read_counter", PopCount(Cat(data_bank.map(_.io.r.req.valid))))

  io.r.data := data_bank.map(_.io.r.resp.data(0))

  /** Debug print of a read: fires the cycle after `io.r.en`, using the registered address. */
  def dump_r() = {
    val readFired = RegNext(io.r.en)
    val readAddr = RegEnable(io.r.addr, io.r.en)
    XSDebug(readFired,
      "bank read addr %x data %x\n",
      readAddr, io.r.data.asUInt)
  }

  /** Debug print of a write in the cycle `io.w.en` is asserted. */
  def dump_w() = {
    XSDebug(io.w.en,
      "bank write addr %x way_en %x data %x\n",
      io.w.addr, io.w.way_en, io.w.data)
  }

  /** Emits both the write and the read debug prints. */
  def dump() = {
    dump_w()
    dump_r()
  }
}
230
// Marker object; AbstractBankedDataArray wraps it in Some(...) when EnableDataEcc is set.
case object HasDataEccParam
232
233//                     Banked DCache Data
234// -----------------------------------------------------------------
235// | Bank0 | Bank1 | Bank2 | Bank3 | Bank4 | Bank5 | Bank6 | Bank7 |
236// -----------------------------------------------------------------
237// | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  |
238// | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  |
239// | ....  | ....  | ....  | ....  | ....  | ....  | ....  | ....  |
240// -----------------------------------------------------------------
/**
  * Common IO and helper functions shared by the banked DCache data array implementations.
  *
  * Declares the load-pipeline word read ports, the main-pipeline line read / write ports,
  * their responses and the conflict / wakeup indications. It also provides helpers to split
  * and join ECC-encoded words, debug dump routines and a reduction that selects one load
  * port by its load-queue pointer.
  */
abstract class AbstractBankedDataArray(implicit p: Parameters) extends DCacheModule
{
  val DataEccParam = if(EnableDataEcc) Some(HasDataEccParam) else None
  val ReadlinePortErrorIndex = LoadPipelineWidth
  val io = IO(new DCacheBundle {
    // load pipeline read word req
    val read = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new L1BankedDataReadReqWithMask)))
    val is128Req = Input(Vec(LoadPipelineWidth, Bool()))
    // main pipeline read / write line req
    val readline_intend = Input(Bool())
    val readline = Flipped(DecoupledIO(new L1BankedDataReadLineReq))
    val write = Flipped(DecoupledIO(new L1BankedDataWriteReq))
    val write_dup = Vec(DCacheBanks, Flipped(Decoupled(new L1BankedDataWriteReqCtrl)))
    // data for readline and loadpipe
    val readline_resp = Output(Vec(DCacheBanks, new L1BankedDataReadResult()))
    val readline_error_delayed = Output(Bool())
    val read_resp          = Output(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, new L1BankedDataReadResult())))
    val read_error_delayed = Output(Vec(LoadPipelineWidth,Vec(VLEN/DCacheSRAMRowBits, Bool())))
    // val nacks = Output(Vec(LoadPipelineWidth, Bool()))
    // val errors = Output(Vec(LoadPipelineWidth + 1, ValidIO(new L1CacheErrorInfo))) // read ports + readline port
    // when bank_conflict, read (1) port should be ignored
    val bank_conflict_slow = Output(Vec(LoadPipelineWidth, Bool()))
    val disable_ld_fast_wakeup = Output(Vec(LoadPipelineWidth, Bool()))
    val pseudo_error = Flipped(DecoupledIO(Vec(DCacheBanks, new CtrlUnitSignalingBundle)))
  })

  // bank (0, 1, 2, 3) each way use duplicate addr
  def DuplicatedQueryBankSeq = Seq(0, 1, 2, 3)

  /** Builds a Vec with one element per load pipeline by applying `f` to each pipeline index. */
  def pipeMap[T <: Data](f: Int => T) = VecInit((0 until LoadPipelineWidth).map(f))

  /**
    * Extracts the ECC bits (everything above the data row) from an encoded word.
    *
    * @param encWord encoded word; must be exactly `encDataBits` wide when data ECC is enabled
    * @return the ECC bits, or constant 0 when data ECC is disabled
    */
  def getECCFromEncWord(encWord: UInt) = {
    if (EnableDataEcc) {
      // report the actual offending width, not encDataBits twice
      require(encWord.getWidth == encDataBits, s"encWord.getWidth=${encWord.getWidth} != encDataBits=$encDataBits!")
      encWord(encDataBits-1, DCacheSRAMRowBits)
    } else {
      0.U
    }
  }

  /** Extracts the data row (lowest `DCacheSRAMRowBits` bits) from an encoded word. */
  def getDataFromEncWord(encWord: UInt) = {
    encWord(DCacheSRAMRowBits-1, 0)
  }

  /** Joins ECC bits and data into an encoded word; returns the data alone when data ECC is disabled. */
  def asECCData(ecc: UInt, data: UInt) = {
    if (EnableDataEcc) {
      Cat(ecc, data)
    } else {
      data
    }
  }

  /** Debug print of every valid load-pipeline read request and of a valid line read request. */
  def dumpRead = {
    (0 until LoadPipelineWidth) map { w =>
      XSDebug(io.read(w).valid,
        s"DataArray Read channel: $w valid way_en: %x addr: %x\n",
        io.read(w).bits.way_en, io.read(w).bits.addr)
    }
    XSDebug(io.readline.valid,
      s"DataArray Read Line, valid way_en: %x addr: %x rmask %x\n",
      io.readline.bits.way_en, io.readline.bits.addr, io.readline.bits.rmask)
  }

  /** Debug print of a valid write request followed by the data / mask bit of every bank. */
  def dumpWrite = {
    XSDebug(io.write.valid,
      s"DataArray Write valid way_en: %x addr: %x\n",
      io.write.bits.way_en, io.write.bits.addr)

    // note: the label printed as "cycle" is the bank index r
    (0 until DCacheBanks) map { r =>
      XSDebug(io.write.valid,
        s"cycle: $r data: %x wmask: %x\n",
        io.write.bits.data(r), io.write.bits.wmask(r))
    }
  }

  /** Debug print of the read response of every load pipeline (both rows for a 128-bit request). */
  def dumpResp = {
    XSDebug(s"DataArray ReadeResp channel:\n")
    // note: the label printed as "cycle" is the load pipeline index r
    (0 until LoadPipelineWidth) map { r =>
      XSDebug(s"cycle: $r data: %x\n", Mux(io.is128Req(r),
        Cat(io.read_resp(r)(1).raw_data,io.read_resp(r)(0).raw_data),
        io.read_resp(r)(0).raw_data))
    }
  }

  /** Emits the read, write and response debug prints. */
  def dump() = {
    dumpRead
    dumpWrite
    dumpResp
  }

  /**
    * Reduces a set of candidate ports to a single one using their load-queue pointers.
    *
    * For each pair (a, b): if both are valid, b is chosen when a's pointer compares
    * greater than b's, otherwise a; if only a is valid, a is chosen; in every other case
    * b's pointer and index are forwarded. The resulting valid is the OR of both valids.
    *
    * @param valid per-candidate valid flags
    * @param bits  per-candidate load-queue pointers
    * @param index per-candidate port indices
    * @return ((any candidate valid, selected pointer), selected index)
    */
  def selcetOldestPort(valid: Seq[Bool], bits: Seq[LqPtr], index: Seq[UInt]):((Bool, LqPtr), UInt) = {
    require(valid.length == bits.length &&  bits.length == index.length, s"length must eq, valid:${valid.length}, bits:${bits.length}, index:${index.length}")
    ParallelOperation(valid zip bits zip index,
      (a: ((Bool, LqPtr), UInt), b: ((Bool, LqPtr), UInt)) => {
        val au = a._1._2
        val bu = b._1._2
        val aValid = a._1._1
        val bValid = b._1._1
        // b wins the comparison when a's pointer is greater
        val bSel = au > bu
        val bits = Mux(
          aValid && bValid,
          Mux(bSel, b._1._2, a._1._2),
          Mux(aValid && !bValid, a._1._2, b._1._2)
        )
        val idx = Mux(
          aValid && bValid,
          Mux(bSel, b._2, a._2),
          Mux(aValid && !bValid, a._2, b._2)
        )
        ((aValid || bValid, bits), idx)
      }
    )
  }

}
356
357// the smallest access unit is sram
/**
  * Banked data array built from one [[DataSRAM]] per (set-div, bank, way).
  *
  * Load-pipeline word reads and the main-pipeline line read share each SRAM's read port;
  * writes are registered for one cycle before they reach the SRAMs. The module detects
  * read-read, read-readline and write-read bank conflicts, returns read data one cycle
  * after the request and a delayed ECC error indication one cycle after the data.
  */
class SramedDataArray(implicit p: Parameters) extends AbstractBankedDataArray {
  println("  DCacheType: SramedDataArray")
  val ReduceReadlineConflict = false

  // writes are always accepted
  io.write.ready := true.B
  io.write_dup.foreach(_.ready := true.B)

  // data_banks(div)(bank)(way); one MBIST pipeline is placed per set-div group
  val data_banks = List.tabulate(DCacheSetDiv) { k =>
    val banks = List.tabulate(DCacheBanks, DCacheWays)((i, j) => Module(new DataSRAM(i, j)))
    val mbistPl = MbistPipeline.PlaceMbistPipeline(1, s"MbistPipeDataSet$k", hasMbist)
    banks
  }
  data_banks.flatten.flatten.foreach(_.dump())

  // per load pipeline: way mask and decoded address fields of the current request
  val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
  val set_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val div_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val bank_addrs = Wire(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, UInt())))

  val line_set_addr = addr_to_dcache_div_set(io.readline.bits.addr)
  val line_div_addr = addr_to_dcache_div(io.readline.bits.addr)
  // when WPU is enabled, line_way_en is all enabled when read data
  val line_way_en = Fill(DCacheWays, 1.U) // val line_way_en = io.readline.bits.way_en
  val line_way_en_reg = RegEnable(io.readline.bits.way_en, 0.U(DCacheWays.W),io.readline.valid)

  // write request registered for one cycle
  val write_bank_mask_reg = RegEnable(io.write.bits.wmask, 0.U(DCacheBanks.W), io.write.valid)
  val write_data_reg = RegEnable(io.write.bits.data, io.write.valid)
  val write_valid_reg = RegNext(io.write.valid)
  val write_valid_dup_reg = io.write_dup.map(x => RegNext(x.valid))
  val write_wayen_dup_reg = io.write_dup.map(x => RegEnable(x.bits.way_en, 0.U(DCacheWays.W), x.valid))
  val write_set_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div_set(x.bits.addr), x.valid))
  val write_div_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div(x.bits.addr), x.valid))

  // read data_banks and ecc_banks
  // for single port SRAM, do not allow read and write in the same cycle
  val rrhazard = false.B // io.readline.valid
  for (rport_index <- 0 until LoadPipelineWidth) {
    div_addrs(rport_index) := addr_to_dcache_div(io.read(rport_index).bits.addr)
    set_addrs(rport_index) := addr_to_dcache_div_set(io.read(rport_index).bits.addr)
    bank_addrs(rport_index)(0) := addr_to_dcache_bank(io.read(rport_index).bits.addr)
    // second row of a wide access is the next bank
    bank_addrs(rport_index)(1) := bank_addrs(rport_index)(0) + 1.U

    // use way_en to select a way after data read out
    assert(!(RegNext(io.read(rport_index).fire && PopCount(io.read(rport_index).bits.way_en) > 1.U)))
    way_en(rport_index) := io.read(rport_index).bits.way_en
  }

  // read conflict: two valid reads hit the same div, overlapping banks and the same way,
  // but different sets
  val rr_bank_conflict = Seq.tabulate(LoadPipelineWidth, LoadPipelineWidth) { (x, y) =>
    if (x == y) {
      false.B
    } else {
      io.read(x).valid && io.read(y).valid &&
        div_addrs(x) === div_addrs(y) &&
        (io.read(x).bits.bankMask & io.read(y).bits.bankMask) =/= 0.U &&
        io.read(x).bits.way_en === io.read(y).bits.way_en &&
        set_addrs(x) =/= set_addrs(y)
    }
  }
  val load_req_with_bank_conflict = rr_bank_conflict.map(_.reduce(_ || _))
  val load_req_valid = io.read.map(_.valid)
  val load_req_lqIdx = io.read.map(_.bits.lqIdx)
  val load_req_index = (0 until LoadPipelineWidth).map(_.asUInt)


  // among the conflicting loads one port is selected by its lqIdx; the others lose
  val load_req_bank_conflict_selcet = selcetOldestPort(load_req_with_bank_conflict, load_req_lqIdx, load_req_index)
  val load_req_bank_select_port  = UIntToOH(load_req_bank_conflict_selcet._2).asBools

  val rr_bank_conflict_oldest = (0 until LoadPipelineWidth).map(i =>
    !load_req_bank_select_port(i) && load_req_with_bank_conflict(i)
  )

  // conflict between a load read and the (intended) line read
  val rrl_bank_conflict = Wire(Vec(LoadPipelineWidth, Bool()))
  val rrl_bank_conflict_intend = Wire(Vec(LoadPipelineWidth, Bool()))
  for (i <- 0 until LoadPipelineWidth) {
    val judge = if (ReduceReadlineConflict) io.read(i).valid && (io.readline.bits.rmask & io.read(i).bits.bankMask) =/= 0.U && line_div_addr === div_addrs(i) && line_set_addr =/= set_addrs(i)
                else io.read(i).valid && line_div_addr === div_addrs(i) && line_set_addr =/= set_addrs(i)
    rrl_bank_conflict(i) := judge && io.readline.valid
    rrl_bank_conflict_intend(i) := judge && io.readline_intend
  }
  // conflict between a load read and the registered write
  val wr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x =>
    io.read(x).valid && write_valid_reg &&
    div_addrs(x) === write_div_addr_dup_reg.head &&
    way_en(x) === write_wayen_dup_reg.head &&
    (write_bank_mask_reg(bank_addrs(x)(0)) || write_bank_mask_reg(bank_addrs(x)(1)) && io.is128Req(x))
  )
  val wrl_bank_conflict = io.readline.valid && write_valid_reg && line_div_addr === write_div_addr_dup_reg.head
  // ready
  io.readline.ready := !(wrl_bank_conflict)
  io.read.zipWithIndex.foreach { case (x, i) => x.ready := !(wr_bank_conflict(i) || rrhazard) }

  val perf_multi_read = PopCount(io.read.map(_.valid)) >= 2.U
  val bank_conflict_fast = Wire(Vec(LoadPipelineWidth, Bool()))
  for (i <- 0 until LoadPipelineWidth) {
    bank_conflict_fast(i) := wr_bank_conflict(i) || rrl_bank_conflict(i) ||
    rr_bank_conflict_oldest(i)
    // the slow indication is the fast one delayed by a cycle
    io.bank_conflict_slow(i) := RegNext(bank_conflict_fast(i))
    io.disable_ld_fast_wakeup(i) := wr_bank_conflict(i) || rrl_bank_conflict_intend(i) ||
      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
  }
  XSPerfAccumulate("data_array_multi_read", perf_multi_read)
  for (y <- 1 until LoadPipelineWidth; x <- 0 until y) {
    XSPerfAccumulate(s"data_array_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y))
  }
  for (i <- 0 until LoadPipelineWidth) {
    XSPerfAccumulate(s"data_array_rrl_bank_conflict_${i}", rrl_bank_conflict(i))
    XSPerfAccumulate(s"data_array_rw_bank_conflict_${i}", wr_bank_conflict(i))
    XSPerfAccumulate(s"data_array_read_${i}", io.read(i).valid)
  }
  XSPerfAccumulate("data_array_access_total", PopCount(io.read.map(_.valid)))
  XSPerfAccumulate("data_array_read_line", io.readline.valid)
  XSPerfAccumulate("data_array_write", io.write.valid)

  val read_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays,new L1BankedDataReadResult()))))
  val read_result_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays,new L1BankedDataReadResult()))))
  val read_error_delayed_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, Bool()))))
  dontTouch(read_result)
  dontTouch(read_error_delayed_result)

  // per-bank mask XOR-ed onto the read data while a pseudo error is requested for that bank
  val pseudo_data_toggle_mask = io.pseudo_error.bits.map(bank =>
    Mux(io.pseudo_error.valid && bank.valid, bank.mask, 0.U)
  )
  val readline_hit = io.readline.fire &&
                     (io.readline.bits.rmask & VecInit(io.pseudo_error.bits.map(_.valid)).asUInt).orR
  val readbank_hit = io.read.zip(bank_addrs.zip(io.is128Req)).zipWithIndex.map {
                          case ((read, (bank_addr, is128Req)), i) =>
                            val error_bank0 = io.pseudo_error.bits(bank_addr(0))
                            val error_bank1 = io.pseudo_error.bits(bank_addr(1))
                            read.fire && (error_bank0.valid || error_bank1.valid && is128Req) && !io.bank_conflict_slow(i)
                      }.reduce(_|_)
  // the pseudo error request is accepted one cycle after a read touched a flagged bank
  io.pseudo_error.ready := RegNext(readline_hit || readbank_hit)

  for (div_index <- 0 until DCacheSetDiv; bank_index <- 0 until DCacheBanks; way_index <- 0 until DCacheWays) {
    //     Set Addr & Read Way Mask
    //
    //    Pipe 0   ....  Pipe (n-1)
    //      +      ....     +
    //      |      ....     |
    // +----+---------------+-----+
    //  X                        X
    //   X                      +------+ Bank Addr Match
    //    +---------+----------+
    //              |
    //     +--------+--------+
    //     |    Data Bank    |
    //     +-----------------+
    val loadpipe_en = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
      io.read(i).valid && div_addrs(i) === div_index.U && (bank_addrs(i)(0) === bank_index.U || bank_addrs(i)(1) === bank_index.U && io.is128Req(i)) &&
      way_en(i)(way_index) &&
      !rr_bank_conflict_oldest(i)
    })))
    val readline_en = Wire(Bool())
    readline_en := (
      if (ReduceReadlineConflict) io.readline.valid && io.readline.bits.rmask(bank_index) && line_way_en(way_index) && div_index.U === line_div_addr
      else io.readline.valid && line_way_en(way_index) && div_index.U === line_div_addr
    )
    // the line read takes priority over the load pipelines for the SRAM address
    val sram_set_addr = Mux(readline_en,
      addr_to_dcache_div_set(io.readline.bits.addr),
      PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => loadpipe_en(i) -> set_addrs(i)))
    )
    val read_en = loadpipe_en.asUInt.orR || readline_en
    // read raw data
    val data_bank = data_banks(div_index)(bank_index)(way_index)
    data_bank.io.r.en := read_en
    data_bank.io.r.addr := sram_set_addr

    read_result(div_index)(bank_index)(way_index).ecc := getECCFromEncWord(data_bank.io.r.data)
    read_result(div_index)(bank_index)(way_index).raw_data := getDataFromEncWord(data_bank.io.r.data) ^ pseudo_data_toggle_mask(bank_index)

    if (EnableDataEcc) {
      // decode on the registered copy of the encoded word
      val ecc_data = read_result(div_index)(bank_index)(way_index).asECCData()
      val ecc_data_delayed = RegEnable(ecc_data, RegNext(read_en))
      read_result(div_index)(bank_index)(way_index).error_delayed := dcacheParameters.dataCode.decode(ecc_data_delayed).error
      read_error_delayed_result(div_index)(bank_index)(way_index) := read_result(div_index)(bank_index)(way_index).error_delayed
    } else {
      read_result(div_index)(bank_index)(way_index).error_delayed := false.B
      read_error_delayed_result(div_index)(bank_index)(way_index) := false.B
    }

    read_result_delayed(div_index)(bank_index)(way_index) := RegEnable(read_result(div_index)(bank_index)(way_index), RegNext(read_en))
  }

  // flattened view of every SRAM's read enable, used only for the perf counter
  val data_read_oh = WireInit(VecInit(Seq.fill(DCacheSetDiv * DCacheBanks * DCacheWays)(0.U(1.W))))
  for (div_index <- 0 until DCacheSetDiv; bank_index <- 0 until DCacheBanks; way_index <- 0 until DCacheWays) {
    data_read_oh(div_index *  DCacheBanks * DCacheWays + bank_index * DCacheWays + way_index) := data_banks(div_index)(bank_index)(way_index).io.r.en
  }
  XSPerfAccumulate("data_read_counter", PopCount(Cat(data_read_oh)))

  // read result: expose banked read result
  // TODO: clock gate
  for (i <- 0 until LoadPipelineWidth) {
    // io.read_resp(i) := read_result(RegNext(bank_addrs(i)))(RegNext(OHToUInt(way_en(i))))
    val r_read_fire = RegNext(io.read(i).fire)
    val r_div_addr  = RegEnable(div_addrs(i), io.read(i).fire)
    val r_bank_addr = RegEnable(bank_addrs(i), io.read(i).fire)
    val r_way_addr  = RegNext(OHToUInt(way_en(i)))
    val rr_read_fire = RegNext(RegNext(io.read(i).fire))
    val rr_div_addr = RegEnable(RegEnable(div_addrs(i), io.read(i).fire), r_read_fire)
    val rr_bank_addr = RegEnable(RegEnable(bank_addrs(i), io.read(i).fire), r_read_fire)
    val rr_way_addr = RegEnable(RegEnable(OHToUInt(way_en(i)), io.read(i).fire), r_read_fire)
    for (j <- 0 until VLEN/DCacheSRAMRowBits) {
      // data is selected with the once-registered request fields
      io.read_resp(i)(j) := read_result(r_div_addr)(r_bank_addr(j))(r_way_addr)
      // error detection
      // normal read ports
      io.read_error_delayed(i)(j) := rr_read_fire && read_error_delayed_result(rr_div_addr)(rr_bank_addr(j))(rr_way_addr) && !RegNext(io.bank_conflict_slow(i))
    }
  }

  // readline port
  val readline_error_delayed = Wire(Vec(DCacheBanks, Bool()))
  val readline_r_way_addr = RegEnable(OHToUInt(io.readline.bits.way_en), io.readline.valid)
  val readline_rr_way_addr = RegEnable(readline_r_way_addr, RegNext(io.readline.valid))
  val readline_r_div_addr = RegEnable(line_div_addr, io.readline.valid)
  val readline_rr_div_addr = RegEnable(readline_r_div_addr, RegNext(io.readline.valid))
  for (i <- 0 until DCacheBanks) {
    io.readline_resp(i) := read_result(readline_r_div_addr)(i)(readline_r_way_addr)
    readline_error_delayed(i) := read_result(readline_rr_div_addr)(i)(readline_rr_way_addr).error_delayed
  }
  io.readline_error_delayed := RegNext(RegNext(io.readline.fire)) && readline_error_delayed.asUInt.orR

  // write data_banks & ecc_banks
  for (div_index <- 0 until DCacheSetDiv; bank_index <- 0 until DCacheBanks; way_index <- 0 until DCacheWays) {
    // data write: uses the registered mask / valid / div / way of this bank's duplicated request
    val wen_reg = write_bank_mask_reg(bank_index) &&
      write_valid_dup_reg(bank_index) &&
      write_div_addr_dup_reg(bank_index) === div_index.U &&
      write_wayen_dup_reg(bank_index)(way_index)
    val write_ecc_reg = RegEnable(getECCFromEncWord(cacheParams.dataCode.encode(io.write.bits.data(bank_index))), io.write.valid)
    val data_bank = data_banks(div_index)(bank_index)(way_index)
    data_bank.io.w.en := wen_reg
    data_bank.io.w.addr := write_set_addr_dup_reg(bank_index)
    data_bank.io.w.data := asECCData(write_ecc_reg, write_data_reg(bank_index))
  }

  // ChiselDB logging of read-read bank conflicts
  val tableName =  "BankConflict" + p(XSCoreParamsKey).HartId.toString
  val siteName = "BankedDataArray" + p(XSCoreParamsKey).HartId.toString
  val bankConflictTable = ChiselDB.createTable(tableName, new BankConflictDB)
  val bankConflictData = Wire(new BankConflictDB)
  for (i <- 0 until LoadPipelineWidth) {
    bankConflictData.set_index(i) := set_addrs(i)
    bankConflictData.addr(i) := io.read(i).bits.addr
  }

  // FIXME: rr_bank_conflict(0)(1) no generalization
  when(rr_bank_conflict(0)(1)) {
    (0 until (VLEN/DCacheSRAMRowBits)).foreach(i => {
      bankConflictData.bank_index(i) := bank_addrs(0)(i)
    })
    bankConflictData.way_index  := OHToUInt(way_en(0))
    bankConflictData.fake_rr_bank_conflict := set_addrs(0) === set_addrs(1) && div_addrs(0) === div_addrs(1)
  }.otherwise {
    (0 until (VLEN/DCacheSRAMRowBits)).foreach(i => {
      bankConflictData.bank_index(i) := 0.U
    })
    bankConflictData.way_index := 0.U
    bankConflictData.fake_rr_bank_conflict := false.B
  }

  // logging is gated by a Constantin runtime record
  val isWriteBankConflictTable = Constantin.createRecord(s"isWriteBankConflictTable${p(XSCoreParamsKey).HartId}")
  bankConflictTable.log(
    data = bankConflictData,
    en = isWriteBankConflictTable.orR && rr_bank_conflict(0)(1),
    site = siteName,
    clock = clock,
    reset = reset
  )

  for (y <- 1 until LoadPipelineWidth; x <- 0 until y) {
    XSPerfAccumulate(s"data_array_fake_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y) && set_addrs(x)===set_addrs(y) && div_addrs(x) === div_addrs(y))
  }

  if (backendParams.debugEn){
    load_req_with_bank_conflict.foreach(dontTouch(_))
    dontTouch(read_result)
    dontTouch(read_error_delayed_result)
  }
}
649
650// the smallest access unit is bank
651class BankedDataArray(implicit p: Parameters) extends AbstractBankedDataArray {
652  println("  DCacheType: BankedDataArray")
653  val ReduceReadlineConflict = false
654
655  io.write.ready := true.B
656  io.write_dup.foreach(_.ready := true.B)
657
658  val data_banks = List.tabulate(DCacheSetDiv) { k =>
659    val banks = List.tabulate(DCacheBanks)(i => Module(new DataSRAMBank(i)))
660    val mbistPl = MbistPipeline.PlaceMbistPipeline(1, s"MbistPipeDcacheDataSet$k", hasMbist)
661    banks
662  }
663  data_banks.map(_.map(_.dump()))
664
665  val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
666  val set_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
667  val set_addrs_dup = Wire(Vec(LoadPipelineWidth, UInt()))
668  val div_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
669  val div_addrs_dup = Wire(Vec(LoadPipelineWidth, UInt()))
670  val bank_addrs = Wire(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, UInt())))
671  val bank_addrs_dup = Wire(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, UInt())))
672  val way_en_reg = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
673  val set_addrs_reg = Wire(Vec(LoadPipelineWidth, UInt()))
674  val set_addrs_dup_reg = Wire(Vec(LoadPipelineWidth, UInt()))
675
676  val line_set_addr = addr_to_dcache_div_set(io.readline.bits.addr)
677  val line_div_addr = addr_to_dcache_div(io.readline.bits.addr)
678  val line_way_en = io.readline.bits.way_en
679
680  val write_bank_mask_reg = RegEnable(io.write.bits.wmask, io.write.valid)
681  val write_data_reg = RegEnable(io.write.bits.data, io.write.valid)
682  val write_valid_reg = RegNext(io.write.valid)
683  val write_valid_dup_reg = io.write_dup.map(x => RegNext(x.valid))
684  val write_wayen_dup_reg = io.write_dup.map(x => RegEnable(x.bits.way_en, x.valid))
685  val write_set_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div_set(x.bits.addr), x.valid))
686  val write_div_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div(x.bits.addr), x.valid))
687
688  // read data_banks and ecc_banks
689  // for single port SRAM, do not allow read and write in the same cycle
690  val rwhazard = RegNext(io.write.valid)
691  val rrhazard = false.B // io.readline.valid
692  (0 until LoadPipelineWidth).map(rport_index => {
693    div_addrs(rport_index) := addr_to_dcache_div(io.read(rport_index).bits.addr)
694    div_addrs_dup(rport_index) := addr_to_dcache_div(io.read(rport_index).bits.addr_dup)
695    bank_addrs(rport_index)(0) := addr_to_dcache_bank(io.read(rport_index).bits.addr)
696    bank_addrs(rport_index)(1) := Mux(io.is128Req(rport_index), bank_addrs(rport_index)(0) + 1.U, bank_addrs(rport_index)(0))
697    bank_addrs_dup(rport_index)(0) := addr_to_dcache_bank(io.read(rport_index).bits.addr_dup)
698    bank_addrs_dup(rport_index)(1) := Mux(io.is128Req(rport_index), bank_addrs_dup(rport_index)(0) + 1.U, bank_addrs_dup(rport_index)(0))
699    set_addrs(rport_index) := addr_to_dcache_div_set(io.read(rport_index).bits.addr)
700    set_addrs_dup(rport_index) := addr_to_dcache_div_set(io.read(rport_index).bits.addr_dup)
701    set_addrs_reg(rport_index) := RegEnable(addr_to_dcache_div_set(io.read(rport_index).bits.addr), io.read(rport_index).valid)
702    set_addrs_dup_reg(rport_index) := RegEnable(addr_to_dcache_div_set(io.read(rport_index).bits.addr_dup), io.read(rport_index).valid)
703
704    // use way_en to select a way after data read out
705    assert(!(RegNext(io.read(rport_index).fire && PopCount(io.read(rport_index).bits.way_en) > 1.U)))
706    way_en(rport_index) := io.read(rport_index).bits.way_en
707    way_en_reg(rport_index) := RegEnable(io.read(rport_index).bits.way_en, io.read(rport_index).valid)
708  })
709
710  // read-read bank conflict detection between load ports
     // rr_bank_conflict(x)(y) is true when ports x and y (x != y) are both valid,
     // address the same div, have overlapping bankMask bits, and use different
     // set addresses. Overlapping banks with the same set are not a conflict.
711  val rr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x => Seq.tabulate(LoadPipelineWidth)(y => {
712    if (x == y) {
713      false.B
714    } else {
715      io.read(x).valid && io.read(y).valid &&
716      div_addrs(x) === div_addrs(y) &&
717      (io.read(x).bits.bankMask & io.read(y).bits.bankMask) =/= 0.U &&
718      set_addrs(x) =/= set_addrs(y)
719    }
720  }
721  ))
722
     // Per-port flag: the port conflicts with at least one other port.
723  val load_req_with_bank_conflict = rr_bank_conflict.map(_.reduce(_ || _))
724  val load_req_valid = io.read.map(_.valid)
725  val load_req_lqIdx = io.read.map(_.bits.lqIdx)
726  val load_req_index = (0 until LoadPipelineWidth).map(_.asUInt)
727
     // selcetOldestPort is defined outside this section; presumably it picks the
     // conflicting port with the oldest lqIdx and returns that port's index as
     // the second tuple element -- TODO confirm against its definition.
728  val load_req_bank_conflict_selcet = selcetOldestPort(load_req_with_bank_conflict, load_req_lqIdx, load_req_index)
729  val load_req_bank_select_port  = UIntToOH(load_req_bank_conflict_selcet._2).asBools
730
     // Despite the name, this is asserted for the conflicting ports that were
     // NOT selected above, i.e. the ports that lose the arbitration.
731  val rr_bank_conflict_oldest = (0 until LoadPipelineWidth).map(i =>
732    !load_req_bank_select_port(i) && load_req_with_bank_conflict(i)
733  )
734
735  val rrl_bank_conflict = Wire(Vec(LoadPipelineWidth, Bool()))
736  val rrl_bank_conflict_intend = Wire(Vec(LoadPipelineWidth, Bool()))
     // Load port vs. readline conflict. `judge` requires a valid load in the same
     // div as the readline; with ReduceReadlineConflict it additionally requires
     // the readline rmask to overlap the load's bankMask. The result is gated by
     // io.readline.valid for the actual conflict and by io.readline_intend for
     // the "intend" variant.
737  (0 until LoadPipelineWidth).foreach { i =>
738    val judge = if (ReduceReadlineConflict) io.read(i).valid && (io.readline.bits.rmask & io.read(i).bits.bankMask) =/= 0.U && div_addrs(i) === line_div_addr
739                else io.read(i).valid && div_addrs(i)===line_div_addr
740    rrl_bank_conflict(i) := judge && io.readline.valid
741    rrl_bank_conflict_intend(i) := judge && io.readline_intend
742  }
     // Load port vs. registered write conflict: a valid load in the same div as
     // the registered write, where the write mask covers the load's first bank,
     // or its second bank when the load is a 128-bit request. `&&` binds tighter
     // than `||`, so io.is128Req(x) only qualifies the second-bank term.
743  val wr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x =>
744    io.read(x).valid &&
745    write_valid_reg &&
746    div_addrs(x) === write_div_addr_dup_reg.head &&
747    (write_bank_mask_reg(bank_addrs(x)(0)) || write_bank_mask_reg(bank_addrs(x)(1)) && io.is128Req(x))
748  )
     // Readline vs. registered write conflict: same div, no bank-mask check.
749  val wrl_bank_conflict = io.readline.valid && write_valid_reg && line_div_addr === write_div_addr_dup_reg.head
750  // ready
     // The readline is back-pressured by a write conflict; each load port is
     // back-pressured by its own write conflict or rrhazard.
751  io.readline.ready := !(wrl_bank_conflict)
752  io.read.zipWithIndex.map{case(x, i) => x.ready := !(wr_bank_conflict(i) || rrhazard)}
753
754  val perf_multi_read = PopCount(io.read.map(_.valid)) >= 2.U
     // Drive the per-port conflict outputs.
755  (0 until LoadPipelineWidth).foreach(i => {
756    // remove fake rr_bank_conflict situation in s2
       // bank_conflict_slow is the one-cycle-delayed OR of the write conflict, the
       // readline conflict, and losing the read-read arbitration.
757    val real_other_bank_conflict_reg = RegNext(wr_bank_conflict(i) || rrl_bank_conflict(i))
758    val real_rr_bank_conflict_reg = RegNext(rr_bank_conflict_oldest(i))
759    io.bank_conflict_slow(i) := real_other_bank_conflict_reg || real_rr_bank_conflict_reg
760
761    // get result in s1
       // Unregistered: asserted on a write conflict, an intended-readline
       // conflict, or a read-read conflict with any lower-indexed port (port 0
       // has no lower-indexed port, so that term is constant false).
762    io.disable_ld_fast_wakeup(i) := wr_bank_conflict(i) || rrl_bank_conflict_intend(i) ||
763      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
764  })
     // Performance counters: multi-port reads, pairwise read-read conflicts
     // (one counter per unordered port pair x < y), and per-port conflict and
     // access counts.
765  XSPerfAccumulate("data_array_multi_read", perf_multi_read)
766  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
767    XSPerfAccumulate(s"data_array_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y))
768  ))
769  (0 until LoadPipelineWidth).foreach(i => {
770    XSPerfAccumulate(s"data_array_rrl_bank_conflict_${i}", rrl_bank_conflict(i))
771    XSPerfAccumulate(s"data_array_rw_bank_conflict_${i}", wr_bank_conflict(i))
772    XSPerfAccumulate(s"data_array_read_${i}", io.read(i).valid)
773  })
774  XSPerfAccumulate("data_array_access_total", PopCount(io.read.map(_.valid)))
775  XSPerfAccumulate("data_array_read_line", io.readline.valid)
776  XSPerfAccumulate("data_array_write", io.write.valid)
777
778  val bank_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, new L1BankedDataReadResult()))))
779  val bank_result_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, new L1BankedDataReadResult()))))
780  val read_bank_error_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, Bool()))))
781
     // Per-bank toggle mask for pseudo-error injection: the bank's mask when the
     // pseudo_error request is valid and that bank entry is valid, zero otherwise.
     // The mask is XORed into the raw data read from each bank further below.
782  val pseudo_data_toggle_mask = io.pseudo_error.bits.map {
783    case bank =>
784      Mux(io.pseudo_error.valid && bank.valid, bank.mask, 0.U)
785  }
     // A fired readline "hits" when its rmask covers a bank whose pseudo_error
     // entry is valid.
786  val readline_hit = io.readline.fire &&
787                     (io.readline.bits.rmask & VecInit(io.pseudo_error.bits.map(_.valid)).asUInt).orR
     // A fired load "hits" when its first bank (or its second bank for a 128-bit
     // request) has a valid pseudo_error entry and bank_conflict_slow is not set
     // for that port. The per-port results are OR-reduced.
788  val readbank_hit = io.read.zip(bank_addrs.zip(io.is128Req)).zipWithIndex.map {
789                          case ((read, (bank_addr, is128Req)), i) =>
790                            val error_bank0 = io.pseudo_error.bits(bank_addr(0))
791                            val error_bank1 = io.pseudo_error.bits(bank_addr(1))
792                            read.fire && (error_bank0.valid || error_bank1.valid && is128Req) && !io.bank_conflict_slow(i)
793                      }.reduce(_|_)
     // ready is the one-cycle-delayed indication that either kind of read hit.
794  io.pseudo_error.ready := RegNext(readline_hit || readbank_hit)
795
796  for (div_index <- 0 until DCacheSetDiv) {
797    for (bank_index <- 0 until DCacheBanks) {
798      //     Set Addr & Read Way Mask
799      //
800      //    Pipe 0   ....  Pipe (n-1)
801      //      +      ....     +
802      //      |      ....     |
803      // +----+---------------+-----+
804      //  X                        X
805      //   X                      +------+ Bank Addr Match
806      //    +---------+----------+
807      //              |
808      //     +--------+--------+
809      //     |    Data Bank    |
810      //     +-----------------+
         // Load port i matches this bank when it is valid, targets this div, hits
         // this bank with its first bank index (or its second one for a 128-bit
         // request), and did not lose the read-read arbitration.
811      val bank_addr_matchs = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
812        io.read(i).valid && div_addrs(i) === div_index.U && (bank_addrs(i)(0) === bank_index.U || bank_addrs(i)(1) === bank_index.U && io.is128Req(i)) &&
813          !rr_bank_conflict_oldest(i)
814      })))
         // Same match computed from the `_dup` address signals.
815      val bank_addr_matchs_dup = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
816        io.read(i).valid && div_addrs_dup(i) === div_index.U && (bank_addrs_dup(i)(0) === bank_index.U || bank_addrs_dup(i)(1) === bank_index.U && io.is128Req(i)) &&
817          !rr_bank_conflict_oldest(i)
818      })))
         // The readline matches on div alone, or on div plus the bank's rmask bit
         // when ReduceReadlineConflict is set.
819      val readline_match = Wire(Bool())
820      if (ReduceReadlineConflict) {
821        readline_match := io.readline.valid && io.readline.bits.rmask(bank_index) && line_div_addr === div_index.U
822      } else {
823        readline_match := io.readline.valid && line_div_addr === div_index.U
824      }
825
         // Set address for the SRAM read: a matching readline takes precedence;
         // otherwise the lowest-indexed matching load port is used.
826      val bank_set_addr = Mux(readline_match,
827        line_set_addr,
828        PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => bank_addr_matchs(i) -> set_addrs(i)))
829      )
830      val bank_set_addr_dup = Mux(readline_match,
831        line_set_addr,
832        PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => bank_addr_matchs_dup(i) -> set_addrs_dup(i)))
833      )
         // The read enable is derived from the non-dup match vector only.
834      val read_enable = bank_addr_matchs.asUInt.orR || readline_match
835
836      // read raw data
837      val data_bank = data_banks(div_index)(bank_index)
838      data_bank.io.r.en := read_enable
839
         // Banks listed in DuplicatedQueryBankSeq are addressed with the `_dup`
         // set address; all other banks use the primary one.
840      if (DuplicatedQueryBankSeq.contains(bank_index)) {
841        data_bank.io.r.addr := bank_set_addr_dup
842      } else {
843        data_bank.io.r.addr := bank_set_addr
844      }
845      for (way_index <- 0 until DCacheWays) {
           // Split each way's encoded word into ECC bits and data bits; the data
           // is XORed with this bank's pseudo-error toggle mask.
846        bank_result(div_index)(bank_index)(way_index).ecc := getECCFromEncWord(data_bank.io.r.data(way_index))
847        bank_result(div_index)(bank_index)(way_index).raw_data := getDataFromEncWord(data_bank.io.r.data(way_index)) ^ pseudo_data_toggle_mask(bank_index)
848
849        if (EnableDataEcc) {
             // The ECC word is captured in the cycle after read_enable, and the
             // decode error of that registered word becomes error_delayed.
850          val ecc_data = bank_result(div_index)(bank_index)(way_index).asECCData()
851          val ecc_data_delayed = RegEnable(ecc_data, RegNext(read_enable))
852          bank_result(div_index)(bank_index)(way_index).error_delayed := dcacheParameters.dataCode.decode(ecc_data_delayed).error
853          read_bank_error_delayed(div_index)(bank_index)(way_index) := bank_result(div_index)(bank_index)(way_index).error_delayed
854        } else {
             // Without data ECC no error is ever reported.
855          bank_result(div_index)(bank_index)(way_index).error_delayed := false.B
856          read_bank_error_delayed(div_index)(bank_index)(way_index) := false.B
857        }
           // Registered copy of the whole result, captured in the cycle after
           // read_enable.
858        bank_result_delayed(div_index)(bank_index)(way_index) := RegEnable(bank_result(div_index)(bank_index)(way_index), RegNext(read_enable))
859      }
860    }
861  }
862
863  val data_read_oh = WireInit(VecInit(Seq.fill(DCacheSetDiv)(0.U(XLEN.W))))
     // Perf bookkeeping: each bank contributes DCacheWays when its read enable is
     // high (PopCount of the enable bit replicated DCacheWays times) and zero
     // otherwise. Contributions are summed per div, then across divs.
864  for (div_index <- 0 until DCacheSetDiv){
865    val temp = WireInit(VecInit(Seq.fill(DCacheBanks)(0.U(XLEN.W))))
866    for (bank_index <- 0 until DCacheBanks) {
867      temp(bank_index) := PopCount(Fill(DCacheWays, data_banks(div_index)(bank_index).io.r.en.asUInt))
868    }
869    data_read_oh(div_index) := temp.reduce(_ + _)
870  }
871  XSPerfAccumulate("data_read_counter", data_read_oh.foldLeft(0.U)(_ + _))
872
873  (0 until LoadPipelineWidth).map(i => {
       // Select the read response and the delayed ECC error for load port i.
874    // 1 cycle after read fire(load s2)
875    val r_read_fire = RegNext(io.read(i).fire)
876    val r_div_addr = RegEnable(div_addrs(i), io.read(i).fire)
877    val r_bank_addr = RegEnable(bank_addrs(i), io.read(i).fire)
878    val r_way_addr = RegEnable(OHToUInt(way_en(i)), io.read(i).fire)
879    // 2 cycles after read fire(load s3)
       // The second stage is chained from the s2 registers above rather than
       // rebuilding an identical first stage, which removes the duplicate
       // registers while keeping the same cycle-by-cycle values.
880    val rr_read_fire = RegNext(r_read_fire)
881    val rr_div_addr = RegEnable(r_div_addr, r_read_fire)
882    val rr_bank_addr = RegEnable(r_bank_addr, r_read_fire)
883    val rr_way_addr = RegEnable(r_way_addr, r_read_fire)
884    (0 until VLEN/DCacheSRAMRowBits).map( j =>{
         // Response: the bank result picked by the s2 div / bank / way indices.
885      io.read_resp(i)(j)          := bank_result(r_div_addr)(r_bank_addr(j))(r_way_addr)
886      // error detection
         // Reported in s3 for a read that fired two cycles earlier, and
         // suppressed when the port's bank_conflict_slow was set one cycle ago.
887      io.read_error_delayed(i)(j) := rr_read_fire && read_bank_error_delayed(rr_div_addr)(rr_bank_addr(j))(rr_way_addr) && !RegNext(io.bank_conflict_slow(i))
888    })
889  })
890
891  // read result: expose banked read result
     // The readline way and div indices are captured while io.readline.valid is
     // high (`_r_`), then captured again one cycle later (`_rr_`) for the delayed
     // error check.
892  val readline_error_delayed = Wire(Vec(DCacheBanks, Bool()))
893  val readline_r_way_addr = RegEnable(OHToUInt(io.readline.bits.way_en), io.readline.valid)
894  val readline_rr_way_addr = RegEnable(readline_r_way_addr, RegNext(io.readline.valid))
895  val readline_r_div_addr = RegEnable(line_div_addr, io.readline.valid)
896  val readline_rr_div_addr = RegEnable(readline_r_div_addr, RegNext(io.readline.valid))
     // Every bank of the selected div and way is returned; per-bank error flags
     // use the twice-registered indices.
897  (0 until DCacheBanks).map(i => {
898    io.readline_resp(i) := bank_result(readline_r_div_addr)(i)(readline_r_way_addr)
899    readline_error_delayed(i) := bank_result(readline_rr_div_addr)(i)(readline_rr_way_addr).error_delayed
900  })
     // An error is reported two cycles after a fired readline if any bank of the
     // line flagged one.
901  io.readline_error_delayed := RegNext(RegNext(io.readline.fire)) && readline_error_delayed.asUInt.orR
902
903  // write data_banks & ecc_banks
     // Writes are issued from the registered copies of the write request.
904  for (div_index <- 0 until DCacheSetDiv) {
905    for (bank_index <- 0 until DCacheBanks) {
906      // data write
         // A bank is written when its mask bit and its duplicated valid bit are
         // set, the registered div matches this div, and io.write.valid was high
         // in the previous cycle.
907      val wen_reg = write_bank_mask_reg(bank_index) &&
908        write_valid_dup_reg(bank_index) &&
909        write_div_addr_dup_reg(bank_index) === div_index.U && RegNext(io.write.valid)
         // The ECC bits are encoded from the incoming write data and registered
         // while io.write.valid is high.
910      val write_ecc_reg = RegEnable(getECCFromEncWord(cacheParams.dataCode.encode(io.write.bits.data(bank_index))), io.write.valid)
911      val data_bank = data_banks(div_index)(bank_index)
912      data_bank.io.w.en := wen_reg
913      data_bank.io.w.way_en := write_wayen_dup_reg(bank_index)
914      data_bank.io.w.addr := write_set_addr_dup_reg(bank_index)
         // The written word combines the registered ECC with the registered data.
915      data_bank.io.w.data := asECCData(write_ecc_reg, write_data_reg(bank_index))
916    }
917  }
918
919  val tableName = "BankConflict" + p(XSCoreParamsKey).HartId.toString
920  val siteName = "BankedDataArray" + p(XSCoreParamsKey).HartId.toString
     // ChiselDB table recording read-read bank conflicts; the table and site
     // names are suffixed with the hart id.
921  val bankConflictTable = ChiselDB.createTable(tableName, new BankConflictDB)
922  val bankConflictData = Wire(new BankConflictDB)
     // The set index and request address are recorded for every load port.
923  for (i <- 0 until LoadPipelineWidth) {
924    bankConflictData.set_index(i) := set_addrs(i)
925    bankConflictData.addr(i) := io.read(i).bits.addr
926  }
927
928  // FIXME: rr_bank_conflict(0)(1) no generalization
     // Only the port 0 / port 1 pair is logged, and indexing (0)(1) requires
     // LoadPipelineWidth >= 2 at elaboration time.
     // NOTE(review): rr_bank_conflict(0)(1) already requires
     // set_addrs(0) =/= set_addrs(1), so fake_rr_bank_conflict can never be true
     // inside this `when` branch.
929  when(rr_bank_conflict(0)(1)) {
930    (0 until (VLEN/DCacheSRAMRowBits)).map(i => {
931      bankConflictData.bank_index(i) := bank_addrs(0)(i)
932    })
933    bankConflictData.way_index := OHToUInt(way_en(0))
934    bankConflictData.fake_rr_bank_conflict := set_addrs(0) === set_addrs(1) && div_addrs(0) === div_addrs(1)
935  }.otherwise {
936    (0 until (VLEN/DCacheSRAMRowBits)).map(i => {
937      bankConflictData.bank_index(i) := 0.U
938    })
939    bankConflictData.way_index := 0.U
940    bankConflictData.fake_rr_bank_conflict := false.B
941  }
942
     // Logging is enabled by a Constantin record (any bit set) and fires only on
     // a port 0 / port 1 conflict.
943  val isWriteBankConflictTable = Constantin.createRecord(s"isWriteBankConflictTable${p(XSCoreParamsKey).HartId}")
944  bankConflictTable.log(
945    data = bankConflictData,
946    en = isWriteBankConflictTable.orR && rr_bank_conflict(0)(1),
947    site = siteName,
948    clock = clock,
949    reset = reset
950  )
951
952  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
       // "Fake" read-read conflict: ports x and y are both valid and overlap on
       // the same div and bank(s), but use the same set address, so they are not
       // an actual conflict. rr_bank_conflict(x)(y) cannot be used here because
       // it already requires set_addrs(x) =/= set_addrs(y); ANDing it with
       // set_addrs(x) === set_addrs(y) is constant false and the counter would
       // never increment. The overlap is therefore recomputed directly.
953    XSPerfAccumulate(s"data_array_fake_rr_bank_conflict_${x}_${y}", io.read(x).valid && io.read(y).valid && div_addrs(x) === div_addrs(y) && (io.read(x).bits.bankMask & io.read(y).bits.bankMask) =/= 0.U && set_addrs(x) === set_addrs(y))
954  ))
955
956  if (backendParams.debugEn) {
       // Debug builds only: keep the per-port conflict flags, the raw bank
       // results and the delayed error bits from being optimized away.
957    load_req_with_bank_conflict.foreach(conflict => dontTouch(conflict))
958    dontTouch(bank_result)
959    dontTouch(read_bank_error_delayed)
960  }
961}
962