xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/data/BankedDataArray.scala (revision 887862dbb8debde8ab099befc426493834a69ee7)
/***************************************************************************************
* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
17
18package xiangshan.cache
19
20import org.chipsalliance.cde.config.Parameters
21import chisel3._
22import utils._
23import utility._
24import chisel3.util._
25import freechips.rocketchip.tilelink.{ClientMetadata, TLClientParameters, TLEdgeOut}
26import xiangshan.{L1CacheErrorInfo, XSCoreParamsKey}
27
28import scala.math.max
29
// Record type describing one bank-conflict sample across the load pipelines.
// The declaration order of the hardware fields is preserved because it defines
// the bundle layout.
class BankConflictDB(implicit p: Parameters) extends DCacheBundle {
  // Derived widths / element counts, named once so the field list stays readable.
  private val setIndexWidth  = DCacheAboveIndexOffset - DCacheSetOffset
  private val bankIndexWidth = DCacheSetOffset - DCacheBankOffset
  private val rowsPerAccess  = VLEN / DCacheSRAMRowBits

  // one PAddrBits-wide address per load pipeline
  val addr = Vec(LoadPipelineWidth, Bits(PAddrBits.W))
  // one set index per load pipeline
  val set_index = Vec(LoadPipelineWidth, UInt(setIndexWidth.W))
  // one bank index per SRAM row covered by a VLEN-wide access
  val bank_index = Vec(rowsPerAccess, UInt(bankIndexWidth.W))
  // binary-encoded way number
  val way_index = UInt(wayBits.W)
  // flag carried with the record; its meaning is assigned by whoever fills the bundle
  val fake_rr_bank_conflict = Bool()
}
37
// Base request bundle for the banked data array: a per-way enable vector plus
// a PAddrBits-wide address. Other request bundles in this file extend it.
class L1BankedDataReadReq(implicit p: Parameters) extends DCacheBundle {
  // one enable bit per DCache way
  val way_en = Bits(DCacheWays.W)
  // request address
  val addr = Bits(PAddrBits.W)
}
43
// Read request that additionally carries a per-bank mask.
// It declares its own way_en/addr fields instead of inheriting them from
// L1BankedDataReadReq.
class L1BankedDataReadReqWithMask(implicit p: Parameters) extends DCacheBundle {
  // one enable bit per DCache way
  val way_en = Bits(DCacheWays.W)
  // request address
  val addr = Bits(PAddrBits.W)
  // one bit per data bank
  val bankMask = Bits(DCacheBanks.W)
}
50
// Line read request: the base read request (way_en, addr) extended with a
// per-bank read mask.
class L1BankedDataReadLineReq(implicit p: Parameters) extends L1BankedDataReadReq {
  // one bit per data bank
  val rmask = Bits(DCacheBanks.W)
}
55
// Whole cache-block write request (a block can be written in a single cycle):
// the base request (way_en, addr) plus a per-bank write mask and one
// DCacheSRAMRowBits-wide data row per bank.
class L1BankedDataWriteReq(implicit p: Parameters) extends L1BankedDataReadReq {
  // one bit per data bank
  val wmask = Bits(DCacheBanks.W)
  // write data, one SRAM row per bank
  val data = Vec(DCacheBanks, Bits(DCacheSRAMRowBits.W))
}
62
// Cache-block write request without data: this class adds no fields of its own,
// so it carries only the `way_en` and `addr` inherited from L1BankedDataReadReq.
class L1BankedDataWriteReqCtrl(implicit p: Parameters) extends L1BankedDataReadReq
65
// Result of reading one SRAM row: the raw data, its stored ECC bits and an
// error flag.
class L1BankedDataReadResult(implicit p: Parameters) extends DCacheBundle {
  // ECC bits stored alongside the row
  val ecc = Bits(eccBits.W)
  // the data row itself
  val raw_data = Bits(DCacheSRAMRowBits.W)
  // error flag; arrives 1 cycle later than the data response
  val error_delayed = Bool()

  // ECC bits concatenated above the raw data (ecc in the upper bits).
  def asECCData() = Cat(ecc, raw_data)
}
77
// Write request handed to a DataSRAMBank: enable, row address, way select and
// one row of data.
class DataSRAMBankWriteReq(implicit p: Parameters) extends DCacheBundle {
  // write enable
  val en = Bool()
  // row address; the width is left unspecified and inferred from the driver
  val addr = UInt()
  // one select bit per DCache way
  val way_en = UInt(DCacheWays.W)
  // row to be written
  val data = UInt(DCacheSRAMRowBits.W)
}
84
// Wrapper around a single one-way, single-port SRAMTemplate, exposing plain
// enable/address/data read and write ports.
// bankIdx and wayIdx are only used to label the debug dumps.
class DataSRAM(bankIdx: Int, wayIdx: Int)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle() {
    // write port: `data` is written at row `addr` when `en` is asserted
    val w = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Input(UInt(DCacheSRAMRowBits.W))
    }

    // read port: request with `en`/`addr`; `data` is driven by the SRAM response
    val r = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Output(UInt(DCacheSRAMRowBits.W))
    }
  })

  // backing storage: DCacheSets / DCacheSetDiv rows, one way, DCacheSRAMRowBits wide
  val data_sram = Module(new SRAMTemplate(
    Bits(DCacheSRAMRowBits.W),
    set = DCacheSets / DCacheSetDiv,
    way = 1,
    shouldReset = false,
    holdRead = false,
    singlePort = true
  ))

  // read side
  data_sram.io.r.req.valid := io.r.en
  data_sram.io.r.req.bits.apply(setIdx = io.r.addr)
  io.r.data := data_sram.io.r.resp.data(0)

  // write side (only one way exists, so the way mask is constant)
  data_sram.io.w.req.valid := io.w.en
  data_sram.io.w.req.bits.apply(setIdx = io.w.addr, data = io.w.data, waymask = 1.U)

  XSPerfAccumulate("part_data_read_counter", data_sram.io.r.req.valid)

  // Debug-print a read in the cycle after it was requested, using the
  // address captured at request time.
  def dump_r() = {
    when(RegNext(io.r.en)) {
      XSDebug("bank read set %x bank %x way %x data %x\n",
        RegEnable(io.r.addr, io.r.en), bankIdx.U, wayIdx.U, io.r.data)
    }
  }

  // Debug-print a write in the cycle it is requested.
  def dump_w() = {
    when(io.w.en) {
      XSDebug("bank write set %x bank %x way %x data %x\n",
        io.w.addr, bankIdx.U, wayIdx.U, io.w.data)
    }
  }

  // Emit both debug printers: writes first, then reads.
  def dump() = {
    dump_w()
    dump_r()
  }
}
149
// One data bank holding the rows of all DCacheWays ways: one single-port SRAM
// per way, all sharing the same row address. Writes go to the way selected by
// w.way_en; reads access every way and the registered r.way_en picks the
// result.
class DataSRAMBank(index: Int)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle() {
    // write request (enable, address, way select, data)
    val w = Input(new DataSRAMBankWriteReq)

    // read port: request with en/addr/way_en, selected row returned on `data`
    val r = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val way_en = Input(UInt(DCacheWays.W))
      val data = Output(UInt(DCacheSRAMRowBits.W))
    }
  })

  // an enabled access may select at most one way
  assert(RegNext(!io.w.en || PopCount(io.w.way_en) <= 1.U))
  assert(RegNext(!io.r.en || PopCount(io.r.way_en) <= 1.U))

  // way select captured at request time; used to pick the read result
  val r_way_en_reg = RegEnable(io.r.way_en, io.r.en)

  // external controls do not read and write at the same time,
  // so the write request is used as-is (no read/write bypass)
  val w_info = io.w

  // multiway data bank: one SRAM per way
  val data_bank = Seq.fill(DCacheWays) {
    Module(new SRAMTemplate(
      Bits(DCacheSRAMRowBits.W),
      set = DCacheSets / DCacheSetDiv,
      way = 1,
      shouldReset = false,
      holdRead = false,
      singlePort = true
    ))
  }

  data_bank.zipWithIndex.foreach { case (sram, w) =>
    // write only the way selected by the request
    val wen = w_info.en && w_info.way_en(w)
    sram.io.w.req.valid := wen
    sram.io.w.req.bits.apply(setIdx = w_info.addr, data = w_info.data, waymask = 1.U)
    // every way is read on a read request
    sram.io.r.req.valid := io.r.en
    sram.io.r.req.bits.apply(setIdx = io.r.addr)
  }
  XSPerfAccumulate("part_data_read_counter", PopCount(Cat(data_bank.map(_.io.r.req.valid))))

  // Two-level read select: one-hot mux inside the lower and the upper half of
  // the ways, then choose the half according to whether a low way is selected.
  val half = nWays / 2
  val data_read = data_bank.map(_.io.r.resp.data(0))
  val data_left = Mux1H(r_way_en_reg.tail(half), data_read.take(half))
  val data_right = Mux1H(r_way_en_reg.head(half), data_read.drop(half))

  val sel_low = r_way_en_reg.tail(half).orR
  val row_data = Mux(sel_low, data_left, data_right)

  io.r.data := row_data

  // Debug-print a read in the cycle after it was requested, using the
  // address and way select captured at request time.
  def dump_r() = {
    when(RegNext(io.r.en)) {
      XSDebug("bank read addr %x way_en %x data %x\n",
        RegEnable(io.r.addr, io.r.en), RegEnable(io.r.way_en, io.r.en), io.r.data)
    }
  }

  // Debug-print a write in the cycle it is requested.
  def dump_w() = {
    when(io.w.en) {
      XSDebug("bank write addr %x way_en %x data %x\n",
        io.w.addr, io.w.way_en, io.w.data)
    }
  }

  // Emit both debug printers: writes first, then reads.
  def dump() = {
    dump_w()
    dump_r()
  }
}
232
// Marker object: AbstractBankedDataArray wraps it in an Option (Some when
// EnableDataEcc is set, None otherwise) to decide whether ECC SRAMs are built.
case object HasDataEccParam
234
//                     Banked DCache Data
// -----------------------------------------------------------------
// | Bank0 | Bank1 | Bank2 | Bank3 | Bank4 | Bank5 | Bank6 | Bank7 |
// -----------------------------------------------------------------
// | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  |
// | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  |
// | ....  | ....  | ....  | ....  | ....  | ....  | ....  | ....  |
// -----------------------------------------------------------------
//
// Common interface and debug helpers shared by the data array implementations:
// it declares the IO bundle, a few small utilities and the dump printers;
// concrete subclasses provide the storage and the port logic.
abstract class AbstractBankedDataArray(implicit p: Parameters) extends DCacheModule
{
  // Some(marker) when data ECC is enabled, None otherwise
  val DataEccParam = if(EnableDataEcc) Some(HasDataEccParam) else None
  // index of the readline port in an error vector that lists the load ports first
  val ReadlinePortErrorIndex = LoadPipelineWidth
  val io = IO(new DCacheBundle {
    // load pipeline read word req
    val read = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new L1BankedDataReadReqWithMask)))
    // per load port: the request covers two SRAM rows (see dumpResp)
    val is128Req = Input(Vec(LoadPipelineWidth, Bool()))
    // main pipeline read / write line req
    val readline_intend = Input(Bool())
    val readline = Flipped(DecoupledIO(new L1BankedDataReadLineReq))
    val write = Flipped(DecoupledIO(new L1BankedDataWriteReq))
    // per-bank copies of the write request control fields (no data)
    val write_dup = Vec(DCacheBanks, Flipped(Decoupled(new L1BankedDataWriteReqCtrl)))
    // data for readline and loadpipe
    val readline_resp = Output(Vec(DCacheBanks, new L1BankedDataReadResult()))
    val readline_error_delayed = Output(Bool())
    val read_resp          = Output(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, new L1BankedDataReadResult())))
    val read_error_delayed = Output(Vec(LoadPipelineWidth,Vec(VLEN/DCacheSRAMRowBits, Bool())))
    // val nacks = Output(Vec(LoadPipelineWidth, Bool()))
    // val errors = Output(Vec(LoadPipelineWidth + 1, ValidIO(new L1CacheErrorInfo))) // read ports + readline port
    // when bank_conflict, read (1) port should be ignored
    val bank_conflict_slow = Output(Vec(LoadPipelineWidth, Bool()))
    val disable_ld_fast_wakeup = Output(Vec(LoadPipelineWidth, Bool()))
    // customized cache op port
    val cacheOp = Flipped(new L1CacheInnerOpIO)
    val cacheOp_req_dup = Vec(DCacheDupNum, Flipped(Valid(new CacheCtrlReqInfo)))
    val cacheOp_req_bits_opCode_dup = Input(Vec(DCacheDupNum, UInt(XLEN.W)))
  })

  // Build a Vec with one element per load pipeline, element i being f(i).
  def pipeMap[T <: Data](f: Int => T) = VecInit(Seq.tabulate(LoadPipelineWidth)(f))

  // Extract the ECC bits (everything above the low wordBits) from an encoded
  // word; the argument must be exactly encWordBits wide.
  def getECCFromEncWord(encWord: UInt) = {
    require(encWord.getWidth == encWordBits)
    encWord(encWordBits - 1, wordBits)
  }

  // Debug-print every valid load-port read request and a valid readline request.
  def dumpRead = {
    for (w <- 0 until LoadPipelineWidth) {
      val req = io.read(w)
      when(req.valid) {
        XSDebug(s"DataArray Read channel: $w valid way_en: %x addr: %x\n",
          req.bits.way_en, req.bits.addr)
      }
    }
    when(io.readline.valid) {
      XSDebug(s"DataArray Read Line, valid way_en: %x addr: %x rmask %x\n",
        io.readline.bits.way_en, io.readline.bits.addr, io.readline.bits.rmask)
    }
  }

  // Debug-print a valid write request followed by the data and mask bit of
  // every bank.
  def dumpWrite = {
    when(io.write.valid) {
      XSDebug(s"DataArray Write valid way_en: %x addr: %x\n",
        io.write.bits.way_en, io.write.bits.addr)

      for (r <- 0 until DCacheBanks) {
        XSDebug(s"cycle: $r data: %x wmask: %x\n",
          io.write.bits.data(r), io.write.bits.wmask(r))
      }
    }
  }

  // Debug-print the read response of every load port: both rows concatenated
  // for a 128-bit request, otherwise only row 0.
  def dumpResp = {
    XSDebug(s"DataArray ReadeResp channel:\n")
    (0 until LoadPipelineWidth) map { r =>
      val wide = Cat(io.read_resp(r)(1).raw_data, io.read_resp(r)(0).raw_data)
      XSDebug(s"cycle: $r data: %x\n", Mux(io.is128Req(r), wide, io.read_resp(r)(0).raw_data))
    }
  }

  // Emit all debug printers: read requests, write requests, read responses.
  def dump() = {
    dumpRead
    dumpWrite
    dumpResp
  }
}
319
// the smallest access unit is sram
//
// Data array implementation that instantiates one DataSRAM per
// (set-division, bank, way), and, when data ECC is enabled, one ECC SRAM with
// the same indexing.
//
// Ports served (all declared in AbstractBankedDataArray):
//   - io.read(i)   : load pipeline word reads
//   - io.readline  : full-line read
//   - io.write / io.write_dup : line write; registered for one cycle before it
//                    is applied to the SRAMs
//   - io.cacheOp   : customized cache-op access to data and ECC SRAMs
class SramedDataArray(implicit p: Parameters) extends AbstractBankedDataArray {
  println("  DCacheType: SramedDataArray")
  // when true, readline/load conflicts and readline enables also consider the bank masks
  val ReduceReadlineConflict = false

  // writes are always accepted
  io.write.ready := true.B
  io.write_dup.foreach(_.ready := true.B)

  // data SRAMs, indexed as data_banks(div)(bank)(way)
  val data_banks = List.tabulate(DCacheSetDiv)( k => List.tabulate(DCacheBanks)(i => List.tabulate(DCacheWays)(j => Module(new DataSRAM(i,j)))))
  // ECC SRAMs, indexed exactly like data_banks: ecc_banks(div)(bank)(way).
  // Every user below addresses them as banks(div)(bank)(way), so the lists are
  // built banks-then-ways; building them ways-then-banks would index out of
  // range whenever DCacheBanks != DCacheWays.
  val ecc_banks = DataEccParam.map {
    case _ =>
      val ecc = List.tabulate(DCacheSetDiv)( k =>
        List.tabulate(DCacheBanks)(i =>
          List.tabulate(DCacheWays)(j =>
            Module(new SRAMTemplate(
                Bits(eccBits.W),
                set = DCacheSets / DCacheSetDiv,
                way = 1,
                shouldReset = false,
                holdRead = false,
                singlePort = true
            ))
      )))
      ecc
  }

  // attach the debug printers of every data SRAM
  data_banks.map(_.map(_.map(_.dump())))

  // per load port: decoded request fields
  val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
  val set_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val div_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val bank_addrs = Wire(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, UInt())))

  val line_set_addr = addr_to_dcache_div_set(io.readline.bits.addr)
  val line_div_addr = addr_to_dcache_div(io.readline.bits.addr)
  // when WPU is enabled, line_way_en is all enabled when read data
  val line_way_en = Fill(DCacheWays, 1.U) // val line_way_en = io.readline.bits.way_en
  val line_way_en_reg = RegEnable(io.readline.bits.way_en, 0.U(DCacheWays.W),io.readline.valid)

  // write request registered for one cycle; the SRAM write below uses these copies
  val write_bank_mask_reg = RegEnable(io.write.bits.wmask, 0.U(DCacheBanks.W), io.write.valid)
  val write_data_reg = RegEnable(io.write.bits.data, io.write.valid)
  val write_valid_reg = RegNext(io.write.valid)
  val write_valid_dup_reg = io.write_dup.map(x => RegNext(x.valid))
  val write_wayen_dup_reg = io.write_dup.map(x => RegEnable(x.bits.way_en, 0.U(DCacheWays.W), x.valid))
  val write_set_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div_set(x.bits.addr), x.valid))
  val write_div_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div(x.bits.addr), x.valid))

  // read data_banks and ecc_banks
  // for single port SRAM, do not allow read and write in the same cycle
  val rrhazard = false.B // io.readline.valid
  (0 until LoadPipelineWidth).map(rport_index => {
    div_addrs(rport_index) := addr_to_dcache_div(io.read(rport_index).bits.addr)
    set_addrs(rport_index) := addr_to_dcache_div_set(io.read(rport_index).bits.addr)
    bank_addrs(rport_index)(0) := addr_to_dcache_bank(io.read(rport_index).bits.addr)
    // second row of a 128-bit request is the next bank; users qualify it with is128Req
    bank_addrs(rport_index)(1) := bank_addrs(rport_index)(0) + 1.U

    // use way_en to select a way after data read out
    assert(!(RegNext(io.read(rport_index).fire && PopCount(io.read(rport_index).bits.way_en) > 1.U)))
    way_en(rport_index) := io.read(rport_index).bits.way_en
  })

  // read conflict
  // load/load: both valid, same div, overlapping bank masks, same way, different set
  val rr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x => Seq.tabulate(LoadPipelineWidth)(y =>
    io.read(x).valid && io.read(y).valid &&
    div_addrs(x) === div_addrs(y) &&
    (io.read(x).bits.bankMask & io.read(y).bits.bankMask) =/= 0.U &&
    io.read(x).bits.way_en === io.read(y).bits.way_en &&
    set_addrs(x) =/= set_addrs(y)
  ))
  // load/readline: same div but different set (optionally also overlapping banks)
  val rrl_bank_conflict = Wire(Vec(LoadPipelineWidth, Bool()))
  val rrl_bank_conflict_intend = Wire(Vec(LoadPipelineWidth, Bool()))
  (0 until LoadPipelineWidth).foreach { i =>
    val judge = if (ReduceReadlineConflict) io.read(i).valid && (io.readline.bits.rmask & io.read(i).bits.bankMask) =/= 0.U && line_div_addr === div_addrs(i) && line_set_addr =/= set_addrs(i)
                else io.read(i).valid && line_div_addr === div_addrs(i) && line_set_addr =/= set_addrs(i)
    rrl_bank_conflict(i) := judge && io.readline.valid
    rrl_bank_conflict_intend(i) := judge && io.readline_intend
  }
  // load/write: the registered write targets the same div and way and one of the banks being read
  val wr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x =>
    io.read(x).valid && write_valid_reg &&
    div_addrs(x) === write_div_addr_dup_reg.head &&
    way_en(x) === write_wayen_dup_reg.head &&
    (write_bank_mask_reg(bank_addrs(x)(0)) || write_bank_mask_reg(bank_addrs(x)(1)) && io.is128Req(x))
  )
  // readline/write: the registered write targets the same div
  val wrl_bank_conflict = io.readline.valid && write_valid_reg && line_div_addr === write_div_addr_dup_reg.head
  // ready
  io.readline.ready := !(wrl_bank_conflict)
  io.read.zipWithIndex.map { case (x, i) => x.ready := !(wr_bank_conflict(i) || rrhazard) }

  val perf_multi_read = PopCount(io.read.map(_.valid)) >= 2.U
  val bank_conflict_fast = Wire(Vec(LoadPipelineWidth, Bool()))
  (0 until LoadPipelineWidth).foreach(i => {
    // port i loses against the write, the readline, and every lower-numbered load port
    bank_conflict_fast(i) := wr_bank_conflict(i) || rrl_bank_conflict(i) ||
      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
    io.bank_conflict_slow(i) := RegNext(bank_conflict_fast(i))
    // same as above, but using the readline "intend" signal instead of readline.valid
    io.disable_ld_fast_wakeup(i) := wr_bank_conflict(i) || rrl_bank_conflict_intend(i) ||
      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
  })
  XSPerfAccumulate("data_array_multi_read", perf_multi_read)
  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
    XSPerfAccumulate(s"data_array_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y))
  ))
  (0 until LoadPipelineWidth).foreach(i => {
    XSPerfAccumulate(s"data_array_rrl_bank_conflict_${i}", rrl_bank_conflict(i))
    XSPerfAccumulate(s"data_array_rw_bank_conflict_${i}", wr_bank_conflict(i))
    XSPerfAccumulate(s"data_array_read_${i}", io.read(i).valid)
  })
  XSPerfAccumulate("data_array_access_total", PopCount(io.read.map(_.valid)))
  XSPerfAccumulate("data_array_read_line", io.readline.valid)
  XSPerfAccumulate("data_array_write", io.write.valid)

  // raw read result of every SRAM, plus registered copies and ECC error flags
  val read_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays,new L1BankedDataReadResult()))))
  val read_result_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays,new L1BankedDataReadResult()))))
  val read_error_delayed_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, Bool()))))
  dontTouch(read_result)
  dontTouch(read_error_delayed_result)
  for (div_index <- 0 until DCacheSetDiv){
    for (bank_index <- 0 until DCacheBanks) {
      for (way_index <- 0 until DCacheWays) {
        //     Set Addr & Read Way Mask
        //
        //    Pipe 0   ....  Pipe (n-1)
        //      +      ....     +
        //      |      ....     |
        // +----+---------------+-----+
        //  X                        X
        //   X                      +------+ Bank Addr Match
        //    +---------+----------+
        //              |
        //     +--------+--------+
        //     |    Data Bank    |
        //     +-----------------+
        // a load port reads this SRAM when its div, bank (either row) and way match
        val loadpipe_en = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
          io.read(i).valid && div_addrs(i) === div_index.U && (bank_addrs(i)(0) === bank_index.U || bank_addrs(i)(1) === bank_index.U && io.is128Req(i)) && way_en(i)(way_index)
        })))
        val readline_en = Wire(Bool())
        if (ReduceReadlineConflict) {
          readline_en := io.readline.valid && io.readline.bits.rmask(bank_index) && line_way_en(way_index) && div_index.U === line_div_addr
        } else {
          readline_en := io.readline.valid && line_way_en(way_index) && div_index.U === line_div_addr
        }
        // readline has priority over the load ports; among load ports the lowest index wins
        val sram_set_addr = Mux(readline_en,
          addr_to_dcache_div_set(io.readline.bits.addr),
          PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => loadpipe_en(i) -> set_addrs(i)))
        )
        val read_en = loadpipe_en.asUInt.orR || readline_en
        // read raw data
        val data_bank = data_banks(div_index)(bank_index)(way_index)
        data_bank.io.r.en := read_en
        data_bank.io.r.addr := sram_set_addr
        ecc_banks match {
          case Some(banks) =>
            val ecc_bank = banks(div_index)(bank_index)(way_index)
            ecc_bank.io.r.req.valid := read_en
            ecc_bank.io.r.req.bits.apply(setIdx = sram_set_addr)
            read_result(div_index)(bank_index)(way_index).ecc := ecc_bank.io.r.resp.data(0)
          case None =>
            read_result(div_index)(bank_index)(way_index).ecc := 0.U
        }

        read_result(div_index)(bank_index)(way_index).raw_data := data_bank.io.r.data
        read_result_delayed(div_index)(bank_index)(way_index) := RegEnable(read_result(div_index)(bank_index)(way_index), RegNext(read_en))

        // use ECC to check error
        // the {ecc, data} word is registered once more before decoding, hence "delayed"
        ecc_banks match {
          case Some(_) =>
            val ecc_data = read_result(div_index)(bank_index)(way_index).asECCData()
            val ecc_data_delayed = RegEnable(ecc_data, RegNext(read_en))
            read_result(div_index)(bank_index)(way_index).error_delayed := dcacheParameters.dataCode.decode(ecc_data_delayed).error
            read_error_delayed_result(div_index)(bank_index)(way_index) := read_result(div_index)(bank_index)(way_index).error_delayed
          case None =>
            read_result(div_index)(bank_index)(way_index).error_delayed := false.B
            read_error_delayed_result(div_index)(bank_index)(way_index) := false.B
        }
      }
    }
  }

  // flat vector of all SRAM read enables, used only for the perf counter below
  val data_read_oh = WireInit(VecInit(Seq.fill(DCacheSetDiv * DCacheBanks * DCacheWays)(0.U(1.W))))
  for(div_index <- 0 until DCacheSetDiv){
    for (bank_index <- 0 until DCacheBanks) {
      for (way_index <- 0 until DCacheWays) {
        data_read_oh(div_index *  DCacheBanks * DCacheWays + bank_index * DCacheWays + way_index) := data_banks(div_index)(bank_index)(way_index).io.r.en
      }
    }
  }
  XSPerfAccumulate("data_read_counter", PopCount(Cat(data_read_oh)))

  // read result: expose banked read result
  // TODO: clock gate
  (0 until LoadPipelineWidth).map(i => {
    // io.read_resp(i) := read_result(RegNext(bank_addrs(i)))(RegNext(OHToUInt(way_en(i))))
    // r_*  : request fields one cycle after the request (select the data response)
    // rr_* : request fields two cycles after the request (select the delayed error flag)
    val r_read_fire = RegNext(io.read(i).fire)
    val r_div_addr  = RegEnable(div_addrs(i), io.read(i).fire)
    val r_bank_addr = RegEnable(bank_addrs(i), io.read(i).fire)
    val r_way_addr  = RegNext(OHToUInt(way_en(i)))
    val rr_read_fire = RegNext(RegNext(io.read(i).fire))
    val rr_div_addr = RegEnable(RegEnable(div_addrs(i), io.read(i).fire), r_read_fire)
    val rr_bank_addr = RegEnable(RegEnable(bank_addrs(i), io.read(i).fire), r_read_fire)
    val rr_way_addr = RegEnable(RegEnable(OHToUInt(way_en(i)), io.read(i).fire), r_read_fire)
    (0 until VLEN/DCacheSRAMRowBits).map( j =>{
      io.read_resp(i)(j) := read_result(r_div_addr)(r_bank_addr(j))(r_way_addr)
      // error detection
      // normal read ports
      io.read_error_delayed(i)(j) := rr_read_fire && read_error_delayed_result(rr_div_addr)(rr_bank_addr(j))(rr_way_addr) && !RegNext(io.bank_conflict_slow(i))
    })
  })

  // readline port
  // every bank of the selected div/way, using the request fields captured at readline.valid
  (0 until DCacheBanks).map(i => {
    io.readline_resp(i) := read_result(RegEnable(line_div_addr, io.readline.valid))(i)(RegEnable(OHToUInt(io.readline.bits.way_en),io.readline.valid))
  })
  io.readline_error_delayed := RegNext(RegNext(io.readline.fire)) &&
    VecInit((0 until DCacheBanks).map(i => io.readline_resp(i).error_delayed)).asUInt.orR

  // write data_banks & ecc_banks
  for (div_index <- 0 until DCacheSetDiv) {
    for (bank_index <- 0 until DCacheBanks) {
      for (way_index <- 0 until DCacheWays) {
        // data write
        // uses the registered write request; each bank reads its own duplicated control copy
        val wen_reg = write_bank_mask_reg(bank_index) &&
          write_valid_dup_reg(bank_index) &&
          write_div_addr_dup_reg(bank_index) === div_index.U &&
          write_wayen_dup_reg(bank_index)(way_index)
        val data_bank = data_banks(div_index)(bank_index)(way_index)
        data_bank.io.w.en := wen_reg

        data_bank.io.w.addr := write_set_addr_dup_reg(bank_index)
        data_bank.io.w.data := write_data_reg(bank_index)
        // ecc write
        ecc_banks match {
          case Some(banks) =>
            val ecc_bank = banks(div_index)(bank_index)(way_index)
            ecc_bank.io.w.req.valid := wen_reg
            // the ECC is computed from the incoming write data and registered
            // so that it lines up with the registered data write
            ecc_bank.io.w.req.bits.apply(
              setIdx = write_set_addr_dup_reg(bank_index),
              data = RegEnable(getECCFromEncWord(cacheParams.dataCode.encode((io.write.bits.data(bank_index)))), io.write.valid),
              waymask = 1.U
            )
            when(ecc_bank.io.w.req.valid) {
              XSDebug("write in ecc sram: bank %x set %x data %x waymask %x\n",
                bank_index.U,
                addr_to_dcache_div_set(io.write.bits.addr),
                getECCFromEncWord(cacheParams.dataCode.encode((io.write.bits.data(bank_index)))),
                io.write.bits.way_en
              )
            }
          case None => None
        }
      }
    }
  }

  // customized cache-op port; wayNum is taken from its low 5 bits, hence the limit
  require(nWays <= 32)
  io.cacheOp.resp.bits := DontCare
  val cacheOpShouldResp = WireInit(false.B)
  val eccReadResult = Wire(Vec(DCacheBanks, UInt(eccBits.W)))
  // DCacheDupNum is 16
  // vec: the dupIdx for every bank and every group
  // NOTE(review): each sequence has 8 entries and its position is used as the
  // bank index, so this presumably expects DCacheBanks == 8 - confirm.
  val rdata_dup_vec = Seq(0,0,1,1,2,2,3,3)
  val rdataEcc_dup_vec = Seq(4,4,5,5,6,6,7,7)
  val wdata_dup_vec = Seq(8,8,9,9,10,10,11,11)
  val wdataEcc_dup_vec = Seq(12,12,13,13,14,14,15,15)
  val cacheOpDivAddr = set_to_dcache_div(io.cacheOp.req.bits.index)
  val cacheOpSetAddr = set_to_dcache_div_set(io.cacheOp.req.bits.index)
  val cacheOpWayNum = io.cacheOp.req.bits.wayNum(4, 0)
  // cache-op data read: overrides the normal read enable/address of the addressed SRAMs
  rdata_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) =>
    for (divIdx <- 0 until DCacheSetDiv){
      for (wayIdx <- 0 until DCacheWays) {
        when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isReadData(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
          val data_bank = data_banks(divIdx)(bankIdx)(wayIdx)
          data_bank.io.r.en := UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))(wayIdx) && cacheOpDivAddr === divIdx.U
          data_bank.io.r.addr := cacheOpSetAddr
          cacheOpShouldResp := true.B
        }
      }
    }
  }
  // cache-op ECC read: reads every ECC SRAM at the requested set; the response
  // below selects div and way
  rdataEcc_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) =>
    for (divIdx <- 0 until DCacheSetDiv) {
      for (wayIdx <- 0 until DCacheWays) {
        when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isReadDataECC(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
          ecc_banks match {
            case Some(banks) =>
              val ecc_bank = banks(divIdx)(bankIdx)(wayIdx)
              ecc_bank.io.r.req.valid := true.B
              ecc_bank.io.r.req.bits.setIdx := cacheOpSetAddr
              cacheOpShouldResp := true.B
            case None =>
              cacheOpShouldResp := true.B
          }
        }
      }
    }
  }
  // cache-op data write: overrides the normal write port of the addressed SRAM
  wdata_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) =>
    for (divIdx <- 0 until DCacheSetDiv) {
      for (wayIdx <- 0 until DCacheWays) {
        when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isWriteData(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
          val data_bank = data_banks(divIdx)(bankIdx)(wayIdx)
          data_bank.io.w.en := UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))(wayIdx) && cacheOpDivAddr === divIdx.U
          data_bank.io.w.addr := cacheOpSetAddr
          data_bank.io.w.data := io.cacheOp.req.bits.write_data_vec(bankIdx)
          cacheOpShouldResp := true.B
        }
      }
    }
  }
  // cache-op ECC write: overrides the normal write port of the addressed ECC SRAM
  wdataEcc_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) =>
    for (divIdx <- 0 until DCacheSetDiv) {
      for (wayIdx <- 0 until DCacheWays) {
        when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isWriteDataECC(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
          ecc_banks match {
            case Some(banks) =>
              val ecc_bank = banks(divIdx)(bankIdx)(wayIdx)
              ecc_bank.io.w.req.valid := UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))(wayIdx) && cacheOpDivAddr === divIdx.U
              ecc_bank.io.w.req.bits.apply(
                setIdx = cacheOpSetAddr,
                data = io.cacheOp.req.bits.write_data_ecc,
                waymask = 1.U
              )
              cacheOpShouldResp := true.B
            case None =>
              cacheOpShouldResp := true.B
          }
        }
      }
    }
  }
  // respond one cycle after a request that matched one of the data/ECC op codes
  io.cacheOp.resp.valid := RegNext(io.cacheOp.req.valid && cacheOpShouldResp)
  for (bank_index <- 0 until DCacheBanks) {
    val cacheOpDivAddrReg = RegEnable(cacheOpDivAddr, io.cacheOp.req.valid)
    val cacheOpWayNumDivAddrReg = RegEnable(cacheOpWayNum, io.cacheOp.req.valid)
    io.cacheOp.resp.bits.read_data_vec(bank_index) := read_result(cacheOpDivAddrReg)(bank_index)(cacheOpWayNumDivAddrReg).raw_data
    eccReadResult(bank_index) := read_result(cacheOpDivAddrReg)(bank_index)(cacheOpWayNumDivAddrReg).ecc
  }

  io.cacheOp.resp.bits.read_data_ecc := Mux(io.cacheOp.resp.valid,
    eccReadResult(RegEnable(io.cacheOp.req.bits.bank_num, io.cacheOp.req.valid)),
    0.U
  )

  // ChiselDB logging of load/load bank conflicts between port 0 and port 1
  val tableName =  "BankConflict" + p(XSCoreParamsKey).HartId.toString
  val siteName = "BankedDataArray" + p(XSCoreParamsKey).HartId.toString
  val bankConflictTable = ChiselDB.createTable(tableName, new BankConflictDB)
  val bankConflictData = Wire(new BankConflictDB)
  for (i <- 0 until LoadPipelineWidth) {
    bankConflictData.set_index(i) := set_addrs(i)
    bankConflictData.addr(i) := io.read(i).bits.addr
  }

  // FIXME: rr_bank_conflict(0)(1) no generalization
  when(rr_bank_conflict(0)(1)) {
    (0 until (VLEN/DCacheSRAMRowBits)).map(i => {
      bankConflictData.bank_index(i) := bank_addrs(0)(i)
    })
    bankConflictData.way_index  := OHToUInt(way_en(0))
    bankConflictData.fake_rr_bank_conflict := set_addrs(0) === set_addrs(1) && div_addrs(0) === div_addrs(1)
  }.otherwise {
    (0 until (VLEN/DCacheSRAMRowBits)).map(i => {
      bankConflictData.bank_index(i) := 0.U
    })
    bankConflictData.way_index := 0.U
    bankConflictData.fake_rr_bank_conflict := false.B
  }

  // logging is gated by a Constantin record in addition to the conflict itself
  val isWriteBankConflictTable = Constantin.createRecord(s"isWriteBankConflictTable${p(XSCoreParamsKey).HartId}")
  bankConflictTable.log(
    data = bankConflictData,
    en = isWriteBankConflictTable.orR && rr_bank_conflict(0)(1),
    site = siteName,
    clock = clock,
    reset = reset
  )

  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
    XSPerfAccumulate(s"data_array_fake_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y) && set_addrs(x)===set_addrs(y) && div_addrs(x) === div_addrs(y))
  ))

}
700
701// the smallest access unit is bank
/**
  * Banked DCache data array: a `DCacheSetDiv` x `DCacheBanks` grid of `DataSRAMBank`s plus,
  * when `DataEccParam` is defined, a parallel grid of ECC SRAMs.
  *
  * What this class does, as far as the code below shows:
  *  - `LoadPipelineWidth` word read ports (`io.read`) and one line read port (`io.readline`)
  *    share the banks. Conflicts are computed combinationally from the request signals and
  *    reported through `io.disable_ld_fast_wakeup` (same cycle) and `io.bank_conflict_slow`
  *    (one cycle later, after filtering out conflicts that hit the same way and set).
  *  - Writes (`io.write` / `io.write_dup`) are always accepted and applied to the SRAMs one
  *    cycle later from registered copies of the request.
  *  - Cache-op requests (`io.cacheOp*`) re-drive the SRAM ports inside `when` blocks placed
  *    AFTER the regular connections; they rely on Chisel last-connect semantics, so the order
  *    of those sections must not be changed.
  *  - Bank-conflict statistics are exported through perf counters and a ChiselDB table.
  */
702class BankedDataArray(implicit p: Parameters) extends AbstractBankedDataArray {
  // Printed once when this module is constructed.
703  println("  DCacheType: BankedDataArray")
  // Scala-level switch: when true, a readline only conflicts with / activates the banks selected
  // by io.readline.bits.rmask; when false (as here) it occupies every bank of the addressed div.
704  val ReduceReadlineConflict = false
705
  // Writes are never back-pressured by this module.
706  io.write.ready := true.B
707  io.write_dup.foreach(_.ready := true.B)
708
  // data_banks(div)(bank): one DataSRAMBank per (set division, bank) pair.
709  val data_banks = List.fill(DCacheSetDiv)(List.tabulate(DCacheBanks)(i => Module(new DataSRAMBank(i))))
  // ecc_banks is Some(grid) only when DataEccParam is defined; the grid has the same
  // (div)(bank) shape as data_banks. Each ECC SRAM stores eccBits per way and is configured
  // as single-port, without reset and without read-hold (see the constructor arguments).
710  val ecc_banks = DataEccParam.map {
711    case _ =>
712      val ecc = List.fill(DCacheSetDiv)(List.fill(DCacheBanks)(
713        Module(new SRAMTemplate(
714          Bits(eccBits.W),
715          set = DCacheSets / DCacheSetDiv,
716          way = DCacheWays,
717          shouldReset = false,
718          holdRead = false,
719          singlePort = true
720        ))
721      ))
722      ecc
723  }
724
725  data_banks.map(_.map(_.dump()))
726
  // Per-load-port decoded request fields. The *_reg wires are driven below from RegEnable,
  // i.e. they carry the values captured while the port's request was valid.
727  val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
728  val set_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
729  val div_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
730  val bank_addrs = Wire(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, UInt())))
731  val way_en_reg = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
732  val set_addrs_reg = Wire(Vec(LoadPipelineWidth, UInt()))
733
  // Decoded fields of the line read request.
734  val line_set_addr = addr_to_dcache_div_set(io.readline.bits.addr)
735  val line_div_addr = addr_to_dcache_div(io.readline.bits.addr)
736  val line_way_en = io.readline.bits.way_en
737
  // Registered copy of the write request; the SRAM write happens from these registers, one
  // cycle after io.write.valid. The *_dup_* sequences hold one register per io.write_dup entry
  // and are indexed by bank_index in the write section below
  // (assumes io.write_dup has at least DCacheBanks entries — TODO confirm).
738  val write_bank_mask_reg = RegEnable(io.write.bits.wmask, io.write.valid)
739  val write_data_reg = RegEnable(io.write.bits.data, io.write.valid)
740  val write_valid_reg = RegNext(io.write.valid)
741  val write_valid_dup_reg = io.write_dup.map(x => RegNext(x.valid))
742  val write_wayen_dup_reg = io.write_dup.map(x => RegEnable(x.bits.way_en, x.valid))
743  val write_set_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div_set(x.bits.addr), x.valid))
744  val write_div_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div(x.bits.addr), x.valid))
745
746  // read data_banks and ecc_banks
747  // for single port SRAM, do not allow read and write in the same cycle
  // NOTE(review): rwhazard is not referenced anywhere else in this class; rrhazard is
  // hard-wired to false, so it never blocks io.read(i).ready below.
748  val rwhazard = RegNext(io.write.valid)
749  val rrhazard = false.B // io.readline.valid
750  (0 until LoadPipelineWidth).map(rport_index => {
751    div_addrs(rport_index) := addr_to_dcache_div(io.read(rport_index).bits.addr)
752    bank_addrs(rport_index)(0) := addr_to_dcache_bank(io.read(rport_index).bits.addr)
    // Second bank: the next bank for a 128-bit request; otherwise DCacheBanks, a value that is
    // outside the 0 until DCacheBanks range iterated by the per-bank loops below.
753    bank_addrs(rport_index)(1) := Mux(io.is128Req(rport_index), bank_addrs(rport_index)(0) + 1.U, DCacheBanks.asUInt)
754    set_addrs(rport_index) := addr_to_dcache_div_set(io.read(rport_index).bits.addr)
755    set_addrs_reg(rport_index) := RegEnable(addr_to_dcache_div_set(io.read(rport_index).bits.addr), io.read(rport_index).valid)
756
757    // use way_en to select a way after data read out
    // A fired read must enable at most one way (checked one cycle after the fire).
758    assert(!(RegNext(io.read(rport_index).fire && PopCount(io.read(rport_index).bits.way_en) > 1.U)))
759    way_en(rport_index) := io.read(rport_index).bits.way_en
760    way_en_reg(rport_index) := RegEnable(io.read(rport_index).bits.way_en, io.read(rport_index).valid)
761  })
762
763  // read each bank, get bank result
  // rr_bank_conflict(x)(y): load ports x and y are both valid, target the same div, and their
  // bank masks overlap. Way and set are NOT compared here (see the registered filter below).
764  val rr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x => Seq.tabulate(LoadPipelineWidth)(y =>
765    io.read(x).valid && io.read(y).valid &&
766    div_addrs(x) === div_addrs(y) &&
767    (io.read(x).bits.bankMask & io.read(y).bits.bankMask) =/= 0.U
768  ))
  // Load port vs. line read: `judge` is the port-side condition; it is combined with the actual
  // readline valid (rrl_bank_conflict) and with io.readline_intend (rrl_bank_conflict_intend).
769  val rrl_bank_conflict = Wire(Vec(LoadPipelineWidth, Bool()))
770  val rrl_bank_conflict_intend = Wire(Vec(LoadPipelineWidth, Bool()))
771  (0 until LoadPipelineWidth).foreach { i =>
772    val judge = if (ReduceReadlineConflict) io.read(i).valid && (io.readline.bits.rmask & io.read(i).bits.bankMask) =/= 0.U && div_addrs(i) === line_div_addr
773                else io.read(i).valid && div_addrs(i)===line_div_addr
774    rrl_bank_conflict(i) := judge && io.readline.valid
775    rrl_bank_conflict_intend(i) := judge && io.readline_intend
776  }
  // Load port vs. the registered (in-flight) write: same div and the write mask covers a bank
  // the load touches. Note `&&` binds tighter than `||`: the second bank only counts for
  // 128-bit requests.
777  val wr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x =>
778    io.read(x).valid &&
779    write_valid_reg &&
780    div_addrs(x) === write_div_addr_dup_reg.head &&
781    (write_bank_mask_reg(bank_addrs(x)(0)) || write_bank_mask_reg(bank_addrs(x)(1)) && io.is128Req(x))
782  )
  // Line read vs. the registered write: any write to the same div conflicts.
783  val wrl_bank_conflict = io.readline.valid && write_valid_reg && line_div_addr === write_div_addr_dup_reg.head
784  // ready
  // Only write conflicts de-assert ready; read-read and read-readline conflicts are reported
  // through io.bank_conflict_slow / io.disable_ld_fast_wakeup instead.
785  io.readline.ready := !(wrl_bank_conflict)
786  io.read.zipWithIndex.map{case(x, i) => x.ready := !(wr_bank_conflict(i) || rrhazard)}
787
788  val perf_multi_read = PopCount(io.read.map(_.valid)) >= 2.U
789  (0 until LoadPipelineWidth).foreach(i => {
790    // remove fake rr_bank_conflict situation in s2
    // Port i only loses against lower-indexed ports j < i. A registered conflict with port j is
    // kept only if the two ports differ in way or set; with equal div (from rr_bank_conflict),
    // set and way, both ports address the same SRAM entry.
791    val real_other_bank_conflict_reg = RegNext(wr_bank_conflict(i) || rrl_bank_conflict(i))
792    val real_rr_bank_conflict_reg = (if (i == 0) 0.B else (0 until i).map{ j =>
793      RegNext(rr_bank_conflict(j)(i)) &&
794      (way_en_reg(j) =/= way_en_reg(i) || set_addrs_reg(j) =/= set_addrs_reg(i))
795    }.reduce(_ || _))
796    io.bank_conflict_slow(i) := real_other_bank_conflict_reg || real_rr_bank_conflict_reg
797
798    // get result in s1
    // Same-cycle, unfiltered version: uses the raw rr_bank_conflict and the readline *intend*.
799    io.disable_ld_fast_wakeup(i) := wr_bank_conflict(i) || rrl_bank_conflict_intend(i) ||
800      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
801  })
  // Performance counters for accesses and the conflict kinds computed above.
802  XSPerfAccumulate("data_array_multi_read", perf_multi_read)
803  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
804    XSPerfAccumulate(s"data_array_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y))
805  ))
806  (0 until LoadPipelineWidth).foreach(i => {
807    XSPerfAccumulate(s"data_array_rrl_bank_conflict_${i}", rrl_bank_conflict(i))
808    XSPerfAccumulate(s"data_array_rw_bank_conflict_${i}", wr_bank_conflict(i))
809    XSPerfAccumulate(s"data_array_read_${i}", io.read(i).valid)
810  })
811  XSPerfAccumulate("data_array_access_total", PopCount(io.read.map(_.valid)))
812  XSPerfAccumulate("data_array_read_line", io.readline.valid)
813  XSPerfAccumulate("data_array_write", io.write.valid)
814
  // Per-(div, bank) read results:
  //  - bank_result: raw SRAM data, selected-way ECC and the delayed error flag
  //  - bank_result_delayed: bank_result captured the cycle after the bank was read-enabled
  //  - ecc_result: ECC bits of all ways (used by the cache-op read path)
  //  - read_bank_error_delayed: copy of bank_result(...).error_delayed
815  val bank_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, new L1BankedDataReadResult())))
816  val bank_result_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, new L1BankedDataReadResult())))
817  val ecc_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, UInt(eccBits.W)))))
818  val read_bank_error_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Bool())))
819  dontTouch(bank_result)
820  dontTouch(read_bank_error_delayed)
821  for (div_index <- 0 until DCacheSetDiv) {
822    for (bank_index <- 0 until DCacheBanks) {
823      //     Set Addr & Read Way Mask
824      //
825      //    Pipe 0   ....  Pipe (n-1)
826      //      +      ....     +
827      //      |      ....     |
828      // +----+---------------+-----+
829      //  X                        X
830      //   X                      +------+ Bank Addr Match
831      //    +---------+----------+
832      //              |
833      //     +--------+--------+
834      //     |    Data Bank    |
835      //     +-----------------+
      // bank_addr_matchs(i): load port i is valid and targets this (div, bank), either through
      // its first bank address or, for 128-bit requests only, through its second one.
836      val bank_addr_matchs = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
837        io.read(i).valid && div_addrs(i) === div_index.U && (bank_addrs(i)(0) === bank_index.U || bank_addrs(i)(1) === bank_index.U && io.is128Req(i))
838      })))
      // readline_match: the line read targets this div (and, with ReduceReadlineConflict, this bank).
839      val readline_match = Wire(Bool())
840      if (ReduceReadlineConflict) {
841        readline_match := io.readline.valid && io.readline.bits.rmask(bank_index) && line_div_addr === div_index.U
842      } else {
843        readline_match := io.readline.valid && line_div_addr === div_index.U
844      }
      // Arbitration for this bank: the line read wins over load ports; among load ports the
      // lowest-indexed matching port wins (PriorityMux).
845      val bank_way_en = Mux(readline_match,
846        io.readline.bits.way_en,
847        PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => bank_addr_matchs(i) -> way_en(i)))
848      )
849      // computing bank_way_en takes too long, so bank_way_en_reg cannot be computed as RegNext(bank_way_en);
      // it is rebuilt here from separately registered inputs instead.
850      val bank_way_en_reg = Mux(RegNext(readline_match),
851        RegEnable(io.readline.bits.way_en, io.readline.valid),
852        PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => RegNext(bank_addr_matchs(i)) -> RegNext(way_en(i))))
853      )
854      val bank_set_addr = Mux(readline_match,
855        line_set_addr,
856        PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => bank_addr_matchs(i) -> set_addrs(i)))
857      )
858
859      val read_enable = bank_addr_matchs.asUInt.orR || readline_match
860
861      // read raw data
      // NOTE: these read-port connections may be overridden by the cache-op section further
      // down (last-connect semantics).
862      val data_bank = data_banks(div_index)(bank_index)
863      data_bank.io.r.en := read_enable
864      data_bank.io.r.way_en := bank_way_en
865      data_bank.io.r.addr := bank_set_addr
866      bank_result(div_index)(bank_index).raw_data := data_bank.io.r.data
867      bank_result_delayed(div_index)(bank_index) := RegEnable(bank_result(div_index)(bank_index), RegNext(read_enable))
868
869      // read ECC
      // All ways are read from the ECC SRAM; the ECC of the accessed way is then picked with the
      // registered one-hot way mask (presumably the SRAM response arrives the cycle after the
      // request — TODO confirm against SRAMTemplate).
870      ecc_banks match {
871        case Some(banks) =>
872          val ecc_bank = banks(div_index)(bank_index)
873          ecc_bank.io.r.req.valid := read_enable
874          ecc_bank.io.r.req.bits.apply(setIdx = bank_set_addr)
875          ecc_result(div_index)(bank_index) := ecc_bank.io.r.resp.data
876          bank_result(div_index)(bank_index).ecc := Mux1H(bank_way_en_reg, ecc_bank.io.r.resp.data)
877        case None =>
878          ecc_result(div_index)(bank_index) := DontCare
879          bank_result(div_index)(bank_index).ecc := DontCare
880      }
881
882      // use ECC to check error
      // The data+ECC word is registered (enabled the cycle after read_enable) before decoding,
      // so error_delayed lags the raw read data by one more cycle. Without ECC it is tied low.
883      ecc_banks match {
884        case Some(_) =>
885          val ecc_data = bank_result(div_index)(bank_index).asECCData()
886          val ecc_data_delayed = RegEnable(ecc_data, RegNext(read_enable))
887          bank_result(div_index)(bank_index).error_delayed := dcacheParameters.dataCode.decode(ecc_data_delayed).error
888          read_bank_error_delayed(div_index)(bank_index) := bank_result(div_index)(bank_index).error_delayed
889        case None =>
890          bank_result(div_index)(bank_index).error_delayed := false.B
891          read_bank_error_delayed(div_index)(bank_index) := false.B
892      }
893    }
894  }
895
  // Perf only: per div, each read-enabled bank contributes DCacheWays
  // (PopCount of the enable replicated DCacheWays times); the per-div sums are then added up.
896  val data_read_oh = WireInit(VecInit(Seq.fill(DCacheSetDiv)(0.U(XLEN.W))))
897  for (div_index <- 0 until DCacheSetDiv){
898    val temp = WireInit(VecInit(Seq.fill(DCacheBanks)(0.U(XLEN.W))))
899    for (bank_index <- 0 until DCacheBanks) {
900      temp(bank_index) := PopCount(Fill(DCacheWays, data_banks(div_index)(bank_index).io.r.en.asUInt))
901    }
902    data_read_oh(div_index) := temp.reduce(_ + _)
903  }
904  XSPerfAccumulate("data_read_counter", data_read_oh.foldLeft(0.U)(_ + _))
905
  // Load-port responses. r_* values are captured when the read fires (visible one cycle later),
  // rr_* values are those delayed by one further cycle.
  // NOTE(review): rr_way_addr is computed but not used in this class.
906  (0 until LoadPipelineWidth).map(i => {
907    val r_read_fire = RegNext(io.read(i).fire)
908    val r_div_addr = RegEnable(div_addrs(i), io.read(i).fire)
909    val r_bank_addr = RegEnable(bank_addrs(i), io.read(i).fire)
910    val rr_read_fire = RegNext(r_read_fire)
911    val rr_div_addr = RegEnable(RegEnable(div_addrs(i), io.read(i).fire), r_read_fire)
912    val rr_bank_addr = RegEnable(RegEnable(bank_addrs(i), io.read(i).fire), r_read_fire)
913    val rr_way_addr = RegEnable(RegEnable(OHToUInt(way_en(i)), io.read(i).fire), r_read_fire)
914    (0 until VLEN/DCacheSRAMRowBits).map( j =>{
      // Response j is the result of the (div, bank) this port addressed when it fired.
915      io.read_resp(i)(j)          := bank_result(r_div_addr)(r_bank_addr(j))
916      // error detection
      // Reported two cycles after the fire, and suppressed when the previous cycle's
      // io.bank_conflict_slow(i) was set.
917      io.read_error_delayed(i)(j) := rr_read_fire && read_bank_error_delayed(rr_div_addr)(rr_bank_addr(j)) && !RegNext(io.bank_conflict_slow(i))
918    })
919  })
920
921  // read result: expose banked read result
  // The line response is the whole bank vector of the div captured while readline was valid;
  // the error flag is the OR of all banks' error_delayed, gated by a fire two cycles earlier.
922  io.readline_resp := bank_result(RegEnable(line_div_addr, io.readline.valid))
923  io.readline_error_delayed := RegNext(RegNext(io.readline.fire)) &&
924    VecInit((0 until DCacheBanks).map(i => io.readline_resp(i).error_delayed)).asUInt.orR
925
926  // write data_banks & ecc_banks
  // All write-port connections below use the registered request, so the SRAM write happens one
  // cycle after io.write.valid. They may be overridden by the cache-op section further down.
927  for (div_index <- 0 until DCacheSetDiv) {
928    for (bank_index <- 0 until DCacheBanks) {
929      // data write
      // NOTE(review): RegNext(io.write.valid) looks redundant with write_valid_dup_reg(bank_index)
      // if io.write_dup valids mirror io.write.valid — confirm before simplifying.
930      val wen_reg = write_bank_mask_reg(bank_index) &&
931        write_valid_dup_reg(bank_index) &&
932        write_div_addr_dup_reg(bank_index) === div_index.U && RegNext(io.write.valid)
933      val data_bank = data_banks(div_index)(bank_index)
934      data_bank.io.w.en := wen_reg
935      data_bank.io.w.way_en := write_wayen_dup_reg(bank_index)
936      data_bank.io.w.addr := write_set_addr_dup_reg(bank_index)
937      data_bank.io.w.data := write_data_reg(bank_index)
938
939      // ecc write
      // The ECC bits are computed from the incoming write data and registered alongside it.
940      ecc_banks match {
941        case Some(banks) =>
942          val ecc_bank = banks(div_index)(bank_index)
943          ecc_bank.io.w.req.valid := wen_reg
944          ecc_bank.io.w.req.bits.apply(
945            setIdx = write_set_addr_dup_reg(bank_index),
946            data = RegEnable(getECCFromEncWord(cacheParams.dataCode.encode((io.write.bits.data(bank_index)))), io.write.valid),
947            waymask = write_wayen_dup_reg(bank_index)
948          )
          // NOTE(review): the condition is the (registered) ECC write valid, but the printed
          // set/data/way come from the current-cycle io.write.bits, not from the registered
          // values actually written — confirm whether the registered values were intended.
949          when(ecc_bank.io.w.req.valid) {
950            XSDebug("write in ecc sram: bank %x set %x data %x waymask %x\n",
951              bank_index.U,
952              addr_to_dcache_div_set(io.write.bits.addr),
953              getECCFromEncWord(cacheParams.dataCode.encode((io.write.bits.data(bank_index)))),
954              io.write.bits.way_en
955            )
956          }
957        case None => None
958      }
959    }
960  }
961
962  // deal with customized cache op
  // The way number is taken from wayNum(4, 0) below, i.e. 5 bits, hence at most 32 ways.
  // The `when` blocks in this section re-drive SRAM ports already connected above and take
  // precedence while a matching cache op is valid (last-connect semantics).
963  require(nWays <= 32)
964  io.cacheOp.resp.bits := DontCare
965  val cacheOpShouldResp = WireInit(false.B)
966  val eccReadResult = Wire(Vec(DCacheBanks, UInt(eccBits.W)))
967  // DCacheDupNum is 16
968  // vec: the dupIdx for every bank and every group
  // Each list has 8 entries; position = bank index, value = index into io.cacheOp_req_dup
  // (assumes DCacheBanks == 8 — TODO confirm).
969  val rdata_dup_vec = Seq(0, 0, 1, 1, 2, 2, 3, 3)
970  val rdataEcc_dup_vec = Seq(4, 4, 5, 5, 6, 6, 7, 7)
971  val wdata_dup_vec = Seq(8, 8, 9, 9, 10, 10, 11, 11)
972  val wdataEcc_dup_vec = Seq(12, 12, 13, 13, 14, 14, 15, 15)
973  val cacheOpDivAddr = set_to_dcache_div(io.cacheOp.req.bits.index)
974  val cacheOpSetAddr = set_to_dcache_div_set(io.cacheOp.req.bits.index)
975  val cacheOpWayMask = UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))
  // Cache-op data read: the bank is read in EVERY div (no div qualifier); the requested div is
  // selected afterwards through cacheOpDivAddrReg when building the response.
976  rdata_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) =>
977    for (divIdx <- 0 until DCacheSetDiv) {
978      when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isReadData(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
979        val data_bank = data_banks(divIdx)(bankIdx)
980        data_bank.io.r.en := true.B
981        data_bank.io.r.way_en := cacheOpWayMask
982        data_bank.io.r.addr := cacheOpSetAddr
983        cacheOpShouldResp := true.B
984      }
985    }
986  }
  // Cache-op ECC read: same scheme on the ECC SRAMs; without ECC storage it only responds.
987  rdataEcc_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) =>
988    for (divIdx <- 0 until DCacheSetDiv) {
989      when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isReadDataECC(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
990        ecc_banks match {
991          case Some(banks) =>
992            val ecc_bank = banks(divIdx)(bankIdx)
993            ecc_bank.io.r.req.valid := true.B
994            ecc_bank.io.r.req.bits.setIdx := cacheOpSetAddr
995            cacheOpShouldResp := true.B
996          case None =>
997            cacheOpShouldResp := true.B
998        }
999      }
1000    }
1001  }
  // Cache-op data write: unlike the reads, the write enable is qualified by the target div.
1002  wdata_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) =>
1003    for (divIdx <- 0 until DCacheSetDiv) {
1004      when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isWriteData(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
1005        val data_bank = data_banks(divIdx)(bankIdx)
1006        data_bank.io.w.en := cacheOpDivAddr === divIdx.U
1007        data_bank.io.w.way_en := cacheOpWayMask
1008        data_bank.io.w.addr := cacheOpSetAddr
1009        data_bank.io.w.data := io.cacheOp.req.bits.write_data_vec(bankIdx)
1010        cacheOpShouldResp := true.B
1011      }
1012    }
1013  }
  // Cache-op ECC write: the same write_data_ecc value is written to every bank of the target div.
1014  wdataEcc_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) =>
1015    for (divIdx <- 0 until DCacheSetDiv) {
1016      when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isWriteDataECC(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
1017        ecc_banks match {
1018          case Some(banks) =>
1019            val ecc_bank = banks(divIdx)(bankIdx)
1020            ecc_bank.io.w.req.valid := cacheOpDivAddr === divIdx.U
1021            ecc_bank.io.w.req.bits.apply(
1022              setIdx = cacheOpSetAddr,
1023              data = io.cacheOp.req.bits.write_data_ecc,
1024              waymask = cacheOpWayMask
1025            )
1026            cacheOpShouldResp := true.B
1027          case None =>
1028            cacheOpShouldResp := true.B
1029        }
1030      }
1031    }
1032  }
1033
  // The cache-op response is produced one cycle after a request that matched one of the op
  // kinds above; div, way mask and bank number are captured from the request for selection.
1034  io.cacheOp.resp.valid := RegNext(io.cacheOp.req.valid && cacheOpShouldResp)
1035  for (bank_index <- 0 until DCacheBanks) {
1036    val cacheOpDivAddrReg = RegEnable(cacheOpDivAddr, io.cacheOp.req.valid)
1037    val cacheOpWayMaskReg = RegEnable(cacheOpWayMask, io.cacheOp.req.valid)
1038    io.cacheOp.resp.bits.read_data_vec(bank_index) := bank_result(cacheOpDivAddrReg)(bank_index).raw_data
1039    eccReadResult(bank_index) := Mux1H(cacheOpWayMaskReg, ecc_result(cacheOpDivAddrReg)(bank_index))
1040  }
1041
  // ECC read data is only driven while the response is valid; otherwise it reads as zero.
1042  io.cacheOp.resp.bits.read_data_ecc := Mux(io.cacheOp.resp.valid,
1043    eccReadResult(RegEnable(io.cacheOp.req.bits.bank_num, io.cacheOp.req.valid)),
1044    0.U
1045  )
1046
  // ChiselDB logging of read-read bank conflicts; table and site names carry the hart id.
1047  val tableName = "BankConflict" + p(XSCoreParamsKey).HartId.toString
1048  val siteName = "BankedDataArray" + p(XSCoreParamsKey).HartId.toString
1049  val bankConflictTable = ChiselDB.createTable(tableName, new BankConflictDB)
1050  val bankConflictData = Wire(new BankConflictDB)
1051  for (i <- 0 until LoadPipelineWidth) {
1052    bankConflictData.set_index(i) := set_addrs(i)
1053    bankConflictData.addr(i) := io.read(i).bits.addr
1054  }
1055
1056  // FIXME: rr_bank_conflict(0)(1) no generalization
  // Only the conflict between load ports 0 and 1 is recorded; bank and way fields come from
  // port 0 and are zeroed when there is no such conflict.
  // NOTE(review): fake_rr_bank_conflict compares set and div only, whereas the registered
  // filter for io.bank_conflict_slow also compares way_en — confirm which definition is intended.
1057  when(rr_bank_conflict(0)(1)) {
1058    (0 until (VLEN/DCacheSRAMRowBits)).map(i => {
1059      bankConflictData.bank_index(i) := bank_addrs(0)(i)
1060    })
1061    bankConflictData.way_index := OHToUInt(way_en(0))
1062    bankConflictData.fake_rr_bank_conflict := set_addrs(0) === set_addrs(1) && div_addrs(0) === div_addrs(1)
1063  }.otherwise {
1064    (0 until (VLEN/DCacheSRAMRowBits)).map(i => {
1065      bankConflictData.bank_index(i) := 0.U
1066    })
1067    bankConflictData.way_index := 0.U
1068    bankConflictData.fake_rr_bank_conflict := false.B
1069  }
1070
  // A row is logged only when the Constantin record is non-zero and ports 0 and 1 conflict.
1071  val isWriteBankConflictTable = Constantin.createRecord(s"isWriteBankConflictTable${p(XSCoreParamsKey).HartId}")
1072  bankConflictTable.log(
1073    data = bankConflictData,
1074    en = isWriteBankConflictTable.orR && rr_bank_conflict(0)(1),
1075    site = siteName,
1076    clock = clock,
1077    reset = reset
1078  )
1079
  // Perf counter per port pair (x < y): read-read conflicts whose set and div addresses match
  // (the div comparison is already implied by rr_bank_conflict).
1080  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
1081    XSPerfAccumulate(s"data_array_fake_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y) && set_addrs(x) === set_addrs(y) && div_addrs(x) === div_addrs(y))
1082  ))
1083
1084}
1085