xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/data/BankedDataArray.scala (revision 92b88f30156d46e844042eea94f7121557fd09a1)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.cache
18
19import chipsalliance.rocketchip.config.Parameters
20import chisel3._
21import utils._
22import utility._
23import chisel3.util._
24import freechips.rocketchip.tilelink.{ClientMetadata, TLClientParameters, TLEdgeOut}
25import xiangshan.{L1CacheErrorInfo, XSCoreParamsKey}
26
27import scala.math.max
28
// Record written to the bank-conflict ChiselDB table. In this file it is filled in by the
// data array from the load-pipeline read requests (see the bankConflictData wiring).
class BankConflictDB(implicit p: Parameters) extends DCacheBundle{
  // physical address presented by each load pipeline
  val addr = Vec(LoadPipelineWidth, Bits(PAddrBits.W))
  // set index derived from each load pipeline's address
  val set_index = Vec(LoadPipelineWidth, UInt((DCacheAboveIndexOffset - DCacheSetOffset).W))
  // bank on which the conflict was detected (0 when no conflict is being reported)
  val bank_index = UInt((DCacheSetOffset - DCacheBankOffset).W)
  // binary-encoded way of the conflicting request (0 when no conflict is being reported)
  val way_index = UInt(wayBits.W)
  // set when the two conflicting requests actually address the same set
  val fake_rr_bank_conflict = Bool()
}
36
// Word read request issued on the load-pipeline read ports; also the base class of the
// line-read and write request bundles below.
class L1BankedDataReadReq(implicit p: Parameters) extends DCacheBundle
{
  // way select; the data arrays assert that at most one bit is set when a read fires
  val way_en = Bits(DCacheWays.W)
  // physical address; the set and bank indices are extracted from it by the data arrays
  val addr = Bits(PAddrBits.W)
}
42
// Whole-line read request (main pipeline): a read request plus a per-bank read mask.
class L1BankedDataReadLineReq(implicit p: Parameters) extends L1BankedDataReadReq
{
  // one bit per bank; in the data-array code visible in this file it is only consulted
  // when ReduceReadlineConflict is enabled
  val rmask = Bits(DCacheBanks.W)
}
47
// Now, we can write a cache-block in a single cycle
// Whole-line write request: the rows of all banks are carried at once and gated per bank.
class L1BankedDataWriteReq(implicit p: Parameters) extends L1BankedDataReadReq
{
  // one bit per bank: bank i is written only when wmask(i) is set
  val wmask = Bits(DCacheBanks.W)
  // one SRAM row of write data per bank
  val data = Vec(DCacheBanks, Bits(DCacheSRAMRowBits.W))
}
54
// cache-block write request without data
// Carries only the inherited way_en / addr fields; it is the payload type of the per-bank
// duplicated write ports (io.write_dup) of the data arrays.
class L1BankedDataWriteReqCtrl(implicit p: Parameters) extends L1BankedDataReadReq
57
// Read result of one SRAM row: the raw data, its ECC check bits and a late error flag.
class L1BankedDataReadResult(implicit p: Parameters) extends DCacheBundle
{
  // you can choose which bank to read to save power
  // ECC check bits stored alongside the row
  val ecc = Bits(eccBits.W)
  // row data exactly as read from the data SRAM
  val raw_data = Bits(DCacheSRAMRowBits.W)
  val error_delayed = Bool() // 1 cycle later than data resp

  // Rebuilds the encoded word for the ECC decoder: check bits in the high part,
  // raw data in the low part.
  def asECCData() = {
    Cat(ecc, raw_data)
  }
}
69
// Write request for one DataSRAMBank (all ways of a single bank).
class DataSRAMBankWriteReq(implicit p: Parameters) extends DCacheBundle {
  // write enable
  val en = Bool()
  // set index to write; width is left to inference
  val addr = UInt()
  // way select; DataSRAMBank asserts that at most one bit is set while en is high
  val way_en = UInt(DCacheWays.W)
  // row data to write
  val data = UInt(DCacheSRAMRowBits.W)
}
76
// wrap a sram
// One single-ported SRAM holding the data rows of a single (bank, way) pair.
// bankIdx / wayIdx are only used to label the debug prints.
class DataSRAM(bankIdx: Int, wayIdx: Int)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle() {
    // write port: sampled into registers below, so the SRAM sees the write one cycle later
    val w = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Input(UInt(DCacheSRAMRowBits.W))
    }

    // read port: the request is forwarded to the SRAM in the same cycle
    val r = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Output(UInt(DCacheSRAMRowBits.W))
    }
  })

  // backing storage: DCacheSets rows of DCacheSRAMRowBits bits, a single way
  val data_sram = Module(new SRAMTemplate(
    Bits(DCacheSRAMRowBits.W),
    set = DCacheSets, way = 1,
    shouldReset = false, holdRead = false, singlePort = true
  ))

  // ---- write path: register the whole request, then drive the SRAM write port ----
  val wenReg = RegNext(io.w.en)
  val waddrReg = RegNext(io.w.addr)
  val wdataReg = RegNext(io.w.data)
  data_sram.io.w.req.valid := wenReg
  data_sram.io.w.req.bits.apply(setIdx = waddrReg, data = wdataReg, waymask = 1.U)

  // ---- read path: pass the request through and expose way 0 of the response ----
  data_sram.io.r.req.valid := io.r.en
  data_sram.io.r.req.bits.apply(setIdx = io.r.addr)
  io.r.data := data_sram.io.r.resp.data(0)
  XSPerfAccumulate("data_sram_read_counter", data_sram.io.r.req.valid)

  // Debug print of a read: fires the cycle after io.r.en and pairs the delayed address
  // with the current value of io.r.data.
  def dump_r() = when(RegNext(io.r.en)) {
    XSDebug("bank read set %x bank %x way %x data %x\n",
      RegNext(io.r.addr), bankIdx.U, wayIdx.U, io.r.data)
  }

  // Debug print of a write, taken when the request arrives (before it is registered).
  def dump_w() = when(io.w.en) {
    XSDebug("bank write set %x bank %x way %x data %x\n",
      io.w.addr, bankIdx.U, wayIdx.U, io.w.data)
  }

  // Emits both debug prints, write first.
  def dump() = {
    dump_w()
    dump_r()
  }
}
144
// wrap data rows of all DCacheWays ways of one bank (one single-ported SRAM per way)
class DataSRAMBank(index: Int)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle() {
    // write request; registered below before it reaches the SRAMs
    val w = Input(new DataSRAMBankWriteReq)

    // read request (en / addr / way_en) and the selected row
    val r = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val way_en = Input(UInt(DCacheWays.W))
      val data = Output(UInt(DCacheSRAMRowBits.W))
    }
  })

  // an enabled access may select at most one way
  assert(RegNext(!io.w.en || PopCount(io.w.way_en) <= 1.U))
  assert(RegNext(!io.r.en || PopCount(io.r.way_en) <= 1.U))

  // registered read way select, used below to pick one way's read data
  val r_way_en_reg = RegNext(io.r.way_en)

  // the complete write request, delayed by one cycle
  val w_reg = RegNext(io.w)
  // val rw_bypass = RegNext(io.w.addr === io.r.addr && io.w.way_en === io.r.way_en && io.w.en)

  // multiway data bank
  val data_bank = Array.fill(DCacheWays) {
    Module(new SRAMTemplate(
      Bits(DCacheSRAMRowBits.W),
      set = DCacheSets, way = 1,
      shouldReset = false, holdRead = false, singlePort = true
    ))
  }

  data_bank.zipWithIndex.foreach { case (way_sram, w) =>
    // write: only the way selected by the registered request is written
    way_sram.io.w.req.valid := w_reg.en && w_reg.way_en(w)
    way_sram.io.w.req.bits.apply(setIdx = w_reg.addr, data = w_reg.data, waymask = 1.U)
    // read: every way is read at the requested set; the way is selected afterwards
    way_sram.io.r.req.valid := io.r.en
    way_sram.io.r.req.bits.apply(setIdx = io.r.addr)
  }
  XSPerfAccumulate("data_read_counter", PopCount(data_bank.map(_.io.r.req.valid)))

  // Two-level way select: one-hot mux inside the low group of ways and inside the high
  // group, then pick the group depending on whether any low-group select bit is set.
  val half = nWays / 2
  val data_read = data_bank.map(_.io.r.resp.data(0))
  // tail(half) drops the `half` most-significant bits, i.e. keeps the low-order way selects
  val data_left = Mux1H(r_way_en_reg.tail(half), data_read.take(half))
  // head(half) keeps the `half` most-significant bits, i.e. the high-order way selects
  val data_right = Mux1H(r_way_en_reg.head(half), data_read.drop(half))

  val sel_low = r_way_en_reg.tail(half).orR()
  val row_data = Mux(sel_low, data_left, data_right)

  io.r.data := row_data

  // Debug print of a read: fires the cycle after io.r.en and pairs the delayed request
  // with the current value of io.r.data.
  def dump_r() = when(RegNext(io.r.en)) {
    XSDebug("bank read addr %x way_en %x data %x\n",
      RegNext(io.r.addr), RegNext(io.r.way_en), io.r.data)
  }

  // Debug print of a write, taken when the request arrives (before it is registered).
  def dump_w() = when(io.w.en) {
    XSDebug("bank write addr %x way_en %x data %x\n",
      io.w.addr, io.w.way_en, io.w.data)
  }

  // Emits both debug prints, write first.
  def dump() = {
    dump_w()
    dump_r()
  }
}
226
//                     Banked DCache Data
// -----------------------------------------------------------------
// | Bank0 | Bank1 | Bank2 | Bank3 | Bank4 | Bank5 | Bank6 | Bank7 |
// -----------------------------------------------------------------
// | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  |
// | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  |
// | ....  | ....  | ....  | ....  | ....  | ....  | ....  | ....  |
// -----------------------------------------------------------------
// Common IO and debug helpers shared by the data-array implementations in this file.
abstract class AbstractBankedDataArray(implicit p: Parameters) extends DCacheModule
{
  // index of the readline port when it is listed after the load-pipeline read ports
  val ReadlinePortErrorIndex = LoadPipelineWidth
  val io = IO(new DCacheBundle {
    // load pipeline read word req
    val read = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new L1BankedDataReadReq)))
    // main pipeline read / write line req
    val readline_intend = Input(Bool())
    val readline = Flipped(DecoupledIO(new L1BankedDataReadLineReq))
    val write = Flipped(DecoupledIO(new L1BankedDataWriteReq))
    val write_dup = Vec(DCacheBanks, Flipped(Decoupled(new L1BankedDataWriteReqCtrl)))
    // data for readline and loadpipe
    val readline_resp = Output(Vec(DCacheBanks, new L1BankedDataReadResult()))
    val readline_error_delayed = Output(Bool())
    val read_resp_delayed = Output(Vec(LoadPipelineWidth, new L1BankedDataReadResult()))
    val read_error_delayed = Output(Vec(LoadPipelineWidth, Bool()))
    // val nacks = Output(Vec(LoadPipelineWidth, Bool()))
    // val errors = Output(Vec(LoadPipelineWidth + 1, new L1CacheErrorInfo)) // read ports + readline port
    // when bank_conflict, read (1) port should be ignored
    val bank_conflict_slow = Output(Vec(LoadPipelineWidth, Bool()))
    val bank_conflict_fast = Output(Vec(LoadPipelineWidth, Bool()))
    val disable_ld_fast_wakeup = Output(Vec(LoadPipelineWidth, Bool()))
    // customized cache op port
    val cacheOp = Flipped(new L1CacheInnerOpIO)
    val cacheOp_req_dup = Vec(DCacheDupNum, Flipped(Valid(new CacheCtrlReqInfo)))
    val cacheOp_req_bits_opCode_dup = Input(Vec(DCacheDupNum, UInt(XLEN.W)))
  })

  // Builds a Vec with one element per load pipeline, element i being f(i).
  def pipeMap[T <: Data](f: Int => T) = VecInit((0 until LoadPipelineWidth).map(f))

  // Extracts the check bits (everything above the low wordBits bits) from an encoded word.
  // Fails elaboration if encWord is not exactly encWordBits wide.
  def getECCFromEncWord(encWord: UInt) = {
    require(encWord.getWidth == encWordBits)
    encWord(encWordBits - 1, wordBits)
  }

  // Debug print of every valid load-pipeline read request and of a valid line read.
  def dumpRead() = {
    // side effects only, hence foreach rather than map
    (0 until LoadPipelineWidth).foreach { w =>
      when(io.read(w).valid) {
        XSDebug(s"DataArray Read channel: $w valid way_en: %x addr: %x\n",
          io.read(w).bits.way_en, io.read(w).bits.addr)
      }
    }
    when(io.readline.valid) {
      XSDebug(s"DataArray Read Line, valid way_en: %x addr: %x rmask %x\n",
        io.readline.bits.way_en, io.readline.bits.addr, io.readline.bits.rmask)
    }
  }

  // Debug print of a valid line write: the request header followed by one line per bank.
  def dumpWrite() = {
    when(io.write.valid) {
      XSDebug(s"DataArray Write valid way_en: %x addr: %x\n",
        io.write.bits.way_en, io.write.bits.addr)

      // r is a bank index (the whole line is written at once), so it is labelled as such
      (0 until DCacheBanks).foreach { r =>
        XSDebug(s"bank: $r data: %x wmask: %x\n",
          io.write.bits.data(r), io.write.bits.wmask(r))
      }
    }
  }

  // Debug print of the delayed read response of every load pipeline.
  def dumpResp() = {
    XSDebug("DataArray ReadResp channel:\n")
    // `map` is kept as the last expression so the inferred result type of dumpResp()
    // and dump() stays what callers have always seen; r is a load-channel index
    (0 until LoadPipelineWidth) map { r =>
      XSDebug(s"channel: $r data: %x\n", io.read_resp_delayed(r).raw_data)
    }
  }

  // Emits all debug prints: reads, then writes, then responses.
  def dump() = {
    dumpRead()
    dumpWrite()
    dumpResp()
  }
}
308
// the smallest access unit is sram
// Data array built from one DataSRAM and one ECC SRAM per (bank, way) pair, so that a
// read only has to enable the SRAMs of the bank / way it actually needs.
// NOTE: statement order matters in this class: the cache-op `when` blocks near the end
// re-drive SRAM ports that were already connected earlier and rely on Chisel's
// last-connect semantics to take precedence.
class SramedDataArray(implicit p: Parameters) extends AbstractBankedDataArray {
  println("  DCacheType: SramedDataArray")
  // elaboration-time switch: when true, a line read only occupies the banks named in rmask
  val ReduceReadlineConflict = false

  // writes are always accepted
  io.write.ready := true.B
  io.write_dup.foreach(_.ready := true.B)

  // data_banks(bank)(way): one data SRAM per bank and way
  val data_banks = List.tabulate(DCacheBanks)(i => List.tabulate(DCacheWays)(j => Module(new DataSRAM(i,j))))
  // ecc_banks also needs to be changed to two-dimensional to align with data_banks
  val ecc_banks = List.tabulate(DCacheBanks)(i => List.tabulate(DCacheWays)(j => Module(new SRAMTemplate(
    Bits(eccBits.W),
    set = DCacheSets,
    way = 1,
    shouldReset = false,
    holdRead = false,
    singlePort = true
    ))))

  // instantiate the debug prints of every data SRAM
  data_banks.map(_.map(_.dump()))

  // per load pipeline: way select, set index and bank index of the current request
  val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
  val set_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val bank_addrs = Wire(Vec(LoadPipelineWidth, UInt()))

  // read data_banks and ecc_banks
  // for single port SRAM, do not allow read and write in the same cycle
  // rwhazard is write.valid delayed by one cycle, matching the one-cycle delay that
  // DataSRAM and the ECC write path below put in front of the actual SRAM write
  val rwhazard = RegNext(io.write.valid)
  val rrhazard = false.B // io.readline.valid
  (0 until LoadPipelineWidth).map(rport_index => {
    set_addrs(rport_index) := addr_to_dcache_set(io.read(rport_index).bits.addr)
    bank_addrs(rport_index) := addr_to_dcache_bank(io.read(rport_index).bits.addr)

    io.read(rport_index).ready := !(rwhazard || rrhazard)

    // use way_en to select a way after data read out
    assert(!(RegNext(io.read(rport_index).fire() && PopCount(io.read(rport_index).bits.way_en) > 1.U)))
    way_en(rport_index) := io.read(rport_index).bits.way_en
  })
  io.readline.ready := !(rwhazard)

  // read conflict
  // rr_bank_conflict(x)(y): load pipes x and y are both valid and want the same bank and
  // the same way but different sets, i.e. the same SRAM at two different addresses
  val rr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x => Seq.tabulate(LoadPipelineWidth)(y =>
    bank_addrs(x) === bank_addrs(y) && io.read(x).valid && io.read(y).valid && io.read(x).bits.way_en === io.read(y).bits.way_en && set_addrs(x) =/= set_addrs(y)
  ))
  // rrl_bank_conflict(i): load pipe i collides with a valid line read
  val rrl_bank_conflict = Wire(Vec(LoadPipelineWidth, Bool()))
  if (ReduceReadlineConflict) {
    (0 until LoadPipelineWidth).foreach(i => rrl_bank_conflict(i) := io.read(i).valid && io.readline.valid && io.readline.bits.rmask(bank_addrs(i)))
  } else {
    (0 until LoadPipelineWidth).foreach(i => rrl_bank_conflict(i) := io.read(i).valid && io.readline.valid && io.readline.bits.way_en === way_en(i) && addr_to_dcache_set(io.readline.bits.addr)=/=set_addrs(i))
  }
  // same as rrl_bank_conflict, but qualified by readline_intend instead of readline.valid
  val rrl_bank_conflict_intend = Wire(Vec(LoadPipelineWidth, Bool()))
  if (ReduceReadlineConflict) {
    (0 until LoadPipelineWidth).foreach(i => rrl_bank_conflict_intend(i) := io.read(i).valid && io.readline_intend && io.readline.bits.rmask(bank_addrs(i)))
  } else {
    (0 until LoadPipelineWidth).foreach(i => rrl_bank_conflict_intend(i) := io.read(i).valid && io.readline_intend && io.readline.bits.way_en === way_en(i) && addr_to_dcache_set(io.readline.bits.addr)=/=set_addrs(i))
  }

  // rw_bank_conflict(i): load pipe i is valid in the cycle a registered write is performed
  val rw_bank_conflict = VecInit(Seq.tabulate(LoadPipelineWidth)(io.read(_).valid && rwhazard))
  val perf_multi_read = PopCount(io.read.map(_.valid)) >= 2.U
  (0 until LoadPipelineWidth).foreach(i => {
    // only conflicts with lower-numbered pipes count, so the lower-numbered pipe of a
    // conflicting pair is the one that is not flagged
    io.bank_conflict_fast(i) := rw_bank_conflict(i) || rrl_bank_conflict(i) ||
      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
    io.bank_conflict_slow(i) := RegNext(io.bank_conflict_fast(i))
    io.disable_ld_fast_wakeup(i) := rw_bank_conflict(i) || rrl_bank_conflict_intend(i) ||
      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
  })
  XSPerfAccumulate("data_array_multi_read", perf_multi_read)
  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
    XSPerfAccumulate(s"data_array_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y))
  ))
  (0 until LoadPipelineWidth).foreach(i => {
    XSPerfAccumulate(s"data_array_rrl_bank_conflict_${i}", rrl_bank_conflict(i))
    XSPerfAccumulate(s"data_array_rw_bank_conflict_${i}", rw_bank_conflict(i))
    XSPerfAccumulate(s"data_array_read_${i}", io.read(i).valid)
  })
  XSPerfAccumulate("data_array_access_total", PopCount(io.read.map(_.valid)))
  XSPerfAccumulate("data_array_read_line", io.readline.valid)
  XSPerfAccumulate("data_array_write", io.write.valid)

  // read_result(bank)(way): row, check bits and late error flag of every SRAM pair
  val read_result = Wire(Vec(DCacheBanks, Vec(DCacheWays,new L1BankedDataReadResult())))
  val read_error_delayed_result = Wire(Vec(DCacheBanks, Vec(DCacheWays, Bool())))
  dontTouch(read_result)
  dontTouch(read_error_delayed_result)
  for (bank_index <- 0 until DCacheBanks) {
    for (way_index <- 0 until DCacheWays) {
      //     Set Addr & Read Way Mask
      //
      //    Pipe 0   ....  Pipe (n-1)
      //      +      ....     +
      //      |      ....     |
      // +----+---------------+-----+
      //  X                        X
      //   X                      +------+ Bank Addr Match
      //    +---------+----------+
      //              |
      //     +--------+--------+
      //     |    Data Bank    |
      //     +-----------------+
      // loadpipe_en(i): load pipe i is valid and targets exactly this bank and this way
      val loadpipe_en = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
        bank_addrs(i) === bank_index.U && io.read(i).valid && way_en(i)(way_index)
      })))
      val readline_en = Wire(Bool())
      if (ReduceReadlineConflict) {
        readline_en := io.readline.valid && io.readline.bits.rmask(bank_index) && io.readline.bits.way_en(way_index)
      } else {
        readline_en := io.readline.valid && io.readline.bits.way_en(way_index)
      }
      // a line read takes precedence; otherwise the lowest-numbered matching load pipe wins
      val sram_set_addr = Mux(readline_en,
        addr_to_dcache_set(io.readline.bits.addr),
        PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => loadpipe_en(i) -> set_addrs(i)))
      )
      val read_en = loadpipe_en.asUInt.orR || readline_en
      // read raw data
      val data_bank = data_banks(bank_index)(way_index)
      data_bank.io.r.en := read_en
      data_bank.io.r.addr := sram_set_addr
      // read the check bits from the matching ECC SRAM with the same enable and address
      val ecc_bank = ecc_banks(bank_index)(way_index)
      ecc_bank.io.r.req.valid := read_en
      ecc_bank.io.r.req.bits.apply(setIdx = sram_set_addr)

      read_result(bank_index)(way_index).raw_data := data_bank.io.r.data
      read_result(bank_index)(way_index).ecc := ecc_bank.io.r.resp.data(0)

      // use ECC to check error
      // the encoded word is captured the cycle after read_en and decoded from that
      // register, so the error flag lags the read data
      val ecc_data = read_result(bank_index)(way_index).asECCData()
      val ecc_data_delayed = RegEnable(ecc_data, RegNext(read_en))
      read_result(bank_index)(way_index).error_delayed := dcacheParameters.dataCode.decode(ecc_data_delayed).error
      read_error_delayed_result(bank_index)(way_index) := read_result(bank_index)(way_index).error_delayed
    }
  }

  // read result: expose banked read result
  /*
  (0 until LoadPipelineWidth).map(i => {
    io.read_resp(i) := read_result(RegNext(bank_addrs(i)))(RegNext(OHToUInt(way_en(i))))
   })
  */
  // load-pipe response: read_result is registered once more, and the bank / way used to
  // index it are the request's values delayed by two cycles
  val read_result_delayed = RegNext(read_result)
  (0 until LoadPipelineWidth).map(i => {
    io.read_resp_delayed(i) := read_result_delayed(RegNext(RegNext(bank_addrs(i))))(RegNext(RegNext(OHToUInt(way_en(i)))))
  })
  // line-read response: for every bank, the way requested one cycle earlier
  (0 until DCacheBanks).map(i => {
    io.readline_resp(i) := read_result(i)(RegNext(OHToUInt(io.readline.bits.way_en)))
  })

  // error detection
  // normal read ports
  // an error is reported two cycles after the read fired, and only if the port was not
  // flagged with a bank conflict
  (0 until LoadPipelineWidth).map(rport_index => {
    io.read_error_delayed(rport_index) := RegNext(RegNext(io.read(rport_index).fire())) &&
      read_error_delayed_result(RegNext(RegNext(bank_addrs(rport_index))))(RegNext(RegNext(OHToUInt(way_en(rport_index))))) &&
      !RegNext(io.bank_conflict_slow(rport_index))
  })
  // readline port
  // any bank of the line reporting an error flags the whole line read
  io.readline_error_delayed := RegNext(RegNext(io.readline.fire())) &&
    VecInit((0 until DCacheBanks).map(i => io.readline_resp(i).error_delayed)).asUInt().orR

  // write data_banks & ecc_banks
  // valid / way / address come from the per-bank duplicated port io.write_dup(bank),
  // while wmask and data come from io.write
  val sram_waddr = addr_to_dcache_set(io.write.bits.addr)
  val sram_waddr_dup = io.write_dup.map(x => addr_to_dcache_set(x.bits.addr))
  for (bank_index <- 0 until DCacheBanks) {
    for (way_index <- 0 until DCacheWays) {
      // data write
      val data_bank = data_banks(bank_index)(way_index)
      data_bank.io.w.en := io.write_dup(bank_index).valid && io.write.bits.wmask(bank_index) && io.write_dup(bank_index).bits.way_en(way_index)
      data_bank.io.w.addr := sram_waddr_dup(bank_index)
      data_bank.io.w.data := io.write.bits.data(bank_index)
      // ecc write
      // registered here by one cycle, which lines it up with the write register inside DataSRAM
      val ecc_bank = ecc_banks(bank_index)(way_index)
      ecc_bank.io.w.req.valid := RegNext(io.write_dup(bank_index).valid && io.write.bits.wmask(bank_index) && io.write_dup(bank_index).bits.way_en(way_index))
      ecc_bank.io.w.req.bits.apply(
        setIdx = RegNext(sram_waddr_dup(bank_index)),
        data = RegNext(getECCFromEncWord(cacheParams.dataCode.encode((io.write.bits.data(bank_index))))),
        waymask = 1.U
      )
      // NOTE(review): the condition is the registered valid, but the printed address,
      // check bits and way_en are the un-registered request values of the current cycle
      when(ecc_bank.io.w.req.valid) {
        XSDebug("write in ecc sram: bank %x set %x data %x waymask %x\n",
          bank_index.U,
          sram_waddr,
          getECCFromEncWord(cacheParams.dataCode.encode((io.write.bits.data(bank_index)))),
          io.write.bits.way_en
        );
      }
    }
  }

  // deal with customized cache op
  // wayNum(4, 0) below is a 5-bit way number, hence the limit of 32 ways
  require(nWays <= 32)
  io.cacheOp.resp.bits := DontCare
  val cacheOpShouldResp = WireInit(false.B)
  val eccReadResult = Wire(Vec(DCacheBanks, UInt(eccBits.W)))
  // DCacheDupNum is 16
  // vec: the dupIdx for every bank and every group
  // each Seq maps a bank index (its position) to the duplicated request copy that
  // controls that bank; NOTE(review): the Seqs have 8 entries, so only banks 0..7 are covered
  val rdata_dup_vec = Seq(0,0,1,1,2,2,3,3)
  val rdataEcc_dup_vec = Seq(4,4,5,5,6,6,7,7)
  val wdata_dup_vec = Seq(8,8,9,9,10,10,11,11)
  val wdataEcc_dup_vec = Seq(12,12,13,13,14,14,15,15)
  // cache-op data read: overrides the normal read enable / address of the data SRAMs and
  // enables only the way named by wayNum
  rdata_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) =>
    for (wayIdx <- 0 until DCacheWays) {
      when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isReadData(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
        val data_bank = data_banks(bankIdx)(wayIdx)
        data_bank.io.r.en := UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))(wayIdx)
        data_bank.io.r.addr := io.cacheOp.req.bits.index
        cacheOpShouldResp := true.B
      }
    }
  }
  // cache-op ECC read: overrides the ECC SRAM read ports; unlike the data read above,
  // every way is enabled here
  rdataEcc_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) =>
    for (wayIdx <- 0 until DCacheWays){
      when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isReadDataECC(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
        val ecc_bank = ecc_banks(bankIdx)(wayIdx)
        ecc_bank.io.r.req.valid := true.B
        ecc_bank.io.r.req.bits.setIdx := io.cacheOp.req.bits.index
        cacheOpShouldResp := true.B
      }
    }
  }
  // cache-op data write: overrides the write port of the data SRAMs for the way named by wayNum
  wdata_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) =>
    for (wayIdx <- 0 until DCacheWays){
      when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isWriteData(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
        val data_bank = data_banks(bankIdx)(wayIdx)
        data_bank.io.w.en := UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))(wayIdx)
        data_bank.io.w.addr := io.cacheOp.req.bits.index
        data_bank.io.w.data := io.cacheOp.req.bits.write_data_vec(bankIdx)
        cacheOpShouldResp := true.B
      }
    }
  }
  // cache-op ECC write: overrides the write port of the ECC SRAMs for the way named by
  // wayNum; this path is not registered, unlike the normal ECC write above
  wdataEcc_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) =>
    for (wayIdx <- 0 until DCacheWays) {
      when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isWriteDataECC(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
        val ecc_bank = ecc_banks(bankIdx)(wayIdx)
        ecc_bank.io.w.req.valid := UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))(wayIdx)
        ecc_bank.io.w.req.bits.apply(
          setIdx = io.cacheOp.req.bits.index,
          data = io.cacheOp.req.bits.write_data_ecc,
          waymask = 1.U
        )
        cacheOpShouldResp := true.B
      }
    }
  }

  // the response is valid one cycle after a request that matched one of the four op kinds;
  // read data is taken from the way requested one cycle earlier
  io.cacheOp.resp.valid := RegNext(io.cacheOp.req.valid && cacheOpShouldResp)
  for (bank_index <- 0 until DCacheBanks) {
    io.cacheOp.resp.bits.read_data_vec(bank_index) := read_result(bank_index)(RegNext(io.cacheOp.req.bits.wayNum(4, 0))).raw_data
	  eccReadResult(bank_index) := read_result(bank_index)(RegNext(io.cacheOp.req.bits.wayNum(4, 0))).ecc
  }
  io.cacheOp.resp.bits.read_data_ecc := Mux(io.cacheOp.resp.valid,
    eccReadResult(RegNext(io.cacheOp.req.bits.bank_num)),
    0.U
  )

  // bank-conflict logging into a per-hart ChiselDB table
  val tableName =  "BankConflict" + p(XSCoreParamsKey).HartId.toString
  val siteName = "BankedDataArray" + p(XSCoreParamsKey).HartId.toString
  val bankConflictTable = ChiselDB.createTable(tableName, new BankConflictDB)
  val bankConflictData = Wire(new BankConflictDB)
  for (i <- 0 until LoadPipelineWidth) {
    bankConflictData.set_index(i) := set_addrs(i)
    bankConflictData.addr(i) := io.read(i).bits.addr
  }

  // FIXME: rr_bank_conflict(0)(1) no generalization
  // NOTE(review): indexing (0)(1) needs at least two load pipelines at elaboration time.
  // NOTE(review): rr_bank_conflict in this class already requires the two set indices to
  // differ, so fake_rr_bank_conflict (same set) can never be true inside this `when`.
  when(rr_bank_conflict(0)(1)) {
    bankConflictData.bank_index := bank_addrs(0)
    bankConflictData.way_index  := OHToUInt(way_en(0))
    bankConflictData.fake_rr_bank_conflict := set_addrs(0) === set_addrs(1)
  }.otherwise {
    bankConflictData.bank_index := 0.U
    bankConflictData.way_index := 0.U
    bankConflictData.fake_rr_bank_conflict := false.B
  }

  // logging is gated by a run-time Constantin record in addition to the conflict itself
  val isWriteBankConflictTable = WireInit(Constantin.createRecord("isWriteBankConflictTable" + p(XSCoreParamsKey).HartId.toString))
  bankConflictTable.log(
    data = bankConflictData,
    en = isWriteBankConflictTable.orR && rr_bank_conflict(0)(1),
    site = siteName,
    clock = clock,
    reset = reset
  )

  // NOTE(review): for the same reason as above (conflict requires different sets, this
  // condition requires equal sets) these counters can never increment in this class.
  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
    XSPerfAccumulate(s"data_array_fake_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y) && set_addrs(x)===set_addrs(y))
  ))

}
595
596// the smallest access unit is bank
597class BankedDataArray(implicit p: Parameters) extends AbstractBankedDataArray {
598  println("  DCacheType: BankedDataArray")
599  val ReduceReadlineConflict = false
600
601  io.write.ready := true.B
602  io.write_dup.foreach(_.ready := true.B)
603
604  val data_banks = List.tabulate(DCacheBanks)(i => Module(new DataSRAMBank(i)))
605  val ecc_banks = List.fill(DCacheBanks)(Module(new SRAMTemplate(
606    Bits(eccBits.W),
607    set = DCacheSets,
608    way = DCacheWays,
609    shouldReset = false,
610    holdRead = false,
611    singlePort = true
612  )))
613
614  data_banks.map(_.dump())
615
616  val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
617  val set_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
618  val bank_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
619
620  // read data_banks and ecc_banks
621  // for single port SRAM, do not allow read and write in the same cycle
622  val rwhazard = RegNext(io.write.valid)
623  val rrhazard = false.B // io.readline.valid
624  (0 until LoadPipelineWidth).map(rport_index => {
625    set_addrs(rport_index) := addr_to_dcache_set(io.read(rport_index).bits.addr)
626    bank_addrs(rport_index) := addr_to_dcache_bank(io.read(rport_index).bits.addr)
627
628    io.read(rport_index).ready := !(rwhazard || rrhazard)
629
630    // use way_en to select a way after data read out
631    assert(!(RegNext(io.read(rport_index).fire() && PopCount(io.read(rport_index).bits.way_en) > 1.U)))
632    way_en(rport_index) := io.read(rport_index).bits.way_en
633  })
634  io.readline.ready := !(rwhazard)
635
636  // read each bank, get bank result
637  val bank_result = Wire(Vec(DCacheBanks, new L1BankedDataReadResult()))
638  dontTouch(bank_result)
639  val read_bank_error_delayed = Wire(Vec(DCacheBanks, Bool()))
640  dontTouch(read_bank_error_delayed)
641  val rr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x => Seq.tabulate(LoadPipelineWidth)(y =>
642    bank_addrs(x) === bank_addrs(y) && io.read(x).valid && io.read(y).valid
643  ))
644  val rrl_bank_conflict = Wire(Vec(LoadPipelineWidth, Bool()))
645  if (ReduceReadlineConflict) {
646    (0 until LoadPipelineWidth).foreach(i => rrl_bank_conflict(i) := io.read(i).valid && io.readline.valid && io.readline.bits.rmask(bank_addrs(i)))
647  } else {
648    (0 until LoadPipelineWidth).foreach(i => rrl_bank_conflict(i) := io.read(i).valid && io.readline.valid)
649  }
650  val rrl_bank_conflict_intend = Wire(Vec(LoadPipelineWidth, Bool()))
651  if (ReduceReadlineConflict) {
652    (0 until LoadPipelineWidth).foreach(i => rrl_bank_conflict_intend(i) := io.read(i).valid && io.readline_intend && io.readline.bits.rmask(bank_addrs(i)))
653  } else {
654    (0 until LoadPipelineWidth).foreach(i => rrl_bank_conflict_intend(i) := io.read(i).valid && io.readline_intend)
655  }
656
657  val rw_bank_conflict = VecInit(Seq.tabulate(LoadPipelineWidth)(io.read(_).valid && rwhazard))
658  val perf_multi_read = PopCount(io.read.map(_.valid)) >= 2.U
659  (0 until LoadPipelineWidth).foreach(i => {
660    io.bank_conflict_fast(i) := rw_bank_conflict(i) || rrl_bank_conflict(i) ||
661      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
662    io.bank_conflict_slow(i) := RegNext(io.bank_conflict_fast(i))
663    io.disable_ld_fast_wakeup(i) := rw_bank_conflict(i) || rrl_bank_conflict_intend(i) ||
664      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
665  })
666  XSPerfAccumulate("data_array_multi_read", perf_multi_read)
667  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
668    XSPerfAccumulate(s"data_array_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y))
669  ))
670  (0 until LoadPipelineWidth).foreach(i => {
671    XSPerfAccumulate(s"data_array_rrl_bank_conflict_${i}", rrl_bank_conflict(i))
672    XSPerfAccumulate(s"data_array_rw_bank_conflict_${i}", rw_bank_conflict(i))
673    XSPerfAccumulate(s"data_array_read_${i}", io.read(i).valid)
674  })
675  XSPerfAccumulate("data_array_access_total", PopCount(io.read.map(_.valid)))
676  XSPerfAccumulate("data_array_read_line", io.readline.valid)
677  XSPerfAccumulate("data_array_write", io.write.valid)
678
679  for (bank_index <- 0 until DCacheBanks) {
680    //     Set Addr & Read Way Mask
681    //
682    //    Pipe 0   ....  Pipe (n-1)
683    //      +      ....     +
684    //      |      ....     |
685    // +----+---------------+-----+
686    //  X                        X
687    //   X                      +------+ Bank Addr Match
688    //    +---------+----------+
689    //              |
690    //     +--------+--------+
691    //     |    Data Bank    |
692    //     +-----------------+
693    val bank_addr_matchs = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
694      bank_addrs(i) === bank_index.U && io.read(i).valid
695    })))
696    val readline_match = Wire(Bool())
697    if (ReduceReadlineConflict) {
698      readline_match := io.readline.valid && io.readline.bits.rmask(bank_index)
699    } else {
700      readline_match := io.readline.valid
701    }
702    val bank_way_en = Mux(readline_match,
703      io.readline.bits.way_en,
704      PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => bank_addr_matchs(i) -> way_en(i)))
705    )
706    val bank_set_addr = Mux(readline_match,
707      addr_to_dcache_set(io.readline.bits.addr),
708      PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => bank_addr_matchs(i) -> set_addrs(i)))
709    )
710
711    val read_enable = bank_addr_matchs.asUInt.orR || readline_match
712
713    // read raw data
714    val data_bank = data_banks(bank_index)
715    data_bank.io.r.en := read_enable
716    data_bank.io.r.way_en := bank_way_en
717    data_bank.io.r.addr := bank_set_addr
718    bank_result(bank_index).raw_data := data_bank.io.r.data
719
720    // read ECC
721    val ecc_bank = ecc_banks(bank_index)
722    ecc_bank.io.r.req.valid := read_enable
723    ecc_bank.io.r.req.bits.apply(setIdx = bank_set_addr)
724    bank_result(bank_index).ecc := Mux1H(RegNext(bank_way_en), ecc_bank.io.r.resp.data)
725
726    // use ECC to check error
727    val ecc_data = bank_result(bank_index).asECCData()
728    val ecc_data_delayed = RegEnable(ecc_data, RegNext(read_enable))
729    bank_result(bank_index).error_delayed := dcacheParameters.dataCode.decode(ecc_data_delayed).error
730    read_bank_error_delayed(bank_index) := bank_result(bank_index).error_delayed
731  }
732
733  // read result: expose banked read result
734  io.readline_resp := bank_result
735  val bank_result_delayed = RegNext(bank_result)
736  (0 until LoadPipelineWidth).map(i => {
737    io.read_resp_delayed(i) := bank_result_delayed(RegNext(RegNext(bank_addrs(i))))
738  })
739
740  // error detection
741  // normal read ports
742  (0 until LoadPipelineWidth).map(rport_index => {
743    io.read_error_delayed(rport_index) := RegNext(RegNext(io.read(rport_index).fire())) &&
744      read_bank_error_delayed(RegNext(RegNext(bank_addrs(rport_index)))) &&
745      !RegNext(io.bank_conflict_slow(rport_index))
746  })
747  // readline port
748  io.readline_error_delayed := RegNext(RegNext(io.readline.fire())) &&
749    VecInit((0 until DCacheBanks).map(i => io.readline_resp(i).error_delayed)).asUInt().orR
750
// Write data_banks & ecc_banks.
// The data SRAM of a bank is written in the cycle of the request; the ECC SRAM
// write of the same bank is registered and therefore happens one cycle later.
// Set index derived from the primary write port's address.
val sram_waddr = addr_to_dcache_set(io.write.bits.addr)
// Set indices derived from the duplicated write ports (one copy per bank).
val sram_waddr_dup = io.write_dup.map(x => addr_to_dcache_set(x.bits.addr))
for (bank_index <- 0 until DCacheBanks) {
  // A bank is written when its duplicated valid is set and the write mask selects it.
  val bank_wen = io.write_dup(bank_index).valid && io.write.bits.wmask(bank_index)

  // data write
  val data_bank = data_banks(bank_index)
  data_bank.io.w.en := bank_wen
  data_bank.io.w.way_en := io.write_dup(bank_index).bits.way_en
  data_bank.io.w.addr := sram_waddr_dup(bank_index)
  data_bank.io.w.data := io.write.bits.data(bank_index)

  // ecc write: every field is delayed by one register so that valid, set index,
  // ECC bits and way mask all belong to the same request.
  val ecc_wen = RegNext(bank_wen)
  val ecc_wset = RegNext(sram_waddr_dup(bank_index))
  val ecc_wdata = RegNext(getECCFromEncWord(cacheParams.dataCode.encode(io.write.bits.data(bank_index))))
  val ecc_wway = RegNext(io.write_dup(bank_index).bits.way_en)

  val ecc_bank = ecc_banks(bank_index)
  ecc_bank.io.w.req.valid := ecc_wen
  ecc_bank.io.w.req.bits.apply(
    setIdx = ecc_wset,
    data = ecc_wdata,
    waymask = ecc_wway
  )
  // Log the registered values that accompany the delayed valid. The previous
  // code printed the undelayed request fields, i.e. values of the following
  // cycle, which did not match what was written to the ECC SRAM.
  when(ecc_wen) {
    XSDebug("write in ecc sram: bank %x set %x data %x waymask %x\n",
      bank_index.U,
      ecc_wset,
      ecc_wdata,
      ecc_wway
    )
  }
}
779
// Customized cache op handling.
// These connections come after the regular write connections above, so they
// override them whenever a cache-op condition holds (last connect wins).
require(nWays <= 32) // wayNum(4, 0) is used as the way selector below
io.cacheOp.resp.bits := DontCare
val cacheOpShouldResp = WireInit(false.B)
val eccReadResult = Wire(Vec(DCacheBanks, UInt(eccBits.W)))
// DCacheDupNum is 16
// Each vector gives, per bank (position in the Seq), the index of the
// duplicated cache-op request that the bank uses for that operation group.
val rdata_dup_vec = Seq(0, 0, 1, 1, 2, 2, 3, 3)
val rdataEcc_dup_vec = Seq(4, 4, 5, 5, 6, 6, 7, 7)
val wdata_dup_vec = Seq(8, 8, 9, 9, 10, 10, 11, 11)
val wdataEcc_dup_vec = Seq(12, 12, 13, 13, 14, 14, 15, 15)

// cache op: read data
for ((dupIdx, bankIdx) <- rdata_dup_vec.zipWithIndex) {
  val req_valid = io.cacheOp_req_dup(dupIdx).valid
  val is_read_data = CacheInstrucion.isReadData(io.cacheOp_req_bits_opCode_dup(dupIdx))
  when(req_valid && is_read_data) {
    val bank = data_banks(bankIdx)
    bank.io.r.en := true.B
    bank.io.r.way_en := UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))
    bank.io.r.addr := io.cacheOp.req.bits.index
    cacheOpShouldResp := true.B
  }
}

// cache op: read data ECC
for ((dupIdx, bankIdx) <- rdataEcc_dup_vec.zipWithIndex) {
  val req_valid = io.cacheOp_req_dup(dupIdx).valid
  val is_read_ecc = CacheInstrucion.isReadDataECC(io.cacheOp_req_bits_opCode_dup(dupIdx))
  when(req_valid && is_read_ecc) {
    val bank = ecc_banks(bankIdx)
    bank.io.r.req.valid := true.B
    bank.io.r.req.bits.setIdx := io.cacheOp.req.bits.index
    cacheOpShouldResp := true.B
  }
}

// cache op: write data
for ((dupIdx, bankIdx) <- wdata_dup_vec.zipWithIndex) {
  val req_valid = io.cacheOp_req_dup(dupIdx).valid
  val is_write_data = CacheInstrucion.isWriteData(io.cacheOp_req_bits_opCode_dup(dupIdx))
  when(req_valid && is_write_data) {
    val bank = data_banks(bankIdx)
    bank.io.w.en := true.B
    bank.io.w.way_en := UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))
    bank.io.w.addr := io.cacheOp.req.bits.index
    bank.io.w.data := io.cacheOp.req.bits.write_data_vec(bankIdx)
    cacheOpShouldResp := true.B
  }
}

// cache op: write data ECC
for ((dupIdx, bankIdx) <- wdataEcc_dup_vec.zipWithIndex) {
  val req_valid = io.cacheOp_req_dup(dupIdx).valid
  val is_write_ecc = CacheInstrucion.isWriteDataECC(io.cacheOp_req_bits_opCode_dup(dupIdx))
  when(req_valid && is_write_ecc) {
    val bank = ecc_banks(bankIdx)
    bank.io.w.req.valid := true.B
    bank.io.w.req.bits.apply(
      setIdx = io.cacheOp.req.bits.index,
      data = io.cacheOp.req.bits.write_data_ecc,
      waymask = UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))
    )
    cacheOpShouldResp := true.B
  }
}
830
// Cache op response: valid one cycle after a request that matched one of the
// supported data/ECC operations.
io.cacheOp.resp.valid := RegNext(io.cacheOp.req.valid && cacheOpShouldResp)
(0 until DCacheBanks).foreach { bank =>
  // Raw data of every bank is returned as read.
  io.cacheOp.resp.bits.read_data_vec(bank) := bank_result(bank).raw_data
  // ECC of the requested way, selected with the way number registered once.
  val way_sel_delayed = RegNext(io.cacheOp.req.bits.wayNum(4, 0))
  eccReadResult(bank) := ecc_banks(bank).io.r.resp.data(way_sel_delayed)
}
// Only the ECC of the requested bank is reported, and only while the response
// is valid; otherwise the field reads as zero.
io.cacheOp.resp.bits.read_data_ecc := Mux(
  io.cacheOp.resp.valid,
  eccReadResult(RegNext(io.cacheOp.req.bits.bank_num)),
  0.U
)
840
// Bank-conflict logging through ChiselDB; table and site names carry the hart id.
val tableName = s"BankConflict${p(XSCoreParamsKey).HartId}"
val siteName = s"BankedDataArray${p(XSCoreParamsKey).HartId}"
val bankConflictTable = ChiselDB.createTable(tableName, new BankConflictDB)
val bankConflictData = Wire(new BankConflictDB)
// Per-port fields are always filled from the current read requests.
(0 until LoadPipelineWidth).foreach { port =>
  bankConflictData.set_index(port) := set_addrs(port)
  bankConflictData.addr(port) := io.read(port).bits.addr
}

// FIXME: rr_bank_conflict(0)(1) no generalization
// Conflict-specific fields describe port 0 while ports 0 and 1 conflict and
// are zero / false otherwise.
bankConflictData.bank_index := Mux(rr_bank_conflict(0)(1), bank_addrs(0), 0.U)
bankConflictData.way_index := Mux(rr_bank_conflict(0)(1), OHToUInt(way_en(0)), 0.U)
bankConflictData.fake_rr_bank_conflict :=
  rr_bank_conflict(0)(1) && (set_addrs(0) === set_addrs(1))

// A record is written only when the Constantin record is non-zero and ports 0
// and 1 conflict.
val isWriteBankConflictTable = WireInit(Constantin.createRecord(s"isWriteBankConflictTable${p(XSCoreParamsKey).HartId}"))
bankConflictTable.log(
  data = bankConflictData,
  en = isWriteBankConflictTable.orR && rr_bank_conflict(0)(1),
  site = siteName,
  clock = clock,
  reset = reset
)

// One perf counter per ordered port pair (x < y): counts conflicts between
// the two ports whose set addresses are equal.
for (y <- 1 until LoadPipelineWidth; x <- 0 until y) {
  XSPerfAccumulate(
    s"data_array_fake_rr_bank_conflict_${x}_${y}",
    rr_bank_conflict(x)(y) && (set_addrs(x) === set_addrs(y))
  )
}
873
874}