xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/data/BankedDataArray.scala (revision 38d0d7c5a34a23dfdb58a3cb2737c3cfddb3ec9d)
1/***************************************************************************************
2* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
3* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
4* Copyright (c) 2020-2021 Peng Cheng Laboratory
5*
6* XiangShan is licensed under Mulan PSL v2.
7* You can use this software according to the terms and conditions of the Mulan PSL v2.
8* You may obtain a copy of Mulan PSL v2 at:
9*          http://license.coscl.org.cn/MulanPSL2
10*
11* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
12* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
13* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
14*
15* See the Mulan PSL v2 for more details.
16*
17*
18* Acknowledgement
19*
20* This implementation is inspired by several key papers:
21* [1] Gurindar S. Sohi, and Manoj Franklin. "[High-bandwidth data memory systems for superscalar processors.]
22* (https://doi.org/10.1145/106972.106980)" 4th International Conference on Architectural Support for Programming
23* Languages and Operating Systems (ASPLOS). 1991.
24***************************************************************************************/
25
26package xiangshan.cache
27
28import org.chipsalliance.cde.config.Parameters
29import chisel3._
30import utils._
31import utility._
32import chisel3.util._
33import freechips.rocketchip.tilelink.{ClientMetadata, TLClientParameters, TLEdgeOut}
34import xiangshan.mem.LqPtr
35import xiangshan.{L1CacheErrorInfo, XSCoreParamsKey}
36
37import scala.math.max
38
/**
  * Record type handed to ChiselDB when a load/load bank conflict is logged.
  *
  * Per-port fields have one entry per load pipeline; `bank_index` has one entry
  * per SRAM row covered by a single access (VLEN / DCacheSRAMRowBits rows).
  */
class BankConflictDB(implicit p: Parameters) extends DCacheBundle {
  // address presented on every load read port
  val addr = Vec(LoadPipelineWidth, Bits(PAddrBits.W))
  // set index seen by every load read port
  val set_index = Vec(
    LoadPipelineWidth,
    UInt((DCacheAboveIndexOffset - DCacheSetOffset).W)
  )
  // bank index of each SRAM row touched by the sampled request
  val bank_index = Vec(
    VLEN / DCacheSRAMRowBits,
    UInt((DCacheSetOffset - DCacheBankOffset).W)
  )
  // binary-encoded way of the sampled request
  val way_index = UInt(wayBits.W)
  // flag for a reported conflict that is not a real one (filled in by the logger)
  val fake_rr_bank_conflict = Bool()
}
46
/** Base data-array request: a per-way enable mask plus the address to access. */
class L1BankedDataReadReq(implicit p: Parameters) extends DCacheBundle {
  // one bit per cache way
  val way_en = Bits(DCacheWays.W)
  // PAddrBits-wide address of the access
  val addr = Bits(PAddrBits.W)
}
52
/**
  * Read request carrying, in addition to way mask and address, a bank mask,
  * a kill bit and the load-queue pointer of the requesting load.
  */
class L1BankedDataReadReqWithMask(implicit p: Parameters) extends DCacheBundle {
  // one bit per cache way
  val way_en = Bits(DCacheWays.W)
  // PAddrBits-wide address of the access
  val addr = Bits(PAddrBits.W)
  // one bit per data bank
  val bankMask = Bits(DCacheBanks.W)
  // NOTE(review): presumably cancels the request; not consumed in the visible code
  val kill = Bool()
  // load-queue pointer of the requester
  val lqIdx = new LqPtr
}
61
/** Whole-line read request: the base read request plus a per-bank read mask. */
class L1BankedDataReadLineReq(implicit p: Parameters) extends L1BankedDataReadReq {
  // one bit per data bank
  val rmask = Bits(DCacheBanks.W)
}
66
/**
  * Cache-block write request; a whole block can be written in a single cycle.
  * Carries one data row and one mask bit for every bank.
  */
class L1BankedDataWriteReq(implicit p: Parameters) extends L1BankedDataReadReq {
  // per-bank write enable
  val wmask = Bits(DCacheBanks.W)
  // one DCacheSRAMRowBits-wide row per bank
  val data = Vec(DCacheBanks, Bits(DCacheSRAMRowBits.W))
}
73
/**
  * Control-only view of a cache-block write request: way mask and address,
  * without the write data.
  */
class L1BankedDataWriteReqCtrl(implicit p: Parameters)
  extends L1BankedDataReadReq
76
/** One SRAM row as returned by the data array: raw data, its ECC bits and a late error flag. */
class L1BankedDataReadResult(implicit p: Parameters) extends DCacheBundle {
  // you can choose which bank to read to save power
  // ECC check bits stored next to the row
  val ecc = Bits(dataECCBits.W)
  // the data row itself
  val raw_data = Bits(DCacheSRAMRowBits.W)
  // error indication, 1 cycle later than data resp
  val error_delayed = Bool()

  /** Concatenates the fields back into one word: ECC in the upper bits, data in the lower bits. */
  def asECCData() = Cat(ecc, raw_data)
}
88
/** Write command for a [[DataSRAMBank]]: enable, SRAM address, way select and the encoded row. */
class DataSRAMBankWriteReq(implicit p: Parameters) extends DCacheBundle {
  // write strobe
  val en = Bool()
  // SRAM set address; width is left to be inferred
  val addr = UInt()
  // way select, one bit per way
  val way_en = UInt(DCacheWays.W)
  // row to store, encDataBits wide
  val data = UInt(encDataBits.W)
}
95
/**
  * Wraps one single-way, single-port SRAMTemplate holding encDataBits-wide rows.
  *
  * @param bankIdx bank number of this SRAM; only used in the debug printouts
  * @param wayIdx  way number of this SRAM; only used in the debug printouts
  */
class DataSRAM(bankIdx: Int, wayIdx: Int)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle() {
    // write port
    val w = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Input(UInt(encDataBits.W))
    }

    // read port; the debug dump below prints `data` one cycle after `en`
    val r = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Output(UInt(encDataBits.W))
    }
  })

  // the wrapped memory: DCacheSets / DCacheSetDiv rows, one way
  val data_sram = Module(new SRAMTemplate(
    Bits(encDataBits.W),
    set = DCacheSets / DCacheSetDiv,
    way = 1,
    shouldReset = false,
    holdRead = false,
    singlePort = true
  ))

  // write side: there is only one way, so the way mask is constant
  data_sram.io.w.req.valid := io.w.en
  data_sram.io.w.req.bits.apply(
    setIdx = io.w.addr,
    data = io.w.data,
    waymask = 1.U
  )

  // read side
  data_sram.io.r.req.valid := io.r.en
  data_sram.io.r.req.bits.apply(setIdx = io.r.addr)
  io.r.data := data_sram.io.r.resp.data(0)

  XSPerfAccumulate("part_data_read_counter", data_sram.io.r.req.valid)

  /** Prints a read one cycle after it was requested, together with the address it used. */
  def dump_r() = {
    when(RegNext(io.r.en)) {
      XSDebug(
        "bank read set %x bank %x way %x data %x\n",
        RegEnable(io.r.addr, io.r.en),
        bankIdx.U,
        wayIdx.U,
        io.r.data
      )
    }
  }

  /** Prints a write in the cycle it is issued. */
  def dump_w() = {
    when(io.w.en) {
      XSDebug(
        "bank write set %x bank %x way %x data %x\n",
        io.w.addr,
        bankIdx.U,
        wayIdx.U,
        io.w.data
      )
    }
  }

  /** Emits both debug printers, write first. */
  def dump() = {
    dump_w()
    dump_r()
  }
}
160
/**
  * Wraps the data rows of all DCacheWays ways of one bank: one single-way,
  * single-port SRAMTemplate per way, sharing one read address and one write command.
  *
  * @param index bank number; not referenced inside this module
  */
class DataSRAMBank(index: Int)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle() {
    val w = Input(new DataSRAMBankWriteReq)

    // a read returns the addressed row of every way
    val r = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Output(Vec(DCacheWays, UInt(encDataBits.W)))
    }
  })

  // a write may select at most one way
  assert(RegNext(!io.w.en || PopCount(io.w.way_en) <= 1.U))

  // external controls do not read and write at the same time
  val w_info = io.w
  // val rw_bypass = RegNext(io.w.addr === io.r.addr && io.w.way_en === io.r.way_en && io.w.en)

  // multiway data bank
  val data_bank = Seq.fill(DCacheWays) {
    Module(new SRAMTemplate(
      Bits(encDataBits.W),
      set = DCacheSets / DCacheSetDiv,
      way = 1,
      shouldReset = false,
      holdRead = false,
      singlePort = true
    ))
  }

  data_bank.zipWithIndex.foreach { case (sram, w) =>
    // only the way selected by way_en is written
    val wen = w_info.en && w_info.way_en(w)
    sram.io.w.req.valid := wen
    sram.io.w.req.bits.apply(
      setIdx = w_info.addr,
      data = w_info.data,
      waymask = 1.U
    )
    // every way is read on a read request
    sram.io.r.req.valid := io.r.en
    sram.io.r.req.bits.apply(setIdx = io.r.addr)
    // the SRAM clock is gated with "read, or write to this way"
    sram.clock := ClockGate(false.B, io.r.en | (io.w.en & io.w.way_en(w)), clock)
  }
  XSPerfAccumulate("part_data_read_counter", PopCount(Cat(data_bank.map(_.io.r.req.valid))))

  io.r.data := data_bank.map(_.io.r.resp.data(0))

  /** Prints a read one cycle after it was requested, together with the address it used. */
  def dump_r() = {
    when(RegNext(io.r.en)) {
      XSDebug(
        "bank read addr %x data %x\n",
        RegEnable(io.r.addr, io.r.en),
        io.r.data.asUInt
      )
    }
  }

  /** Prints a write in the cycle it is issued. */
  def dump_w() = {
    when(io.w.en) {
      XSDebug(
        "bank write addr %x way_en %x data %x\n",
        io.w.addr,
        io.w.way_en,
        io.w.data
      )
    }
  }

  /** Emits both debug printers, write first. */
  def dump() = {
    dump_w()
    dump_r()
  }
}
231
/** Marker object; it carries no data of its own. */
case object HasDataEccParam
233
//                     Banked DCache Data
// -----------------------------------------------------------------
// | Bank0 | Bank1 | Bank2 | Bank3 | Bank4 | Bank5 | Bank6 | Bank7 |
// -----------------------------------------------------------------
// | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  |
// | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  |
// | ....  | ....  | ....  | ....  | ....  | ....  | ....  | ....  |
// -----------------------------------------------------------------
/**
  * Common IO and helper functions shared by the banked data array implementations.
  *
  * The IO offers LoadPipelineWidth word read ports, one whole-line read port and
  * one whole-line write port (plus per-bank duplicates of the write control).
  * Subclasses provide the storage and the conflict handling.
  */
abstract class AbstractBankedDataArray(implicit p: Parameters) extends DCacheModule
{
  val DataEccParam = if(EnableDataEcc) Some(HasDataEccParam) else None
  val ReadlinePortErrorIndex = LoadPipelineWidth
  val io = IO(new DCacheBundle {
    // load pipeline read word req
    val read = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new L1BankedDataReadReqWithMask)))
    val is128Req = Input(Vec(LoadPipelineWidth, Bool()))
    // main pipeline read / write line req
    val readline_intend = Input(Bool())
    val readline = Flipped(DecoupledIO(new L1BankedDataReadLineReq))
    val write = Flipped(DecoupledIO(new L1BankedDataWriteReq))
    val write_dup = Vec(DCacheBanks, Flipped(Decoupled(new L1BankedDataWriteReqCtrl)))
    // data for readline and loadpipe
    val readline_resp = Output(Vec(DCacheBanks, new L1BankedDataReadResult()))
    val readline_error_delayed = Output(Bool())
    val read_resp          = Output(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, new L1BankedDataReadResult())))
    val read_error_delayed = Output(Vec(LoadPipelineWidth,Vec(VLEN/DCacheSRAMRowBits, Bool())))
    // val nacks = Output(Vec(LoadPipelineWidth, Bool()))
    // val errors = Output(Vec(LoadPipelineWidth + 1, ValidIO(new L1CacheErrorInfo))) // read ports + readline port
    // when bank_conflict, read (1) port should be ignored
    val bank_conflict_slow = Output(Vec(LoadPipelineWidth, Bool()))
    val disable_ld_fast_wakeup = Output(Vec(LoadPipelineWidth, Bool()))
    // customized cache op port
    val cacheOp = Flipped(new L1CacheInnerOpIO)
    val cacheOp_req_dup = Vec(DCacheDupNum, Flipped(Valid(new CacheCtrlReqInfo)))
    val cacheOp_req_bits_opCode_dup = Input(Vec(DCacheDupNum, UInt(XLEN.W)))
  })

  /** Builds a Vec with one element per load pipeline by evaluating `f` for each port index. */
  def pipeMap[T <: Data](f: Int => T) = VecInit((0 until LoadPipelineWidth).map(f))

  /**
    * Extracts the ECC bits (the bits above the data row) from an encoded word.
    * Without data ECC the result is the constant 0.
    *
    * @param encWord encoded word; must be exactly encDataBits wide when ECC is enabled
    */
  def getECCFromEncWord(encWord: UInt) = {
    if (EnableDataEcc) {
      // report the offending width: the message used to print encDataBits on both sides
      require(
        encWord.getWidth == encDataBits,
        s"encWord.getWidth=${encWord.getWidth} != encDataBits=$encDataBits!"
      )
      encWord(encDataBits-1, DCacheSRAMRowBits)
    } else {
      0.U
    }
  }

  /** Extracts the data row (the low DCacheSRAMRowBits bits) from an encoded word. */
  def getDataFromEncWord(encWord: UInt) = {
    encWord(DCacheSRAMRowBits-1, 0)
  }

  /** Rebuilds the stored word: ECC above data when ECC is enabled, the bare data otherwise. */
  def asECCData(ecc: UInt, data: UInt) = {
    if (EnableDataEcc) {
      Cat(ecc, data)
    } else {
      data
    }
  }

  /** Debug print of every valid word read request and of a valid line read request. */
  def dumpRead = {
    (0 until LoadPipelineWidth).foreach { w =>
      when(io.read(w).valid) {
        XSDebug(s"DataArray Read channel: $w valid way_en: %x addr: %x\n",
          io.read(w).bits.way_en, io.read(w).bits.addr)
      }
    }
    when(io.readline.valid) {
      XSDebug(s"DataArray Read Line, valid way_en: %x addr: %x rmask %x\n",
        io.readline.bits.way_en, io.readline.bits.addr, io.readline.bits.rmask)
    }
  }

  /** Debug print of a valid write request, one line per bank. */
  def dumpWrite = {
    when(io.write.valid) {
      XSDebug(s"DataArray Write valid way_en: %x addr: %x\n",
        io.write.bits.way_en, io.write.bits.addr)

      (0 until DCacheBanks).foreach { r =>
        XSDebug(s"cycle: $r data: %x wmask: %x\n",
          io.write.bits.data(r), io.write.bits.wmask(r))
      }
    }
  }

  /** Debug print of the word read responses; a 128-bit request prints rows 1 and 0 together. */
  def dumpResp = {
    XSDebug(s"DataArray ReadeResp channel:\n")
    // `map` is kept on purpose so the inferred result type of this method stays as it was
    (0 until LoadPipelineWidth) map { r =>
      XSDebug(s"cycle: $r data: %x\n", Mux(io.is128Req(r),
        Cat(io.read_resp(r)(1).raw_data,io.read_resp(r)(0).raw_data),
        io.read_resp(r)(0).raw_data))
    }
  }

  /** Emits all debug printers: reads, writes, then responses. */
  def dump() = {
    dumpRead
    dumpWrite
    dumpResp
  }

  /**
    * Reduces a list of (valid, pointer, index) candidates to a single winner.
    *
    * When both inputs of a reduction step are valid, `b` wins if `a`'s pointer
    * compares greater than `b`'s, otherwise `a` wins. When only one is valid it
    * wins; when neither is valid, `b`'s pointer and index are passed on with
    * valid low. (The misspelled name is kept because callers use it.)
    *
    * @param valid per-candidate valid bits
    * @param bits  per-candidate load-queue pointers
    * @param index per-candidate identifiers returned for the winner
    * @return ((anyValid, winning pointer), winning index)
    */
  def selcetOldestPort(valid: Seq[Bool], bits: Seq[LqPtr], index: Seq[UInt]):((Bool, LqPtr), UInt) = {
    require(valid.length == bits.length &&  bits.length == index.length, s"length must eq, valid:${valid.length}, bits:${bits.length}, index:${index.length}")
    ParallelOperation(valid zip bits zip index,
      (a: ((Bool, LqPtr), UInt), b: ((Bool, LqPtr), UInt)) => {
        val au = a._1._2
        val bu = b._1._2
        val aValid = a._1._1
        val bValid = b._1._1
        // with both valid, take b when a's pointer compares greater
        val bSel = au > bu
        val bits = Mux(
          aValid && bValid,
          Mux(bSel, b._1._2, a._1._2),
          Mux(aValid && !bValid, a._1._2, b._1._2)
        )
        val idx = Mux(
          aValid && bValid,
          Mux(bSel, b._2, a._2),
          Mux(aValid && !bValid, a._2, b._2)
        )
        ((aValid || bValid, bits), idx)
      }
    )
  }

}
359
// the smallest access unit is sram
/**
  * Data array with one [[DataSRAM]] per (set division, bank, way).
  *
  * Writes are always accepted and are registered; the SRAM write enables are
  * driven from those registers. Load reads that hit the registered write, a
  * line read, or another load with a different set in the same division, bank
  * mask and way are reported as bank conflicts.
  */
class SramedDataArray(implicit p: Parameters) extends AbstractBankedDataArray {
  println("  DCacheType: SramedDataArray")
  val ReduceReadlineConflict = false

  // the write ports never back-pressure
  io.write.ready := true.B
  io.write_dup.foreach(_.ready := true.B)

  // data_banks(div)(bank)(way)
  val data_banks = List.tabulate(DCacheSetDiv)(_ =>
    List.tabulate(DCacheBanks)(i =>
      List.tabulate(DCacheWays)(j => Module(new DataSRAM(i, j)))
    )
  )
  data_banks.foreach(_.foreach(_.foreach(_.dump())))

  // per load port decode of the request address
  val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
  val set_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val div_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val bank_addrs = Wire(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, UInt())))

  // line read decode
  val line_set_addr = addr_to_dcache_div_set(io.readline.bits.addr)
  val line_div_addr = addr_to_dcache_div(io.readline.bits.addr)
  // when WPU is enabled, line_way_en is all enabled when read data
  val line_way_en = Fill(DCacheWays, 1.U) // val line_way_en = io.readline.bits.way_en
  val line_way_en_reg = RegEnable(io.readline.bits.way_en, 0.U(DCacheWays.W),io.readline.valid)

  // registered copy of the write request; the SRAM write enables are driven from these
  val write_bank_mask_reg = RegEnable(io.write.bits.wmask, 0.U(DCacheBanks.W), io.write.valid)
  val write_data_reg = RegEnable(io.write.bits.data, io.write.valid)
  val write_valid_reg = RegNext(io.write.valid)
  val write_valid_dup_reg = io.write_dup.map(x => RegNext(x.valid))
  val write_wayen_dup_reg = io.write_dup.map(x => RegEnable(x.bits.way_en, 0.U(DCacheWays.W), x.valid))
  val write_set_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div_set(x.bits.addr), x.valid))
  val write_div_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div(x.bits.addr), x.valid))

  // read data_banks and ecc_banks
  // for single port SRAM, do not allow read and write in the same cycle
  val rrhazard = false.B // io.readline.valid
  for (rport_index <- 0 until LoadPipelineWidth) {
    div_addrs(rport_index) := addr_to_dcache_div(io.read(rport_index).bits.addr)
    set_addrs(rport_index) := addr_to_dcache_div_set(io.read(rport_index).bits.addr)
    // row 1 is the bank right after row 0
    bank_addrs(rport_index)(0) := addr_to_dcache_bank(io.read(rport_index).bits.addr)
    bank_addrs(rport_index)(1) := bank_addrs(rport_index)(0) + 1.U

    // use way_en to select a way after data read out
    assert(!(RegNext(io.read(rport_index).fire && PopCount(io.read(rport_index).bits.way_en) > 1.U)))
    way_en(rport_index) := io.read(rport_index).bits.way_en
  }

  // read conflict: two valid loads in the same division with overlapping bank
  // masks and the same way mask, but with different sets
  val rr_bank_conflict = Seq.tabulate(LoadPipelineWidth, LoadPipelineWidth) { (x, y) =>
    if (x == y) {
      false.B
    } else {
      io.read(x).valid && io.read(y).valid &&
        div_addrs(x) === div_addrs(y) &&
        (io.read(x).bits.bankMask & io.read(y).bits.bankMask) =/= 0.U &&
        io.read(x).bits.way_en === io.read(y).bits.way_en &&
        set_addrs(x) =/= set_addrs(y)
    }
  }
  val load_req_with_bank_conflict = rr_bank_conflict.map(_.reduce(_ || _))
  val load_req_valid = io.read.map(_.valid)
  val load_req_lqIdx = io.read.map(_.bits.lqIdx)
  val load_req_index = (0 until LoadPipelineWidth).map(_.asUInt)


  // the port picked by selcetOldestPort keeps its access; other conflicting ports lose
  val load_req_bank_conflict_selcet = selcetOldestPort(load_req_valid, load_req_lqIdx, load_req_index)
  val load_req_bank_select_port  = UIntToOH(load_req_bank_conflict_selcet._2).asBools

  val rr_bank_conflict_oldest = (0 until LoadPipelineWidth).map { i =>
    !load_req_bank_select_port(i) && load_req_with_bank_conflict(i)
  }

  // load vs. line read: same division, different set
  val rrl_bank_conflict = Wire(Vec(LoadPipelineWidth, Bool()))
  val rrl_bank_conflict_intend = Wire(Vec(LoadPipelineWidth, Bool()))
  for (i <- 0 until LoadPipelineWidth) {
    val judge =
      if (ReduceReadlineConflict) {
        io.read(i).valid && (io.readline.bits.rmask & io.read(i).bits.bankMask) =/= 0.U && line_div_addr === div_addrs(i) && line_set_addr =/= set_addrs(i)
      } else {
        io.read(i).valid && line_div_addr === div_addrs(i) && line_set_addr =/= set_addrs(i)
      }
    rrl_bank_conflict(i) := judge && io.readline.valid
    rrl_bank_conflict_intend(i) := judge && io.readline_intend
  }

  // load vs. registered write: same division and way, and a written bank is touched
  val wr_bank_conflict = Seq.tabulate(LoadPipelineWidth) { x =>
    io.read(x).valid && write_valid_reg &&
      div_addrs(x) === write_div_addr_dup_reg.head &&
      way_en(x) === write_wayen_dup_reg.head &&
      (write_bank_mask_reg(bank_addrs(x)(0)) ||
        (write_bank_mask_reg(bank_addrs(x)(1)) && io.is128Req(x)))
  }
  // line read vs. registered write: same division
  val wrl_bank_conflict = io.readline.valid && write_valid_reg && line_div_addr === write_div_addr_dup_reg.head
  // ready
  io.readline.ready := !(wrl_bank_conflict)
  io.read.zipWithIndex.foreach { case (x, i) => x.ready := !(wr_bank_conflict(i) || rrhazard) }

  val perf_multi_read = PopCount(io.read.map(_.valid)) >= 2.U
  val bank_conflict_fast = Wire(Vec(LoadPipelineWidth, Bool()))
  for (i <- 0 until LoadPipelineWidth) {
    bank_conflict_fast(i) := wr_bank_conflict(i) || rrl_bank_conflict(i) ||
      rr_bank_conflict_oldest(i)
    // the reported conflict is the registered version of the fast one
    io.bank_conflict_slow(i) := RegNext(bank_conflict_fast(i))
    // for fast wakeup only conflicts against lower-numbered ports are counted
    io.disable_ld_fast_wakeup(i) := wr_bank_conflict(i) || rrl_bank_conflict_intend(i) ||
      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
  }
  XSPerfAccumulate("data_array_multi_read", perf_multi_read)
  for (y <- 1 until LoadPipelineWidth; x <- 0 until y) {
    XSPerfAccumulate(s"data_array_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y))
  }
  for (i <- 0 until LoadPipelineWidth) {
    XSPerfAccumulate(s"data_array_rrl_bank_conflict_${i}", rrl_bank_conflict(i))
    XSPerfAccumulate(s"data_array_rw_bank_conflict_${i}", wr_bank_conflict(i))
    XSPerfAccumulate(s"data_array_read_${i}", io.read(i).valid)
  }
  XSPerfAccumulate("data_array_access_total", PopCount(io.read.map(_.valid)))
  XSPerfAccumulate("data_array_read_line", io.readline.valid)
  XSPerfAccumulate("data_array_write", io.write.valid)

  val read_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays,new L1BankedDataReadResult()))))
  val read_result_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays,new L1BankedDataReadResult()))))
  val read_error_delayed_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, Bool()))))

  for {
    div_index <- 0 until DCacheSetDiv
    bank_index <- 0 until DCacheBanks
    way_index <- 0 until DCacheWays
  } {
    //     Set Addr & Read Way Mask
    //
    //    Pipe 0   ....  Pipe (n-1)
    //      +      ....     +
    //      |      ....     |
    // +----+---------------+-----+
    //  X                        X
    //   X                      +------+ Bank Addr Match
    //    +---------+----------+
    //              |
    //     +--------+--------+
    //     |    Data Bank    |
    //     +-----------------+
    // a load port reads this SRAM when division, bank and way match and it did not lose arbitration
    val loadpipe_en = WireInit(VecInit(List.tabulate(LoadPipelineWidth) { i =>
      io.read(i).valid && div_addrs(i) === div_index.U &&
        (bank_addrs(i)(0) === bank_index.U ||
          (bank_addrs(i)(1) === bank_index.U && io.is128Req(i))) &&
        way_en(i)(way_index) &&
        !rr_bank_conflict_oldest(i)
    }))
    val readline_en = Wire(Bool())
    if (ReduceReadlineConflict) {
      readline_en := io.readline.valid && io.readline.bits.rmask(bank_index) && line_way_en(way_index) && div_index.U === line_div_addr
    } else {
      readline_en := io.readline.valid && line_way_en(way_index) && div_index.U === line_div_addr
    }
    // the line read takes precedence over the load ports for the SRAM address
    val sram_set_addr = Mux(
      readline_en,
      addr_to_dcache_div_set(io.readline.bits.addr),
      PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => loadpipe_en(i) -> set_addrs(i)))
    )
    val read_en = loadpipe_en.asUInt.orR || readline_en
    // read raw data
    val data_bank = data_banks(div_index)(bank_index)(way_index)
    data_bank.io.r.en := read_en
    data_bank.io.r.addr := sram_set_addr

    read_result(div_index)(bank_index)(way_index).ecc := getECCFromEncWord(data_bank.io.r.data)
    read_result(div_index)(bank_index)(way_index).raw_data := getDataFromEncWord(data_bank.io.r.data)

    if (EnableDataEcc) {
      // the word is captured the cycle after the read was issued and decoded from that register
      val ecc_data = read_result(div_index)(bank_index)(way_index).asECCData()
      val ecc_data_delayed = RegEnable(ecc_data, RegNext(read_en))
      read_result(div_index)(bank_index)(way_index).error_delayed := dcacheParameters.dataCode.decode(ecc_data_delayed).error
      read_error_delayed_result(div_index)(bank_index)(way_index) := read_result(div_index)(bank_index)(way_index).error_delayed
    } else {
      read_result(div_index)(bank_index)(way_index).error_delayed := false.B
      read_error_delayed_result(div_index)(bank_index)(way_index) := false.B
    }

    read_result_delayed(div_index)(bank_index)(way_index) := RegEnable(read_result(div_index)(bank_index)(way_index), RegNext(read_en))
  }

  // flat vector of all SRAM read enables, only used for the performance counter
  val data_read_oh = WireInit(VecInit(Seq.fill(DCacheSetDiv * DCacheBanks * DCacheWays)(0.U(1.W))))
  for {
    div_index <- 0 until DCacheSetDiv
    bank_index <- 0 until DCacheBanks
    way_index <- 0 until DCacheWays
  } {
    data_read_oh(div_index *  DCacheBanks * DCacheWays + bank_index * DCacheWays + way_index) := data_banks(div_index)(bank_index)(way_index).io.r.en
  }
  XSPerfAccumulate("data_read_counter", PopCount(Cat(data_read_oh)))

  // read result: expose banked read result
  // TODO: clock gate
  for (i <- 0 until LoadPipelineWidth) {
    // io.read_resp(i) := read_result(RegNext(bank_addrs(i)))(RegNext(OHToUInt(way_en(i))))
    // r_*: request information one cycle after the request; rr_*: two cycles after
    val r_read_fire = RegNext(io.read(i).fire)
    val r_div_addr  = RegEnable(div_addrs(i), io.read(i).fire)
    val r_bank_addr = RegEnable(bank_addrs(i), io.read(i).fire)
    val r_way_addr  = RegNext(OHToUInt(way_en(i)))
    val rr_read_fire = RegNext(RegNext(io.read(i).fire))
    val rr_div_addr = RegEnable(RegEnable(div_addrs(i), io.read(i).fire), r_read_fire)
    val rr_bank_addr = RegEnable(RegEnable(bank_addrs(i), io.read(i).fire), r_read_fire)
    val rr_way_addr = RegEnable(RegEnable(OHToUInt(way_en(i)), io.read(i).fire), r_read_fire)
    for (j <- 0 until VLEN/DCacheSRAMRowBits) {
      io.read_resp(i)(j) := read_result(r_div_addr)(r_bank_addr(j))(r_way_addr)
      // error detection
      // normal read ports
      io.read_error_delayed(i)(j) := rr_read_fire && read_error_delayed_result(rr_div_addr)(rr_bank_addr(j))(rr_way_addr) && !RegNext(io.bank_conflict_slow(i))
    }
  }

  // readline port
  val readline_error_delayed = Wire(Vec(DCacheBanks, Bool()))
  val readline_r_way_addr = RegEnable(OHToUInt(io.readline.bits.way_en), io.readline.valid)
  val readline_rr_way_addr = RegEnable(readline_r_way_addr, RegNext(io.readline.valid))
  val readline_r_div_addr = RegEnable(line_div_addr, io.readline.valid)
  val readline_rr_div_addr = RegEnable(readline_r_div_addr, RegNext(io.readline.valid))
  for (i <- 0 until DCacheBanks) {
    io.readline_resp(i) := read_result(readline_r_div_addr)(i)(readline_r_way_addr)
    readline_error_delayed(i) := read_result(readline_rr_div_addr)(i)(readline_rr_way_addr).error_delayed
  }
  // any bank in error flags the line, two cycles after the line read fired
  io.readline_error_delayed := RegNext(RegNext(io.readline.fire)) && readline_error_delayed.asUInt.orR

  // write data_banks & ecc_banks
  for {
    div_index <- 0 until DCacheSetDiv
    bank_index <- 0 until DCacheBanks
    way_index <- 0 until DCacheWays
  } {
    // data write
    val wen_reg = write_bank_mask_reg(bank_index) &&
      write_valid_dup_reg(bank_index) &&
      write_div_addr_dup_reg(bank_index) === div_index.U &&
      write_wayen_dup_reg(bank_index)(way_index)
    // ECC bits are computed from the incoming data and registered with the write
    val write_ecc_reg = RegEnable(getECCFromEncWord(cacheParams.dataCode.encode(io.write.bits.data(bank_index))), io.write.valid)
    val data_bank = data_banks(div_index)(bank_index)(way_index)
    data_bank.io.w.en := wen_reg
    data_bank.io.w.addr := write_set_addr_dup_reg(bank_index)
    data_bank.io.w.data := asECCData(write_ecc_reg, write_data_reg(bank_index))
  }

  // the cache-op port never responds from this implementation
  io.cacheOp.resp.valid := false.B
  io.cacheOp.resp.bits  := DontCare

  // ChiselDB logging of load/load bank conflicts
  val tableName =  "BankConflict" + p(XSCoreParamsKey).HartId.toString
  val siteName = "BankedDataArray" + p(XSCoreParamsKey).HartId.toString
  val bankConflictTable = ChiselDB.createTable(tableName, new BankConflictDB)
  val bankConflictData = Wire(new BankConflictDB)
  for (i <- 0 until LoadPipelineWidth) {
    bankConflictData.set_index(i) := set_addrs(i)
    bankConflictData.addr(i) := io.read(i).bits.addr
  }

  // FIXME: rr_bank_conflict(0)(1) no generalization
  when(rr_bank_conflict(0)(1)) {
    for (i <- 0 until (VLEN/DCacheSRAMRowBits)) {
      bankConflictData.bank_index(i) := bank_addrs(0)(i)
    }
    bankConflictData.way_index  := OHToUInt(way_en(0))
    bankConflictData.fake_rr_bank_conflict := set_addrs(0) === set_addrs(1) && div_addrs(0) === div_addrs(1)
  }.otherwise {
    for (i <- 0 until (VLEN/DCacheSRAMRowBits)) {
      bankConflictData.bank_index(i) := 0.U
    }
    bankConflictData.way_index := 0.U
    bankConflictData.fake_rr_bank_conflict := false.B
  }

  // logging is additionally gated by a Constantin record
  val isWriteBankConflictTable = Constantin.createRecord(s"isWriteBankConflictTable${p(XSCoreParamsKey).HartId}")
  bankConflictTable.log(
    data = bankConflictData,
    en = isWriteBankConflictTable.orR && rr_bank_conflict(0)(1),
    site = siteName,
    clock = clock,
    reset = reset
  )

  for (y <- 1 until LoadPipelineWidth; x <- 0 until y) {
    XSPerfAccumulate(s"data_array_fake_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y) && set_addrs(x)===set_addrs(y) && div_addrs(x) === div_addrs(y))
  }

  // keep the debug-relevant signals from being optimised away
  if (backendParams.debugEn){
    load_req_with_bank_conflict.foreach(dontTouch(_))
    dontTouch(read_result)
    dontTouch(read_error_delayed_result)
  }
}
635
636// the smallest access unit is bank
637class BankedDataArray(implicit p: Parameters) extends AbstractBankedDataArray {
638  println("  DCacheType: BankedDataArray")
639  val ReduceReadlineConflict = false
640
641  io.write.ready := true.B
642  io.write_dup.foreach(_.ready := true.B)
643
644  val data_banks = List.fill(DCacheSetDiv)(List.tabulate(DCacheBanks)(i => Module(new DataSRAMBank(i))))
645  data_banks.map(_.map(_.dump()))
646
647  val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
648  val set_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
649  val div_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
650  val bank_addrs = Wire(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, UInt())))
651  val way_en_reg = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
652  val set_addrs_reg = Wire(Vec(LoadPipelineWidth, UInt()))
653
654  val line_set_addr = addr_to_dcache_div_set(io.readline.bits.addr)
655  val line_div_addr = addr_to_dcache_div(io.readline.bits.addr)
656  val line_way_en = io.readline.bits.way_en
657
658  val write_bank_mask_reg = RegEnable(io.write.bits.wmask, io.write.valid)
659  val write_data_reg = RegEnable(io.write.bits.data, io.write.valid)
660  val write_valid_reg = RegNext(io.write.valid)
661  val write_valid_dup_reg = io.write_dup.map(x => RegNext(x.valid))
662  val write_wayen_dup_reg = io.write_dup.map(x => RegEnable(x.bits.way_en, x.valid))
663  val write_set_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div_set(x.bits.addr), x.valid))
664  val write_div_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div(x.bits.addr), x.valid))
665
666  // read data_banks and ecc_banks
667  // for single port SRAM, do not allow read and write in the same cycle
668  val rwhazard = RegNext(io.write.valid)
669  val rrhazard = false.B // io.readline.valid
670  (0 until LoadPipelineWidth).map(rport_index => {
671    div_addrs(rport_index) := addr_to_dcache_div(io.read(rport_index).bits.addr)
672    bank_addrs(rport_index)(0) := addr_to_dcache_bank(io.read(rport_index).bits.addr)
673    bank_addrs(rport_index)(1) := Mux(io.is128Req(rport_index), bank_addrs(rport_index)(0) + 1.U, bank_addrs(rport_index)(0))
674    set_addrs(rport_index) := addr_to_dcache_div_set(io.read(rport_index).bits.addr)
675    set_addrs_reg(rport_index) := RegEnable(addr_to_dcache_div_set(io.read(rport_index).bits.addr), io.read(rport_index).valid)
676
677    // use way_en to select a way after data read out
678    assert(!(RegNext(io.read(rport_index).fire && PopCount(io.read(rport_index).bits.way_en) > 1.U)))
679    way_en(rport_index) := io.read(rport_index).bits.way_en
680    way_en_reg(rport_index) := RegEnable(io.read(rport_index).bits.way_en, io.read(rport_index).valid)
681  })
682
683  // read each bank, get bank result
684  val rr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x => Seq.tabulate(LoadPipelineWidth)(y => {
685    if (x == y) {
686      false.B
687    } else {
688      io.read(x).valid && io.read(y).valid &&
689      div_addrs(x) === div_addrs(y) &&
690      (io.read(x).bits.bankMask & io.read(y).bits.bankMask) =/= 0.U &&
691      set_addrs(x) =/= set_addrs(y)
692    }
693  }
694  ))
695
696  val load_req_with_bank_conflict = rr_bank_conflict.map(_.reduce(_ || _))
697  val load_req_valid = io.read.map(_.valid)
698  val load_req_lqIdx = io.read.map(_.bits.lqIdx)
699  val load_req_index = (0 until LoadPipelineWidth).map(_.asUInt)
700
701  val load_req_bank_conflict_selcet = selcetOldestPort(load_req_valid, load_req_lqIdx, load_req_index)
702  val load_req_bank_select_port  = UIntToOH(load_req_bank_conflict_selcet._2).asBools
703
704  val rr_bank_conflict_oldest = (0 until LoadPipelineWidth).map(i =>
705    !load_req_bank_select_port(i) && load_req_with_bank_conflict(i)
706  )
707
708  val rrl_bank_conflict = Wire(Vec(LoadPipelineWidth, Bool()))
709  val rrl_bank_conflict_intend = Wire(Vec(LoadPipelineWidth, Bool()))
710  (0 until LoadPipelineWidth).foreach { i =>
711    val judge = if (ReduceReadlineConflict) io.read(i).valid && (io.readline.bits.rmask & io.read(i).bits.bankMask) =/= 0.U && div_addrs(i) === line_div_addr
712                else io.read(i).valid && div_addrs(i)===line_div_addr
713    rrl_bank_conflict(i) := judge && io.readline.valid
714    rrl_bank_conflict_intend(i) := judge && io.readline_intend
715  }
716  val wr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x =>
717    io.read(x).valid &&
718    write_valid_reg &&
719    div_addrs(x) === write_div_addr_dup_reg.head &&
720    (write_bank_mask_reg(bank_addrs(x)(0)) || write_bank_mask_reg(bank_addrs(x)(1)) && io.is128Req(x))
721  )
722  val wrl_bank_conflict = io.readline.valid && write_valid_reg && line_div_addr === write_div_addr_dup_reg.head
723  // ready
724  io.readline.ready := !(wrl_bank_conflict)
725  io.read.zipWithIndex.map{case(x, i) => x.ready := !(wr_bank_conflict(i) || rrhazard)}
726
727  val perf_multi_read = PopCount(io.read.map(_.valid)) >= 2.U
728  (0 until LoadPipelineWidth).foreach(i => {
729    // remove fake rr_bank_conflict situation in s2
730    val real_other_bank_conflict_reg = RegNext(wr_bank_conflict(i) || rrl_bank_conflict(i))
731    val real_rr_bank_conflict_reg = RegNext(rr_bank_conflict_oldest(i))
732    io.bank_conflict_slow(i) := real_other_bank_conflict_reg || real_rr_bank_conflict_reg
733
734    // get result in s1
735    io.disable_ld_fast_wakeup(i) := wr_bank_conflict(i) || rrl_bank_conflict_intend(i) ||
736      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
737  })
738  XSPerfAccumulate("data_array_multi_read", perf_multi_read)
739  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
740    XSPerfAccumulate(s"data_array_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y))
741  ))
742  (0 until LoadPipelineWidth).foreach(i => {
743    XSPerfAccumulate(s"data_array_rrl_bank_conflict_${i}", rrl_bank_conflict(i))
744    XSPerfAccumulate(s"data_array_rw_bank_conflict_${i}", wr_bank_conflict(i))
745    XSPerfAccumulate(s"data_array_read_${i}", io.read(i).valid)
746  })
747  XSPerfAccumulate("data_array_access_total", PopCount(io.read.map(_.valid)))
748  XSPerfAccumulate("data_array_read_line", io.readline.valid)
749  XSPerfAccumulate("data_array_write", io.write.valid)
750
751  val bank_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, new L1BankedDataReadResult()))))
752  val bank_result_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, new L1BankedDataReadResult()))))
753  val read_bank_error_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, Bool()))))
754
755  for (div_index <- 0 until DCacheSetDiv) {
756    for (bank_index <- 0 until DCacheBanks) {
757      //     Set Addr & Read Way Mask
758      //
759      //    Pipe 0   ....  Pipe (n-1)
760      //      +      ....     +
761      //      |      ....     |
762      // +----+---------------+-----+
763      //  X                        X
764      //   X                      +------+ Bank Addr Match
765      //    +---------+----------+
766      //              |
767      //     +--------+--------+
768      //     |    Data Bank    |
769      //     +-----------------+
770      val bank_addr_matchs = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
771        io.read(i).valid && div_addrs(i) === div_index.U && (bank_addrs(i)(0) === bank_index.U || bank_addrs(i)(1) === bank_index.U && io.is128Req(i)) &&
772          !rr_bank_conflict_oldest(i)
773      })))
774      val readline_match = Wire(Bool())
775      if (ReduceReadlineConflict) {
776        readline_match := io.readline.valid && io.readline.bits.rmask(bank_index) && line_div_addr === div_index.U
777      } else {
778        readline_match := io.readline.valid && line_div_addr === div_index.U
779      }
780
781      val bank_set_addr = Mux(readline_match,
782        line_set_addr,
783        PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => bank_addr_matchs(i) -> set_addrs(i)))
784      )
785      val read_enable = bank_addr_matchs.asUInt.orR || readline_match
786
787      // read raw data
788      val data_bank = data_banks(div_index)(bank_index)
789      data_bank.io.r.en := read_enable
790      data_bank.io.r.addr := bank_set_addr
791      for (way_index <- 0 until DCacheWays) {
792        bank_result(div_index)(bank_index)(way_index).ecc := getECCFromEncWord(data_bank.io.r.data(way_index))
793        bank_result(div_index)(bank_index)(way_index).raw_data := getDataFromEncWord(data_bank.io.r.data(way_index))
794
795        if (EnableDataEcc) {
796          val ecc_data = bank_result(div_index)(bank_index)(way_index).asECCData()
797          val ecc_data_delayed = RegEnable(ecc_data, RegNext(read_enable))
798          bank_result(div_index)(bank_index)(way_index).error_delayed := dcacheParameters.dataCode.decode(ecc_data_delayed).error
799          read_bank_error_delayed(div_index)(bank_index)(way_index) := bank_result(div_index)(bank_index)(way_index).error_delayed
800        } else {
801          bank_result(div_index)(bank_index)(way_index).error_delayed := false.B
802          read_bank_error_delayed(div_index)(bank_index)(way_index) := false.B
803        }
804        bank_result_delayed(div_index)(bank_index)(way_index) := RegEnable(bank_result(div_index)(bank_index)(way_index), RegNext(read_enable))
805      }
806    }
807  }
808
809  val data_read_oh = WireInit(VecInit(Seq.fill(DCacheSetDiv)(0.U(XLEN.W))))
810  for (div_index <- 0 until DCacheSetDiv){
811    val temp = WireInit(VecInit(Seq.fill(DCacheBanks)(0.U(XLEN.W))))
812    for (bank_index <- 0 until DCacheBanks) {
813      temp(bank_index) := PopCount(Fill(DCacheWays, data_banks(div_index)(bank_index).io.r.en.asUInt))
814    }
815    data_read_oh(div_index) := temp.reduce(_ + _)
816  }
817  XSPerfAccumulate("data_read_counter", data_read_oh.foldLeft(0.U)(_ + _))
818
819  (0 until LoadPipelineWidth).map(i => {
820    // 1 cycle after read fire(load s2)
821    val r_read_fire = RegNext(io.read(i).fire)
822    val r_div_addr = RegEnable(div_addrs(i), io.read(i).fire)
823    val r_bank_addr = RegEnable(bank_addrs(i), io.read(i).fire)
824    val r_way_addr = RegEnable(OHToUInt(way_en(i)), io.read(i).fire)
825    // 2 cycles after read fire(load s3)
826    val rr_read_fire = RegNext(r_read_fire)
827    val rr_div_addr = RegEnable(RegEnable(div_addrs(i), io.read(i).fire), r_read_fire)
828    val rr_bank_addr = RegEnable(RegEnable(bank_addrs(i), io.read(i).fire), r_read_fire)
829    val rr_way_addr = RegEnable(RegEnable(OHToUInt(way_en(i)), io.read(i).fire), r_read_fire)
830    (0 until VLEN/DCacheSRAMRowBits).map( j =>{
831      io.read_resp(i)(j)          := bank_result(r_div_addr)(r_bank_addr(j))(r_way_addr)
832      // error detection
833      io.read_error_delayed(i)(j) := rr_read_fire && read_bank_error_delayed(rr_div_addr)(rr_bank_addr(j))(rr_way_addr) && !RegNext(io.bank_conflict_slow(i))
834    })
835  })
836
837  // read result: expose banked read result
838  val readline_error_delayed = Wire(Vec(DCacheBanks, Bool()))
839  val readline_r_way_addr = RegEnable(OHToUInt(io.readline.bits.way_en), io.readline.valid)
840  val readline_rr_way_addr = RegEnable(readline_r_way_addr, RegNext(io.readline.valid))
841  val readline_r_div_addr = RegEnable(line_div_addr, io.readline.valid)
842  val readline_rr_div_addr = RegEnable(readline_r_div_addr, RegNext(io.readline.valid))
843  (0 until DCacheBanks).map(i => {
844    io.readline_resp(i) := bank_result(readline_r_div_addr)(i)(readline_r_way_addr)
845    readline_error_delayed(i) := bank_result(readline_rr_div_addr)(i)(readline_rr_way_addr).error_delayed
846  })
847  io.readline_error_delayed := RegNext(RegNext(io.readline.fire)) && readline_error_delayed.asUInt.orR
848
849  // write data_banks & ecc_banks
850  for (div_index <- 0 until DCacheSetDiv) {
851    for (bank_index <- 0 until DCacheBanks) {
852      // data write
853      val wen_reg = write_bank_mask_reg(bank_index) &&
854        write_valid_dup_reg(bank_index) &&
855        write_div_addr_dup_reg(bank_index) === div_index.U && RegNext(io.write.valid)
856      val write_ecc_reg = RegEnable(getECCFromEncWord(cacheParams.dataCode.encode(io.write.bits.data(bank_index))), io.write.valid)
857      val data_bank = data_banks(div_index)(bank_index)
858      data_bank.io.w.en := wen_reg
859      data_bank.io.w.way_en := write_wayen_dup_reg(bank_index)
860      data_bank.io.w.addr := write_set_addr_dup_reg(bank_index)
861      data_bank.io.w.data := asECCData(write_ecc_reg, write_data_reg(bank_index))
862    }
863  }
864
865  io.cacheOp.resp.valid := false.B
866  io.cacheOp.resp.bits  := DontCare
867
868  val tableName = "BankConflict" + p(XSCoreParamsKey).HartId.toString
869  val siteName = "BankedDataArray" + p(XSCoreParamsKey).HartId.toString
870  val bankConflictTable = ChiselDB.createTable(tableName, new BankConflictDB)
871  val bankConflictData = Wire(new BankConflictDB)
872  for (i <- 0 until LoadPipelineWidth) {
873    bankConflictData.set_index(i) := set_addrs(i)
874    bankConflictData.addr(i) := io.read(i).bits.addr
875  }
876
877  // FIXME: rr_bank_conflict(0)(1) no generalization
878  when(rr_bank_conflict(0)(1)) {
879    (0 until (VLEN/DCacheSRAMRowBits)).map(i => {
880      bankConflictData.bank_index(i) := bank_addrs(0)(i)
881    })
882    bankConflictData.way_index := OHToUInt(way_en(0))
883    bankConflictData.fake_rr_bank_conflict := set_addrs(0) === set_addrs(1) && div_addrs(0) === div_addrs(1)
884  }.otherwise {
885    (0 until (VLEN/DCacheSRAMRowBits)).map(i => {
886      bankConflictData.bank_index(i) := 0.U
887    })
888    bankConflictData.way_index := 0.U
889    bankConflictData.fake_rr_bank_conflict := false.B
890  }
891
892  val isWriteBankConflictTable = Constantin.createRecord(s"isWriteBankConflictTable${p(XSCoreParamsKey).HartId}")
893  bankConflictTable.log(
894    data = bankConflictData,
895    en = isWriteBankConflictTable.orR && rr_bank_conflict(0)(1),
896    site = siteName,
897    clock = clock,
898    reset = reset
899  )
900
901  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
902    XSPerfAccumulate(s"data_array_fake_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y) && set_addrs(x) === set_addrs(y) && div_addrs(x) === div_addrs(y))
903  ))
904
905  if (backendParams.debugEn){
906    load_req_with_bank_conflict.map(dontTouch(_))
907    dontTouch(bank_result)
908    dontTouch(read_bank_error_delayed)
909  }
910}
911