xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/data/BankedDataArray.scala (revision dc4fac130426dbec49b49d778b9105d79b4a8eab)
1/***************************************************************************************
2* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
3* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
4* Copyright (c) 2020-2021 Peng Cheng Laboratory
5*
6* XiangShan is licensed under Mulan PSL v2.
7* You can use this software according to the terms and conditions of the Mulan PSL v2.
8* You may obtain a copy of Mulan PSL v2 at:
9*          http://license.coscl.org.cn/MulanPSL2
10*
11* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
12* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
13* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
14*
15* See the Mulan PSL v2 for more details.
16*
17*
18* Acknowledgement
19*
20* This implementation is inspired by several key papers:
21* [1] Gurindar S. Sohi, and Manoj Franklin. "[High-bandwidth data memory systems for superscalar processors.]
22* (https://doi.org/10.1145/106972.106980)" 4th International Conference on Architectural Support for Programming
23* Languages and Operating Systems (ASPLOS). 1991.
24***************************************************************************************/
25
26package xiangshan.cache
27
28import org.chipsalliance.cde.config.Parameters
29import chisel3._
30import utils._
31import utility._
32import chisel3.util._
33import freechips.rocketchip.tilelink.{ClientMetadata, TLClientParameters, TLEdgeOut}
34import xiangshan.{L1CacheErrorInfo, XSCoreParamsKey}
35
36import scala.math.max
37
/**
  * Record written to the bank-conflict ChiselDB table.
  *
  * SramedDataArray fills one record with the address and set index of every
  * load read port, plus the bank / way indices of port 0 when ports 0 and 1
  * are flagged as conflicting.
  */
class BankConflictDB(implicit p: Parameters) extends DCacheBundle{
  // address presented on each load read port
  val addr = Vec(LoadPipelineWidth, Bits(PAddrBits.W))
  // set index of each load read port
  val set_index = Vec(LoadPipelineWidth, UInt((DCacheAboveIndexOffset - DCacheSetOffset).W))
  // one bank index per SRAM row covered by a VLEN-wide access
  val bank_index = Vec(VLEN/DCacheSRAMRowBits, UInt((DCacheSetOffset - DCacheBankOffset).W))
  // binary (not one-hot) way index
  val way_index = UInt(wayBits.W)
  // set when the two flagged ports actually address the same set and div
  val fake_rr_bank_conflict = Bool()
}
45
/**
  * Basic request to the banked data array: a way-enable mask plus an address.
  * Also serves as the base class of the read-line and write request bundles.
  */
class L1BankedDataReadReq(implicit p: Parameters) extends DCacheBundle
{
  // way enable mask, one bit per way
  val way_en = Bits(DCacheWays.W)
  // address (PAddrBits wide); the arrays derive div / set / bank indices from it
  val addr = Bits(PAddrBits.W)
}
51
/**
  * Load-pipeline read request: way-enable mask and address, extended with a
  * per-bank mask and a kill flag.
  */
class L1BankedDataReadReqWithMask(implicit p: Parameters) extends DCacheBundle
{
  // way enable mask, one bit per way
  val way_en = Bits(DCacheWays.W)
  // address (PAddrBits wide)
  val addr = Bits(PAddrBits.W)
  // one bit per bank; overlapping masks between two ports are what the
  // read-read bank-conflict check looks at
  val bankMask = Bits(DCacheBanks.W)
  // NOTE(review): not consumed by the data-array code visible here —
  // presumably marks a cancelled request; confirm against the load pipeline
  val kill = Bool()
}
59
/** Whole-line read request issued on the `readline` port. */
class L1BankedDataReadLineReq(implicit p: Parameters) extends L1BankedDataReadReq
{
  // per-bank read mask; the arrays only consult it when ReduceReadlineConflict is true
  val rmask = Bits(DCacheBanks.W)
}
64
65// Now, we can write a cache-block in a single cycle
/** Cache-block write request: one data row per bank plus a per-bank write mask. */
class L1BankedDataWriteReq(implicit p: Parameters) extends L1BankedDataReadReq
{
  // per-bank write enable, one bit per bank
  val wmask = Bits(DCacheBanks.W)
  // write data, one DCacheSRAMRowBits-wide row per bank (no ECC bits)
  val data = Vec(DCacheBanks, Bits(DCacheSRAMRowBits.W))
}
71
72// cache-block write request without data
// Carries only way_en and addr (inherited); used for the per-bank duplicated
// write control ports (`write_dup`) of the data array.
class L1BankedDataWriteReqCtrl(implicit p: Parameters) extends L1BankedDataReadReq
74
/** Result of reading one SRAM row: raw data, its ECC bits and a delayed error flag. */
class L1BankedDataReadResult(implicit p: Parameters) extends DCacheBundle
{
  // you can choose which bank to read to save power
  // ECC check bits stored alongside the row
  val ecc = Bits(dataECCBits.W)
  // data bits of the row, without ECC
  val raw_data = Bits(DCacheSRAMRowBits.W)
  val error_delayed = Bool() // 1 cycle later than data resp

  // Rebuild the encoded word: ECC bits in the high part, data in the low part.
  def asECCData() = {
    Cat(ecc, raw_data)
  }
}
86
/** Write request presented to a DataSRAMBank. */
class DataSRAMBankWriteReq(implicit p: Parameters) extends DCacheBundle {
  // write enable
  val en = Bool()
  // set index inside the bank; width is left to be inferred
  val addr = UInt()
  // selects the way to write; DataSRAMBank asserts that at most one bit is set
  val way_en = UInt(DCacheWays.W)
  // encoded word to store (encDataBits wide)
  val data = UInt(encDataBits.W)
}
93
94// wrap a sram
/**
  * Thin wrapper around one single-ported SRAMTemplate instance.
  *
  * The array holds DCacheSets / DCacheSetDiv entries of encDataBits each and is
  * configured with a single way, so every write uses waymask 1 and reads return
  * element 0 of the response.
  *
  * @param bankIdx bank number, used only in the debug prints
  * @param wayIdx  way number, used only in the debug prints
  */
class DataSRAM(bankIdx: Int, wayIdx: Int)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle() {
    // write port
    val w = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Input(UInt(encDataBits.W))
    }

    // read port
    val r = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Output(UInt(encDataBits.W))
    }
  })

  // data sram
  val data_sram = Module(new SRAMTemplate(
    Bits(encDataBits.W),
    set = DCacheSets / DCacheSetDiv,
    way = 1,
    shouldReset = false,
    holdRead = false,
    singlePort = true
  ))

  // forward the write port straight to the SRAM
  data_sram.io.w.req.valid := io.w.en
  data_sram.io.w.req.bits.apply(
    setIdx = io.w.addr,
    data = io.w.data,
    waymask = 1.U
  )
  // forward the read port; only one way exists, so take response element 0
  data_sram.io.r.req.valid := io.r.en
  data_sram.io.r.req.bits.apply(setIdx = io.r.addr)
  io.r.data := data_sram.io.r.resp.data(0)
  XSPerfAccumulate("part_data_read_counter", data_sram.io.r.req.valid)

  // Debug print of a read: emitted the cycle after the read enable, together
  // with the address captured while the enable was high.
  def dump_r() = {
    when(RegNext(io.r.en)) {
      XSDebug("bank read set %x bank %x way %x data %x\n",
        RegEnable(io.r.addr, io.r.en),
        bankIdx.U,
        wayIdx.U,
        io.r.data
      )
    }
  }

  // Debug print of a write, emitted in the cycle the write enable is high.
  def dump_w() = {
    when(io.w.en) {
      XSDebug("bank write set %x bank %x way %x data %x\n",
        io.w.addr,
        bankIdx.U,
        wayIdx.U,
        io.w.data
      )
    }
  }

  // Instantiate both debug prints.
  def dump() = {
    dump_w()
    dump_r()
  }
}
158
159// wrap data rows of 8 ways
/**
  * One data bank built from DCacheWays single-ported SRAMs that share the
  * read address and the write address / data.
  *
  * A read enables every way and returns all of them as a Vec; a write is
  * steered to the way selected by w.way_en.
  *
  * @param index bank number (not referenced inside this module)
  */
class DataSRAMBank(index: Int)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle() {
    val w = Input(new DataSRAMBankWriteReq)

    // read port: one encoded word per way is returned
    val r = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Output(Vec(DCacheWays, UInt(encDataBits.W)))
    }
  })

  // a write may select at most one way (checked one cycle after the request)
  assert(RegNext(!io.w.en || PopCount(io.w.way_en) <= 1.U))

  // external controls do not read and write at the same time
  val w_info = io.w
  // val rw_bypass = RegNext(io.w.addr === io.r.addr && io.w.way_en === io.r.way_en && io.w.en)

  // multiway data bank
  val data_bank = Seq.fill(DCacheWays) {
    Module(new SRAMTemplate(
      Bits(encDataBits.W),
      set = DCacheSets / DCacheSetDiv,
      way = 1,
      shouldReset = false,
      holdRead = false,
      singlePort = true
    ))
  }

  for (w <- 0 until DCacheWays) {
    // only the selected way sees the write
    val wen = w_info.en && w_info.way_en(w)
    data_bank(w).io.w.req.valid := wen
    data_bank(w).io.w.req.bits.apply(
      setIdx = w_info.addr,
      data = w_info.data,
      waymask = 1.U
    )
    // every way is read on a read request
    data_bank(w).io.r.req.valid := io.r.en
    data_bank(w).io.r.req.bits.apply(setIdx = io.r.addr)
    // gate this way's clock unless it is being read or written
    // NOTE(review): first ClockGate argument is presumably the test-enable — confirm
    data_bank(w).clock := ClockGate(false.B, io.r.en | (io.w.en & io.w.way_en(w)), clock)
  }
  XSPerfAccumulate("part_data_read_counter", PopCount(Cat(data_bank.map(_.io.r.req.valid))))

  io.r.data := data_bank.map(_.io.r.resp.data(0))

  // Debug print of a read: emitted the cycle after the read enable, together
  // with the address captured while the enable was high.
  def dump_r() = {
    when(RegNext(io.r.en)) {
      XSDebug("bank read addr %x data %x\n",
        RegEnable(io.r.addr, io.r.en),
        io.r.data.asUInt
      )
    }
  }

  // Debug print of a write, emitted in the cycle the write enable is high.
  def dump_w() = {
    when(io.w.en) {
      XSDebug("bank write addr %x way_en %x data %x\n",
        io.w.addr,
        io.w.way_en,
        io.w.data
      )
    }
  }

  // Instantiate both debug prints.
  def dump() = {
    dump_w()
    dump_r()
  }
}
229
// Marker object: AbstractBankedDataArray exposes it as Some(HasDataEccParam)
// in `DataEccParam` when EnableDataEcc is set, and None otherwise.
case object HasDataEccParam
231
232//                     Banked DCache Data
233// -----------------------------------------------------------------
234// | Bank0 | Bank1 | Bank2 | Bank3 | Bank4 | Bank5 | Bank6 | Bank7 |
235// -----------------------------------------------------------------
236// | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  |
237// | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  |
238// | ....  | ....  | ....  | ....  | ....  | ....  | ....  | ....  |
239// -----------------------------------------------------------------
/**
  * Common IO and helper functions shared by the banked DCache data array
  * implementations.
  *
  * Stored words are "encoded words" of encDataBits: the low DCacheSRAMRowBits
  * hold the data row and, when EnableDataEcc is set, the bits above hold ECC.
  */
abstract class AbstractBankedDataArray(implicit p: Parameters) extends DCacheModule
{
  val DataEccParam = if(EnableDataEcc) Some(HasDataEccParam) else None
  val ReadlinePortErrorIndex = LoadPipelineWidth
  val io = IO(new DCacheBundle {
    // load pipeline read word req
    val read = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new L1BankedDataReadReqWithMask)))
    // per load port: the request also covers the next bank (two rows are read)
    val is128Req = Input(Vec(LoadPipelineWidth, Bool()))
    // main pipeline read / write line req
    val readline_intend = Input(Bool())
    val readline = Flipped(DecoupledIO(new L1BankedDataReadLineReq))
    val write = Flipped(DecoupledIO(new L1BankedDataWriteReq))
    // per-bank duplicated write control (way_en / addr only)
    val write_dup = Vec(DCacheBanks, Flipped(Decoupled(new L1BankedDataWriteReqCtrl)))
    // data for readline and loadpipe
    val readline_resp = Output(Vec(DCacheBanks, new L1BankedDataReadResult()))
    val readline_error_delayed = Output(Bool())
    val read_resp          = Output(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, new L1BankedDataReadResult())))
    val read_error_delayed = Output(Vec(LoadPipelineWidth,Vec(VLEN/DCacheSRAMRowBits, Bool())))
    // val nacks = Output(Vec(LoadPipelineWidth, Bool()))
    // val errors = Output(Vec(LoadPipelineWidth + 1, ValidIO(new L1CacheErrorInfo))) // read ports + readline port
    // when bank_conflict, read (1) port should be ignored
    val bank_conflict_slow = Output(Vec(LoadPipelineWidth, Bool()))
    val disable_ld_fast_wakeup = Output(Vec(LoadPipelineWidth, Bool()))
    // customized cache op port
    val cacheOp = Flipped(new L1CacheInnerOpIO)
    val cacheOp_req_dup = Vec(DCacheDupNum, Flipped(Valid(new CacheCtrlReqInfo)))
    val cacheOp_req_bits_opCode_dup = Input(Vec(DCacheDupNum, UInt(XLEN.W)))
  })

  /** Build a Vec with one element per load pipeline by applying `f` to each port index. */
  def pipeMap[T <: Data](f: Int => T) = VecInit((0 until LoadPipelineWidth).map(f))

  /**
    * Extract the ECC bits (everything above the data row) from an encoded word.
    * Returns a literal 0 when data ECC is disabled.
    *
    * @param encWord encoded word; must be exactly encDataBits wide when ECC is enabled
    */
  def getECCFromEncWord(encWord: UInt) = {
    if (EnableDataEcc) {
      // report the offending width, not encDataBits twice
      require(encWord.getWidth == encDataBits, s"encWord.getWidth=${encWord.getWidth} != encDataBits=$encDataBits!")
      encWord(encDataBits-1, DCacheSRAMRowBits)
    } else {
      0.U
    }
  }

  /** Extract the data row (low DCacheSRAMRowBits) from an encoded word. */
  def getDataFromEncWord(encWord: UInt) = {
    encWord(DCacheSRAMRowBits-1, 0)
  }

  /**
    * Assemble an encoded word from ECC and data: ECC in the high bits when
    * data ECC is enabled, otherwise the data row alone.
    */
  def asECCData(ecc: UInt, data: UInt) = {
    if (EnableDataEcc) {
      Cat(ecc, data)
    } else {
      data
    }
  }

  /** Debug print of valid load read requests and of a valid read-line request. */
  def dumpRead = {
    (0 until LoadPipelineWidth).foreach { w =>
      when(io.read(w).valid) {
        XSDebug(s"DataArray Read channel: $w valid way_en: %x addr: %x\n",
          io.read(w).bits.way_en, io.read(w).bits.addr)
      }
    }
    when(io.readline.valid) {
      XSDebug(s"DataArray Read Line, valid way_en: %x addr: %x rmask %x\n",
        io.readline.bits.way_en, io.readline.bits.addr, io.readline.bits.rmask)
    }
  }

  /** Debug print of a valid write request, one line per bank. */
  def dumpWrite = {
    when(io.write.valid) {
      XSDebug(s"DataArray Write valid way_en: %x addr: %x\n",
        io.write.bits.way_en, io.write.bits.addr)

      (0 until DCacheBanks).foreach { r =>
        XSDebug(s"cycle: $r data: %x wmask: %x\n",
          io.write.bits.data(r), io.write.bits.wmask(r))
      }
    }
  }

  /**
    * Debug print of the load read responses. For a 128-bit request rows 1 and 0
    * are printed concatenated, otherwise only row 0.
    */
  def dumpResp = {
    XSDebug(s"DataArray ReadeResp channel:\n")
    // `map` is kept here so the inferred result type of dumpResp / dump is unchanged
    (0 until LoadPipelineWidth) map { r =>
      XSDebug(s"cycle: $r data: %x\n", Mux(io.is128Req(r),
        Cat(io.read_resp(r)(1).raw_data,io.read_resp(r)(0).raw_data),
        io.read_resp(r)(0).raw_data))
    }
  }

  /** Instantiate all debug prints (read, write, response). */
  def dump() = {
    dumpRead
    dumpWrite
    dumpResp
  }
}
332
333// the smallest access unit is sram
/**
  * Data array variant in which every (div, bank, way) triple is its own
  * DataSRAM instance.
  *
  * Load reads enable only the SRAMs of the way selected by way_en; a read-line
  * enables all ways of the addressed div. Writes are registered and reach the
  * SRAMs one cycle after the request; reads that collide with such a pending
  * write are held off through `ready`.
  */
class SramedDataArray(implicit p: Parameters) extends AbstractBankedDataArray {
  println("  DCacheType: SramedDataArray")
  // elaboration-time switch: when true, read-line only touches the banks in rmask
  val ReduceReadlineConflict = false

  // writes are always accepted
  io.write.ready := true.B
  io.write_dup.foreach(_.ready := true.B)

  // data_banks(div)(bank)(way)
  val data_banks = List.tabulate(DCacheSetDiv)( k => List.tabulate(DCacheBanks)(i => List.tabulate(DCacheWays)(j => Module(new DataSRAM(i,j)))))
  data_banks.map(_.map(_.map(_.dump())))

  // per load port: way mask and the div / set / bank indices decoded from the address
  val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
  val set_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val div_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val bank_addrs = Wire(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, UInt())))

  val line_set_addr = addr_to_dcache_div_set(io.readline.bits.addr)
  val line_div_addr = addr_to_dcache_div(io.readline.bits.addr)
  // when WPU is enabled, line_way_en is all enabled when read data
  val line_way_en = Fill(DCacheWays, 1.U) // val line_way_en = io.readline.bits.way_en
  // NOTE(review): not referenced elsewhere in this class
  val line_way_en_reg = RegEnable(io.readline.bits.way_en, 0.U(DCacheWays.W),io.readline.valid)

  // write request captured in registers; the SRAM write is issued from these one cycle later
  val write_bank_mask_reg = RegEnable(io.write.bits.wmask, 0.U(DCacheBanks.W), io.write.valid)
  val write_data_reg = RegEnable(io.write.bits.data, io.write.valid)
  val write_valid_reg = RegNext(io.write.valid)
  val write_valid_dup_reg = io.write_dup.map(x => RegNext(x.valid))
  val write_wayen_dup_reg = io.write_dup.map(x => RegEnable(x.bits.way_en, 0.U(DCacheWays.W), x.valid))
  val write_set_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div_set(x.bits.addr), x.valid))
  val write_div_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div(x.bits.addr), x.valid))

  // read data_banks and ecc_banks
  // for single port SRAM, do not allow read and write in the same cycle
  val rrhazard = false.B // io.readline.valid
  (0 until LoadPipelineWidth).map(rport_index => {
    div_addrs(rport_index) := addr_to_dcache_div(io.read(rport_index).bits.addr)
    set_addrs(rport_index) := addr_to_dcache_div_set(io.read(rport_index).bits.addr)
    bank_addrs(rport_index)(0) := addr_to_dcache_bank(io.read(rport_index).bits.addr)
    // second row of a 128-bit access lives in the next bank
    bank_addrs(rport_index)(1) := bank_addrs(rport_index)(0) + 1.U

    // use way_en to select a way after data read out
    assert(!(RegNext(io.read(rport_index).fire && PopCount(io.read(rport_index).bits.way_en) > 1.U)))
    way_en(rport_index) := io.read(rport_index).bits.way_en
  })

  // read conflict
  // two load ports conflict when both are valid, hit the same div, overlap in
  // bankMask, select the same way, and address different sets
  val rr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x => Seq.tabulate(LoadPipelineWidth)(y =>
    io.read(x).valid && io.read(y).valid &&
    div_addrs(x) === div_addrs(y) &&
    (io.read(x).bits.bankMask & io.read(y).bits.bankMask) =/= 0.U &&
    io.read(x).bits.way_en === io.read(y).bits.way_en &&
    set_addrs(x) =/= set_addrs(y)
  ))
  // load port vs. read-line conflict (actual request / announced intent)
  val rrl_bank_conflict = Wire(Vec(LoadPipelineWidth, Bool()))
  val rrl_bank_conflict_intend = Wire(Vec(LoadPipelineWidth, Bool()))
  (0 until LoadPipelineWidth).foreach { i =>
    val judge = if (ReduceReadlineConflict) io.read(i).valid && (io.readline.bits.rmask & io.read(i).bits.bankMask) =/= 0.U && line_div_addr === div_addrs(i) && line_set_addr =/= set_addrs(i)
                else io.read(i).valid && line_div_addr === div_addrs(i) && line_set_addr =/= set_addrs(i)
    rrl_bank_conflict(i) := judge && io.readline.valid
    rrl_bank_conflict_intend(i) := judge && io.readline_intend
  }
  // load port vs. the write registered in the previous cycle: same div, same
  // way mask, and the write mask covers a bank the load reads
  val wr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x =>
    io.read(x).valid && write_valid_reg &&
    div_addrs(x) === write_div_addr_dup_reg.head &&
    way_en(x) === write_wayen_dup_reg.head &&
    (write_bank_mask_reg(bank_addrs(x)(0)) || write_bank_mask_reg(bank_addrs(x)(1)) && io.is128Req(x))
  )
  // read-line vs. the registered write: same div
  val wrl_bank_conflict = io.readline.valid && write_valid_reg && line_div_addr === write_div_addr_dup_reg.head
  // ready
  io.readline.ready := !(wrl_bank_conflict)
  io.read.zipWithIndex.map { case (x, i) => x.ready := !(wr_bank_conflict(i) || rrhazard) }

  val perf_multi_read = PopCount(io.read.map(_.valid)) >= 2.U
  val bank_conflict_fast = Wire(Vec(LoadPipelineWidth, Bool()))
  (0 until LoadPipelineWidth).foreach(i => {
    // port i loses to a write, a read-line, or any lower-indexed load port
    bank_conflict_fast(i) := wr_bank_conflict(i) || rrl_bank_conflict(i) ||
      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
    io.bank_conflict_slow(i) := RegNext(bank_conflict_fast(i))
    // same as above but using the read-line intent instead of the actual request
    io.disable_ld_fast_wakeup(i) := wr_bank_conflict(i) || rrl_bank_conflict_intend(i) ||
      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
  })
  XSPerfAccumulate("data_array_multi_read", perf_multi_read)
  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
    XSPerfAccumulate(s"data_array_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y))
  ))
  (0 until LoadPipelineWidth).foreach(i => {
    XSPerfAccumulate(s"data_array_rrl_bank_conflict_${i}", rrl_bank_conflict(i))
    XSPerfAccumulate(s"data_array_rw_bank_conflict_${i}", wr_bank_conflict(i))
    XSPerfAccumulate(s"data_array_read_${i}", io.read(i).valid)
  })
  XSPerfAccumulate("data_array_access_total", PopCount(io.read.map(_.valid)))
  XSPerfAccumulate("data_array_read_line", io.readline.valid)
  XSPerfAccumulate("data_array_write", io.write.valid)

  // per-SRAM read results, indexed (div)(bank)(way)
  val read_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays,new L1BankedDataReadResult()))))
  // NOTE(review): driven below but not read anywhere in this class
  val read_result_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays,new L1BankedDataReadResult()))))
  val read_error_delayed_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, Bool()))))
  dontTouch(read_result)
  dontTouch(read_error_delayed_result)
  for (div_index <- 0 until DCacheSetDiv){
    for (bank_index <- 0 until DCacheBanks) {
      for (way_index <- 0 until DCacheWays) {
        //     Set Addr & Read Way Mask
        //
        //    Pipe 0   ....  Pipe (n-1)
        //      +      ....     +
        //      |      ....     |
        // +----+---------------+-----+
        //  X                        X
        //   X                      +------+ Bank Addr Match
        //    +---------+----------+
        //              |
        //     +--------+--------+
        //     |    Data Bank    |
        //     +-----------------+
        // which load ports target this particular (div, bank, way) SRAM
        val loadpipe_en = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
          io.read(i).valid && div_addrs(i) === div_index.U && (bank_addrs(i)(0) === bank_index.U || bank_addrs(i)(1) === bank_index.U && io.is128Req(i)) && way_en(i)(way_index)
        })))
        val readline_en = Wire(Bool())
        if (ReduceReadlineConflict) {
          readline_en := io.readline.valid && io.readline.bits.rmask(bank_index) && line_way_en(way_index) && div_index.U === line_div_addr
        } else {
          readline_en := io.readline.valid && line_way_en(way_index) && div_index.U === line_div_addr
        }
        // read-line wins the address; otherwise the lowest-indexed matching load port
        val sram_set_addr = Mux(readline_en,
          addr_to_dcache_div_set(io.readline.bits.addr),
          PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => loadpipe_en(i) -> set_addrs(i)))
        )
        val read_en = loadpipe_en.asUInt.orR || readline_en
        // read raw data
        val data_bank = data_banks(div_index)(bank_index)(way_index)
        data_bank.io.r.en := read_en
        data_bank.io.r.addr := sram_set_addr

        read_result(div_index)(bank_index)(way_index).ecc := getECCFromEncWord(data_bank.io.r.data)
        read_result(div_index)(bank_index)(way_index).raw_data := getDataFromEncWord(data_bank.io.r.data)

        if (EnableDataEcc) {
          // ECC is decoded on a registered copy of the word, captured the cycle after the read enable
          val ecc_data = read_result(div_index)(bank_index)(way_index).asECCData()
          val ecc_data_delayed = RegEnable(ecc_data, RegNext(read_en))
          read_result(div_index)(bank_index)(way_index).error_delayed := dcacheParameters.dataCode.decode(ecc_data_delayed).error
          read_error_delayed_result(div_index)(bank_index)(way_index) := read_result(div_index)(bank_index)(way_index).error_delayed
        } else {
          read_result(div_index)(bank_index)(way_index).error_delayed := false.B
          read_error_delayed_result(div_index)(bank_index)(way_index) := false.B
        }

        read_result_delayed(div_index)(bank_index)(way_index) := RegEnable(read_result(div_index)(bank_index)(way_index), RegNext(read_en))
      }
    }
  }

  // flatten every SRAM's read enable into one vector for the read counter
  val data_read_oh = WireInit(VecInit(Seq.fill(DCacheSetDiv * DCacheBanks * DCacheWays)(0.U(1.W))))
  for(div_index <- 0 until DCacheSetDiv){
    for (bank_index <- 0 until DCacheBanks) {
      for (way_index <- 0 until DCacheWays) {
        data_read_oh(div_index *  DCacheBanks * DCacheWays + bank_index * DCacheWays + way_index) := data_banks(div_index)(bank_index)(way_index).io.r.en
      }
    }
  }
  XSPerfAccumulate("data_read_counter", PopCount(Cat(data_read_oh)))

  // read result: expose banked read result
  // TODO: clock gate
  (0 until LoadPipelineWidth).map(i => {
    // io.read_resp(i) := read_result(RegNext(bank_addrs(i)))(RegNext(OHToUInt(way_en(i))))
    // r_*: request info delayed by one cycle, used to select the data response
    val r_read_fire = RegNext(io.read(i).fire)
    val r_div_addr  = RegEnable(div_addrs(i), io.read(i).fire)
    val r_bank_addr = RegEnable(bank_addrs(i), io.read(i).fire)
    val r_way_addr  = RegNext(OHToUInt(way_en(i)))
    // rr_*: request info delayed by two cycles, used to select the delayed error flag
    val rr_read_fire = RegNext(RegNext(io.read(i).fire))
    val rr_div_addr = RegEnable(RegEnable(div_addrs(i), io.read(i).fire), r_read_fire)
    val rr_bank_addr = RegEnable(RegEnable(bank_addrs(i), io.read(i).fire), r_read_fire)
    val rr_way_addr = RegEnable(RegEnable(OHToUInt(way_en(i)), io.read(i).fire), r_read_fire)
    (0 until VLEN/DCacheSRAMRowBits).map( j =>{
      io.read_resp(i)(j) := read_result(r_div_addr)(r_bank_addr(j))(r_way_addr)
      // error detection
      // normal read ports
      // the error is suppressed when the port had reported a bank conflict
      io.read_error_delayed(i)(j) := rr_read_fire && read_error_delayed_result(rr_div_addr)(rr_bank_addr(j))(rr_way_addr) && !RegNext(io.bank_conflict_slow(i))
    })
  })

  // readline port
  // all ways were read; pick the requested way using the registered div / way index
  (0 until DCacheBanks).map(i => {
    io.readline_resp(i) := read_result(RegEnable(line_div_addr, io.readline.valid))(i)(RegEnable(OHToUInt(io.readline.bits.way_en),io.readline.valid))
  })
  io.readline_error_delayed := RegNext(RegNext(io.readline.fire)) &&
    VecInit((0 until DCacheBanks).map(i => io.readline_resp(i).error_delayed)).asUInt.orR

  // write data_banks & ecc_banks
  for (div_index <- 0 until DCacheSetDiv) {
    for (bank_index <- 0 until DCacheBanks) {
      for (way_index <- 0 until DCacheWays) {
        // data write
        // driven from the registered request, using the per-bank duplicated control registers
        val wen_reg = write_bank_mask_reg(bank_index) &&
          write_valid_dup_reg(bank_index) &&
          write_div_addr_dup_reg(bank_index) === div_index.U &&
          write_wayen_dup_reg(bank_index)(way_index)
        // ECC bits are computed from the incoming data and registered alongside it
        val write_ecc_reg = RegEnable(getECCFromEncWord(cacheParams.dataCode.encode(io.write.bits.data(bank_index))), io.write.valid)
        val data_bank = data_banks(div_index)(bank_index)(way_index)
        data_bank.io.w.en := wen_reg
        data_bank.io.w.addr := write_set_addr_dup_reg(bank_index)
        data_bank.io.w.data := asECCData(write_ecc_reg, write_data_reg(bank_index))
      }
    }
  }

  // the cache-op port is not served by this implementation: response tied off
  io.cacheOp.resp.valid := false.B
  io.cacheOp.resp.bits  := DontCare

  // ChiselDB logging of bank conflicts between load ports 0 and 1
  val tableName =  "BankConflict" + p(XSCoreParamsKey).HartId.toString
  val siteName = "BankedDataArray" + p(XSCoreParamsKey).HartId.toString
  val bankConflictTable = ChiselDB.createTable(tableName, new BankConflictDB)
  val bankConflictData = Wire(new BankConflictDB)
  for (i <- 0 until LoadPipelineWidth) {
    bankConflictData.set_index(i) := set_addrs(i)
    bankConflictData.addr(i) := io.read(i).bits.addr
  }

  // FIXME: rr_bank_conflict(0)(1) no generalization
  when(rr_bank_conflict(0)(1)) {
    (0 until (VLEN/DCacheSRAMRowBits)).map(i => {
      bankConflictData.bank_index(i) := bank_addrs(0)(i)
    })
    bankConflictData.way_index  := OHToUInt(way_en(0))
    bankConflictData.fake_rr_bank_conflict := set_addrs(0) === set_addrs(1) && div_addrs(0) === div_addrs(1)
  }.otherwise {
    (0 until (VLEN/DCacheSRAMRowBits)).map(i => {
      bankConflictData.bank_index(i) := 0.U
    })
    bankConflictData.way_index := 0.U
    bankConflictData.fake_rr_bank_conflict := false.B
  }

  // logging is additionally gated by a Constantin record
  val isWriteBankConflictTable = Constantin.createRecord(s"isWriteBankConflictTable${p(XSCoreParamsKey).HartId}")
  bankConflictTable.log(
    data = bankConflictData,
    en = isWriteBankConflictTable.orR && rr_bank_conflict(0)(1),
    site = siteName,
    clock = clock,
    reset = reset
  )

  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
    XSPerfAccumulate(s"data_array_fake_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y) && set_addrs(x)===set_addrs(y) && div_addrs(x) === div_addrs(y))
  ))

}
580
581// the smallest access unit is bank
582class BankedDataArray(implicit p: Parameters) extends AbstractBankedDataArray {
583  println("  DCacheType: BankedDataArray")
584  val ReduceReadlineConflict = false
585
586  io.write.ready := true.B
587  io.write_dup.foreach(_.ready := true.B)
588
589  val data_banks = List.fill(DCacheSetDiv)(List.tabulate(DCacheBanks)(i => Module(new DataSRAMBank(i))))
590  data_banks.map(_.map(_.dump()))
591
592  val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
593  val set_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
594  val div_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
595  val bank_addrs = Wire(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, UInt())))
596  val way_en_reg = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
597  val set_addrs_reg = Wire(Vec(LoadPipelineWidth, UInt()))
598
599  val line_set_addr = addr_to_dcache_div_set(io.readline.bits.addr)
600  val line_div_addr = addr_to_dcache_div(io.readline.bits.addr)
601  val line_way_en = io.readline.bits.way_en
602
603  val write_bank_mask_reg = RegEnable(io.write.bits.wmask, io.write.valid)
604  val write_data_reg = RegEnable(io.write.bits.data, io.write.valid)
605  val write_valid_reg = RegNext(io.write.valid)
606  val write_valid_dup_reg = io.write_dup.map(x => RegNext(x.valid))
607  val write_wayen_dup_reg = io.write_dup.map(x => RegEnable(x.bits.way_en, x.valid))
608  val write_set_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div_set(x.bits.addr), x.valid))
609  val write_div_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div(x.bits.addr), x.valid))
610
611  // read data_banks and ecc_banks
612  // for single port SRAM, do not allow read and write in the same cycle
613  val rwhazard = RegNext(io.write.valid)
614  val rrhazard = false.B // io.readline.valid
615  (0 until LoadPipelineWidth).map(rport_index => {
616    div_addrs(rport_index) := addr_to_dcache_div(io.read(rport_index).bits.addr)
617    bank_addrs(rport_index)(0) := addr_to_dcache_bank(io.read(rport_index).bits.addr)
618    bank_addrs(rport_index)(1) := Mux(io.is128Req(rport_index), bank_addrs(rport_index)(0) + 1.U, bank_addrs(rport_index)(0))
619    set_addrs(rport_index) := addr_to_dcache_div_set(io.read(rport_index).bits.addr)
620    set_addrs_reg(rport_index) := RegEnable(addr_to_dcache_div_set(io.read(rport_index).bits.addr), io.read(rport_index).valid)
621
622    // use way_en to select a way after data read out
623    assert(!(RegNext(io.read(rport_index).fire && PopCount(io.read(rport_index).bits.way_en) > 1.U)))
624    way_en(rport_index) := io.read(rport_index).bits.way_en
625    way_en_reg(rport_index) := RegEnable(io.read(rport_index).bits.way_en, io.read(rport_index).valid)
626  })
627
628  // read each bank, get bank result
629  val rr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x => Seq.tabulate(LoadPipelineWidth)(y =>
630    io.read(x).valid && io.read(y).valid &&
631    div_addrs(x) === div_addrs(y) &&
632    (io.read(x).bits.bankMask & io.read(y).bits.bankMask) =/= 0.U
633  ))
634  val rrl_bank_conflict = Wire(Vec(LoadPipelineWidth, Bool()))
635  val rrl_bank_conflict_intend = Wire(Vec(LoadPipelineWidth, Bool()))
636  (0 until LoadPipelineWidth).foreach { i =>
637    val judge = if (ReduceReadlineConflict) io.read(i).valid && (io.readline.bits.rmask & io.read(i).bits.bankMask) =/= 0.U && div_addrs(i) === line_div_addr
638                else io.read(i).valid && div_addrs(i)===line_div_addr
639    rrl_bank_conflict(i) := judge && io.readline.valid
640    rrl_bank_conflict_intend(i) := judge && io.readline_intend
641  }
642  val wr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x =>
643    io.read(x).valid &&
644    write_valid_reg &&
645    div_addrs(x) === write_div_addr_dup_reg.head &&
646    (write_bank_mask_reg(bank_addrs(x)(0)) || write_bank_mask_reg(bank_addrs(x)(1)) && io.is128Req(x))
647  )
648  val wrl_bank_conflict = io.readline.valid && write_valid_reg && line_div_addr === write_div_addr_dup_reg.head
649  // ready
650  io.readline.ready := !(wrl_bank_conflict)
651  io.read.zipWithIndex.map{case(x, i) => x.ready := !(wr_bank_conflict(i) || rrhazard)}
652
653  val perf_multi_read = PopCount(io.read.map(_.valid)) >= 2.U
654  (0 until LoadPipelineWidth).foreach(i => {
655    // remove fake rr_bank_conflict situation in s2
656    val real_other_bank_conflict_reg = RegNext(wr_bank_conflict(i) || rrl_bank_conflict(i))
657    val real_rr_bank_conflict_reg = (if (i == 0) 0.B else (0 until i).map{ j =>
658      RegNext(rr_bank_conflict(j)(i)) && (set_addrs_reg(j) =/= set_addrs_reg(i))
659    }.reduce(_ || _))
660    io.bank_conflict_slow(i) := real_other_bank_conflict_reg || real_rr_bank_conflict_reg
661
662    // get result in s1
663    io.disable_ld_fast_wakeup(i) := wr_bank_conflict(i) || rrl_bank_conflict_intend(i) ||
664      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
665  })
666  XSPerfAccumulate("data_array_multi_read", perf_multi_read)
667  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
668    XSPerfAccumulate(s"data_array_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y))
669  ))
670  (0 until LoadPipelineWidth).foreach(i => {
671    XSPerfAccumulate(s"data_array_rrl_bank_conflict_${i}", rrl_bank_conflict(i))
672    XSPerfAccumulate(s"data_array_rw_bank_conflict_${i}", wr_bank_conflict(i))
673    XSPerfAccumulate(s"data_array_read_${i}", io.read(i).valid)
674  })
675  XSPerfAccumulate("data_array_access_total", PopCount(io.read.map(_.valid)))
676  XSPerfAccumulate("data_array_read_line", io.readline.valid)
677  XSPerfAccumulate("data_array_write", io.write.valid)
678
679  val bank_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, new L1BankedDataReadResult()))))
680  val bank_result_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, new L1BankedDataReadResult()))))
681  val read_bank_error_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, Bool()))))
682  dontTouch(bank_result)
683  dontTouch(read_bank_error_delayed)
684  for (div_index <- 0 until DCacheSetDiv) {
685    for (bank_index <- 0 until DCacheBanks) {
686      //     Set Addr & Read Way Mask
687      //
688      //    Pipe 0   ....  Pipe (n-1)
689      //      +      ....     +
690      //      |      ....     |
691      // +----+---------------+-----+
692      //  X                        X
693      //   X                      +------+ Bank Addr Match
694      //    +---------+----------+
695      //              |
696      //     +--------+--------+
697      //     |    Data Bank    |
698      //     +-----------------+
699      val bank_addr_matchs = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
700        io.read(i).valid && div_addrs(i) === div_index.U && (bank_addrs(i)(0) === bank_index.U || bank_addrs(i)(1) === bank_index.U && io.is128Req(i))
701      })))
702      val readline_match = Wire(Bool())
703      if (ReduceReadlineConflict) {
704        readline_match := io.readline.valid && io.readline.bits.rmask(bank_index) && line_div_addr === div_index.U
705      } else {
706        readline_match := io.readline.valid && line_div_addr === div_index.U
707      }
708
709      val bank_set_addr = Mux(readline_match,
710        line_set_addr,
711        PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => bank_addr_matchs(i) -> set_addrs(i)))
712      )
713      val read_enable = bank_addr_matchs.asUInt.orR || readline_match
714
715      // read raw data
716      val data_bank = data_banks(div_index)(bank_index)
717      data_bank.io.r.en := read_enable
718      data_bank.io.r.addr := bank_set_addr
719      for (way_index <- 0 until DCacheWays) {
720        bank_result(div_index)(bank_index)(way_index).ecc := getECCFromEncWord(data_bank.io.r.data(way_index))
721        bank_result(div_index)(bank_index)(way_index).raw_data := getDataFromEncWord(data_bank.io.r.data(way_index))
722
723        if (EnableDataEcc) {
724          val ecc_data = bank_result(div_index)(bank_index)(way_index).asECCData()
725          val ecc_data_delayed = RegEnable(ecc_data, RegNext(read_enable))
726          bank_result(div_index)(bank_index)(way_index).error_delayed := dcacheParameters.dataCode.decode(ecc_data_delayed).error
727          read_bank_error_delayed(div_index)(bank_index)(way_index) := bank_result(div_index)(bank_index)(way_index).error_delayed
728        } else {
729          bank_result(div_index)(bank_index)(way_index).error_delayed := false.B
730          read_bank_error_delayed(div_index)(bank_index)(way_index) := false.B
731        }
732        bank_result_delayed(div_index)(bank_index)(way_index) := RegEnable(bank_result(div_index)(bank_index)(way_index), RegNext(read_enable))
733      }
734    }
735  }
736
737  val data_read_oh = WireInit(VecInit(Seq.fill(DCacheSetDiv)(0.U(XLEN.W))))
738  for (div_index <- 0 until DCacheSetDiv){
739    val temp = WireInit(VecInit(Seq.fill(DCacheBanks)(0.U(XLEN.W))))
740    for (bank_index <- 0 until DCacheBanks) {
741      temp(bank_index) := PopCount(Fill(DCacheWays, data_banks(div_index)(bank_index).io.r.en.asUInt))
742    }
743    data_read_oh(div_index) := temp.reduce(_ + _)
744  }
745  XSPerfAccumulate("data_read_counter", data_read_oh.foldLeft(0.U)(_ + _))
746
747  (0 until LoadPipelineWidth).map(i => {
748    // 1 cycle after read fire(load s2)
749    val r_read_fire = RegNext(io.read(i).fire)
750    val r_div_addr = RegEnable(div_addrs(i), io.read(i).fire)
751    val r_bank_addr = RegEnable(bank_addrs(i), io.read(i).fire)
752    val r_way_addr = RegEnable(OHToUInt(way_en(i)), io.read(i).fire)
753    // 2 cycles after read fire(load s3)
754    val rr_read_fire = RegNext(r_read_fire)
755    val rr_div_addr = RegEnable(RegEnable(div_addrs(i), io.read(i).fire), r_read_fire)
756    val rr_bank_addr = RegEnable(RegEnable(bank_addrs(i), io.read(i).fire), r_read_fire)
757    val rr_way_addr = RegEnable(RegEnable(OHToUInt(way_en(i)), io.read(i).fire), r_read_fire)
758    (0 until VLEN/DCacheSRAMRowBits).map( j =>{
759      io.read_resp(i)(j)          := bank_result(r_div_addr)(r_bank_addr(j))(r_way_addr)
760      // error detection
761      io.read_error_delayed(i)(j) := rr_read_fire && read_bank_error_delayed(rr_div_addr)(rr_bank_addr(j))(rr_way_addr) && !RegNext(io.bank_conflict_slow(i))
762    })
763  })
764
765  // read result: expose banked read result
766  (0 until DCacheBanks).map(i => {
767    io.readline_resp(i) := bank_result(RegEnable(line_div_addr, io.readline.valid))(i)(RegEnable(OHToUInt(io.readline.bits.way_en), io.readline.valid))
768  })
769  io.readline_error_delayed := RegNext(RegNext(io.readline.fire)) &&
770    VecInit((0 until DCacheBanks).map(i => io.readline_resp(i).error_delayed)).asUInt.orR
771
772  // write data_banks & ecc_banks
773  for (div_index <- 0 until DCacheSetDiv) {
774    for (bank_index <- 0 until DCacheBanks) {
775      // data write
776      val wen_reg = write_bank_mask_reg(bank_index) &&
777        write_valid_dup_reg(bank_index) &&
778        write_div_addr_dup_reg(bank_index) === div_index.U && RegNext(io.write.valid)
779      val write_ecc_reg = RegEnable(getECCFromEncWord(cacheParams.dataCode.encode(io.write.bits.data(bank_index))), io.write.valid)
780      val data_bank = data_banks(div_index)(bank_index)
781      data_bank.io.w.en := wen_reg
782      data_bank.io.w.way_en := write_wayen_dup_reg(bank_index)
783      data_bank.io.w.addr := write_set_addr_dup_reg(bank_index)
784      data_bank.io.w.data := asECCData(write_ecc_reg, write_data_reg(bank_index))
785    }
786  }
787
788  io.cacheOp.resp.valid := false.B
789  io.cacheOp.resp.bits  := DontCare
790
791  val tableName = "BankConflict" + p(XSCoreParamsKey).HartId.toString
792  val siteName = "BankedDataArray" + p(XSCoreParamsKey).HartId.toString
793  val bankConflictTable = ChiselDB.createTable(tableName, new BankConflictDB)
794  val bankConflictData = Wire(new BankConflictDB)
795  for (i <- 0 until LoadPipelineWidth) {
796    bankConflictData.set_index(i) := set_addrs(i)
797    bankConflictData.addr(i) := io.read(i).bits.addr
798  }
799
800  // FIXME: rr_bank_conflict(0)(1) no generalization
801  when(rr_bank_conflict(0)(1)) {
802    (0 until (VLEN/DCacheSRAMRowBits)).map(i => {
803      bankConflictData.bank_index(i) := bank_addrs(0)(i)
804    })
805    bankConflictData.way_index := OHToUInt(way_en(0))
806    bankConflictData.fake_rr_bank_conflict := set_addrs(0) === set_addrs(1) && div_addrs(0) === div_addrs(1)
807  }.otherwise {
808    (0 until (VLEN/DCacheSRAMRowBits)).map(i => {
809      bankConflictData.bank_index(i) := 0.U
810    })
811    bankConflictData.way_index := 0.U
812    bankConflictData.fake_rr_bank_conflict := false.B
813  }
814
815  val isWriteBankConflictTable = Constantin.createRecord(s"isWriteBankConflictTable${p(XSCoreParamsKey).HartId}")
816  bankConflictTable.log(
817    data = bankConflictData,
818    en = isWriteBankConflictTable.orR && rr_bank_conflict(0)(1),
819    site = siteName,
820    clock = clock,
821    reset = reset
822  )
823
824  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
825    XSPerfAccumulate(s"data_array_fake_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y) && set_addrs(x) === set_addrs(y) && div_addrs(x) === div_addrs(y))
826  ))
827
828}
829