xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/data/BankedDataArray.scala (revision 00210c34f22e07539e32a7bc422e175b81cc67cf)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.cache
18
19import chipsalliance.rocketchip.config.Parameters
20import chisel3._
21import utils._
22import chisel3.util._
23import freechips.rocketchip.tilelink.{ClientMetadata, TLClientParameters, TLEdgeOut}
24import utils.{Code, ParallelOR, ReplacementPolicy, SRAMTemplate, XSDebug, XSPerfAccumulate}
25import xiangshan.L1CacheErrorInfo
26
27import scala.math.max
28
/** Base request bundle for the banked DCache data array.
  *
  * Holds the way-enable vector and the address of the access; the line-read
  * and write request bundles extend it with extra fields.
  */
class L1BankedDataReadReq(implicit p: Parameters) extends DCacheBundle {
  // way-enable vector, one bit per DCache way
  val way_en = Bits(DCacheWays.W)
  // address of the access (PAddrBits wide)
  val addr = Bits(PAddrBits.W)
}
34
/** Line read request: a [[L1BankedDataReadReq]] extended with a per-bank read mask. */
class L1BankedDataReadLineReq(implicit p: Parameters) extends L1BankedDataReadReq {
  // read mask, one bit per data bank
  val rmask = Bits(DCacheBanks.W)
}
39
/** Write request for the banked data array.
  *
  * A whole cache block can be written in a single cycle: the request carries
  * one data row per bank together with a per-bank write mask.
  */
class L1BankedDataWriteReq(implicit p: Parameters) extends L1BankedDataReadReq {
  // write mask, one bit per data bank
  val wmask = Bits(DCacheBanks.W)
  // write data, one SRAM row per data bank
  val data = Vec(DCacheBanks, Bits(DCacheSRAMRowBits.W))
}
46
/** Read result of a single data bank.
  *
  * Note kept from the original source: you can choose which bank to read to save power.
  */
class L1BankedDataReadResult(implicit p: Parameters) extends DCacheBundle {
  // ECC check bits that accompany raw_data
  val ecc = Bits(eccBits.W)
  // data row as read from the bank, without ECC bits
  val raw_data = Bits(DCacheSRAMRowBits.W)
  // error flag; arrives 1 cycle later than the data resp
  val error_delayed = Bool()

  /** Concatenates `ecc` (upper bits) and `raw_data` (lower bits). */
  def asECCData(): UInt = Cat(ecc, raw_data)
}
58
//                     Banked DCache Data
// -----------------------------------------------------------------
// | Bank0 | Bank1 | Bank2 | Bank3 | Bank4 | Bank5 | Bank6 | Bank7 |
// -----------------------------------------------------------------
// | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  |
// | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  |
// | ....  | ....  | ....  | ....  | ....  | ....  | ....  | ....  |
// -----------------------------------------------------------------

/** Common IO definition and debug-print helpers for banked DCache data arrays.
  *
  * Declares the word-read ports, the line-read port, the write port, the
  * per-bank response, the conflict / error status outputs and the cache-op
  * port. Concrete subclasses provide the storage and drive these ports.
  */
abstract class AbstractBankedDataArray(implicit p: Parameters) extends DCacheModule {
  // error index of the readline port (placed right after the load-pipeline read ports)
  val ReadlinePortErrorIndex = LoadPipelineWidth

  val io = IO(new DCacheBundle {
    // word read requests, one port per load pipeline
    val read = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new L1BankedDataReadReq)))
    // line read / line write requests from the main pipeline
    val readline_intend = Input(Bool())
    val readline = Flipped(DecoupledIO(new L1BankedDataReadLineReq))
    val write = Flipped(DecoupledIO(new L1BankedDataWriteReq))
    // read response of every data bank
    val resp = Output(Vec(DCacheBanks, new L1BankedDataReadResult()))
    // val nacks = Output(Vec(LoadPipelineWidth, Bool()))
    // val errors = Output(Vec(LoadPipelineWidth + 1, new L1CacheErrorInfo)) // read ports + readline port
    val read_error_delayed = Output(Vec(LoadPipelineWidth, Bool()))
    val readline_error_delayed = Output(Bool())
    // on a bank conflict the result of the affected read port should be ignored
    val bank_conflict_slow = Output(Vec(LoadPipelineWidth, Bool()))
    val bank_conflict_fast = Output(Vec(LoadPipelineWidth, Bool()))
    val disable_ld_fast_wakeup = Output(Vec(LoadPipelineWidth, Bool()))
    // port for customized cache operations
    val cacheOp = Flipped(new L1CacheInnerOpIO)
  })

  /** Builds a Vec with one element per load pipeline, element `i` being `f(i)`. */
  def pipeMap[T <: Data](f: Int => T) = VecInit(Seq.tabulate(LoadPipelineWidth)(f))

  /** Debug-prints every valid word read request and the line read request. */
  def dumpRead() = {
    io.read.zipWithIndex.foreach { case (port, w) =>
      when(port.valid) {
        XSDebug(s"DataArray Read channel: $w valid way_en: %x addr: %x\n",
          port.bits.way_en, port.bits.addr)
      }
    }
    val line = io.readline
    when(line.valid) {
      XSDebug(s"DataArray Read Line, valid way_en: %x addr: %x rmask %x\n",
        line.bits.way_en, line.bits.addr, line.bits.rmask)
    }
  }

  /** Debug-prints a valid write request, followed by the data and mask bit of each bank. */
  def dumpWrite() = {
    val wr = io.write
    when(wr.valid) {
      XSDebug(s"DataArray Write valid way_en: %x addr: %x\n",
        wr.bits.way_en, wr.bits.addr)

      for (r <- 0 until DCacheBanks) {
        XSDebug(s"cycle: $r data: %x wmask: %x\n",
          wr.bits.data(r), wr.bits.wmask(r))
      }
    }
  }

  /** Debug-prints the raw data currently presented on each bank of `io.resp`. */
  def dumpResp() = {
    XSDebug(s"DataArray ReadeResp channel:\n")
    for (r <- 0 until DCacheBanks) yield XSDebug(s"cycle: $r data: %x\n", io.resp(r).raw_data)
  }

  /** Emits all debug prints: reads, writes and responses. */
  def dump() = {
    dumpRead()
    dumpWrite()
    dumpResp()
  }
}
131
/** Banked DCache data array built from single-port SRAMs.
  *
  * Each of the `DCacheBanks` banks holds one data SRAM per way (wrapped in
  * [[DataSRAMBank]]) plus one multi-way ECC SRAM. Word reads from the load
  * pipelines, line reads and line writes from the main pipeline, and the
  * customized cache-op port all share these SRAMs.
  *
  * Connection order matters: the cache-op section near the end re-drives SRAM
  * ports that were already connected above and relies on Chisel's
  * last-connect semantics to take priority while a cache op is valid.
  */
class BankedDataArray(implicit p: Parameters) extends AbstractBankedDataArray {
  /** Extracts the ECC check bits (the bits above `wordBits`) from an encoded word.
    *
    * @param encWord encoded word; its width must equal `encWordBits`
    * @return bits `encWordBits - 1` down to `wordBits` of `encWord`
    */
  def getECCFromEncWord(encWord: UInt) = {
    require(encWord.getWidth == encWordBits)
    encWord(encWordBits - 1, wordBits)
  }

  // when true, a line read only conflicts with / occupies the banks selected by its rmask
  val ReduceReadlineConflict = false

  // writes are always accepted
  io.write.ready := true.B

  /** One data bank: wraps the data rows of all `DCacheWays` ways.
    *
    * Every way is its own single-port, 1-way SRAM. All ways are read with the
    * same set index; the registered read way-enable selects the returned row
    * one cycle after the request.
    *
    * @param index bank number (not used inside the bank itself)
    */
  class DataSRAMBank(index: Int) extends Module {
    val io = IO(new Bundle() {
      // write port
      val w = new Bundle() {
        val en = Input(Bool())
        val addr = Input(UInt())
        val way_en = Input(UInt(DCacheWays.W))
        val data = Input(UInt(DCacheSRAMRowBits.W))
      }

      // read port; `data` is valid the cycle after `en`
      val r = new Bundle() {
        val en = Input(Bool())
        val addr = Input(UInt())
        val way_en = Input(UInt(DCacheWays.W))
        val data = Output(UInt(DCacheSRAMRowBits.W))
      }
    })

    // way select aligned with the SRAM read data (one cycle after the request)
    val r_way_en_reg = RegNext(io.r.way_en)

    // multiway data bank
    val data_bank = Array.fill(DCacheWays) {
      Module(new SRAMTemplate(
        Bits(DCacheSRAMRowBits.W),
        set = DCacheSets,
        way = 1,
        shouldReset = false,
        holdRead = false,
        singlePort = true
      ))
    }

    for (w <- 0 until DCacheWays) {
      // only the way(s) selected by w.way_en are written
      val wen = io.w.en && io.w.way_en(w)
      data_bank(w).io.w.req.valid := wen
      data_bank(w).io.w.req.bits.apply(
        setIdx = io.w.addr,
        data = io.w.data,
        waymask = 1.U
      )
      // every way is read; the selection happens on the read data
      data_bank(w).io.r.req.valid := io.r.en
      data_bank(w).io.r.req.bits.apply(setIdx = io.r.addr)
    }

    // The way mux is split into a low group and a high group. The select bits
    // are sliced explicitly so that bit i always pairs with way i, which also
    // holds when DCacheWays is odd (the high group then has one more way).
    require(DCacheWays >= 2, s"DataSRAMBank needs at least 2 ways to split the way mux, got $DCacheWays")
    val half = DCacheWays / 2
    val data_read = data_bank.map(_.io.r.resp.data(0))
    val way_en_low = r_way_en_reg(half - 1, 0)
    val way_en_high = r_way_en_reg(DCacheWays - 1, half)
    val data_left = Mux1H(way_en_low, data_read.take(half))
    val data_right = Mux1H(way_en_high, data_read.drop(half))

    // any selected way in the low group picks the low group's result
    val sel_low = way_en_low.orR()
    val row_data = Mux(sel_low, data_left, data_right)

    io.r.data := row_data

    /** Debug-prints the read issued in the previous cycle together with its data. */
    def dump_r() = {
      when(RegNext(io.r.en)) {
        XSDebug("bank read addr %x way_en %x data %x\n",
          RegNext(io.r.addr),
          RegNext(io.r.way_en),
          io.r.data
        )
      }
    }

    /** Debug-prints the write issued in the current cycle. */
    def dump_w() = {
      when(io.w.en) {
        XSDebug("bank write addr %x way_en %x data %x\n",
          io.w.addr,
          io.w.way_en,
          io.w.data
        )
      }
    }

    /** Emits both the write and the read debug prints. */
    def dump() = {
      dump_w()
      dump_r()
    }
  }

  val data_banks = List.tabulate(DCacheBanks)(i => Module(new DataSRAMBank(i)))
  // one ECC SRAM per bank, holding the check bits of all ways
  val ecc_banks = List.fill(DCacheBanks)(Module(new SRAMTemplate(
    Bits(eccBits.W),
    set = DCacheSets,
    way = DCacheWays,
    shouldReset = false,
    holdRead = false,
    singlePort = true
  )))

  data_banks.foreach(_.dump())

  val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
  val way_en_reg = RegNext(way_en)
  val set_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val bank_addrs = Wire(Vec(LoadPipelineWidth, UInt()))

  // read data_banks and ecc_banks
  // for single port SRAM, do not allow read and write in the same cycle
  val rwhazard = io.write.valid
  val rrhazard = false.B // io.readline.valid
  (0 until LoadPipelineWidth).foreach { rport_index =>
    val rport = io.read(rport_index)
    set_addrs(rport_index) := addr_to_dcache_set(rport.bits.addr)
    bank_addrs(rport_index) := addr_to_dcache_bank(rport.bits.addr)

    rport.ready := !(rwhazard || rrhazard)

    // way_en selects a way after the data has been read out, so at most one way may be set
    assert(!(RegNext(rport.fire() && PopCount(rport.bits.way_en) > 1.U)))
    way_en(rport_index) := rport.bits.way_en
  }
  io.readline.ready := !(rwhazard)

  // read each bank, get bank result
  val bank_result = Wire(Vec(DCacheBanks, new L1BankedDataReadResult()))
  dontTouch(bank_result)
  val read_bank_error_delayed = Wire(Vec(DCacheBanks, Bool()))
  dontTouch(read_bank_error_delayed)

  // rr_bank_conflict(x)(y): read ports x and y are both valid and target the same bank
  val rr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x => Seq.tabulate(LoadPipelineWidth)(y =>
    bank_addrs(x) === bank_addrs(y) && io.read(x).valid && io.read(y).valid
  ))

  // conflict between a word read and the line read (actual request / announced intent)
  val rrl_bank_conflict = Wire(Vec(LoadPipelineWidth, Bool()))
  val rrl_bank_conflict_intend = Wire(Vec(LoadPipelineWidth, Bool()))
  (0 until LoadPipelineWidth).foreach { i =>
    if (ReduceReadlineConflict) {
      // only conflict when the line read actually touches this port's bank
      val readline_hits_bank = io.readline.bits.rmask(bank_addrs(i))
      rrl_bank_conflict(i) := io.read(i).valid && io.readline.valid && readline_hits_bank
      rrl_bank_conflict_intend(i) := io.read(i).valid && io.readline_intend && readline_hits_bank
    } else {
      rrl_bank_conflict(i) := io.read(i).valid && io.readline.valid
      rrl_bank_conflict_intend(i) := io.read(i).valid && io.readline_intend
    }
  }

  val rw_bank_conflict = VecInit(Seq.tabulate(LoadPipelineWidth)(io.read(_).valid && rwhazard))
  val perf_multi_read = PopCount(io.read.map(_.valid)) >= 2.U
  (0 until LoadPipelineWidth).foreach { i =>
    // port i loses against every lower-numbered port that targets the same bank
    val rr_conflict_with_lower_port =
      if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _)
    io.bank_conflict_fast(i) := rw_bank_conflict(i) || rrl_bank_conflict(i) ||
      rr_conflict_with_lower_port
    io.bank_conflict_slow(i) := RegNext(io.bank_conflict_fast(i))
    io.disable_ld_fast_wakeup(i) := rw_bank_conflict(i) || rrl_bank_conflict_intend(i) ||
      rr_conflict_with_lower_port
  }

  // performance counters
  XSPerfAccumulate("data_array_multi_read", perf_multi_read)
  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
    XSPerfAccumulate(s"data_array_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y))
  ))
  (0 until LoadPipelineWidth).foreach { i =>
    XSPerfAccumulate(s"data_array_rrl_bank_conflict_${i}", rrl_bank_conflict(i))
    XSPerfAccumulate(s"data_array_rw_bank_conflict_${i}", rw_bank_conflict(i))
    XSPerfAccumulate(s"data_array_read_${i}", io.read(i).valid)
  }
  XSPerfAccumulate("data_array_access_total", PopCount(io.read.map(_.valid)))
  XSPerfAccumulate("data_array_read_line", io.readline.valid)
  XSPerfAccumulate("data_array_write", io.write.valid)

  for (bank_index <- 0 until DCacheBanks) {
    //     Set Addr & Read Way Mask
    //
    //    Pipe 0   ....  Pipe (n-1)
    //      +      ....     +
    //      |      ....     |
    // +----+---------------+-----+
    //  X                        X
    //   X                      +------+ Bank Addr Match
    //    +---------+----------+
    //              |
    //     +--------+--------+
    //     |    Data Bank    |
    //     +-----------------+
    val bank_addr_matchs = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
      bank_addrs(i) === bank_index.U && io.read(i).valid
    })))
    val readline_match = Wire(Bool())
    if (ReduceReadlineConflict) {
      readline_match := io.readline.valid && io.readline.bits.rmask(bank_index)
    } else {
      readline_match := io.readline.valid
    }
    // the line read wins over word reads; among word reads the lowest matching port wins
    val bank_way_en = Mux(readline_match,
      io.readline.bits.way_en,
      PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => bank_addr_matchs(i) -> way_en(i)))
    )
    val bank_set_addr = Mux(readline_match,
      addr_to_dcache_set(io.readline.bits.addr),
      PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => bank_addr_matchs(i) -> set_addrs(i)))
    )

    val read_enable = bank_addr_matchs.asUInt.orR || readline_match

    // read raw data
    val data_bank = data_banks(bank_index)
    data_bank.io.r.en := read_enable
    data_bank.io.r.way_en := bank_way_en
    data_bank.io.r.addr := bank_set_addr
    bank_result(bank_index).raw_data := data_bank.io.r.data

    // read ECC; the way is selected with the registered way-enable once the SRAM responds
    val ecc_bank = ecc_banks(bank_index)
    ecc_bank.io.r.req.valid := read_enable
    ecc_bank.io.r.req.bits.apply(setIdx = bank_set_addr)
    bank_result(bank_index).ecc := Mux1H(RegNext(bank_way_en), ecc_bank.io.r.resp.data)

    // use ECC to check error: the {ecc, data} word is captured the cycle after
    // the read was issued and decoded from that register
    val ecc_data = bank_result(bank_index).asECCData()
    val ecc_data_delayed = RegEnable(ecc_data, RegNext(read_enable))
    bank_result(bank_index).error_delayed := dcacheParameters.dataCode.decode(ecc_data_delayed).error
    read_bank_error_delayed(bank_index) := bank_result(bank_index).error_delayed
  }

  // read result: expose banked read result
  io.resp := bank_result

  // error detection
  // normal read ports: report the error of the bank read two cycles ago,
  // unless that read was dropped because of a bank conflict
  (0 until LoadPipelineWidth).foreach { rport_index =>
    io.read_error_delayed(rport_index) := RegNext(RegNext(io.read(rport_index).fire())) &&
      read_bank_error_delayed(RegNext(RegNext(bank_addrs(rport_index)))) &&
      !RegNext(io.bank_conflict_slow(rport_index))
  }
  // readline port: an error in any bank counts
  io.readline_error_delayed := RegNext(RegNext(io.readline.fire())) &&
    VecInit((0 until DCacheBanks).map(i => io.resp(i).error_delayed)).asUInt().orR

  // write data_banks & ecc_banks
  val sram_waddr = addr_to_dcache_set(io.write.bits.addr)
  for (bank_index <- 0 until DCacheBanks) {
    val bank_wen = io.write.valid && io.write.bits.wmask(bank_index)
    val bank_wdata = io.write.bits.data(bank_index)

    // data write
    val data_bank = data_banks(bank_index)
    data_bank.io.w.en := bank_wen
    data_bank.io.w.way_en := io.write.bits.way_en
    data_bank.io.w.addr := sram_waddr
    data_bank.io.w.data := bank_wdata

    // ecc write: encode once, reuse for the SRAM write and the debug print
    val bank_wecc = getECCFromEncWord(cacheParams.dataCode.encode(bank_wdata))
    val ecc_bank = ecc_banks(bank_index)
    ecc_bank.io.w.req.valid := bank_wen
    ecc_bank.io.w.req.bits.apply(
      setIdx = sram_waddr,
      data = bank_wecc,
      waymask = io.write.bits.way_en
    )
    when(ecc_bank.io.w.req.valid) {
      XSDebug("write in ecc sram: bank %x set %x data %x waymask %x\n",
        bank_index.U,
        sram_waddr,
        bank_wecc,
        io.write.bits.way_en
      )
    }
  }

  // deal with customized cache op
  // NOTE: the connections below override the normal read / write connections
  // made above (last connect wins), so this section must stay after them.
  require(nWays <= 32) // wayNum is decoded from its low 5 bits
  io.cacheOp.resp.bits := DontCare
  val cacheOpShouldResp = WireInit(false.B)
  val eccReadResult = Wire(Vec(DCacheBanks, UInt(eccBits.W)))
  when(io.cacheOp.req.valid) {
    when(CacheInstrucion.isReadData(io.cacheOp.req.bits.opCode)) {
      for (bank_index <- 0 until DCacheBanks) {
        val data_bank = data_banks(bank_index)
        data_bank.io.r.en := true.B
        data_bank.io.r.way_en := UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))
        data_bank.io.r.addr := io.cacheOp.req.bits.index
      }
      cacheOpShouldResp := true.B
    }
    when(CacheInstrucion.isReadDataECC(io.cacheOp.req.bits.opCode)) {
      for (bank_index <- 0 until DCacheBanks) {
        val ecc_bank = ecc_banks(bank_index)
        ecc_bank.io.r.req.valid := true.B
        ecc_bank.io.r.req.bits.setIdx := io.cacheOp.req.bits.index
      }
      cacheOpShouldResp := true.B
    }
    when(CacheInstrucion.isWriteData(io.cacheOp.req.bits.opCode)) {
      for (bank_index <- 0 until DCacheBanks) {
        val data_bank = data_banks(bank_index)
        data_bank.io.w.en := true.B
        data_bank.io.w.way_en := UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))
        data_bank.io.w.addr := io.cacheOp.req.bits.index
        data_bank.io.w.data := io.cacheOp.req.bits.write_data_vec(bank_index)
      }
      cacheOpShouldResp := true.B
    }
    when(CacheInstrucion.isWriteDataECC(io.cacheOp.req.bits.opCode)) {
      for (bank_index <- 0 until DCacheBanks) {
        val ecc_bank = ecc_banks(bank_index)
        ecc_bank.io.w.req.valid := true.B
        ecc_bank.io.w.req.bits.apply(
          setIdx = io.cacheOp.req.bits.index,
          data = io.cacheOp.req.bits.write_data_ecc,
          waymask = UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))
        )
      }
      cacheOpShouldResp := true.B
    }
  }
  // the response is produced one cycle after a recognised cache op request
  io.cacheOp.resp.valid := RegNext(io.cacheOp.req.valid && cacheOpShouldResp)
  for (bank_index <- 0 until DCacheBanks) {
    io.cacheOp.resp.bits.read_data_vec(bank_index) := bank_result(bank_index).raw_data
    // way / bank selectors are registered to line up with the SRAM read data
    eccReadResult(bank_index) := ecc_banks(bank_index).io.r.resp.data(RegNext(io.cacheOp.req.bits.wayNum(4, 0)))
  }
  io.cacheOp.resp.bits.read_data_ecc := Mux(io.cacheOp.resp.valid,
    eccReadResult(RegNext(io.cacheOp.req.bits.bank_num)),
    0.U
  )
}
450