xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/data/BankedDataArray.scala (revision 7a2fc509e2d355879c4db3dc3f17a6ccacd3d09e)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.cache
18
19import chipsalliance.rocketchip.config.Parameters
20import chisel3._
21import utils._
22import chisel3.util._
23import freechips.rocketchip.tilelink.{ClientMetadata, TLClientParameters, TLEdgeOut}
24import utils.{Code, ParallelOR, ReplacementPolicy, SRAMTemplate, XSDebug, XSPerfAccumulate}
25import xiangshan.L1CacheErrorInfo
26
27import scala.math.max
28
/** Read request presented to the banked data array.
  *
  * Carries a way-enable mask and the address of the access; the data array
  * derives the set index and the bank index from `addr`.
  */
class L1BankedDataReadReq(implicit p: Parameters) extends DCacheBundle {
  // way selection mask, one bit per DCache way
  val way_en = UInt(DCacheWays.W)
  // address of the access (PAddrBits wide)
  val addr = UInt(PAddrBits.W)
}
34
/** Line read request: a read request extended with a per-bank read mask. */
class L1BankedDataReadLineReq(implicit p: Parameters) extends L1BankedDataReadReq {
  // one bit per data bank (DCacheBanks wide)
  val rmask = UInt(DCacheBanks.W)
}
39
/** Write request for the banked data array.
  *
  * A cache block can now be written in a single cycle: the request carries one
  * data row per bank together with a per-bank write mask.
  */
class L1BankedDataWriteReq(implicit p: Parameters) extends L1BankedDataReadReq {
  // one bit per data bank (DCacheBanks wide)
  val wmask = UInt(DCacheBanks.W)
  // one SRAM row of write data for every bank
  val data = Vec(DCacheBanks, UInt(DCacheSRAMRowBits.W))
}
46
/** Read result of one data bank: the raw data row, its ECC bits and an error flag. */
class L1BankedDataReadResult(implicit p: Parameters) extends DCacheBundle {
  // note kept from the original author: you can choose which bank to read to save power
  val ecc = UInt(eccBits.W)
  val raw_data = UInt(DCacheSRAMRowBits.W)
  // slow to generate, use it with care
  val error = Bool()

  /** Returns the ECC bits concatenated with the raw data, ECC in the upper bits. */
  def asECCData() = Cat(ecc, raw_data)
}
58
59//                     Banked DCache Data
60// -----------------------------------------------------------------
61// | Bank0 | Bank1 | Bank2 | Bank3 | Bank4 | Bank5 | Bank6 | Bank7 |
62// -----------------------------------------------------------------
63// | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  |
64// | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  |
65// | ....  | ....  | ....  | ....  | ....  | ....  | ....  | ....  |
66// -----------------------------------------------------------------
/** Common IO and debug-print helpers shared by banked DCache data array implementations. */
abstract class AbstractBankedDataArray(implicit p: Parameters) extends DCacheModule {
  val ReadlinePortErrorIndex = LoadPipelineWidth

  val io = IO(new DCacheBundle {
    // word read requests, one port per load pipeline
    val read = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new L1BankedDataReadReq)))
    // line read / line write requests from the main pipeline
    val readline = Flipped(DecoupledIO(new L1BankedDataReadLineReq))
    val write = Flipped(DecoupledIO(new L1BankedDataWriteReq))
    // read response of every data bank
    val resp = Output(Vec(DCacheBanks, new L1BankedDataReadResult()))
    // val nacks = Output(Vec(LoadPipelineWidth, Bool()))
    // val errors = Output(Vec(LoadPipelineWidth + 1, new L1CacheErrorInfo)) // read ports + readline port
    val read_error = Output(Vec(LoadPipelineWidth, Bool()))
    val readline_error = Output(Bool())
    // when bank_conflict is raised, read (1) port should be ignored
    val bank_conflict_slow = Output(Vec(LoadPipelineWidth, Bool()))
    val bank_conflict_fast = Output(Vec(LoadPipelineWidth, Bool()))
    // customized cache op port
    val cacheOp = Flipped(new L1CacheInnerOpIO)
  })

  /** Builds a Vec with one element per load pipeline by applying `f` to each pipeline index. */
  def pipeMap[T <: Data](f: Int => T) = VecInit(Seq.tabulate(LoadPipelineWidth)(f))

  /** Debug-prints every valid word read request and a valid line read request. */
  def dumpRead() = {
    io.read.zipWithIndex.foreach { case (port, w) =>
      when(port.valid) {
        XSDebug(s"DataArray Read channel: $w valid way_en: %x addr: %x\n",
          port.bits.way_en, port.bits.addr)
      }
    }
    val line = io.readline
    when(line.valid) {
      XSDebug(s"DataArray Read Line, valid way_en: %x addr: %x rmask %x\n",
        line.bits.way_en, line.bits.addr, line.bits.rmask)
    }
  }

  /** Debug-prints a valid write request, followed by the data and mask bit of every bank. */
  def dumpWrite() = {
    val wr = io.write
    when(wr.valid) {
      XSDebug(s"DataArray Write valid way_en: %x addr: %x\n",
        wr.bits.way_en, wr.bits.addr)

      // here `r` is the bank index
      for (r <- 0 until DCacheBanks) {
        XSDebug(s"cycle: $r data: %x wmask: %x\n",
          wr.bits.data(r), wr.bits.wmask(r))
      }
    }
  }

  /** Debug-prints the raw read data of every bank. */
  def dumpResp() = {
    XSDebug(s"DataArray ReadeResp channel:\n")
    // here `r` is the bank index
    for (r <- 0 until DCacheBanks) yield XSDebug(s"cycle: $r data: %x\n", io.resp(r).raw_data)
  }

  /** Emits all read, write and response debug prints. */
  def dump() = {
    dumpRead()
    dumpWrite()
    dumpResp()
  }
}
129
130class BankedDataArray(implicit p: Parameters) extends AbstractBankedDataArray {
131  def getECCFromEncWord(encWord: UInt) = {
132    require(encWord.getWidth == encWordBits)
133    encWord(encWordBits - 1, wordBits)
134  }
135
136  val ReduceReadlineConflict = false
137
138  io.write.ready := true.B
139
140  // wrap data rows of 8 ways
141  class DataSRAMBank(index: Int) extends Module {
142    val io = IO(new Bundle() {
143      val w = new Bundle() {
144        val en = Input(Bool())
145        val addr = Input(UInt())
146        val way_en = Input(UInt(DCacheWays.W))
147        val data = Input(UInt(DCacheSRAMRowBits.W))
148      }
149
150      val r = new Bundle() {
151        val en = Input(Bool())
152        val addr = Input(UInt())
153        val way_en = Input(UInt(DCacheWays.W))
154        val data = Output(UInt(DCacheSRAMRowBits.W))
155      }
156    })
157
158    val r_way_en_reg = RegNext(io.r.way_en)
159
160    // multiway data bank
161    val data_bank = Array.fill(DCacheWays) {
162      Module(new SRAMTemplate(
163        Bits(DCacheSRAMRowBits.W),
164        set = DCacheSets,
165        way = 1,
166        shouldReset = false,
167        holdRead = false,
168        singlePort = true
169      ))
170    }
171
172    for (w <- 0 until DCacheWays) {
173      val wen = io.w.en && io.w.way_en(w)
174      data_bank(w).io.w.req.valid := wen
175      data_bank(w).io.w.req.bits.apply(
176        setIdx = io.w.addr,
177        data = io.w.data,
178        waymask = 1.U
179      )
180      data_bank(w).io.r.req.valid := io.r.en
181      data_bank(w).io.r.req.bits.apply(setIdx = io.r.addr)
182    }
183
184    val half = nWays / 2
185    val data_read = data_bank.map(_.io.r.resp.data(0))
186    val data_left = Mux1H(r_way_en_reg.tail(half), data_read.take(half))
187    val data_right = Mux1H(r_way_en_reg.head(half), data_read.drop(half))
188
189    val sel_low = r_way_en_reg.tail(half).orR()
190    val row_data = Mux(sel_low, data_left, data_right)
191
192    io.r.data := row_data
193
194    def dump_r() = {
195      when(RegNext(io.r.en)) {
196        XSDebug("bank read addr %x way_en %x data %x\n",
197          RegNext(io.r.addr),
198          RegNext(io.r.way_en),
199          io.r.data
200        )
201      }
202    }
203
204    def dump_w() = {
205      when(io.w.en) {
206        XSDebug("bank write addr %x way_en %x data %x\n",
207          io.w.addr,
208          io.w.way_en,
209          io.w.data
210        )
211      }
212    }
213
214    def dump() = {
215      dump_w()
216      dump_r()
217    }
218  }
219
220  val data_banks = List.tabulate(DCacheBanks)(i => Module(new DataSRAMBank(i)))
221  val ecc_banks = List.fill(DCacheBanks)(Module(new SRAMTemplate(
222    Bits(eccBits.W),
223    set = DCacheSets,
224    way = DCacheWays,
225    shouldReset = false,
226    holdRead = false,
227    singlePort = true
228  )))
229
230  data_banks.map(_.dump())
231
232  val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
233  val way_en_reg = RegNext(way_en)
234  val set_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
235  val bank_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
236
237  // read data_banks and ecc_banks
238  // for single port SRAM, do not allow read and write in the same cycle
239  val rwhazard = io.write.valid
240  val rrhazard = false.B // io.readline.valid
241  (0 until LoadPipelineWidth).map(rport_index => {
242    set_addrs(rport_index) := addr_to_dcache_set(io.read(rport_index).bits.addr)
243    bank_addrs(rport_index) := addr_to_dcache_bank(io.read(rport_index).bits.addr)
244
245    io.read(rport_index).ready := !(rwhazard || rrhazard)
246
247    // use way_en to select a way after data read out
248    assert(!(RegNext(io.read(rport_index).fire() && PopCount(io.read(rport_index).bits.way_en) > 1.U)))
249    way_en(rport_index) := io.read(rport_index).bits.way_en
250  })
251  io.readline.ready := !(rwhazard)
252
253  // read each bank, get bank result
254  val bank_result = Wire(Vec(DCacheBanks, new L1BankedDataReadResult()))
255  dontTouch(bank_result)
256  val read_bank_error = Wire(Vec(DCacheBanks, Bool()))
257  dontTouch(read_bank_error)
258  val rr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x => Seq.tabulate(LoadPipelineWidth)(y =>
259    bank_addrs(x) === bank_addrs(y) && io.read(x).valid && io.read(y).valid
260  ))
261  val rrl_bank_conflict = Wire(Vec(LoadPipelineWidth, Bool()))
262  if (ReduceReadlineConflict) {
263    (0 until LoadPipelineWidth).foreach(i => rrl_bank_conflict(i) := io.read(i).valid && io.readline.valid && io.readline.bits.rmask(bank_addrs(i)))
264  } else {
265    (0 until LoadPipelineWidth).foreach(i => rrl_bank_conflict(i) := io.read(i).valid && io.readline.valid)
266  }
267
268  val rw_bank_conflict = VecInit(Seq.tabulate(LoadPipelineWidth)(io.read(_).valid && rwhazard))
269  val perf_multi_read = PopCount(io.read.map(_.valid)) >= 2.U
270  (0 until LoadPipelineWidth).foreach(i => {
271    io.bank_conflict_fast(i) := rw_bank_conflict(i) || rrl_bank_conflict(i) ||
272      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
273    io.bank_conflict_slow(i) := RegNext(io.bank_conflict_fast(i))
274  })
275  XSPerfAccumulate("data_array_multi_read", perf_multi_read)
276  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
277    XSPerfAccumulate(s"data_array_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y))
278  ))
279  (0 until LoadPipelineWidth).foreach(i => {
280    XSPerfAccumulate(s"data_array_rrl_bank_conflict_${i}", rrl_bank_conflict(i))
281    XSPerfAccumulate(s"data_array_rw_bank_conflict_${i}", rw_bank_conflict(i))
282    XSPerfAccumulate(s"data_array_read_${i}", io.read(i).valid)
283  })
284  XSPerfAccumulate("data_array_access_total", PopCount(io.read.map(_.valid)))
285  XSPerfAccumulate("data_array_read_line", io.readline.valid)
286  XSPerfAccumulate("data_array_write", io.write.valid)
287
288  for (bank_index <- 0 until DCacheBanks) {
289    //     Set Addr & Read Way Mask
290    //
291    //    Pipe 0   ....  Pipe (n-1)
292    //      +      ....     +
293    //      |      ....     |
294    // +----+---------------+-----+
295    //  X                        X
296    //   X                      +------+ Bank Addr Match
297    //    +---------+----------+
298    //              |
299    //     +--------+--------+
300    //     |    Data Bank    |
301    //     +-----------------+
302    val bank_addr_matchs = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
303      bank_addrs(i) === bank_index.U && io.read(i).valid
304    })))
305    val readline_match = Wire(Bool())
306    if (ReduceReadlineConflict) {
307      readline_match := io.readline.valid && io.readline.bits.rmask(bank_index)
308    } else {
309      readline_match := io.readline.valid
310    }
311    val bank_way_en = Mux(readline_match,
312      io.readline.bits.way_en,
313      PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => bank_addr_matchs(i) -> way_en(i)))
314    )
315    val bank_set_addr = Mux(readline_match,
316      addr_to_dcache_set(io.readline.bits.addr),
317      PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => bank_addr_matchs(i) -> set_addrs(i)))
318    )
319
320    // read raw data
321    val data_bank = data_banks(bank_index)
322    data_bank.io.r.en := bank_addr_matchs.asUInt.orR || readline_match
323    data_bank.io.r.way_en := bank_way_en
324    data_bank.io.r.addr := bank_set_addr
325    bank_result(bank_index).raw_data := data_bank.io.r.data
326
327    // read ECC
328    val ecc_bank = ecc_banks(bank_index)
329    ecc_bank.io.r.req.valid := bank_addr_matchs.asUInt.orR || readline_match
330    ecc_bank.io.r.req.bits.apply(setIdx = bank_set_addr)
331    bank_result(bank_index).ecc := Mux1H(RegNext(bank_way_en), ecc_bank.io.r.resp.data)
332
333    // use ECC to check error
334    val data = bank_result(bank_index).asECCData()
335    bank_result(bank_index).error := dcacheParameters.dataCode.decode(data).error
336    read_bank_error(bank_index) := bank_result(bank_index).error
337  }
338
339  // read result: expose banked read result
340  io.resp := bank_result
341
342  // error detection
343  // normal read ports
344  (0 until LoadPipelineWidth).map(rport_index => {
345    io.read_error(rport_index) := RegNext(io.read(rport_index).fire()) &&
346      read_bank_error(RegNext(bank_addrs(rport_index))) &&
347      !io.bank_conflict_slow(rport_index)
348  })
349  // readline port
350  io.readline_error := RegNext(io.readline.fire()) &&
351    VecInit((0 until DCacheBanks).map(i => io.resp(i).error)).asUInt().orR
352
353  // write data_banks & ecc_banks
354  val sram_waddr = addr_to_dcache_set(io.write.bits.addr)
355  for (bank_index <- 0 until DCacheBanks) {
356    // data write
357    val data_bank = data_banks(bank_index)
358    data_bank.io.w.en := io.write.valid && io.write.bits.wmask(bank_index)
359    data_bank.io.w.way_en := io.write.bits.way_en
360    data_bank.io.w.addr := sram_waddr
361    data_bank.io.w.data := io.write.bits.data(bank_index)
362
363    // ecc write
364    val ecc_bank = ecc_banks(bank_index)
365    ecc_bank.io.w.req.valid := io.write.valid && io.write.bits.wmask(bank_index)
366    ecc_bank.io.w.req.bits.apply(
367      setIdx = sram_waddr,
368      data = getECCFromEncWord(cacheParams.dataCode.encode((io.write.bits.data(bank_index)))),
369      waymask = io.write.bits.way_en
370    )
371    when(ecc_bank.io.w.req.valid) {
372      XSDebug("write in ecc sram: bank %x set %x data %x waymask %x\n",
373        bank_index.U,
374        sram_waddr,
375        getECCFromEncWord(cacheParams.dataCode.encode((io.write.bits.data(bank_index)))),
376        io.write.bits.way_en
377      );
378    }
379  }
380
381  // deal with customized cache op
382  require(nWays <= 32)
383  io.cacheOp.resp.bits := DontCare
384  val cacheOpShouldResp = WireInit(false.B)
385  val eccReadResult = Wire(Vec(DCacheBanks, UInt(eccBits.W)))
386  when(io.cacheOp.req.valid){
387    when (CacheInstrucion.isReadData(io.cacheOp.req.bits.opCode)) {
388      for (bank_index <- 0 until DCacheBanks) {
389        val data_bank = data_banks(bank_index)
390        data_bank.io.r.en := true.B
391        data_bank.io.r.way_en := UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))
392        data_bank.io.r.addr := io.cacheOp.req.bits.index
393      }
394      cacheOpShouldResp := true.B
395    }
396	when (CacheInstrucion.isReadDataECC(io.cacheOp.req.bits.opCode)) {
397      for (bank_index <- 0 until DCacheBanks) {
398        val ecc_bank = ecc_banks(bank_index)
399		ecc_bank.io.r.req.valid := true.B
400		ecc_bank.io.r.req.bits.setIdx := io.cacheOp.req.bits.index
401	  }
402	  cacheOpShouldResp := true.B
403	}
404    when(CacheInstrucion.isWriteData(io.cacheOp.req.bits.opCode)){
405      for (bank_index <- 0 until DCacheBanks) {
406        val data_bank = data_banks(bank_index)
407        data_bank.io.w.en := true.B
408        data_bank.io.w.way_en := UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))
409        data_bank.io.w.addr := io.cacheOp.req.bits.index
410        data_bank.io.w.data := io.cacheOp.req.bits.write_data_vec(bank_index)
411      }
412      cacheOpShouldResp := true.B
413    }
414    when(CacheInstrucion.isWriteDataECC(io.cacheOp.req.bits.opCode)){
415      for (bank_index <- 0 until DCacheBanks) {
416        val ecc_bank = ecc_banks(bank_index)
417        ecc_bank.io.w.req.valid := true.B
418        ecc_bank.io.w.req.bits.apply(
419          setIdx = io.cacheOp.req.bits.index,
420          data = io.cacheOp.req.bits.write_data_ecc,
421          waymask = UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))
422        )
423      }
424      cacheOpShouldResp := true.B
425    }
426  }
427  io.cacheOp.resp.valid := RegNext(io.cacheOp.req.valid && cacheOpShouldResp)
428  for (bank_index <- 0 until DCacheBanks) {
429    io.cacheOp.resp.bits.read_data_vec(bank_index) := bank_result(bank_index).raw_data
430	eccReadResult(bank_index) := ecc_banks(bank_index).io.r.resp.data(RegNext(io.cacheOp.req.bits.wayNum(4, 0)))
431  }
432  io.cacheOp.resp.bits.read_data_ecc := Mux(io.cacheOp.resp.valid,
433    eccReadResult(RegNext(io.cacheOp.req.bits.bank_num)),
434    0.U
435  )
436}
437