xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/data/BankedDataArray.scala (revision fa9d712c89878ecee3ecf56223b1bb1f63fc78e9)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.cache
18
19import chipsalliance.rocketchip.config.Parameters
20import chisel3._
21import utils._
22import chisel3.util._
23import freechips.rocketchip.tilelink.{ClientMetadata, TLClientParameters, TLEdgeOut}
24import utils.{Code, ParallelOR, ReplacementPolicy, SRAMTemplate, XSDebug, XSPerfAccumulate}
25import xiangshan.L1CacheErrorInfo
26
27import scala.math.max
28
/** Read request issued to the banked DCache data array.
  *
  *  - `way_en`: way-enable mask, one bit per DCache way (`DCacheWays` bits).
  *  - `addr`:   address of the access, `PAddrBits` wide.
  */
class L1BankedDataReadReq(implicit p: Parameters) extends DCacheBundle {
  val way_en = Bits(DCacheWays.W)
  val addr = Bits(PAddrBits.W)
}
34
/** Whole-line read request: a [[L1BankedDataReadReq]] extended with a per-bank read mask.
  *
  *  - `rmask`: one bit per data bank (`DCacheBanks` bits).
  */
class L1BankedDataReadLineReq(implicit p: Parameters) extends L1BankedDataReadReq {
  val rmask = Bits(DCacheBanks.W)
}
39
40// Now, we can write a cache-block in a single cycle
/** Whole-line write request: a [[L1BankedDataReadReq]] extended with write data for every bank.
  *
  *  - `wmask`: one bit per data bank (`DCacheBanks` bits).
  *  - `data`:  one `DCacheSRAMRowBits`-wide row per bank.
  */
class L1BankedDataWriteReq(implicit p: Parameters) extends L1BankedDataReadReq {
  val wmask = Bits(DCacheBanks.W)
  val data = Vec(DCacheBanks, Bits(DCacheSRAMRowBits.W))
}
46
/** Read result of a single data bank.
  *
  *  - `ecc`:      check bits read for the row (`eccBits` wide).
  *  - `raw_data`: the row as stored in the data SRAM (`DCacheSRAMRowBits` wide).
  *  - `error`:    error flag for this row; slow to generate, so use it with care.
  */
class L1BankedDataReadResult(implicit p: Parameters) extends DCacheBundle {
  val ecc = Bits(eccBits.W)
  val raw_data = Bits(DCacheSRAMRowBits.W)
  val error = Bool() // slow to generate, use it with care

  /** Concatenates the check bits (upper part) with the raw data (lower part). */
  def asECCData() = Cat(ecc, raw_data)
}
58
59//                     Banked DCache Data
60// -----------------------------------------------------------------
61// | Bank0 | Bank1 | Bank2 | Bank3 | Bank4 | Bank5 | Bank6 | Bank7 |
62// -----------------------------------------------------------------
63// | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  |
64// | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  |
65// | ....  | ....  | ....  | ....  | ....  | ....  | ....  | ....  |
66// -----------------------------------------------------------------
/** Common IO definition and debug-print helpers for banked DCache data array implementations.
  *
  * Concrete subclasses provide the storage and drive the outputs declared in `io`.
  */
abstract class AbstractBankedDataArray(implicit p: Parameters) extends DCacheModule {
  // Index of the readline port when read ports and the readline port are numbered together:
  // it comes right after the LoadPipelineWidth load read ports.
  val ReadlinePortErrorIndex = LoadPipelineWidth
  val io = IO(new DCacheBundle {
    // word read requests, one per load pipeline
    val read = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new L1BankedDataReadReq)))
    // main pipeline: line read / line write requests
    val readline_intend = Input(Bool())
    val readline = Flipped(DecoupledIO(new L1BankedDataReadLineReq))
    val write = Flipped(DecoupledIO(new L1BankedDataWriteReq))
    // read response of every data bank
    val resp = Output(Vec(DCacheBanks, new L1BankedDataReadResult()))
    // val nacks = Output(Vec(LoadPipelineWidth, Bool()))
    // val errors = Output(Vec(LoadPipelineWidth + 1, new L1CacheErrorInfo)) // read ports + readline port
    val read_error = Output(Vec(LoadPipelineWidth, Bool()))
    val readline_error = Output(Bool())
    // when a bank conflict is reported for a read port, that port's result should be ignored
    val bank_conflict_slow = Output(Vec(LoadPipelineWidth, Bool()))
    val bank_conflict_fast = Output(Vec(LoadPipelineWidth, Bool()))
    val disable_ld_fast_wakeup = Output(Vec(LoadPipelineWidth, Bool()))
    // customized cache op port
    val cacheOp = Flipped(new L1CacheInnerOpIO)
  })

  /** Builds a Vec with one element per load pipeline, element `i` being `f(i)`. */
  def pipeMap[T <: Data](f: Int => T) = VecInit(Seq.tabulate(LoadPipelineWidth)(f))

  /** Debug-prints every valid word read request and a valid line read request. */
  def dumpRead() = {
    io.read.zipWithIndex.foreach { case (port, w) =>
      when(port.valid) {
        XSDebug(s"DataArray Read channel: $w valid way_en: %x addr: %x\n",
          port.bits.way_en, port.bits.addr)
      }
    }
    when(io.readline.valid) {
      XSDebug(s"DataArray Read Line, valid way_en: %x addr: %x rmask %x\n",
        io.readline.bits.way_en, io.readline.bits.addr, io.readline.bits.rmask)
    }
  }

  /** Debug-prints a valid write request followed by the data row and mask bit of each bank. */
  def dumpWrite() = {
    when(io.write.valid) {
      XSDebug(s"DataArray Write valid way_en: %x addr: %x\n",
        io.write.bits.way_en, io.write.bits.addr)

      // NOTE: the "cycle" label in the message is in fact the bank index.
      io.write.bits.data.zipWithIndex.foreach { case (row, r) =>
        XSDebug(s"cycle: $r data: %x wmask: %x\n", row, io.write.bits.wmask(r))
      }
    }
  }

  /** Debug-prints the raw data currently presented by every bank on `io.resp`. */
  def dumpResp() = {
    XSDebug(s"DataArray ReadeResp channel:\n")
    // NOTE: the "cycle" label in the message is in fact the bank index.
    for (r <- 0 until DCacheBanks) yield XSDebug(s"cycle: $r data: %x\n", io.resp(r).raw_data)
  }

  /** Emits all debug prints: reads, then writes, then responses. */
  def dump() = {
    dumpRead()
    dumpWrite()
    dumpResp()
  }
}
131
/** Banked DCache data array.
  *
  * Storage is split into `DCacheBanks` banks. Each bank holds one data SRAM per way
  * (wrapped in `DataSRAMBank`) plus one multi-way ECC SRAM. The module serves
  *  - `LoadPipelineWidth` word read ports (`io.read`), each touching a single bank,
  *  - one line read port (`io.readline`),
  *  - one line write port (`io.write`),
  *  - a customized cache-op port (`io.cacheOp`) that can read/write data and ECC directly.
  *
  * NOTE: the cache-op wiring at the end of the class overrides earlier connections to the
  * same SRAM ports (Chisel last-connect semantics), so statement order in this class matters.
  */
class BankedDataArray(implicit p: Parameters) extends AbstractBankedDataArray {
  /** Returns bits [encWordBits-1 : wordBits] of an encoded word.
    *
    * Presumably these are the check bits appended above the data bits by the ECC code --
    * confirm against the `Code` implementation in use.
    *
    * @param encWord encoded word; its width must equal `encWordBits` (checked at elaboration)
    */
  def getECCFromEncWord(encWord: UInt) = {
    require(encWord.getWidth == encWordBits)
    encWord(encWordBits - 1, wordBits)
  }

  // Elaboration-time switch. When true, a line read only occupies / conflicts with the banks
  // selected by its rmask; when false, a valid line read is treated as using every bank.
  val ReduceReadlineConflict = false

  // writes are always accepted
  io.write.ready := true.B

  // wrap data rows of all ways (DCacheWays) of one bank
  class DataSRAMBank(index: Int) extends Module {
    val io = IO(new Bundle() {
      // write port: `data` is written at set `addr` into every way whose way_en bit is set
      val w = new Bundle() {
        val en = Input(Bool())
        val addr = Input(UInt())
        val way_en = Input(UInt(DCacheWays.W))
        val data = Input(UInt(DCacheSRAMRowBits.W))
      }

      // read port: all ways are read at set `addr`; `way_en` picks which way drives `data`
      val r = new Bundle() {
        val en = Input(Bool())
        val addr = Input(UInt())
        val way_en = Input(UInt(DCacheWays.W))
        val data = Output(UInt(DCacheSRAMRowBits.W))
      }
    })

    // way_en delayed by one cycle, used to select among the SRAM read results below.
    // Presumably the SRAM read data is available the cycle after the request -- confirm
    // against SRAMTemplate.
    val r_way_en_reg = RegNext(io.r.way_en)

    // multiway data bank
    // one single-way, single-port SRAM per way, DCacheSets rows each
    val data_bank = Array.fill(DCacheWays) {
      Module(new SRAMTemplate(
        Bits(DCacheSRAMRowBits.W),
        set = DCacheSets,
        way = 1,
        shouldReset = false,
        holdRead = false,
        singlePort = true
      ))
    }

    for (w <- 0 until DCacheWays) {
      // a way is written only when the bank write is enabled and this way is selected
      val wen = io.w.en && io.w.way_en(w)
      data_bank(w).io.w.req.valid := wen
      data_bank(w).io.w.req.bits.apply(
        setIdx = io.w.addr,
        data = io.w.data,
        waymask = 1.U
      )
      // every way is read on a bank read; selection happens after the data comes back
      data_bank(w).io.r.req.valid := io.r.en
      data_bank(w).io.r.req.bits.apply(setIdx = io.r.addr)
    }

    // Two-level way selection: a one-hot mux inside the low half of the ways, another inside
    // the high half, then a 2:1 mux between the halves.
    // `tail(half)` drops the `half` most significant bits (leaving the low part of the mask),
    // `head(half)` keeps the `half` most significant bits.
    // NOTE(review): this lines up with take/drop only if nWays is even and equals DCacheWays.
    val half = nWays / 2
    val data_read = data_bank.map(_.io.r.resp.data(0))
    val data_left = Mux1H(r_way_en_reg.tail(half), data_read.take(half))
    val data_right = Mux1H(r_way_en_reg.head(half), data_read.drop(half))

    // the low half wins whenever any of its way_en bits is set
    val sel_low = r_way_en_reg.tail(half).orR()
    val row_data = Mux(sel_low, data_left, data_right)

    io.r.data := row_data

    // debug print of a read, emitted the cycle after the read request
    def dump_r() = {
      when(RegNext(io.r.en)) {
        XSDebug("bank read addr %x way_en %x data %x\n",
          RegNext(io.r.addr),
          RegNext(io.r.way_en),
          io.r.data
        )
      }
    }

    // debug print of a write, emitted in the cycle of the write request
    def dump_w() = {
      when(io.w.en) {
        XSDebug("bank write addr %x way_en %x data %x\n",
          io.w.addr,
          io.w.way_en,
          io.w.data
        )
      }
    }

    def dump() = {
      dump_w()
      dump_r()
    }
  }

  // one DataSRAMBank and one ECC SRAM (DCacheWays ways of eccBits each) per bank
  val data_banks = List.tabulate(DCacheBanks)(i => Module(new DataSRAMBank(i)))
  val ecc_banks = List.fill(DCacheBanks)(Module(new SRAMTemplate(
    Bits(eccBits.W),
    set = DCacheSets,
    way = DCacheWays,
    shouldReset = false,
    holdRead = false,
    singlePort = true
  )))

  data_banks.map(_.dump())

  // per read port: way mask, set index and bank index decoded from the request
  val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
  // registered copy of way_en; not referenced elsewhere in the visible code
  val way_en_reg = RegNext(way_en)
  val set_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val bank_addrs = Wire(Vec(LoadPipelineWidth, UInt()))

  // read data_banks and ecc_banks
  // for single port SRAM, do not allow read and write in the same cycle
  val rwhazard = io.write.valid
  val rrhazard = false.B // io.readline.valid
  (0 until LoadPipelineWidth).map(rport_index => {
    set_addrs(rport_index) := addr_to_dcache_set(io.read(rport_index).bits.addr)
    bank_addrs(rport_index) := addr_to_dcache_bank(io.read(rport_index).bits.addr)

    // a read port is only blocked by a write in the same cycle (rrhazard is constant false)
    io.read(rport_index).ready := !(rwhazard || rrhazard)

    // use way_en to select a way after data read out
    // a fired read must not enable more than one way (checked one cycle later)
    assert(!(RegNext(io.read(rport_index).fire() && PopCount(io.read(rport_index).bits.way_en) > 1.U)))
    way_en(rport_index) := io.read(rport_index).bits.way_en
  })
  // a line read is likewise only blocked by a write in the same cycle
  io.readline.ready := !(rwhazard)

  // read each bank, get bank result
  val bank_result = Wire(Vec(DCacheBanks, new L1BankedDataReadResult()))
  dontTouch(bank_result)
  val read_bank_error = Wire(Vec(DCacheBanks, Bool()))
  dontTouch(read_bank_error)
  // rr_bank_conflict(x)(y): read ports x and y are both valid and address the same bank
  val rr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x => Seq.tabulate(LoadPipelineWidth)(y =>
    bank_addrs(x) === bank_addrs(y) && io.read(x).valid && io.read(y).valid
  ))
  // rrl_bank_conflict(i): read port i collides with a valid line read
  // (only if the line read's rmask covers the port's bank when ReduceReadlineConflict is set)
  val rrl_bank_conflict = Wire(Vec(LoadPipelineWidth, Bool()))
  if (ReduceReadlineConflict) {
    (0 until LoadPipelineWidth).foreach(i => rrl_bank_conflict(i) := io.read(i).valid && io.readline.valid && io.readline.bits.rmask(bank_addrs(i)))
  } else {
    (0 until LoadPipelineWidth).foreach(i => rrl_bank_conflict(i) := io.read(i).valid && io.readline.valid)
  }
  // same as rrl_bank_conflict, but qualified by io.readline_intend instead of io.readline.valid
  val rrl_bank_conflict_intend = Wire(Vec(LoadPipelineWidth, Bool()))
  if (ReduceReadlineConflict) {
    (0 until LoadPipelineWidth).foreach(i => rrl_bank_conflict_intend(i) := io.read(i).valid && io.readline_intend && io.readline.bits.rmask(bank_addrs(i)))
  } else {
    (0 until LoadPipelineWidth).foreach(i => rrl_bank_conflict_intend(i) := io.read(i).valid && io.readline_intend)
  }

  // rw_bank_conflict(i): read port i is valid while a write is valid
  val rw_bank_conflict = VecInit(Seq.tabulate(LoadPipelineWidth)(io.read(_).valid && rwhazard))
  // at least two read ports are valid in the same cycle (performance counter only)
  val perf_multi_read = PopCount(io.read.map(_.valid)) >= 2.U
  (0 until LoadPipelineWidth).foreach(i => {
    // A port reports a conflict on a write, on a line read, or when any lower-indexed read
    // port targets the same bank; port 0 therefore never loses to another read port.
    io.bank_conflict_fast(i) := rw_bank_conflict(i) || rrl_bank_conflict(i) ||
      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
    // one-cycle delayed copy of the fast conflict signal
    io.bank_conflict_slow(i) := RegNext(io.bank_conflict_fast(i))
    // same terms as bank_conflict_fast, with the line-read term based on readline_intend
    io.disable_ld_fast_wakeup(i) := rw_bank_conflict(i) || rrl_bank_conflict_intend(i) ||
      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
  })
  XSPerfAccumulate("data_array_multi_read", perf_multi_read)
  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
    XSPerfAccumulate(s"data_array_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y))
  ))
  (0 until LoadPipelineWidth).foreach(i => {
    XSPerfAccumulate(s"data_array_rrl_bank_conflict_${i}", rrl_bank_conflict(i))
    XSPerfAccumulate(s"data_array_rw_bank_conflict_${i}", rw_bank_conflict(i))
    XSPerfAccumulate(s"data_array_read_${i}", io.read(i).valid)
  })
  XSPerfAccumulate("data_array_access_total", PopCount(io.read.map(_.valid)))
  XSPerfAccumulate("data_array_read_line", io.readline.valid)
  XSPerfAccumulate("data_array_write", io.write.valid)

  for (bank_index <- 0 until DCacheBanks) {
    //     Set Addr & Read Way Mask
    //
    //    Pipe 0   ....  Pipe (n-1)
    //      +      ....     +
    //      |      ....     |
    // +----+---------------+-----+
    //  X                        X
    //   X                      +------+ Bank Addr Match
    //    +---------+----------+
    //              |
    //     +--------+--------+
    //     |    Data Bank    |
    //     +-----------------+
    // bank_addr_matchs(i): read port i is valid and targets this bank
    val bank_addr_matchs = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
      bank_addrs(i) === bank_index.U && io.read(i).valid
    })))
    // the line read uses this bank (every bank unless ReduceReadlineConflict is set)
    val readline_match = Wire(Bool())
    if (ReduceReadlineConflict) {
      readline_match := io.readline.valid && io.readline.bits.rmask(bank_index)
    } else {
      readline_match := io.readline.valid
    }
    // The line read has priority over the word read ports; among the word read ports the
    // lowest-indexed matching port wins.
    val bank_way_en = Mux(readline_match,
      io.readline.bits.way_en,
      PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => bank_addr_matchs(i) -> way_en(i)))
    )
    val bank_set_addr = Mux(readline_match,
      addr_to_dcache_set(io.readline.bits.addr),
      PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => bank_addr_matchs(i) -> set_addrs(i)))
    )

    // read raw data
    val data_bank = data_banks(bank_index)
    data_bank.io.r.en := bank_addr_matchs.asUInt.orR || readline_match
    data_bank.io.r.way_en := bank_way_en
    data_bank.io.r.addr := bank_set_addr
    bank_result(bank_index).raw_data := data_bank.io.r.data

    // read ECC
    val ecc_bank = ecc_banks(bank_index)
    ecc_bank.io.r.req.valid := bank_addr_matchs.asUInt.orR || readline_match
    ecc_bank.io.r.req.bits.apply(setIdx = bank_set_addr)
    // all ways of the ECC SRAM are read; the way mask delayed by one cycle picks the result
    bank_result(bank_index).ecc := Mux1H(RegNext(bank_way_en), ecc_bank.io.r.resp.data)

    // use ECC to check error
    val data = bank_result(bank_index).asECCData()
    bank_result(bank_index).error := dcacheParameters.dataCode.decode(data).error
    read_bank_error(bank_index) := bank_result(bank_index).error
  }

  // read result: expose banked read result
  io.resp := bank_result

  // error detection
  // normal read ports
  // A port reports an error when it fired in the previous cycle, the bank it addressed then
  // flags an error now, and the port was not marked as conflicting.
  (0 until LoadPipelineWidth).map(rport_index => {
    io.read_error(rport_index) := RegNext(io.read(rport_index).fire()) &&
      read_bank_error(RegNext(bank_addrs(rport_index))) &&
      !io.bank_conflict_slow(rport_index)
  })
  // readline port
  // The line read reports an error when it fired in the previous cycle and any bank flags one.
  io.readline_error := RegNext(io.readline.fire()) &&
    VecInit((0 until DCacheBanks).map(i => io.resp(i).error)).asUInt().orR

  // write data_banks & ecc_banks
  val sram_waddr = addr_to_dcache_set(io.write.bits.addr)
  for (bank_index <- 0 until DCacheBanks) {
    // data write
    // a bank is written only when its wmask bit is set
    val data_bank = data_banks(bank_index)
    data_bank.io.w.en := io.write.valid && io.write.bits.wmask(bank_index)
    data_bank.io.w.way_en := io.write.bits.way_en
    data_bank.io.w.addr := sram_waddr
    data_bank.io.w.data := io.write.bits.data(bank_index)

    // ecc write
    // the check bits are recomputed from the row being written and stored in the same set/way
    val ecc_bank = ecc_banks(bank_index)
    ecc_bank.io.w.req.valid := io.write.valid && io.write.bits.wmask(bank_index)
    ecc_bank.io.w.req.bits.apply(
      setIdx = sram_waddr,
      data = getECCFromEncWord(cacheParams.dataCode.encode((io.write.bits.data(bank_index)))),
      waymask = io.write.bits.way_en
    )
    when(ecc_bank.io.w.req.valid) {
      XSDebug("write in ecc sram: bank %x set %x data %x waymask %x\n",
        bank_index.U,
        sram_waddr,
        getECCFromEncWord(cacheParams.dataCode.encode((io.write.bits.data(bank_index)))),
        io.write.bits.way_en
      );
    }
  }

  // deal with customized cache op
  // The connections below come after the normal read/write wiring and therefore override it
  // (last-connect) whenever a cache op of the matching kind is valid.
  // wayNum(4, 0) is decoded into a one-hot way mask, hence the limit of 32 ways.
  require(nWays <= 32)
  io.cacheOp.resp.bits := DontCare
  // set when the op code is one of the four data / ECC operations handled here
  val cacheOpShouldResp = WireInit(false.B)
  val eccReadResult = Wire(Vec(DCacheBanks, UInt(eccBits.W)))
  when(io.cacheOp.req.valid){
    // read the selected way at the requested index from every data bank
    when (CacheInstrucion.isReadData(io.cacheOp.req.bits.opCode)) {
      for (bank_index <- 0 until DCacheBanks) {
        val data_bank = data_banks(bank_index)
        data_bank.io.r.en := true.B
        data_bank.io.r.way_en := UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))
        data_bank.io.r.addr := io.cacheOp.req.bits.index
      }
      cacheOpShouldResp := true.B
    }
    // read the ECC SRAM of every bank at the requested index
	when (CacheInstrucion.isReadDataECC(io.cacheOp.req.bits.opCode)) {
      for (bank_index <- 0 until DCacheBanks) {
        val ecc_bank = ecc_banks(bank_index)
		ecc_bank.io.r.req.valid := true.B
		ecc_bank.io.r.req.bits.setIdx := io.cacheOp.req.bits.index
	  }
	  cacheOpShouldResp := true.B
	}
    // write write_data_vec(bank) into the selected way of every data bank
    when(CacheInstrucion.isWriteData(io.cacheOp.req.bits.opCode)){
      for (bank_index <- 0 until DCacheBanks) {
        val data_bank = data_banks(bank_index)
        data_bank.io.w.en := true.B
        data_bank.io.w.way_en := UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))
        data_bank.io.w.addr := io.cacheOp.req.bits.index
        data_bank.io.w.data := io.cacheOp.req.bits.write_data_vec(bank_index)
      }
      cacheOpShouldResp := true.B
    }
    // write the same write_data_ecc value into the selected way of every ECC bank
    when(CacheInstrucion.isWriteDataECC(io.cacheOp.req.bits.opCode)){
      for (bank_index <- 0 until DCacheBanks) {
        val ecc_bank = ecc_banks(bank_index)
        ecc_bank.io.w.req.valid := true.B
        ecc_bank.io.w.req.bits.apply(
          setIdx = io.cacheOp.req.bits.index,
          data = io.cacheOp.req.bits.write_data_ecc,
          waymask = UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))
        )
      }
      cacheOpShouldResp := true.B
    }
  }
  // the response is signalled one cycle after a handled cache op request
  io.cacheOp.resp.valid := RegNext(io.cacheOp.req.valid && cacheOpShouldResp)
  for (bank_index <- 0 until DCacheBanks) {
    io.cacheOp.resp.bits.read_data_vec(bank_index) := bank_result(bank_index).raw_data
    // pick the way requested in the previous cycle out of the ECC SRAM read result
	eccReadResult(bank_index) := ecc_banks(bank_index).io.r.resp.data(RegNext(io.cacheOp.req.bits.wayNum(4, 0)))
  }
  // ECC of the bank requested in the previous cycle; forced to zero when no response is valid
  io.cacheOp.resp.bits.read_data_ecc := Mux(io.cacheOp.resp.valid,
    eccReadResult(RegNext(io.cacheOp.req.bits.bank_num)),
    0.U
  )
}
447