xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/data/BankedDataArray.scala (revision a4e57ea3a91431261d57a58df4810c0d9f0366ef)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.cache
18
19import chipsalliance.rocketchip.config.Parameters
20import chisel3._
21import chisel3.util._
22import freechips.rocketchip.tilelink.{ClientMetadata, TLClientParameters, TLEdgeOut}
23import utils.{Code, ParallelOR, ReplacementPolicy, SRAMTemplate, XSDebug, XSPerfAccumulate}
24import xiangshan.L1CacheErrorInfo
25
26import scala.math.max
27
/**
  * Word read request sent to the banked data array.
  *
  * Also serves as the base bundle for the line-read and write requests.
  */
class L1BankedDataReadReq(implicit p: Parameters) extends DCacheBundle {
  /** Way select bits, one bit per way (`DCacheWays` wide). */
  val way_en = Bits(DCacheWays.W)

  /** Request address (`PAddrBits` wide). */
  val addr = Bits(PAddrBits.W)
}
33
/**
  * Line read request: a [[L1BankedDataReadReq]] extended with a per-bank read mask.
  */
class L1BankedDataReadLineReq(implicit p: Parameters) extends L1BankedDataReadReq {
  /** Bank read mask, one bit per bank (`DCacheBanks` wide). */
  val rmask = Bits(DCacheBanks.W)
}
38
/**
  * Write request for the banked data array.
  *
  * Now, we can write a cache-block in a single cycle: the request carries one
  * SRAM row of data for every bank together with a per-bank write mask.
  */
class L1BankedDataWriteReq(implicit p: Parameters) extends L1BankedDataReadReq {
  /** Bank write mask, one bit per bank (`DCacheBanks` wide). */
  val wmask = Bits(DCacheBanks.W)

  /** Write data, one `DCacheSRAMRowBits`-wide row per bank. */
  val data = Vec(DCacheBanks, Bits(DCacheSRAMRowBits.W))
}
45
/**
  * Read result of a single data bank: raw row data, its ECC bits and an error flag.
  */
class L1BankedDataReadResult(implicit p: Parameters) extends DCacheBundle {
  // you can choose which bank to read to save power

  /** ECC check bits belonging to `raw_data` (`eccBits` wide). */
  val ecc = Bits(eccBits.W)

  /** Row data as read from the data SRAM (`DCacheSRAMRowBits` wide). */
  val raw_data = Bits(DCacheSRAMRowBits.W)

  /** Error flag; slow to generate, use it with care. */
  val error = Bool()

  /** Returns the ECC bits concatenated above the raw data (`ecc` in the high bits). */
  def asECCData() = Cat(ecc, raw_data)
}
57
58//                     Banked DCache Data
59// -----------------------------------------------------------------
60// | Bank0 | Bank1 | Bank2 | Bank3 | Bank4 | Bank5 | Bank6 | Bank7 |
61// -----------------------------------------------------------------
62// | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  |
63// | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  |
64// | ....  | ....  | ....  | ....  | ....  | ....  | ....  | ....  |
65// -----------------------------------------------------------------
/**
  * Common IO and debug helpers for banked DCache data array implementations.
  *
  * The IO bundle offers `LoadPipelineWidth` word read ports, one line read port,
  * one line write port, a response carrying the read result of every bank,
  * ECC error reports, bank conflict flags and a customized cache-op port.
  */
abstract class AbstractBankedDataArray(implicit p: Parameters) extends DCacheModule
{
  // index of the readline port in io.errors: it follows the load read ports
  val ReadlinePortErrorIndex = LoadPipelineWidth
  val io = IO(new DCacheBundle {
    // load pipeline read word req
    val read = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new L1BankedDataReadReq)))
    // main pipeline read / write line req
    val readline = Flipped(DecoupledIO(new L1BankedDataReadLineReq))
    val write = Flipped(DecoupledIO(new L1BankedDataWriteReq))
    // data bank read resp (all banks)
    val resp = Output(Vec(DCacheBanks, new L1BankedDataReadResult()))
    // val nacks = Output(Vec(LoadPipelineWidth, Bool()))
    val errors = Output(Vec(LoadPipelineWidth + 1, new L1CacheErrorInfo)) // read ports + readline port
    // when bank_conflict, read (1) port should be ignored
    val bank_conflict_slow = Output(Vec(LoadPipelineWidth, Bool()))
    val bank_conflict_fast = Output(Vec(LoadPipelineWidth, Bool()))
    // customized cache op port
    val cacheOp = Flipped(new L1CacheInnerOpIO)
  })
  // elaboration-time parameter check, expressed with require like the other
  // parameter checks in this file
  require(LoadPipelineWidth <= 2, "BankedDataArray is designed for no more than 2 read ports")

  /** Builds a Vec with one element per load pipeline by applying `f` to each pipeline index. */
  def pipeMap[T <: Data](f: Int => T) = VecInit((0 until LoadPipelineWidth).map(f))

  /** Emits debug prints for every valid word read request and for a valid line read request. */
  def dumpRead(): Unit = {
    (0 until LoadPipelineWidth).foreach { w =>
      when(io.read(w).valid) {
        XSDebug(s"DataArray Read channel: $w valid way_en: %x addr: %x\n",
          io.read(w).bits.way_en, io.read(w).bits.addr)
      }
    }
    when(io.readline.valid) {
      XSDebug(s"DataArray Read Line, valid way_en: %x addr: %x rmask %x\n",
        io.readline.bits.way_en, io.readline.bits.addr, io.readline.bits.rmask)
    }
  }

  /** Emits debug prints for a valid write request: the request header and one line per bank. */
  def dumpWrite(): Unit = {
    when(io.write.valid) {
      XSDebug(s"DataArray Write valid way_en: %x addr: %x\n",
        io.write.bits.way_en, io.write.bits.addr)

      // `r` is the bank index (the message text labels it "cycle")
      (0 until DCacheBanks).foreach { r =>
        XSDebug(s"cycle: $r data: %x wmask: %x\n",
          io.write.bits.data(r), io.write.bits.wmask(r))
      }
    }
  }

  /** Emits debug prints with the raw read data of every bank. */
  def dumpResp(): Unit = {
    XSDebug(s"DataArray ReadeResp channel:\n")
    // `r` is the bank index (the message text labels it "cycle")
    (0 until DCacheBanks).foreach { r =>
      XSDebug(s"cycle: $r data: %x\n", io.resp(r).raw_data)
    }
  }

  /** Emits all read, write and response debug prints. */
  def dump(): Unit = {
    dumpRead()
    dumpWrite()
    dumpResp()
  }
}
127
/**
  * Banked DCache data array.
  *
  * Data is held in `DCacheBanks` [[DataSRAMBank]]s (one single-port SRAM per way
  * inside each bank); the ECC bits of every bank are held in a separate
  * multi-way single-port SRAM. Word reads from the load pipelines, line reads
  * and line writes share these SRAMs, so conflicts are detected here and
  * reported through `io.bank_conflict_fast` / `io.bank_conflict_slow`.
  */
class BankedDataArray(implicit p: Parameters) extends AbstractBankedDataArray {
  /** Extracts the ECC bits, i.e. bits `encWordBits - 1` down to `wordBits`, of an encoded word. */
  def getECCFromEncWord(encWord: UInt) = {
    require(encWord.getWidth == encWordBits)
    encWord(encWordBits - 1, wordBits)
  }

  // when true, a line read only occupies / conflicts on the banks selected by its rmask
  val ReduceReadlineConflict = false

  // writes are always accepted; reads are held off instead (see rwhazard below)
  io.write.ready := true.B

  // wrap the data rows of all ways of one bank
  class DataSRAMBank(index: Int) extends Module {
    val io = IO(new Bundle() {
      val w = new Bundle() {
        val en = Input(Bool())
        val addr = Input(UInt())
        val way_en = Input(UInt(DCacheWays.W))
        val data = Input(UInt(DCacheSRAMRowBits.W))
      }

      val r = new Bundle() {
        val en = Input(Bool())
        val addr = Input(UInt())
        val way_en = Input(UInt(DCacheWays.W))
        val data = Output(UInt(DCacheSRAMRowBits.W))
      }
    })

    // read data comes back one cycle after the request, so the way select is delayed to match
    val r_way_en_reg = RegNext(io.r.way_en)

    // multiway data bank
    val data_bank = Array.fill(DCacheWays) {
      Module(new SRAMTemplate(
        Bits(DCacheSRAMRowBits.W),
        set = DCacheSets,
        way = 1,
        shouldReset = false,
        holdRead = false,
        singlePort = true
      ))
    }

    for (w <- 0 until DCacheWays) {
      // only the ways selected by w.way_en are written; every way is read
      val wen = io.w.en && io.w.way_en(w)
      data_bank(w).io.w.req.valid := wen
      data_bank(w).io.w.req.bits.apply(
        setIdx = io.w.addr,
        data = io.w.data,
        waymask = 1.U
      )
      data_bank(w).io.r.req.valid := io.r.en
      data_bank(w).io.r.req.bits.apply(setIdx = io.r.addr)
    }

    // two-level way select: a one-hot mux inside each half of the ways, then a
    // 2:1 mux between the halves (tail(half) drops the `half` most significant
    // way bits, head(half) keeps them)
    val half = nWays / 2
    val data_read = data_bank.map(_.io.r.resp.data(0))
    val data_left = Mux1H(r_way_en_reg.tail(half), data_read.take(half))
    val data_right = Mux1H(r_way_en_reg.head(half), data_read.drop(half))

    val sel_low = r_way_en_reg.tail(half).orR()
    val row_data = Mux(sel_low, data_left, data_right)

    io.r.data := row_data

    def dump_r() = {
      when(RegNext(io.r.en)) {
        XSDebug("bank read addr %x way_en %x data %x\n",
          RegNext(io.r.addr),
          RegNext(io.r.way_en),
          io.r.data
        )
      }
    }

    def dump_w() = {
      when(io.w.en) {
        XSDebug("bank write addr %x way_en %x data %x\n",
          io.w.addr,
          io.w.way_en,
          io.w.data
        )
      }
    }

    def dump() = {
      dump_w()
      dump_r()
    }
  }

  val data_banks = List.tabulate(DCacheBanks)(i => Module(new DataSRAMBank(i)))
  val ecc_banks = List.fill(DCacheBanks)(Module(new SRAMTemplate(
    Bits(eccBits.W),
    set = DCacheSets,
    way = DCacheWays,
    shouldReset = false,
    holdRead = false,
    singlePort = true
  )))

  data_banks.foreach(_.dump())

  val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
  val way_en_reg = RegNext(way_en)
  val set_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val bank_addrs = Wire(Vec(LoadPipelineWidth, UInt()))

  // read data_banks and ecc_banks
  // for single port SRAM, do not allow read and write in the same cycle
  val rwhazard = io.write.valid
  val rrhazard = false.B // io.readline.valid
  (0 until LoadPipelineWidth).foreach(rport_index => {
    set_addrs(rport_index) := addr_to_dcache_set(io.read(rport_index).bits.addr)
    bank_addrs(rport_index) := addr_to_dcache_bank(io.read(rport_index).bits.addr)

    io.read(rport_index).ready := !(rwhazard || rrhazard)

    // use way_en to select a way after data read out
    assert(!(RegNext(io.read(rport_index).fire() && PopCount(io.read(rport_index).bits.way_en) > 1.U)))
    way_en(rport_index) := io.read(rport_index).bits.way_en
  })
  io.readline.ready := !(rwhazard)

  // read each bank, get bank result
  val bank_result = Wire(Vec(DCacheBanks, new L1BankedDataReadResult()))
  dontTouch(bank_result)
  val read_bank_error = Wire(Vec(DCacheBanks, Bool()))
  dontTouch(read_bank_error)
  // both load ports target the same bank in the same cycle
  val rr_bank_conflict = bank_addrs(0) === bank_addrs(1) && io.read(0).valid && io.read(1).valid
  // a load port collides with a line read
  val rrl_bank_conflict_0 = Wire(Bool())
  val rrl_bank_conflict_1 = Wire(Bool())
  if (ReduceReadlineConflict) {
    rrl_bank_conflict_0 := io.read(0).valid && io.readline.valid && io.readline.bits.rmask(bank_addrs(0))
    rrl_bank_conflict_1 := io.read(1).valid && io.readline.valid && io.readline.bits.rmask(bank_addrs(1))
  } else {
    rrl_bank_conflict_0 := io.read(0).valid && io.readline.valid
    rrl_bank_conflict_1 := io.read(1).valid && io.readline.valid
  }

  // a load port collides with a write
  val rw_bank_conflict_0 = io.read(0).valid && rwhazard
  val rw_bank_conflict_1 = io.read(1).valid && rwhazard
  val perf_multi_read = io.read(0).valid && io.read(1).valid
  // on a read-read conflict only port 1 is flagged
  io.bank_conflict_fast(0) := rw_bank_conflict_0 || rrl_bank_conflict_0
  io.bank_conflict_slow(0) := RegNext(io.bank_conflict_fast(0))
  io.bank_conflict_fast(1) := rw_bank_conflict_1 || rrl_bank_conflict_1 || rr_bank_conflict
  io.bank_conflict_slow(1) := RegNext(io.bank_conflict_fast(1))
  XSPerfAccumulate("data_array_multi_read", perf_multi_read)
  XSPerfAccumulate("data_array_rr_bank_conflict", rr_bank_conflict)
  XSPerfAccumulate("data_array_rrl_bank_conflict_0", rrl_bank_conflict_0)
  XSPerfAccumulate("data_array_rrl_bank_conflict_1", rrl_bank_conflict_1)
  XSPerfAccumulate("data_array_rw_bank_conflict_0", rw_bank_conflict_0)
  XSPerfAccumulate("data_array_rw_bank_conflict_1", rw_bank_conflict_1)
  XSPerfAccumulate("data_array_access_total", io.read(0).valid +& io.read(1).valid)
  XSPerfAccumulate("data_array_read_0", io.read(0).valid)
  XSPerfAccumulate("data_array_read_1", io.read(1).valid)
  XSPerfAccumulate("data_array_read_line", io.readline.valid)
  XSPerfAccumulate("data_array_write", io.write.valid)

  for (bank_index <- 0 until DCacheBanks) {
    //     Set Addr & Read Way Mask
    //
    //      Pipe 0      Pipe 1
    //        +           +
    //        |           |
    // +------+-----------+-------+
    //  X                        X
    //   X                      +------+ Bank Addr Match
    //    +---------+----------+
    //              |
    //     +--------+--------+
    //     |    Data Bank    |
    //     +-----------------+
    val bank_addr_matchs = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
      bank_addrs(i) === bank_index.U && io.read(i).valid
    })))
    val readline_match = Wire(Bool())
    if (ReduceReadlineConflict) {
      readline_match := io.readline.valid && io.readline.bits.rmask(bank_index)
    } else {
      readline_match := io.readline.valid
    }
    // a line read takes priority over the load ports, port 0 over port 1
    val bank_way_en = Mux(readline_match,
      io.readline.bits.way_en,
      Mux(bank_addr_matchs(0), way_en(0), way_en(1))
    )
    val bank_set_addr = Mux(readline_match,
      addr_to_dcache_set(io.readline.bits.addr),
      Mux(bank_addr_matchs(0), set_addrs(0), set_addrs(1))
    )
    // this bank is read by a load port or by the line read
    val read_enable = bank_addr_matchs.asUInt.orR || readline_match

    // read raw data
    val data_bank = data_banks(bank_index)
    data_bank.io.r.en := read_enable
    data_bank.io.r.way_en := bank_way_en
    data_bank.io.r.addr := bank_set_addr
    bank_result(bank_index).raw_data := data_bank.io.r.data

    // read ECC
    // The ECC SRAM must be read whenever the data SRAM is read, including for
    // line reads: the readline error report below checks the ECC of every bank.
    val ecc_bank = ecc_banks(bank_index)
    ecc_bank.io.r.req.valid := read_enable
    ecc_bank.io.r.req.bits.apply(setIdx = bank_set_addr)
    bank_result(bank_index).ecc := Mux1H(RegNext(bank_way_en), ecc_bank.io.r.resp.data)

    // use ECC to check error
    val data = bank_result(bank_index).asECCData()
    bank_result(bank_index).error := dcacheParameters.dataCode.decode(data).error
    // load-port view of the error: only meaningful if a load port read this bank last cycle
    read_bank_error(bank_index) := bank_result(bank_index).error && RegNext(bank_addr_matchs.asUInt.orR)
  }

  // read result: expose banked read result
  io.resp := bank_result

  // error detection
  // normal read ports
  (0 until LoadPipelineWidth).foreach(rport_index => {
    // Only the bank this port accessed in the previous cycle is relevant;
    // an error in a bank read by the other port must not be reported here.
    val rport_bank_error = read_bank_error(RegNext(bank_addrs(rport_index)))
    io.errors(rport_index).ecc_error.valid := RegNext(io.read(rport_index).fire()) &&
      rport_bank_error &&
      !io.bank_conflict_slow(rport_index)
    io.errors(rport_index).ecc_error.bits := true.B
    io.errors(rport_index).paddr.valid := io.errors(rport_index).ecc_error.valid
    io.errors(rport_index).paddr.bits := RegNext(io.read(rport_index).bits.addr)
  })
  // readline port
  io.errors(ReadlinePortErrorIndex).ecc_error.valid := RegNext(io.readline.fire()) &&
    VecInit((0 until DCacheBanks).map(i => io.resp(i).error)).asUInt().orR
  io.errors(ReadlinePortErrorIndex).ecc_error.bits := true.B
  io.errors(ReadlinePortErrorIndex).paddr.valid := io.errors(ReadlinePortErrorIndex).ecc_error.valid
  io.errors(ReadlinePortErrorIndex).paddr.bits := RegNext(io.readline.bits.addr)

  // write data_banks & ecc_banks
  val sram_waddr = addr_to_dcache_set(io.write.bits.addr)
  for (bank_index <- 0 until DCacheBanks) {
    // data write
    val data_bank = data_banks(bank_index)
    data_bank.io.w.en := io.write.valid && io.write.bits.wmask(bank_index)
    data_bank.io.w.way_en := io.write.bits.way_en
    data_bank.io.w.addr := sram_waddr
    data_bank.io.w.data := io.write.bits.data(bank_index)

    // ecc write
    val ecc_bank = ecc_banks(bank_index)
    // encode once, shared by the SRAM write and the debug print
    val write_ecc = getECCFromEncWord(cacheParams.dataCode.encode(io.write.bits.data(bank_index)))
    ecc_bank.io.w.req.valid := io.write.valid && io.write.bits.wmask(bank_index)
    ecc_bank.io.w.req.bits.apply(
      setIdx = sram_waddr,
      data = write_ecc,
      waymask = io.write.bits.way_en
    )
    when(ecc_bank.io.w.req.valid) {
      XSDebug("write in ecc sram: bank %x set %x data %x waymask %x\n",
        bank_index.U,
        sram_waddr,
        write_ecc,
        io.write.bits.way_en
      )
    }
  }

  // deal with customized cache op
  // The connections below come last, so while a cache op request is valid
  // they override the normal read / write connections made above.
  require(nWays <= 32) // wayNum(4, 0) can address at most 32 ways
  io.cacheOp.resp.bits := DontCare
  val cacheOpShouldResp = WireInit(false.B)
  val eccReadResult = Wire(Vec(DCacheBanks, UInt(eccBits.W)))
  when(io.cacheOp.req.valid) {
    when(CacheInstrucion.isReadData(io.cacheOp.req.bits.opCode)) {
      for (bank_index <- 0 until DCacheBanks) {
        val data_bank = data_banks(bank_index)
        data_bank.io.r.en := true.B
        data_bank.io.r.way_en := UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))
        data_bank.io.r.addr := io.cacheOp.req.bits.index
      }
      cacheOpShouldResp := true.B
    }
    when(CacheInstrucion.isReadDataECC(io.cacheOp.req.bits.opCode)) {
      for (bank_index <- 0 until DCacheBanks) {
        val ecc_bank = ecc_banks(bank_index)
        ecc_bank.io.r.req.valid := true.B
        ecc_bank.io.r.req.bits.setIdx := io.cacheOp.req.bits.index
      }
      cacheOpShouldResp := true.B
    }
    when(CacheInstrucion.isWriteData(io.cacheOp.req.bits.opCode)) {
      for (bank_index <- 0 until DCacheBanks) {
        val data_bank = data_banks(bank_index)
        data_bank.io.w.en := true.B
        data_bank.io.w.way_en := UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))
        data_bank.io.w.addr := io.cacheOp.req.bits.index
        data_bank.io.w.data := io.cacheOp.req.bits.write_data_vec(bank_index)
      }
      cacheOpShouldResp := true.B
    }
    when(CacheInstrucion.isWriteDataECC(io.cacheOp.req.bits.opCode)) {
      for (bank_index <- 0 until DCacheBanks) {
        val ecc_bank = ecc_banks(bank_index)
        ecc_bank.io.w.req.valid := true.B
        ecc_bank.io.w.req.bits.apply(
          setIdx = io.cacheOp.req.bits.index,
          data = io.cacheOp.req.bits.write_data_ecc,
          waymask = UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))
        )
      }
      cacheOpShouldResp := true.B
    }
  }
  // the response is produced one cycle after a recognised request
  io.cacheOp.resp.valid := RegNext(io.cacheOp.req.valid && cacheOpShouldResp)
  for (bank_index <- 0 until DCacheBanks) {
    io.cacheOp.resp.bits.read_data_vec(bank_index) := bank_result(bank_index).raw_data
    eccReadResult(bank_index) := ecc_banks(bank_index).io.r.resp.data(RegNext(io.cacheOp.req.bits.wayNum(4, 0)))
  }
  io.cacheOp.resp.bits.read_data_ecc := Mux(io.cacheOp.resp.valid,
    eccReadResult(RegNext(io.cacheOp.req.bits.bank_num)),
    0.U
  )
}
441