xref: /XiangShan/src/main/scala/xiangshan/mem/lsqueue/LoadQueueData.scala (revision 0a47e4a170d522db16fdff6b3d3d33297f714ba5)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.mem
18
19import chipsalliance.rocketchip.config.Parameters
20import chisel3._
21import chisel3.util._
22import utils._
23import xiangshan._
24import xiangshan.cache._
25import xiangshan.cache.{DCacheWordIO, DCacheLineIO, MemoryOpConstants}
26import xiangshan.mem._
27import xiangshan.backend.rob.RobPtr
28
/** Load queue data entry without the physical address.
  *
  * The declaration order of the fields is kept unchanged because it determines the
  * bit layout of the bundle.
  */
class LQDataEntryWoPaddr(implicit p: Parameters) extends XSBundle {
  // 8-bit mask of the access
  val mask: UInt = UInt(8.W)
  // load data, XLEN bits wide
  val data: UInt = UInt(XLEN.W)
  // 8 per-bit flags; presumably one per data byte, marking bytes that were forwarded — confirm against the producer
  val fwdMask: Vec[Bool] = Vec(8, Bool())
}
34
/** Load queue data entry: the fields of `LQDataEntryWoPaddr` plus a physical address. */
class LQDataEntry(implicit p: Parameters) extends LQDataEntryWoPaddr {
  // physical address, PAddrBits wide
  val paddr: UInt = UInt(PAddrBits.W)
}
38
39// Data module define
40// These data modules are like SyncDataModuleTemplate, but support cam-like ops
41
42// load queue paddr module
43//
44// It supports 3 cam sources:
45// * st-ld violation addr cam
46// * data release addr cam
47// * data refill addr cam
/** Physical-address storage for the load queue with banked, two-stage write ports
  * and three content-addressed match (CAM) ports.
  *
  * Write path: the entries are split into `numWBanks` banks. In stage s0 the one-hot
  * decoded write address, the write enable and the write data of every write port are
  * captured in per-bank registers; in stage s1 those registers drive the write of the
  * selected entry. A write is therefore delayed by one register stage compared to a
  * direct write into `data`.
  *
  * Read path: the read address is registered, then used to index `data`.
  *
  * @param numEntries number of stored addresses; must be a multiple of `numWBanks`
  * @param numRead    number of read ports
  * @param numWrite   number of write ports
  * @param numWBanks  number of write banks; must be a power of two and at least 2
  */
class LQPaddrModule(numEntries: Int, numRead: Int, numWrite: Int, numWBanks: Int)(implicit p: Parameters) extends XSModule with HasDCacheParameters {
  val io = IO(new Bundle {
    // normal read/write ports
    val raddr = Input(Vec(numRead, UInt(log2Up(numEntries).W)))
    val rdata = Output(Vec(numRead, UInt((PAddrBits).W)))
    val wen   = Input(Vec(numWrite, Bool()))
    val waddr = Input(Vec(numWrite, UInt(log2Up(numEntries).W)))
    val wdata = Input(Vec(numWrite, UInt((PAddrBits).W)))
    // violation cam: hit if addr is in the same word
    val violationMdata = Input(Vec(StorePipelineWidth, UInt((PAddrBits).W))) // addr
    val violationMmask = Output(Vec(StorePipelineWidth, Vec(numEntries, Bool()))) // cam result mask
    // release cam: hit if addr is in the same cacheline
    val releaseMdata = Input(Vec(LoadPipelineWidth, UInt((PAddrBits).W)))
    val releaseMmask = Output(Vec(LoadPipelineWidth, Vec(numEntries, Bool())))
    // refill cam: hit if addr is in the same cacheline
    val refillMdata = Input(UInt((PAddrBits).W))
    val refillMmask = Output(Vec(numEntries, Bool()))
  })

  require(isPow2(numWBanks))
  require(numWBanks >= 2)
  // Integer division below would silently drop the trailing entries (they would get no
  // write logic at all) if the entries did not split evenly across the banks.
  require(
    numEntries % numWBanks == 0,
    s"numEntries ($numEntries) must be a multiple of numWBanks ($numWBanks)"
  )

  val numEntryPerBank = numEntries / numWBanks

  val data = Reg(Vec(numEntries, UInt((PAddrBits).W)))

  // read ports: registered address, data indexed with the registered address
  for (i <- 0 until numRead) {
    io.rdata(i) := data(RegNext(io.raddr(i)))
  }

  // write ports
  val waddr_dec = io.waddr.map(a => UIntToOH(a))

  // Slice of a one-hot entry vector that belongs to the given bank.
  def selectBankMask(in: UInt, bank: Int): UInt = {
    in((bank + 1) * numEntryPerBank - 1, bank * numEntryPerBank)
  }

  for (bank <- 0 until numWBanks) {
    // s0: write to bank level buffer
    val s0_bank_waddr_dec = waddr_dec.map(a => selectBankMask(a, bank))
    // a port targets this bank when it is enabled and its one-hot address falls into the bank slice
    val s0_bank_write_en = io.wen.zip(s0_bank_waddr_dec).map { case (en, dec) => en && dec.orR }
    s0_bank_waddr_dec.zipWithIndex.foreach { case (sig, port) =>
      sig.suggestName(s"s0_bank_waddr_dec${bank}_${port}")
    }
    s0_bank_write_en.zipWithIndex.foreach { case (sig, port) =>
      sig.suggestName(s"s0_bank_write_en${bank}_${port}")
    }

    // s1: write data to entries
    // address and data registers only update when the port actually writes to this bank
    val s1_bank_waddr_dec = s0_bank_waddr_dec.zip(s0_bank_write_en).map { case (dec, en) => RegEnable(dec, en) }
    val s1_bank_wen = RegNext(VecInit(s0_bank_write_en))
    val s1_wdata = io.wdata.zip(s0_bank_write_en).map { case (wdata, en) => RegEnable(wdata, en) }
    s1_bank_waddr_dec.zipWithIndex.foreach { case (sig, port) =>
      sig.suggestName(s"s1_bank_waddr_dec${bank}_${port}")
    }
    s1_bank_wen.zipWithIndex.foreach { case (sig, port) =>
      sig.suggestName(s"s1_bank_wen${bank}_${port}")
    }
    s1_wdata.zipWithIndex.foreach { case (sig, port) =>
      sig.suggestName(s"s1_wdata${bank}_${port}")
    }

    // entry write
    for (entry <- 0 until numEntryPerBank) {
      val s1_entry_write_en_vec = s1_bank_wen.zip(s1_bank_waddr_dec).map { case (en, dec) => en && dec(entry) }
      val s1_entry_write_en = VecInit(s1_entry_write_en_vec).asUInt.orR
      // one-hot select is valid because write conflicts are forbidden (see assertion below)
      val s1_entry_write_data = Mux1H(s1_entry_write_en_vec, s1_wdata)
      when (s1_entry_write_en) {
        data(bank * numEntryPerBank + entry) := s1_entry_write_data
      }
      s1_entry_write_en_vec.zipWithIndex.foreach { case (sig, port) =>
        sig.suggestName(s"s1_entry_write_en_vec${bank}_${entry}_${port}")
      }
      s1_entry_write_en.suggestName(s"s1_entry_write_en${bank}_${entry}")
      s1_entry_write_data.suggestName(s"s1_entry_write_data${bank}_${entry}")
    }
  }

  // content addressed match
  // violation cam: compare all address bits above the word offset
  for (i <- 0 until StorePipelineWidth) {
    for (j <- 0 until numEntries) {
      io.violationMmask(i)(j) := io.violationMdata(i)(PAddrBits-1, DCacheWordOffset) === data(j)(PAddrBits-1, DCacheWordOffset)
    }
  }
  // release cam: compare all address bits above the cacheline offset
  for (i <- 0 until LoadPipelineWidth) {
    for (j <- 0 until numEntries) {
      io.releaseMmask(i)(j) := io.releaseMdata(i)(PAddrBits-1, DCacheLineOffset) === data(j)(PAddrBits-1, DCacheLineOffset)
    }
  }

  // refill cam: compare refill addresses
  for (j <- 0 until numEntries) {
    io.refillMmask(j) := get_refill_addr(io.refillMdata) === get_refill_addr(data(j))
  }

  // This module must not be used when there are any write conflicts:
  // two enabled write ports may never target the same entry in the same cycle.
  for (i <- 0 until numWrite) {
    for (j <- i+1 until numWrite) {
      assert(
        !(io.wen(i) && io.wen(j) && io.waddr(i) === io.waddr(j)),
        s"LQPaddrModule: write ports $i and $j write the same entry"
      )
    }
  }
}
149
150// load queue load mask module
/** Storage for the 8-bit mask of every load queue entry, with a mask-overlap match port
  * per store pipeline.
  *
  * @param numEntries number of stored masks
  * @param numRead    number of read ports
  * @param numWrite   number of write ports
  */
class LQMaskModule(numEntries: Int, numRead: Int, numWrite: Int)(implicit p: Parameters) extends XSModule {
  val io = IO(new Bundle {
    val raddr = Input(Vec(numRead, UInt(log2Up(numEntries).W)))
    val rdata = Output(Vec(numRead, UInt(8.W)))
    val wen   = Input(Vec(numWrite, Bool()))
    val waddr = Input(Vec(numWrite, UInt(log2Up(numEntries).W)))
    val wdata = Input(Vec(numWrite, UInt(8.W)))
    // st-ld violation check wmask compare
    val violationMdata = Input(Vec(StorePipelineWidth, UInt(8.W))) // input 8-bit wmask
    val violationMmask = Output(Vec(StorePipelineWidth, Vec(numEntries, Bool()))) // output wmask overlap vector
  })

  val data = Reg(Vec(numEntries, UInt(8.W)))

  // read ports: the address is registered before it indexes the storage
  io.rdata.zip(io.raddr).foreach { case (readData, readAddr) =>
    readData := data(RegNext(readAddr))
  }

  // write ports: decode every write address once, then pick the matching port per entry
  val waddr_dec = io.waddr.map(addr => UIntToOH(addr))
  data.zipWithIndex.foreach { case (entry, idx) =>
    val portHits = io.wen.zip(waddr_dec).map { case (en, oneHot) => en && oneHot(idx) }
    val anyHit = VecInit(portHits).asUInt.orR
    when (anyHit) {
      entry := Mux1H(portHits, io.wdata)
    }
  }

  // st-ld violation check: an entry matches when its mask shares at least one set bit
  // with the queried mask
  io.violationMdata.zip(io.violationMmask).foreach { case (queryMask, hitVec) =>
    hitVec.zip(data).foreach { case (hit, entryMask) =>
      hit := (queryMask & entryMask).orR
    }
  }

  // No two enabled write ports may target the same entry in the same cycle
  for (first <- 0 until numWrite; second <- first + 1 until numWrite) {
    assert(!(io.wen(first) && io.wen(second) && io.waddr(first) === io.waddr(second)))
  }
}
193
// LQDataModule is a wrapper of 8 bit MaskedBankedSyncDataModuleTemplates
195//
196// It also contains:
197// * fwdMask, which is used to merge refill data and forwarded data
198// * word index extracted from paddr, which is used to select data from refill data (a cacheline)
/** Load queue data storage built from eight byte-wide `MaskedBankedSyncDataModuleTemplate`s.
  *
  * Next to the data bytes it keeps, per entry:
  *  - `fwdMask`: bytes flagged here are excluded from refill writes
  *  - `wordIndex`: the word selector extracted from the entry's paddr, used to pick the
  *    matching word out of the refill data
  *
  * @param numEntries number of data entries
  * @param numRead    number of read ports
  * @param numWrite   number of address-indexed write ports
  */
class LQDataModule(numEntries: Int, numRead: Int, numWrite: Int)(implicit p: Parameters) extends XSModule with HasDCacheParameters {
  val io = IO(new Bundle {
    // sync read
    val raddr = Input(Vec(numRead, UInt(log2Up(numEntries).W)))
    val rdata = Output(Vec(numRead, UInt(XLEN.W)))

    // address indexed write
    val wen   = Input(Vec(numWrite, Bool()))
    val waddr = Input(Vec(numWrite, UInt(log2Up(numEntries).W)))
    val wdata = Input(Vec(numWrite, UInt(XLEN.W)))
    // forward mask needs to be recorded to merge data
    val fwdMaskWdata = Input(Vec(numWrite, UInt(8.W)))
    // refillOffBits - wordOffBits bits in paddr need to be stored in LQDataModule for refilling
    val paddrWdata = Input(Vec(numWrite, UInt((PAddrBits).W)))

    // masked write
    val mwmask = Input(Vec(numEntries, Bool()))
    val refillData = Input(UInt(l1BusDataWidth.W))
  })

  // one storage module per data byte
  val data8 = Seq.fill(8)(Module(new MaskedBankedSyncDataModuleTemplate(
    UInt(8.W), numEntries, numRead, numWrite, numMWrite = refillWords, numWBanks = LoadQueueNWriteBanks
  )))
  val fwdMask = Reg(Vec(numEntries, UInt(8.W)))
  val wordIndex = Reg(Vec(numEntries, UInt((refillOffBits - wordOffBits).W)))

  // read ports: broadcast the address to all byte modules and reassemble the bytes,
  // byte 0 ending up in the least significant position
  for (port <- 0 until numRead) {
    data8.foreach(_.io.raddr(port) := io.raddr(port))
    io.rdata(port) := VecInit(data8.map(_.io.rdata(port))).asUInt
  }

  // address-indexed write ports (later ports take priority on the control registers)
  for (port <- 0 until numWrite) {
    // each byte module receives its own byte lane of the write data
    data8.zipWithIndex.foreach { case (byteModule, lane) =>
      byteModule.io.waddr(port) := io.waddr(port)
      byteModule.io.wdata(port) := io.wdata(port)(8 * lane + 7, 8 * lane)
      byteModule.io.wen(port) := io.wen(port)
    }

    // control info is written together with the data
    // TODO: optimize that
    when (io.wen(port)) {
      fwdMask(io.waddr(port)) := io.fwdMaskWdata(port)
      wordIndex(io.waddr(port)) := get_word(io.paddrWdata(port))
    }
  }

  // refill path

  // split the refill data into words
  val words = VecInit((0 until refillWords) map { w => io.refillData(DataBits * (w + 1) - 1, DataBits * w)})
  // every masked-write port j carries word j; each byte module takes its own byte lane
  data8.zipWithIndex.foreach { case (byteModule, lane) =>
    for (w <- 0 until refillWords) {
      byteModule.io.mwdata(w) := words(w)(8 * lane + 7, 8 * lane)
    }
  }

  // refill write mask: an entry takes word w when its stored word index selects w, the
  // entry is selected by io.mwmask, and the byte is not flagged in fwdMask
  for (w <- 0 until refillWords; entry <- 0 until numEntries) {
    val refillHit = wordIndex(entry) === w.U && io.mwmask(entry)
    data8.zipWithIndex.foreach { case (byteModule, lane) =>
      byteModule.io.mwmask(w)(entry) := refillHit && !fwdMask(entry)(lane)
    }
  }

  // No two enabled write ports may target the same entry in the same cycle
  for (first <- 0 until numWrite; second <- first + 1 until numWrite) {
    assert(!(io.wen(first) && io.wen(second) && io.waddr(first) === io.waddr(second)))
  }
}
281
282// LoadQueueDataWrapper wraps:
283// * load queue paddrModule
284// * load queue maskModule
285// * load queue dataModule
286// and their interconnect
/** Wrapper that instantiates the load queue paddr, mask and data modules and wires
  * their read, write, CAM and refill ports to a single IO bundle.
  *
  * The sub-modules are sized from `LoadPipelineWidth`, so both `wbNumRead` and
  * `wbNumWrite` must equal `LoadPipelineWidth`; this is checked at elaboration time.
  *
  * @param size       number of load queue entries
  * @param wbNumRead  number of writeback read ports (one extra read port serves uncache)
  * @param wbNumWrite number of writeback write ports (one extra data write port serves uncache)
  */
class LoadQueueDataWrapper(size: Int, wbNumRead: Int, wbNumWrite: Int)(implicit p: Parameters) extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper {
  val io = IO(new Bundle() {
    // paddr write ports, driven separately from the mask/data write ports
    val paddr = new Bundle() {
      val wen = Vec(wbNumWrite, Input(Bool()))
      val waddr = Input(Vec(wbNumWrite, UInt(log2Up(size).W)))
      val wdata = Input(Vec(wbNumWrite, UInt(PAddrBits.W)))
    }
    // writeback ports: write mask/data (and fwdMask / word index), read paddr/mask/data
    val wb = new Bundle() {
      val wen = Vec(wbNumWrite, Input(Bool()))
      val waddr = Input(Vec(wbNumWrite, UInt(log2Up(size).W)))
      val wdata = Input(Vec(wbNumWrite, new LQDataEntry))
      val raddr = Input(Vec(wbNumRead, UInt(log2Up(size).W)))
      val rdata = Output(Vec(wbNumRead, new LQDataEntry))
    }
    // uncache port: writes data only, reads paddr/mask/data
    val uncache = new Bundle() {
      val wen = Input(Bool())
      val waddr = Input(UInt(log2Up(size).W))
      val wdata = Input(UInt(XLEN.W)) // only write back uncache data
      val raddr = Input(UInt(log2Up(size).W))
      val rdata = Output(new LQDataEntry)
    }
    val refill = new Bundle() {
      val valid = Input(Bool())
      val paddr = Input(UInt(PAddrBits.W))
      val data = Input(UInt(l1BusDataWidth.W))
      val refillMask = Input(Vec(size, Bool()))
      val matchMask = Output(Vec(size, Bool()))
    }
    // st-ld violation query, word level cam
    val violation = Vec(StorePipelineWidth, new Bundle() {
      val paddr = Input(UInt(PAddrBits.W))
      val mask = Input(UInt(8.W))
      val violationMask = Output(Vec(size, Bool()))
    })
    // ld-ld violation query, cache line level cam
    val release_violation = Vec(LoadPipelineWidth, new Bundle() {
      val paddr = Input(UInt(PAddrBits.W))
      val match_mask = Output(Vec(size, Bool()))
      // if an ld-ld violation does happen, we replay from the elder load
    })
    val debug = Output(Vec(size, new LQDataEntry))

    /** Drives address and data of writeback write channel `channel`.
      * The caller still has to drive `this.wb.wen(channel)` itself.
      */
    def wbWrite(channel: Int, waddr: UInt, wdata: LQDataEntry): Unit = {
      require(channel >= 0 && channel < wbNumWrite)
      this.wb.waddr(channel) := waddr
      this.wb.wdata(channel) := wdata
    }

    /** Drives address and data of the uncache write port.
      * The caller still has to drive `this.uncache.wen` itself.
      */
    def uncacheWrite(waddr: UInt, wdata: UInt): Unit = {
      this.uncache.waddr := waddr
      this.uncache.wdata := wdata
    }
  })

  // The sub-modules below get LoadPipelineWidth(+1) ports while the wiring iterates over
  // wbNumRead / wbNumWrite; any mismatch would leave ports unconnected or index out of range.
  require(wbNumRead == LoadPipelineWidth,
    s"wbNumRead ($wbNumRead) must equal LoadPipelineWidth ($LoadPipelineWidth)")
  require(wbNumWrite == LoadPipelineWidth,
    s"wbNumWrite ($wbNumWrite) must equal LoadPipelineWidth ($LoadPipelineWidth)")

  // data module
  val paddrModule = Module(new LQPaddrModule(size, numRead = LoadPipelineWidth+1, numWrite = LoadPipelineWidth, numWBanks = LoadQueueNWriteBanks))
  val maskModule = Module(new LQMaskModule(size, numRead = LoadPipelineWidth+1, numWrite = LoadPipelineWidth))
  val dataModule = Module(new LQDataModule(size, numRead = LoadPipelineWidth+1, numWrite = LoadPipelineWidth+1))

  // read data
  // read port 0 -> wbNumRead-1: writeback
  for (i <- 0 until wbNumRead) {
    paddrModule.io.raddr(i) := io.wb.raddr(i)
    maskModule.io.raddr(i) := io.wb.raddr(i)
    dataModule.io.raddr(i) := io.wb.raddr(i)

    io.wb.rdata(i).paddr := paddrModule.io.rdata(i)
    io.wb.rdata(i).mask := maskModule.io.rdata(i)
    io.wb.rdata(i).data := dataModule.io.rdata(i)
    // fwdMask is not readable through this wrapper
    io.wb.rdata(i).fwdMask := DontCare
  }

  // read port wbNumRead: uncache
  paddrModule.io.raddr(wbNumRead) := io.uncache.raddr
  maskModule.io.raddr(wbNumRead) := io.uncache.raddr
  dataModule.io.raddr(wbNumRead) := io.uncache.raddr

  io.uncache.rdata.paddr := paddrModule.io.rdata(wbNumRead)
  io.uncache.rdata.mask := maskModule.io.rdata(wbNumRead)
  io.uncache.rdata.data := dataModule.io.rdata(wbNumRead)
  io.uncache.rdata.fwdMask := DontCare

  // write data
  // write port 0 -> wbNumWrite-1: writeback
  for (i <- 0 until wbNumWrite) {
    // mask and data are written by the writeback port
    maskModule.io.wen(i) := io.wb.wen(i)
    maskModule.io.waddr(i) := io.wb.waddr(i)
    maskModule.io.wdata(i) := io.wb.wdata(i).mask

    dataModule.io.wen(i) := io.wb.wen(i)
    dataModule.io.waddr(i) := io.wb.waddr(i)
    dataModule.io.wdata(i) := io.wb.wdata(i).data
    dataModule.io.fwdMaskWdata(i) := io.wb.wdata(i).fwdMask.asUInt
    dataModule.io.paddrWdata(i) := io.wb.wdata(i).paddr

    // paddr has its own write port with independent enable and address
    paddrModule.io.wen(i) := io.paddr.wen(i)
    paddrModule.io.waddr(i) := io.paddr.waddr(i)
    paddrModule.io.wdata(i) := io.paddr.wdata(i)
  }

  // write port wbNumWrite: uncache, data only
  // NOTE(review): LQDataModule updates fwdMask and the word index on every enabled write
  // port, so an uncache write also stores these don't-care values for the entry.
  dataModule.io.wen(wbNumWrite) := io.uncache.wen
  dataModule.io.waddr(wbNumWrite) := io.uncache.waddr
  dataModule.io.fwdMaskWdata(wbNumWrite) := DontCare
  dataModule.io.paddrWdata(wbNumWrite) := DontCare
  dataModule.io.wdata(wbNumWrite) := io.uncache.wdata

  // st-ld mem access violation check, gen violationMask:
  // an entry is reported when both the address cam and the mask overlap check hit
  for (i <- 0 until StorePipelineWidth) {
    paddrModule.io.violationMdata(i) := io.violation(i).paddr
    maskModule.io.violationMdata(i) := io.violation(i).mask
    io.violation(i).violationMask := (paddrModule.io.violationMmask(i).asUInt & maskModule.io.violationMmask(i).asUInt).asBools
  }

  // ld-ld mem access violation check, gen violationMask (cam match mask)
  for (i <- 0 until LoadPipelineWidth) {
    paddrModule.io.releaseMdata(i) := io.release_violation(i).paddr
    io.release_violation(i).match_mask := paddrModule.io.releaseMmask(i)
  }

  // gen paddr match mask
  paddrModule.io.refillMdata := io.refill.paddr
  io.refill.matchMask := paddrModule.io.refillMmask

  // refill data according to matchMask, refillMask and refill.valid
  dataModule.io.refillData := io.refill.data
  for (i <- 0 until size) {
    dataModule.io.mwmask(i) := io.refill.valid && io.refill.matchMask(i) && io.refill.refillMask(i)
  }

  // debug data read (not driven)
  io.debug := DontCare
}
436