xref: /XiangShan/src/main/scala/xiangshan/mem/lsqueue/LoadQueueData.scala (revision 3c02ee8f82edea481fa8336c7f54ffc17fafba91)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.mem
18
19import chipsalliance.rocketchip.config.Parameters
20import chisel3._
21import chisel3.util._
22import utils._
23import utility._
24import xiangshan._
25import xiangshan.cache._
26import xiangshan.cache.{DCacheWordIO, DCacheLineIO, MemoryOpConstants}
27import xiangshan.mem._
28import xiangshan.backend.rob.RobPtr
29
// Load queue entry payload without the physical address.
class LQDataEntryWoPaddr(implicit p: Parameters) extends XSBundle {
  // 8-bit mask, one bit per byte of `data`
  val mask = UInt(8.W)
  // load data word, XLEN bits wide
  val data = UInt(XLEN.W)
  // one flag per byte; presumably marks bytes supplied by forwarding -- confirm against users
  val fwdMask = Vec(8, Bool())
}
35
// Full load queue entry payload: everything in LQDataEntryWoPaddr plus the physical address.
class LQDataEntry(implicit p: Parameters) extends LQDataEntryWoPaddr {
  // physical address, PAddrBits wide
  val paddr = UInt(PAddrBits.W)
}
39
// Data module definitions
41// These data modules are like SyncDataModuleTemplate, but support cam-like ops
42
43// load queue paddr module
44//
45// It supports 3 cam sources:
46// * st-ld violation addr cam
47// * data release addr cam
48// * data refill addr cam
// Register-based storage of one physical address per load queue entry.
//
// * Reads index the register array with a one-cycle-delayed (RegNext) address.
// * Writes are split into `numWBanks` banks and take two stages: s0 decodes the
//   write address per bank, s1 updates the selected entry.
// * Three cam ports compare every stored address against an incoming address.
//
// `numEntries` must be a multiple of `numWBanks`; otherwise the trailing entries
// would belong to no bank and could never be written.
class LQPaddrModule(numEntries: Int, numRead: Int, numWrite: Int, numWBanks: Int)(implicit p: Parameters) extends XSModule with HasDCacheParameters {
  val io = IO(new Bundle {
    // normal read/write ports
    val raddr = Input(Vec(numRead, UInt(log2Up(numEntries).W)))
    val rdata = Output(Vec(numRead, UInt((PAddrBits).W)))
    val wen   = Input(Vec(numWrite, Bool()))
    val waddr = Input(Vec(numWrite, UInt(log2Up(numEntries).W)))
    val wdata = Input(Vec(numWrite, UInt((PAddrBits).W)))
    // violation cam: hit if addr is in the same word
    val violationMdata = Input(Vec(StorePipelineWidth, UInt((PAddrBits).W))) // addr
    val violationMmask = Output(Vec(StorePipelineWidth, Vec(numEntries, Bool()))) // cam result mask
    // release cam: hit if addr is in the same cacheline
    val releaseMdata = Input(Vec(LoadPipelineWidth, UInt((PAddrBits).W)))
    val releaseMmask = Output(Vec(LoadPipelineWidth, Vec(numEntries, Bool())))
    // refill cam: hit if addr is in the same cacheline
    val refillMdata = Input(UInt((PAddrBits).W))
    val refillMmask = Output(Vec(numEntries, Bool()))
  })

  require(isPow2(numWBanks))
  require(numWBanks >= 2)
  // integer division below truncates: reject configurations that would leave
  // entries outside every write bank
  require(numEntries % numWBanks == 0,
    s"LQPaddrModule: numEntries ($numEntries) must be a multiple of numWBanks ($numWBanks)")

  val numEntryPerBank = numEntries / numWBanks

  val data = Reg(Vec(numEntries, UInt((PAddrBits).W)))

  // read ports: the read address is registered, so rdata follows raddr by one cycle
  for (i <- 0 until numRead) {
    io.rdata(i) := data(RegNext(io.raddr(i)))
  }

  // write ports
  // one-hot decode of every write address
  val waddr_dec = io.waddr.map(a => UIntToOH(a))
  // slice of a one-hot entry mask that belongs to the given bank
  def selectBankMask(in: UInt, bank: Int): UInt = {
    in((bank + 1) * numEntryPerBank - 1, bank * numEntryPerBank)
  }
  for (bank <- 0 until numWBanks) {
    // s0: write to bank level buffer
    val s0_bank_waddr_dec = waddr_dec.map(a => selectBankMask(a, bank))
    // a port writes this bank when it is enabled and its address falls into the bank
    val s0_bank_write_en = io.wen.zip(s0_bank_waddr_dec).map { case (en, dec) => en && dec.orR }
    s0_bank_waddr_dec.zipWithIndex.foreach { case (sig, port) =>
      sig.suggestName(s"s0_bank_waddr_dec${bank}_${port}")
    }
    s0_bank_write_en.zipWithIndex.foreach { case (sig, port) =>
      sig.suggestName(s"s0_bank_write_en${bank}_${port}")
    }
    // s1: write data to entries
    // address and data registers only update when the port actually writes this bank
    val s1_bank_waddr_dec = s0_bank_waddr_dec.zip(s0_bank_write_en).map { case (dec, en) => RegEnable(dec, en) }
    val s1_bank_wen = RegNext(VecInit(s0_bank_write_en))
    val s1_wdata = io.wdata.zip(s0_bank_write_en).map { case (wd, en) => RegEnable(wd, en) }
    s1_bank_waddr_dec.zipWithIndex.foreach { case (sig, port) =>
      sig.suggestName(s"s1_bank_waddr_dec${bank}_${port}")
    }
    s1_bank_wen.zipWithIndex.foreach { case (sig, port) =>
      sig.suggestName(s"s1_bank_wen${bank}_${port}")
    }
    s1_wdata.zipWithIndex.foreach { case (sig, port) =>
      sig.suggestName(s"s1_wdata${bank}_${port}")
    }

    // entry write
    for (entry <- 0 until numEntryPerBank) {
      // per-port hit vector for this entry; used as the Mux1H select
      val s1_entry_write_en_vec = s1_bank_wen.zip(s1_bank_waddr_dec).map { case (en, dec) => en && dec(entry) }
      val s1_entry_write_en = VecInit(s1_entry_write_en_vec).asUInt.orR
      val s1_entry_write_data = Mux1H(s1_entry_write_en_vec, s1_wdata)
      when (s1_entry_write_en) {
        data(bank * numEntryPerBank + entry) := s1_entry_write_data
      }
      s1_entry_write_en_vec.zipWithIndex.foreach { case (sig, port) =>
        sig.suggestName(s"s1_entry_write_en_vec${bank}_${entry}_${port}")
      }
      s1_entry_write_en.suggestName(s"s1_entry_write_en${bank}_${entry}")
      s1_entry_write_data.suggestName(s"s1_entry_write_data${bank}_${entry}")
    }
  }

  // content addressed match
  // violation cam: compare address bits above DCacheWordOffset
  for (i <- 0 until StorePipelineWidth) {
    for (j <- 0 until numEntries) {
      io.violationMmask(i)(j) := io.violationMdata(i)(PAddrBits-1, DCacheWordOffset) === data(j)(PAddrBits-1, DCacheWordOffset)
    }
  }
  // release cam: compare address bits above DCacheLineOffset
  for (i <- 0 until LoadPipelineWidth) {
    for (j <- 0 until numEntries) {
      io.releaseMmask(i)(j) := io.releaseMdata(i)(PAddrBits-1, DCacheLineOffset) === data(j)(PAddrBits-1, DCacheLineOffset)
    }
  }

  // refill cam: compare the addresses after get_refill_addr
  for (j <- 0 until numEntries) {
    io.refillMmask(j) := get_refill_addr(io.refillMdata) === get_refill_addr(data(j))
  }

  // DataModuleTemplate should not be used when there are any write conflicts:
  // two enabled write ports must never target the same entry in the same cycle
  for (i <- 0 until numWrite) {
    for (j <- i+1 until numWrite) {
      assert(!(io.wen(i) && io.wen(j) && io.waddr(i) === io.waddr(j)))
    }
  }
}
150
151// load queue load mask module
// Register-based storage of one 8-bit mask per load queue entry, with a cam port
// that reports which stored masks overlap an incoming 8-bit mask.
class LQMaskModule(numEntries: Int, numRead: Int, numWrite: Int)(implicit p: Parameters) extends XSModule {
  val io = IO(new Bundle {
    val raddr = Input(Vec(numRead, UInt(log2Up(numEntries).W)))
    val rdata = Output(Vec(numRead, UInt(8.W)))
    val wen   = Input(Vec(numWrite, Bool()))
    val waddr = Input(Vec(numWrite, UInt(log2Up(numEntries).W)))
    val wdata = Input(Vec(numWrite, UInt(8.W)))
    // st-ld violation check wmask compare
    val violationMdata = Input(Vec(StorePipelineWidth, UInt(8.W))) // input 8-bit wmask
    val violationMmask = Output(Vec(StorePipelineWidth, Vec(numEntries, Bool()))) // output wmask overlap vector
  })

  val data = Reg(Vec(numEntries, UInt(8.W)))

  // read ports: the read address is registered, so rdata follows raddr by one cycle
  io.rdata.zip(io.raddr).foreach { case (rd, ra) =>
    rd := data(RegNext(ra))
  }

  // write ports: every entry picks the data of the (single) enabled port addressing it
  val waddr_dec = io.waddr.map(a => UIntToOH(a))
  data.zipWithIndex.foreach { case (entry, j) =>
    val write_wen = io.wen.zip(waddr_dec).map { case (en, dec) => en && dec(j) }
    when (VecInit(write_wen).asUInt.orR) {
      entry := Mux1H(write_wen, io.wdata)
    }
  }

  // st-ld violation check wmask compare: hit when the two masks share at least one set bit
  for (i <- 0 until StorePipelineWidth; j <- 0 until numEntries) {
    io.violationMmask(i)(j) := (io.violationMdata(i) & data(j)).orR
  }

  // DataModuleTemplate should not be used when there are any write conflicts
  for (i <- 0 until numWrite; j <- i + 1 until numWrite) {
    assert(!(io.wen(i) && io.wen(j) && io.waddr(i) === io.waddr(j)))
  }
}
194
// LQDataModule is a wrapper of 8-bit MaskedBankedSyncDataModuleTemplates
196//
197// It also contains:
198// * fwdMask, which is used to merge refill data and forwarded data
199// * word index extracted from paddr, which is used to select data from refill data (a cacheline)
// Load data storage built from eight byte-wide MaskedBankedSyncDataModuleTemplate
// instances (one per byte lane of the XLEN-bit data word), plus per-entry fwdMask
// and wordIndex registers that steer the masked refill write.
class LQDataModule(numEntries: Int, numRead: Int, numWrite: Int)(implicit p: Parameters) extends XSModule with HasDCacheParameters {
  val io = IO(new Bundle {
    // sync read
    val raddr = Input(Vec(numRead, UInt(log2Up(numEntries).W)))
    val rdata = Output(Vec(numRead, UInt(XLEN.W)))

    // address indexed write
    val wen   = Input(Vec(numWrite, Bool()))
    val waddr = Input(Vec(numWrite, UInt(log2Up(numEntries).W)))
    val wdata = Input(Vec(numWrite, UInt(XLEN.W)))
    // forward mask needs to be recorded to merge data
    val fwdMaskWdata = Input(Vec(numWrite, UInt(8.W)))
    // refillOffBits - wordOffBits bits in paddr need to be stored in LQDataModule for refilling
    val paddrWdata = Input(Vec(numWrite, UInt((PAddrBits).W)))

    // masked write
    val mwmask = Input(Vec(numEntries, Bool()))
    val refillData = Input(UInt(l1BusDataWidth.W))
  })

  // one storage module per byte lane
  val data8 = Seq.fill(8)(Module(new MaskedBankedSyncDataModuleTemplate(
    UInt(8.W), numEntries, numRead, numWrite, numMWrite = refillWords, numWBanks = LoadQueueNWriteBanks
  )))
  val fwdMask = Reg(Vec(numEntries, UInt(8.W)))
  val wordIndex = Reg(Vec(numEntries, UInt((refillOffBits - wordOffBits).W)))

  // read ports: broadcast the address to all lanes and concatenate the bytes (lane 0 is the LSB byte)
  for (port <- 0 until numRead) {
    data8.foreach(lane => lane.io.raddr(port) := io.raddr(port))
    io.rdata(port) := VecInit(data8.map(lane => lane.io.rdata(port))).asUInt
  }

  // write ports (later ports take priority on the control registers)
  for (port <- 0 until numWrite) {
    // each lane receives its own byte of the write data
    data8.zipWithIndex.foreach { case (lane, byte) =>
      lane.io.waddr(port) := io.waddr(port)
      lane.io.wdata(port) := io.wdata(port)(8 * byte + 7, 8 * byte)
      lane.io.wen(port) := io.wen(port)
    }

    // write ctrl info
    // TODO: optimize that
    when (io.wen(port)) {
      fwdMask(io.waddr(port)) := io.fwdMaskWdata(port)
      wordIndex(io.waddr(port)) := get_word(io.paddrWdata(port))
    }
  }

  // write refilled data to data8

  // split the refill data into refillWords words of DataBits each
  val words = VecInit(Seq.tabulate(refillWords)(w => io.refillData(DataBits * (w + 1) - 1, DataBits * w)))
  // masked-write port w of every lane carries that lane's byte of word w
  for ((lane, byte) <- data8.zipWithIndex; w <- 0 until refillWords) {
    lane.io.mwdata(w) := words(w)(8 * byte + 7, 8 * byte)
  }

  // gen refill wmask: an entry takes byte `byte` of word w when its wordIndex is w,
  // its mwmask bit is set and that byte is not flagged in fwdMask
  for (w <- 0 until refillWords; entry <- 0 until numEntries) {
    val wordMatch = wordIndex(entry) === w.U
    for ((lane, byte) <- data8.zipWithIndex) {
      lane.io.mwmask(w)(entry) := wordMatch && io.mwmask(entry) && !fwdMask(entry)(byte)
    }
  }

  // DataModuleTemplate should not be used when there are any write conflicts
  for (i <- 0 until numWrite; j <- i + 1 until numWrite) {
    assert(!(io.wen(i) && io.wen(j) && io.waddr(i) === io.waddr(j)))
  }
}
282
283// LoadQueueDataWrapper wraps:
284// * load queue paddrModule
285// * load queue maskModule
286// * load queue dataModule
287// and their interconnect
// Instantiates the paddr, mask and data modules for `size` load queue entries and
// wires them to a common interface:
// * `wbNumRead` writeback read ports plus one uncache read port,
// * `wbNumWrite` writeback write ports (paddr is written through its own enable),
//   plus one data-only uncache write port,
// * store-load violation, release and refill address cams.
//
// The sub-modules are sized from `wbNumRead` / `wbNumWrite`, the same parameters
// that index their ports below, so the port counts always agree.
class LoadQueueDataWrapper(size: Int, wbNumRead: Int, wbNumWrite: Int)(implicit p: Parameters) extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper {
  val io = IO(new Bundle() {
    // paddr write ports, enabled independently of the wb write ports
    val paddr = new Bundle() {
      val wen = Vec(wbNumWrite, Input(Bool()))
      val waddr = Input(Vec(wbNumWrite, UInt(log2Up(size).W)))
      val wdata = Input(Vec(wbNumWrite, UInt(PAddrBits.W)))
    }
    // writeback read/write ports (mask, data, fwdMask and word index)
    val wb = new Bundle() {
      val wen = Vec(wbNumWrite, Input(Bool()))
      val waddr = Input(Vec(wbNumWrite, UInt(log2Up(size).W)))
      val wdata = Input(Vec(wbNumWrite, new LQDataEntry))
      val raddr = Input(Vec(wbNumRead, UInt(log2Up(size).W)))
      val rdata = Output(Vec(wbNumRead, new LQDataEntry))
    }
    // uncache port: one extra read port and one data-only write port
    val uncache = new Bundle() {
      val wen = Input(Bool())
      val waddr = Input(UInt(log2Up(size).W))
      val wdata = Input(UInt(XLEN.W)) // only write back uncache data
      val raddr = Input(UInt(log2Up(size).W))
      val rdata = Output(new LQDataEntry)
    }
    // cacheline refill
    val refill = new Bundle() {
      val valid = Input(Bool())
      val paddr = Input(UInt(PAddrBits.W))
      val data = Input(UInt(l1BusDataWidth.W))
      val refillMask = Input(Vec(size, Bool()))
      val matchMask = Output(Vec(size, Bool()))
    }
    // st-ld violation query, word level cam
    val violation = Vec(StorePipelineWidth, new Bundle() {
      val paddr = Input(UInt(PAddrBits.W))
      val mask = Input(UInt(8.W))
      val violationMask = Output(Vec(size, Bool()))
    })
    // ld-ld violation query, cache line level cam
    val release_violation = Vec(LoadPipelineWidth, new Bundle() {
      val paddr = Input(UInt(PAddrBits.W))
      val match_mask = Output(Vec(size, Bool()))
      // if a ld-ld violation does happen, we replay from the elder load
    })
    val debug = Output(Vec(size, new LQDataEntry))

    // Drives address and data of writeback write port `channel`.
    // The caller still has to set "this.wb.wen(channel) := true.B".
    def wbWrite(channel: Int, waddr: UInt, wdata: LQDataEntry): Unit = {
      require(channel >= 0 && channel < wbNumWrite)
      this.wb.waddr(channel) := waddr
      this.wb.wdata(channel) := wdata
    }

    // Drives address and data of the uncache write port.
    // The caller still has to set "this.uncache.wen := true.B".
    def uncacheWrite(waddr: UInt, wdata: UInt): Unit = {
      this.uncache.waddr := waddr
      this.uncache.wdata := wdata
    }
  })

  // data module
  // read ports: wbNumRead writeback ports + 1 uncache port
  // write ports: wbNumWrite writeback ports (+ 1 uncache port on the data module only)
  val paddrModule = Module(new LQPaddrModule(size, numRead = wbNumRead + 1, numWrite = wbNumWrite, numWBanks = LoadQueueNWriteBanks))
  val maskModule = Module(new LQMaskModule(size, numRead = wbNumRead + 1, numWrite = wbNumWrite))
  val dataModule = Module(new LQDataModule(size, numRead = wbNumRead + 1, numWrite = wbNumWrite + 1))

  // read data
  // read port 0 -> wbNumRead-1
  for (i <- 0 until wbNumRead) {
    paddrModule.io.raddr(i) := io.wb.raddr(i)
    maskModule.io.raddr(i) := io.wb.raddr(i)
    dataModule.io.raddr(i) := io.wb.raddr(i)

    io.wb.rdata(i).paddr := paddrModule.io.rdata(i)
    io.wb.rdata(i).mask := maskModule.io.rdata(i)
    io.wb.rdata(i).data := dataModule.io.rdata(i)
    // fwdMask is not readable through this interface
    io.wb.rdata(i).fwdMask := DontCare
  }

  // read port wbNumRead
  paddrModule.io.raddr(wbNumRead) := io.uncache.raddr
  maskModule.io.raddr(wbNumRead) := io.uncache.raddr
  dataModule.io.raddr(wbNumRead) := io.uncache.raddr

  io.uncache.rdata.paddr := paddrModule.io.rdata(wbNumRead)
  io.uncache.rdata.mask := maskModule.io.rdata(wbNumRead)
  io.uncache.rdata.data := dataModule.io.rdata(wbNumRead)
  io.uncache.rdata.fwdMask := DontCare

  // write data
  // write port 0 -> wbNumWrite-1
  for (i <- 0 until wbNumWrite) {
    // mask and data are written together under io.wb.wen
    maskModule.io.wen(i) := io.wb.wen(i)
    dataModule.io.wen(i) := io.wb.wen(i)

    maskModule.io.waddr(i) := io.wb.waddr(i)
    dataModule.io.waddr(i) := io.wb.waddr(i)

    maskModule.io.wdata(i) := io.wb.wdata(i).mask
    dataModule.io.wdata(i) := io.wb.wdata(i).data
    dataModule.io.fwdMaskWdata(i) := io.wb.wdata(i).fwdMask.asUInt
    dataModule.io.paddrWdata(i) := io.wb.wdata(i).paddr

    // paddr has its own enable/address/data
    paddrModule.io.wen(i) := io.paddr.wen(i)
    paddrModule.io.waddr(i) := io.paddr.waddr(i)
    paddrModule.io.wdata(i) := io.paddr.wdata(i)
  }

  // write port wbNumWrite
  dataModule.io.wen(wbNumWrite) := io.uncache.wen
  // dataModule.io.fwdMaskWen(wbNumWrite) := false.B
  // dataModule.io.paddrWen(wbNumWrite) := false.B

  dataModule.io.waddr(wbNumWrite) := io.uncache.waddr

  // the uncache port only carries data; the ctrl-info inputs are left unspecified
  dataModule.io.fwdMaskWdata(wbNumWrite) := DontCare
  dataModule.io.paddrWdata(wbNumWrite) := DontCare
  dataModule.io.wdata(wbNumWrite) := io.uncache.wdata

  // st-ld mem access violation check, gen violationMask:
  // an entry hits when both the address cam and the mask cam report it
  for (i <- 0 until StorePipelineWidth) {
    paddrModule.io.violationMdata(i) := io.violation(i).paddr
    maskModule.io.violationMdata(i) := io.violation(i).mask
    io.violation(i).violationMask := (paddrModule.io.violationMmask(i).asUInt & maskModule.io.violationMmask(i).asUInt).asBools
  }

  // ld-ld mem access violation check, gen violationMask (cam match mask)
  for (i <- 0 until LoadPipelineWidth) {
    paddrModule.io.releaseMdata(i) := io.release_violation(i).paddr
    io.release_violation(i).match_mask := paddrModule.io.releaseMmask(i)
  }

  // gen paddr match mask
  paddrModule.io.refillMdata := io.refill.paddr
  io.refill.matchMask := paddrModule.io.refillMmask

  // refill data according to matchMask, refillMask and refill.valid
  dataModule.io.refillData := io.refill.data
  for (i <- 0 until size) {
    dataModule.io.mwmask(i) := io.refill.valid && io.refill.matchMask(i) && io.refill.refillMask(i)
  }

  // debug data read
  io.debug := DontCare
}
437