xref: /XiangShan/src/main/scala/xiangshan/mem/lsqueue/StoreQueueData.scala (revision 39f2ec76d83f983643468ae98702c27ff06db684)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.mem
18
19import chipsalliance.rocketchip.config.Parameters
20import chisel3._
21import chisel3.util._
22import utils._
23import xiangshan._
24import xiangshan.cache._
25import xiangshan.cache.{DCacheWordIO, DCacheLineIO, MemoryOpConstants}
26import xiangshan.mem._
27import xiangshan.backend.rob.RobPtr
28
29
30// Data module define
31// These data modules are like SyncDataModuleTemplate, but support cam-like ops
/**
  * Register-based address array with a content-addressed (CAM) match port.
  *
  * Each entry holds an address of `dataWidth` bits plus a "line" flag. When the
  * flag is set, the entry matches any query that falls in the same cache line,
  * regardless of the word offset inside that line.
  *
  * @param dataWidth  width of a stored address in bits
  * @param numEntries number of entries in the array
  * @param numRead    number of read ports
  * @param numWrite   number of write ports
  * @param numForward number of CAM query ports
  */
class SQAddrModule(dataWidth: Int, numEntries: Int, numRead: Int, numWrite: Int, numForward: Int)(implicit p: Parameters) extends XSModule with HasDCacheParameters {
  val io = IO(new Bundle {
    // read ports: the index is registered inside the module, so the
    // outputs reflect the index presented in the previous cycle
    val raddr = Input(Vec(numRead, UInt(log2Up(numEntries).W)))
    val rdata = Output(Vec(numRead, UInt(dataWidth.W))) // stored address
    val rlineflag = Output(Vec(numRead, Bool())) // stored line-op flag
    // write ports
    val wen   = Input(Vec(numWrite, Bool()))
    val waddr = Input(Vec(numWrite, UInt(log2Up(numEntries).W)))
    val wdata = Input(Vec(numWrite, UInt(dataWidth.W))) // address to store
    val wlineflag = Input(Vec(numWrite, Bool())) // line-op flag to store
    // CAM ports: query address in, one match bit per entry out
    val forwardMdata = Input(Vec(numForward, UInt(dataWidth.W)))
    val forwardMmask = Output(Vec(numForward, Vec(numEntries, Bool())))
    // debug: raw view of every stored address
    val debug_data = Output(Vec(numEntries, UInt(dataWidth.W)))
  })

  val data = Reg(Vec(numEntries, UInt(dataWidth.W)))
  // lineflag(j) == true means entry j stands for a whole cache line
  val lineflag = Reg(Vec(numEntries, Bool()))
  io.debug_data := data

  // The two address fields compared by the CAM.
  private def lineTag(addr: UInt): UInt = addr(dataWidth-1, DCacheLineOffset)
  private def wordInLine(addr: UInt): UInt = addr(DCacheLineOffset-1, DCacheWordOffset)

  // Read: register the index, then select from the register array.
  (0 until numRead).foreach { port =>
    io.rdata(port) := data(RegNext(io.raddr(port)))
    io.rlineflag(port) := lineflag(RegNext(io.raddr(port)))
  }

  // Write: ports are emitted in ascending order, so under last-connect
  // semantics a higher-numbered port would win on a clash (the assertion
  // at the bottom forbids such clashes anyway).
  (0 until numWrite).foreach { port =>
    when (io.wen(port)) {
      data(io.waddr(port)) := io.wdata(port)
      lineflag(io.waddr(port)) := io.wlineflag(port)
    }
  }

  // CAM: an entry hits when the line tag matches and either the word index
  // matches too or the entry is flagged as covering the whole line.
  for (q <- 0 until numForward; e <- 0 until numEntries) {
    val linehit = lineTag(io.forwardMdata(q)) === lineTag(data(e))
    val wordhit = wordInLine(io.forwardMdata(q)) === wordInLine(data(e))
    io.forwardMmask(q)(e) := linehit && (wordhit || lineflag(e))
  }

  // Two write ports must never target the same entry in the same cycle.
  for (a <- 0 until numWrite; b <- a+1 until numWrite) {
    assert(!(io.wen(a) && io.wen(b) && io.waddr(a) === io.waddr(b)))
  }
}
86
/**
  * One byte lane of a store-queue data entry: the byte itself plus a flag
  * telling whether that byte currently holds valid store data.
  */
class SQData8Entry(implicit p: Parameters) extends XSBundle {
  val valid = Bool() // this byte is valid
  // A byte lane is 8 bits wide. `XLEN/8` is the number of byte lanes per word,
  // which only coincides with 8 when XLEN == 64, so the width is spelled out.
  val data = UInt(8.W)
}
91
/**
  * Storage and store-to-load forwarding logic for ONE byte lane of the store queue.
  *
  * Every entry holds a data byte and a valid bit (see `SQData8Entry`). Data and
  * valid bit are written through two independent sets of write ports. Both
  * write paths are split by the low bits of the write address into
  * `StoreQueueNWriteBanks` banks and take two cycles: the request is captured
  * in per-bank registers first, and the entry is updated one cycle later.
  *
  * @param numEntries number of entries in this lane
  * @param numRead    number of read ports
  * @param numWrite   number of write ports (each for data and for mask)
  * @param numForward number of forwarding query ports
  */
class SQData8Module(numEntries: Int, numRead: Int, numWrite: Int, numForward: Int)(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
{
  val io = IO(new Bundle() {
    // sync read port: the index is registered inside the module
    val raddr = Vec(numRead, Input(UInt(log2Up(numEntries).W)))
    val rdata = Vec(numRead, Output(new SQData8Entry))
    // data write port (one byte per port, same width as forwardData)
    val data = new Bundle() {
      val wen   = Vec(numWrite, Input(Bool()))
      val waddr = Vec(numWrite, Input(UInt(log2Up(numEntries).W)))
      val wdata = Vec(numWrite, Input(UInt(8.W)))
    }
    // mask (data valid) write port
    val mask = new Bundle() {
      val wen   = Vec(numWrite, Input(Bool()))
      val waddr = Vec(numWrite, Input(UInt(log2Up(numEntries).W)))
      val wdata = Vec(numWrite, Input(Bool()))
    }

    // st-ld forward addr cam result input, used to select forward data:
    // two one-hot-per-entry bit vectors per forward port
    val needForward = Input(Vec(numForward, Vec(2, UInt(numEntries.W))))
    // forward hit computed from needForward of the current cycle
    val forwardValidFast = Vec(numForward, Output(Bool()))
    // forward hit and byte computed from needForward delayed by one cycle,
    // combined with the entry contents of the cycle in which they are output
    val forwardValid = Vec(numForward, Output(Bool()))
    val forwardData = Vec(numForward, Output(UInt(8.W)))
  })

  io := DontCare

  val data = Reg(Vec(numEntries, new SQData8Entry))

  require(isPow2(StoreQueueNWriteBanks))
  require(StoreQueueNWriteBanks > 1)
  // low address bits select the bank, the remaining high bits the slot inside it
  def get_bank(in: UInt): UInt = in(log2Up(StoreQueueNWriteBanks) -1, 0)
  def get_bank_index(in: UInt): UInt = in >> log2Up(StoreQueueNWriteBanks)
  def get_vec_index(index: Int, bank: Int): Int = {
    (index << log2Up(StoreQueueNWriteBanks)) + bank
  }

  // Slots per bank, derived from this module's own size (not the global
  // StoreQueueSize) and rounded up so that no tail entry is left unwritable
  // when numEntries is not a multiple of the bank count.
  private val maxRegsPerBank = (numEntries + StoreQueueNWriteBanks - 1) / StoreQueueNWriteBanks

  /**
    * Builds the two-cycle banked write pipeline shared by the data and mask paths.
    *
    * Stage 0 decodes, per write port and per bank, whether the request targets
    * that bank and captures payload and in-bank index in registers. Stage 1
    * compares the captured index against every slot of the bank and calls
    * `update` for the matching entry.
    *
    * @param tag    prefix for the generated signal names ("data" / "mask")
    * @param wen    write enables, one per write port
    * @param waddr  entry indices, one per write port
    * @param wdata  payloads, one per write port
    * @param update emits the actual register update for (entry index, payload)
    */
  private def bankedWrite[T <: Data](
    tag: String, wen: Vec[Bool], waddr: Vec[UInt], wdata: Vec[T]
  )(update: (Int, T) => Unit): Unit = {
    for (i <- 0 until numWrite; bank <- 0 until StoreQueueNWriteBanks) {
      val s0_wen = wen(i) && get_bank(waddr(i)) === bank.U
      val s1_wen = RegNext(s0_wen)
      val s1_wdata = RegEnable(wdata(i), s0_wen)
      val s1_waddr = RegEnable(get_bank_index(waddr(i)), s0_wen)
      for (index <- 0 until maxRegsPerBank) {
        val entry = get_vec_index(index, bank)
        // skip slots past the end of the array (only possible when
        // numEntries is not a multiple of the bank count)
        if (entry < numEntries) {
          when(s1_wen && s1_waddr === index.U){
            update(entry, s1_wdata)
          }
        }
      }
      s0_wen.suggestName(s"${tag}_s0_wen_${i}_bank_${bank}")
      s1_wen.suggestName(s"${tag}_s1_wen_${i}_bank_${bank}")
      s1_wdata.suggestName(s"${tag}_s1_wdata_${i}_bank_${bank}")
      s1_waddr.suggestName(s"${tag}_s1_waddr_${i}_bank_${bank}")
    }
  }

  // writeback to sq: store queue data write takes 2 cycles
  bankedWrite("data", io.data.wen, io.data.waddr, io.data.wdata) { (entry, byte) =>
    data(entry).data := byte
  }
  // the valid bit goes through an identical, independent pipeline
  bankedWrite("mask", io.mask.wen, io.mask.waddr, io.mask.wdata) { (entry, byteValid) =>
    data(entry).valid := byteValid
  }

  // destorequeue read data
  for (i <- 0 until numRead) {
    io.rdata(i) := data(RegNext(io.raddr(i)))
  }

  // DataModuleTemplate should not be used when there're any write conflicts
  for (i <- 0 until numWrite; j <- i+1 until numWrite) {
    assert(!(io.data.wen(i) && io.data.wen(j) && io.data.waddr(i) === io.data.waddr(j)))
  }
  for (i <- 0 until numWrite; j <- i+1 until numWrite) {
    assert(!(io.mask.wen(i) && io.mask.wen(j) && io.mask.waddr(i) === io.mask.waddr(j)))
  }

  // forwarding
  // Compare ringBufferTail (deqPtr) and forward.sqIdx, we have two cases:
  // (1) if they have the same flag, we need to check range(tail, sqIdx)
  // (2) if they have different flags, we need to check range(tail, queue size) and range(0, sqIdx)
  // Forward1: Mux(same_flag, range(tail, sqIdx), range(tail, queue size))
  // Forward2: Mux(same_flag, 0.U,                range(0, sqIdx)        )
  // i.e. forward1 is the target entries with the same flag bits and forward2 otherwise
  // NOTE(review): the two masks are computed by the caller and arrive as
  // needForward(i)(0) / needForward(i)(1); presumably in that order - confirm.

  // An entry with a larger position in matchResultVec has higher priority
  // since its data is younger: the reduction below prefers the right operand.

  for (i <- 0 until numForward) {
    // parallel fwd logic: slots [0, numEntries) come from needForward(i)(0),
    // slots [numEntries, 2*numEntries) from needForward(i)(1)
    val matchResultVec = Wire(Vec(numEntries * 2, new FwdEntry))

    def parallelFwd(xs: Seq[Data]): Data = {
      ParallelOperation(xs, (a: Data, b: Data) => {
        val l = a.asTypeOf(new FwdEntry)
        val r = b.asTypeOf(new FwdEntry)
        val res = Wire(new FwdEntry)
        res.validFast := l.validFast || r.validFast
        res.valid := l.valid || r.valid
        // res.valid := RegNext(res.validFast)
        res.data := Mux(r.valid, r.data, l.data)
        res
      })
    }

    for (j <- 0 until numEntries) {
      val needCheck0 = io.needForward(i)(0)(j)
      val needCheck1 = io.needForward(i)(1)(j)
      val needCheck0Reg = RegNext(needCheck0)
      val needCheck1Reg = RegNext(needCheck1)
      // validFast uses this cycle's request; valid uses last cycle's request
      // together with the entry's current valid bit
      matchResultVec(j).validFast := needCheck0 && data(j).valid
      matchResultVec(j).valid := needCheck0Reg && data(j).valid
      matchResultVec(j).data := data(j).data
      matchResultVec(numEntries + j).validFast := needCheck1 && data(j).valid
      matchResultVec(numEntries + j).valid := needCheck1Reg && data(j).valid
      matchResultVec(numEntries + j).data := data(j).data
    }

    val parallelFwdResult = parallelFwd(matchResultVec).asTypeOf(new FwdEntry)

    // validFast is generated the same cycle with query
    io.forwardValidFast(i) := parallelFwdResult.validFast
    // valid is generated 1 cycle after query request
    io.forwardValid(i) := parallelFwdResult.valid
    // data is generated 1 cycle after query request
    io.forwardData(i) := parallelFwdResult.data
  }
}
254
/**
  * A full store-queue data word as seen on the read ports of `SQDataModule`:
  * `XLEN` bits of data together with an 8-bit mask.
  */
class SQDataEntry(implicit p: Parameters) extends XSBundle {
  // 8-bit mask stored alongside the data word
  val mask = UInt(8.W)
  // the data word itself
  val data = UInt(XLEN.W)
}
259
260// SQDataModule is a wrapper of SQData8Modules
/**
  * Word-wide store-queue data array, built from eight `SQData8Module` byte lanes.
  *
  * Lane `j` receives bits `8*j+7 .. 8*j` of every written data word and bit `j`
  * of every written mask. Addresses, enables and forward requests are
  * broadcast to all lanes. Read and forward results are gathered lane by lane.
  *
  * @param numEntries number of entries
  * @param numRead    number of read ports
  * @param numWrite   number of write ports (each for data and for mask)
  * @param numForward number of forwarding query ports
  */
class SQDataModule(numEntries: Int, numRead: Int, numWrite: Int, numForward: Int)(implicit p: Parameters) extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper {
  val io = IO(new Bundle() {
    // sync read port
    val raddr = Vec(numRead,  Input(UInt(log2Up(numEntries).W)))
    val rdata = Vec(numRead,  Output(new SQDataEntry))
    // data write port
    val data = new Bundle() {
      val wen   = Vec(numWrite, Input(Bool()))
      val waddr = Vec(numWrite, Input(UInt(log2Up(numEntries).W)))
      val wdata = Vec(numWrite, Input(UInt(XLEN.W)))
    }
    // mask (data valid) write port, one bit per byte lane
    val mask = new Bundle() {
      val wen   = Vec(numWrite, Input(Bool()))
      val waddr = Vec(numWrite, Input(UInt(log2Up(numEntries).W)))
      val wdata = Vec(numWrite, Input(UInt(8.W)))
    }

    // st-ld forward addr cam result input, used to select forward data
    val needForward = Input(Vec(numForward, Vec(2, UInt(numEntries.W))))
    // per-lane forwardValidFast of the byte lanes
    val forwardMaskFast = Vec(numForward, Output(Vec(8, Bool())))
    // per-lane forwardValid / forwardData of the byte lanes
    val forwardMask = Vec(numForward, Output(Vec(8, Bool())))
    val forwardData = Vec(numForward, Output(Vec(8, UInt(8.W))))
  })

  val data8 = Seq.fill(8)(Module(new SQData8Module(numEntries, numRead, numWrite, numForward)))

  // writeback to lq/sq: broadcast address/enable, slice data and mask per lane
  for (i <- 0 until numWrite) {
    // write to data8
    for (j <- 0 until 8) {
      data8(j).io.mask.waddr(i) := io.mask.waddr(i)
      data8(j).io.mask.wdata(i) := io.mask.wdata(i)(j)
      data8(j).io.mask.wen(i)   := io.mask.wen(i)
      data8(j).io.data.waddr(i) := io.data.waddr(i)
      data8(j).io.data.wdata(i) := io.data.wdata(i)(8*(j+1)-1, 8*j)
      data8(j).io.data.wen(i)   := io.data.wen(i)
    }
  }

  // destorequeue read data: lane 0 ends up in the least significant position
  for (i <- 0 until numRead) {
    for (j <- 0 until 8) {
      data8(j).io.raddr(i) := io.raddr(i)
    }
    io.rdata(i).mask := VecInit((0 until 8).map(j => data8(j).io.rdata(i).valid)).asUInt
    io.rdata(i).data := VecInit((0 until 8).map(j => data8(j).io.rdata(i).data)).asUInt
  }

  // DataModuleTemplate should not be used when there're any write conflicts
  for (i <- 0 until numWrite) {
    for (j <- i+1 until numWrite) {
      assert(!(io.data.wen(i) && io.data.wen(j) && io.data.waddr(i) === io.data.waddr(j)))
    }
  }
  for (i <- 0 until numWrite) {
    for (j <- i+1 until numWrite) {
      assert(!(io.mask.wen(i) && io.mask.wen(j) && io.mask.waddr(i) === io.mask.waddr(j)))
    }
  }

  for (i <- 0 until numForward) {
    // parallel fwd logic: every lane sees the same forward request
    for (j <- 0 until 8) {
      data8(j).io.needForward(i) <> io.needForward(i)
    }
    // The gathered results already span all lanes, so they are connected once
    // per forward port rather than once per lane.
    io.forwardMaskFast(i) := VecInit(data8.map(_.io.forwardValidFast(i)))
    io.forwardMask(i) := VecInit(data8.map(_.io.forwardValid(i)))
    io.forwardData(i) := VecInit(data8.map(_.io.forwardData(i)))
  }
}
334