xref: /XiangShan/src/main/scala/xiangshan/mem/lsqueue/StoreQueueData.scala (revision 3c02ee8f82edea481fa8336c7f54ffc17fafba91)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.mem
18
19import chipsalliance.rocketchip.config.Parameters
20import chisel3._
21import chisel3.util._
22import utils._
23import utility._
24import xiangshan._
25import xiangshan.cache._
26import xiangshan.cache.{DCacheWordIO, DCacheLineIO, MemoryOpConstants}
27import xiangshan.mem._
28import xiangshan.backend.rob.RobPtr
29
30
// Data module definitions
// These data modules are like SyncDataModuleTemplate, but additionally support CAM-like lookups
/**
  * Register-based store address array with an address-match (CAM) port.
  *
  * Every entry holds one address of `dataWidth` bits together with a "line flag".
  * When an entry's line flag is set, the forward CAM treats the entry as matching
  * any query that falls in the same cache line; otherwise the word index inside
  * the line has to match as well.
  *
  * @param dataWidth  width of one stored address
  * @param numEntries number of entries
  * @param numRead    number of read ports (read index is registered for one cycle)
  * @param numWrite   number of write ports (a later port overrides an earlier one)
  * @param numForward number of CAM query ports
  */
class SQAddrModule(dataWidth: Int, numEntries: Int, numRead: Int, numWrite: Int, numForward: Int)(implicit p: Parameters) extends XSModule with HasDCacheParameters {
  val io = IO(new Bundle {
    // sync read
    val raddr = Input(Vec(numRead, UInt(log2Up(numEntries).W)))
    val rdata = Output(Vec(numRead, UInt(dataWidth.W))) // rdata: store addr
    val rlineflag = Output(Vec(numRead, Bool())) // rdata: line op flag
    // write
    val wen   = Input(Vec(numWrite, Bool()))
    val waddr = Input(Vec(numWrite, UInt(log2Up(numEntries).W)))
    val wdata = Input(Vec(numWrite, UInt(dataWidth.W))) // wdata: store addr
    val wlineflag = Input(Vec(numWrite, Bool())) // wdata: line op flag
    // forward addr cam
    val forwardMdata = Input(Vec(numForward, UInt(dataWidth.W))) // addr
    val forwardMmask = Output(Vec(numForward, Vec(numEntries, Bool()))) // cam result mask
    // debug
    val debug_data = Output(Vec(numEntries, UInt(dataWidth.W)))
  })

  val data = Reg(Vec(numEntries, UInt(dataWidth.W)))
  // lineflag(n) set: entry n stands for its whole cache line in the CAM below
  val lineflag = Reg(Vec(numEntries, Bool()))
  io.debug_data := data

  // Address bits above the cache line offset.
  def lineBits(addr: UInt): UInt = addr(dataWidth-1, DCacheLineOffset)
  // Address bits selecting the word inside a cache line.
  def wordBits(addr: UInt): UInt = addr(DCacheLineOffset-1, DCacheWordOffset)

  // Read ports: the read index is delayed by one register stage before the lookup.
  (0 until numRead).foreach { port =>
    io.rdata(port) := data(RegNext(io.raddr(port)))
    io.rlineflag(port) := lineflag(RegNext(io.raddr(port)))
  }

  // Write ports, emitted in ascending port order so that the highest-numbered
  // enabled port has the final say on an entry.
  (0 until numWrite).foreach { port =>
    when (io.wen(port)) {
      data(io.waddr(port)) := io.wdata(port)
      lineflag(io.waddr(port)) := io.wlineflag(port)
    }
  }

  // Content-addressed match of every query against every entry.
  for {
    port  <- 0 until numForward
    entry <- 0 until numEntries
  } {
    val sameLine = lineBits(io.forwardMdata(port)) === lineBits(data(entry))
    val sameWord = wordBits(io.forwardMdata(port)) === wordBits(data(entry))
    io.forwardMmask(port)(entry) := sameLine && (sameWord || lineflag(entry))
  }

  // Two write ports must never target the same entry in the same cycle.
  for {
    first  <- 0 until numWrite
    second <- first + 1 until numWrite
  } {
    val conflict = io.wen(first) && io.wen(second) && io.waddr(first) === io.waddr(second)
    assert(!conflict)
  }
}
87
/**
  * One byte of store data together with its valid bit.
  *
  * This is the element type stored by a single byte lane of the store queue data array.
  */
class SQData8Entry(implicit p: Parameters) extends XSBundle {
  val valid = Bool() // this byte is valid
  // A byte lane is always 8 bits wide. The previous width, XLEN/8, is the number
  // of bytes in a word and only equals 8 when XLEN is 64.
  val data = UInt(8.W)
}
92
/**
  * One byte lane of the store queue data array.
  *
  * Each entry stores one data byte and one valid bit (see SQData8Entry). The module offers:
  *  - read ports whose index is registered for one cycle before the lookup,
  *  - separate write ports for the data byte and for the valid bit; a write is
  *    captured in the cycle it is requested and applied to the entry one cycle later,
  *  - forwarding ports that, given two per-entry select vectors, return the byte of
  *    the highest-priority selected entry whose valid bit is set.
  *
  * @param numEntries number of entries
  * @param numRead    number of read ports
  * @param numWrite   number of data write ports and of mask write ports
  * @param numForward number of forwarding ports
  */
class SQData8Module(numEntries: Int, numRead: Int, numWrite: Int, numForward: Int)(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
{
  val io = IO(new Bundle() {
    // sync read port
    val raddr = Vec(numRead, Input(UInt(log2Up(numEntries).W)))
    val rdata = Vec(numRead, Output(new SQData8Entry))
    // data write port
    val data = new Bundle() {
      val wen   = Vec(numWrite, Input(Bool()))
      val waddr = Vec(numWrite, Input(UInt(log2Up(numEntries).W)))
      val wdata = Vec(numWrite, Input(UInt(8.W))) // one byte
    }
    // mask (data valid) write port
    val mask = new Bundle() {
      val wen   = Vec(numWrite, Input(Bool()))
      val waddr = Vec(numWrite, Input(UInt(log2Up(numEntries).W)))
      val wdata = Vec(numWrite, Input(Bool()))
    }

    // st-ld forward addr cam result input, used to select forward data
    val needForward = Input(Vec(numForward, Vec(2, UInt(numEntries.W))))
    // forward result valid bit generated in current cycle
    val forwardValidFast = Vec(numForward, Output(Bool()))
    // forward result generated in the next cycle: computed from needForward
    // delayed by one register stage and the valid bits as they are in that cycle
    val forwardValid = Vec(numForward, Output(Bool()))
    val forwardData = Vec(numForward, Output(UInt(8.W)))
  })

  io := DontCare

  val data = Reg(Vec(numEntries, new SQData8Entry))

  require(isPow2(StoreQueueNWriteBanks))
  require(StoreQueueNWriteBanks > 1)
  // Low address bits select the write bank ...
  def get_bank(in: UInt): UInt = in(log2Up(StoreQueueNWriteBanks) -1, 0)
  // ... and the remaining high bits select the entry inside that bank.
  def get_bank_index(in: UInt): UInt = in >> log2Up(StoreQueueNWriteBanks)
  // Inverse mapping: (index inside bank, bank) -> entry number.
  def get_vec_index(index: Int, bank: Int): Int = {
    (index << log2Up(StoreQueueNWriteBanks)) + bank
  }

  /**
    * Two-stage, banked write pipeline shared by the data and the mask write ports.
    *
    * Stage 0 decodes, per write port and per bank, whether the request targets the
    * bank. Stage 1 (one cycle later) holds the registered enable, payload and
    * in-bank index and updates the matching entry.
    * NOTE(review): the per-bank split is presumably a timing/fanout measure — confirm.
    *
    * Ports are processed in ascending order, so a higher-numbered port overrides a
    * lower-numbered one if both hit the same entry.
    *
    * @param prefix name prefix ("data" / "mask") for the generated signal names
    * @param wen    write enables, one per write port
    * @param waddr  write entry numbers, one per write port
    * @param wdata  write payloads, one per write port
    * @param field  selects the field of an entry that receives the payload
    */
  private def bankedWrite[T <: Data](
    prefix: String,
    wen: Vec[Bool],
    waddr: Vec[UInt],
    wdata: Vec[T]
  )(field: SQData8Entry => T): Unit = {
    val bankBits = log2Up(StoreQueueNWriteBanks)
    for (i <- 0 until numWrite; bank <- 0 until StoreQueueNWriteBanks) {
      val s0_wen = wen(i) && get_bank(waddr(i)) === bank.U
      val s1_wen = RegNext(s0_wen)
      val s1_wdata = RegEnable(wdata(i), s0_wen)
      val s1_waddr = RegEnable(get_bank_index(waddr(i)), s0_wen)
      // Walk the entries that really exist in this bank. Deriving the range from
      // numEntries (the size of `data`) instead of the global StoreQueueSize keeps
      // every entry writable and never indexes past the end of `data`.
      for (entry <- bank until numEntries by StoreQueueNWriteBanks) {
        when(s1_wen && s1_waddr === (entry >> bankBits).U){
          field(data(entry)) := s1_wdata
        }
      }
      s0_wen.suggestName(prefix + "_s0_wen_" + i + "_bank_" + bank)
      s1_wen.suggestName(prefix + "_s1_wen_" + i + "_bank_" + bank)
      s1_wdata.suggestName(prefix + "_s1_wdata_" + i + "_bank_" + bank)
      s1_waddr.suggestName(prefix + "_s1_waddr_" + i + "_bank_" + bank)
    }
  }

  // writeback to sq
  // store queue data write takes 2 cycles
  bankedWrite("data", io.data.wen, io.data.waddr, io.data.wdata)(_.data)
  // valid-bit (mask) write uses the same 2-cycle scheme
  bankedWrite("mask", io.mask.wen, io.mask.waddr, io.mask.wdata)(_.valid)

  // destorequeue read data
  for (i <- 0 until numRead) {
    io.rdata(i) := data(RegNext(io.raddr(i)))
  }

  // DataModuleTemplate should not be used when there're any write conflicts
  for (i <- 0 until numWrite) {
    for (j <- i+1 until numWrite) {
      assert(!(io.data.wen(i) && io.data.wen(j) && io.data.waddr(i) === io.data.waddr(j)))
    }
  }
  for (i <- 0 until numWrite) {
    for (j <- i+1 until numWrite) {
      assert(!(io.mask.wen(i) && io.mask.wen(j) && io.mask.waddr(i) === io.mask.waddr(j)))
    }
  }

  // forwarding
  // Compare ringBufferTail (deqPtr) and forward.sqIdx, we have two cases:
  // (1) if they have the same flag, we need to check range(tail, sqIdx)
  // (2) if they have different flags, we need to check range(tail, numEntries) and range(0, sqIdx)
  // Forward1: Mux(same_flag, range(tail, sqIdx), range(tail, numEntries))
  // Forward2: Mux(same_flag, 0.U,                range(0, sqIdx)        )
  // i.e. forward1 is the target entries with the same flag bits and forward2 otherwise
  //
  // Both select vectors arrive precomputed on io.needForward(i)(0) and (1).

  // An entry with a larger index has higher priority since its data is younger.

  for (i <- 0 until numForward) {
    // Candidates: slots [0, numEntries) belong to needForward(i)(0),
    // slots [numEntries, 2*numEntries) belong to needForward(i)(1).
    val matchResultVec = Wire(Vec(numEntries * 2, new FwdEntry))

    // Reduction: the valid flags are OR-ed, and the data of the right-hand
    // (higher-slot) operand is taken whenever that operand is valid.
    def parallelFwd(xs: Seq[Data]): Data = {
      ParallelOperation(xs, (a: Data, b: Data) => {
        val l = a.asTypeOf(new FwdEntry)
        val r = b.asTypeOf(new FwdEntry)
        val res = Wire(new FwdEntry)
        res.validFast := l.validFast || r.validFast
        res.valid := l.valid || r.valid
        res.data := Mux(r.valid, r.data, l.data)
        res
      })
    }

    for (j <- 0 until numEntries) {
      val needCheck0 = io.needForward(i)(0)(j)
      val needCheck1 = io.needForward(i)(1)(j)
      val needCheck0Reg = RegNext(needCheck0)
      val needCheck1Reg = RegNext(needCheck1)
      // One set of connections per entry is sufficient; the module handles a single byte.
      matchResultVec(j).validFast := needCheck0 && data(j).valid
      matchResultVec(j).valid := needCheck0Reg && data(j).valid
      matchResultVec(j).data := data(j).data
      matchResultVec(numEntries + j).validFast := needCheck1 && data(j).valid
      matchResultVec(numEntries + j).valid := needCheck1Reg && data(j).valid
      matchResultVec(numEntries + j).data := data(j).data
    }

    val parallelFwdResult = parallelFwd(matchResultVec).asTypeOf(new FwdEntry)

    // validFast is generated the same cycle with query
    io.forwardValidFast(i) := parallelFwdResult.validFast
    // valid is generated 1 cycle after query request
    io.forwardValid(i) := parallelFwdResult.valid
    // data is generated 1 cycle after query request
    io.forwardData(i) := parallelFwdResult.data
  }
}
255
/**
  * Read result of the store queue data array for one entry:
  * an 8-bit mask alongside an XLEN-bit data word.
  */
class SQDataEntry(implicit p: Parameters) extends XSBundle {
  // 8 mask bits; SQDataModule fills bit n from the valid flag of byte lane n
  val mask: UInt = UInt(8.W)
  // full data word
  val data: UInt = UInt(XLEN.W)
}
260
261// SQDataModule is a wrapper of SQData8Modules
/**
  * Store queue data array, built from 8 SQData8Module byte lanes.
  *
  * Lane n receives bits [8n+7 : 8n] of every data write and bit n of every mask
  * write; read and forward results of the lanes are gathered back into vectors
  * indexed by lane.
  *
  * @param numEntries number of entries
  * @param numRead    number of read ports
  * @param numWrite   number of data write ports and of mask write ports
  * @param numForward number of forwarding ports
  */
class SQDataModule(numEntries: Int, numRead: Int, numWrite: Int, numForward: Int)(implicit p: Parameters) extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper {
  val io = IO(new Bundle() {
    // sync read port
    val raddr = Vec(numRead,  Input(UInt(log2Up(numEntries).W)))
    val rdata = Vec(numRead,  Output(new SQDataEntry))
    // data write port
    val data = new Bundle() {
      val wen   = Vec(numWrite, Input(Bool()))
      val waddr = Vec(numWrite, Input(UInt(log2Up(numEntries).W)))
      val wdata = Vec(numWrite, Input(UInt(XLEN.W)))
    }
    // mask (data valid) write port
    val mask = new Bundle() {
      val wen   = Vec(numWrite, Input(Bool()))
      val waddr = Vec(numWrite, Input(UInt(log2Up(numEntries).W)))
      val wdata = Vec(numWrite, Input(UInt(8.W)))
    }

    // st-ld forward addr cam result input, used to select forward data
    val needForward = Input(Vec(numForward, Vec(2, UInt(numEntries.W))))
    // forward result valid bit generated in current cycle
    val forwardMaskFast = Vec(numForward, Output(Vec(8, Bool())))
    // forward result generated in the next cycle (one bit / one byte per lane)
    val forwardMask = Vec(numForward, Output(Vec(8, Bool())))
    val forwardData = Vec(numForward, Output(Vec(8, UInt(8.W))))
  })

  val data8 = Seq.fill(8)(Module(new SQData8Module(numEntries, numRead, numWrite, numForward)))

  // writeback to lq/sq: fan every write port out to all byte lanes
  for (i <- 0 until numWrite) {
    for (j <- 0 until 8) {
      data8(j).io.mask.waddr(i) := io.mask.waddr(i)
      data8(j).io.mask.wdata(i) := io.mask.wdata(i)(j)
      data8(j).io.mask.wen(i)   := io.mask.wen(i)
      data8(j).io.data.waddr(i) := io.data.waddr(i)
      data8(j).io.data.wdata(i) := io.data.wdata(i)(8*(j+1)-1, 8*j)
      data8(j).io.data.wen(i)   := io.data.wen(i)
    }
  }

  // destorequeue read data: same index to every lane, results concatenated
  // with lane 0 in the least significant position
  for (i <- 0 until numRead) {
    for (j <- 0 until 8) {
      data8(j).io.raddr(i) := io.raddr(i)
    }
    io.rdata(i).mask := VecInit(data8.map(_.io.rdata(i).valid)).asUInt
    io.rdata(i).data := VecInit(data8.map(_.io.rdata(i).data)).asUInt
  }

  // DataModuleTemplate should not be used when there're any write conflicts
  for (i <- 0 until numWrite) {
    for (j <- i+1 until numWrite) {
      assert(!(io.data.wen(i) && io.data.wen(j) && io.data.waddr(i) === io.data.waddr(j)))
    }
  }
  for (i <- 0 until numWrite) {
    for (j <- i+1 until numWrite) {
      assert(!(io.mask.wen(i) && io.mask.wen(j) && io.mask.waddr(i) === io.mask.waddr(j)))
    }
  }

  // forwarding: every lane gets the same select vectors; per-lane results are
  // collected once per forward port (not once per lane)
  for (i <- 0 until numForward) {
    for (j <- 0 until 8) {
      data8(j).io.needForward(i) <> io.needForward(i)
    }
    io.forwardMaskFast(i) := VecInit(data8.map(_.io.forwardValidFast(i)))
    io.forwardMask(i) := VecInit(data8.map(_.io.forwardValid(i)))
    io.forwardData(i) := VecInit(data8.map(_.io.forwardData(i)))
  }
}
335