xref: /XiangShan/src/main/scala/xiangshan/mem/lsqueue/StoreQueueData.scala (revision b03c55a5df5dc8793cb44b42dd60141566e57e78)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.mem
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import utils._
23import utility._
24import xiangshan._
25import xiangshan.cache._
26import xiangshan.cache.{DCacheWordIO, DCacheLineIO, MemoryOpConstants}
27import xiangshan.mem._
28import xiangshan.backend.rob.RobPtr
29
30
// Data module definitions
// These data modules are like SyncDataModuleTemplate, but additionally support CAM-like operations
/**
  * Store queue address array with content-addressed (CAM) lookup.
  *
  * Every entry keeps an address (`dataWidth` bits), a byte mask (`VLEN/8` bits) and a
  * "line" flag. When the line flag is set the entry is treated as covering its whole
  * cache line during the CAM match.
  *
  * @param dataWidth  width of a stored address
  * @param numEntries number of entries
  * @param numRead    number of read ports (address passes through `GatedRegNext`)
  * @param numWrite   number of write ports; a later port overrides an earlier one
  *                   (conflicting writes are forbidden by assertion)
  * @param numForward number of CAM query ports
  */
class SQAddrModule(dataWidth: Int, numEntries: Int, numRead: Int, numWrite: Int, numForward: Int)(implicit p: Parameters) extends XSModule with HasDCacheParameters {
  val io = IO(new Bundle {
    // read ports: address in, stored address / line flag out
    val raddr = Input(Vec(numRead, UInt(log2Up(numEntries).W)))
    val rdata = Output(Vec(numRead, UInt(dataWidth.W))) // stored store address
    val rlineflag = Output(Vec(numRead, Bool())) // stored line-op flag
    // write ports
    val wen   = Input(Vec(numWrite, Bool()))
    val waddr = Input(Vec(numWrite, UInt(log2Up(numEntries).W)))
    val wdata = Input(Vec(numWrite, UInt(dataWidth.W))) // store address to record
    val wmask = Input(Vec(numWrite, UInt((VLEN/8).W)))
    val wlineflag = Input(Vec(numWrite, Bool())) // line-op flag to record
    // CAM query ports
    val forwardMdata = Input(Vec(numForward, UInt(dataWidth.W))) // query address
    val forwardDataMask = Input(Vec(numForward, UInt((VLEN/8).W))) // query byte mask
    val forwardMmask = Output(Vec(numForward, Vec(numEntries, Bool()))) // per-entry hit vector
    // debug
    val debug_data = Output(Vec(numEntries, UInt(dataWidth.W)))
  })

  val data = Reg(Vec(numEntries, UInt(dataWidth.W)))
  val mask = Reg(Vec(numEntries, UInt((VLEN/8).W)))
  // lineflag(j) == true: entry j matches any access that falls into the same cache line
  val lineflag = Reg(Vec(numEntries, Bool()))

  io.debug_data := data

  // Read ports: the entry is selected by the read address after GatedRegNext.
  (0 until numRead).foreach { port =>
    io.rdata(port) := data(GatedRegNext(io.raddr(port)))
    io.rlineflag(port) := lineflag(GatedRegNext(io.raddr(port)))
  }

  // Write ports: emitted in ascending port order, so a higher-numbered port has priority.
  (0 until numWrite).foreach { port =>
    val target = io.waddr(port)
    when (io.wen(port)) {
      data(target) := io.wdata(port)
      mask(target) := io.wmask(port)
      lineflag(target) := io.wlineflag(port)
    }
  }

  // CAM match: an entry hits when the cache line address matches and either
  //  - the entry is flagged as a whole-line entry, or
  //  - the VLEN-word offset inside the line matches too (and, when
  //    StoreQueueForwardWithMask is set, the byte masks overlap).
  for (port <- 0 until numForward) {
    val queryAddr = io.forwardMdata(port)
    val queryLine = queryAddr(dataWidth-1, DCacheLineOffset)
    val queryVWord = queryAddr(DCacheLineOffset-1, DCacheVWordOffset)
    val queryMask = io.forwardDataMask(port)

    for (entry <- 0 until numEntries) {
      val entryAddr = data(entry)
      val linehit = queryLine === entryAddr(dataWidth-1, DCacheLineOffset)
      val vwordhit = queryVWord === entryAddr(DCacheLineOffset-1, DCacheVWordOffset)
      val maskOverlap = (mask(entry) & queryMask).orR
      val hit128bit = vwordhit && (!StoreQueueForwardWithMask.B || maskOverlap)
      io.forwardMmask(port)(entry) := linehit && (hit128bit || lineflag(entry))
    }
  }

  // Two write ports must never target the same entry in the same cycle.
  for {
    first <- 0 until numWrite
    second <- first + 1 until numWrite
  } {
    assert(!(io.wen(first) && io.wen(second) && io.waddr(first) === io.waddr(second)))
  }
}
92
/**
  * One byte lane of a store queue data entry: a data byte and its valid bit.
  *
  * The payload is exactly one byte. It used to be declared as `UInt((XLEN/8).W)`, which is
  * the number of bytes per XLEN word and only happens to equal 8 when XLEN == 64; the
  * consumers in this file (8-bit `forwardData`, 8-bit slices of the write data) treat it as
  * 8 bits, so the width is stated directly.
  */
class SQData8Entry(implicit p: Parameters) extends XSBundle {
  val valid = Bool() // this byte is valid
  val data = UInt(8.W) // the byte itself
}
97
/**
  * Storage for one byte lane of the store queue data array.
  *
  * Each of the `numEntries` entries holds one data byte plus a valid bit ([[SQData8Entry]]).
  * The module offers
  *  - `numRead` read ports (the read address goes through `GatedRegNext`),
  *  - `numWrite` write ports for the data byte and, independently, for the valid bit;
  *    both kinds of write are split into `StoreQueueNWriteBanks` banks and take two cycles,
  *  - `numForward` store-to-load forward ports that select a valid byte among the entries
  *    flagged by `needForward`.
  *
  * @param numEntries number of entries in this lane
  * @param numRead    number of read ports
  * @param numWrite   number of data / mask write ports
  * @param numForward number of forward query ports
  */
class SQData8Module(numEntries: Int, numRead: Int, numWrite: Int, numForward: Int)(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
{
  val io = IO(new Bundle() {
    // sync read port
    val raddr = Vec(numRead, Input(UInt(log2Up(numEntries).W)))
    val rdata = Vec(numRead, Output(new SQData8Entry))
    // data write port
    val data = new Bundle() {
      val wen   = Vec(numWrite, Input(Bool()))
      val waddr = Vec(numWrite, Input(UInt(log2Up(numEntries).W)))
      // one byte; was (XLEN/8).W, which is 8 only because XLEN == 64
      val wdata = Vec(numWrite, Input(UInt(8.W)))
    }
    // mask (data valid) write port
    val mask = new Bundle() {
      val wen   = Vec(numWrite, Input(Bool()))
      val waddr = Vec(numWrite, Input(UInt(log2Up(numEntries).W)))
      val wdata = Vec(numWrite, Input(Bool()))
    }

    // st-ld forward addr cam result input, used to select forward data
    val needForward = Input(Vec(numForward, Vec(2, UInt(numEntries.W))))
    // forward result valid bit generated in current cycle
    val forwardValidFast = Vec(numForward, Output(Bool()))
    // forward result generated in the next cycle (uses the registered needForward bits)
    val forwardValid = Vec(numForward, Output(Bool()))
    val forwardData = Vec(numForward, Output(UInt(8.W)))
  })

  io := DontCare

  val data = Reg(Vec(numEntries, new SQData8Entry))

  require(isPow2(StoreQueueNWriteBanks))
  require(StoreQueueNWriteBanks > 1)
  // low address bits select the bank, the remaining high bits select the row inside the bank
  def get_bank(in: UInt): UInt = in(log2Up(StoreQueueNWriteBanks) -1, 0)
  def get_bank_index(in: UInt): UInt = in >> log2Up(StoreQueueNWriteBanks)
  def get_vec_index(index: Int, bank: Int): Int = {
    (index << log2Up(StoreQueueNWriteBanks)) + bank
  }

  // Rows per bank, derived from this module's own size (not the global StoreQueueSize) and
  // rounded up so that every entry of `data` is reachable even if numEntries is not a
  // multiple of the bank count.
  private val numRegsPerBank = (numEntries + StoreQueueNWriteBanks - 1) / StoreQueueNWriteBanks

  /**
    * Builds the two-stage banked write pipeline for one write port.
    *
    * Stage 0 decodes the bank from `waddr`; stage 1 holds the registered enable, payload and
    * row index per bank and updates the addressed entry through `commit`.
    *
    * @param wen        write enable of the port
    * @param waddr      entry index to write
    * @param wdata      payload to write
    * @param namePrefix prefix for the suggested signal names ("data" or "mask")
    * @param port       write port number, used in the suggested signal names
    * @param commit     connects the stage-1 payload to the relevant field of the entry
    */
  private def bankedWrite[T <: Data](wen: Bool, waddr: UInt, wdata: T, namePrefix: String, port: Int)
                                    (commit: (SQData8Entry, T) => Unit): Unit = {
    val s0_wenVec = Wire(Vec(StoreQueueNWriteBanks, Bool()))
    for (bank <- 0 until StoreQueueNWriteBanks) {
      s0_wenVec(bank) := wen && get_bank(waddr) === bank.U
    }
    val s1_wenVec = GatedValidRegNext(s0_wenVec)

    for (bank <- 0 until StoreQueueNWriteBanks) {
      val s0_wen = s0_wenVec(bank)
      val s1_wen = s1_wenVec(bank)
      val s1_wdata = RegEnable(wdata, s0_wen)
      val s1_waddr = RegEnable(get_bank_index(waddr), s0_wen)
      // skip (bank, row) pairs that fall beyond the last entry
      for (index <- 0 until numRegsPerBank if get_vec_index(index, bank) < numEntries) {
        when(s1_wen && s1_waddr === index.U){
          commit(data(get_vec_index(index, bank)), s1_wdata)
        }
      }
      s0_wen.suggestName(namePrefix + "_s0_wen_" + port +"_bank_" + bank)
      s1_wen.suggestName(namePrefix + "_s1_wen_" + port +"_bank_" + bank)
      s1_wdata.suggestName(namePrefix + "_s1_wdata_" + port +"_bank_" + bank)
      s1_waddr.suggestName(namePrefix + "_s1_waddr_" + port +"_bank_" + bank)
    }
  }

  // writeback to sq
  // store queue data write takes 2 cycles
  for (i <- 0 until numWrite) {
    bankedWrite(io.data.wen(i), io.data.waddr(i), io.data.wdata(i), "data", i) {
      (entry, byte) => entry.data := byte
    }
  }

  // the valid (mask) bit is written through its own, identically structured pipeline
  for (i <- 0 until numWrite) {
    bankedWrite(io.mask.wen(i), io.mask.waddr(i), io.mask.wdata(i), "mask", i) {
      (entry, valid) => entry.valid := valid
    }
  }

  // destorequeue read data
  for (i <- 0 until numRead) {
    io.rdata(i) := data(GatedRegNext(io.raddr(i)))
  }

  // DataModuleTemplate should not be used when there're any write conflicts
  for (i <- 0 until numWrite; j <- i + 1 until numWrite) {
    assert(!(io.data.wen(i) && io.data.wen(j) && io.data.waddr(i) === io.data.waddr(j)))
    assert(!(io.mask.wen(i) && io.mask.wen(j) && io.mask.waddr(i) === io.mask.waddr(j)))
  }

  // forwarding
  // Compare ringBufferTail (deqPtr) and forward.sqIdx, we have two cases:
  // (1) if they have the same flag, we need to check range(tail, sqIdx)
  // (2) if they have different flags, we need to check range(tail, end of queue) and range(0, sqIdx)
  // Forward1: Mux(same_flag, range(tail, sqIdx), range(tail, end of queue))
  // Forward2: Mux(same_flag, 0.U,                range(0, sqIdx)          )
  // i.e. forward1 is the target entries with the same flag bits and forward2 otherwise
  // (these two masks arrive precomputed as needForward(i)(0) and needForward(i)(1))

  // entry with larger index should have higher priority since its data is younger

  // Pairwise reduction of forward candidates: valid flags are OR-ed, and the data of the
  // right-hand operand wins whenever it is valid.
  // NOTE(review): the priority above relies on ParallelOperation keeping operand order - confirm.
  private def parallelFwd(xs: Seq[Data]): Data = {
    ParallelOperation(xs, (a: Data, b: Data) => {
      val l = a.asTypeOf(new FwdEntry)
      val r = b.asTypeOf(new FwdEntry)
      val res = Wire(new FwdEntry)
      res.validFast := l.validFast || r.validFast
      res.valid := l.valid || r.valid
      res.data := Mux(r.valid, r.data, l.data)
      res
    })
  }

  for (i <- 0 until numForward) {
    // candidates [0, numEntries) come from needForward(i)(0),
    // candidates [numEntries, 2 * numEntries) from needForward(i)(1)
    val matchResultVec = Wire(Vec(numEntries * 2, new FwdEntry))

    for (j <- 0 until numEntries) {
      val needCheck0 = io.needForward(i)(0)(j)
      val needCheck1 = io.needForward(i)(1)(j)
      val needCheck0Reg = RegNext(needCheck0)
      val needCheck1Reg = RegNext(needCheck1)

      matchResultVec(j).validFast := needCheck0 && data(j).valid
      matchResultVec(j).valid := needCheck0Reg && data(j).valid
      matchResultVec(j).data := data(j).data
      matchResultVec(numEntries + j).validFast := needCheck1 && data(j).valid
      matchResultVec(numEntries + j).valid := needCheck1Reg && data(j).valid
      matchResultVec(numEntries + j).data := data(j).data
    }

    val parallelFwdResult = parallelFwd(matchResultVec).asTypeOf(new FwdEntry)

    // validFast is generated the same cycle with query
    io.forwardValidFast(i) := parallelFwdResult.validFast
    // valid is generated 1 cycle after query request
    io.forwardValid(i) := parallelFwdResult.valid
    // data is generated 1 cycle after query request
    io.forwardData(i) := parallelFwdResult.data
  }
}
271
/**
  * Full-width store queue data entry: VLEN bits of store data together with a byte mask
  * holding one bit per data byte.
  */
class SQDataEntry(implicit p: Parameters) extends XSBundle {
  // one bit per byte of `data`
  val mask = UInt((VLEN / 8).W)
  // the store data itself
  val data = UInt(VLEN.W)
}
276
/**
  * SQDataModule is a wrapper of SQData8Modules.
  *
  * A VLEN-bit store queue data entry is split into VLEN/8 byte lanes, each kept in its own
  * [[SQData8Module]]. Addresses and enables are broadcast to all lanes, write data and mask
  * are sliced per lane, and read / forward results are gathered back into full-width vectors.
  *
  * @param numEntries number of store queue entries
  * @param numRead    number of read ports
  * @param numWrite   number of data / mask write ports
  * @param numForward number of store-to-load forward ports
  */
class SQDataModule(numEntries: Int, numRead: Int, numWrite: Int, numForward: Int)(implicit p: Parameters) extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper {
  val io = IO(new Bundle() {
    // sync read port
    val raddr = Vec(numRead,  Input(UInt(log2Up(numEntries).W)))
    val rdata = Vec(numRead,  Output(new SQDataEntry))
    // data write port
    val data = new Bundle() {
      val wen   = Vec(numWrite, Input(Bool()))
      val waddr = Vec(numWrite, Input(UInt(log2Up(numEntries).W)))
      val wdata = Vec(numWrite, Input(UInt(VLEN.W)))
    }
    // mask (data valid) write port
    val mask = new Bundle() {
      val wen   = Vec(numWrite, Input(Bool()))
      val waddr = Vec(numWrite, Input(UInt(log2Up(numEntries).W)))
      val wdata = Vec(numWrite, Input(UInt((VLEN/8).W)))
    }

    // st-ld forward addr cam result input, used to select forward data
    val needForward = Input(Vec(numForward, Vec(2, UInt(numEntries.W))))
    // forward result valid bit generated in current cycle
    val forwardMaskFast = Vec(numForward, Output(Vec((VLEN/8), Bool())))
    // forward result generated in the next cycle
    val forwardMask = Vec(numForward, Output(Vec((VLEN/8), Bool())))
    val forwardData = Vec(numForward, Output(Vec((VLEN/8), UInt(8.W))))
  })

  // Number of byte lanes; tied to VLEN so it always agrees with the VLEN/8-wide IO vectors
  // (16 lanes for VLEN == 128, hence the historical name `data16`).
  private val numBytes = VLEN / 8
  val data16 = Seq.fill(numBytes)(Module(new SQData8Module(numEntries, numRead, numWrite, numForward)))

  // writeback to lq/sq
  for (i <- 0 until numWrite) {
    // address and enable are shared by all lanes; mask bit j and data byte j go to lane j
    for (j <- 0 until numBytes) {
      data16(j).io.mask.waddr(i) := io.mask.waddr(i)
      data16(j).io.mask.wdata(i) := io.mask.wdata(i)(j)
      data16(j).io.mask.wen(i)   := io.mask.wen(i)
      data16(j).io.data.waddr(i) := io.data.waddr(i)
      data16(j).io.data.wdata(i) := io.data.wdata(i)(8*(j+1)-1, 8*j)
      data16(j).io.data.wen(i)   := io.data.wen(i)
    }
  }

  // destorequeue read data
  for (i <- 0 until numRead) {
    data16.foreach(_.io.raddr(i) := io.raddr(i))
    // lane 0 supplies the least significant mask bit / data byte
    io.rdata(i).mask := VecInit(data16.map(_.io.rdata(i).valid)).asUInt
    io.rdata(i).data := VecInit(data16.map(_.io.rdata(i).data)).asUInt
  }

  // DataModuleTemplate should not be used when there're any write conflicts
  for (i <- 0 until numWrite; j <- i + 1 until numWrite) {
    assert(!(io.data.wen(i) && io.data.wen(j) && io.data.waddr(i) === io.data.waddr(j)))
    assert(!(io.mask.wen(i) && io.mask.wen(j) && io.mask.waddr(i) === io.mask.waddr(j)))
  }

  // forwarding: every lane receives the same candidate masks and returns its own byte
  for (i <- 0 until numForward) {
    data16.foreach(_.io.needForward(i) := io.needForward(i))
    // gathered once per forward port (previously re-assigned inside the per-lane loop)
    io.forwardMaskFast(i) := VecInit(data16.map(_.io.forwardValidFast(i)))
    io.forwardMask(i) := VecInit(data16.map(_.io.forwardValid(i)))
    io.forwardData(i) := VecInit(data16.map(_.io.forwardData(i)))
  }
}
351