xref: /XiangShan/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala (revision 59a40467d3335a6fa23002b5293b833b54ce7ea8)
1package xiangshan.mem
2
3import chisel3._
4import chisel3.util._
5import utils._
6import xiangshan._
7import xiangshan.cache._
8// import xiangshan.cache.{DCacheWordIO, TlbRequestIO, TlbCmd, MemoryOpConstants, TlbReq, DCacheLoadReq, DCacheWordResp}
9import xiangshan.backend.LSUOpType
10
/**
  * Interface between the load unit and the load/store reorder queue (LSROQ).
  *
  *  - `loadIn`:  valid-only channel carrying a [[LsPipelineBundle]] out of the load unit.
  *  - `ldout`:   flipped decoupled channel carrying an [[ExuOutput]] back from the queue.
  *  - `forward`: load forward query port.
  */
class LoadToLsroqIO extends XSBundle {
  val loadIn = Valid(new LsPipelineBundle)
  val ldout = Flipped(Decoupled(new ExuOutput))
  val forward = new LoadForwardQueryIO
}
16
/**
  * Load pipeline stage 0.
  *
  * Generates the load virtual address (`src1 + imm`) and its write mask, then uses them
  * to query the DTLB and the DCache. The request is only issued when the incoming uop
  * is valid and not being flushed by `io.redirect`.
  */
class LoadUnit_S0 extends XSModule {
  val io = IO(new Bundle() {
    val in = Flipped(Decoupled(new ExuInput))
    val out = Decoupled(new LsPipelineBundle)
    val redirect = Flipped(ValidIO(new Redirect))
    val dtlbReq = Valid(new TlbReq)
    val dcacheReq = DecoupledIO(new DCacheLoadReq)
  })

  val s0_uop = io.in.bits.uop
  // Low two bits of fuOpType select the access size: b / h / w / d.
  val s0_size = s0_uop.ctrl.fuOpType(1, 0)
  val s0_vaddr = io.in.bits.src1 + s0_uop.ctrl.imm
  val s0_mask = genWmask(s0_vaddr, s0_size)
  // A request leaves this stage only if it is valid and not flushed.
  val s0_valid = io.in.valid && !s0_uop.needFlush(io.redirect)

  // Natural alignment check for each access size.
  val addrAligned = LookupTree(s0_size, List(
    "b00".U -> true.B,                   // byte: always aligned
    "b01".U -> (s0_vaddr(0) === 0.U),    // half
    "b10".U -> (s0_vaddr(1, 0) === 0.U), // word
    "b11".U -> (s0_vaddr(2, 0) === 0.U)  // double
  ))

  // ---- stage output ----
  io.out.valid := s0_valid
  // Everything not explicitly driven below is left as DontCare (order matters: the
  // field assignments must come after this bulk assignment).
  io.out.bits := DontCare
  io.out.bits.uop := s0_uop
  io.out.bits.uop.cf.exceptionVec(loadAddrMisaligned) := !addrAligned
  io.out.bits.vaddr := s0_vaddr
  io.out.bits.mask := s0_mask

  // ---- DTLB query ----
  val tlbReq = io.dtlbReq.bits
  io.dtlbReq.valid := s0_valid
  tlbReq.vaddr := s0_vaddr
  tlbReq.cmd := TlbCmd.read
  tlbReq.roqIdx := s0_uop.roqIdx
  tlbReq.debug.pc := s0_uop.cf.pc
  tlbReq.debug.lsroqIdx := s0_uop.lsroqIdx

  // ---- DCache query ----
  // NOTE(review): io.dcacheReq.ready is never read in this stage - confirm the DCache
  // load port can always accept a request, otherwise requests could be dropped.
  val cacheReq = io.dcacheReq.bits
  io.dcacheReq.valid := s0_valid
  cacheReq.cmd := MemoryOpConstants.M_XRD
  cacheReq.addr := s0_vaddr
  cacheReq.mask := s0_mask
  cacheReq.data := DontCare

  // TODO: update cache meta
  val cacheMeta = cacheReq.meta
  cacheMeta.id := DontCare
  cacheMeta.vaddr := s0_vaddr
  cacheMeta.paddr := DontCare
  cacheMeta.uop := s0_uop
  cacheMeta.mmio := false.B
  cacheMeta.tlb_miss := false.B
  cacheMeta.mask := s0_mask
  cacheMeta.replay := false.B

  // Back-pressure comes straight from the next stage.
  io.in.ready := io.out.ready
}
73
74
/**
  * Load pipeline stage 1.
  *
  * Consumes the DTLB response: forwards the physical address to the DCache, reports TLB
  * hit/miss through `tlbFeedback`, issues the store-forwarding query, and asks the DCache
  * to kill the request sent in the previous cycle on a TLB miss or an MMIO access.
  */
class LoadUnit_S1 extends XSModule {
  val io = IO(new Bundle() {
    val in = Flipped(Decoupled(new LsPipelineBundle))
    val out = Decoupled(new LsPipelineBundle)
    val redirect = Flipped(ValidIO(new Redirect))
    val tlbFeedback = ValidIO(new TlbFeedback)
    val dtlbResp = Flipped(Valid(new TlbResp))
    val forward = new LoadForwardQueryIO
    val s1_kill = Output(Bool())
    val s1_paddr = Output(UInt(PAddrBits.W))
  })

  val s1_uop = io.in.bits.uop
  val s1_paddr = io.dtlbResp.bits.paddr
  val s1_tlb_miss = io.dtlbResp.bits.miss
  // An address only counts as MMIO when the translation actually hit.
  val s1_mmio = !s1_tlb_miss && AddressSpace.isMMIO(s1_paddr)
  // The stage produces an output only for a valid, non-flushed uop.
  val s1_valid = io.in.valid && !s1_uop.needFlush(io.redirect)

  // io.dtlbResp.ready := io.out.ready

  // ---- stage output ----
  io.out.valid := s1_valid
  // Start from the incoming bundle, then override the fields resolved in this stage
  // (the overrides must follow the bulk assignment).
  io.out.bits := io.in.bits
  io.out.bits.paddr := s1_paddr
  io.out.bits.tlbMiss := s1_tlb_miss
  io.out.bits.mmio := s1_mmio
  io.out.bits.uop.cf.exceptionVec(loadPageFault) := io.dtlbResp.bits.excp.pf.ld

  // ---- TLB feedback ----
  io.tlbFeedback.valid := s1_valid
  io.tlbFeedback.bits.hit := !s1_tlb_miss
  io.tlbFeedback.bits.roqIdx := s1_uop.roqIdx

  // ---- DCache side-band ----
  // If the TLB misses or the access is MMIO, kill the previous cycle's dcache request.
  // TODO: kill dcache request when flushed
  io.s1_kill := s1_tlb_miss || s1_mmio
  io.s1_paddr := s1_paddr

  // ---- store-forwarding query ----
  io.forward.valid := s1_valid
  io.forward.paddr := s1_paddr
  io.forward.mask := io.in.bits.mask
  io.forward.uop := s1_uop
  io.forward.pc := s1_uop.cf.pc
  io.forward.sqIdx := s1_uop.sqIdx
  io.forward.lsroqIdx := s1_uop.lsroqIdx

  // Accept a new entry when the stage is empty or its current entry can move on.
  io.in.ready := !io.in.valid || io.out.ready

}
123
124
/**
  * Load pipeline stage 2.
  *
  * Receives the DCache response, merges it byte-by-byte with data forwarded from the
  * store buffer and the LSROQ (LSROQ data takes priority over store-buffer data), then
  * shifts and sign/zero-extends the result according to the load type.
  */
class LoadUnit_S2 extends XSModule {
  val io = IO(new Bundle() {
    val in = Flipped(Decoupled(new LsPipelineBundle))
    val out = Decoupled(new LsPipelineBundle)
    val redirect = Flipped(ValidIO(new Redirect))
    val dcacheResp = Flipped(DecoupledIO(new DCacheWordResp))
    val sbuffer = new LoadForwardQueryIO
    val lsroq = new LoadForwardQueryIO
  })

  val s2_uop = io.in.bits.uop
  val s2_mask = io.in.bits.mask
  val s2_paddr = io.in.bits.paddr
  val s2_cache_miss = io.dcacheResp.bits.miss

  // Load forward query datapath: both query ports are driven with the same request.
  Seq(io.sbuffer, io.lsroq).foreach { query =>
    query.valid := io.in.valid
    query.paddr := s2_paddr
    query.uop := s2_uop
    query.sqIdx := s2_uop.sqIdx
    query.lsroqIdx := s2_uop.lsroqIdx
    query.mask := s2_mask
    query.pc := s2_uop.cf.pc // FIXME: remove it
  }

  // The DCache response is always accepted; it must be present whenever S2 holds a load.
  io.dcacheResp.ready := true.B
  assert(!(io.in.valid && !io.dcacheResp.valid), "DCache response got lost")

  // Start from the store-buffer forwarding result and let the LSROQ override it per byte.
  val forwardMask = WireInit(io.sbuffer.forwardMask)
  val forwardData = WireInit(io.sbuffer.forwardData)
  (0 until XLEN / 8).foreach { i =>
    when(io.lsroq.forwardMask(i)) {
      forwardMask(i) := true.B
      forwardData(i) := io.lsroq.forwardData(i)
    }
  }
  // True when every byte the load needs has been supplied by forwarding.
  val fullForward = (~forwardMask.asUInt & s2_mask) === 0.U

  // Data merge: a forwarded byte wins over the corresponding DCache byte.
  val mergedBytes = (0 until XLEN / 8).map { j =>
    val cacheByte = io.dcacheResp.bits.data(8 * j + 7, 8 * j)
    Mux(forwardMask(j), forwardData(j), cacheByte)
  }
  val rdata = VecInit(mergedBytes).asUInt

  // Shift the addressed byte down to bit 0 using the low three address bits.
  val rdataSel = LookupTree(s2_paddr(2, 0),
    (0 until 8).map(offset => offset.U -> rdata(63, 8 * offset)).toList)

  // Extend the selected bytes to XLEN according to the load flavour.
  val rdataPartialLoad = LookupTree(s2_uop.ctrl.fuOpType, List(
    LSUOpType.lb  -> SignExt(rdataSel(7, 0), XLEN),
    LSUOpType.lh  -> SignExt(rdataSel(15, 0), XLEN),
    LSUOpType.lw  -> SignExt(rdataSel(31, 0), XLEN),
    LSUOpType.ld  -> SignExt(rdataSel(63, 0), XLEN),
    LSUOpType.lbu -> ZeroExt(rdataSel(7, 0), XLEN),
    LSUOpType.lhu -> ZeroExt(rdataSel(15, 0), XLEN),
    LSUOpType.lwu -> ZeroExt(rdataSel(31, 0), XLEN)
  ))

  // TODO: ECC check

  io.out.valid := io.in.valid && !s2_uop.needFlush(io.redirect)
  // Pass the incoming bundle through, then override the fields produced in this stage.
  io.out.bits := io.in.bits
  io.out.bits.data := rdataPartialLoad
  // A cache miss does not count as a miss if forwarding covered every requested byte.
  io.out.bits.miss := s2_cache_miss && !fullForward

  // Accept a new entry when the stage is empty or its current entry can move on.
  io.in.ready := !io.in.valid || io.out.ready

}
206
207
/**
  * Load unit: a three-stage load pipeline (S0 -> S1 -> S2).
  *
  *  - S0 generates the address and queries the DTLB and DCache.
  *  - S1 takes the DTLB response, sends the physical address / kill to the DCache and
  *    issues the store-forwarding queries to the store buffer and the LSROQ.
  *  - S2 merges the DCache response with forwarded data and produces the load result.
  *
  * Every load leaving S2 is written to the LSROQ. Loads that hit (or are fully
  * forwarded) are also written back directly on `io.ldout`; otherwise `io.ldout`
  * carries the writeback offered by the LSROQ on `io.lsroq.ldout`.
  */
class LoadUnit extends XSModule {
  val io = IO(new Bundle() {
    val ldin = Flipped(Decoupled(new ExuInput))
    val ldout = Decoupled(new ExuOutput)
    val redirect = Flipped(ValidIO(new Redirect))
    val tlbFeedback = ValidIO(new TlbFeedback)
    val dcache = new DCacheLoadIO
    val dtlb = new TlbRequestIO()
    val sbuffer = new LoadForwardQueryIO
    val lsroq = new LoadToLsroqIO
  })

  val load_s0 = Module(new LoadUnit_S0)
  val load_s1 = Module(new LoadUnit_S1)
  val load_s2 = Module(new LoadUnit_S2)

  // ---- stage 0: address generation, DTLB / DCache request ----
  load_s0.io.in <> io.ldin
  load_s0.io.redirect <> io.redirect
  load_s0.io.dtlbReq <> io.dtlb.req
  load_s0.io.dcacheReq <> io.dcache.req

  PipelineConnect(load_s0.io.out, load_s1.io.in, load_s1.io.out.fire(), false.B)

  // ---- stage 1: DTLB response, DCache paddr / kill, forwarding queries ----
  io.dcache.s1_paddr := load_s1.io.out.bits.paddr
  load_s1.io.redirect <> io.redirect
  load_s1.io.tlbFeedback <> io.tlbFeedback
  load_s1.io.dtlbResp <> io.dtlb.resp
  load_s1.io.s1_kill <> io.dcache.s1_kill
  // The same S1 query is sent to both the store buffer and the LSROQ.
  io.sbuffer <> load_s1.io.forward
  io.lsroq.forward <> load_s1.io.forward

  PipelineConnect(load_s1.io.out, load_s2.io.in, load_s2.io.out.fire(), false.B)

  // ---- stage 2: DCache response and forwarding results ----
  load_s2.io.redirect <> io.redirect
  load_s2.io.dcacheResp <> io.dcache.resp
  // Forwarding results for the query issued in S1 are consumed in S2.
  load_s2.io.sbuffer.forwardMask := io.sbuffer.forwardMask
  load_s2.io.sbuffer.forwardData := io.sbuffer.forwardData
  load_s2.io.lsroq.forwardMask := io.lsroq.forward.forwardMask
  load_s2.io.lsroq.forwardData := io.lsroq.forward.forwardData

  XSDebug(load_s0.io.out.valid,
    p"S0: pc ${Hexadecimal(load_s0.io.out.bits.uop.cf.pc)}, " +
    p"vaddr ${Hexadecimal(load_s0.io.out.bits.vaddr)}, mask ${Hexadecimal(load_s0.io.out.bits.mask)}\n")
  // Terminate the S1 line with a newline, like the S0 line, so log entries do not run together.
  XSDebug(load_s1.io.out.valid,
    p"S1: pc ${Hexadecimal(load_s1.io.out.bits.uop.cf.pc)}, tlb_miss ${io.dtlb.resp.bits.miss}, " +
    p"paddr ${Hexadecimal(load_s1.io.out.bits.paddr)}, mmio ${load_s1.io.out.bits.mmio}\n")

  // writeback to LSROQ
  // Current dcache use MSHR
  io.lsroq.loadIn.valid := load_s2.io.out.valid
  io.lsroq.loadIn.bits := load_s2.io.out.bits

  // Direct writeback candidate: a load that leaves S2 without a miss.
  val hitLoadOut = Wire(Valid(new ExuOutput))
  hitLoadOut.valid := load_s2.io.out.valid && !load_s2.io.out.bits.miss
  hitLoadOut.bits.uop := load_s2.io.out.bits.uop
  hitLoadOut.bits.data := load_s2.io.out.bits.data
  hitLoadOut.bits.redirectValid := false.B
  hitLoadOut.bits.redirect := DontCare
  hitLoadOut.bits.brUpdate := DontCare
  hitLoadOut.bits.debug.isMMIO := load_s2.io.out.bits.mmio

  // TODO: arbiter
  // if hit, writeback result to CDB
  // val ldout = Vec(2, Decoupled(new ExuOutput))
  // when io.loadIn(i).fire() && !io.io.loadIn(i).miss, commit load to cdb
  // val cdbArb = Module(new Arbiter(new ExuOutput, 2))
  // io.ldout <> cdbArb.io.out
  // hitLoadOut <> cdbArb.io.in(0)
  // io.lsroq.ldout <> cdbArb.io.in(1) // missLoadOut
  // NOTE(review): io.ldout.ready is not consulted by this arbitration - confirm the
  // consumer of io.ldout can always accept a writeback.
  load_s2.io.out.ready := true.B
  // Fixed-priority arbitration: the hit writeback wins, the LSROQ writeback is only
  // accepted in cycles without a hit.
  io.lsroq.ldout.ready := !hitLoadOut.valid
  // Select on hitLoadOut.valid so that the LSROQ payload is forwarded when it is the
  // one being written back. (Selecting on load_s2.io.out.ready, which is tied to
  // true.B above, would always pick the hit payload.)
  io.ldout.bits := Mux(hitLoadOut.valid, hitLoadOut.bits, io.lsroq.ldout.bits)
  io.ldout.valid := hitLoadOut.valid || io.lsroq.ldout.valid

  when(io.ldout.fire()){
    XSDebug("ldout %x iw %x fw %x\n", io.ldout.bits.uop.cf.pc, io.ldout.bits.uop.ctrl.rfWen, io.ldout.bits.uop.ctrl.fpWen)
  }
}
286