xref: /XiangShan/src/main/scala/xiangshan/mem/pipeline/AtomicsUnit.scala (revision 94aa21c6009c2f39c5c5dae9c87260c78887efcc)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.cache.{AtomicWordIO, HasDCacheParameters, MemoryOpConstants}
import xiangshan.cache.mmu.{TlbCmd, TlbRequestIO}
import difftest._
import xiangshan.ExceptionNO._
import xiangshan.backend.fu.PMPRespBundle
import xiangshan.backend.fu.FuType
import xiangshan.backend.Bundles.{MemExuInput, MemExuOutput}
import xiangshan.backend.fu.NewCSR.TriggerUtil
import xiangshan.backend.fu.util.SdtrigExt
import xiangshan.cache.mmu.Pbmt

class AtomicsUnit(implicit p: Parameters) extends XSModule
  with MemoryOpConstants
  with HasDCacheParameters
  with SdtrigExt {

  val StdCnt  = backendParams.StdCnt

  val io = IO(new Bundle() {
    val hartId        = Input(UInt(hartIdLen.W))
    val in            = Flipped(Decoupled(new MemExuInput))
    val storeDataIn   = Flipped(Vec(StdCnt, Valid(new MemExuOutput)))
    val out           = Decoupled(new MemExuOutput)
    val dcache        = new AtomicWordIO
    val dtlb          = new TlbRequestIO(2)
    val pmpResp       = Flipped(new PMPRespBundle())
    val flush_sbuffer = new SbufferFlushBundle
    val feedbackSlow  = ValidIO(new RSFeedback)
    val redirect      = Flipped(ValidIO(new Redirect))
    val exceptionInfo = ValidIO(new Bundle {
      val vaddr = UInt(XLEN.W)
      val gpaddr = UInt(XLEN.W)
      val isForVSnonLeafPTE = Bool()
    })
    val csrCtrl       = Flipped(new CustomCSRCtrlIO)
  })
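  // Issue/write-back interface: the sta uop (address, rs1) arrives through `io.in`,
  // the std uops (source data) arrive through `io.storeDataIn`, and results are
  // written back through `io.out` (twice for AMOCAS.Q, see the uop-count note below).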

  //-------------------------------------------------------
  // Atomics Memory Access FSM
  //-------------------------------------------------------
  val s_invalid :: s_tlb_and_flush_sbuffer_req :: s_pm :: s_wait_flush_sbuffer_resp :: s_cache_req :: s_cache_resp :: s_cache_resp_latch :: s_finish :: s_finish2 :: Nil = Enum(9)
  val state = RegInit(s_invalid)
  val out_valid = RegInit(false.B)
  val data_valid = RegInit(false.B)

  val uop = Reg(io.in.bits.uop.cloneType)
  val isLr = LSUOpType.isLr(uop.fuOpType)
  val isSc = LSUOpType.isSc(uop.fuOpType)
  val isAMOCAS = LSUOpType.isAMOCAS(uop.fuOpType)
  val isNotLr = !isLr
  val isNotSc = !isSc
  // AMOCAS.Q needs to write two int registers, therefore the backend issues two sta uops for AMOCAS.Q.
  // `pdest2` is used to record the pdest of the second uop.
  val pdest1, pdest2 = Reg(UInt(PhyRegIdxWidth.W))
  val pdest1Valid, pdest2Valid = RegInit(false.B)
  /**
    * The # of std uops that an atomic instruction requires:
    * (1) For AMOs (except AMOCAS) and LR/SC, 1 std uop is needed: X(rs2) with uopIdx = 0
    * (2) For AMOCAS.W/D, 2 std uops are needed: X(rd), X(rs2) with uopIdx = 0, 1
    * (3) For AMOCAS.Q, 4 std uops are needed: X(rd), X(rs2), X(rd+1), X(rs2+1) with uopIdx = 0, 1, 2, 3
    * std uops are not used for write-back.
    *
    * The # of sta uops that an atomic instruction requires, which is also the # of write-backs:
    * (1) For AMOs (except AMOCAS.Q) and LR/SC, 1 sta uop is needed: X(rs1) with uopIdx = 0
    * (2) For AMOCAS.Q, 2 sta uops are needed: X(rs1)*2 with uopIdx = 0, 2
    */
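  // stds(i) is written by the std uop with uopIdx = i. Note that for non-CAS ops the single
  // std uop (uopIdx = 0) carries X(rs2) but lands in stds.head (rd_l), hence the Mux on `rs2` below.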
  val rs1, rs2_l, rs2_h, rd_l, rd_h = Reg(UInt(XLEN.W))
  val stds = Seq(rd_l, rs2_l, rd_h, rs2_h)
  val rs2 = Cat(rs2_h, Mux(isAMOCAS, rs2_l, stds.head))
  val rd = Cat(rd_h, rd_l)
  val stdCnt = RegInit(0.U(log2Ceil(stds.length + 1).W))

  val exceptionVec = RegInit(0.U.asTypeOf(ExceptionVec()))
  val trigger = RegInit(TriggerAction.None)
  val atom_override_xtval = RegInit(false.B)
  val have_sent_first_tlb_req = RegInit(false.B)
  // paddr after translation
  val paddr = Reg(UInt())
  val gpaddr = Reg(UInt())
  val vaddr = rs1

  val is_mmio = Reg(Bool())
  val is_nc = RegInit(false.B)
  val isForVSnonLeafPTE = Reg(Bool())

  // dcache response data
  val resp_data = Reg(UInt())
  val resp_data_wire = WireInit(0.U)
  val success = Reg(Bool())
  // sbuffer is empty or not
  val sbuffer_empty = io.flush_sbuffer.empty

  // Only the least significant AMOFuOpWidth = 6 bits of fuOpType are used,
  // therefore the MSBs are reused to identify uopIdx
  val stdUopIdxs = io.storeDataIn.map(_.bits.uop.fuOpType >> LSUOpType.AMOFuOpWidth)
  val staUopIdx = io.in.bits.uop.fuOpType >> LSUOpType.AMOFuOpWidth

  // assign default value to output signals
  io.in.ready          := false.B

  io.dcache.req.valid  := false.B
  io.dcache.req.bits   := DontCare

  io.dtlb.req.valid    := false.B
  io.dtlb.req.bits     := DontCare
  io.dtlb.req_kill     := false.B
  io.dtlb.resp.ready   := true.B

  io.flush_sbuffer.valid := false.B

  when (state === s_invalid) {
    when (io.in.fire) {
      uop := io.in.bits.uop
      rs1 := io.in.bits.src_rs1
      state := s_tlb_and_flush_sbuffer_req
      have_sent_first_tlb_req := false.B
    }
  }

  when (io.in.fire) {
    val pdest = io.in.bits.uop.pdest
    when (staUopIdx === 0.U) {
      pdest1Valid := true.B
      pdest1 := pdest
    }.elsewhen (staUopIdx === 2.U) {
      pdest2Valid := true.B
      pdest2 := pdest
    }.otherwise {
      assert(false.B, "unrecognized sta uopIdx")
    }
  }

  stds.zipWithIndex.foreach { case (data, i) =>
    val sels = io.storeDataIn.zip(stdUopIdxs).map { case (in, uopIdx) =>
      val sel = in.fire && uopIdx === i.U
      when (sel) { data := in.bits.data }
      sel
    }
    OneHot.checkOneHot(sels)
  }
  stdCnt := stdCnt + PopCount(io.storeDataIn.map(_.fire))

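  // stdCnt counts how many std uops have delivered their data so far; once the number
  // required by the current op has arrived (see the constants below), data_valid is set.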
  val StdCntNCAS = 1 // LR/SC and AMO need only 1 src besides rs1
  val StdCntCASWD = 2 // AMOCAS.W/D needs 2 src regs (rs2 and rd) besides rs1
  val StdCntCASQ = 4 // AMOCAS.Q needs 4 src regs (rs2, rs2+1, rd, rd+1) besides rs1
  when (!data_valid) {
    data_valid := state =/= s_invalid && (
      LSUOpType.isAMOCASQ(uop.fuOpType) && stdCnt === StdCntCASQ.U ||
      LSUOpType.isAMOCASWD(uop.fuOpType) && stdCnt === StdCntCASWD.U ||
      !isAMOCAS && stdCnt === StdCntNCAS.U
    )
  }
  assert(stdCnt <= stds.length.U, "unexpected std")
  assert(!(Cat(io.storeDataIn.map(_.fire)).orR && data_valid), "atomic unit re-receive data")

  // atomic trigger
  val csrCtrl = io.csrCtrl
  val tdata = Reg(Vec(TriggerNum, new MatchTriggerIO))
  val tEnableVec = RegInit(VecInit(Seq.fill(TriggerNum)(false.B)))
  tEnableVec := csrCtrl.mem_trigger.tEnableVec
  when (csrCtrl.mem_trigger.tUpdate.valid) {
    tdata(csrCtrl.mem_trigger.tUpdate.bits.addr) := csrCtrl.mem_trigger.tUpdate.bits.tdata
  }

  val debugMode = csrCtrl.mem_trigger.debugMode
  val triggerCanRaiseBpExp = csrCtrl.mem_trigger.triggerCanRaiseBpExp
  val backendTriggerTimingVec = VecInit(tdata.map(_.timing))
  val backendTriggerChainVec = VecInit(tdata.map(_.chain))
  val backendTriggerHitVec = WireInit(VecInit(Seq.fill(TriggerNum)(false.B)))
  val backendTriggerCanFireVec = RegInit(VecInit(Seq.fill(TriggerNum)(false.B)))

  assert(state === s_invalid ||
    uop.fuOpType(1,0) === "b10".U ||
    uop.fuOpType(1,0) === "b11".U ||
    LSUOpType.isAMOCASQ(uop.fuOpType),
    "Only word or doubleword or quadword is supported"
  )

  // store trigger
  val store_hit = Wire(Vec(TriggerNum, Bool()))
  for (j <- 0 until TriggerNum) {
    store_hit(j) := !tdata(j).select && !debugMode && isNotLr && TriggerCmp(
      vaddr,
      tdata(j).tdata2,
      tdata(j).matchType,
      tEnableVec(j) && tdata(j).store
    )
  }
  // load trigger
  val load_hit = Wire(Vec(TriggerNum, Bool()))
  for (j <- 0 until TriggerNum) {
    load_hit(j) := !tdata(j).select && !debugMode && isNotSc && TriggerCmp(
      vaddr,
      tdata(j).tdata2,
      tdata(j).matchType,
      tEnableVec(j) && tdata(j).load
    )
  }
  backendTriggerHitVec := store_hit.zip(load_hit).map { case (sh, lh) => sh || lh }
  // triggerCanFireVec will update at T+1
  TriggerCheckCanFire(TriggerNum, backendTriggerCanFireVec, backendTriggerHitVec,
    backendTriggerTimingVec, backendTriggerChainVec)

  val actionVec = VecInit(tdata.map(_.action))
  val triggerAction = Wire(TriggerAction())
  TriggerUtil.triggerActionGen(triggerAction, backendTriggerCanFireVec, actionVec, triggerCanRaiseBpExp)
  val triggerDebugMode = TriggerAction.isDmode(triggerAction)
  val triggerBreakpoint = TriggerAction.isExp(triggerAction)

  // tlb translation: handle the tlb response and record exceptions;
  // at the same time, flush the sbuffer
  when (state === s_tlb_and_flush_sbuffer_req) {
    // do not accept the tlb resp in the first cycle
    // this limitation exists for the hw prefetcher:
    // when !have_sent_first_tlb_req, the tlb resp may belong to a hw prefetch request
    have_sent_first_tlb_req := true.B

    when (io.dtlb.resp.fire && have_sent_first_tlb_req) {
      paddr   := io.dtlb.resp.bits.paddr(0)
      gpaddr  := io.dtlb.resp.bits.gpaddr(0)
      vaddr   := io.dtlb.resp.bits.fullva
      isForVSnonLeafPTE := io.dtlb.resp.bits.isForVSnonLeafPTE
      // exception handling
      val addrAligned = LookupTree(uop.fuOpType(1,0), List(
        "b10".U -> (vaddr(1,0) === 0.U), // W
        "b11".U -> (vaddr(2,0) === 0.U), // D
        "b00".U -> (vaddr(3,0) === 0.U)  // Q
      ))
      exceptionVec(loadAddrMisaligned)  := !addrAligned && isLr
      exceptionVec(storeAddrMisaligned) := !addrAligned && !isLr
      exceptionVec(storePageFault)      := io.dtlb.resp.bits.excp(0).pf.st
      exceptionVec(loadPageFault)       := io.dtlb.resp.bits.excp(0).pf.ld
      exceptionVec(storeAccessFault)    := io.dtlb.resp.bits.excp(0).af.st
      exceptionVec(loadAccessFault)     := io.dtlb.resp.bits.excp(0).af.ld
      exceptionVec(storeGuestPageFault) := io.dtlb.resp.bits.excp(0).gpf.st
      exceptionVec(loadGuestPageFault)  := io.dtlb.resp.bits.excp(0).gpf.ld

      exceptionVec(breakPoint) := triggerBreakpoint
      trigger                  := triggerAction

      when (!io.dtlb.resp.bits.miss) {
        is_nc := Pbmt.isNC(io.dtlb.resp.bits.pbmt(0))
        io.out.bits.uop.debugInfo.tlbRespTime := GTimer()
        when (!addrAligned || triggerDebugMode || triggerBreakpoint) {
          // NOTE: if the address is misaligned or a trigger fires, there is actually no need to wait for the tlb.
          // Only misaligned exceptions are checked here; tlb exceptions are checked in the next cycle for timing.
          // If there is an exception, there is no need to execute the access.
          state := s_finish
          out_valid := true.B
          atom_override_xtval := true.B
        }.otherwise {
          state := s_pm
        }
      }
    }
  }

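  // s_pm: physical-address checks. The PMP/PMA result and the tlb-reported exceptions are
  // evaluated here, one cycle after the tlb response, for timing reasons.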
  val pbmtReg = RegEnable(io.dtlb.resp.bits.pbmt(0), io.dtlb.resp.fire && !io.dtlb.resp.bits.miss)
  when (state === s_pm) {
    val pmp = WireInit(io.pmpResp)
    is_mmio := Pbmt.isIO(pbmtReg) || (Pbmt.isPMA(pbmtReg) && pmp.mmio)

    // NOTE: only load/store exceptions are handled here; if any other exception occurs, it must not be sent here
    val exception_va = exceptionVec(storePageFault) || exceptionVec(loadPageFault) ||
      exceptionVec(storeGuestPageFault) || exceptionVec(loadGuestPageFault) ||
      exceptionVec(storeAccessFault) || exceptionVec(loadAccessFault)
    val exception_pa = pmp.st || pmp.ld || pmp.mmio
    when (exception_va || exception_pa) {
      state := s_finish
      out_valid := true.B
      atom_override_xtval := true.B
    }.otherwise {
      // if the sbuffer has already been flushed, go to query the dcache, otherwise wait for the sbuffer
      state := Mux(sbuffer_empty, s_cache_req, s_wait_flush_sbuffer_resp)
    }
    // update the load/store access fault bits according to the PMP/PMA check
    exceptionVec(loadAccessFault) := exceptionVec(loadAccessFault) || (pmp.ld || pmp.mmio) && isLr
    exceptionVec(storeAccessFault) := exceptionVec(storeAccessFault) || pmp.st || (pmp.ld || pmp.mmio) && !isLr
  }

  when (state === s_wait_flush_sbuffer_resp) {
    when (sbuffer_empty) {
      state := s_cache_req
    }
  }

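  /**
    * Replicate the source data across the 128-bit amo_data lane according to the access size
    * (4x for W, 2x for D, as-is for Q), presumably so that the DCache main pipe sees the correct
    * operand in whichever 64-bit bank / 32-bit word the address selects.
    */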
  def genWdataAMO(data: UInt, sizeEncode: UInt): UInt = {
    LookupTree(sizeEncode(1, 0), List(
      "b10".U -> Fill(4, data(31, 0)),
      "b11".U -> Fill(2, data(63, 0)),
      "b00".U -> data(127, 0)
    ))
  }

  def genWmaskAMO(addr: UInt, sizeEncode: UInt): UInt = {
    /**
      * `MainPipeReq` uses `word_idx` to recognize which 64-bit data bank to operate on. Double-word atomics are
      * always 8B aligned and quad-word atomics are always 16B aligned (except when a misaligned exception is raised),
      * therefore `word_idx` is enough and there is no need to shift according to the address. Only word atomics need
      * the LSBs of the address to shift the mask within a 64-bit aligned range.
      */
    LookupTree(sizeEncode(1, 0), List(
      "b10".U -> (0xf.U << addr(2,0)), // W
      "b11".U -> 0xff.U, // D
      "b00".U -> 0xffff.U // Q
    ))
  }

  when (state === s_cache_req) {
    when (io.dcache.req.fire) {
      state := s_cache_resp
    }
  }

  val dcache_resp_data  = Reg(UInt())
  val dcache_resp_id    = Reg(UInt())
  val dcache_resp_error = Reg(Bool())

  when (state === s_cache_resp) {
    // when not a miss:
    //   everything is OK, simply latch the response and finish
    // when a miss and not a replay:
    //   wait for the missQueue to handle the miss and replay our request
    // when a miss and a replay:
    //   the req missed and failed to enter the missQueue, manually replay it later
    // TODO: add assertions:
    // 1. add a replay delay counter?
    // 2. when a req gets into the MissQueue, it should not miss any more
    when (io.dcache.resp.fire) {
      when (io.dcache.resp.bits.miss) {
        when (io.dcache.resp.bits.replay) {
          state := s_cache_req
        }
      }.otherwise {
        dcache_resp_data := io.dcache.resp.bits.data
        dcache_resp_id := io.dcache.resp.bits.id
        dcache_resp_error := io.dcache.resp.bits.error
        state := s_cache_resp_latch
      }
    }
  }

  when (state === s_cache_resp_latch) {
    success := dcache_resp_id
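    // For SC, the DCache reports success/failure through the resp `id` field latched above;
    // `success` is only consumed by difftest below. The DCache returns a full 64-bit bank for
    // W/D accesses, so for W the paddr offset within the bank (0 or 4, see the assert below)
    // selects the upper or lower 32 bits.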
    val rdataSel = Mux(
      paddr(2, 0) === 0.U,
      dcache_resp_data,
      dcache_resp_data >> 32
    )
    assert(paddr(2, 0) === "b000".U || paddr(2, 0) === "b100".U)

    resp_data_wire := Mux(
      isSc,
      dcache_resp_data,
      LookupTree(uop.fuOpType(1,0), List(
        "b10".U -> SignExt(rdataSel(31, 0), QuadWordBits), // W
        "b11".U -> SignExt(rdataSel(63, 0), QuadWordBits), // D
        "b00".U -> rdataSel // Q
      ))
    )

    when (dcache_resp_error && io.csrCtrl.cache_error_enable) {
      exceptionVec(loadAccessFault)  := isLr
      exceptionVec(storeAccessFault) := !isLr
      assert(!exceptionVec(loadAccessFault))
      assert(!exceptionVec(storeAccessFault))
    }

    resp_data := resp_data_wire
    state := s_finish
    out_valid := true.B
  }

  when (state === s_finish) {
    when (io.out.fire) {
      when (LSUOpType.isAMOCASQ(uop.fuOpType)) {
        // enter `s_finish2` to write the 2nd uop back
        state := s_finish2
        out_valid := true.B
      }.otherwise {
        // otherwise the FSM ends here
        resetFSM()
      }
    }
  }

  when (state === s_finish2) {
    when (io.out.fire) {
      resetFSM()
    }
  }

  when (io.redirect.valid) {
    atom_override_xtval := false.B
  }

  def resetFSM(): Unit = {
    state := s_invalid
    out_valid := false.B
    data_valid := false.B
    stdCnt := 0.U
    pdest1Valid := false.B
    pdest2Valid := false.B
  }

  /**
    * IO assignment
    */
  io.exceptionInfo.valid := atom_override_xtval
  io.exceptionInfo.bits.vaddr := vaddr
  io.exceptionInfo.bits.gpaddr := gpaddr
  io.exceptionInfo.bits.isForVSnonLeafPTE := isForVSnonLeafPTE

  // Send TLB feedback to the store issue queue.
  // We send the feedback right after we receive the request.
  // Also, we always treat an amo as a tlb hit,
  // since we will keep polling the tlb by ourselves.
  io.feedbackSlow.valid       := GatedValidRegNext(GatedValidRegNext(io.in.valid))
  io.feedbackSlow.bits.hit    := true.B
  io.feedbackSlow.bits.robIdx  := RegEnable(io.in.bits.uop.robIdx, io.in.valid)
  io.feedbackSlow.bits.sqIdx   := RegEnable(io.in.bits.uop.sqIdx, io.in.valid)
  io.feedbackSlow.bits.lqIdx   := RegEnable(io.in.bits.uop.lqIdx, io.in.valid)
  io.feedbackSlow.bits.flushState := DontCare
  io.feedbackSlow.bits.sourceType := DontCare
  io.feedbackSlow.bits.dataInvalidSqIdx := DontCare

  // send req to the dtlb
  // keep firing until the tlb hits
  io.dtlb.req.valid       := state === s_tlb_and_flush_sbuffer_req
  io.dtlb.req.bits.vaddr  := vaddr
  io.dtlb.req.bits.fullva := vaddr
  io.dtlb.req.bits.checkfullva := true.B
  io.dtlb.resp.ready      := true.B
  io.dtlb.req.bits.cmd    := Mux(isLr, TlbCmd.atom_read, TlbCmd.atom_write)
  io.dtlb.req.bits.debug.pc := uop.pc
  io.dtlb.req.bits.debug.robIdx := uop.robIdx
  io.dtlb.req.bits.debug.isFirstIssue := false.B
  io.out.bits.uop.debugInfo.tlbFirstReqTime := GTimer() // FIXME lyq: it will always be assigned

  // send a req to the sbuffer to flush it if it is not empty
  io.flush_sbuffer.valid := !sbuffer_empty && state === s_tlb_and_flush_sbuffer_req

  // The sta issue port is ready when:
  // (1) the AtomicsUnit is idle, or
  // (2) for AMOCAS.Q, the second uop (carrying the pdest of the higher bits of rd) has not been received yet
  io.in.ready := state === s_invalid || LSUOpType.isAMOCASQ(uop.fuOpType) && (!pdest2Valid || !pdest1Valid)

  io.out.valid := out_valid && Mux(state === s_finish2, pdest2Valid, pdest1Valid)
  XSError((state === s_finish || state === s_finish2) =/= out_valid, "out_valid reg error\n")
  io.out.bits := DontCare
  io.out.bits.uop := uop
  io.out.bits.uop.fuType := FuType.mou.U
  io.out.bits.uop.pdest := Mux(state === s_finish2, pdest2, pdest1)
  io.out.bits.uop.exceptionVec := exceptionVec
  io.out.bits.uop.trigger := trigger
  io.out.bits.data := Mux(state === s_finish2, resp_data >> XLEN, resp_data)
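  // For AMOCAS.Q, the first write-back (s_finish) returns the low XLEN bits of the old value
  // and the second write-back (s_finish2) returns the high XLEN bits.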
  io.out.bits.debug.isMMIO := is_mmio
  io.out.bits.debug.paddr := paddr

  io.dcache.req.valid := Mux(
    io.dcache.req.bits.cmd === M_XLR,
    !io.dcache.block_lr, // block lr to survive in lr storm
    data_valid // wait until src(1) is ready
  ) && state === s_cache_req
  val pipe_req = io.dcache.req.bits
  pipe_req := DontCare
  pipe_req.cmd := LookupTree(uop.fuOpType, List(
    // TODO: optimize this
    LSUOpType.lr_w      -> M_XLR,
    LSUOpType.sc_w      -> M_XSC,
    LSUOpType.amoswap_w -> M_XA_SWAP,
    LSUOpType.amoadd_w  -> M_XA_ADD,
    LSUOpType.amoxor_w  -> M_XA_XOR,
    LSUOpType.amoand_w  -> M_XA_AND,
    LSUOpType.amoor_w   -> M_XA_OR,
    LSUOpType.amomin_w  -> M_XA_MIN,
    LSUOpType.amomax_w  -> M_XA_MAX,
    LSUOpType.amominu_w -> M_XA_MINU,
    LSUOpType.amomaxu_w -> M_XA_MAXU,
    LSUOpType.amocas_w  -> M_XA_CASW,

    LSUOpType.lr_d      -> M_XLR,
    LSUOpType.sc_d      -> M_XSC,
    LSUOpType.amoswap_d -> M_XA_SWAP,
    LSUOpType.amoadd_d  -> M_XA_ADD,
    LSUOpType.amoxor_d  -> M_XA_XOR,
    LSUOpType.amoand_d  -> M_XA_AND,
    LSUOpType.amoor_d   -> M_XA_OR,
    LSUOpType.amomin_d  -> M_XA_MIN,
    LSUOpType.amomax_d  -> M_XA_MAX,
    LSUOpType.amominu_d -> M_XA_MINU,
    LSUOpType.amomaxu_d -> M_XA_MAXU,
    LSUOpType.amocas_d  -> M_XA_CASD,

    LSUOpType.amocas_q  -> M_XA_CASQ
  ))
  pipe_req.miss := false.B
  pipe_req.probe := false.B
  pipe_req.probe_need_data := false.B
  pipe_req.source := AMO_SOURCE.U
  pipe_req.addr   := get_block_addr(paddr)
  pipe_req.vaddr  := get_block_addr(vaddr)
  pipe_req.word_idx  := get_word(paddr)
  pipe_req.amo_data := genWdataAMO(rs2, uop.fuOpType)
  pipe_req.amo_mask := genWmaskAMO(paddr, uop.fuOpType)
  pipe_req.amo_cmp  := genWdataAMO(rd, uop.fuOpType)
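  // amo_data carries the store/swap operand (rs2); amo_cmp carries the expected old value (rd)
  // used for the AMOCAS comparison.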

  if (env.EnableDifftest) {
    val difftest = DifftestModule(new DiffAtomicEvent)
    val en = io.dcache.req.fire
    difftest.coreid := io.hartId
    difftest.valid  := state === s_cache_resp_latch
    difftest.addr   := RegEnable(paddr, en)
    difftest.data   := RegEnable(io.dcache.req.bits.amo_data.asTypeOf(difftest.data), en)
    difftest.mask   := RegEnable(io.dcache.req.bits.amo_mask, en)
    difftest.cmp    := RegEnable(io.dcache.req.bits.amo_cmp.asTypeOf(difftest.cmp), en)
    difftest.fuop   := RegEnable(uop.fuOpType, en)
    difftest.out    := resp_data_wire.asTypeOf(difftest.out)
  }

  if (env.EnableDifftest || env.AlwaysBasicDiff) {
    val uop = io.out.bits.uop
    val difftest = DifftestModule(new DiffLrScEvent)
    difftest.coreid := io.hartId
    difftest.valid := io.out.fire && state === s_finish && isSc
    difftest.success := success
  }
}