/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.cache.{AtomicWordIO, HasDCacheParameters, MemoryOpConstants}
import xiangshan.cache.mmu.{TlbCmd, TlbRequestIO}
import difftest._
import xiangshan.ExceptionNO._
import xiangshan.backend.fu.PMPRespBundle
import xiangshan.backend.fu.FuType
import xiangshan.backend.Bundles.{MemExuInput, MemExuOutput}
import xiangshan.backend.fu.NewCSR.TriggerUtil
import xiangshan.backend.fu.util.SdtrigExt
import xiangshan.cache.mmu.Pbmt

class AtomicsUnit(implicit p: Parameters) extends XSModule
  with MemoryOpConstants
  with HasDCacheParameters
  with SdtrigExt {

  val StdCnt  = backendParams.StdCnt

  val io = IO(new Bundle() {
    val hartId        = Input(UInt(hartIdLen.W))
    val in            = Flipped(Decoupled(new MemExuInput))
    val storeDataIn   = Flipped(Vec(StdCnt, Valid(new MemExuOutput)))
    val out           = Decoupled(new MemExuOutput)
    val dcache        = new AtomicWordIO
    val dtlb          = new TlbRequestIO(2)
    val pmpResp       = Flipped(new PMPRespBundle())
    val flush_sbuffer = new SbufferFlushBundle
    val feedbackSlow  = ValidIO(new RSFeedback)
    val redirect      = Flipped(ValidIO(new Redirect))
    val exceptionInfo = ValidIO(new Bundle {
      val vaddr = UInt(XLEN.W)
      val gpaddr = UInt(XLEN.W)
      val isForVSnonLeafPTE = Bool()
    })
    val csrCtrl       = Flipped(new CustomCSRCtrlIO)
  })

  //-------------------------------------------------------
  // Atomics Memory Access FSM
  //-------------------------------------------------------
  val s_invalid :: s_tlb_and_flush_sbuffer_req :: s_pm :: s_wait_flush_sbuffer_resp :: s_cache_req :: s_cache_resp :: s_cache_resp_latch :: s_finish :: s_finish2 :: Nil = Enum(9)
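  // Typical state flow: s_invalid -> s_tlb_and_flush_sbuffer_req -> s_pm -> (s_wait_flush_sbuffer_resp ->)
  // s_cache_req -> s_cache_resp -> s_cache_resp_latch -> s_finish (-> s_finish2 for AMOCAS.Q, which
  // writes back twice). Exceptions jump directly to s_finish.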
  val state = RegInit(s_invalid)
  val out_valid = RegInit(false.B)
  val data_valid = RegInit(false.B)

  val uop = Reg(io.in.bits.uop.cloneType)
  val isLr = LSUOpType.isLr(uop.fuOpType)
  val isSc = LSUOpType.isSc(uop.fuOpType)
  val isAMOCAS = LSUOpType.isAMOCAS(uop.fuOpType)
  val isNotLr = !isLr
  val isNotSc = !isSc
  // AMOCAS.Q needs to write two integer registers, so the backend issues two sta uops for it.
  // `pdest2` records the pdest of the second uop.
  val pdest1, pdest2 = Reg(UInt(PhyRegIdxWidth.W))
  val pdest1Valid, pdest2Valid = RegInit(false.B)
  /**
    * The number of std uops that an atomic instruction requires:
    * (1) AMOs (except AMOCAS) and LR/SC require 1 std uop: X(rs2), with uopIdx = 0
    * (2) AMOCAS.W/D requires 2 std uops: X(rd), X(rs2), with uopIdx = 0, 1
    * (3) AMOCAS.Q requires 4 std uops: X(rd), X(rs2), X(rd+1), X(rs2+1), with uopIdx = 0, 1, 2, 3
    * stds do not write back.
    *
    * The number of sta uops that an atomic instruction requires, which also equals the number of write-backs:
    * (1) AMOs (except AMOCAS.Q) and LR/SC require 1 sta uop: X(rs1), with uopIdx = 0
    * (2) AMOCAS.Q requires 2 sta uops: X(rs1)*2, with uopIdx = 0, 2
    */
  val rs1, rs2_l, rs2_h, rd_l, rd_h = Reg(UInt(XLEN.W))
  val stds = Seq(rd_l, rs2_l, rd_h, rs2_h)
  val rs2 = Cat(rs2_h, Mux(isAMOCAS, rs2_l, stds.head))
  val rd = Cat(rd_h, rd_l)
  val stdCnt = RegInit(0.U(log2Ceil(stds.length + 1).W))

  val exceptionVec = RegInit(0.U.asTypeOf(ExceptionVec()))
  val trigger = RegInit(TriggerAction.None)
  val atom_override_xtval = RegInit(false.B)
  val have_sent_first_tlb_req = RegInit(false.B)
  // paddr after translation
  val paddr = Reg(UInt())
  val gpaddr = Reg(UInt())
  val vaddr = rs1

  val is_mmio = Reg(Bool())
  val isForVSnonLeafPTE = Reg(Bool())

  // dcache response data
  val resp_data = Reg(UInt())
  val resp_data_wire = WireInit(0.U)
  val success = Reg(Bool())
  // sbuffer is empty or not
  val sbuffer_empty = io.flush_sbuffer.empty

  // Only the least significant AMOFuOpWidth = 6 bits of fuOpType are used,
  // therefore the MSBs are reused to identify the uopIdx
  val stdUopIdxs = io.storeDataIn.map(_.bits.uop.fuOpType >> LSUOpType.AMOFuOpWidth)
  val staUopIdx = io.in.bits.uop.fuOpType >> LSUOpType.AMOFuOpWidth
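  // e.g. the second sta uop of AMOCAS.Q carries uopIdx = 2 in the MSBs of its fuOpType,
  // matching the `staUopIdx === 2.U` case below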

  // assign default value to output signals
  io.in.ready          := false.B

  io.dcache.req.valid  := false.B
  io.dcache.req.bits   := DontCare

  io.dtlb.req.valid    := false.B
  io.dtlb.req.bits     := DontCare
  io.dtlb.req_kill     := false.B
  io.dtlb.resp.ready   := true.B

  io.flush_sbuffer.valid := false.B

  when (state === s_invalid) {
    when (io.in.fire) {
      uop := io.in.bits.uop
      rs1 := io.in.bits.src_rs1
      state := s_tlb_and_flush_sbuffer_req
      have_sent_first_tlb_req := false.B
    }
  }

  when (io.in.fire) {
    val pdest = io.in.bits.uop.pdest
    when (staUopIdx === 0.U) {
      pdest1Valid := true.B
      pdest1 := pdest
    }.elsewhen (staUopIdx === 2.U) {
      pdest2Valid := true.B
      pdest2 := pdest
    }.otherwise {
      assert(false.B, "unrecognized sta uopIdx")
    }
  }

  stds.zipWithIndex.foreach { case (data, i) =>
    val sels = io.storeDataIn.zip(stdUopIdxs).map { case (in, uopIdx) =>
      val sel = in.fire && uopIdx === i.U
      when (sel) { data := in.bits.data }
      sel
    }
    OneHot.checkOneHot(sels)
  }
  stdCnt := stdCnt + PopCount(io.storeDataIn.map(_.fire))

  val StdCntNCAS = 1 // LR/SC and AMO need only 1 src besides rs1
  val StdCntCASWD = 2 // AMOCAS.W/D needs 2 src regs (rs2 and rd) besides rs1
  val StdCntCASQ = 4 // AMOCAS.Q needs 4 src regs (rs2, rs2+1, rd, rd+1) besides rs1
  when (!data_valid) {
    data_valid := state =/= s_invalid && (
      LSUOpType.isAMOCASQ(uop.fuOpType) && stdCnt === StdCntCASQ.U ||
      LSUOpType.isAMOCASWD(uop.fuOpType) && stdCnt === StdCntCASWD.U ||
      !isAMOCAS && stdCnt === StdCntNCAS.U
    )
  }
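  // e.g. an AMOADD.D becomes ready to access the dcache once its single std uop has
  // arrived (stdCnt = 1), while AMOCAS.Q must wait for all four std uops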
  assert(stdCnt <= stds.length.U, "unexpected std")
  assert(!(Cat(io.storeDataIn.map(_.fire)).orR && data_valid), "atomic unit re-receive data")

  // atomic trigger
  val csrCtrl = io.csrCtrl
  val tdata = Reg(Vec(TriggerNum, new MatchTriggerIO))
  val tEnableVec = RegInit(VecInit(Seq.fill(TriggerNum)(false.B)))
  tEnableVec := csrCtrl.mem_trigger.tEnableVec
  when (csrCtrl.mem_trigger.tUpdate.valid) {
    tdata(csrCtrl.mem_trigger.tUpdate.bits.addr) := csrCtrl.mem_trigger.tUpdate.bits.tdata
  }

  val debugMode = csrCtrl.mem_trigger.debugMode
  val triggerCanRaiseBpExp = csrCtrl.mem_trigger.triggerCanRaiseBpExp
  val backendTriggerTimingVec = VecInit(tdata.map(_.timing))
  val backendTriggerChainVec = VecInit(tdata.map(_.chain))
  val backendTriggerHitVec = WireInit(VecInit(Seq.fill(TriggerNum)(false.B)))
  val backendTriggerCanFireVec = RegInit(VecInit(Seq.fill(TriggerNum)(false.B)))

  assert(state === s_invalid ||
    uop.fuOpType(1,0) === "b10".U ||
    uop.fuOpType(1,0) === "b11".U ||
    LSUOpType.isAMOCASQ(uop.fuOpType),
    "Only word or doubleword or quadword is supported"
  )

  // store trigger
  val store_hit = Wire(Vec(TriggerNum, Bool()))
  for (j <- 0 until TriggerNum) {
    store_hit(j) := !tdata(j).select && !debugMode && isNotLr && TriggerCmp(
      vaddr,
      tdata(j).tdata2,
      tdata(j).matchType,
      tEnableVec(j) && tdata(j).store
    )
  }
  // load trigger
  val load_hit = Wire(Vec(TriggerNum, Bool()))
  for (j <- 0 until TriggerNum) {
    load_hit(j) := !tdata(j).select && !debugMode && isNotSc && TriggerCmp(
      vaddr,
      tdata(j).tdata2,
      tdata(j).matchType,
      tEnableVec(j) && tdata(j).load
    )
  }
  backendTriggerHitVec := store_hit.zip(load_hit).map { case (sh, lh) => sh || lh }
  // triggerCanFireVec is updated one cycle later (at T+1)
  TriggerCheckCanFire(TriggerNum, backendTriggerCanFireVec, backendTriggerHitVec,
    backendTriggerTimingVec, backendTriggerChainVec)

  val actionVec = VecInit(tdata.map(_.action))
  val triggerAction = Wire(TriggerAction())
  TriggerUtil.triggerActionGen(triggerAction, backendTriggerCanFireVec, actionVec, triggerCanRaiseBpExp)
  val triggerDebugMode = TriggerAction.isDmode(triggerAction)
  val triggerBreakpoint = TriggerAction.isExp(triggerAction)

  // tlb translation: manipulate signals and deal with exceptions;
  // at the same time, flush the sbuffer
  when (state === s_tlb_and_flush_sbuffer_req) {
    // do not accept the tlb resp in the first cycle;
    // this limitation exists because of the hw prefetcher:
    // when !have_sent_first_tlb_req, the tlb resp may come from a hw prefetch
    have_sent_first_tlb_req := true.B

    when (io.dtlb.resp.fire && have_sent_first_tlb_req) {
      paddr   := io.dtlb.resp.bits.paddr(0)
      gpaddr  := io.dtlb.resp.bits.gpaddr(0)
      vaddr   := io.dtlb.resp.bits.fullva
      isForVSnonLeafPTE := io.dtlb.resp.bits.isForVSnonLeafPTE
      // exception handling
      val addrAligned = LookupTree(uop.fuOpType(1,0), List(
        "b10".U -> (vaddr(1,0) === 0.U), // W
        "b11".U -> (vaddr(2,0) === 0.U), // D
        "b00".U -> (vaddr(3,0) === 0.U)  // Q
      ))
      exceptionVec(loadAddrMisaligned)  := !addrAligned && isLr
      exceptionVec(storeAddrMisaligned) := !addrAligned && !isLr
      exceptionVec(storePageFault)      := io.dtlb.resp.bits.excp(0).pf.st
      exceptionVec(loadPageFault)       := io.dtlb.resp.bits.excp(0).pf.ld
      exceptionVec(storeAccessFault)    := io.dtlb.resp.bits.excp(0).af.st
      exceptionVec(loadAccessFault)     := io.dtlb.resp.bits.excp(0).af.ld
      exceptionVec(storeGuestPageFault) := io.dtlb.resp.bits.excp(0).gpf.st
      exceptionVec(loadGuestPageFault)  := io.dtlb.resp.bits.excp(0).gpf.ld

      exceptionVec(breakPoint) := triggerBreakpoint
      trigger                  := triggerAction

      when (!io.dtlb.resp.bits.miss) {
        io.out.bits.uop.debugInfo.tlbRespTime := GTimer()
        when (!addrAligned || triggerDebugMode || triggerBreakpoint) {
          // NOTE: when the address is misaligned or a trigger fires, there is no need to wait for the tlb;
          // misaligned exceptions are checked here, while tlb exceptions are checked next cycle for timing.
          // If there is an exception, the instruction need not be executed.
          state := s_finish
          out_valid := true.B
          atom_override_xtval := true.B
        }.otherwise {
          state := s_pm
        }
      }
    }
  }

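  // latch the page-based memory type (pbmt) from the hit tlb response; it is consumed in s_pm below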
  val pbmtReg = RegEnable(io.dtlb.resp.bits.pbmt(0), io.dtlb.resp.fire && !io.dtlb.resp.bits.miss)
  when (state === s_pm) {
    val pmp = WireInit(io.pmpResp)
    is_mmio := Pbmt.isIO(pbmtReg) || (Pbmt.isPMA(pbmtReg) && pmp.mmio)

    // NOTE: only load/store exceptions are handled here; other exceptions must not be sent down this path
    val exception_va = exceptionVec(storePageFault) || exceptionVec(loadPageFault) ||
      exceptionVec(storeGuestPageFault) || exceptionVec(loadGuestPageFault) ||
      exceptionVec(storeAccessFault) || exceptionVec(loadAccessFault)
    val exception_pa_mmio_nc = pmp.mmio || Pbmt.isIO(pbmtReg) || Pbmt.isNC(pbmtReg)
    val exception_pa = pmp.st || pmp.ld || exception_pa_mmio_nc
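    // atomics targeting MMIO or non-cacheable space are not sent to the dcache;
    // they are reported as access faults below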
    when (exception_va || exception_pa) {
      state := s_finish
      out_valid := true.B
      atom_override_xtval := true.B
    }.otherwise {
      // if the sbuffer has already been flushed, go to query the dcache; otherwise wait for the sbuffer
      state := Mux(sbuffer_empty, s_cache_req, s_wait_flush_sbuffer_resp)
    }
    // update access fault bits
    exceptionVec(loadAccessFault) := exceptionVec(loadAccessFault) ||
      (pmp.ld || exception_pa_mmio_nc) && isLr
    exceptionVec(storeAccessFault) := exceptionVec(storeAccessFault) || pmp.st ||
      (pmp.ld || exception_pa_mmio_nc) && !isLr
  }

  when (state === s_wait_flush_sbuffer_resp) {
    when (sbuffer_empty) {
      state := s_cache_req
    }
  }

  def genWdataAMO(data: UInt, sizeEncode: UInt): UInt = {
    LookupTree(sizeEncode(1, 0), List(
      "b10".U -> Fill(4, data(31, 0)),  // W: replicate the 32-bit operand
      "b11".U -> Fill(2, data(63, 0)),  // D: replicate the 64-bit operand
      "b00".U -> data(127, 0)           // Q
    ))
  }
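  // e.g. a word AMO replicates its 32-bit operand four times so that, combined with the
  // shifted byte mask from genWmaskAMO, the correct 32-bit lane of the selected 64-bit
  // bank is written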
319
320  def genWmaskAMO(addr: UInt, sizeEncode: UInt): UInt = {
321    /**
322      * `MainPipeReq` uses `word_idx` to recognize which 64-bits data bank to operate on. Double-word atomics are
323      * always 8B aligned and quad-word atomics are always 16B aligned except for misaligned exception, therefore
324      * `word_idx` is enough and there is no need to shift according address. Only word atomics needs LSBs of the
325      * address to shift mask inside a 64-bits aligned range.
326      */
327    LookupTree(sizeEncode(1, 0), List(
328      "b10".U -> (0xf.U << addr(2,0)), // W
329      "b11".U -> 0xff.U, // D
330      "b00".U -> 0xffff.U // Q
331    ))
332  }
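  // e.g. a word AMO whose paddr(2,0) = 4 yields the byte mask 0xf0, selecting the upper
  // word of the 64-bit bank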

  when (state === s_cache_req) {
    when (io.dcache.req.fire) {
      state := s_cache_resp
    }
  }

  val dcache_resp_data  = Reg(UInt())
  val dcache_resp_id    = Reg(UInt())
  val dcache_resp_error = Reg(Bool())

  when (state === s_cache_resp) {
    // - hit: everything is OK, simply latch the response
    // - miss but no replay: stay here and wait for the miss queue to handle the miss and replay our request
    // - miss and replay: the request missed and failed to enter the miss queue, manually replay it later
    // TODO: add assertions:
    // 1. add a replay delay counter?
    // 2. when a req gets into the MissQueue, it should not miss any more
    when (io.dcache.resp.fire) {
      when (io.dcache.resp.bits.miss) {
        when (io.dcache.resp.bits.replay) {
          state := s_cache_req
        }
      }.otherwise {
        dcache_resp_data := io.dcache.resp.bits.data
        dcache_resp_id := io.dcache.resp.bits.id
        dcache_resp_error := io.dcache.resp.bits.error
        state := s_cache_resp_latch
      }
    }
  }

  when (state === s_cache_resp_latch) {
    success := dcache_resp_id
    val rdataSel = Mux(
      paddr(2, 0) === 0.U,
      dcache_resp_data,
      dcache_resp_data >> 32
    )
    assert(paddr(2, 0) === "b000".U || paddr(2, 0) === "b100".U)
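    // D/Q accesses are 8B/16B aligned at this point, so within the 64-bit bank only a
    // word access can sit at byte offset 4, hence the two legal offsets asserted above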

    resp_data_wire := Mux(
      isSc,
      dcache_resp_data,
      LookupTree(uop.fuOpType(1,0), List(
        "b10".U -> SignExt(rdataSel(31, 0), QuadWordBits), // W
        "b11".U -> SignExt(rdataSel(63, 0), QuadWordBits), // D
        "b00".U -> rdataSel // Q
      ))
    )

    when (dcache_resp_error && io.csrCtrl.cache_error_enable) {
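      // the asserts below read the pre-update register values, checking that no access
      // fault was already recorded before the cache error is turned into one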
      exceptionVec(loadAccessFault)  := isLr
      exceptionVec(storeAccessFault) := !isLr
      assert(!exceptionVec(loadAccessFault))
      assert(!exceptionVec(storeAccessFault))
    }

    resp_data := resp_data_wire
    state := s_finish
    out_valid := true.B
  }

  when (state === s_finish) {
    when (io.out.fire) {
      when (LSUOpType.isAMOCASQ(uop.fuOpType)) {
        // enter `s_finish2` to write the 2nd uop back
        state := s_finish2
        out_valid := true.B
      }.otherwise {
        // otherwise the FSM ends here
        resetFSM()
      }
    }
  }

  when (state === s_finish2) {
    when (io.out.fire) {
      resetFSM()
    }
  }

  when (io.redirect.valid) {
    atom_override_xtval := false.B
  }

  def resetFSM(): Unit = {
    state := s_invalid
    out_valid := false.B
    data_valid := false.B
    stdCnt := 0.U
    pdest1Valid := false.B
    pdest2Valid := false.B
  }

  /**
    * IO assignment
    */
  io.exceptionInfo.valid := atom_override_xtval
  io.exceptionInfo.bits.vaddr := vaddr
  io.exceptionInfo.bits.gpaddr := gpaddr
  io.exceptionInfo.bits.isForVSnonLeafPTE := isForVSnonLeafPTE

  // Send TLB feedback to the store issue queue.
  // We send feedback right after we receive the request,
  // and we always treat an amo as a tlb hit,
  // since we will keep polling the tlb by ourselves.
  io.feedbackSlow.valid       := GatedValidRegNext(GatedValidRegNext(io.in.valid))
  io.feedbackSlow.bits.hit    := true.B
  io.feedbackSlow.bits.robIdx  := RegEnable(io.in.bits.uop.robIdx, io.in.valid)
  io.feedbackSlow.bits.sqIdx   := RegEnable(io.in.bits.uop.sqIdx, io.in.valid)
  io.feedbackSlow.bits.lqIdx   := RegEnable(io.in.bits.uop.lqIdx, io.in.valid)
  io.feedbackSlow.bits.flushState := DontCare
  io.feedbackSlow.bits.sourceType := DontCare
  io.feedbackSlow.bits.dataInvalidSqIdx := DontCare

  // send req to dtlb
  // keep firing until tlb hit
  io.dtlb.req.valid       := state === s_tlb_and_flush_sbuffer_req
  io.dtlb.req.bits.vaddr  := vaddr
  io.dtlb.req.bits.fullva := vaddr
  io.dtlb.req.bits.checkfullva := true.B
  io.dtlb.resp.ready      := true.B
  io.dtlb.req.bits.cmd    := Mux(isLr, TlbCmd.atom_read, TlbCmd.atom_write)
  io.dtlb.req.bits.debug.pc := uop.pc
  io.dtlb.req.bits.debug.robIdx := uop.robIdx
  io.dtlb.req.bits.debug.isFirstIssue := false.B
  io.out.bits.uop.debugInfo.tlbFirstReqTime := GTimer() // FIXME lyq: it will always be assigned

  // send req to sbuffer to flush it if it is not empty
  io.flush_sbuffer.valid := !sbuffer_empty && state === s_tlb_and_flush_sbuffer_req

  // The sta issue port is ready when:
  // (1) AtomicsUnit is idle, or
  // (2) for AMOCAS.Q, the second uop, whose pdest holds the upper bits of rd, has not been received yet
  io.in.ready := state === s_invalid || LSUOpType.isAMOCASQ(uop.fuOpType) && (!pdest2Valid || !pdest1Valid)

  io.out.valid := out_valid && Mux(state === s_finish2, pdest2Valid, pdest1Valid)
  XSError((state === s_finish || state === s_finish2) =/= out_valid, "out_valid reg error\n")
  io.out.bits := DontCare
  io.out.bits.uop := uop
  io.out.bits.uop.fuType := FuType.mou.U
  io.out.bits.uop.pdest := Mux(state === s_finish2, pdest2, pdest1)
  io.out.bits.uop.exceptionVec := exceptionVec
  io.out.bits.uop.trigger := trigger
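  // for AMOCAS.Q the 128-bit result is written back in two halves: the low 64 bits in
  // s_finish and the high 64 bits (resp_data >> XLEN) in s_finish2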
  io.out.bits.data := Mux(state === s_finish2, resp_data >> XLEN, resp_data)
  io.out.bits.debug.isMMIO := is_mmio
  io.out.bits.debug.paddr := paddr

  io.dcache.req.valid := Mux(
    io.dcache.req.bits.cmd === M_XLR,
    !io.dcache.block_lr, // block lr to survive in lr storm
    data_valid // wait until src(1) is ready
  ) && state === s_cache_req
  val pipe_req = io.dcache.req.bits
  pipe_req := DontCare
  pipe_req.cmd := LookupTree(uop.fuOpType, List(
    // TODO: optimize this
    LSUOpType.lr_w      -> M_XLR,
    LSUOpType.sc_w      -> M_XSC,
    LSUOpType.amoswap_w -> M_XA_SWAP,
    LSUOpType.amoadd_w  -> M_XA_ADD,
    LSUOpType.amoxor_w  -> M_XA_XOR,
    LSUOpType.amoand_w  -> M_XA_AND,
    LSUOpType.amoor_w   -> M_XA_OR,
    LSUOpType.amomin_w  -> M_XA_MIN,
    LSUOpType.amomax_w  -> M_XA_MAX,
    LSUOpType.amominu_w -> M_XA_MINU,
    LSUOpType.amomaxu_w -> M_XA_MAXU,
    LSUOpType.amocas_w  -> M_XA_CASW,

    LSUOpType.lr_d      -> M_XLR,
    LSUOpType.sc_d      -> M_XSC,
    LSUOpType.amoswap_d -> M_XA_SWAP,
    LSUOpType.amoadd_d  -> M_XA_ADD,
    LSUOpType.amoxor_d  -> M_XA_XOR,
    LSUOpType.amoand_d  -> M_XA_AND,
    LSUOpType.amoor_d   -> M_XA_OR,
    LSUOpType.amomin_d  -> M_XA_MIN,
    LSUOpType.amomax_d  -> M_XA_MAX,
    LSUOpType.amominu_d -> M_XA_MINU,
    LSUOpType.amomaxu_d -> M_XA_MAXU,
    LSUOpType.amocas_d  -> M_XA_CASD,

    LSUOpType.amocas_q  -> M_XA_CASQ
  ))
  pipe_req.miss := false.B
  pipe_req.probe := false.B
  pipe_req.probe_need_data := false.B
  pipe_req.source := AMO_SOURCE.U
  pipe_req.addr   := get_block_addr(paddr)
  pipe_req.vaddr  := get_block_addr(vaddr)
  pipe_req.word_idx  := get_word(paddr)
  pipe_req.amo_data := genWdataAMO(rs2, uop.fuOpType)
  pipe_req.amo_mask := genWmaskAMO(paddr, uop.fuOpType)
  pipe_req.amo_cmp  := genWdataAMO(rd, uop.fuOpType)
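  // amo_cmp carries the expected (compare) value for AMOCAS, taken from rd as the Zacas
  // extension specifies; other AMOs ignore this field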

  if (env.EnableDifftest) {
    val difftest = DifftestModule(new DiffAtomicEvent)
    val en = io.dcache.req.fire
    difftest.coreid := io.hartId
    difftest.valid  := state === s_cache_resp_latch
    difftest.addr   := RegEnable(paddr, en)
    difftest.data   := RegEnable(io.dcache.req.bits.amo_data.asTypeOf(difftest.data), en)
    difftest.mask   := RegEnable(io.dcache.req.bits.amo_mask, en)
    difftest.cmp    := RegEnable(io.dcache.req.bits.amo_cmp.asTypeOf(difftest.cmp), en)
    difftest.fuop   := RegEnable(uop.fuOpType, en)
    difftest.out    := resp_data_wire.asTypeOf(difftest.out)
  }

  if (env.EnableDifftest || env.AlwaysBasicDiff) {
    val uop = io.out.bits.uop
    val difftest = DifftestModule(new DiffLrScEvent)
    difftest.coreid := io.hartId
    difftest.valid := io.out.fire && state === s_finish && isSc
    difftest.success := success
  }
}