/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.ExceptionNO._
import xiangshan.backend.fu.PMPRespBundle
import xiangshan.backend.fu.FuType
import xiangshan.backend.Bundles.{MemExuInput, MemExuOutput}
import xiangshan.backend.fu.NewCSR.TriggerUtil
import xiangshan.backend.fu.util.SdtrigExt
import xiangshan.mem.Bundles._
import xiangshan.cache.mmu.Pbmt
import xiangshan.cache.{AtomicWordIO, HasDCacheParameters, MemoryOpConstants}
import xiangshan.cache.mmu.{TlbCmd, TlbRequestIO}
import difftest._

class AtomicsUnit(implicit p: Parameters) extends XSModule
  with MemoryOpConstants
  with HasDCacheParameters
  with SdtrigExt {

  val StdCnt  = backendParams.StdCnt

  val io = IO(new Bundle() {
    val hartId        = Input(UInt(hartIdLen.W))
    val in            = Flipped(Decoupled(new MemExuInput))
    val storeDataIn   = Flipped(Vec(StdCnt, Valid(new MemExuOutput)))
    val out           = Decoupled(new MemExuOutput)
    val dcache        = new AtomicWordIO
    val dtlb          = new TlbRequestIO(2)
    val pmpResp       = Flipped(new PMPRespBundle())
    val flush_sbuffer = new SbufferFlushBundle
    val feedbackSlow  = ValidIO(new RSFeedback)
    val redirect      = Flipped(ValidIO(new Redirect))
    val exceptionInfo = ValidIO(new Bundle {
      val vaddr = UInt(XLEN.W)
      val gpaddr = UInt(XLEN.W)
      val isForVSnonLeafPTE = Bool()
    })
    val csrCtrl       = Flipped(new CustomCSRCtrlIO)
  })

  PerfCCT.updateInstPos(io.in.bits.uop.debug_seqNum, PerfCCT.InstPos.AtFU.id.U, io.in.valid, clock, reset)

  //-------------------------------------------------------
  // Atomics Memory Access FSM
  //-------------------------------------------------------
  val s_invalid :: s_tlb_and_flush_sbuffer_req :: s_pm :: s_wait_flush_sbuffer_resp :: s_cache_req :: s_cache_resp :: s_cache_resp_latch :: s_finish :: s_finish2 :: Nil = Enum(9)
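  // Typical state flow, as implemented by the transitions below:
  //   s_invalid -> s_tlb_and_flush_sbuffer_req -> s_pm
  //     -> (s_wait_flush_sbuffer_resp, if the sbuffer is not yet empty)
  //     -> s_cache_req -> s_cache_resp (-> s_cache_req again on miss + replay)
  //     -> s_cache_resp_latch -> s_finish (-> s_finish2 for AMOCAS.Q)
  // Exceptions detected during translation or the PMP/PMA check jump directly to s_finish.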
  val state = RegInit(s_invalid)
  val out_valid = RegInit(false.B)
  val data_valid = RegInit(false.B)

  val uop = Reg(io.in.bits.uop.cloneType)
  val isLr = LSUOpType.isLr(uop.fuOpType)
  val isSc = LSUOpType.isSc(uop.fuOpType)
  val isAMOCAS = LSUOpType.isAMOCAS(uop.fuOpType)
  val isNotLr = !isLr
  val isNotSc = !isSc
  // AMOCAS.Q needs to write two int registers, therefore the backend issues two sta uops for AMOCAS.Q.
  // `pdest2` is used to record the pdest of the second uop
  val pdest1, pdest2 = Reg(UInt(PhyRegIdxWidth.W))
  val pdest1Valid, pdest2Valid = RegInit(false.B)
  /**
    * The # of std uops that an atomic instruction requires:
    * (1) For AMOs (except AMOCAS) and LR/SC, 1 std uop is needed: X(rs2) with uopIdx = 0
    * (2) For AMOCAS.W/D, 2 std uops are needed: X(rd), X(rs2) with uopIdx = 0, 1
    * (3) For AMOCAS.Q, 4 std uops are needed: X(rd), X(rs2), X(rd+1), X(rs2+1) with uopIdx = 0, 1, 2, 3
    * stds are not needed for write-back.
    *
    * The # of sta uops that an atomic instruction requires, which is also the # of write-backs:
    * (1) For AMOs (except AMOCAS.Q) and LR/SC, 1 sta uop is needed: X(rs1) with uopIdx = 0
    * (2) For AMOCAS.Q, 2 sta uops are needed: X(rs1)*2 with uopIdx = 0, 2
    */
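  // std uopIdx -> register mapping (see `stds` below): 0 -> rd_l, 1 -> rs2_l,
  // 2 -> rd_h, 3 -> rs2_h. For non-CAS ops the single std operand (uopIdx = 0) is
  // therefore captured in `rd_l` and read back through `stds.head` in the `rs2` mux.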
  val rs1, rs2_l, rs2_h, rd_l, rd_h = Reg(UInt(XLEN.W))
  val stds = Seq(rd_l, rs2_l, rd_h, rs2_h)
  val rs2 = Cat(rs2_h, Mux(isAMOCAS, rs2_l, stds.head))
  val rd = Cat(rd_h, rd_l)
  val stdCnt = RegInit(0.U(log2Ceil(stds.length + 1).W))

  val exceptionVec = RegInit(0.U.asTypeOf(ExceptionVec()))
  val trigger = RegInit(TriggerAction.None)
  val atom_override_xtval = RegInit(false.B)
  val have_sent_first_tlb_req = RegInit(false.B)
  // paddr after translation
  val paddr = Reg(UInt())
  val gpaddr = Reg(UInt())
  val vaddr = rs1

  val is_mmio = Reg(Bool())
  val isForVSnonLeafPTE = Reg(Bool())

  // dcache response data
  val resp_data = Reg(UInt())
  val resp_data_wire = WireInit(0.U)
  val success = Reg(Bool())
  // sbuffer is empty or not
  val sbuffer_empty = io.flush_sbuffer.empty

  // Only the least significant AMOFuOpWidth = 6 bits of fuOpType are used,
  // therefore the MSBs are reused to identify uopIdx
  val stdUopIdxs = io.storeDataIn.map(_.bits.uop.fuOpType >> LSUOpType.AMOFuOpWidth)
  val staUopIdx = io.in.bits.uop.fuOpType >> LSUOpType.AMOFuOpWidth
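  // For example, assuming the backend packs the index into the upper fuOpType bits
  // this way, the second sta uop of an AMOCAS.Q would carry
  //   fuOpType = (2.U << LSUOpType.AMOFuOpWidth) | LSUOpType.amocas_q(LSUOpType.AMOFuOpWidth - 1, 0)
  // so `staUopIdx` recovers 2 here.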

  // assign default value to output signals
  io.in.ready          := false.B

  io.dcache.req.valid  := false.B
  io.dcache.req.bits   := DontCare

  io.dtlb.req.valid    := false.B
  io.dtlb.req.bits     := DontCare
  io.dtlb.req_kill     := false.B
  io.dtlb.resp.ready   := true.B

  io.flush_sbuffer.valid := false.B

  when (state === s_invalid) {
    when (io.in.fire) {
      uop := io.in.bits.uop
      rs1 := io.in.bits.src_rs1
      state := s_tlb_and_flush_sbuffer_req
      have_sent_first_tlb_req := false.B
    }
  }

  when (io.in.fire) {
    val pdest = io.in.bits.uop.pdest
    when (staUopIdx === 0.U) {
      pdest1Valid := true.B
      pdest1 := pdest
    }.elsewhen (staUopIdx === 2.U) {
      pdest2Valid := true.B
      pdest2 := pdest
    }.otherwise {
      assert(false.B, "unrecognized sta uopIdx")
    }
  }

158
159  stds.zipWithIndex.foreach { case (data, i) =>
160    val sels = io.storeDataIn.zip(stdUopIdxs).map { case (in, uopIdx) =>
161      val sel = in.fire && uopIdx === i.U
162      when (sel) { data := in.bits.data }
163      sel
164    }
165    OneHot.checkOneHot(sels)
166  }
167  stdCnt := stdCnt + PopCount(io.storeDataIn.map(_.fire))

  val StdCntNCAS = 1 // LR/SC and AMO need only 1 src besides rs1
  val StdCntCASWD = 2 // AMOCAS.W/D needs 2 src regs (rs2 and rd) besides rs1
  val StdCntCASQ = 4 // AMOCAS.Q needs 4 src regs (rs2, rs2+1, rd, rd+1) besides rs1
  when (!data_valid) {
    data_valid := state =/= s_invalid && (
      LSUOpType.isAMOCASQ(uop.fuOpType) && stdCnt === StdCntCASQ.U ||
      LSUOpType.isAMOCASWD(uop.fuOpType) && stdCnt === StdCntCASWD.U ||
      !isAMOCAS && stdCnt === StdCntNCAS.U
    )
  }
  assert(stdCnt <= stds.length.U, "unexpected std")
  assert(!(Cat(io.storeDataIn.map(_.fire)).orR && data_valid), "atomic unit re-receive data")
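  // `data_valid` latches high once all std operands of the current op have arrived,
  // and is only cleared by resetFSM(); the dcache request below is gated on it.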

  // atomic trigger
  val csrCtrl = io.csrCtrl
  val tdata = Reg(Vec(TriggerNum, new MatchTriggerIO))
  val tEnableVec = RegInit(VecInit(Seq.fill(TriggerNum)(false.B)))
  tEnableVec := csrCtrl.mem_trigger.tEnableVec
  when (csrCtrl.mem_trigger.tUpdate.valid) {
    tdata(csrCtrl.mem_trigger.tUpdate.bits.addr) := csrCtrl.mem_trigger.tUpdate.bits.tdata
  }

  val debugMode = csrCtrl.mem_trigger.debugMode
  val triggerCanRaiseBpExp = csrCtrl.mem_trigger.triggerCanRaiseBpExp
  val backendTriggerTimingVec = VecInit(tdata.map(_.timing))
  val backendTriggerChainVec = VecInit(tdata.map(_.chain))
  val backendTriggerHitVec = WireInit(VecInit(Seq.fill(TriggerNum)(false.B)))
  val backendTriggerCanFireVec = RegInit(VecInit(Seq.fill(TriggerNum)(false.B)))

  assert(state === s_invalid ||
    uop.fuOpType(1,0) === "b10".U ||
    uop.fuOpType(1,0) === "b11".U ||
    LSUOpType.isAMOCASQ(uop.fuOpType),
    "Only word, doubleword, or quadword is supported"
  )

  // store trigger
  val store_hit = Wire(Vec(TriggerNum, Bool()))
  for (j <- 0 until TriggerNum) {
    store_hit(j) := !tdata(j).select && !debugMode && isNotLr && TriggerCmp(
      vaddr,
      tdata(j).tdata2,
      tdata(j).matchType,
      tEnableVec(j) && tdata(j).store
    )
  }
  // load trigger
  val load_hit = Wire(Vec(TriggerNum, Bool()))
  for (j <- 0 until TriggerNum) {
    load_hit(j) := !tdata(j).select && !debugMode && isNotSc && TriggerCmp(
      vaddr,
      tdata(j).tdata2,
      tdata(j).matchType,
      tEnableVec(j) && tdata(j).load
    )
  }
  backendTriggerHitVec := store_hit.zip(load_hit).map { case (sh, lh) => sh || lh }
  // triggerCanFireVec will update at T+1
  TriggerCheckCanFire(TriggerNum, backendTriggerCanFireVec, backendTriggerHitVec,
    backendTriggerTimingVec, backendTriggerChainVec)

  val actionVec = VecInit(tdata.map(_.action))
  val triggerAction = Wire(TriggerAction())
  TriggerUtil.triggerActionGen(triggerAction, backendTriggerCanFireVec, actionVec, triggerCanRaiseBpExp)
  val triggerDebugMode = TriggerAction.isDmode(triggerAction)
  val triggerBreakpoint = TriggerAction.isExp(triggerAction)

  // tlb translation: manipulate signals && deal with exceptions;
  // at the same time, flush the sbuffer
  when (state === s_tlb_and_flush_sbuffer_req) {
    // do not accept the tlb resp in the first cycle;
    // this limitation is for the hw prefetcher:
    // when !have_sent_first_tlb_req, the tlb resp may come from a hw prefetch
    have_sent_first_tlb_req := true.B

    when (io.dtlb.resp.fire && have_sent_first_tlb_req) {
      paddr   := io.dtlb.resp.bits.paddr(0)
      gpaddr  := io.dtlb.resp.bits.gpaddr(0)
      vaddr   := io.dtlb.resp.bits.fullva
      isForVSnonLeafPTE := io.dtlb.resp.bits.isForVSnonLeafPTE
      // exception handling
      val addrAligned = LookupTree(uop.fuOpType(1,0), List(
        "b10".U -> (vaddr(1,0) === 0.U), // W
        "b11".U -> (vaddr(2,0) === 0.U), // D
        "b00".U -> (vaddr(3,0) === 0.U)  // Q
      ))
      exceptionVec(loadAddrMisaligned)  := !addrAligned && isLr
      exceptionVec(storeAddrMisaligned) := !addrAligned && !isLr
      exceptionVec(storePageFault)      := io.dtlb.resp.bits.excp(0).pf.st
      exceptionVec(loadPageFault)       := io.dtlb.resp.bits.excp(0).pf.ld
      exceptionVec(storeAccessFault)    := io.dtlb.resp.bits.excp(0).af.st
      exceptionVec(loadAccessFault)     := io.dtlb.resp.bits.excp(0).af.ld
      exceptionVec(storeGuestPageFault) := io.dtlb.resp.bits.excp(0).gpf.st
      exceptionVec(loadGuestPageFault)  := io.dtlb.resp.bits.excp(0).gpf.ld

      exceptionVec(breakPoint) := triggerBreakpoint
      trigger                  := triggerAction

      when (!io.dtlb.resp.bits.miss) {
        io.out.bits.uop.debugInfo.tlbRespTime := GTimer()
        when (!addrAligned || triggerDebugMode || triggerBreakpoint) {
          // NOTE: when the address is misaligned or a trigger fires, there is no need to wait for the tlb.
          // Misaligned exceptions are checked here; tlb exceptions are checked in the next cycle for timing.
          // If there is an exception, there is no need to execute the access.
          state := s_finish
          out_valid := true.B
          atom_override_xtval := true.B
        }.otherwise {
          state := s_pm
        }
      }
    }
  }


  val pbmtReg = RegEnable(io.dtlb.resp.bits.pbmt(0), io.dtlb.resp.fire && !io.dtlb.resp.bits.miss)
  when (state === s_pm) {
    val pmp = WireInit(io.pmpResp)
    is_mmio := Pbmt.isIO(pbmtReg) || (Pbmt.isPMA(pbmtReg) && pmp.mmio)

    // NOTE: only load/store exceptions are handled here; if any other exception happens, it must not be sent to this stage
    val exception_va = exceptionVec(storePageFault) || exceptionVec(loadPageFault) ||
      exceptionVec(storeGuestPageFault) || exceptionVec(loadGuestPageFault) ||
      exceptionVec(storeAccessFault) || exceptionVec(loadAccessFault)
    val exception_pa_mmio_nc = pmp.mmio || Pbmt.isIO(pbmtReg) || Pbmt.isNC(pbmtReg)
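    // Atomic accesses to MMIO or non-cacheable space are not executed by this unit:
    // as the access-fault update below shows, they are converted into access faults.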
    val exception_pa = pmp.st || pmp.ld || exception_pa_mmio_nc
    when (exception_va || exception_pa) {
      state := s_finish
      out_valid := true.B
      atom_override_xtval := true.B
    }.otherwise {
      // if the sbuffer has been flushed, go on to query the dcache; otherwise wait for the sbuffer.
      state := Mux(sbuffer_empty, s_cache_req, s_wait_flush_sbuffer_resp)
    }
    // update the access fault bits
    exceptionVec(loadAccessFault) := exceptionVec(loadAccessFault) ||
      (pmp.ld || exception_pa_mmio_nc) && isLr
    exceptionVec(storeAccessFault) := exceptionVec(storeAccessFault) || pmp.st ||
      (pmp.ld || exception_pa_mmio_nc) && !isLr
  }

  when (state === s_wait_flush_sbuffer_resp) {
    when (sbuffer_empty) {
      state := s_cache_req
    }
  }

  def genWdataAMO(data: UInt, sizeEncode: UInt): UInt = {
    LookupTree(sizeEncode(1, 0), List(
      "b10".U -> Fill(4, data(31, 0)),
      "b11".U -> Fill(2, data(63, 0)),
      "b00".U -> data(127, 0)
    ))
  }

  def genWmaskAMO(addr: UInt, sizeEncode: UInt): UInt = {
    /**
      * `MainPipeReq` uses `word_idx` to recognize which 64-bit data bank to operate on. Double-word atomics are
      * always 8B aligned and quad-word atomics are always 16B aligned (unless a misaligned exception is raised),
      * therefore `word_idx` is enough and there is no need to shift according to the address. Only word atomics
      * need the LSBs of the address to shift the mask within a 64-bit aligned range.
      */
    LookupTree(sizeEncode(1, 0), List(
      "b10".U -> (0xf.U << addr(2,0)), // W
      "b11".U -> 0xff.U, // D
      "b00".U -> 0xffff.U // Q
    ))
  }
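  // Worked example: for a word AMO with paddr(2, 0) = "b100", genWdataAMO replicates
  // the 32-bit operand four times across the 128-bit lane and genWmaskAMO yields
  // 0xf.U << 4 = "h_f0".U, so the write lands on the upper word of the selected
  // 64-bit bank.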

  when (state === s_cache_req) {
    when (io.dcache.req.fire) {
      state := s_cache_resp
    }
  }

  val dcache_resp_data  = Reg(UInt())
  val dcache_resp_id    = Reg(UInt())
  val dcache_resp_error = Reg(Bool())

  when (state === s_cache_resp) {
    // when not miss:
    //   everything is OK, simply send the response back
    // when miss and not replay:
    //   wait for the missQueue to handle the miss and replay our request
    // when miss and replay:
    //   the req missed and failed to enter the missQueue; manually replay it later
    // TODO: add assertions:
    // 1. add a replay delay counter?
    // 2. when a req gets into the MissQueue, it should not miss any more
    when (io.dcache.resp.fire) {
      when (io.dcache.resp.bits.miss) {
        when (io.dcache.resp.bits.replay) {
          state := s_cache_req
        }
      }.otherwise {
        dcache_resp_data := io.dcache.resp.bits.data
        dcache_resp_id := io.dcache.resp.bits.id
        dcache_resp_error := io.dcache.resp.bits.error
        state := s_cache_resp_latch
      }
    }
  }

  when (state === s_cache_resp_latch) {
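    // The resp `id` field appears to carry the SC success flag (an inference from
    // this usage); it is latched into `success` and reported to difftest at write-back.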
    success := dcache_resp_id
    val rdataSel = Mux(
      paddr(2, 0) === 0.U,
      dcache_resp_data,
      dcache_resp_data >> 32
    )
    assert(paddr(2, 0) === "b000".U || paddr(2, 0) === "b100".U)

    resp_data_wire := Mux(
      isSc,
      dcache_resp_data,
      LookupTree(uop.fuOpType(1,0), List(
        "b10".U -> SignExt(rdataSel(31, 0), QuadWordBits), // W
        "b11".U -> SignExt(rdataSel(63, 0), QuadWordBits), // D
        "b00".U -> rdataSel // Q
      ))
    )

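    // NOTE: the asserts below read the registers' pre-update values (Chisel Reg
    // semantics), so they check that no access fault was already pending when a
    // cache error is reported.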
    when (dcache_resp_error && io.csrCtrl.cache_error_enable) {
      exceptionVec(loadAccessFault)  := isLr
      exceptionVec(storeAccessFault) := !isLr
      assert(!exceptionVec(loadAccessFault))
      assert(!exceptionVec(storeAccessFault))
    }

    resp_data := resp_data_wire
    state := s_finish
    out_valid := true.B
  }

  when (state === s_finish) {
    when (io.out.fire) {
      when (LSUOpType.isAMOCASQ(uop.fuOpType)) {
        // enter `s_finish2` to write the 2nd uop back
        state := s_finish2
        out_valid := true.B
      }.otherwise {
        // otherwise the FSM ends here
        resetFSM()
      }
    }
  }

  when (state === s_finish2) {
    when (io.out.fire) {
      resetFSM()
    }
  }

  when (io.redirect.valid) {
    atom_override_xtval := false.B
  }

  def resetFSM(): Unit = {
    state := s_invalid
    out_valid := false.B
    data_valid := false.B
    stdCnt := 0.U
    pdest1Valid := false.B
    pdest2Valid := false.B
  }

  /**
    * IO assignment
    */
  io.exceptionInfo.valid := atom_override_xtval
  io.exceptionInfo.bits.vaddr := vaddr
  io.exceptionInfo.bits.gpaddr := gpaddr
  io.exceptionInfo.bits.isForVSnonLeafPTE := isForVSnonLeafPTE

  // Send TLB feedback to the store issue queue.
  // We send feedback right after we receive the request.
  // Also, we always treat amo as a tlb hit,
  // since we will keep polling the tlb by ourselves.
  io.feedbackSlow.valid       := GatedValidRegNext(GatedValidRegNext(io.in.valid))
  io.feedbackSlow.bits.hit    := true.B
  io.feedbackSlow.bits.robIdx  := RegEnable(io.in.bits.uop.robIdx, io.in.valid)
  io.feedbackSlow.bits.sqIdx   := RegEnable(io.in.bits.uop.sqIdx, io.in.valid)
  io.feedbackSlow.bits.lqIdx   := RegEnable(io.in.bits.uop.lqIdx, io.in.valid)
  io.feedbackSlow.bits.flushState := DontCare
  io.feedbackSlow.bits.sourceType := DontCare
  io.feedbackSlow.bits.dataInvalidSqIdx := DontCare

  // send req to dtlb;
  // keep firing until tlb hit
  io.dtlb.req.valid       := state === s_tlb_and_flush_sbuffer_req
  io.dtlb.req.bits.vaddr  := vaddr
  io.dtlb.req.bits.fullva := vaddr
  io.dtlb.req.bits.checkfullva := true.B
  io.dtlb.resp.ready      := true.B
  io.dtlb.req.bits.cmd    := Mux(isLr, TlbCmd.atom_read, TlbCmd.atom_write)
  io.dtlb.req.bits.debug.pc := uop.pc
  io.dtlb.req.bits.debug.robIdx := uop.robIdx
  io.dtlb.req.bits.debug.isFirstIssue := false.B
  io.out.bits.uop.debugInfo.tlbFirstReqTime := GTimer() // FIXME lyq: it will always be assigned

  // send req to the sbuffer to flush it if it is not empty
  io.flush_sbuffer.valid := !sbuffer_empty && (
    state === s_tlb_and_flush_sbuffer_req ||
    state === s_pm ||
    state === s_wait_flush_sbuffer_resp
  )

  // When is the sta issue port ready:
  // (1) AtomicsUnit is idle, or
  // (2) for AMOCAS.Q, the second uop (whose pdest holds the higher bits of rd) has not been received yet
  io.in.ready := state === s_invalid || LSUOpType.isAMOCASQ(uop.fuOpType) && (!pdest2Valid || !pdest1Valid)

  io.out.valid := out_valid && Mux(state === s_finish2, pdest2Valid, pdest1Valid)
  XSError((state === s_finish || state === s_finish2) =/= out_valid, "out_valid reg error\n")
  io.out.bits := DontCare
  io.out.bits.uop := uop
  io.out.bits.uop.fuType := FuType.mou.U
  io.out.bits.uop.pdest := Mux(state === s_finish2, pdest2, pdest1)
  io.out.bits.uop.exceptionVec := exceptionVec
  io.out.bits.uop.trigger := trigger
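  // For AMOCAS.Q, the 128-bit result is written back in two halves: the low XLEN bits
  // with the first uop (s_finish) and the high XLEN bits with the second (s_finish2).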
  io.out.bits.data := Mux(state === s_finish2, resp_data >> XLEN, resp_data)
  io.out.bits.debug.isMMIO := is_mmio
  io.out.bits.debug.paddr := paddr

  io.dcache.req.valid := Mux(
    io.dcache.req.bits.cmd === M_XLR,
    !io.dcache.block_lr, // block lr to survive in lr storm
    data_valid // wait until src(1) is ready
  ) && state === s_cache_req
  val pipe_req = io.dcache.req.bits
  pipe_req := DontCare
  pipe_req.cmd := LookupTree(uop.fuOpType, List(
    // TODO: optimize this
    LSUOpType.lr_w      -> M_XLR,
    LSUOpType.sc_w      -> M_XSC,
    LSUOpType.amoswap_w -> M_XA_SWAP,
    LSUOpType.amoadd_w  -> M_XA_ADD,
    LSUOpType.amoxor_w  -> M_XA_XOR,
    LSUOpType.amoand_w  -> M_XA_AND,
    LSUOpType.amoor_w   -> M_XA_OR,
    LSUOpType.amomin_w  -> M_XA_MIN,
    LSUOpType.amomax_w  -> M_XA_MAX,
    LSUOpType.amominu_w -> M_XA_MINU,
    LSUOpType.amomaxu_w -> M_XA_MAXU,
    LSUOpType.amocas_w  -> M_XA_CASW,

    LSUOpType.lr_d      -> M_XLR,
    LSUOpType.sc_d      -> M_XSC,
    LSUOpType.amoswap_d -> M_XA_SWAP,
    LSUOpType.amoadd_d  -> M_XA_ADD,
    LSUOpType.amoxor_d  -> M_XA_XOR,
    LSUOpType.amoand_d  -> M_XA_AND,
    LSUOpType.amoor_d   -> M_XA_OR,
    LSUOpType.amomin_d  -> M_XA_MIN,
    LSUOpType.amomax_d  -> M_XA_MAX,
    LSUOpType.amominu_d -> M_XA_MINU,
    LSUOpType.amomaxu_d -> M_XA_MAXU,
    LSUOpType.amocas_d  -> M_XA_CASD,

    LSUOpType.amocas_q  -> M_XA_CASQ
  ))
  pipe_req.miss := false.B
  pipe_req.probe := false.B
  pipe_req.probe_need_data := false.B
  pipe_req.source := AMO_SOURCE.U
  pipe_req.addr   := get_block_addr(paddr)
  pipe_req.vaddr  := get_block_addr(vaddr)
  pipe_req.word_idx  := get_word(paddr)
  pipe_req.amo_data := genWdataAMO(rs2, uop.fuOpType)
  pipe_req.amo_mask := genWmaskAMO(paddr, uop.fuOpType)
  pipe_req.amo_cmp  := genWdataAMO(rd, uop.fuOpType)
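  // For AMOCAS, `amo_data` carries the swap value (built from rs2) and `amo_cmp` the
  // expected value (built from rd) that the dcache compares against; for non-CAS ops
  // `amo_cmp` is presumably ignored by the dcache.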

  if (env.EnableDifftest) {
    val difftest = DifftestModule(new DiffAtomicEvent)
    val en = io.dcache.req.fire
    difftest.coreid := io.hartId
    difftest.valid  := state === s_cache_resp_latch
    difftest.addr   := RegEnable(paddr, en)
    difftest.data   := RegEnable(io.dcache.req.bits.amo_data.asTypeOf(difftest.data), en)
    difftest.mask   := RegEnable(io.dcache.req.bits.amo_mask, en)
    difftest.cmp    := RegEnable(io.dcache.req.bits.amo_cmp.asTypeOf(difftest.cmp), en)
    difftest.fuop   := RegEnable(uop.fuOpType, en)
    difftest.out    := resp_data_wire.asTypeOf(difftest.out)
  }

  if (env.EnableDifftest || env.AlwaysBasicDiff) {
    val uop = io.out.bits.uop
    val difftest = DifftestModule(new DiffLrScEvent)
    difftest.coreid := io.hartId
    difftest.valid := io.out.fire && state === s_finish && isSc
    difftest.success := success
  }
}