/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.ExceptionNO._
import xiangshan.frontend.FtqPtr
import xiangshan.backend.fu.FuConfig._
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.rob.RobLsqIO
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles._
import xiangshan.backend.fu.FuConfig.StaCfg
import xiangshan.backend.fu.FuType.isVStore
import xiangshan.mem.Bundles._
import xiangshan.cache._
import xiangshan.cache.wpu.ReplayCarry

class StoreMisalignBuffer(implicit p: Parameters) extends XSModule
  with HasCircularQueuePtrHelper
{
  private val enqPortNum = StorePipelineWidth
  private val maxSplitNum = 2

  require(maxSplitNum == 2)
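
  // This buffer holds at most one misaligned store at a time and splits it into
  // at most `maxSplitNum` aligned accesses; the split tables below assume exactly
  // two parts, hence the require above.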

  private val SB = "b00".U(2.W)
  private val SH = "b01".U(2.W)
  private val SW = "b10".U(2.W)
  private val SD = "b11".U(2.W)

  // encoding of how many bytes to shift or truncate
  private val BYTE0 = "b000".U(3.W)
  private val BYTE1 = "b001".U(3.W)
  private val BYTE2 = "b010".U(3.W)
  private val BYTE3 = "b011".U(3.W)
  private val BYTE4 = "b100".U(3.W)
  private val BYTE5 = "b101".U(3.W)
  private val BYTE6 = "b110".U(3.W)
  private val BYTE7 = "b111".U(3.W)

  def getMask(sizeEncode: UInt) = LookupTree(sizeEncode, List(
    SB -> 0x1.U,
    SH -> 0x3.U,
    SW -> 0xf.U,
    SD -> 0xff.U
  ))
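  // e.g. getMask(SW) = 0xf: a 4-byte store drives four byte lanes of the mask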

  def selectOldest[T <: LsPipelineBundle](valid: Seq[Bool], bits: Seq[T], index: Seq[UInt]): (Seq[Bool], Seq[T], Seq[UInt]) = {
    assert(valid.length == bits.length)
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits, index)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      val resIndex = Seq.fill(2)(Wire(chiselTypeOf(index(0))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
        resIndex(i) := index(i)
      }
      val oldest = Mux(valid(0) && valid(1),
        Mux(isAfter(bits(0).uop.robIdx, bits(1).uop.robIdx) ||
          (isNotBefore(bits(0).uop.robIdx, bits(1).uop.robIdx) && bits(0).uop.uopIdx > bits(1).uop.uopIdx), res(1), res(0)),
        Mux(valid(0) && !valid(1), res(0), res(1)))

      val oldestIndex = Mux(valid(0) && valid(1),
        Mux(isAfter(bits(0).uop.robIdx, bits(1).uop.robIdx) ||
          (bits(0).uop.robIdx === bits(1).uop.robIdx && bits(0).uop.uopIdx > bits(1).uop.uopIdx), resIndex(1), resIndex(0)),
        Mux(valid(0) && !valid(1), resIndex(0), resIndex(1)))
      (Seq(oldest.valid), Seq(oldest.bits), Seq(oldestIndex))
    } else {
      val left = selectOldest(valid.take(valid.length / 2), bits.take(bits.length / 2), index.take(index.length / 2))
      val right = selectOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)), index.takeRight(index.length - (index.length / 2)))
      selectOldest(left._1 ++ right._1, left._2 ++ right._2, left._3 ++ right._3)
    }
  }
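  // e.g. with valid = (1, 1) and robIdx = (5, 3), request 1 is older in program
  // order and is selected; a robIdx tie falls back to comparing uopIdx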

  val io = IO(new Bundle() {
    val redirect        = Flipped(Valid(new Redirect))
    val enq             = Vec(enqPortNum, Flipped(new MisalignBufferEnqIO))
    val rob             = Flipped(new RobLsqIO)
    val splitStoreReq   = Decoupled(new LsPipelineBundle)
    val splitStoreResp  = Flipped(Valid(new SqWriteBundle))
    val writeBack       = Decoupled(new MemExuOutput)
    val vecWriteBack    = Vec(VecStorePipelineWidth, Decoupled(new VecPipelineFeedbackIO(isVStore = true)))
    val storeOutValid    = Input(Bool())
    val storeVecOutValid = Input(Bool())
    val overwriteExpBuf = Output(new XSBundle {
      val valid = Bool()
      val vaddr = UInt(XLEN.W)
      val isHyper = Bool()
      val gpaddr = UInt(XLEN.W)
      val isForVSnonLeafPTE = Bool()
    })
    val sqControl       = new StoreMaBufToSqControlIO

    val toVecStoreMergeBuffer = Vec(VecStorePipelineWidth, new StoreMaBufToVecStoreMergeBufferIO)
    val full = Bool()
  })

  io.rob.mmio := 0.U.asTypeOf(Vec(LoadPipelineWidth, Bool()))
  io.rob.uop  := 0.U.asTypeOf(Vec(LoadPipelineWidth, new DynInst))

  class StoreMisalignBufferEntry(implicit p: Parameters) extends LsPipelineBundle {
    val portIndex = UInt(log2Up(enqPortNum).W)
  }
  val req_valid = RegInit(false.B)
  val req = Reg(new StoreMisalignBufferEntry)

  val cross4KBPageBoundary = Wire(Bool())
  val needFlushPipe = RegInit(false.B)

  // buffer control:
  //  - s_idle:  idle
  //  - s_split: split the misaligned store into aligned stores
  //  - s_req:   send a split store to sta and get the result from sta
  //  - s_resp:  handle the response of a split store access request
  //  - s_wb:    write back to rob/vecMergeBuffer
  //  - s_block: wait for this instruction to reach the head of the ROB
  val s_idle :: s_split :: s_req :: s_resp :: s_wb :: s_block :: Nil = Enum(6)
  val bufferState    = RegInit(s_idle)

  // enqueue
  // s1:
  val s1_req = VecInit(io.enq.map(_.req.bits))
  val s1_valid = VecInit(io.enq.map(x => x.req.valid))

  val s1_index = (0 until io.enq.length).map(_.asUInt)
  val reqSel = selectOldest(s1_valid, s1_req, s1_index)

  val reqSelValid = reqSel._1(0)
  val reqSelBits  = reqSel._2(0)
  val reqSelPort  = reqSel._3(0)

  val reqRedirect = reqSelBits.uop.robIdx.needFlush(io.redirect)

  val canEnq = !req_valid && !reqRedirect && reqSelValid
  val robMatch = req_valid && io.rob.pendingst && (io.rob.pendingPtr === req.uop.robIdx)

  val s2_canEnq = GatedRegNext(canEnq)
  val s2_reqSelPort = GatedRegNext(reqSelPort)
  val misalign_can_split = Wire(Bool())
  misalign_can_split := Mux(s2_canEnq, (0 until enqPortNum).map {
    case i => !io.enq(i).revoke && s2_reqSelPort === i.U
  }.reduce(_|_), GatedRegNext(misalign_can_split))
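  // One cycle after enqueue, the split may proceed only if the enqueuing port did
  // not revoke its request; once computed, the decision is held until the next enqueue.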

  when(canEnq) {
    connectSamePort(req, reqSelBits)
    req.portIndex := reqSelPort
    req_valid := true.B
  }
  val cross4KBPageEnq = WireInit(false.B)
  when (cross4KBPageBoundary && !reqRedirect) {
    when(
      reqSelValid &&
      (isAfter(req.uop.robIdx, reqSelBits.uop.robIdx) || (isNotBefore(req.uop.robIdx, reqSelBits.uop.robIdx) && req.uop.uopIdx > reqSelBits.uop.uopIdx)) &&
      bufferState === s_idle
    ) {
      connectSamePort(req, reqSelBits)
      req.portIndex := reqSelPort
      cross4KBPageEnq := true.B
      needFlushPipe   := true.B
    } .otherwise {
      req := req
      cross4KBPageEnq := false.B
    }
  }
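  // While the buffered store crosses a 4KB page and the buffer is still idle, an
  // incoming request that is older in program order replaces the current entry;
  // needFlushPipe then raises flushPipe at writeback so the displaced younger
  // store gets re-executed.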

  val reqSelCanEnq = UIntToOH(reqSelPort)

  io.enq.zipWithIndex.map{
    case (reqPort, index) => reqPort.req.ready := reqSelCanEnq(index) && (!req_valid || cross4KBPageBoundary && cross4KBPageEnq)
  }

  io.toVecStoreMergeBuffer.zipWithIndex.map{
    case (toStMB, index) => {
      toStMB.flush   := req_valid && cross4KBPageBoundary && cross4KBPageEnq && UIntToOH(req.portIndex)(index)
      toStMB.mbIndex := req.mbIndex
    }
  }
  io.full := req_valid

  // logic
  val splitStoreReqs = RegInit(VecInit(List.fill(maxSplitNum)(0.U.asTypeOf(new LsPipelineBundle))))
  val splitStoreResp = RegInit(VecInit(List.fill(maxSplitNum)(0.U.asTypeOf(new SqWriteBundle))))
  val isCrossPage    = RegInit(false.B)
  val exceptionVec   = RegInit(0.U.asTypeOf(ExceptionVec()))
  val unSentStores   = RegInit(0.U(maxSplitNum.W))
  val unWriteStores  = RegInit(0.U(maxSplitNum.W))
  val curPtr = RegInit(0.U(log2Ceil(maxSplitNum).W))

  // whether any split store raised an exception or hit uncacheable space
  val globalException = RegInit(false.B)
  val globalUncache = RegInit(false.B)

  // debug info
  val globalMMIO = RegInit(false.B)
  val globalNC   = RegInit(false.B)

  val hasException = io.splitStoreResp.bits.vecActive && !io.splitStoreResp.bits.need_rep &&
    ExceptionNO.selectByFu(io.splitStoreResp.bits.uop.exceptionVec, StaCfg).asUInt.orR || TriggerAction.isDmode(io.splitStoreResp.bits.uop.trigger)
  val isUncache = (io.splitStoreResp.bits.mmio || io.splitStoreResp.bits.nc) && !io.splitStoreResp.bits.need_rep
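  // note the precedence: the debug-mode trigger term is OR-ed in on its own, so it
  // sets hasException regardless of vecActive and need_rep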

  io.sqControl.toStoreQueue.crossPageWithHit := io.sqControl.toStoreMisalignBuffer.sqPtr === req.uop.sqIdx && isCrossPage
  io.sqControl.toStoreQueue.crossPageCanDeq := !isCrossPage || bufferState === s_block
  io.sqControl.toStoreQueue.paddr := Cat(splitStoreResp(1).paddr(splitStoreResp(1).paddr.getWidth - 1, 3), 0.U(3.W))

  io.sqControl.toStoreQueue.withSameUop := io.sqControl.toStoreMisalignBuffer.uop.robIdx === req.uop.robIdx && io.sqControl.toStoreMisalignBuffer.uop.uopIdx === req.uop.uopIdx && req.isvec && robMatch && isCrossPage
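  // the paddr handed to the store queue above is the physical address of the high
  // half, aligned down to 8 bytes (low three bits cleared)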

  // state transition
  switch(bufferState) {
    is (s_idle) {
      when(cross4KBPageBoundary && misalign_can_split) {
        when(robMatch) {
          bufferState := s_split
          isCrossPage := true.B
        }
      } .otherwise {
        when (req_valid && misalign_can_split) {
          bufferState := s_split
          isCrossPage := false.B
        }
      }
    }

    is (s_split) {
      bufferState := s_req
    }

    is (s_req) {
      when (io.splitStoreReq.fire) {
        bufferState := s_resp
      }
    }

    is (s_resp) {
      val needDelay = WireInit(false.B)

      when (io.splitStoreResp.valid) {
        val clearOh = UIntToOH(curPtr)
        when (hasException || isUncache) {
          // commit directly when an exception occurs
          // if any split store reaches mmio space, delegate to software via a storeAddrMisaligned exception
          bufferState := s_wb
          globalException := hasException
          globalUncache := isUncache
          globalMMIO := io.splitStoreResp.bits.mmio
          globalNC   := io.splitStoreResp.bits.nc
        } .elsewhen(io.splitStoreResp.bits.need_rep || (unSentStores & (~clearOh).asUInt).orR) {
          // needs replay or still has unsent requests
          bufferState := s_req
        } .otherwise {
          // got all results; go on to compute the data and control the sq.
          // Wait a few cycles so the misaligned writeback lines up with the RAW rollback.
          needDelay := true.B
          bufferState := s_resp
        }
      }

      when (RegNextN(needDelay, RAWTotalDelayCycles)) {
        bufferState := s_wb
      }
    }

    is (s_wb) {
      when (req.isvec) {
        when (io.vecWriteBack.map(x => x.fire).reduce( _ || _)) {
          bufferState := s_idle
          req_valid := false.B
          curPtr := 0.U
          unSentStores := 0.U
          unWriteStores := 0.U
          globalException := false.B
          globalUncache := false.B
          isCrossPage := false.B
          needFlushPipe := false.B

          globalMMIO := false.B
          globalNC   := false.B
        }

      }.otherwise {
        when (io.writeBack.fire && (!isCrossPage || globalUncache || globalException)) {
          bufferState := s_idle
          req_valid := false.B
          curPtr := 0.U
          unSentStores := 0.U
          unWriteStores := 0.U
          globalException := false.B
          globalUncache := false.B
          isCrossPage := false.B
          needFlushPipe := false.B

          globalMMIO := false.B
          globalNC   := false.B
        } .elsewhen(io.writeBack.fire && isCrossPage) {
          bufferState := s_block
        } .otherwise {
          bufferState := s_wb
        }

      }
    }

    is (s_block) {
      when (io.sqControl.toStoreMisalignBuffer.doDeq) {
        bufferState := s_idle
        req_valid := false.B
        curPtr := 0.U
        unSentStores := 0.U
        unWriteStores := 0.U
        globalException := false.B
        globalUncache := false.B
        isCrossPage := false.B
        needFlushPipe := false.B

        globalMMIO := false.B
        globalNC   := false.B
      }
    }
  }

  val alignedType = Mux(req.isvec, req.alignedType(1,0), req.uop.fuOpType(1, 0))

  val highAddress = LookupTree(alignedType, List(
    SB -> 0.U,
    SH -> 1.U,
    SW -> 3.U,
    SD -> 7.U
  )) + req.vaddr(4, 0)

  val highPageAddress = LookupTree(alignedType, List(
    SB -> 0.U,
    SH -> 1.U,
    SW -> 3.U,
    SD -> 7.U
  )) + req.vaddr(12, 0)
  // check whether (vaddr + opSize - 1) and vaddr are in the same 16-byte region
  val cross16BytesBoundary = req_valid && (highAddress(4) =/= req.vaddr(4))
  cross4KBPageBoundary := req_valid && (highPageAddress(12) =/= req.vaddr(12))
  val aligned16BytesAddr   = (req.vaddr >> 4) << 4 // req.vaddr & ~("b1111".U)
  val aligned16BytesSel    = req.vaddr(3, 0)
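  // e.g. an SW store with vaddr(4, 0) = 0x0e: highAddress = 3 + 0x0e = 0x11, so
  // bit 4 differs from vaddr(4) and the access crosses a 16-byte boundary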

  // meta of the 128-bit store
  val new128Store = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  // meta of the split stores
  val lowAddrStore  = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  val highAddrStore = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  // final lowResult = Cat(`lowResultWidth` bytes of the store data, zero-padding up to VLEN)
  val lowResultWidth = RegInit(0.U(3.W)) // how many bytes to take from the store data
  // final highResult = zero-extension to VLEN of (`highResultWidth` bytes of (store data >> lowResultWidth))
  val highResultWidth = RegInit(0.U(3.W)) // how many bytes to take from the store data

  when (bufferState === s_split) {
    when (!cross16BytesBoundary) {
      assert(false.B, "There should be no misaligned access that does not cross a 16-byte boundary.")
    } .otherwise {
      // split this unaligned store into `maxSplitNum` aligned stores
      unWriteStores := Fill(maxSplitNum, 1.U(1.W))
      unSentStores := Fill(maxSplitNum, 1.U(1.W))
      curPtr := 0.U
      lowAddrStore.uop := req.uop
      lowAddrStore.uop.exceptionVec(storeAddrMisaligned) := false.B
      highAddrStore.uop := req.uop
      highAddrStore.uop.exceptionVec(storeAddrMisaligned) := false.B

      switch (alignedType(1, 0)) {
        is (SB) {
          assert(false.B, "sb should not trigger misalignment")
        }

        is (SH) {
          lowAddrStore.uop.fuOpType := SB
          lowAddrStore.vaddr := req.vaddr
          lowAddrStore.mask  := 0x1.U << lowAddrStore.vaddr(3, 0)
          lowResultWidth    := BYTE1

          highAddrStore.uop.fuOpType := SB
          highAddrStore.vaddr := req.vaddr + 1.U
          highAddrStore.mask  := 0x1.U << highAddrStore.vaddr(3, 0)
          highResultWidth    := BYTE1
        }

        is (SW) {
          switch (req.vaddr(1, 0)) {
            is ("b00".U) {
              assert(false.B, "aligned sw should not trigger misalignment")
            }

            is ("b01".U) {
              lowAddrStore.uop.fuOpType := SW
              lowAddrStore.vaddr := req.vaddr - 1.U
              lowAddrStore.mask  := 0xf.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth    := BYTE3

              highAddrStore.uop.fuOpType := SB
              highAddrStore.vaddr := req.vaddr + 3.U
              highAddrStore.mask  := 0x1.U << highAddrStore.vaddr(3, 0)
              highResultWidth    := BYTE1
            }

            is ("b10".U) {
              lowAddrStore.uop.fuOpType := SH
              lowAddrStore.vaddr := req.vaddr
              lowAddrStore.mask  := 0x3.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth    := BYTE2

              highAddrStore.uop.fuOpType := SH
              highAddrStore.vaddr := req.vaddr + 2.U
              highAddrStore.mask  := 0x3.U << highAddrStore.vaddr(3, 0)
              highResultWidth    := BYTE2
            }

            is ("b11".U) {
              lowAddrStore.uop.fuOpType := SB
              lowAddrStore.vaddr := req.vaddr
              lowAddrStore.mask  := 0x1.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth    := BYTE1

              highAddrStore.uop.fuOpType := SW
              highAddrStore.vaddr := req.vaddr + 1.U
              highAddrStore.mask  := 0xf.U << highAddrStore.vaddr(3, 0)
              highResultWidth    := BYTE3
            }
          }
        }

        is (SD) {
          switch (req.vaddr(2, 0)) {
            is ("b000".U) {
              assert(false.B, "aligned sd should not trigger misalignment")
            }

            is ("b001".U) {
              lowAddrStore.uop.fuOpType := SD
              lowAddrStore.vaddr := req.vaddr - 1.U
              lowAddrStore.mask  := 0xff.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth    := BYTE7

              highAddrStore.uop.fuOpType := SB
              highAddrStore.vaddr := req.vaddr + 7.U
              highAddrStore.mask  := 0x1.U << highAddrStore.vaddr(3, 0)
              highResultWidth    := BYTE1
            }

            is ("b010".U) {
              lowAddrStore.uop.fuOpType := SD
              lowAddrStore.vaddr := req.vaddr - 2.U
              lowAddrStore.mask  := 0xff.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth    := BYTE6

              highAddrStore.uop.fuOpType := SH
              highAddrStore.vaddr := req.vaddr + 6.U
              highAddrStore.mask  := 0x3.U << highAddrStore.vaddr(3, 0)
              highResultWidth    := BYTE2
            }

            is ("b011".U) {
              lowAddrStore.uop.fuOpType := SD
              lowAddrStore.vaddr := req.vaddr - 3.U
              lowAddrStore.mask  := 0xff.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth    := BYTE5

              highAddrStore.uop.fuOpType := SW
              highAddrStore.vaddr := req.vaddr + 5.U
              highAddrStore.mask  := 0xf.U << highAddrStore.vaddr(3, 0)
              highResultWidth    := BYTE3
            }

            is ("b100".U) {
              lowAddrStore.uop.fuOpType := SW
              lowAddrStore.vaddr := req.vaddr
              lowAddrStore.mask  := 0xf.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth    := BYTE4

              highAddrStore.uop.fuOpType := SW
              highAddrStore.vaddr := req.vaddr + 4.U
              highAddrStore.mask  := 0xf.U << highAddrStore.vaddr(3, 0)
              highResultWidth    := BYTE4
            }

            is ("b101".U) {
              lowAddrStore.uop.fuOpType := SD
              lowAddrStore.vaddr := req.vaddr - 5.U
              lowAddrStore.mask  := 0xff.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth    := BYTE3

              highAddrStore.uop.fuOpType := SD
              highAddrStore.vaddr := req.vaddr + 3.U
              highAddrStore.mask  := 0xff.U << highAddrStore.vaddr(3, 0)
              highResultWidth    := BYTE5
            }

            is ("b110".U) {
              lowAddrStore.uop.fuOpType := SD
              lowAddrStore.vaddr := req.vaddr - 6.U
              lowAddrStore.mask  := 0xff.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth    := BYTE2

              highAddrStore.uop.fuOpType := SD
              highAddrStore.vaddr := req.vaddr + 2.U
              highAddrStore.mask  := 0xff.U << highAddrStore.vaddr(3, 0)
              highResultWidth    := BYTE6
            }

            is ("b111".U) {
              lowAddrStore.uop.fuOpType := SD
              lowAddrStore.vaddr := req.vaddr - 7.U
              lowAddrStore.mask  := 0xff.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth    := BYTE1

              highAddrStore.uop.fuOpType := SD
              highAddrStore.vaddr := req.vaddr + 1.U
              highAddrStore.mask  := 0xff.U << highAddrStore.vaddr(3, 0)
              highResultWidth    := BYTE7
            }
          }
        }
      }

      splitStoreReqs(0) := lowAddrStore
      splitStoreReqs(1) := highAddrStore
    }
  }
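  // Worked example: an sd with vaddr(2, 0) = b101 splits into an 8-byte-aligned sd
  // at vaddr - 5 carrying the low 3 bytes (BYTE3) and an sd at vaddr + 3 carrying
  // the remaining 5 bytes (BYTE5).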

  io.splitStoreReq.valid := req_valid && (bufferState === s_req)
  io.splitStoreReq.bits  := splitStoreReqs(curPtr)
  io.splitStoreReq.bits.isvec  := req.isvec
  // restore the information of the H-extension store
  // bit encoding: | hsv 1 | store 00 | size(2bit) |
  val reqIsHsv  = LSUOpType.isHsv(req.uop.fuOpType)
  io.splitStoreReq.bits.uop.fuOpType := Mux(req.isvec, req.uop.fuOpType, Cat(reqIsHsv, 0.U(2.W), splitStoreReqs(curPtr).uop.fuOpType(1, 0)))
  io.splitStoreReq.bits.alignedType  := Mux(req.isvec, splitStoreReqs(curPtr).uop.fuOpType(1, 0), req.alignedType)
  io.splitStoreReq.bits.isFinalSplit := curPtr(0)
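  // with maxSplitNum == 2, curPtr(0) is set exactly for the second (high) part,
  // which is therefore marked as the final split request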

  when (io.splitStoreResp.valid) {
    val resp = io.splitStoreResp.bits
    splitStoreResp(curPtr) := io.splitStoreResp.bits
    when (isUncache) {
      unWriteStores := 0.U
      unSentStores := 0.U
      exceptionVec := ExceptionNO.selectByFu(0.U.asTypeOf(exceptionVec.cloneType), StaCfg)
      // delegate to software
      exceptionVec(storeAddrMisaligned) := true.B
    } .elsewhen (hasException) {
      unWriteStores := 0.U
      unSentStores := 0.U
      StaCfg.exceptionOut.map(no => exceptionVec(no) := exceptionVec(no) || resp.uop.exceptionVec(no))
    } .elsewhen (!io.splitStoreResp.bits.need_rep) {
      unSentStores := unSentStores & (~UIntToOH(curPtr)).asUInt
      curPtr := curPtr + 1.U
      exceptionVec := 0.U.asTypeOf(ExceptionVec())
    }
  }

  val splitStoreData = RegInit(VecInit(List.fill(maxSplitNum)(0.U.asTypeOf(new XSBundle {
    val wdata = UInt(VLEN.W)
    val wmask = UInt((VLEN / 8).W)
  }))))

  val wmaskLow  = Wire(Vec(VLEN / 8, Bool()))
  val wmaskHigh = Wire(Vec(VLEN / 8, Bool()))
  (0 until (VLEN / 8)).map {
    case i  => {
      when (i.U < highResultWidth) {
        wmaskHigh(i) := true.B
      } .otherwise {
        wmaskHigh(i) := false.B
      }
      when (i.U < lowResultWidth) {
        wmaskLow(i) := true.B
      } .otherwise {
        wmaskLow(i) := false.B
      }
    }
  }
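  // e.g. lowResultWidth = BYTE3 enables the low three byte lanes: wmaskLow = 0x0007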

  io.writeBack.valid := req_valid && (bufferState === s_wb) && !io.storeOutValid && !req.isvec
  io.writeBack.bits.uop := req.uop
  io.writeBack.bits.uop.exceptionVec := DontCare
  StaCfg.exceptionOut.map(no => io.writeBack.bits.uop.exceptionVec(no) := (globalUncache || globalException) && exceptionVec(no))
  io.writeBack.bits.uop.flushPipe := needFlushPipe
  io.writeBack.bits.uop.replayInst := false.B
  io.writeBack.bits.data := DontCare
  io.writeBack.bits.isFromLoadUnit := DontCare
  io.writeBack.bits.debug.isMMIO := globalMMIO
  io.writeBack.bits.debug.isNC := globalNC
  io.writeBack.bits.debug.isPerfCnt := false.B
  io.writeBack.bits.debug.paddr := req.paddr
  io.writeBack.bits.debug.vaddr := req.vaddr

  io.vecWriteBack.zipWithIndex.map{
    case (wb, index) => {
      wb.valid := req_valid && (bufferState === s_wb) && req.isvec && !io.storeVecOutValid && UIntToOH(req.portIndex)(index)

      wb.bits.mBIndex           := req.mbIndex
      wb.bits.hit               := true.B
      wb.bits.isvec             := true.B
      wb.bits.sourceType        := RSFeedbackType.tlbMiss
      wb.bits.flushState        := DontCare
      wb.bits.trigger           := TriggerAction.None
      wb.bits.mmio              := globalMMIO
      wb.bits.exceptionVec      := ExceptionNO.selectByFu(exceptionVec, VstuCfg)
      wb.bits.hasException      := globalException
      wb.bits.usSecondInv       := req.usSecondInv
      wb.bits.vecFeedback       := true.B
      wb.bits.elemIdx           := req.elemIdx
      wb.bits.alignedType       := req.alignedType
      wb.bits.mask              := req.mask
      wb.bits.vaddr             := req.vaddr
      wb.bits.vaNeedExt         := req.vaNeedExt
      wb.bits.gpaddr            := req.gpaddr
      wb.bits.isForVSnonLeafPTE := req.isForVSnonLeafPTE
      wb.bits.vstart            := req.uop.vpu.vstart
      wb.bits.vecTriggerMask    := 0.U
      wb.bits.nc                := globalNC
    }
  }

  val flush = req_valid && req.uop.robIdx.needFlush(io.redirect)

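  // on redirect, reset the buffer; req_valid is kept only when an older
  // cross-4KB-page request was enqueued this cycle and was not itself redirected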
  when (flush) {
    bufferState := s_idle
    req_valid := Mux(cross4KBPageEnq && cross4KBPageBoundary && !reqRedirect, req_valid, false.B)
    curPtr := 0.U
    unSentStores := 0.U
    unWriteStores := 0.U
    globalException := false.B
    globalUncache := false.B
    isCrossPage := false.B
    needFlushPipe := false.B

    globalMMIO := false.B
    globalNC   := false.B
  }

  // NOTE: special case (a misaligned store crosses a page boundary and the page fault happens on the next page)
  // if the exception happens in the higher page-address part, overwrite the storeExceptionBuffer vaddr
  val shouldOverwrite = req_valid && cross16BytesBoundary && globalException && (curPtr === 1.U)
  val overwriteExpBuf = GatedValidRegNext(shouldOverwrite)
  val overwriteVaddr = RegEnable(splitStoreResp(curPtr).vaddr, shouldOverwrite)
  val overwriteIsHyper = RegEnable(splitStoreResp(curPtr).isHyper, shouldOverwrite)
  val overwriteGpaddr = RegEnable(splitStoreResp(curPtr).gpaddr, shouldOverwrite)
  val overwriteIsForVSnonLeafPTE = RegEnable(splitStoreResp(curPtr).isForVSnonLeafPTE, shouldOverwrite)

  // TODO: in theory there is no need to overwrite, but for now the signal is retained in the code in this way,
  // and it will be removed after sufficient verification.
  io.overwriteExpBuf.valid := false.B
  io.overwriteExpBuf.vaddr := overwriteVaddr
  io.overwriteExpBuf.isHyper := overwriteIsHyper
  io.overwriteExpBuf.gpaddr := overwriteGpaddr
  io.overwriteExpBuf.isForVSnonLeafPTE := overwriteIsForVSnonLeafPTE

  XSPerfAccumulate("alloc",                  RegNext(!req_valid) && req_valid)
  XSPerfAccumulate("flush",                  flush)
  XSPerfAccumulate("flush_idle",             flush && (bufferState === s_idle))
  XSPerfAccumulate("flush_non_idle",         flush && (bufferState =/= s_idle))
}