// xref: /XiangShan/src/main/scala/xiangshan/backend/VecExcpDataMergeModule.scala (revision 71c0165fa5c2925e0bf31c3cf0e308e3292f71e9)
package xiangshan.backend

import chisel3.util._
import chisel3._
import org.chipsalliance.cde.config.Parameters
import utility._
import xiangshan._
import xiangshan.backend.fu.vector.Bundles._

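/**
  * Merges vector destination data when a vector load/store takes an exception with a non-zero
  * vstart. The module first collects logical/physical register mappings from the RAB and the
  * RAT, then reads the old and new physical vd registers from the VPRF, keeps the bytes below
  * the vstart offset from the new vd and the bytes at or above it from the old vd, and writes
  * the merged value back to the new physical register. Registers of the group beyond the merge
  * range only have their old data moved over (mvOldVd).
  */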
class VecExcpDataMergeModule(implicit p: Parameters) extends XSModule {
  private val MaxLMUL = 8
  private val VdIdxInGroupWidth = log2Ceil(MaxLMUL) // hold 0~7
  private val minElemLen = 8 // 8 bits
  private val maxElemNumPerVreg = VLEN / minElemLen
  private val tailZeroBit = log2Ceil(maxElemNumPerVreg) // 16 -> 4

  val i = IO(Input(new Bundle {
    val fromExceptionGen = ValidIO(new VecExcpInfo)
    val fromRab = new RabToVecExcpMod
    val fromRat = new RatToVecExcpMod
    val fromVprf = new VprfToExcpMod(maxMergeNumPerCycle * 2)
  }))
  val o = IO(Output(new Bundle {
    val toVPRF = new ExcpModToVprf(maxMergeNumPerCycle * 2, maxMergeNumPerCycle)
    val status = new Bundle {
      val busy = Bool()
    }
  }))

  private val oldPregVecFromRat: Vec[ValidIO[UInt]] = Wire(Vec(RabCommitWidth, ValidIO(UInt(VfPhyRegIdxWidth.W))))
  oldPregVecFromRat.zipWithIndex.foreach { case (oldPreg: ValidIO[UInt], idx) =>
    val vecOldVd = i.fromRat.vecOldVdPdest(idx)
    val v0OldVd  = i.fromRat.v0OldVdPdest(idx)
    oldPreg.valid := (vecOldVd.valid || v0OldVd.valid)
    oldPreg.bits := Mux1H(Seq(
      vecOldVd.valid -> vecOldVd.bits,
      v0OldVd.valid -> v0OldVd.bits,
    ))
  }

  private val lregNewPregVecFromRab = WireInit(i.fromRab.logicPhyRegMap)

  private val preMergedOldVd = WireInit(VecInit(i.fromVprf.rdata.take(maxMergeNumPerCycle).map(_.bits.asTypeOf(new VecElemData(VLEN)))))
  private val preMergedNewVd = WireInit(VecInit(i.fromVprf.rdata.drop(maxMergeNumPerCycle).map(_.bits.asTypeOf(new VecElemData(VLEN)))))
  private val preMoveOldVd   = WireInit(VecInit(i.fromVprf.rdata.map(_.bits.asTypeOf(new VecElemData(VLEN)))))

  private val sNoExcp_vecExcpInfo = WireInit(i.fromExceptionGen)
  private val sNoExcp_vemul = sNoExcp_vecExcpInfo.bits.vlmul + sNoExcp_vecExcpInfo.bits.veew - sNoExcp_vecExcpInfo.bits.vsew
  // data vemul
  private val sNoExcp_dvemul = Mux(
    sNoExcp_vecExcpInfo.bits.isIndexed,
    sNoExcp_vecExcpInfo.bits.vlmul,
    sNoExcp_vemul,
  )
  // index vemul
  private val sNoExcp_ivemul = WireInit(VLmul(), sNoExcp_vemul)
  dontTouch(sNoExcp_vemul)
  dontTouch(sNoExcp_dvemul)
  dontTouch(sNoExcp_ivemul)
  private val sNoExcp_dvemulNoLessThanM1 = VLmul.makeNoLessThanM1(sNoExcp_dvemul).take(2)
  private val sNoExcp_ivemulNoLessThanM1 = VLmul.makeNoLessThanM1(sNoExcp_ivemul).take(2)

  // vemul_i_d(idx) is true iff ivemul - dvemul == idx (idx 0 also covers ivemul < dvemul)
  private val sNoExcp_vemul_i_d = VecInit.tabulate(4)(idx =>
    sNoExcp_ivemulNoLessThanM1 === (sNoExcp_dvemulNoLessThanM1 +& idx.U) ||
    (idx == 0).B && (sNoExcp_ivemulNoLessThanM1 < sNoExcp_dvemulNoLessThanM1)
  )
  private val sNoExcp_nonSegIndexed = sNoExcp_vecExcpInfo.bits.isIndexed && sNoExcp_vecExcpInfo.bits.nf === 0.U

  private val commitNeeded = RegInit(VecInit.fill(MaxLMUL)(false.B))
  private val rabCommitted = RegInit(VecInit.fill(MaxLMUL)(false.B))
  private val ratCommitted = RegInit(VecInit.fill(MaxLMUL)(false.B))
  private val hasReadRf    = RegInit(VecInit.fill(MaxLMUL)(false.B))

  private val regMaps = Reg(Vec(MaxLMUL, new LogicPhyRegMap))

  private val currentIdx = RegInit(0.U(log2Up(8 + 1).W))
  private val currentIdxVec = (0 until maxMergeNumPerCycle).map(idx => currentIdx + idx.U)

  private val mergedVd = Reg(Vec(maxMergeNumPerCycle, new VecElemData(VLEN)))

  private val sNoExcp_eewOH = SewOH.convertFromVSew(sNoExcp_vecExcpInfo.bits.veew)
  private val sNoExcp_sewOH = SewOH.convertFromVSew(sNoExcp_vecExcpInfo.bits.vsew)
  private val sNoExcp_deewOH = Mux(
    sNoExcp_vecExcpInfo.bits.isIndexed,
    sNoExcp_sewOH,
    sNoExcp_eewOH,
  )
  private val sNoExcp_voffset = Module(new GetE8OffsetInVreg(VLEN))(sNoExcp_deewOH, sNoExcp_vecExcpInfo.bits.vstart)
  private val sNoExcp_idxRangeVec: Vec[HWRange] =
    Module(new NfMappedElemIdx(VLEN))(
      Mux(!sNoExcp_vecExcpInfo.bits.isWhole, sNoExcp_vecExcpInfo.bits.nf, 0.U),
      sNoExcp_deewOH
    )
  private val sNoExcp_vstartIsAligned: Bool = Mux(!sNoExcp_vecExcpInfo.bits.isVlm, sNoExcp_voffset === 0.U, false.B)

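  // For each of the up-to-8 vd registers in the group: does vstart fall inside that register's
  // element-index range? For vlm (mask loads) only vd index 0 is considered.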
  private val sNoExcp_inRangeVec: Vec[Bool] = VecInit((0 until 8).map(idx =>
    if (idx == 0) {
      sNoExcp_vecExcpInfo.bits.isVlm ||
      sNoExcp_idxRangeVec(idx).inRange (sNoExcp_vecExcpInfo.bits.vstart)
    } else {
      !sNoExcp_vecExcpInfo.bits.isVlm &&
      sNoExcp_idxRangeVec(idx).inRange (sNoExcp_vecExcpInfo.bits.vstart)
    }
  ))
  // The last vdIdx with no exception, holds 0~7.
  // No need to hold 8: if all vd are new, no exception occurred.
  private val sNoExcp_useNewVdUntil: UInt = PriorityEncoder(sNoExcp_inRangeVec)
  // The last exception vdIdx, holds 0~8.
  // Needs to hold 8.
  private val sNoExcp_needMergeUntil: UInt = sNoExcp_useNewVdUntil +
    Mux(!sNoExcp_vecExcpInfo.bits.isWhole, sNoExcp_vecExcpInfo.bits.nf, 0.U) +&
    1.U
  // the max vd idx that needs to be written
  private val sNoExcp_maxVdIdx = Mux(
    sNoExcp_vecExcpInfo.valid,
    MuxCase(
      default = ((sNoExcp_vecExcpInfo.bits.nf +& 1.U) << sNoExcp_dvemulNoLessThanM1).asUInt,
      Seq(
        sNoExcp_vecExcpInfo.bits.isVlm -> 1.U,
        sNoExcp_vecExcpInfo.bits.isWhole -> (sNoExcp_vecExcpInfo.bits.nf +& 1.U),
      )
    ),
    0.U
  )

  private val sNoExcp_handleUntil = sNoExcp_maxVdIdx(3, 0) // [1, 8]
  // strided vector loads need 2 uops to move data, so skip these reg maps
  private val sNoExcp_writeOffset = Mux(sNoExcp_vecExcpInfo.bits.isStride, 2.U, 1.U)

  private val sWaitRab_vecExcpInfo     = RegNextWithEnable(sNoExcp_vecExcpInfo)

  // At the beginning of waitRab,
  // when the offset is not aligned, currentIdx = useNewVdUntil <= needMergeUntil <= handleUntil
  // otherwise, currentIdx = needMergeUntil <= handleUntil
  private val sWaitRab_useNewVdUntil   = RegEnable(sNoExcp_useNewVdUntil, sNoExcp_vecExcpInfo.valid)
  private val sWaitRab_needMergeUntil  = RegEnable(sNoExcp_needMergeUntil, sNoExcp_vecExcpInfo.valid)
  private val sWaitRab_e8offset        = RegEnable(
    Mux1H((0 until 4).map(idx => sNoExcp_deewOH(idx) -> ZeroExt(sNoExcp_voffset(tailZeroBit - 1, 0), tailZeroBit))),
    sNoExcp_vecExcpInfo.valid
  )
  private val sWaitRab_idxRangeVec     = RegEnable(sNoExcp_idxRangeVec, sNoExcp_vecExcpInfo.valid)
  private val sWaitRab_vstartIsAligned = RegEnable(sNoExcp_vstartIsAligned, sNoExcp_vecExcpInfo.valid)
  private val sWaitRab_handleUntil     = RegEnable(sNoExcp_handleUntil, sNoExcp_vecExcpInfo.valid)

  private val sWaitRab_nonSegIndexed   = RegEnable(sNoExcp_nonSegIndexed, sNoExcp_vecExcpInfo.valid)
  private val sWaitRab_vemul_i_d       = RegEnable(sNoExcp_vemul_i_d, sNoExcp_vecExcpInfo.valid)
  private val sWaitRab_dvemulNoLessThanM1 = RegEnable(sNoExcp_dvemulNoLessThanM1, sNoExcp_vecExcpInfo.valid)

  private val sWaitRab_rabWriteOffset = Reg(UInt(4.W)) // [1,10]
  private val sWaitRab_ratWriteOffset = Reg(UInt(4.W)) // [1,10]

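  // For segment loads whose data EMUL > 1, the vd registers of a group are committed in a
  // different order than they are merged here; this table maps a merge-order vdIdx to its
  // location in the collected regMaps (identity mapping in all other cases).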
  // segShuffledRegIdxTable(nf)(dvemul)(vdIdx)
  private val segShuffledRegIdxTable: Seq[Seq[Seq[Int]]] = Seq.tabulate(8, 4) {
    case (nf, dvemul) =>
      val nField = nf + 1     // 1~8
      val dEMUL = 1 << dvemul // 1, 2, 4, 8
      if (nField == 2 && dEMUL == 2) {
        Seq(0, 2, 1, 3, 0, 0, 0, 0)
      }
      else if (nField == 2 && dEMUL == 4) {
        Seq(0, 4, 1, 5, 2, 6, 3, 7)
      }
      else if (nField == 3 && dEMUL == 2) {
        Seq(0, 2, 4, 1, 3, 5, 0, 0)
      }
      else if (nField == 4 && dEMUL == 2) {
        Seq(0, 2, 4, 6, 1, 3, 5, 7)
      }
      else {
        Seq(0, 1, 2, 3, 4, 5, 6, 7)
      }
  }
  private val segRegTableHW: Vec[Vec[Vec[UInt]]] = WireInit(VecInit.tabulate(8, 4) {
    case (nf, dvemul) => VecInit(segShuffledRegIdxTable(nf)(dvemul).map(_.U(VdIdxInGroupWidth.W)))
  })

  // when nonSegIndexed load, iemul/demul = 1 << 2, vdLoc will be mapped as (0, 1, 2, 3, ...) -> (0, 4, ...)
  private val oldVdLocVec: Vec[UInt] = VecInit(currentIdxVec.map(idx =>
    Mux(
      sWaitRab_nonSegIndexed,
      Mux1H(sWaitRab_vemul_i_d.zipWithIndex.map { case (i_d_n, ii) => i_d_n -> (idx << ii).asUInt }),
      Mux(
        sWaitRab_vecExcpInfo.bits.isWhole,
        idx,
        segRegTableHW(sWaitRab_vecExcpInfo.bits.nf)(sWaitRab_dvemulNoLessThanM1)(idx),
      )
    ).take(VdIdxInGroupWidth)
  ))

  // when nonSegIndexed load, iemul/demul = 1 << 2, vdLoc will be mapped as (0, 1, 2, 3, ...) -> (3, 7, ...)
  private val newVdLocVec = VecInit(currentIdxVec.map(idx =>
    Mux(
      sWaitRab_nonSegIndexed,
      Mux1H(sWaitRab_vemul_i_d.zipWithIndex.map { case (i_d_n, ii) => i_d_n -> ((idx << ii).asUInt | ((1 << ii) - 1).U) }),
      Mux(
        sWaitRab_vecExcpInfo.bits.isWhole,
        idx,
        segRegTableHW(sWaitRab_vecExcpInfo.bits.nf)(sWaitRab_dvemulNoLessThanM1)(idx),
      )
    ).take(VdIdxInGroupWidth)
  ))

  dontTouch(oldVdLocVec)
  dontTouch(newVdLocVec)

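  // State machine: noExcp (idle) -> waitRab (collect reg maps from RAB/RAT) ->
  // mergeVd (read old/new vd and merge around the vstart offset) -> mvOldVd (move the old data
  // of the remaining registers in the group) -> finish (clear bookkeeping) -> noExcp.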
  private object State extends ChiselEnum {
    val noExcp  = Value
    val waitRab = Value
    val mergeVd = Value
    val mvOldVd = Value
    val finish  = Value
  }

  private val state: State.Type = RegInit(State.noExcp)
  private val stateNext = WireInit(state)
  state := stateNext

  private val collectedAllRegMap = Wire(Bool())
  private val mergeFinished = currentIdx >= sWaitRab_needMergeUntil
  private val mvFinished = currentIdx >= sWaitRab_handleUntil

  // get lreg and new preg, the last mapped newPdest
  private val filteredRabCommitedVec: Vec[Vec[Bool]] = WireInit(VecInit.tabulate(4, MaxLMUL) { case (i_d_n, vdIdx) =>
    val vdLoc = ((vdIdx + 1) << i_d_n) - 1
    rabCommitted(if (vdLoc >= MaxLMUL) 0 else vdLoc)
  })
  // get old preg, the first mapped oldPdest
  private val filteredRatCommitedVec: Vec[Vec[Bool]] = WireInit(VecInit.tabulate(4, MaxLMUL) { case (i_d_n, vdIdx) =>
    val vdLoc = vdIdx << i_d_n
    ratCommitted(if (vdLoc >= MaxLMUL) 0 else vdLoc)
  })

  private val filteredRabCommited = Wire(Vec(MaxLMUL, Bool()))
  private val filteredRatCommited = Wire(Vec(MaxLMUL, Bool()))
  when (sWaitRab_nonSegIndexed) {
    filteredRabCommited := Mux1H(sWaitRab_vemul_i_d, filteredRabCommitedVec)
    filteredRatCommited := Mux1H(sWaitRab_vemul_i_d, filteredRatCommitedVec)
  }.otherwise {
    // No need to shuffle, since the vdIdx is always compressed towards zero, leaving the tail unused.
    filteredRabCommited := rabCommitted
    filteredRatCommited := ratCommitted
  }

  // 1. no commit needed, or
  // 2. commit needed and both RAB and RAT have committed
  collectedAllRegMap := ((~commitNeeded.asUInt).asUInt | (commitNeeded.asUInt & filteredRabCommited.asUInt & filteredRatCommited.asUInt)).andR

  switch(state) {
    is(State.noExcp) {
      when (i.fromExceptionGen.valid) {
        stateNext := State.waitRab
      }
    }
    is(State.waitRab) {
      when (collectedAllRegMap) {
        stateNext := State.mergeVd
        currentIdx := sWaitRab_useNewVdUntil
      }
    }
    is(State.mergeVd) {
      when (mvFinished) {
        stateNext := State.finish
      }.elsewhen (mergeFinished) {
        stateNext := State.mvOldVd
      }
      when(o.toVPRF.w.head.valid) {
        currentIdx := currentIdx + PopCount(o.toVPRF.w.map(_.valid))
      }
    }
    is(State.mvOldVd) {
      when (mvFinished) {
        stateNext := State.finish
      }
      when(o.toVPRF.w.head.valid) {
        currentIdx := currentIdx + PopCount(o.toVPRF.w.map(_.valid))
      }
    }
    is(State.finish) {
      stateNext := State.noExcp
      currentIdx := 0.U
    }
  }

  private val regWriteFromRabVec: Vec[ValidIO[RegWriteFromRab]] = i.fromRab.logicPhyRegMap
  private val regWriteFromRatVec: Vec[ValidIO[UInt]] = oldPregVecFromRat

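  // Byte-wise merge: in mergeVd, bytes below the vstart e8 offset come from the new vd (already
  // written by the faulting instruction) and bytes at or above it come from the old vd; in
  // mvOldVd, the old vd is passed through unchanged.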
  val mergedVdWData: Vec[VecE8Vec] = Wire(Vec(maxMergeNumPerCycle, new VecE8Vec(VLEN)))
  mergedVdWData.zipWithIndex.foreach { case (vd, vIdx) =>
    vd.data.zipWithIndex.foreach { case (vde, eIdx) =>
      vde := Mux(
        state === State.mergeVd,
        Mux(
          eIdx.U >= sWaitRab_e8offset,
          preMergedOldVd(vIdx).e8Vec(eIdx),
          preMergedNewVd(vIdx).e8Vec(eIdx),
        ),
        preMoveOldVd(vIdx).e8Vec(eIdx),
      )
    }
  }

  private val hasRabWrite = regWriteFromRabVec.head.valid
  private val hasRatWrite = regWriteFromRatVec.head.valid
  require(
    2 * RabCommitWidth >= (MaxLMUL + 2),
    "Cannot receive all 10 reg maps from RAB and RAT in two cycles. " +
      "This module should be rewritten to support receiving over more than 2 cycles"
  )

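  // Collect the logical/physical register mappings from the RAB and the old pdests from the RAT,
  // arriving in at most two commit batches. The first incoming mapping is skipped (the first two
  // for strided loads, which need 2 uops to move data).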
  switch (state) {
    is (State.noExcp) {
      when (stateNext === State.waitRab) {
        sWaitRab_rabWriteOffset := 0.U
        sWaitRab_ratWriteOffset := 0.U
        commitNeeded.zipWithIndex.foreach { case (needed, idx) =>
          needed := sNoExcp_maxVdIdx > idx.U
        }
      }
    }
    is (State.waitRab) {
      when (hasRabWrite) {
        sWaitRab_rabWriteOffset := sWaitRab_rabWriteOffset +
          PriorityMux((0 until RabCommitWidth).map(
            idx => i.fromRab.logicPhyRegMap.reverse(idx).valid -> (6 - idx).U
          ))
      }
      when (hasRatWrite) {
        sWaitRab_ratWriteOffset := sWaitRab_ratWriteOffset +
          PriorityMux((0 until RabCommitWidth).map(
            idx => regWriteFromRatVec.reverse(idx).valid -> (6 - idx).U
          ))
      }

      when(sWaitRab_rabWriteOffset === 0.U) {
        // the first batch of RAB commits takes the write offset into account
        when(sWaitRab_vecExcpInfo.bits.isStride) {
          (2 until RabCommitWidth).map { idx =>
            val vdIdx = idx - 2
            when(regWriteFromRabVec(idx).valid) {
              regMaps(vdIdx).lreg := regWriteFromRabVec(idx).bits.lreg
              regMaps(vdIdx).newPreg := regWriteFromRabVec(idx).bits.preg
              rabCommitted(vdIdx) := true.B
            }
          }
        }.otherwise {
          (1 until RabCommitWidth).map { idx =>
            val vdIdx = idx - 1
            when(regWriteFromRabVec(idx).valid) {
              regMaps(vdIdx).lreg := regWriteFromRabVec(idx).bits.lreg
              regMaps(vdIdx).newPreg := regWriteFromRabVec(idx).bits.preg
              rabCommitted(vdIdx) := true.B
            }
          }
        }
      }.otherwise {
        // the second batch of RAB/RAT commits needs no offset
        when(sWaitRab_vecExcpInfo.bits.isStride) {
          (0 until (MaxLMUL + 2 - RabCommitWidth)).map { idx =>
            val vdIdx = idx - 2 + RabCommitWidth
            when(regWriteFromRabVec(idx).valid) {
              regMaps(vdIdx).lreg := regWriteFromRabVec(idx).bits.lreg
              regMaps(vdIdx).newPreg := regWriteFromRabVec(idx).bits.preg
              rabCommitted(vdIdx) := true.B
            }
          }
        }.otherwise {
          (0 until MaxLMUL + 1 - RabCommitWidth).map { idx =>
            val vdIdx = idx - 1 + RabCommitWidth
            when(regWriteFromRabVec(idx).valid) {
              regMaps(vdIdx).lreg := regWriteFromRabVec(idx).bits.lreg
              regMaps(vdIdx).newPreg := regWriteFromRabVec(idx).bits.preg
              rabCommitted(vdIdx) := true.B
            }
          }
        }
      }

      when (sWaitRab_ratWriteOffset === 0.U) {
        // the first batch of RAT commits takes the write offset into account
        when(sWaitRab_vecExcpInfo.bits.isStride) {
          (2 until RabCommitWidth).map { idx =>
            val vdIdx = idx - 2
            when(regWriteFromRatVec(idx).valid) {
              regMaps(vdIdx).oldPreg := regWriteFromRatVec(idx).bits
              ratCommitted(vdIdx) := true.B
            }
          }
        }.otherwise {
          (1 until RabCommitWidth).map { idx =>
            val vdIdx = idx - 1
            when(regWriteFromRatVec(idx).valid) {
              regMaps(vdIdx).oldPreg := regWriteFromRatVec(idx).bits
              ratCommitted(vdIdx) := true.B
            }
          }
        }
      }.otherwise {
        // the second batch of RAT commits needs no offset
        when(sWaitRab_vecExcpInfo.bits.isStride) {
          (0 until (MaxLMUL + 2 - RabCommitWidth)).map { idx =>
            val vdIdx = idx - 2 + RabCommitWidth
            when(regWriteFromRatVec(idx).valid) {
              regMaps(vdIdx).oldPreg := regWriteFromRatVec(idx).bits
              ratCommitted(vdIdx) := true.B
            }
          }
        }.otherwise {
          (0 until MaxLMUL + 1 - RabCommitWidth).map { idx =>
            val vdIdx = idx - 1 + RabCommitWidth
            when(regWriteFromRatVec(idx).valid) {
              regMaps(vdIdx).oldPreg := regWriteFromRatVec(idx).bits
              ratCommitted(vdIdx) := true.B
            }
          }
        }
      }
    }
    is (State.finish) {
      commitNeeded.foreach(_ := false.B)
      rabCommitted.foreach(_ := false.B)
      ratCommitted.foreach(_ := false.B)
      hasReadRf   .foreach(_ := false.B)
      sWaitRab_rabWriteOffset := 0.U
      sWaitRab_ratWriteOffset := 0.U
      sWaitRab_vecExcpInfo.valid := false.B
    }
  }

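  // Capture the merged (or moved) bytes into mergedVd whenever the corresponding VPRF read data is valid.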
  switch (state) {
    is (State.mergeVd, State.mvOldVd) {
      (0 until maxMergeNumPerCycle).map(vIdx =>
        when(i.fromVprf.rdata(vIdx).valid) {
          mergedVd(vIdx) := mergedVdWData(vIdx).asTypeOf(new VecElemData(VLEN))
        }
      )
    }
  }

  when (state === State.mergeVd) {
    (0 until maxMergeNumPerCycle).foreach { case (idx) =>
      val vdIdx = currentIdxVec(idx)
      // when nonSegIndexed load, iemul/demul = 1 << 2, vdLoc will be mapped as (0, 1, 2, 3, ...) -> (0, 4, ...)
      val oldVdLoc = oldVdLocVec(idx)
      // when nonSegIndexed load, iemul/demul = 1 << 2, vdLoc will be mapped as (0, 1, 2, 3, ...) -> (3, 7, ...)
      val newVdLoc = newVdLocVec(idx)
      o.toVPRF.r(idx).valid := commitNeeded(vdIdx) && !hasReadRf(vdIdx) && vdIdx < sWaitRab_needMergeUntil
      o.toVPRF.r(idx).bits.addr := regMaps(oldVdLoc).oldPreg
      o.toVPRF.r(idx).bits.isV0 := (regMaps(oldVdLoc).lreg === 0.U) && (idx == 0).B
      o.toVPRF.r(idx + maxMergeNumPerCycle).valid := commitNeeded(vdIdx) && !hasReadRf(vdIdx) && vdIdx < sWaitRab_needMergeUntil
      o.toVPRF.r(idx + maxMergeNumPerCycle).bits.addr := regMaps(newVdLoc).newPreg
      o.toVPRF.r(idx + maxMergeNumPerCycle).bits.isV0 := (regMaps(newVdLoc).lreg === 0.U) && (idx == 0).B
      hasReadRf(vdIdx) := true.B && vdIdx < sWaitRab_needMergeUntil
    }
  }.elsewhen (state === State.mvOldVd) {
    (0 until maxMergeNumPerCycle).foreach { case (idx) =>
      val vdIdx = currentIdxVec(idx)
      // when nonSegIndexed load, iemul/demul = 1 << 2, vdLoc will be mapped as (0, 1, 2, 3, ...) -> (0, 4, ...)
      val oldVdLoc = oldVdLocVec(idx)
      // when nonSegIndexed load, iemul/demul = 1 << 2, vdLoc will be mapped as (0, 1, 2, 3, ...) -> (3, 7, ...)
      val newVdLoc = newVdLocVec(idx)
      o.toVPRF.r(idx).valid := commitNeeded(vdIdx) && !hasReadRf(vdIdx) && vdIdx < sWaitRab_handleUntil
      o.toVPRF.r(idx).bits.addr := regMaps(oldVdLoc).oldPreg
      o.toVPRF.r(idx).bits.isV0 := (regMaps(oldVdLoc).lreg === 0.U) && (idx == 0).B
      o.toVPRF.r(idx + maxMergeNumPerCycle).valid := 0.U
      o.toVPRF.r(idx + maxMergeNumPerCycle).bits.addr := 0.U
      o.toVPRF.r(idx + maxMergeNumPerCycle).bits.isV0 := false.B
      hasReadRf(vdIdx) := true.B && vdIdx < sWaitRab_handleUntil
    }
  }.otherwise {
    o.toVPRF.r := 0.U.asTypeOf(chiselTypeOf(o.toVPRF.r))
  }

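  // Write back one cycle after the corresponding VPRF read returns; the merged data always goes
  // to the new physical destination register.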
  o.toVPRF.w.zipWithIndex.foreach { case (w, idx) =>
    val vdIdx = currentIdxVec(idx)
    // when nonSegIndexed load, iemul/demul = 1 << 2, vdLoc will be mapped as (0, 1, 2, 3, ...) -> (0, 4, ...)
    val oldVdLoc = oldVdLocVec(idx)
    // when nonSegIndexed load, iemul/demul = 1 << 2, vdLoc will be mapped as (0, 1, 2, 3, ...) -> (3, 7, ...)
    val newVdLoc = newVdLocVec(idx)
    w.valid          := RegNext(i.fromVprf.rdata(idx).valid)
    w.bits.isV0      := (regMaps(newVdLoc).lreg === 0.U) && (idx == 0).B
    w.bits.newVdAddr := regMaps(newVdLoc).newPreg
    w.bits.newVdData := mergedVd(idx.U).asUInt
  }

  o.status.busy := DelayN(state.isOneOf(State.waitRab, State.mergeVd, State.mvOldVd), 1)
}

class LogicPhyRegMap(implicit p: Parameters) extends XSBundle {
  val lreg = UInt(LogicRegsWidth.W)
  val newPreg = UInt(VfPhyRegIdxWidth.W)
  val oldPreg = UInt(VfPhyRegIdxWidth.W)
}

class RegWriteFromRab(implicit p: Parameters) extends XSBundle {
  private val maxVregLMUL = 8
  val lreg = UInt(LogicRegsWidth.W)
  val preg = UInt(VfPhyRegIdxWidth.W)
}

class RabToVecExcpMod(implicit p: Parameters) extends XSBundle {
  val logicPhyRegMap = Vec(RabCommitWidth, ValidIO(new RegWriteFromRab))
}

class VecExcpInfo(implicit p: Parameters) extends XSBundle {
  val vstart = Vstart()
  val vsew = VSew()
  val veew = VSew()
  val vlmul = VLmul()
  val nf = Nf()
  val isStride = Bool()
  val isIndexed = Bool()
  val isWhole = Bool()
  val isVlm = Bool()
}

class RatToVecExcpMod(implicit p: Parameters) extends XSBundle {
  val vecOldVdPdest = Vec(RabCommitWidth, ValidIO(UInt(VfPhyRegIdxWidth.W)))
  val v0OldVdPdest = Vec(RabCommitWidth, ValidIO(UInt(VfPhyRegIdxWidth.W)))
}

class VprfToExcpMod(numPort: Int)(implicit p: Parameters) extends XSBundle {
  val rdata = Vec(numPort, ValidIO(UInt(VLEN.W)))
}

class ExcpModToVprf(numReadPort: Int, numWritePort: Int)(implicit p: Parameters) extends XSBundle {
  val r = Vec(numReadPort, ValidIO(new Bundle {
    val isV0 = Bool()
    val addr = UInt(VfPhyRegIdxWidth.W)
  }))
  val w = Vec(numWritePort, ValidIO(new Bundle {
    val isV0      = Bool()
    val newVdAddr = UInt(VfPhyRegIdxWidth.W)
    val newVdData = UInt(VLEN.W)
  }))
}

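/**
  * Computes, for each of the up-to-8 vd registers of a (possibly segmented) register group,
  * the half-open element-index range [from, until) it covers, given nf and the element width.
  */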
class NfMappedElemIdx(vlen: Int) extends Module {
  require(isPow2(vlen))
  // vlen = 128, idxWidth = 8, hold 0~128
  val idxWidth = log2Up(vlen + 1)

  val in = IO(Input(new Bundle {
    val nf = Nf()
    val eewOH = SewOH()
  }))
  val out = IO(Output(new Bundle {
    val idxRangeVec = Vec(8, new HWRange(idxWidth))
  }))

  private val minElemLen = 8
  private val maxElemNumPerVreg = vlen / minElemLen

  private val rangeTable: Vec[Vec[HWRange]] = VecInit.tabulate(8, 8) { case(nf, vdIdx) =>
    val nFields = nf + 1
    // vector register group
    val vrgIdx = vdIdx / nFields
    HWRange(idxWidth)((maxElemNumPerVreg * vrgIdx).U, (maxElemNumPerVreg * (vrgIdx + 1)).U)
  }

  out.idxRangeVec := VecInit(rangeTable.map { case rangeVec: Vec[HWRange] =>
    Mux1H(
      (0 until 4).map(i =>
        in.eewOH(i) -> VecInit(rangeVec.map(
          x => HWRange(idxWidth)(x.from >> i, x.until >> i)
        ))
      )
    )
  })(in.nf)

  dontTouch(out.idxRangeVec)

  def apply(nf: UInt, eewOH: UInt): Vec[HWRange] = {
    this.in.nf := nf
    this.in.eewOH := eewOH
    this.out.idxRangeVec
  }
}

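/**
  * Converts an element index into a byte (e8) offset within a vector register by shifting it
  * left according to the one-hot element width (e.g. eew = 32 shifts by 2).
  */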
class GetE8OffsetInVreg(vlen: Int) extends Module {
  require(isPow2(vlen))
  private val minElemLen = 8
  private val maxElemNumPerVreg = vlen / minElemLen
  private val tailZeroBit = log2Ceil(maxElemNumPerVreg) // 16 -> 4

  val in = IO(Input(new Bundle {
    val eewOH = SewOH()
    val idx = UInt(log2Up(vlen).W)
  }))
  val out = IO(Output(new Bundle {
    val offset = UInt(tailZeroBit.W)
  }))

  out.offset := Mux1H(
    (0 until 4).map(
      // eew=32(0b0100), idx=1, get offset=4
      i => in.eewOH(i) -> (in.idx << i)
    )
  )

  def apply(eewOH: UInt, idx: UInt): UInt = {
    this.in.eewOH := eewOH
    this.in.idx := idx
    this.out.offset
  }
}

class VecElemData(vlen: Int) extends Bundle {
  val rawData = UInt(vlen.W)

  def e8Vec  = this.rawData.asTypeOf(new VecE8Vec(vlen))
  def e16Vec = this.rawData.asTypeOf(new VecE16Vec(vlen))
  def e32Vec = this.rawData.asTypeOf(new VecE32Vec(vlen))
  def e64Vec = this.rawData.asTypeOf(new VecE64Vec(vlen))
}

class VecE8Vec(vlen: Int) extends Bundle {
  val data = Vec(vlen / 8, UInt(8.W))

  def apply(idx: Int): UInt = this.data(idx)
}

class VecE16Vec(vlen: Int) extends Bundle {
  val data = Vec(vlen / 16, UInt(16.W))

  def apply(idx: Int): UInt = this.data(idx)
}

class VecE32Vec(vlen: Int) extends Bundle {
  val data = Vec(vlen / 32, UInt(32.W))

  def apply(idx: Int): UInt = this.data(idx)
}

class VecE64Vec(vlen: Int) extends Bundle {
  val data = Vec(vlen / 64, UInt(64.W))

  def apply(idx: Int): UInt = this.data(idx)
}

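/** A half-open index range [from, until), with an inRange test used to locate vstart. */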
class HWRange(w: Int) extends Bundle {
  val from  = UInt(w.W)
  val until = UInt(w.W)

  def inRange(uint: UInt) = {
    uint >= this.from && uint < this.until
  }

  def apply(_from: Bits, _until: Bits): this.type = {
    this.from := _from
    this.until := _until
    this
  }
}

object HWRange {
  def apply(w: Int)(_from: Bits, _until: Bits): HWRange = Wire(new HWRange(w)).apply(_from, _until)
}
