xref: /XiangShan/src/main/scala/xiangshan/mem/lsqueue/StoreQueueData.scala (revision 5003e6f8af8c3020e83ed43708fab7efc0816e54)
1c6d43980SLemover/***************************************************************************************
2c6d43980SLemover* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3f320e0f0SYinan Xu* Copyright (c) 2020-2021 Peng Cheng Laboratory
4c6d43980SLemover*
5c6d43980SLemover* XiangShan is licensed under Mulan PSL v2.
6c6d43980SLemover* You can use this software according to the terms and conditions of the Mulan PSL v2.
7c6d43980SLemover* You may obtain a copy of Mulan PSL v2 at:
8c6d43980SLemover*          http://license.coscl.org.cn/MulanPSL2
9c6d43980SLemover*
10c6d43980SLemover* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11c6d43980SLemover* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12c6d43980SLemover* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13c6d43980SLemover*
14c6d43980SLemover* See the Mulan PSL v2 for more details.
15c6d43980SLemover***************************************************************************************/
16c6d43980SLemover
17e786ff3fSWilliam Wangpackage xiangshan.mem
18e786ff3fSWilliam Wang
198891a219SYinan Xuimport org.chipsalliance.cde.config.Parameters
20e786ff3fSWilliam Wangimport chisel3._
21e786ff3fSWilliam Wangimport chisel3.util._
22e786ff3fSWilliam Wangimport utils._
233c02ee8fSwakafaimport utility._
24e786ff3fSWilliam Wangimport xiangshan._
25e786ff3fSWilliam Wangimport xiangshan.cache._
266d5ddbceSLemoverimport xiangshan.cache.{DCacheWordIO, DCacheLineIO, MemoryOpConstants}
27e786ff3fSWilliam Wangimport xiangshan.mem._
289aca92b9SYinan Xuimport xiangshan.backend.rob.RobPtr
29e786ff3fSWilliam Wang
30e786ff3fSWilliam Wang
// Data module definitions
// These data modules are like SyncDataModuleTemplate, but additionally support CAM-like operations
/**
  * Register-based store address storage with a content-addressed match port.
  *
  * Each entry holds an address (`data`), a byte mask (`mask`) and a "line op" flag
  * (`lineflag`). Entries are plain registers without a reset value.
  *
  *  - Read: the read address is registered first, so `rdata` / `rlineflag` show the entry
  *    selected by the address presented on the previous update of that register.
  *  - Write: performed on the clock edge when `wen` is set. Ports are emitted in index order,
  *    so a higher-numbered port would win on a conflict; conflicts are forbidden by assertion.
  *  - Forward (CAM): every query address is compared against all entries combinationally.
  *
  * @param dataWidth  width of a stored address in bits
  * @param numEntries number of entries
  * @param numRead    number of read ports
  * @param numWrite   number of write ports
  * @param numForward number of forward (CAM) query ports
  */
class SQAddrModule(dataWidth: Int, numEntries: Int, numRead: Int, numWrite: Int, numForward: Int)(implicit p: Parameters) extends XSModule with HasDCacheParameters {
  val io = IO(new Bundle {
    // sync read
    val raddr = Input(Vec(numRead, UInt(log2Up(numEntries).W)))
    val rdata = Output(Vec(numRead, UInt(dataWidth.W))) // rdata: store addr
    val rlineflag = Output(Vec(numRead, Bool())) // rdata: line op flag
    // write
    val wen   = Input(Vec(numWrite, Bool()))
    val waddr = Input(Vec(numWrite, UInt(log2Up(numEntries).W)))
    val wdata = Input(Vec(numWrite, UInt(dataWidth.W))) // wdata: store addr
    val wmask = Input(Vec(numWrite, UInt((VLEN/8).W)))
    val wlineflag = Input(Vec(numWrite, Bool())) // wdata: line op flag
    // forward addr cam
    val forwardMdata = Input(Vec(numForward, UInt(dataWidth.W))) // addr
    val forwardDataMask = Input(Vec(numForward, UInt((VLEN/8).W))) // forward mask
    val forwardMmask = Output(Vec(numForward, Vec(numEntries, Bool()))) // cam result mask
    // debug
    val debug_data = Output(Vec(numEntries, UInt(dataWidth.W)))
  })

  val data = Reg(Vec(numEntries, UInt(dataWidth.W)))
  val mask = Reg(Vec(numEntries, UInt((VLEN/8).W)))
  // cache line match flag: if lineflag == true, this address points to a whole cacheline
  val lineflag = Reg(Vec(numEntries, Bool()))
  io.debug_data := data

  // read ports
  for (i <- 0 until numRead) {
    // Register the read address once and share it between both outputs of this port,
    // instead of building one address register per output.
    // NOTE(review): GatedRegNext is defined outside this file; this assumes its output is a
    // pure function of its input history, so one shared instance is equivalent to two.
    val raddrReg = GatedRegNext(io.raddr(i))
    io.rdata(i) := data(raddrReg)
    io.rlineflag(i) := lineflag(raddrReg)
  }

  // write ports (later ports take priority because of last-connect semantics)
  for (i <- 0 until numWrite) {
    when (io.wen(i)) {
      data(io.waddr(i)) := io.wdata(i)
      mask(io.waddr(i)) := io.wmask(i)
      lineflag(io.waddr(i)) := io.wlineflag(i)
    }
  }

  // content addressed match
  for (i <- 0 until numForward) {
    for (j <- 0 until numEntries) {
      // Address bits above the cache line offset must always match.
      val linehit = io.forwardMdata(i)(dataWidth-1, DCacheLineOffset) === data(j)(dataWidth-1, DCacheLineOffset)
      // Within the line, the bits between DCacheVWordOffset and DCacheLineOffset must match;
      // when StoreQueueForwardWithMask is set, the byte masks must overlap as well.
      val hit128bit = (io.forwardMdata(i)(DCacheLineOffset-1, DCacheVWordOffset) === data(j)(DCacheLineOffset-1, DCacheVWordOffset)) &&
                    (!StoreQueueForwardWithMask.B || (mask(j) & io.forwardDataMask(i)).orR)
      // An entry flagged as a whole-line op hits on the line address alone.
      io.forwardMmask(i)(j) := linehit && (hit128bit || lineflag(j))
    }
  }

  // DataModuleTemplate should not be used when there're any write conflicts
  for (i <- 0 until numWrite) {
    for (j <- i+1 until numWrite) {
      assert(!(io.wen(i) && io.wen(j) && io.waddr(i) === io.waddr(j)))
    }
  }
}
92b72585b9SWilliam Wang
/**
  * One byte lane of a store queue data entry: a single data byte plus its valid bit.
  *
  * The data field is exactly one byte wide. It was previously declared as `XLEN/8` bits,
  * which is the number of bytes in an XLEN-bit word and only happens to equal 8 when XLEN
  * is 64; the width is now stated directly so the lane stays one byte for any XLEN.
  */
class SQData8Entry(implicit p: Parameters) extends XSBundle {
  val valid = Bool() // this byte is valid
  val data = UInt(8.W) // the byte itself
}
97e786ff3fSWilliam Wang
/**
  * Store queue data storage for a single byte lane, with store-to-load forwarding selection.
  *
  * Holds `numEntries` [[SQData8Entry]] registers (one byte plus a valid bit each).
  *
  *  - Read: the read address is registered before it indexes the entry array.
  *  - Write: the data byte and the valid bit have independent write ports. Every write is
  *    split by bank (low address bits) and takes two stages: stage 0 decodes the bank and
  *    captures address/data, stage 1 updates the selected entry.
  *  - Forward: for every forward port the caller supplies two entry-select bit masks; the
  *    module reduces the selected valid entries to one byte, a same-cycle valid
  *    (`forwardValidFast`) and a valid based on the registered masks (`forwardValid`).
  *
  * @param numEntries number of entries
  * @param numRead    number of read ports
  * @param numWrite   number of write ports (for data and for mask each)
  * @param numForward number of forward query ports
  */
class SQData8Module(numEntries: Int, numRead: Int, numWrite: Int, numForward: Int)(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
{
  val io = IO(new Bundle() {
    // sync read port
    val raddr = Vec(numRead, Input(UInt(log2Up(numEntries).W)))
    val rdata = Vec(numRead, Output(new SQData8Entry))
    // data write port
    val data = new Bundle() {
      val wen   = Vec(numWrite, Input(Bool()))
      val waddr = Vec(numWrite, Input(UInt(log2Up(numEntries).W)))
      val wdata = Vec(numWrite, Input(UInt(8.W))) // one byte (same width as XLEN/8 for XLEN = 64)
    }
    // mask (data valid) write port
    val mask = new Bundle() {
      val wen   = Vec(numWrite, Input(Bool()))
      val waddr = Vec(numWrite, Input(UInt(log2Up(numEntries).W)))
      val wdata = Vec(numWrite, Input(Bool()))
    }

    // st-ld forward addr cam result input, used to select forward data
    val needForward = Input(Vec(numForward, Vec(2, UInt(numEntries.W))))
    // forward result valid bit generated in current cycle
    val forwardValidFast = Vec(numForward, Output(Bool()))
    // forward result generated in the next cycle
    val forwardValid = Vec(numForward, Output(Bool())) // based on the registered needForward masks
    val forwardData = Vec(numForward, Output(UInt(8.W)))
  })

  io := DontCare

  val data = Reg(Vec(numEntries, new SQData8Entry))

  require(isPow2(StoreQueueNWriteBanks))
  require(StoreQueueNWriteBanks > 1)
  // low address bits select the bank
  def get_bank(in: UInt): UInt = in(log2Up(StoreQueueNWriteBanks) -1, 0)
  // remaining high address bits select the entry within the bank
  def get_bank_index(in: UInt): UInt = in >> log2Up(StoreQueueNWriteBanks)
  // inverse mapping: (index within bank, bank) -> entry number
  def get_vec_index(index: Int, bank: Int): Int = {
    (index << log2Up(StoreQueueNWriteBanks)) + bank
  }

  /**
    * Builds the two-stage banked write path shared by the data and the mask write ports.
    *
    * Stage 0 decodes a per-bank enable and captures the in-bank index and the write data
    * into enable-gated registers; stage 1 applies `update` to the addressed entry of the bank.
    *
    * The entries of a bank are enumerated from `numEntries` (bank, bank + nBanks, ...), so
    * every existing entry is reachable and no non-existent entry is referenced, whatever the
    * relation between `numEntries` and the global store queue size.
    *
    * @param wen    write enables, one per write port
    * @param waddr  entry addresses, one per write port
    * @param wdata  write values, one per write port
    * @param prefix signal name prefix used for the suggested names ("data" / "mask")
    * @param update emits the register update for (entry number, stage-1 write value)
    */
  private def bankedWrite[T <: Data](wen: Vec[Bool], waddr: Vec[UInt], wdata: Vec[T], prefix: String)
                                    (update: (Int, T) => Unit): Unit = {
    for (i <- 0 until numWrite) {
      val s0_wenVec = Wire(Vec(StoreQueueNWriteBanks, Bool()))
      for (bank <- 0 until StoreQueueNWriteBanks) {
        s0_wenVec(bank) := wen(i) && get_bank(waddr(i)) === bank.U
      }
      // NOTE(review): GatedValidRegNext is defined outside this file; it is used here as the
      // one-cycle delayed copy of the stage-0 enables.
      val s1_wenVec = GatedValidRegNext(s0_wenVec)

      for (bank <- 0 until StoreQueueNWriteBanks) {
        val s0_wen = s0_wenVec(bank)
        val s1_wen = s1_wenVec(bank)
        val s1_wdata = RegEnable(wdata(i), s0_wen)
        val s1_waddr = RegEnable(get_bank_index(waddr(i)), s0_wen)
        // entry == get_vec_index(index, bank) for every pair produced below
        val bankEntries = bank until numEntries by StoreQueueNWriteBanks
        for ((entry, index) <- bankEntries.zipWithIndex) {
          when (s1_wen && s1_waddr === index.U) {
            update(entry, s1_wdata)
          }
        }
        s0_wen.suggestName(prefix + "_s0_wen_" + i + "_bank_" + bank)
        s1_wen.suggestName(prefix + "_s1_wen_" + i + "_bank_" + bank)
        s1_wdata.suggestName(prefix + "_s1_wdata_" + i + "_bank_" + bank)
        s1_waddr.suggestName(prefix + "_s1_waddr_" + i + "_bank_" + bank)
      }
    }
  }

  /**
    * Reduces forward candidates to a single result.
    * On each merge the right operand's data wins when it is valid, so (assuming
    * ParallelOperation keeps the operand order) a later element overrides an earlier one.
    */
  private def parallelFwd(xs: Seq[Data]): Data = {
    ParallelOperation(xs, (a: Data, b: Data) => {
      val l = a.asTypeOf(new FwdEntry)
      val r = b.asTypeOf(new FwdEntry)
      val res = Wire(new FwdEntry)
      res.validFast := l.validFast || r.validFast
      res.valid := l.valid || r.valid
      res.data := Mux(r.valid, r.data, l.data)
      res
    })
  }

  // writeback to sq
  // store queue data write takes 2 cycles
  bankedWrite(io.data.wen, io.data.waddr, io.data.wdata, "data") { (entry, byte) =>
    data(entry).data := byte
  }
  // the valid bit ("mask") uses an identical, independent write path
  bankedWrite(io.mask.wen, io.mask.waddr, io.mask.wdata, "mask") { (entry, valid) =>
    data(entry).valid := valid
  }

  // destorequeue read data
  for (i <- 0 until numRead) {
    io.rdata(i) := data(GatedRegNext(io.raddr(i)))
  }

  // DataModuleTemplate should not be used when there're any write conflicts
  for (i <- 0 until numWrite) {
    for (j <- i+1 until numWrite) {
      assert(!(io.data.wen(i) && io.data.wen(j) && io.data.waddr(i) === io.data.waddr(j)))
    }
  }
  for (i <- 0 until numWrite) {
    for (j <- i+1 until numWrite) {
      assert(!(io.mask.wen(i) && io.mask.wen(j) && io.mask.waddr(i) === io.mask.waddr(j)))
    }
  }

  // forwarding
  // The caller provides two entry-select masks per forward port:
  //   needForward(i)(0) and needForward(i)(1).
  // NOTE(review): presumably these are the two ranges of the circular store queue that are
  // older than the querying load (same wrap flag / different wrap flag) - confirm at the caller.
  //
  // Candidates are laid out as [mask 0 entries 0..numEntries-1, mask 1 entries 0..numEntries-1].
  // The reduction prefers the right operand, so an entry with a larger index has higher
  // priority (its data is younger), and mask 1 candidates override mask 0 candidates.
  for (i <- 0 until numForward) {
    val matchResultVec = Wire(Vec(numEntries * 2, new FwdEntry))

    for (j <- 0 until numEntries) {
      val needCheck0 = io.needForward(i)(0)(j)
      val needCheck1 = io.needForward(i)(1)(j)
      val needCheck0Reg = RegNext(needCheck0)
      val needCheck1Reg = RegNext(needCheck1)

      matchResultVec(j).validFast := needCheck0 && data(j).valid
      matchResultVec(j).valid := needCheck0Reg && data(j).valid
      matchResultVec(j).data := data(j).data
      matchResultVec(numEntries + j).validFast := needCheck1 && data(j).valid
      matchResultVec(numEntries + j).valid := needCheck1Reg && data(j).valid
      matchResultVec(numEntries + j).data := data(j).data
    }

    val parallelFwdResult = parallelFwd(matchResultVec).asTypeOf(new FwdEntry)

    // validFast is generated the same cycle with query
    io.forwardValidFast(i) := parallelFwdResult.validFast
    // valid is generated 1 cycle after query request
    io.forwardValid(i) := parallelFwdResult.valid
    // data is generated 1 cycle after query request
    io.forwardData(i) := parallelFwdResult.data
  }
}
271e786ff3fSWilliam Wang
/**
  * Read-port view of one full store queue data entry.
  *
  * `data` is VLEN bits wide and `mask` carries one bit per byte of `data`.
  * The field order is part of the bundle layout and must not be changed.
  */
class SQDataEntry(implicit p: Parameters) extends XSBundle {
  // one bit per data byte
  val mask = UInt((VLEN / 8).W)
  // store data, VLEN bits
  val data = UInt(VLEN.W)
}
276b5b78226SWilliam Wang
27796b1e495SWilliam Wang// SQDataModule is a wrapper of SQData8Modules
/**
  * Full-width store queue data storage, built from one [[SQData8Module]] per data byte.
  *
  * The VLEN-bit write data and the VLEN/8-bit byte mask are split per byte lane; addresses,
  * enables and forward select masks are broadcast to every lane. Read and forward results are
  * re-assembled from the lanes, with lane `j` providing byte `j` (bits `8*j+7 .. 8*j`).
  *
  * @param numEntries number of entries
  * @param numRead    number of read ports
  * @param numWrite   number of write ports (for data and for mask each)
  * @param numForward number of forward query ports
  */
class SQDataModule(numEntries: Int, numRead: Int, numWrite: Int, numForward: Int)(implicit p: Parameters) extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper {
  val io = IO(new Bundle() {
    // sync read port
    val raddr = Vec(numRead,  Input(UInt(log2Up(numEntries).W)))
    val rdata = Vec(numRead,  Output(new SQDataEntry))
    // data write port
    val data = new Bundle() {
      val wen   = Vec(numWrite, Input(Bool()))
      val waddr = Vec(numWrite, Input(UInt(log2Up(numEntries).W)))
      val wdata = Vec(numWrite, Input(UInt(VLEN.W)))
    }
    // mask (data valid) write port
    val mask = new Bundle() {
      val wen   = Vec(numWrite, Input(Bool()))
      val waddr = Vec(numWrite, Input(UInt(log2Up(numEntries).W)))
      val wdata = Vec(numWrite, Input(UInt((VLEN/8).W)))
    }

    // st-ld forward addr cam result input, used to select forward data
    val needForward = Input(Vec(numForward, Vec(2, UInt(numEntries.W))))
    // forward result valid bit generated in current cycle
    val forwardMaskFast = Vec(numForward, Output(Vec((VLEN/8), Bool())))
    // forward result generated in the next cycle
    val forwardMask = Vec(numForward, Output(Vec((VLEN/8), Bool())))
    val forwardData = Vec(numForward, Output(Vec((VLEN/8), UInt(8.W))))
  })

  // Number of byte lanes, derived from the data width instead of a literal 16 so that it
  // always agrees with the VLEN/8-wide ports above (16 lanes for VLEN = 128).
  private val numByteLanes = VLEN / 8
  // The val keeps its historical name so that generated instance names stay stable.
  val data16 = Seq.fill(numByteLanes)(Module(new SQData8Module(numEntries, numRead, numWrite, numForward)))

  // writeback to lq/sq
  for (i <- 0 until numWrite) {
    // write to every byte lane: broadcast address/enable, slice data and mask
    for ((lane, j) <- data16.zipWithIndex) {
      lane.io.mask.waddr(i) := io.mask.waddr(i)
      lane.io.mask.wdata(i) := io.mask.wdata(i)(j)
      lane.io.mask.wen(i)   := io.mask.wen(i)
      lane.io.data.waddr(i) := io.data.waddr(i)
      lane.io.data.wdata(i) := io.data.wdata(i)(8*(j+1)-1, 8*j)
      lane.io.data.wen(i)   := io.data.wen(i)
    }
  }

  // destorequeue read data
  for (i <- 0 until numRead) {
    data16.foreach(lane => lane.io.raddr(i) := io.raddr(i))
    // lane 0 ends up in the least significant bit / byte
    io.rdata(i).mask := VecInit(data16.map(lane => lane.io.rdata(i).valid)).asUInt
    io.rdata(i).data := VecInit(data16.map(lane => lane.io.rdata(i).data)).asUInt
  }

  // DataModuleTemplate should not be used when there're any write conflicts
  for (i <- 0 until numWrite) {
    for (j <- i+1 until numWrite) {
      assert(!(io.data.wen(i) && io.data.wen(j) && io.data.waddr(i) === io.data.waddr(j)))
    }
  }
  for (i <- 0 until numWrite) {
    for (j <- i+1 until numWrite) {
      assert(!(io.mask.wen(i) && io.mask.wen(j) && io.mask.waddr(i) === io.mask.waddr(j)))
    }
  }

  // forwarding: broadcast the select masks, then gather one result per byte lane
  for (i <- 0 until numForward) {
    // needForward is a pure input on both sides, so a directional connect is used
    data16.foreach(lane => lane.io.needForward(i) := io.needForward(i))
    // The result vectors are assembled once per forward port (they do not depend on the
    // lane being driven above, so they must not sit inside the per-lane loop).
    io.forwardMaskFast(i) := VecInit(data16.map(lane => lane.io.forwardValidFast(i)))
    io.forwardMask(i) := VecInit(data16.map(lane => lane.io.forwardValid(i)))
    io.forwardData(i) := VecInit(data16.map(lane => lane.io.forwardData(i)))
  }
}
351