xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/data/BankedDataArray.scala (revision 602aa9f1a8fb63310bea30e8b3e247e5aca5f123)
1/***************************************************************************************
2* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
3* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
4* Copyright (c) 2020-2021 Peng Cheng Laboratory
5*
6* XiangShan is licensed under Mulan PSL v2.
7* You can use this software according to the terms and conditions of the Mulan PSL v2.
8* You may obtain a copy of Mulan PSL v2 at:
9*          http://license.coscl.org.cn/MulanPSL2
10*
11* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
12* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
13* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
14*
15* See the Mulan PSL v2 for more details.
16*
17*
18* Acknowledgement
19*
20* This implementation is inspired by several key papers:
21* [1] Gurindar S. Sohi, and Manoj Franklin. "[High-bandwidth data memory systems for superscalar processors.]
22* (https://doi.org/10.1145/106972.106980)" 4th International Conference on Architectural Support for Programming
23* Languages and Operating Systems (ASPLOS). 1991.
24***************************************************************************************/
25
26package xiangshan.cache
27
28import org.chipsalliance.cde.config.Parameters
29import chisel3._
30import utils._
31import utility._
32import utility.sram.SRAMTemplate
33import chisel3.util._
34import utility.mbist.MbistPipeline
35import xiangshan.mem.LqPtr
36import xiangshan.{L1CacheErrorInfo, XSCoreParamsKey}
37
38import scala.math.max
39
/**
 * Record logged to the bank-conflict ChiselDB table.
 *
 * Field order defines the record layout, so it must not be rearranged.
 */
class BankConflictDB(implicit p: Parameters) extends DCacheBundle {
  // full request address of every load pipeline
  val addr = Vec(LoadPipelineWidth, Bits(PAddrBits.W))
  // set index of every load pipeline
  val set_index = Vec(LoadPipelineWidth, UInt((DCacheAboveIndexOffset - DCacheSetOffset).W))
  // bank indices touched by the conflicting request (one entry per SRAM row of a VLEN access)
  val bank_index = Vec(VLEN / DCacheSRAMRowBits, UInt((DCacheSetOffset - DCacheBankOffset).W))
  // binary-encoded way of the conflicting request
  val way_index = UInt(wayBits.W)
  // set when the conflicting requests actually target the same set and div
  val fake_rr_bank_conflict = Bool()
}
47
/** Base data-array request: a way-enable mask plus the address being accessed. */
class L1BankedDataReadReq(implicit p: Parameters) extends DCacheBundle {
  // one bit per way
  val way_en = Bits(DCacheWays.W)
  // address; div / set / bank indices are derived from it by the data array
  val addr = Bits(PAddrBits.W)
}
53
/** Load-pipeline read request: way/address plus the extra information used for conflict handling. */
class L1BankedDataReadReqWithMask(implicit p: Parameters) extends DCacheBundle {
  // one bit per way
  val way_en = Bits(DCacheWays.W)
  val addr = Bits(PAddrBits.W)
  // second copy of the address (decoded separately by BankedDataArray)
  val addr_dup = Bits(PAddrBits.W)
  // banks touched by this request; overlapping masks are what trigger a read-read conflict
  val bankMask = Bits(DCacheBanks.W)
  // NOTE(review): not consumed in this file -- presumably cancels the request; confirm against the load pipe
  val kill = Bool()
  // load-queue pointer, used to pick the oldest request among conflicting ports
  val lqIdx = new LqPtr
}
63
/** Main-pipeline line read request: a [[L1BankedDataReadReq]] plus a per-bank read mask. */
class L1BankedDataReadLineReq(implicit p: Parameters) extends L1BankedDataReadReq {
  // one bit per bank
  val rmask = Bits(DCacheBanks.W)
}
68
/**
 * Line write request. A whole cache block can be written in a single cycle:
 * the request carries one data row per bank together with a per-bank write mask.
 */
class L1BankedDataWriteReq(implicit p: Parameters) extends L1BankedDataReadReq {
  // one bit per bank; only banks with their bit set are written
  val wmask = Bits(DCacheBanks.W)
  // one SRAM row of payload per bank
  val data = Vec(DCacheBanks, Bits(DCacheSRAMRowBits.W))
}
75
/** Control-only copy of a cache-block write request (way_en + addr, no data and no mask). */
class L1BankedDataWriteReqCtrl(implicit p: Parameters) extends L1BankedDataReadReq
78
/** Read result of a single bank row: raw data, its ECC bits and a late error flag. */
class L1BankedDataReadResult(implicit p: Parameters) extends DCacheBundle {
  // check bits stored alongside the row
  val ecc = Bits(dataECCBits.W)
  // data row as read from the SRAM
  val raw_data = Bits(DCacheSRAMRowBits.W)
  // error flag; arrives 1 cycle later than the data response
  val error_delayed = Bool()

  /** Reassembles the encoded word, ECC bits in the upper part and data in the lower part. */
  def asECCData() = Cat(ecc, raw_data)
}
90
/** Write port payload of a [[DataSRAMBank]]. */
class DataSRAMBankWriteReq(implicit p: Parameters) extends DCacheBundle {
  // write enable
  val en = Bool()
  // set index inside the bank; width is left to inference
  val addr = UInt()
  // way select, one bit per way
  val way_en = UInt(DCacheWays.W)
  // encoded row (data plus ECC)
  val data = UInt(encDataBits.W)
}
97
/**
 * Wraps one single-ported SRAM that stores the encoded rows of a single (bank, way) pair.
 *
 * @param bankIdx bank number, only used in debug prints
 * @param wayIdx  way number, only used in debug prints
 */
class DataSRAM(bankIdx: Int, wayIdx: Int)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle() {
    // write port
    val w = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Input(UInt(encDataBits.W))
    }

    // read port
    val r = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Output(UInt(encDataBits.W))
    }
  })

  // backing storage: DCacheSets / DCacheSetDiv entries, one way wide
  val data_sram = Module(new SRAMTemplate(
    Bits(encDataBits.W),
    set = DCacheSets / DCacheSetDiv,
    way = 1,
    shouldReset = false,
    holdRead = false,
    singlePort = true,
    hasMbist = hasMbist,
    hasSramCtl = hasSramCtl
  ))

  // write side
  data_sram.io.w.req.valid := io.w.en
  data_sram.io.w.req.bits.apply(setIdx = io.w.addr, data = io.w.data, waymask = 1.U)

  // read side
  data_sram.io.r.req.valid := io.r.en
  data_sram.io.r.req.bits.apply(setIdx = io.r.addr)
  io.r.data := data_sram.io.r.resp.data(0)

  XSPerfAccumulate("part_data_read_counter", data_sram.io.r.req.valid)

  /** Debug print of a read; fires the cycle after the request, together with the returned data. */
  def dump_r() = {
    XSDebug(
      RegNext(io.r.en),
      "bank read set %x bank %x way %x data %x\n",
      RegEnable(io.r.addr, io.r.en), bankIdx.U, wayIdx.U, io.r.data
    )
  }

  /** Debug print of a write, in the cycle it is issued. */
  def dump_w() = {
    XSDebug(
      io.w.en,
      "bank write set %x bank %x way %x data %x\n",
      io.w.addr, bankIdx.U, wayIdx.U, io.w.data
    )
  }

  /** Emits both debug prints, write first. */
  def dump() = {
    dump_w()
    dump_r()
  }
}
162
/**
 * Wraps the data rows of all DCacheWays ways of one bank: one single-ported SRAM per way.
 * A read returns the row of every way; a write goes to the way selected by `w.way_en`.
 *
 * @param index bank number (not referenced inside the module body)
 */
class DataSRAMBank(index: Int)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle() {
    val w = Input(new DataSRAMBankWriteReq)

    val r = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Output(Vec(DCacheWays, UInt(encDataBits.W)))
    }
  })

  // a write may select at most one way
  assert(RegNext(!io.w.en || PopCount(io.w.way_en) <= 1.U))

  // external controls do not read and write at the same time
  val w_info = io.w
  // val rw_bypass = RegNext(io.w.addr === io.r.addr && io.w.way_en === io.r.way_en && io.w.en)

  // multiway data bank: one clock-gated SRAM per way
  val data_bank = Seq.fill(DCacheWays) {
    Module(new SRAMTemplate(
      Bits(encDataBits.W),
      set = DCacheSets / DCacheSetDiv,
      way = 1,
      shouldReset = false,
      holdRead = false,
      singlePort = true,
      withClockGate = true,
      hasMbist = hasMbist,
      hasSramCtl = hasSramCtl,
      suffix = Some("dcsh_dat")
    ))
  }

  data_bank.zipWithIndex.foreach { case (way_sram, w) =>
    // only the selected way is written ...
    val wen = w_info.en && w_info.way_en(w)
    way_sram.io.w.req.valid := wen
    way_sram.io.w.req.bits.apply(setIdx = w_info.addr, data = w_info.data, waymask = 1.U)
    // ... while every way is read with the same set index
    way_sram.io.r.req.valid := io.r.en
    way_sram.io.r.req.bits.apply(setIdx = io.r.addr)
  }
  XSPerfAccumulate("part_data_read_counter", PopCount(Cat(data_bank.map(_.io.r.req.valid))))

  io.r.data := data_bank.map(_.io.r.resp.data(0))

  /** Debug print of a read; fires the cycle after the request, together with the returned data. */
  def dump_r() = {
    XSDebug(
      RegNext(io.r.en),
      "bank read addr %x data %x\n",
      RegEnable(io.r.addr, io.r.en), io.r.data.asUInt
    )
  }

  /** Debug print of a write, in the cycle it is issued. */
  def dump_w() = {
    XSDebug(
      io.w.en,
      "bank write addr %x way_en %x data %x\n",
      io.w.addr, io.w.way_en, io.w.data
    )
  }

  /** Emits both debug prints, write first. */
  def dump() = {
    dump_w()
    dump_r()
  }
}
234
/** Marker value; the data arrays wrap it in `Some(...)` when data ECC is enabled. */
case object HasDataEccParam
236
//                     Banked DCache Data
// -----------------------------------------------------------------
// | Bank0 | Bank1 | Bank2 | Bank3 | Bank4 | Bank5 | Bank6 | Bank7 |
// -----------------------------------------------------------------
// | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  |
// | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  |
// | ....  | ....  | ....  | ....  | ....  | ....  | ....  | ....  |
// -----------------------------------------------------------------
/**
 * Common interface and helpers shared by the banked data array implementations.
 *
 * The IO offers one word-read port per load pipeline, a line-read port and a
 * line-write port for the main pipeline, plus conflict / error reporting outputs.
 */
abstract class AbstractBankedDataArray(implicit p: Parameters) extends DCacheModule
{
  val DataEccParam = if(EnableDataEcc) Some(HasDataEccParam) else None
  // index of the readline port when it is listed after the load ports
  val ReadlinePortErrorIndex = LoadPipelineWidth
  val io = IO(new DCacheBundle {
    // load pipeline read word req
    val read = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new L1BankedDataReadReqWithMask)))
    val is128Req = Input(Vec(LoadPipelineWidth, Bool()))
    // main pipeline read / write line req
    val readline_intend = Input(Bool())
    val readline = Flipped(DecoupledIO(new L1BankedDataReadLineReq))
    val readline_can_go = Input(Bool())
    val readline_stall = Input(Bool())
    val readline_can_resp = Input(Bool())
    val write = Flipped(DecoupledIO(new L1BankedDataWriteReq))
    val write_dup = Vec(DCacheBanks, Flipped(Decoupled(new L1BankedDataWriteReqCtrl)))
    // data for readline and loadpipe
    val readline_resp = Output(Vec(DCacheBanks, new L1BankedDataReadResult()))
    val readline_error_delayed = Output(Bool())
    val read_resp          = Output(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, new L1BankedDataReadResult())))
    val read_error_delayed = Output(Vec(LoadPipelineWidth,Vec(VLEN/DCacheSRAMRowBits, Bool())))
    // val nacks = Output(Vec(LoadPipelineWidth, Bool()))
    // val errors = Output(Vec(LoadPipelineWidth + 1, ValidIO(new L1CacheErrorInfo))) // read ports + readline port
    // when bank_conflict, read (1) port should be ignored
    val bank_conflict_slow = Output(Vec(LoadPipelineWidth, Bool()))
    val disable_ld_fast_wakeup = Output(Vec(LoadPipelineWidth, Bool()))
    val pseudo_error = Flipped(DecoupledIO(Vec(DCacheBanks, new CtrlUnitSignalingBundle)))
  })

  // bank (0, 1, 2, 3) each way use duplicate addr
  def DuplicatedQueryBankSeq = Seq(0, 1, 2, 3)

  /** Builds a Vec with one element per load pipeline by applying `f` to each pipeline index. */
  def pipeMap[T <: Data](f: Int => T) = VecInit((0 until LoadPipelineWidth).map(f))

  /**
   * Extracts the ECC bits (everything above the data row) from an encoded word.
   * Returns the literal 0 when data ECC is disabled.
   */
  def getECCFromEncWord(encWord: UInt) = {
    if (EnableDataEcc) {
      // report the width that was actually passed in, so a mismatch is diagnosable
      require(encWord.getWidth == encDataBits, s"encWord width=${encWord.getWidth} != encDataBits=$encDataBits!")
      encWord(encDataBits-1, DCacheSRAMRowBits)
    } else {
      0.U
    }
  }

  /** Extracts the data row (low DCacheSRAMRowBits bits) from an encoded word. */
  def getDataFromEncWord(encWord: UInt) = {
    encWord(DCacheSRAMRowBits-1, 0)
  }

  /** Assembles the word stored in SRAM: `ecc ## data` with ECC enabled, plain `data` otherwise. */
  def asECCData(ecc: UInt, data: UInt) = {
    if (EnableDataEcc) {
      Cat(ecc, data)
    } else {
      data
    }
  }

  /** Debug print of all valid word-read requests and of a valid line-read request. */
  def dumpRead = {
    (0 until LoadPipelineWidth).foreach { w =>
      XSDebug(io.read(w).valid,
        s"DataArray Read channel: $w valid way_en: %x addr: %x\n",
        io.read(w).bits.way_en, io.read(w).bits.addr)
    }
    XSDebug(io.readline.valid,
      s"DataArray Read Line, valid way_en: %x addr: %x rmask %x\n",
      io.readline.bits.way_en, io.readline.bits.addr, io.readline.bits.rmask)
  }

  /** Debug print of a valid write request: one header line, then one line per bank. */
  def dumpWrite = {
    XSDebug(io.write.valid,
      s"DataArray Write valid way_en: %x addr: %x\n",
      io.write.bits.way_en, io.write.bits.addr)

    // `r` is the bank index here; the "cycle" label is kept for log compatibility
    (0 until DCacheBanks).foreach { r =>
      XSDebug(io.write.valid,
        s"cycle: $r data: %x wmask: %x\n",
        io.write.bits.data(r), io.write.bits.wmask(r))
    }
  }

  /** Debug print of the word-read responses (both rows for a 128-bit request, else row 0). */
  def dumpResp = {
    XSDebug(s"DataArray ReadResp channel:\n")
    // `r` is the load pipeline index here; the "cycle" label is kept for log compatibility
    (0 until LoadPipelineWidth).foreach { r =>
      XSDebug(s"cycle: $r data: %x\n", Mux(io.is128Req(r),
        Cat(io.read_resp(r)(1).raw_data,io.read_resp(r)(0).raw_data),
        io.read_resp(r)(0).raw_data))
    }
  }

  /** Emits all debug prints: reads, writes, then responses. */
  def dump() = {
    dumpRead
    dumpWrite
    dumpResp
  }

  /**
   * Reduces the given ports to a single winner.
   *
   * For every pair (a, b) the reduction keeps `a` unless `a` is invalid, or both
   * are valid and `a`'s pointer compares greater than `b`'s (presumably meaning
   * `a` is the younger load -- see LqPtr's ordering); in those cases `b` is kept.
   * When neither is valid, `b` is forwarded and the result is marked invalid.
   *
   * @param valid per-port valid flags
   * @param bits  per-port load-queue pointers
   * @param index per-port identifiers returned for the winner
   * @return ((anyValid, winning pointer), winning index)
   */
  def selcetOldestPort(valid: Seq[Bool], bits: Seq[LqPtr], index: Seq[UInt]):((Bool, LqPtr), UInt) = {
    require(valid.length == bits.length &&  bits.length == index.length, s"length must eq, valid:${valid.length}, bits:${bits.length}, index:${index.length}")
    ParallelOperation(valid zip bits zip index,
      (a: ((Bool, LqPtr), UInt), b: ((Bool, LqPtr), UInt)) => {
        val ((aValid, aPtr), aIdx) = a
        val ((bValid, bPtr), bIdx) = b
        // one select drives both the pointer and the index mux
        val takeB = !aValid || (bValid && aPtr > bPtr)
        ((aValid || bValid, Mux(takeB, bPtr, aPtr)), Mux(takeB, bIdx, aIdx))
      }
    )
  }

}
363
/**
 * Data array whose smallest access unit is a single SRAM: there is one
 * [[DataSRAM]] per (div, bank, way), and a load only enables the SRAMs of the
 * way it selects.
 */
class SramedDataArray(implicit p: Parameters) extends AbstractBankedDataArray {
  println("  DCacheType: SramedDataArray")
  val ReduceReadlineConflict = false

  // writes are always accepted; they are registered and performed one cycle later
  io.write.ready := true.B
  io.write_dup.foreach(_.ready := true.B)

  // data_banks(div)(bank)(way); one mbist pipeline is placed per div
  val data_banks = List.tabulate(DCacheSetDiv)( k => {
    val banks = List.tabulate(DCacheBanks)(i => List.tabulate(DCacheWays)(j => Module(new DataSRAM(i,j))))
    val mbistPl = MbistPipeline.PlaceMbistPipeline(1, s"MbistPipeDataSet$k", hasMbist)
    banks
  })
  data_banks.flatten.flatten.foreach(_.dump())

  // per load pipeline: decoded request fields
  val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
  val set_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val div_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val bank_addrs = Wire(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, UInt())))

  // decoded readline request fields
  val line_set_addr = addr_to_dcache_div_set(io.readline.bits.addr)
  val line_div_addr = addr_to_dcache_div(io.readline.bits.addr)
  // when WPU is enabled, line_way_en is all enabled when read data
  val line_way_en = Fill(DCacheWays, 1.U) // val line_way_en = io.readline.bits.way_en
  // NOTE(review): not referenced anywhere else in this class
  val line_way_en_reg = RegEnable(io.readline.bits.way_en, 0.U(DCacheWays.W),io.readline.valid)

  // write request, registered for one cycle
  val write_bank_mask_reg = RegEnable(io.write.bits.wmask, 0.U(DCacheBanks.W), io.write.valid)
  val write_data_reg = RegEnable(io.write.bits.data, io.write.valid)
  val write_valid_reg = RegNext(io.write.valid)
  val write_valid_dup_reg = io.write_dup.map(x => RegNext(x.valid))
  val write_wayen_dup_reg = io.write_dup.map(x => RegEnable(x.bits.way_en, 0.U(DCacheWays.W), x.valid))
  val write_set_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div_set(x.bits.addr), x.valid))
  val write_div_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div(x.bits.addr), x.valid))

  // read data_banks and ecc_banks
  // for single port SRAM, do not allow read and write in the same cycle
  val rrhazard = false.B // io.readline.valid
  (0 until LoadPipelineWidth).foreach { rport_index =>
    div_addrs(rport_index) := addr_to_dcache_div(io.read(rport_index).bits.addr)
    set_addrs(rport_index) := addr_to_dcache_div_set(io.read(rport_index).bits.addr)
    bank_addrs(rport_index)(0) := addr_to_dcache_bank(io.read(rport_index).bits.addr)
    // second row of a 128-bit access lives in the next bank
    bank_addrs(rport_index)(1) := bank_addrs(rport_index)(0) + 1.U

    // use way_en to select a way after data read out
    assert(!(RegNext(io.read(rport_index).fire && PopCount(io.read(rport_index).bits.way_en) > 1.U)))
    way_en(rport_index) := io.read(rport_index).bits.way_en
  }

  // read conflict: two valid loads in the same div and way whose bank masks
  // overlap while their set indices differ
  val rr_bank_conflict = Seq.tabulate(LoadPipelineWidth, LoadPipelineWidth)((x, y) =>
    if (x == y) {
      false.B
    } else {
      io.read(x).valid && io.read(y).valid &&
        div_addrs(x) === div_addrs(y) &&
        (io.read(x).bits.bankMask & io.read(y).bits.bankMask) =/= 0.U &&
        io.read(x).bits.way_en === io.read(y).bits.way_en &&
        set_addrs(x) =/= set_addrs(y)
    }
  )
  val load_req_with_bank_conflict = rr_bank_conflict.map(_.reduce(_ || _))
  val load_req_valid = io.read.map(_.valid)
  val load_req_lqIdx = io.read.map(_.bits.lqIdx)
  val load_req_index = (0 until LoadPipelineWidth).map(_.asUInt)


  // among the conflicting loads, one port is selected and allowed to proceed
  val load_req_bank_conflict_selcet = selcetOldestPort(load_req_with_bank_conflict, load_req_lqIdx, load_req_index)
  val load_req_bank_select_port  = UIntToOH(load_req_bank_conflict_selcet._2).asBools

  // a conflicting load that was not selected loses the arbitration
  val rr_bank_conflict_oldest = (0 until LoadPipelineWidth).map(i =>
    !load_req_bank_select_port(i) && load_req_with_bank_conflict(i)
  )

  // load vs. readline conflict (actual and intended readline)
  val rrl_bank_conflict = Wire(Vec(LoadPipelineWidth, Bool()))
  val rrl_bank_conflict_intend = Wire(Vec(LoadPipelineWidth, Bool()))
  for (i <- 0 until LoadPipelineWidth) {
    val judge = if (ReduceReadlineConflict) io.read(i).valid && (io.readline.bits.rmask & io.read(i).bits.bankMask) =/= 0.U && line_div_addr === div_addrs(i) && line_set_addr =/= set_addrs(i)
                else io.read(i).valid && line_div_addr === div_addrs(i) && line_set_addr =/= set_addrs(i)
    rrl_bank_conflict(i) := judge && io.readline.valid
    rrl_bank_conflict_intend(i) := judge && io.readline_intend
  }
  // load vs. registered write conflict: same div and way, and a written bank is being read
  val wr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x =>
    io.read(x).valid && write_valid_reg &&
    div_addrs(x) === write_div_addr_dup_reg.head &&
    way_en(x) === write_wayen_dup_reg.head &&
    (write_bank_mask_reg(bank_addrs(x)(0)) || write_bank_mask_reg(bank_addrs(x)(1)) && io.is128Req(x))
  )
  // readline vs. registered write conflict: same div
  val wrl_bank_conflict = io.readline.valid && write_valid_reg && line_div_addr === write_div_addr_dup_reg.head
  // ready
  io.readline.ready := !(wrl_bank_conflict)
  io.read.zipWithIndex.foreach { case (x, i) => x.ready := !(wr_bank_conflict(i) || rrhazard) }

  val perf_multi_read = PopCount(io.read.map(_.valid)) >= 2.U
  val bank_conflict_fast = Wire(Vec(LoadPipelineWidth, Bool()))
  for (i <- 0 until LoadPipelineWidth) {
    bank_conflict_fast(i) := wr_bank_conflict(i) || rrl_bank_conflict(i) ||
    rr_bank_conflict_oldest(i)
    // the conflict is reported one cycle after the request
    io.bank_conflict_slow(i) := RegNext(bank_conflict_fast(i))
    // fast wakeup only looks at conflicts with lower-numbered ports
    io.disable_ld_fast_wakeup(i) := wr_bank_conflict(i) || rrl_bank_conflict_intend(i) ||
      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
  }
  XSPerfAccumulate("data_array_multi_read", perf_multi_read)
  for (y <- 1 until LoadPipelineWidth; x <- 0 until y) {
    XSPerfAccumulate(s"data_array_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y))
  }
  for (i <- 0 until LoadPipelineWidth) {
    XSPerfAccumulate(s"data_array_rrl_bank_conflict_${i}", rrl_bank_conflict(i))
    XSPerfAccumulate(s"data_array_rw_bank_conflict_${i}", wr_bank_conflict(i))
    XSPerfAccumulate(s"data_array_read_${i}", io.read(i).valid)
  }
  XSPerfAccumulate("data_array_access_total", PopCount(io.read.map(_.valid)))
  XSPerfAccumulate("data_array_read_line", io.readline.valid)
  XSPerfAccumulate("data_array_write", io.write.valid)

  // per-SRAM read results, indexed (div)(bank)(way)
  val read_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays,new L1BankedDataReadResult()))))
  val read_result_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays,new L1BankedDataReadResult()))))
  val read_error_delayed_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, Bool()))))
  dontTouch(read_result)
  dontTouch(read_error_delayed_result)

  // pseudo error injection: per-bank mask XOR-ed onto the data that was read
  val pseudo_data_toggle_mask = io.pseudo_error.bits.map(bank =>
    Mux(io.pseudo_error.valid && bank.valid, bank.mask, 0.U)
  )
  // the injection request is acknowledged once a read actually touched a marked bank
  val readline_hit = io.readline.fire &&
                     (io.readline.bits.rmask & VecInit(io.pseudo_error.bits.map(_.valid)).asUInt).orR
  val readbank_hit = io.read.zip(bank_addrs.zip(io.is128Req)).zipWithIndex.map {
                          case ((read, (bank_addr, is128Req)), i) =>
                            val error_bank0 = io.pseudo_error.bits(bank_addr(0))
                            val error_bank1 = io.pseudo_error.bits(bank_addr(1))
                            read.fire && (error_bank0.valid || error_bank1.valid && is128Req) && !io.bank_conflict_slow(i)
                      }.reduce(_|_)
  io.pseudo_error.ready := RegNext(readline_hit || readbank_hit)

  for {
    div_index <- 0 until DCacheSetDiv
    bank_index <- 0 until DCacheBanks
    way_index <- 0 until DCacheWays
  } {
    //     Set Addr & Read Way Mask
    //
    //    Pipe 0   ....  Pipe (n-1)
    //      +      ....     +
    //      |      ....     |
    // +----+---------------+-----+
    //  X                        X
    //   X                      +------+ Bank Addr Match
    //    +---------+----------+
    //              |
    //     +--------+--------+
    //     |    Data Bank    |
    //     +-----------------+
    // a load pipe enables this SRAM when div, bank and way all match and it did not lose arbitration
    val loadpipe_en = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
      io.read(i).valid && div_addrs(i) === div_index.U && (bank_addrs(i)(0) === bank_index.U || bank_addrs(i)(1) === bank_index.U && io.is128Req(i)) &&
      way_en(i)(way_index) &&
      !rr_bank_conflict_oldest(i)
    })))
    val readline_en = Wire(Bool())
    if (ReduceReadlineConflict) {
      readline_en := io.readline.valid && io.readline.bits.rmask(bank_index) && line_way_en(way_index) && div_index.U === line_div_addr
    } else {
      readline_en := io.readline.valid && line_way_en(way_index) && div_index.U === line_div_addr
    }
    // readline has priority over the load pipes; among load pipes the lowest index wins
    val sram_set_addr = Mux(readline_en,
      addr_to_dcache_div_set(io.readline.bits.addr),
      PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => loadpipe_en(i) -> set_addrs(i)))
    )
    val read_en = loadpipe_en.asUInt.orR || readline_en
    // read raw data
    val data_bank = data_banks(div_index)(bank_index)(way_index)
    data_bank.io.r.en := read_en
    data_bank.io.r.addr := sram_set_addr

    read_result(div_index)(bank_index)(way_index).ecc := getECCFromEncWord(data_bank.io.r.data)
    read_result(div_index)(bank_index)(way_index).raw_data := getDataFromEncWord(data_bank.io.r.data) ^ pseudo_data_toggle_mask(bank_index)

    if (EnableDataEcc) {
      // ECC is checked on a registered copy, hence the error arrives one cycle after the data
      val ecc_data = read_result(div_index)(bank_index)(way_index).asECCData()
      val ecc_data_delayed = RegEnable(ecc_data, RegNext(read_en))
      read_result(div_index)(bank_index)(way_index).error_delayed := dcacheParameters.dataCode.decode(ecc_data_delayed).error
      read_error_delayed_result(div_index)(bank_index)(way_index) := read_result(div_index)(bank_index)(way_index).error_delayed
    } else {
      read_result(div_index)(bank_index)(way_index).error_delayed := false.B
      read_error_delayed_result(div_index)(bank_index)(way_index) := false.B
    }

    read_result_delayed(div_index)(bank_index)(way_index) := RegEnable(read_result(div_index)(bank_index)(way_index), RegNext(read_en))
  }

  // flat vector of all SRAM read enables, for the perf counter only
  val data_read_oh = WireInit(VecInit(Seq.fill(DCacheSetDiv * DCacheBanks * DCacheWays)(0.U(1.W))))
  for {
    div_index <- 0 until DCacheSetDiv
    bank_index <- 0 until DCacheBanks
    way_index <- 0 until DCacheWays
  } {
    data_read_oh(div_index *  DCacheBanks * DCacheWays + bank_index * DCacheWays + way_index) := data_banks(div_index)(bank_index)(way_index).io.r.en
  }
  XSPerfAccumulate("data_read_counter", PopCount(Cat(data_read_oh)))

  // read result: expose banked read result
  // TODO: clock gate
  (0 until LoadPipelineWidth).foreach { i =>
    // io.read_resp(i) := read_result(RegNext(bank_addrs(i)))(RegNext(OHToUInt(way_en(i))))
    // r_*  : request fields one cycle after the request (data response stage)
    // rr_* : request fields two cycles after the request (delayed error stage)
    val r_read_fire = RegNext(io.read(i).fire)
    val r_div_addr  = RegEnable(div_addrs(i), io.read(i).fire)
    val r_bank_addr = RegEnable(bank_addrs(i), io.read(i).fire)
    // NOTE(review): free-running RegNext, unlike the fire-gated registers around it
    val r_way_addr  = RegNext(OHToUInt(way_en(i)))
    val rr_read_fire = RegNext(RegNext(io.read(i).fire))
    val rr_div_addr = RegEnable(RegEnable(div_addrs(i), io.read(i).fire), r_read_fire)
    val rr_bank_addr = RegEnable(RegEnable(bank_addrs(i), io.read(i).fire), r_read_fire)
    val rr_way_addr = RegEnable(RegEnable(OHToUInt(way_en(i)), io.read(i).fire), r_read_fire)
    (0 until VLEN/DCacheSRAMRowBits).foreach { j =>
      io.read_resp(i)(j) := read_result(r_div_addr)(r_bank_addr(j))(r_way_addr)
      // error detection
      // normal read ports
      io.read_error_delayed(i)(j) := rr_read_fire && read_error_delayed_result(rr_div_addr)(rr_bank_addr(j))(rr_way_addr) && !RegNext(io.bank_conflict_slow(i))
    }
  }

  // readline port
  val readline_error_delayed = Wire(Vec(DCacheBanks, Bool()))
  val readline_r_way_addr = RegEnable(OHToUInt(io.readline.bits.way_en), io.readline.valid)
  val readline_rr_way_addr = RegEnable(readline_r_way_addr, RegNext(io.readline.valid))
  val readline_r_div_addr = RegEnable(line_div_addr, io.readline.valid)
  val readline_rr_div_addr = RegEnable(readline_r_div_addr, RegNext(io.readline.valid))
  (0 until DCacheBanks).foreach { i =>
    io.readline_resp(i) := read_result(readline_r_div_addr)(i)(readline_r_way_addr)
    readline_error_delayed(i) := read_result(readline_rr_div_addr)(i)(readline_rr_way_addr).error_delayed
  }
  io.readline_error_delayed := RegNext(RegNext(io.readline.fire)) && readline_error_delayed.asUInt.orR

  // write data_banks & ecc_banks
  for {
    div_index <- 0 until DCacheSetDiv
    bank_index <- 0 until DCacheBanks
    way_index <- 0 until DCacheWays
  } {
    // data write: the registered request is steered to the SRAM matching bank mask, div and way
    val wen_reg = write_bank_mask_reg(bank_index) &&
      write_valid_dup_reg(bank_index) &&
      write_div_addr_dup_reg(bank_index) === div_index.U &&
      write_wayen_dup_reg(bank_index)(way_index)
    // NOTE(review): depends only on bank_index but is instantiated per (div, way) -- confirm this duplication is intended
    val write_ecc_reg = RegEnable(getECCFromEncWord(cacheParams.dataCode.encode(io.write.bits.data(bank_index))), io.write.valid)
    val data_bank = data_banks(div_index)(bank_index)(way_index)
    data_bank.io.w.en := wen_reg
    data_bank.io.w.addr := write_set_addr_dup_reg(bank_index)
    data_bank.io.w.data := asECCData(write_ecc_reg, write_data_reg(bank_index))
  }

  // ChiselDB logging of read-read bank conflicts
  val tableName = s"BankConflict${p(XSCoreParamsKey).HartId}"
  val siteName = s"BankedDataArray${p(XSCoreParamsKey).HartId}"
  val bankConflictTable = ChiselDB.createTable(tableName, new BankConflictDB)
  val bankConflictData = Wire(new BankConflictDB)
  for (i <- 0 until LoadPipelineWidth) {
    bankConflictData.set_index(i) := set_addrs(i)
    bankConflictData.addr(i) := io.read(i).bits.addr
  }

  // FIXME: rr_bank_conflict(0)(1) no generalization
  when(rr_bank_conflict(0)(1)) {
    (0 until (VLEN/DCacheSRAMRowBits)).foreach { i =>
      bankConflictData.bank_index(i) := bank_addrs(0)(i)
    }
    bankConflictData.way_index  := OHToUInt(way_en(0))
    bankConflictData.fake_rr_bank_conflict := set_addrs(0) === set_addrs(1) && div_addrs(0) === div_addrs(1)
  }.otherwise {
    (0 until (VLEN/DCacheSRAMRowBits)).foreach { i =>
      bankConflictData.bank_index(i) := 0.U
    }
    bankConflictData.way_index := 0.U
    bankConflictData.fake_rr_bank_conflict := false.B
  }

  // logging is switched on at run time through a Constantin record
  val isWriteBankConflictTable = Constantin.createRecord(s"isWriteBankConflictTable${p(XSCoreParamsKey).HartId}")
  bankConflictTable.log(
    data = bankConflictData,
    en = isWriteBankConflictTable.orR && rr_bank_conflict(0)(1),
    site = siteName,
    clock = clock,
    reset = reset
  )

  for (y <- 1 until LoadPipelineWidth; x <- 0 until y) {
    XSPerfAccumulate(s"data_array_fake_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y) && set_addrs(x)===set_addrs(y) && div_addrs(x) === div_addrs(y))
  }

  if (backendParams.debugEn){
    load_req_with_bank_conflict.foreach(dontTouch(_))
    dontTouch(read_result)
    dontTouch(read_error_delayed_result)
  }
}
656
657// the smallest access unit is bank
658class BankedDataArray(implicit p: Parameters) extends AbstractBankedDataArray {
659  println("  DCacheType: BankedDataArray")
660  val ReduceReadlineConflict = false
661
662  io.write.ready := true.B
663  io.write_dup.foreach(_.ready := true.B)
664
665  val data_banks = Seq.tabulate(DCacheSetDiv, DCacheBanks)({(k, i) => Module(new DataSRAMBank(i))})
666  val mbistPl = MbistPipeline.PlaceMbistPipeline(1, s"MbistPipeDCacheData", hasMbist)
667  val mbistSramPorts = mbistPl.map(pl => Seq.tabulate(DCacheSetDiv, DCacheBanks, DCacheWays) ({ (i, j, k) =>
668    pl.toSRAM(i * DCacheBanks * DCacheWays + j * DCacheWays + k)
669  }))
670  data_banks.map(_.map(_.dump()))
671
672  val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
673  val set_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
674  val set_addrs_dup = Wire(Vec(LoadPipelineWidth, UInt()))
675  val div_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
676  val div_addrs_dup = Wire(Vec(LoadPipelineWidth, UInt()))
677  val bank_addrs = Wire(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, UInt())))
678  val bank_addrs_dup = Wire(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, UInt())))
679  val way_en_reg = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
680  val set_addrs_reg = Wire(Vec(LoadPipelineWidth, UInt()))
681  val set_addrs_dup_reg = Wire(Vec(LoadPipelineWidth, UInt()))
682
683  val line_set_addr = addr_to_dcache_div_set(io.readline.bits.addr)
684  val line_div_addr = addr_to_dcache_div(io.readline.bits.addr)
685  val line_way_en = io.readline.bits.way_en
686
687  val write_bank_mask_reg = RegEnable(io.write.bits.wmask, io.write.valid)
688  val write_data_reg = RegEnable(io.write.bits.data, io.write.valid)
689  val write_valid_reg = RegNext(io.write.valid)
690  val write_valid_dup_reg = io.write_dup.map(x => RegNext(x.valid))
691  val write_wayen_dup_reg = io.write_dup.map(x => RegEnable(x.bits.way_en, x.valid))
692  val write_set_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div_set(x.bits.addr), x.valid))
693  val write_div_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div(x.bits.addr), x.valid))
694
695  // read data_banks and ecc_banks
696  // for single port SRAM, do not allow read and write in the same cycle
697  val rwhazard = RegNext(io.write.valid)
698  val rrhazard = false.B // io.readline.valid
699  (0 until LoadPipelineWidth).map(rport_index => {
700    div_addrs(rport_index) := addr_to_dcache_div(io.read(rport_index).bits.addr)
701    div_addrs_dup(rport_index) := addr_to_dcache_div(io.read(rport_index).bits.addr_dup)
702    bank_addrs(rport_index)(0) := addr_to_dcache_bank(io.read(rport_index).bits.addr)
703    bank_addrs(rport_index)(1) := Mux(io.is128Req(rport_index), bank_addrs(rport_index)(0) + 1.U, bank_addrs(rport_index)(0))
704    bank_addrs_dup(rport_index)(0) := addr_to_dcache_bank(io.read(rport_index).bits.addr_dup)
705    bank_addrs_dup(rport_index)(1) := Mux(io.is128Req(rport_index), bank_addrs_dup(rport_index)(0) + 1.U, bank_addrs_dup(rport_index)(0))
706    set_addrs(rport_index) := addr_to_dcache_div_set(io.read(rport_index).bits.addr)
707    set_addrs_dup(rport_index) := addr_to_dcache_div_set(io.read(rport_index).bits.addr_dup)
708    set_addrs_reg(rport_index) := RegEnable(addr_to_dcache_div_set(io.read(rport_index).bits.addr), io.read(rport_index).valid)
709    set_addrs_dup_reg(rport_index) := RegEnable(addr_to_dcache_div_set(io.read(rport_index).bits.addr_dup), io.read(rport_index).valid)
710
711    // use way_en to select a way after data read out
712    assert(!(RegNext(io.read(rport_index).fire && PopCount(io.read(rport_index).bits.way_en) > 1.U)))
713    way_en(rport_index) := io.read(rport_index).bits.way_en
714    way_en_reg(rport_index) := RegEnable(io.read(rport_index).bits.way_en, io.read(rport_index).valid)
715  })
716
717  // read each bank, get bank result
718  val rr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x => Seq.tabulate(LoadPipelineWidth)(y => {
719    if (x == y) {
720      false.B
721    } else {
722      io.read(x).valid && io.read(y).valid &&
723      div_addrs(x) === div_addrs(y) &&
724      (io.read(x).bits.bankMask & io.read(y).bits.bankMask) =/= 0.U &&
725      set_addrs(x) =/= set_addrs(y)
726    }
727  }
728  ))
729
730  val load_req_with_bank_conflict = rr_bank_conflict.map(_.reduce(_ || _))
731  val load_req_valid = io.read.map(_.valid)
732  val load_req_lqIdx = io.read.map(_.bits.lqIdx)
733  val load_req_index = (0 until LoadPipelineWidth).map(_.asUInt)
734
735  val load_req_bank_conflict_selcet = selcetOldestPort(load_req_with_bank_conflict, load_req_lqIdx, load_req_index)
736  val load_req_bank_select_port  = UIntToOH(load_req_bank_conflict_selcet._2).asBools
737
738  val rr_bank_conflict_oldest = (0 until LoadPipelineWidth).map(i =>
739    !load_req_bank_select_port(i) && load_req_with_bank_conflict(i)
740  )
741
742  val rrl_bank_conflict = Wire(Vec(LoadPipelineWidth, Bool()))
743  val rrl_bank_conflict_intend = Wire(Vec(LoadPipelineWidth, Bool()))
744  (0 until LoadPipelineWidth).foreach { i =>
745    val judge = if (ReduceReadlineConflict) io.read(i).valid && (io.readline.bits.rmask & io.read(i).bits.bankMask) =/= 0.U && div_addrs(i) === line_div_addr
746                else io.read(i).valid && div_addrs(i)===line_div_addr
747    rrl_bank_conflict(i) := judge && io.readline.valid
748    rrl_bank_conflict_intend(i) := judge && io.readline_intend
749  }
750  val wr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x =>
751    io.read(x).valid &&
752    write_valid_reg &&
753    div_addrs(x) === write_div_addr_dup_reg.head &&
754    (write_bank_mask_reg(bank_addrs(x)(0)) || write_bank_mask_reg(bank_addrs(x)(1)) && io.is128Req(x))
755  )
756  val wrl_bank_conflict = io.readline.valid && write_valid_reg && line_div_addr === write_div_addr_dup_reg.head
757  // ready
758  io.readline.ready := !(wrl_bank_conflict)
759  io.read.zipWithIndex.map{case(x, i) => x.ready := !(wr_bank_conflict(i) || rrhazard)}
760
761  val perf_multi_read = PopCount(io.read.map(_.valid)) >= 2.U
762  (0 until LoadPipelineWidth).foreach(i => {
763    // remove fake rr_bank_conflict situation in s2
764    val real_other_bank_conflict_reg = RegNext(wr_bank_conflict(i) || rrl_bank_conflict(i))
765    val real_rr_bank_conflict_reg = RegNext(rr_bank_conflict_oldest(i))
766    io.bank_conflict_slow(i) := real_other_bank_conflict_reg || real_rr_bank_conflict_reg
767
768    // get result in s1
769    io.disable_ld_fast_wakeup(i) := wr_bank_conflict(i) || rrl_bank_conflict_intend(i) ||
770      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
771  })
772  XSPerfAccumulate("data_array_multi_read", perf_multi_read)
773  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
774    XSPerfAccumulate(s"data_array_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y))
775  ))
776  (0 until LoadPipelineWidth).foreach(i => {
777    XSPerfAccumulate(s"data_array_rrl_bank_conflict_${i}", rrl_bank_conflict(i))
778    XSPerfAccumulate(s"data_array_rw_bank_conflict_${i}", wr_bank_conflict(i))
779    XSPerfAccumulate(s"data_array_read_${i}", io.read(i).valid)
780  })
781  XSPerfAccumulate("data_array_access_total", PopCount(io.read.map(_.valid)))
782  XSPerfAccumulate("data_array_read_line", io.readline.valid)
783  XSPerfAccumulate("data_array_write", io.write.valid)
784
785  val bank_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, new L1BankedDataReadResult()))))
786  val bank_result_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, new L1BankedDataReadResult()))))
787  val read_bank_error_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, Bool()))))
788
789  val pseudo_data_toggle_mask = io.pseudo_error.bits.map {
790    case bank =>
791      Mux(io.pseudo_error.valid && bank.valid, bank.mask, 0.U)
792  }
793  val readline_hit = io.readline.fire &&
794                     (io.readline.bits.rmask & VecInit(io.pseudo_error.bits.map(_.valid)).asUInt).orR
795  val readbank_hit = io.read.zip(bank_addrs.zip(io.is128Req)).zipWithIndex.map {
796                          case ((read, (bank_addr, is128Req)), i) =>
797                            val error_bank0 = io.pseudo_error.bits(bank_addr(0))
798                            val error_bank1 = io.pseudo_error.bits(bank_addr(1))
799                            read.fire && (error_bank0.valid || error_bank1.valid && is128Req) && !io.bank_conflict_slow(i)
800                      }.reduce(_|_)
801  io.pseudo_error.ready := RegNext(readline_hit || readbank_hit)
802
803  for (div_index <- 0 until DCacheSetDiv) {
804    for (bank_index <- 0 until DCacheBanks) {
805      //     Set Addr & Read Way Mask
806      //
807      //    Pipe 0   ....  Pipe (n-1)
808      //      +      ....     +
809      //      |      ....     |
810      // +----+---------------+-----+
811      //  X                        X
812      //   X                      +------+ Bank Addr Match
813      //    +---------+----------+
814      //              |
815      //     +--------+--------+
816      //     |    Data Bank    |
817      //     +-----------------+
818      val bank_addr_matchs = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
819        io.read(i).valid && div_addrs(i) === div_index.U && (bank_addrs(i)(0) === bank_index.U || bank_addrs(i)(1) === bank_index.U && io.is128Req(i)) &&
820          !rr_bank_conflict_oldest(i)
821      })))
822      val bank_addr_matchs_dup = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
823        io.read(i).valid && div_addrs_dup(i) === div_index.U && (bank_addrs_dup(i)(0) === bank_index.U || bank_addrs_dup(i)(1) === bank_index.U && io.is128Req(i)) &&
824          !rr_bank_conflict_oldest(i)
825      })))
826      val readline_match = Wire(Bool())
827      if (ReduceReadlineConflict) {
828        readline_match := io.readline.valid && io.readline.bits.rmask(bank_index) && line_div_addr === div_index.U
829      } else {
830        readline_match := io.readline.valid && line_div_addr === div_index.U
831      }
832
833      val bank_set_addr = Mux(readline_match,
834        line_set_addr,
835        PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => bank_addr_matchs(i) -> set_addrs(i)))
836      )
837      val bank_set_addr_dup = Mux(readline_match,
838        line_set_addr,
839        PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => bank_addr_matchs_dup(i) -> set_addrs_dup(i)))
840      )
841      val read_enable = bank_addr_matchs.asUInt.orR || readline_match
842
843      // read raw data
844      val data_bank = data_banks(div_index)(bank_index)
845      data_bank.io.r.en := read_enable
846
847      if (DuplicatedQueryBankSeq.contains(bank_index)) {
848        data_bank.io.r.addr := bank_set_addr_dup
849      } else {
850        data_bank.io.r.addr := bank_set_addr
851      }
852      for (way_index <- 0 until DCacheWays) {
853        val mbistAck = mbistSramPorts.map(_(div_index)(bank_index)(way_index).ack).getOrElse(false.B)
854        bank_result(div_index)(bank_index)(way_index).ecc := getECCFromEncWord(data_bank.io.r.data(way_index))
855        bank_result(div_index)(bank_index)(way_index).raw_data := getDataFromEncWord(data_bank.io.r.data(way_index)) ^ Mux(mbistAck, 0.U, pseudo_data_toggle_mask(bank_index))
856
857        if (EnableDataEcc) {
858          val ecc_data = bank_result(div_index)(bank_index)(way_index).asECCData()
859          val ecc_data_delayed = RegEnable(ecc_data, RegNext(read_enable))
860          bank_result(div_index)(bank_index)(way_index).error_delayed := dcacheParameters.dataCode.decode(ecc_data_delayed).error
861          read_bank_error_delayed(div_index)(bank_index)(way_index) := bank_result(div_index)(bank_index)(way_index).error_delayed
862        } else {
863          bank_result(div_index)(bank_index)(way_index).error_delayed := false.B
864          read_bank_error_delayed(div_index)(bank_index)(way_index) := false.B
865        }
866        bank_result_delayed(div_index)(bank_index)(way_index) := RegEnable(bank_result(div_index)(bank_index)(way_index), RegNext(read_enable))
867      }
868    }
869  }
870
871  val data_read_oh = WireInit(VecInit(Seq.fill(DCacheSetDiv)(0.U(XLEN.W))))
872  for (div_index <- 0 until DCacheSetDiv){
873    val temp = WireInit(VecInit(Seq.fill(DCacheBanks)(0.U(XLEN.W))))
874    for (bank_index <- 0 until DCacheBanks) {
875      temp(bank_index) := PopCount(Fill(DCacheWays, data_banks(div_index)(bank_index).io.r.en.asUInt))
876    }
877    data_read_oh(div_index) := temp.reduce(_ + _)
878  }
879  XSPerfAccumulate("data_read_counter", data_read_oh.foldLeft(0.U)(_ + _))
880
881  (0 until LoadPipelineWidth).map(i => {
882    // 1 cycle after read fire(load s2)
883    val r_read_fire = RegNext(io.read(i).fire)
884    val r_div_addr = RegEnable(div_addrs(i), io.read(i).fire)
885    val r_bank_addr = RegEnable(bank_addrs(i), io.read(i).fire)
886    val r_way_addr = RegEnable(OHToUInt(way_en(i)), io.read(i).fire)
887    // 2 cycles after read fire(load s3)
888    val rr_read_fire = RegNext(r_read_fire)
889    val rr_div_addr = RegEnable(RegEnable(div_addrs(i), io.read(i).fire), r_read_fire)
890    val rr_bank_addr = RegEnable(RegEnable(bank_addrs(i), io.read(i).fire), r_read_fire)
891    val rr_way_addr = RegEnable(RegEnable(OHToUInt(way_en(i)), io.read(i).fire), r_read_fire)
892    (0 until VLEN/DCacheSRAMRowBits).map( j =>{
893      io.read_resp(i)(j)          := bank_result(r_div_addr)(r_bank_addr(j))(r_way_addr)
894      // error detection
895      io.read_error_delayed(i)(j) := rr_read_fire && read_bank_error_delayed(rr_div_addr)(rr_bank_addr(j))(rr_way_addr) && !RegNext(io.bank_conflict_slow(i))
896    })
897  })
898
899  // read result: expose banked read result
900  private val mbist_r_way = OHToUInt(mbistSramPorts.map(_.flatMap(_.map(w => Cat(w.map(_.re).reverse))).reduce(_ | _)).getOrElse(0.U(DCacheWays.W)))
901  private val mbist_r_div = OHToUInt(mbistSramPorts.map(_.map(d => Cat(d.flatMap(w => w.map(_.re))).orR)).getOrElse(Seq.fill(DCacheSetDiv)(false.B)))
902  private val mbist_ack = mbistPl.map(_.mbist.ack).getOrElse(false.B)
903
904  val readline_error_delayed = Wire(Vec(DCacheBanks, Bool()))
905  val readline_r_way_addr = RegEnable(Mux(mbist_ack, mbist_r_way, OHToUInt(io.readline.bits.way_en)), io.readline.valid | mbist_ack)
906  val readline_rr_way_addr = RegEnable(readline_r_way_addr, RegNext(io.readline.valid))
907  val readline_r_div_addr = RegEnable(Mux(mbist_ack, mbist_r_div, line_div_addr), io.readline.valid | mbist_ack)
908  val readline_rr_div_addr = RegEnable(readline_r_div_addr, RegNext(io.readline.valid))
909  val readline_resp = Wire(io.readline_resp.cloneType)
910  (0 until DCacheBanks).foreach(i => {
911    mbistSramPorts.foreach(_.foreach(_(i).foreach(_.rdata := Cat(io.readline_resp(i).ecc, io.readline_resp(i).raw_data))))
912    readline_resp(i) := Mux(
913      io.readline_can_go | mbist_ack,
914      bank_result(readline_r_div_addr)(i)(readline_r_way_addr),
915      RegEnable(readline_resp(i), io.readline_stall | mbist_ack)
916    )
917    readline_error_delayed(i) := bank_result(readline_rr_div_addr)(i)(readline_rr_way_addr).error_delayed
918  })
919  io.readline_resp := RegEnable(readline_resp, io.readline_can_resp | mbist_ack)
920  io.readline_error_delayed := RegNext(RegNext(io.readline.fire)) && readline_error_delayed.asUInt.orR
921
922  // write data_banks & ecc_banks
923  for (div_index <- 0 until DCacheSetDiv) {
924    for (bank_index <- 0 until DCacheBanks) {
925      // data write
926      val wen_reg = write_bank_mask_reg(bank_index) &&
927        write_valid_dup_reg(bank_index) &&
928        write_div_addr_dup_reg(bank_index) === div_index.U && RegNext(io.write.valid)
929      val write_ecc_reg = RegEnable(getECCFromEncWord(cacheParams.dataCode.encode(io.write.bits.data(bank_index))), io.write.valid)
930      val data_bank = data_banks(div_index)(bank_index)
931      data_bank.io.w.en := wen_reg
932      data_bank.io.w.way_en := write_wayen_dup_reg(bank_index)
933      data_bank.io.w.addr := write_set_addr_dup_reg(bank_index)
934      data_bank.io.w.data := asECCData(write_ecc_reg, write_data_reg(bank_index))
935    }
936  }
937
938  val tableName = "BankConflict" + p(XSCoreParamsKey).HartId.toString
939  val siteName = "BankedDataArray" + p(XSCoreParamsKey).HartId.toString
940  val bankConflictTable = ChiselDB.createTable(tableName, new BankConflictDB)
941  val bankConflictData = Wire(new BankConflictDB)
942  for (i <- 0 until LoadPipelineWidth) {
943    bankConflictData.set_index(i) := set_addrs(i)
944    bankConflictData.addr(i) := io.read(i).bits.addr
945  }
946
947  // FIXME: rr_bank_conflict(0)(1) no generalization
948  when(rr_bank_conflict(0)(1)) {
949    (0 until (VLEN/DCacheSRAMRowBits)).map(i => {
950      bankConflictData.bank_index(i) := bank_addrs(0)(i)
951    })
952    bankConflictData.way_index := OHToUInt(way_en(0))
953    bankConflictData.fake_rr_bank_conflict := set_addrs(0) === set_addrs(1) && div_addrs(0) === div_addrs(1)
954  }.otherwise {
955    (0 until (VLEN/DCacheSRAMRowBits)).map(i => {
956      bankConflictData.bank_index(i) := 0.U
957    })
958    bankConflictData.way_index := 0.U
959    bankConflictData.fake_rr_bank_conflict := false.B
960  }
961
962  val isWriteBankConflictTable = Constantin.createRecord(s"isWriteBankConflictTable${p(XSCoreParamsKey).HartId}")
963  bankConflictTable.log(
964    data = bankConflictData,
965    en = isWriteBankConflictTable.orR && rr_bank_conflict(0)(1),
966    site = siteName,
967    clock = clock,
968    reset = reset
969  )
970
971  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
972    XSPerfAccumulate(s"data_array_fake_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y) && set_addrs(x) === set_addrs(y) && div_addrs(x) === div_addrs(y))
973  ))
974
975  if (backendParams.debugEn){
976    load_req_with_bank_conflict.map(dontTouch(_))
977    dontTouch(bank_result)
978    dontTouch(read_bank_error_delayed)
979  }
980}
981